From fb6359586dbb11709e1afedf6d5a2b94d37a2d02 Mon Sep 17 00:00:00 2001 From: Pavel Esir Date: Fri, 18 Feb 2022 22:40:28 +0300 Subject: [PATCH 001/310] fix ConvertGroupedStridedSlice.py for XLNet (#10496) --- .../mo/middle/ConvertGroupedStridedSlice.py | 4 ++ .../middle/ConvertGroupedStridedSlice_test.py | 39 +++++++++++++++++++ 2 files changed, 43 insertions(+) diff --git a/tools/mo/openvino/tools/mo/middle/ConvertGroupedStridedSlice.py b/tools/mo/openvino/tools/mo/middle/ConvertGroupedStridedSlice.py index ec606ef6191..a0ca5a28a84 100644 --- a/tools/mo/openvino/tools/mo/middle/ConvertGroupedStridedSlice.py +++ b/tools/mo/openvino/tools/mo/middle/ConvertGroupedStridedSlice.py @@ -91,6 +91,10 @@ class ConvertGroupedStridedSlice(MiddleReplacementPattern): sorted_out_nodes = sorted(out_nodes, key=lambda n: list(n.slices)) out_nodes = unique_by(sorted_out_nodes, strided_slices_equality) + # if there is only one StridedSlice out_node with unique 'slices', + # there is nothing to optimize, continue to the next data node + if len(out_nodes) <= 1: + continue for node in out_nodes: if len(node.slices) != len(out_nodes[0].slices): diff --git a/tools/mo/unit_tests/mo/middle/ConvertGroupedStridedSlice_test.py b/tools/mo/unit_tests/mo/middle/ConvertGroupedStridedSlice_test.py index 55a37668d5f..15099677bb6 100644 --- a/tools/mo/unit_tests/mo/middle/ConvertGroupedStridedSlice_test.py +++ b/tools/mo/unit_tests/mo/middle/ConvertGroupedStridedSlice_test.py @@ -806,6 +806,45 @@ class ConvertGroupedStridedSliceTests(unittest.TestCase): (flag, resp) = compare_graphs(graph, graph_ref, 'concat_1_data', check_op_attrs=True) self.assertTrue(flag, resp) + # one unuque StridedSlice + def test_12(self): + graph = build_graph(nodes_attributes, + [('placeholder_1', 'placeholder_1_data'), + ('placeholder_1_data', 'sslice_1'), + ('sslice_1', 'sslice_1_data'), + ('placeholder_1_data', 'sslice_2'), + ('sslice_2', 'sslice_2_data'), + ], + {'placeholder_1_data': {'shape': np.array([1, 511])}, + + 'sslice_1': {'slices': np.array([slice(0, 1, 1), slice(0, 1, 1)]), + 'begin_mask': np.array([0, 1, 0]), + 'end_mask': np.array([0, 1, 0]), + 'new_axis_mask': np.array([0, 0, 0]), + 'shrink_axis_mask': np.array([0, 0, 0]), + 'ellipsis_mask': np.array([0, 0, 0])}, + 'sslice_1_data': {'shape': np.array([1, 1, 511])}, + + 'sslice_2': {'slices': np.array([slice(0, 1, 1), slice(0, 1, 1)]), + 'begin_mask': np.array([0, 1, 0]), + 'end_mask': np.array([0, 1, 0]), + 'new_axis_mask': np.array([0, 0, 0]), + 'shrink_axis_mask': np.array([0, 0, 0]), + 'ellipsis_mask': np.array([0, 0, 0])}, + 'sslice_2_data': {'shape': np.array([1, 1, 511])}, + }) + graph.graph['layout'] = 'NHWC' + + graph_ref = graph.copy() + + pattern = ConvertGroupedStridedSlice() + pattern.find_and_replace_pattern(graph) + + (flag, resp) = compare_graphs(graph, graph_ref, 'sslice_1_data', check_op_attrs=True) + self.assertTrue(flag, resp) + (flag, resp) = compare_graphs(graph, graph_ref, 'sslice_2_data', check_op_attrs=True) + self.assertTrue(flag, resp) + class AddReshapeAfterStridedSliceTests(unittest.TestCase): def test_ss_1_shrink_last(self): From 2e164b4ddc70d61c295d2342360a6f05f2efe813 Mon Sep 17 00:00:00 2001 From: Anastasia Popova Date: Sat, 19 Feb 2022 02:47:01 +0300 Subject: [PATCH 002/310] AvgPool3D translator, fix of MaxPool translator in TF FE (#10530) * Fixed MaxPool translator, added AvgPool3D translator. * Update src/frontends/tensorflow/src/op/avg_pool.cpp Co-authored-by: Maxim Vafin * Code style. 
Co-authored-by: Maxim Vafin --- src/frontends/tensorflow/src/op/avg_pool.cpp | 22 +++++++++++++------- src/frontends/tensorflow/src/op/max_pool.cpp | 2 +- src/frontends/tensorflow/src/op_table.cpp | 1 + 3 files changed, 16 insertions(+), 9 deletions(-) diff --git a/src/frontends/tensorflow/src/op/avg_pool.cpp b/src/frontends/tensorflow/src/op/avg_pool.cpp index f44b5748b50..86849a42b3c 100644 --- a/src/frontends/tensorflow/src/op/avg_pool.cpp +++ b/src/frontends/tensorflow/src/op/avg_pool.cpp @@ -21,15 +21,21 @@ OutputVector translate_avg_pool_op(const NodeContext& node) { auto tf_padding_type = node.get_attribute("padding"); auto tf_data_format = node.get_attribute("data_format"); - TENSORFLOW_OP_VALIDATION(node, - tf_data_format == "NHWC" || tf_data_format == "NCHW", - "AvgPool data format is neither NHWC nor NCHW"); + TENSORFLOW_OP_VALIDATION( + node, + tf_data_format == "NHWC" || tf_data_format == "NCHW" || tf_data_format == "NDHWC" || tf_data_format == "NCDHW", + "AvgPool data format is neither NHWC (NDHWC) nor NCHW (NCDHW)"); - bool is_nhwc = (tf_data_format == "NHWC"); + bool is_nhwc = (tf_data_format == "NHWC") || (tf_data_format == "NDHWC"); - Strides ng_strides(2); - Shape ng_image_shape(2); - Shape ng_kernel_shape(2); + int N = 2; + if (node.get_op_type() == "AvgPool3D") { + N = 3; + } + + Strides ng_strides(N); + Shape ng_image_shape(N); + Shape ng_kernel_shape(N); convert_nhwc_to_hw(is_nhwc, tf_strides, ng_strides); convert_nhwc_to_hw(is_nhwc, ng_input.get_shape(), ng_image_shape); convert_nhwc_to_hw(is_nhwc, tf_ksize, ng_kernel_shape); @@ -37,7 +43,7 @@ OutputVector translate_avg_pool_op(const NodeContext& node) { CoordinateDiff padding_below; CoordinateDiff padding_above; - Shape ng_dilations{1, 1}; + Shape ng_dilations(N, 1); make_padding(tf_padding_type, ng_image_shape, ng_kernel_shape, diff --git a/src/frontends/tensorflow/src/op/max_pool.cpp b/src/frontends/tensorflow/src/op/max_pool.cpp index 53873b29444..3e00cc36e7c 100644 --- a/src/frontends/tensorflow/src/op/max_pool.cpp +++ b/src/frontends/tensorflow/src/op/max_pool.cpp @@ -25,7 +25,7 @@ OutputVector translate_max_pool_op(const NodeContext& node) { bool is_nhwc = (tf_data_format == "NHWC") || (tf_data_format == "NDHWC"); int N = 2; - if (node.get_name() == "MaxPool3D") { + if (node.get_op_type() == "MaxPool3D") { N = 3; } Strides ng_strides(N); diff --git a/src/frontends/tensorflow/src/op_table.cpp b/src/frontends/tensorflow/src/op_table.cpp index 9ab49e012f9..aeeb0983278 100644 --- a/src/frontends/tensorflow/src/op_table.cpp +++ b/src/frontends/tensorflow/src/op_table.cpp @@ -157,6 +157,7 @@ const std::map get_supported_ops() { {"ArgMax", translate_arg_max_op}, {"ArgMin", translate_arg_min_op}, {"AvgPool", translate_avg_pool_op}, + {"AvgPool3D", translate_avg_pool_op}, {"BatchToSpaceND", translate_batch_nd_and_space_nd_op}, {"BiasAdd", translate_bias_add_op}, {"Cast", translate_cast_op}, From 71fdcdf899a8669a9d8e8953c59c6e73532bbd2d Mon Sep 17 00:00:00 2001 From: Maxim Vafin Date: Sat, 19 Feb 2022 02:52:48 +0300 Subject: [PATCH 003/310] Fix Unpack translator in TF FE (#10494) * Fix Unpack translator in TF FE * Apply review feedback --- src/frontends/tensorflow/src/op/unpack.cpp | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/frontends/tensorflow/src/op/unpack.cpp b/src/frontends/tensorflow/src/op/unpack.cpp index 9e1de80962b..d5661148751 100644 --- a/src/frontends/tensorflow/src/op/unpack.cpp +++ b/src/frontends/tensorflow/src/op/unpack.cpp @@ -19,9 +19,17 @@ OutputVector 
translate_unpack_op(const NodeContext& node) { auto num = node.get_attribute("num"); auto axis_const = make_shared(element::i64, Shape{}, axis); - auto res = make_shared(input, axis_const, num); - set_node_name(node.get_name(), res); - return res->outputs(); + auto split = make_shared(input, axis_const, num); + OutputVector res; + int idx = 0; + for (auto out : split->outputs()) { + auto squeezed_res = make_shared(out, axis_const); + squeezed_res->set_friendly_name(node.get_name() + "/squeeze_" + to_string(idx)); + set_out_name(node.get_name() + ":" + std::to_string(idx), squeezed_res->output(0)); + ++idx; + res.push_back(squeezed_res); + } + return res; } } // namespace op } // namespace tensorflow From 661002689f475a8e86b4418b4fc6e9a85cec98fb Mon Sep 17 00:00:00 2001 From: Alexey Lebedev Date: Sat, 19 Feb 2022 05:58:51 +0300 Subject: [PATCH 004/310] latency mode is default for sync (#10521) --- tools/benchmark_tool/openvino/tools/benchmark/main.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/benchmark_tool/openvino/tools/benchmark/main.py b/tools/benchmark_tool/openvino/tools/benchmark/main.py index 30df72aece3..549fcfcae16 100644 --- a/tools/benchmark_tool/openvino/tools/benchmark/main.py +++ b/tools/benchmark_tool/openvino/tools/benchmark/main.py @@ -88,9 +88,9 @@ def run(args): logger.warning(f"No device {device} performance hint is set.") args.perf_hint = "" else: + args.perf_hint = "THROUGHPUT" if benchmark.api_type == "async" else "LATENCY" logger.warning(f"PerformanceMode was not explicitly specified in command line. " + - f"Device {device} performance hint will be set to THROUGHPUT.") - args.perf_hint = "throughput" + f"Device {device} performance hint will be set to " + args.perf_hint + ".") else: logger.warning(f"Device {device} does not support performance hint property(-hint).") From af62ff22b14c92344186594e19722bfde253570a Mon Sep 17 00:00:00 2001 From: Mingyu Kim Date: Sat, 19 Feb 2022 21:55:15 +0900 Subject: [PATCH 005/310] [GPU] Mixed precision fix for mask rcnn (#10467) (#10535) * Select proper layout for fp16-int8 mixed precision network * Set proper layout in layout propagation for mixed precision --- .../intel_gpu/src/graph/convolution.cpp | 21 ++++++- .../remove_redundant_reorders.cpp | 20 +++++++ .../graph/graph_optimizer/reorder_inputs.cpp | 55 +++++++++++-------- .../test_cases/concatenation_gpu_test.cpp | 5 ++ 4 files changed, 75 insertions(+), 26 deletions(-) diff --git a/src/plugins/intel_gpu/src/graph/convolution.cpp b/src/plugins/intel_gpu/src/graph/convolution.cpp index 9105e987eb9..208b1c45725 100644 --- a/src/plugins/intel_gpu/src/graph/convolution.cpp +++ b/src/plugins/intel_gpu/src/graph/convolution.cpp @@ -165,6 +165,22 @@ layout convolution_inst::calc_output_layout(convolution_node const& node) { input_layout.data_padding}; } + // Adjust output format for mixed precision case in onednn + auto out_fmt = input_layout.format; + if (node.get_preferred_impl_type() == impl_types::onednn) { + if (data_type_traits::is_i8_u8(output_type)) { + if (input_layout.format == format::b_fs_yx_fsv16) + out_fmt = format::b_fs_yx_fsv32; + else if (input_layout.format == format::bs_fs_yx_bsv32_fsv16) + out_fmt = format::bs_fs_yx_bsv32_fsv32; + } else if (data_type_traits::is_floating_point(output_type)) { + if (input_layout.format == format::b_fs_yx_fsv32) + out_fmt = format::b_fs_yx_fsv16; + else if (input_layout.format == format::bs_fs_yx_bsv32_fsv32) + out_fmt = format::bs_fs_yx_bsv32_fsv16; + } + } + // get output feature map from 
weights. It should be the same as number of biases. Will be verifed in // convolution::create() auto group = desc->groups; @@ -208,7 +224,7 @@ layout convolution_inst::calc_output_layout(convolution_node const& node) { return {output_type, format::b_fs_yx_32fp, output_size}; } - return {output_type, input_layout.format, output_size}; + return {output_type, out_fmt, output_size}; } auto output_range = calc_sliding_window_output_range(input_layout.size, @@ -231,8 +247,7 @@ layout convolution_inst::calc_output_layout(convolution_node const& node) { if (output_type == data_types::bin) { return {output_type, format::b_fs_yx_32fp, output_size}; } - - return {output_type, input_layout.format, output_size}; + return {output_type, out_fmt, output_size}; } std::string convolution_inst::to_string(convolution_node const& node) { diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/remove_redundant_reorders.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/remove_redundant_reorders.cpp index 00729427a12..eb6b8ca1ea6 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/remove_redundant_reorders.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/remove_redundant_reorders.cpp @@ -16,15 +16,21 @@ #include "permute_inst.h" #include "depth_to_space_inst.h" #include "region_yolo_inst.h" +#include "intel_gpu/runtime/debug_configuration.hpp" using namespace cldnn; +#define LOG_NODE_REMOVAL(id) GPU_DEBUG_IF(debug_config->verbose >= 2) { \ + GPU_DEBUG_COUT << "[remove_redundant_reorders:" << __LINE__ << "] " << "Remove node: " << (id) << std::endl; } + + remove_redundant_reorders::remove_redundant_reorders(layout_optimizer& lo_ref, bool enable_reorder_fusing, bool update_implementations, bool remove_output_reorders) : base_pass("remove_redundant_reorders"), lo(lo_ref), enable_reorder_fusing(enable_reorder_fusing), update_implementations(update_implementations), remove_output_reorders(remove_output_reorders) {} void remove_redundant_reorders::run(program& p) { + GPU_DEBUG_GET_INSTANCE(debug_config); auto update_implementation = [&](program_node& node) { if (!update_implementations) return; @@ -113,6 +119,7 @@ void remove_redundant_reorders::run(program& p) { } node.can_be_optimized(true); + LOG_NODE_REMOVAL(node.id()); p.extract_and_remove(node); for (auto rl : recalc_list) { @@ -168,6 +175,7 @@ void remove_redundant_reorders::run(program& p) { dep_prim->output_format = output_layout.format; dep_prim->output_data_type = output_layout.data_type; + LOG_NODE_REMOVAL(r_node.id()); r_node.can_be_optimized(true); p.add_optimized_primitive_info(r_node.id()); p.extract_and_remove(r_node); @@ -246,6 +254,8 @@ void remove_redundant_reorders::run(program& p) { } else { p.add_optimized_primitive_info(r_node.get_primitive()->id); } + + LOG_NODE_REMOVAL(r_node.id()); p.extract_and_remove( r_node); // try to remove if possible (with respect to r_node not being marked as output) } @@ -292,6 +302,8 @@ void remove_redundant_reorders::run(program& p) { // pointing to, we should increment it again if (remove_reorder_node == *itr) itr++; + + LOG_NODE_REMOVAL(remove_reorder_node->id()); p.replace_all_usages(*remove_reorder_node, *node); p.add_optimized_primitive_info(remove_reorder_node->id()); p.remove_all_connections(*remove_reorder_node); @@ -336,6 +348,8 @@ void remove_redundant_reorders::run(program& p) { if (input.type()->does_possible_implementation_exist(input)) { node.can_be_optimized(true); p.add_optimized_primitive_info(node.id()); + + LOG_NODE_REMOVAL(node.id()); p.extract_and_remove(node); } else 
{ input.set_output_layout(old_output_layout_of_input, false); @@ -363,6 +377,8 @@ void remove_redundant_reorders::run(program& p) { continue; dep.merge_output_padding(node.get_output_layout().data_padding); + + LOG_NODE_REMOVAL(node.id()); p.replace_all_usages(node, dep); p.add_optimized_primitive_info(node.id()); p.remove_all_connections(node); @@ -394,6 +410,7 @@ void remove_redundant_reorders::run(program& p) { return false; dep.merge_output_padding(node->get_output_layout().data_padding); + LOG_NODE_REMOVAL(node->id()); p.replace_all_usages(*node, dep); p.get_processing_order().erase(node); p.add_optimized_primitive_info(node->id()); @@ -455,6 +472,7 @@ void remove_redundant_reorders::run(program& p) { node->set_input_layout(local_desc.input_layout); // remove reorder node + LOG_NODE_REMOVAL(node->id()); node->can_be_optimized(true); p.add_optimized_primitive_info(node->id()); p.extract_and_remove(*node); @@ -522,12 +540,14 @@ void remove_redundant_reorders::run(program& p) { reshape_node.get_fused_activations_funcs().empty() && reshape_node.get_fused_primitives().empty(); if (remove_dep) { + LOG_NODE_REMOVAL(reshape_input_node.id()); reshape_input_node.can_be_optimized(true); p.add_optimized_primitive_info(reshape_input_node.id()); p.extract_and_remove(reshape_input_node); } if (remove_current) { + LOG_NODE_REMOVAL(reshape_node.id()); reshape_node.can_be_optimized(true); p.add_optimized_primitive_info(reshape_node.id()); p.extract_and_remove(reshape_node); diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_inputs.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_inputs.cpp index 779eab64e39..13421c69130 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_inputs.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_inputs.cpp @@ -20,6 +20,7 @@ #include #include #include +#include using namespace cldnn; @@ -562,7 +563,7 @@ void reorder_inputs::run(program& p, layout_optimizer& lo, reorder_factory& rf) } }; - const auto reorder_convolution = [&p, &lo, &rf](typed_program_node& conv_node) { + const auto reorder_convolution = [&p, &lo, &rf, &debug_config](typed_program_node& conv_node) { { // reorder weights convolution auto& weights = conv_node.weights(); @@ -602,35 +603,43 @@ void reorder_inputs::run(program& p, layout_optimizer& lo, reorder_factory& rf) conv_node.get_dependencies().front()->set_output_layout(new_layout, false); } - std::vector wrong_format = {format::b_fs_yx_fsv16, format::bs_fs_yx_bsv32_fsv16}; - std::vector correct_format = {format::b_fs_yx_fsv32, format::bs_fs_yx_bsv32_fsv32}; - for (int i = 0; i < wrong_format.size(); i++) { - // reorder for onednn mixed-precision conv - // If the layouts are like below, change input layout to fsv32. - // From: - // (bsv32_fsv16.u8) --> conv --> (bsv32_fsv16.fp16) - // To: - // (bsv32_fsv16.u8) --> reorder --> (bsv32_fsv32.u8) --> conv --> (bsv32_fsv16.fp16) - // - // Do not apply such change for b=1 first conv - + // reorder for onednn mixed-precision conv + // If the layouts are like below, change input layout to fsv32. 
+ // From: + // (bsv32_fsv16.u8) --> conv --> (bsv32_fsv16.fp16) + // To: + // (bsv32_fsv16.u8) --> reorder --> (bsv32_fsv32.u8) --> conv --> (bsv32_fsv16.fp16) + // + // Do not apply such change for b=1 first conv + enum class __data_type {i8_u8, floating_point}; + // Errata for mixed precision in onednn + // data_type, wrong_format, correct_format + std::vector> errata = { + {__data_type::i8_u8, format::b_fs_yx_fsv16, format::b_fs_yx_fsv32}, + {__data_type::i8_u8, format::bs_fs_yx_bsv32_fsv16, format::bs_fs_yx_bsv32_fsv32}, + {__data_type::floating_point, format::b_fs_yx_fsv32, format::b_fs_yx_fsv16}, + {__data_type::floating_point, format::bs_fs_yx_bsv32_fsv32, format::bs_fs_yx_bsv32_fsv16}}; + for (auto &e : errata) { auto prev_node = conv_node.get_dependencies().front(); - auto old_layout = prev_node->get_output_layout(); + auto prev_layout = prev_node->get_output_layout(); auto conv_layout = conv_node.get_output_layout(); + auto is_target_dt_in_errata = (std::get<0>(e) == __data_type::i8_u8 && data_type_traits::is_i8_u8(prev_layout.data_type)) || + (std::get<0>(e) == __data_type::floating_point && data_type_traits::is_floating_point(prev_layout.data_type)); + auto wrong_format = std::get<1>(e); + auto correct_format = std::get<2>(e); if (lo.get_optimization_attributes().use_onednn_impls - && conv_layout.format == wrong_format[i] - && data_type_traits::is_i8_u8(old_layout.data_type) - && (old_layout.format == wrong_format[i]) - && !(old_layout.size.batch[0] == 1 && old_layout.size.feature[0] <= 4)) { - auto new_layout = old_layout; - new_layout.format = correct_format[i]; + && is_target_dt_in_errata + && conv_layout.format == wrong_format + && prev_layout.format == wrong_format + && !(prev_layout.size.batch[0] == 1 && prev_layout.size.feature[0] <= 4)) { + auto new_layout = prev_layout; + new_layout.format = correct_format; auto new_input = rf.get_reorder(prev_node->id(), - old_layout, + prev_layout, new_layout); - if (new_input.first) { + if (new_input.first) p.add_intermediate(new_input.first, conv_node, 0, !new_input.second); - } // Prevent layout propagation as we are using mixed precision for conv conv_node.get_dependencies().front()->set_output_layout(new_layout, false); diff --git a/src/plugins/intel_gpu/tests/test_cases/concatenation_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/concatenation_gpu_test.cpp index 84da3f44b90..34b18e1828f 100644 --- a/src/plugins/intel_gpu/tests/test_cases/concatenation_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/concatenation_gpu_test.cpp @@ -1107,6 +1107,11 @@ public: } void test(format::type fmt) { + auto& engine = get_onednn_test_engine(); + if (!engine.get_device_info().supports_immad) { + // This case is only for device that uses onednn. 
+ return; + } auto input = generate_input(); // implicit concat From 5671ca2cf58f84c42fdc9b10d5164f58fe69a6ac Mon Sep 17 00:00:00 2001 From: Alexey Lebedev Date: Sat, 19 Feb 2022 20:19:28 +0300 Subject: [PATCH 006/310] add test (#10531) --- .../test_infer_request.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/src/bindings/python/tests/test_inference_engine/test_infer_request.py b/src/bindings/python/tests/test_inference_engine/test_infer_request.py index abe51c294b6..4119637c04e 100644 --- a/src/bindings/python/tests/test_inference_engine/test_infer_request.py +++ b/src/bindings/python/tests/test_inference_engine/test_infer_request.py @@ -10,7 +10,7 @@ import time import openvino.runtime.opset8 as ops from openvino.runtime import Core, AsyncInferQueue, Tensor, ProfilingInfo, Model -from openvino.runtime import Type, Shape, Layout +from openvino.runtime import Type, PartialShape, Shape, Layout from openvino.preprocess import PrePostProcessor from ..conftest import model_path, read_image @@ -656,3 +656,20 @@ def test_invalid_inputs_container(device): with pytest.raises(TypeError) as e: request.infer(inputs) assert "Inputs should be either list or dict! Current type:" in str(e.value) + + +def test_infer_dynamic_model(device): + core = Core() + param = ops.parameter(PartialShape([-1, -1])) + model = Model(ops.relu(param), [param]) + compiled = core.compile_model(model, device) + assert compiled.input().partial_shape.is_dynamic + request = compiled.create_infer_request() + + shape1 = [1, 28] + request.infer([np.random.normal(size=shape1)]) + assert request.get_input_tensor().shape == Shape(shape1) + + shape2 = [1, 32] + request.infer([np.random.normal(size=shape2)]) + assert request.get_input_tensor().shape == Shape(shape2) From 5c7be8543540a86b0525ca05e563127f11420e19 Mon Sep 17 00:00:00 2001 From: Alexander Kozlov Date: Sun, 20 Feb 2022 09:43:14 +0300 Subject: [PATCH 007/310] [POT] Documentation update (#10068) * Updated main README * Added saturation fix desciption * Changed Low-precision model representation document * Added Simplified mode desciption. Updated DefaultQuantization, AccuracyAware, API descriptions. * Added Data-free model description. 
Adjusted other Readmes accordingly * Revised Configuration file description * Revised AA method description * Changed Quantization readme * Cross-links in quantization methods * Fixed reference * Fixed the structure * Removed data-free * Update tools/pot/docs/CLI.md Co-authored-by: Nikita Malinin * Update tools/pot/openvino/tools/pot/api/README.md Co-authored-by: Nikita Malinin * Applied comments * Fixed comments * Applied more comment * Applied comments * Fixed build errors * Fixed build errors * Small changes * Fixed a typo Co-authored-by: Nikita Malinin --- tools/pot/README.md | 74 +++++--- tools/pot/configs/README.md | 33 ++-- tools/pot/docs/BestPractices.md | 2 +- tools/pot/docs/CLI.md | 18 +- tools/pot/docs/ModelRepresentation.md | 29 ++-- tools/pot/docs/SaturationIssue.md | 38 ++++ tools/pot/docs/SimplifiedMode.md | 31 ++++ tools/pot/docs/images/api.png | 3 + tools/pot/docs/images/model_flow.png | 3 + tools/pot/docs/images/use_cases.png | 3 + tools/pot/docs/images/workflow_simple.png | 3 + tools/pot/docs/pot_docs.xml | 11 +- .../pot/algorithms/quantization/README.md | 152 ++-------------- .../quantization/accuracy_aware/README.md | 41 ++--- .../algorithms/quantization/default/README.md | 103 +++-------- .../algorithms/quantization/ranger/README.md | 6 +- tools/pot/openvino/tools/pot/api/README.md | 162 +++++++----------- 17 files changed, 292 insertions(+), 420 deletions(-) create mode 100644 tools/pot/docs/SaturationIssue.md create mode 100644 tools/pot/docs/SimplifiedMode.md create mode 100644 tools/pot/docs/images/api.png create mode 100644 tools/pot/docs/images/model_flow.png create mode 100644 tools/pot/docs/images/use_cases.png create mode 100644 tools/pot/docs/images/workflow_simple.png diff --git a/tools/pot/README.md b/tools/pot/README.md index 45728f7ee1a..aeed38b3732 100644 --- a/tools/pot/README.md +++ b/tools/pot/README.md @@ -13,6 +13,7 @@ Command-line Interface pot_compression_api_README pot_configs_README + Deep neural network protection pot_docs_FrequentlyAskedQuestions @endsphinxdirective @@ -20,50 +21,71 @@ ## Introduction Post-training Optimization Tool (POT) is designed to accelerate the inference of deep learning models by applying -special methods without model retraining or fine-tuning, like post-training quantization. Therefore, the tool does not +special methods without model retraining or fine-tuning, for example, post-training 8-bit quantization. Therefore, the tool does not require a training dataset or a pipeline. To apply post-training algorithms from the POT, you need: * A floating-point precision model, FP32 or FP16, converted into the OpenVINO™ Intermediate Representation (IR) format and run on CPU with the OpenVINO™. -* A representative calibration dataset representing a use case scenario, for example, 300 images. +* A representative calibration dataset representing a use case scenario, for example, 300 images. -Post-training Optimization Tool provides the following key -features: +Figure below shows the optimization workflow: +![](docs/images/workflow_simple.png) + +### Features * Two post-training 8-bit quantization algorithms: fast [DefaultQuantization](openvino/tools/pot/algorithms/quantization/default/README.md) and precise [AccuracyAwareQuantization](openvino/tools/pot/algorithms/quantization/accuracy_aware/README.md). * Compression for different hardware targets such as CPU and GPU. * Multiple domains: Computer Vision, Natural Language Processing, Recommendation Systems, Speech Recognition. 
+* [Command-line tool](docs/CLI.md) that provides a simple interface for basic use cases. * [API](openvino/tools/pot/api/README.md) that helps to apply optimization methods within a custom inference script written with OpenVINO Python* API. -* Symmetric and asymmetric quantization schemes. For details, see the [Quantization](openvino/tools/pot/algorithms/quantization/README.md) section. -* Per-channel quantization for Convolutional and Fully-Connected layers. - -The tool is aimed to fully automate the model transformation process without a need to change the model on the user's side. For details about -the low-precision flow in OpenVINO™, see the [Low Precision Optimization Guide](docs/LowPrecisionOptimizationGuide.md). +* (Experimental) [Ranger algorithm](@ref pot_ranger_README) for model prodection in safity-critical cases. For benchmarking results collected for the models optimized with POT tool, see [INT8 vs FP32 Comparison on Select Networks and Platforms](@ref openvino_docs_performance_int8_vs_fp32). -POT is opensourced on GitHub as a part of [https://github.com/openvinotoolkit/openvino](https://github.com/openvinotoolkit/openvino). +POT is opensourced on GitHub as a part of OpenVINO and available at https://github.com/openvinotoolkit/openvino/tools/pot. -Further documentation presumes that you are familiar with the basic Deep Learning concepts, such as model inference, -dataset preparation, model optimization, as well as with the OpenVINO™ toolkit and its components such -as [Model Optimizer](@ref openvino_docs_MO_DG_Deep_Learning_Model_Optimizer_DevGuide) +Further documentation presumes that you are familiar with basic Deep Learning concepts, such as model inference, +dataset preparation, model optimization, as well as with the OpenVINO™ toolkit and its components, such as [Model Optimizer](@ref openvino_docs_MO_DG_Deep_Learning_Model_Optimizer_DevGuide) and [Accuracy Checker Tool](@ref omz_tools_accuracy_checker). -## Use POT -![](docs/images/workflow.png) +## Get started -The POT provides three basic usage scenarios: -* **[Command-line interface](docs/CLI.md)**: this is the recommended path if the model from OpenVINO™ -[Model Zoo](https://github.com/openvinotoolkit/open_model_zoo) or there is a valid [Accuracy Checker Tool](@ref omz_tools_accuracy_checker) -configuration file for the model that allows validating model accuracy using [Accuracy Checker Tool](@ref omz_tools_accuracy_checker). -* **[Python* API](openvino/tools/pot/api/README.md)**: it allows integrating optimization methods implemented in POT into -a Python* inference script written with [Python* API](ie_python_api/api.html). -This flow is recommended if it is not possible to use [Accuracy Checker Tool](@ref omz_tools_accuracy_checker) -for validation on the dedicated dataset. -* **[Deep Learning Workbench](@ref workbench_docs_Workbench_DG_Introduction) (DL Workbench)**: a web-based graphical environment that enables you to optimize, fine-tune, analyze, visualize, and compare performance of deep learning models. +### Installation +To install POT, follow the [Installation Guide](docs/InstallationGuide.md). -> **NOTE**: POT also supports optimization in the so-called *Simplified mode* (see [Configuration File Description](configs/README.md)) which is essentially a local implementation of the POT Python API aimed at quantizing Computer Vision with simple pre-processing and inference flow. However using this mode can lead to an inaccurate model after optimization due to the difference in the model preprocessing. 
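As an illustration of the Python* API option mentioned above, the sketch below shows roughly how a quantization script can be put together. It is only a sketch: the model name, the IR paths and the `CalibrationLoader` class are hypothetical placeholders, and the exact `DataLoader` item format should be checked against the [API documentation](openvino/tools/pot/api/README.md) and the samples shipped with it.

```python
import numpy as np
from addict import Dict

from openvino.tools.pot.api import DataLoader
from openvino.tools.pot.engines.ie_engine import IEEngine
from openvino.tools.pot.graph import load_model, save_model
from openvino.tools.pot.graph.model_utils import compress_model_weights
from openvino.tools.pot.pipeline.initializer import create_pipeline


class CalibrationLoader(DataLoader):
    """Hypothetical loader yielding random NCHW tensors; replace with real calibration data."""

    def __len__(self):
        return 300

    def __getitem__(self, index):
        # (annotation, data) pairs, as in the API samples; DefaultQuantization ignores the annotation.
        return (index, None), np.random.rand(3, 224, 224).astype(np.float32)


model_config = Dict({"model_name": "model",          # placeholder name and IR paths
                     "model": "model.xml",
                     "weights": "model.bin"})
engine_config = Dict({"device": "CPU"})
algorithms = [{"name": "DefaultQuantization",
               "params": {"target_device": "ANY", "preset": "performance", "stat_subset_size": 300}}]

model = load_model(model_config)
engine = IEEngine(config=engine_config, data_loader=CalibrationLoader(engine_config))
pipeline = create_pipeline(algorithms, engine)

compressed_model = pipeline.run(model)      # run 8-bit quantization
compress_model_weights(compressed_model)    # store weights in INT8 to shrink the .bin file
save_model(compressed_model, save_path="./optimized")
```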
+### Usage options + +![](docs/images/use_cases.png) + +The POT provides three basic usage options: +* **Command-line interface (CLI)**: + * [**Simplified mode**](@ref pot_docs_simplified_mode): use this option if the model belongs to the Computer Vision domain and you do have an unannotated dataset for optimization. Note that this optimization method can cause a deviation of model accuracy. + * [**Model Zoo flow**](@ref pot_compression_cli_README): this option is recommended if the model is imported from OpenVINO™ +[Model Zoo](https://github.com/openvinotoolkit/open_model_zoo) or there is a valid [Accuracy Checker Tool](@ref omz_tools_accuracy_checker_README) +configuration file for the model that allows validating model accuracy using [Accuracy Checker Tool](@ref omz_tools_accuracy_checker_README). +* [**Python\* API**](@ref pot_compression_api_README): this option allows integrating the optimization methods implemented in POT into +a Python* inference script that uses [OpenVINO Python* API](https://docs.openvino.ai/latest/openvino_inference_engine_ie_bridges_python_docs_api_overview.html). + + +POT is also integrated into [Deep Learning Workbench](@ref workbench_docs_Workbench_DG_Introduction) (DL Workbench), a web-based graphical environment +that enables you to optimize, tune, analyze, visualize, and compare performance of deep learning models. + +### Examples + +OpenVINO provides several examples to demonstrate the POT optimization workflow: + +* Command-line example: + * [Quantization of Image Classification model](https://docs.openvino.ai/latest/pot_configs_examples_README.html) +* API tutorials: + * [Quantization of Image Classification model](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/301-tensorflow-training-openvino) + * [Quantization of Object Detection model from Model Zoo](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/111-detection-quantization) + * [Quantization of Segmentation model for medical data](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/110-ct-segmentation-quantize) + * [Quantization of BERT for Text Classification](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/105-language-quantize-bert) +* API examples: + * [Quantization of 3D segmentation model](https://github.com/openvinotoolkit/openvino/tree/master/tools/pot/openvino/tools/pot/api/samples/3d_segmentation) + * [Quantization of Face Detection model](https://github.com/openvinotoolkit/openvino/tree/master/tools/pot/openvino/tools/pot/api/samples/face_detection) + * [Quantization of Object Detection model with controable accuracy](https://github.com/openvinotoolkit/openvino/tree/master/tools/pot/openvino/tools/pot/api/samples/object_detection) + * [Speech example for GNA device](https://github.com/openvinotoolkit/openvino/tree/master/tools/pot/openvino/tools/pot/api/samples/speech) -To get started with POT, follow the [Installation Guide](docs/InstallationGuide.md). ## See Also diff --git a/tools/pot/configs/README.md b/tools/pot/configs/README.md index dfa4d284129..7e24a55049c 100644 --- a/tools/pot/configs/README.md +++ b/tools/pot/configs/README.md @@ -1,9 +1,4 @@ # Configuration File Description {#pot_configs_README} - -In the instructions below, the Post-training Optimization Tool directory `/deployment_tools/tools/post_training_optimization_toolkit` is referred to as ``. `` is the directory where Intel® Distribution of OpenVINO™ toolkit is installed. 
-> **NOTE**: Installation directory is different in the case of PyPI installation and does not contain examples of -> configuration files. - The tool is designed to work with the configuration file where all the parameters required for the optimization are specified. These parameters are organized as a dictionary and stored in a JSON file. JSON file allows using comments that are supported by the `jstyleson` Python* package. Logically all parameters are divided into three groups: @@ -34,21 +29,18 @@ This section contains only three parameters: "config": "./configs/examples/accuracy_checker/mobilenet_v2.yaml" } ``` -The main parameter is `"type"` which can take two possible options: `"accuracy_checher"` (default) and `"simplified"`, -which specify the engine that is used for model inference and validation (if supported): -- **Simplified mode** engine. This engine can be used only with `DefaultQuantization` algorithm to get fully quantized model -using a subset of images. It does not use the Accuracy Checker tool and annotation. To measure accuracy, you should implement -your own validation pipeline with OpenVINO API. - - To run the simplified mode, define engine section similar to the example `mobilenetV2_tf_int8_simple_mode.json` file from the `/configs/examples/quantization/classification/` directory. -- **Accuracy Checker** engine. It relies on the [Deep Learning Accuracy Validation Framework](@ref omz_tools_accuracy_checker) (Accuracy Checker) when inferencing DL models and working with datasets. -The benefit of this mode is you can compute accuracy in case you have annotations. It is possible to use accuracy aware -algorithms family when this mode is selected. -There are two options to define engine parameters in that mode: +The main parameter is `"type"` which can take two possible options: `"accuracy_checher"` (default) or `"simplified"`. It specifies the engine used for model inference and validation (if supported): +- **Simplified mode** engines. These engines can be used only with `DefaultQuantization` algorithm to get a fully quantized model. They do not use the Accuracy Checker tool and annotation. In the case, of this mode the following parameters are applicable: + - `"data_source"` Specifies the path to the directory​ where to calibration data is stored. + - `"layout"` - (Optional) Layout of input data. Supported values: [`"NCHW"`, `"NHWC"`, `"CHW"`, `"CWH"`]​. +- **Accuracy Checker** engine. It relies on the [Deep Learning Accuracy Validation Framework](@ref omz_tools_accuracy_checker_README) (Accuracy Checker) when inferencing DL models and working with datasets. +The benefit of this mode is you can compute accuracy in case you have annotations. When this mode is selected, you can use the accuracy aware algorithms family. +There are two options to define engine parameters in this mode: - Refer to the existing Accuracy Checker configuration file which is represented by the YAML file. It can be a file used for full-precision model validation. In this case, you should define only the `"config"` parameter containing a path to the AccuracyChecker configuration file. - Define all the [required Accuracy Checker parameters](@ref omz_tools_accuracy_checker_dlsdk_launcher) directly in the JSON file. In this case, POT just passes the corresponding dictionary of parameters to the Accuracy Checker when instantiating it. For more details, refer to the corresponding Accuracy Checker information and examples of configuration files provided with the tool: - - For the SSD-MobileNet model:
`/configs/examples/quantization/object_detection/ssd_mobilenetv1_int8.json` + - 8-bit quantization of [SSD-MobileNet model](https://github.com/openvinotoolkit/openvino/blob/master/tools/pot/configs/examples/quantization/object_detection/ssd_mobilenetv1_int8.json) ## Compression Parameters @@ -57,8 +49,11 @@ This section defines optimization algorithms and their parameters. For more deta ## Examples of the Configuration File -For a quick start, many examples of configuration files are provided and placed to the `/configs/examples` - folder. There you can find ready-to-use configurations for the models from various domains: Computer Vision (Image +For a quick start, many examples of configuration files are provided [here](https://github.com/openvinotoolkit/openvino/blob/master/tools/pot/configs/examples). There you can find ready-to-use configurations for the models from various domains: Computer Vision (Image Classification, Object Detection, Segmentation), Natural Language Processing, Recommendation Systems. We basically put configuration files for the models which require non-default configuration settings in order to get accurate results. -For details on how to run the Post-Training Optimization Tool with a sample configuration file, see the [instructions](@ref pot_configs_examples_README). +For details on how to run the Post-Training Optimization Tool with a sample configuration file, see the [example](@ref pot_configs_examples_README). + +## See Also +* [Optimization with Simplified mode](@ref pot_docs_simplified_mode) +* [POT API](@ref pot_compression_api_README) diff --git a/tools/pot/docs/BestPractices.md b/tools/pot/docs/BestPractices.md index 970322f14df..b0906f96372 100644 --- a/tools/pot/docs/BestPractices.md +++ b/tools/pot/docs/BestPractices.md @@ -10,7 +10,7 @@ we suggest reading the following [POT documentation](../README.md). > floating-point model is a prerequisite for model optimization. > It is also worth mentioning that in the case of 8-bit quantization it is recommended to run POT on the same CPU > architecture when optimizing for CPU or VNNI-based CPU when quantizing for a non-CPU device, such as GPU, VPU, or GNA. -> It should help to avoid the impact of the saturation issue that occurs on AVX and SSE based CPU devices. +> It should help to avoid the impact of the [saturation issue](@ref pot_saturation_issue) that occurs on AVX and SSE based CPU devices. ## Get Started with Post-Training Quantization diff --git a/tools/pot/docs/CLI.md b/tools/pot/docs/CLI.md index f4b0111ece2..184e3f03699 100644 --- a/tools/pot/docs/CLI.md +++ b/tools/pot/docs/CLI.md @@ -1,4 +1,4 @@ -# Use Post-Training Optimization Tool Command-Line Interface {#pot_compression_cli_README} +# Use Post-Training Optimization Tool Command-Line Interface (Model Zoo flow){#pot_compression_cli_README} @sphinxdirective @@ -6,6 +6,7 @@ :maxdepth: 1 :hidden: + Simplified mode End-to-end CLI example @endsphinxdirective @@ -16,7 +17,7 @@ its models then you can employ POT CLI to optimize your model. In other cases, you should consider using POT [API](@ref pot_compression_api_README). To start with POT CLI please refer to the following [example](@ref pot_configs_examples_README). -Note: There is also the so-called [**Simplified mode**](@ref pot_configs_README) that is basically aimed at INT8 quantization if the model is from the Computer Vision domain and has a simple dataset preprocessing, like image resize and crop. 
In this case, you can also use POT CLI for +Note: There is also the so-called [**Simplified mode**](@ref pot_docs_simplified_mode) that is basically aimed at INT8 quantization if the model is from the Computer Vision domain and has a simple dataset preprocessing, like image resize and crop. In this case, you can also use POT CLI for optimization. However, the accuracy results are not guaranteed in this case. Moreover, you are also limited in the optimization methods choice since the accuracy measurement is not available. @@ -29,12 +30,6 @@ optimization methods choice since the accuracy measurement is not available. 3. Prepare the Accuracy Checker configuration file and make sure that the model can be successfully inferred and achieves similar accuracy numbers as the reference model from the original framework. 4. Activate the Python environment in the command-line shell where the POT and the Accuracy Checker were installed. -5. (Optional). Set up the OpenVINO™ environment in the command-line shell with the following script if you -installed it from form the distribution file: - ```sh - source /bin/setupvars.sh - ``` - > **NOTE**: This step is not required if you use PyPI distribution. ## Run POT CLI There are two ways how to run POT via command line: @@ -68,7 +63,9 @@ The following command-line options are available to run the tool: | `--preset` | Use `performance` for fully symmetric quantization or `mixed` preset for symmetric quantization of weight and asymmetric quantization of activations. Applicable only when `-q` option is used.| | `-m`, `--model` | Path to the optimizing model file (.xml). Applicable only when `-q` option is used. | | `-w`, `--weights` | Path to the weights file of the optimizing model (.bin). Applicable only when `-q` option is used. | -| `-n`, `--name` | Model name. Applicable only when `-q` option is used. | +| `-n`, `--name` | Optional. Model name. Applicable only when `-q` option is used. | +| `--engine {accuracy_checker, simplified}` | Engine type used to specify CLI mode. Default: `accuracy_checker`. | +| `--data-source DATA_DIR` | Optional. Valid and required for Simplified mode only. Specifies the path to calibration data. | | `--ac-config` | Path to the Accuracy Checker configuration file. Applicable only when `-q` option is used. | | `--max-drop` | Optional. Maximum accuracy drop. Valid only for accuracy-aware quantization. Applicable only when `-q` option is used and `accuracy_aware` method is selected. | | `-c CONFIG`, `--config CONFIG` | Path to a config file with task- or model-specific parameters. | @@ -83,6 +80,5 @@ The following command-line options are available to run the tool: ## See Also - -* [Installation Guide](@ref pot_InstallationGuide) +* [Optimization with Simplified mode](@ref pot_docs_simplified_mode) * [Post-Training Optimization Best Practices](@ref pot_docs_BestPractices) diff --git a/tools/pot/docs/ModelRepresentation.md b/tools/pot/docs/ModelRepresentation.md index e99681612f5..a62a5b79652 100644 --- a/tools/pot/docs/ModelRepresentation.md +++ b/tools/pot/docs/ModelRepresentation.md @@ -1,29 +1,22 @@ -# Representation of Low-Precision Models +# Low-precision model representation {#pot_docs_model_representation} + +## Introduction The goal of this document is to describe how optimized models are represented in OpenVINO Intermediate Representation (IR) and provide guidance on interpretation rules for such models at runtime. 
-Currently, there are two groups of optimization methods that can influence on the IR after applying them to the full-precision model: +Currently, there are two groups of optimization methods that can change the IR after applying them to the full-precision model: - **Sparsity**. It is represented by zeros inside the weights and this is up to the hardware plugin how to interpret these zeros (use weights as is or apply special compression algorithms and sparse arithmetic). No additional mask is provided with the model. - **Quantization**. The rest of this document is dedicated to the representation of quantized models. ## Representation of quantized models -The OpenVINO Toolkit represents all the quantized models using the so-called [FakeQuantize](@ref openvino_docs_ops_quantization_FakeQuantize_1) operation. This operation is very expressive and allows mapping values from arbitrary input and output ranges. The whole idea behind that is quite simple: we project (discretize) the input values to the low-precision data type using affine transformation (with clamp and rounding) and then reproject discrete values back to the original range and data type. It can be considered as an emulation of the quantization process which happens at runtime. -In order to be able to execute a particular DL operation in low-precision all its inputs should be quantized i.e. should have FakeQuantize between operation and data blobs. The figure below shows an example of quantized Convolution which contains two FakeQuantize nodes: one for weights and one for activations (bias is quantized using the same parameters). +The OpenVINO Toolkit represents all the quantized models using the so-called [FakeQuantize](https://docs.openvino.ai/latest/openvino_docs_MO_DG_prepare_model_convert_model_Legacy_IR_Layers_Catalog_Spec.html#fakequantize-layer) operation. This operation is very expressive and allows mapping values from arbitrary input and output ranges. The whole idea behind that is quite simple: we project (discretize) the input values to the low-precision data type using affine transformation (with clamp and rounding) and then reproject discrete values back to the original range and data type. It can be considered as an emulation of the quantization/dequantization process which happens at runtime. The figure below shows a part of the DL model, namely the Convolutional layer, that undergoes various transformations on way from being a floating-point model to an integer model executed in the OpenVINO runtime. Column 2 of this figure below shows a model quantized with [Neural Network Compression Framework (NNCF)](https://github.com/openvinotoolkit/nncf). +![](images/model_flow.png) -![](./images/quantized_convolution.png) -
Figure 1. Example of quantized Convolution operation.
+To reduce memory footprint weights of quantized models are transformed to a target data type, e.g. in the case of 8-bit quantization, this is int8. During this transformation, the floating-point weights tensor and one of the FakeQuantize operations that correspond to it are replaced with 8-bit weight tensor and the sequence of Convert, Subtract, Multiply operations that represent the typecast and dequantization parameters (scale and zero-point) as it is shown in column 3 of the figure. -Starting from OpenVINO 2020.2 release all the quantized models are represented in the compressed form. It means that the weights of low-precision operations are converted into the target precision (e.g. INT8). It helps to substantially reduce the model size. The rest of the parameters can be represented by FLOAT32 or FLOAT16 precision depending on the input full-precision model used in the quantization process. Fig. 2 below shows an example of the part of the compressed IR. +## Interpreting FakeQuantize at runtime +At inference time, the quantized model undergoes the second set of transformations that allows interpreting floating-point operations with quantization rules as integer operations. OpenVINO Deep Learning Deployment Toolkit has a special component which is called Low-Precision Transformations (LPT) for that purpose. +At runtime each FakeQuantize can be split into two independent operations: **Quantize** and **Dequantize** (column 4). The former is aimed to transform the input data into the target precision while the latter transforms the resulting values back to the original range. *Dequantize* operations can be propagated forward through the linear layers, such as *Convolution* or *Fully-Connected*, and in some cases fused with the following *Quantize* operation for the next layer into the so-called *Requantize* operation (column 5). -![](./images/quantized_model_example.png) -
Figure 2. Example of compressed quantized model.
- -### Interpreting FakeQuantize at runtime -One important question that arises at inference time is how to correctly interpret quantized models and specifically FakeQuantize operations. OpenVINO Deep Learning Deployment Toolkit has a special component which is called Low-Precision Transformations (LPT). It is responsible for the translation of "fake-quantized" models into the models with low-precision operations. For more information about low-precision flow please refer to the following [document](https://docs.openvino.ai/latest/_docs_IE_DG_Int8Inference.html). Here we provide only a high-level overview of the interpretation rules of FakeQuantize operation. -At runtime each FakeQuantize can be split into two independent operations: **Quantize** and **Dequantize**. The former one is aimed to transform the input data into the target precision while the latter transforms the resulting values back to the original range and precision. In practice *Dequantize* operations can be propagated forward through the linear low-precision layers, such as *Convolution* or *Fully-Connected*, and in some cases fused with the following *Quantize* operation for the next layer into the so-called *Requantize* operation (see Fig. 3). - -![](./images/qdq_propagation.png) -
Figure 3. Quantization operations propagation at runtime. Q, DQ, RQ stand for Quantize, Dequantize, and Requantize correspondingly.
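To make the *Quantize*/*Dequantize* split concrete, here is a small worked example based on the FakeQuantize formula given below; the range and the input value are made up purely for illustration. With `input_low = output_low = 0`, `input_high = output_high = 6` and `levels = 256`, an input `x = 2.5` is quantized to the integer `q = round(2.5 / 6 * 255) = 106` and dequantized back to `106 / 255 * 6 ≈ 2.494`, so the rounding introduces an error of about `0.006` for this value.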
- -From the calculation standpoint, the FakeQuantize formula also is split into two parts accordingly: +From the computation standpoint, the FakeQuantize formula also is split into two parts accordingly: `output = round((x - input_low) / (input_high - input_low) * (levels-1)) / (levels-1) * (output_high - output_low) + output_low` The first part of this fomula represents *Quantize* operation: `q = round((x - input_low) / (input_high - input_low) * (levels-1))` diff --git a/tools/pot/docs/SaturationIssue.md b/tools/pot/docs/SaturationIssue.md new file mode 100644 index 00000000000..67821e7dbf6 --- /dev/null +++ b/tools/pot/docs/SaturationIssue.md @@ -0,0 +1,38 @@ +# Saturation (overflow) issue workaround {#pot_saturation_issue} + +## Introduction +8-bit instructions of previous generations of Intel® CPUs, namely that based on SSE, AVX-2, AVX-512 instruction sets, admit so-called saturation (overflow) of the intermediate buffer when calculating the dot product which is an essential part of Convolutional or MatMul operations. This saturation can lead to an accuracy drop on the aforementioned architectures during the inference of 8-bit quantized models. However, it is not possible to predict such degradation since most of the computations are executed in parallel during DL model inference which makes this process non-deterministic. This problem is typical for models with non-ReLU activation functions and a low level of redundancy, e.g. optimized or efficient models. It can prevent deploying the model on legacy HW or creating cross-platform applications. The problem does not occur on the CPUs with Intel Deep Learning Boost (VNNI) technology and further generations as well as GPUs. + +## How to detect +The only way to detect saturation issue is to run inference on the CPU that admits it and on the HW that does not have such a problem (e.g. VNNI-based CPU). If the accuracy difference is significant (e.g. more than 1%) this is the main indicator of the saturation issue impact. + +## Workaround +There is a workaround that helps fully address the saturation issue during the inference. The idea is to use only 7 bits to represent weights (of Convolutional or Fully-Connected layers) while quantizing activations using the full range of 8-bit data types. However, such a trick can lead to accuracy degradation itself due to the reduced representation of weights. On the other hand, using this trick for the first layer can help to mitigate the saturation issue for many models. + +POT tool provides three options to deal with the saturation issue which can be enabled in POT configuration file using the "saturation_fix" parameter: + +* (Default) Fix saturation issue for the first layer: "first_layer" option +* Apply for all layers in the model: "all" option +* Not apply saturation fix at all: "no" option + +Below is an example of the section in POT configuration file with the `saturation_fix` option: +```json +"algorithms": [ + { + "name": "DefaultQuantization", + "params": { + "preset": "performance", + "stat_subset_size": 300, + "saturation_fix": "all" // Apply the saturation fix to all the layers + } + } +] +``` +## Recommendations +If you observe the saturation issue we recommend trying the option "all" during the model quantization. If it does not help to improve the accuracy we recommend using [Quantization-aware training from NNCF](https://github.com/openvinotoolkit/nncf) and fine-tune the model. 
+ +If you are not planning to use legacy CPU HW you can use the option "no" which can also lead to slightly better accuracy. + +## See Also +* [Lower Numerical Precision Deep Learning Inference and Training blogpost](https://www.intel.com/content/www/us/en/developer/articles/technical/lower-numerical-precision-deep-learning-inference-and-training.html) +* [Configuration file desciption](@ref pot_configs_README) \ No newline at end of file diff --git a/tools/pot/docs/SimplifiedMode.md b/tools/pot/docs/SimplifiedMode.md new file mode 100644 index 00000000000..9e2a74874df --- /dev/null +++ b/tools/pot/docs/SimplifiedMode.md @@ -0,0 +1,31 @@ +# Optimization with Simplified mode {#pot_docs_simplified_mode} + +## Introduction +Simplified mode is designed to simplify data preparation for the model optimization process. The mode is represented by an implementation of Engine interface from the POT API that allows reading data from an arbitrary folder specified by the user. For more details about POT API please refer to the corresponding [description](pot_compression_api_README). Currently, Simplified mode is available only for image data stored in a single folder in PNG or JPEG formats. + +Note: This mode cannot be used with accuracy-aware methods, i.e. there is no way to control accuracy after optimization. Nevertheless, this mode can be helpful to estimate performance benefits when using model optimizations. + +## Usage +To use Simplified mode you should prepare data and place them in a separate folder. No other files should be presented in this folder. There are two options to run POT in the Simplified mode: +* Using command-line options only. Here is an example for 8-bit quantization: + + `pot -q default -m -w --engine simplified --data-source ` +* To provide more options you can use the corresponding `"engine"` section in the POT configuration file as follows: + ```json + "engine": { + "type": "simplified", + "layout": "NCHW", // Layout of input data. Supported ["NCHW", + // "NHWC", "CHW", "CWH"] layout + "data_source": "PATH_TO_SOURCE" // You can specify path to directory with images + // Also you can specify template for file names to filter images to load. + // Templates are unix style (This option valid only in simplified mode) + } + ``` + + +A template of configuration file for 8-bit quantization using Simplified mode can be found [here](https://github.com/openvinotoolkit/openvino/blob/master/tools/pot/configs/simplified_mode_template.json). + +For more details about how to use POT via CLI please refer to this [document](@ref pot_compression_cli_README). 
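For reference, the `"engine"` section shown above plugs into the same three-part structure (model, engine and compression parameters) used by any other POT configuration file. Below is a minimal sketch of such a file; the model name and all paths are placeholders:

```json
{
    "model": {
        "model_name": "model",
        "model": "<PATH_TO_MODEL>/model.xml",
        "weights": "<PATH_TO_MODEL>/model.bin"
    },
    "engine": {
        "type": "simplified",
        "layout": "NCHW",
        "data_source": "<PATH_TO_CALIBRATION_IMAGES>"
    },
    "compression": {
        "target_device": "ANY",
        "algorithms": [
            {
                "name": "DefaultQuantization",
                "params": {
                    "preset": "performance",
                    "stat_subset_size": 300
                }
            }
        ]
    }
}
```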
+ +## See Also + * [Configuration File Description](@ref pot_configs_README) \ No newline at end of file diff --git a/tools/pot/docs/images/api.png b/tools/pot/docs/images/api.png new file mode 100644 index 00000000000..f049bade9b6 --- /dev/null +++ b/tools/pot/docs/images/api.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a5bd3b61d61b7eecb51fa0e932bc8215659d8f5b92f96abba927d9d3f94f277 +size 38993 diff --git a/tools/pot/docs/images/model_flow.png b/tools/pot/docs/images/model_flow.png new file mode 100644 index 00000000000..efe7c338fb5 --- /dev/null +++ b/tools/pot/docs/images/model_flow.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5183c57dc825af40051782818d9bf40236bd6be8fbee3ae4e7a982000e4d6af8 +size 89875 diff --git a/tools/pot/docs/images/use_cases.png b/tools/pot/docs/images/use_cases.png new file mode 100644 index 00000000000..61a4b8952c1 --- /dev/null +++ b/tools/pot/docs/images/use_cases.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5650775fe986b294278186c12b91fadbb758e06783f500b9fd399e474eafe2c +size 34217 diff --git a/tools/pot/docs/images/workflow_simple.png b/tools/pot/docs/images/workflow_simple.png new file mode 100644 index 00000000000..54e92da0ecc --- /dev/null +++ b/tools/pot/docs/images/workflow_simple.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:791f253493350d04c62e53f40b086fb73ceb1b96d346c9772e82de9892fee7a4 +size 33789 diff --git a/tools/pot/docs/pot_docs.xml b/tools/pot/docs/pot_docs.xml index 8970cf33571..54c2684b051 100644 --- a/tools/pot/docs/pot_docs.xml +++ b/tools/pot/docs/pot_docs.xml @@ -6,15 +6,13 @@ - - - - - - + + + + @@ -28,6 +26,7 @@ + diff --git a/tools/pot/openvino/tools/pot/algorithms/quantization/README.md b/tools/pot/openvino/tools/pot/algorithms/quantization/README.md index 7b353875c6c..f6ba05ea2df 100644 --- a/tools/pot/openvino/tools/pot/algorithms/quantization/README.md +++ b/tools/pot/openvino/tools/pot/algorithms/quantization/README.md @@ -8,152 +8,38 @@ DefaultQuantization Algorithm AccuracyAwareQuantization Algorithm - TunableQuantization Algorithm + TunableQuantization algorithm + Saturation issue workaround + Low-precision model representation + @endsphinxdirective -The primary optimization feature of the Post-training Optimization Tool (POT) is uniform quantization. In general, -this method supports an arbitrary number of bits, greater or equal to two, which represents weights and activations. -During the quantization process, the method inserts [FakeQuantize](@ref openvino_docs_ops_quantization_FakeQuantize_1) -operations into the model graph automatically based on a predefined hardware target in order to produce the most -hardware-friendly optimized model: -![](../../../../../docs/images/convolution_quantization.png) +## Introduction -After that, different quantization algorithms can tune the `FakeQuantize` parameters or remove some of them in order to -meet the accuracy criteria. The resulting *fakequantized* models are interpreted and transformed to real low-precision -models during inference at the OpenVINO™ Inference Engine runtime giving real performance improvement. +The primary optimization feature of the Post-training Optimization Tool (POT) is 8-bit uniform quantization which allows substantially increasing inference performance on all the platforms that have 8-bit instructions, for example, modern generations of CPU and GPU. 
Another benefit of quantization is a significant reduction of model footprint which in most cases achieves 4x. + +During the quantization process, the POT tool runs inference of the optimizing model to estimate quantization parameters for input activations of the quantizable operation. It means that a calibration dataset is required to perform quantization. This dataset may have or not have annotation depending on the quantization algorithm that is used. ## Quantization Algorithms -Currently, the POT provides two algorithms for 8-bit quantization, which are verified and provide stable results on a +Currently, the POT provides two algorithms for 8-bit quantization, which are verified and guarantee stable results on a wide range of DNN models: -* **DefaultQuantization** is a default method that provides fast and in most cases accurate results for 8-bit - quantization. For details, see the [DefaultQuantization Algorithm](@ref pot_compression_algorithms_quantization_default_README) documentation. +* [**DefaultQuantization**](@ref pot_compression_algorithms_quantization_default_README) is a default method that provides fast and in most cases accurate results for 8-bit + quantization. It requires only a non-annotated dataset for quantization. For details, see the [DefaultQuantization Algorithm](@ref pot_compression_algorithms_quantization_default_README) documentation. -* **AccuracyAwareQuantization** enables remaining at a predefined range of accuracy drop after quantization at the cost - of performance improvement. It may require more time for quantization. For details, see the +* [**AccuracyAwareQuantization**](@ref pot_compression_algorithms_quantization_accuracy_aware_README) enables remaining at a predefined range of accuracy drop after quantization at the cost + of performance improvement. The method requires annotated representative dataset and may require more time for quantization. For details, see the [AccuracyAwareQuantization Algorithm](@ref pot_compression_algorithms_quantization_accuracy_aware_README) documentation. -## Quantization Formula +For more details about the representation of the low-precision model please refer to this [document](@ref pot_docs_model_representation). -Quantization is parametrized by clamping the range and the number of quantization levels: - -\f[ -output = \frac{\left\lfloor (clamp(input; input\_low, input\_high)-input\_low) *s\right \rceil}{s} + input\_low\\ -\f] - -\f[ -clamp(input; input\_low, input\_high) = min(max(input, input\_low), input\_high))) -\f] - -\f[ -s=\frac{levels-1}{input\_high - input\_low} -\f] - -In the formulas: -* `input_low` and `input_high` represent the quantization range -* \f[\left\lfloor\cdot\right \rceil\f] denotes rounding to the nearest integer - -The POT supports symmetric and asymmetric quantization of weights and activations, which are controlled by the `preset`. -The main difference between them is that in the symmetric mode the floating-point zero is mapped directly to the integer -zero, while in asymmetric the mode it can be an arbitrary integer number. In any mode, the floating-point zero is mapped -directly to the quant without rounding an error. See this [tutorial](@ref pot_docs_BestPractices) for details. - -Below is the detailed description of quantization formulas for both modes. These formulas are used both in the POT to -quantize weights of the model and in the OpenVINO™ Inference Engine runtime when quantizing activations during the -inference. 
- -#### Symmetric Quantization - -The formula is parametrized by the `scale` parameter that is tuned during the quantization process: - -\f[ -input\_low=scale*\frac{level\_low}{level\_high} -\f] - -\f[ -input\_high=scale -\f] +## See also +* [Optimization with Simplified mode](@ref pot_docs_simplified_mode) +* [Use POT Command-line for Model Zoo models](@ref pot_compression_cli_README) +* [POT API](@ref pot_compression_api_README) +* [Post-Training Optimization Best Practices](@ref pot_docs_BestPractices) -Where `level_low` and `level_high` represent the range of the discrete signal. -* For weights: - -\f[ -level\_low=-2^{bits-1}+1 -\f] - -\f[ -level\_high=2^{bits-1}-1 -\f] - -\f[ -levels=255 -\f] - -* For unsigned activations: - -\f[ -level\_low=0 -\f] - -\f[ -level\_high=2^{bits}-1 -\f] - -\f[ -levels=256 -\f] - -* For signed activations: - -\f[ -level\_low=-2^{bits-1} -\f] - -\f[ -level\_high=2^{bits-1}-1 -\f] -\f[ -levels=256 -\f] - -#### Asymmetric Quantization - -The quantization formula is parametrized by `input_low` and `input_range` that are tunable parameters: - -\f[ -input\_high=input\_low + input\_range -\f] - -\f[ -levels=256 -\f] - -For weights and activations the following quantization mode is applied: - -\f[ -{input\_low}' = min(input\_low, 0) -\f] - -\f[ -{input\_high}' = max(input\_high, 0) -\f] - -\f[ -ZP= \left\lfloor \frac{-{input\_low}'*(levels-1)}{{input\_high}'-{input\_low}'} \right \rceil -\f] - -\f[ -{input\_high}''=\frac{ZP-levels+1}{ZP}*{input\_low}' -\f] - -\f[ -{input\_low}''=\frac{ZP}{ZP-levels+1}*{input\_high}' -\f] - -\f[ -{input\_low,input\_high} = \begin{cases} {input\_low}',{input\_high}', & ZP \in $\{0,levels-1\}$ \\ {input\_low}',{input\_high}'', & {input\_high}'' - {input\_low}' > {input\_high}' - {input\_low}'' \\ {input\_low}'',{input\_high}', & {input\_high}'' - {input\_low}' <= {input\_high}' - {input\_low}''\\ \end{cases} -\f] diff --git a/tools/pot/openvino/tools/pot/algorithms/quantization/accuracy_aware/README.md b/tools/pot/openvino/tools/pot/algorithms/quantization/accuracy_aware/README.md index 1c7e6bddea8..188b1ae403e 100644 --- a/tools/pot/openvino/tools/pot/algorithms/quantization/accuracy_aware/README.md +++ b/tools/pot/openvino/tools/pot/algorithms/quantization/accuracy_aware/README.md @@ -1,9 +1,9 @@ # AccuracyAwareQuantization Algorithm {#pot_compression_algorithms_quantization_accuracy_aware_README} ## Overview -AccuracyAware algorithm is designed to perform accurate 8-bit quantization and allows the model to stay in the +AccuracyAware algorithm is designed to perform accurate quantization and allows the model to stay in the pre-defined range of accuracy drop, for example 1%, defined by the user in the configuration file. This may cause a -degradation in performance in comparison to [DefaultQuantization](../default/README.md) algorithm because some layers can be reverted back to the original precision. +degradation in performance in comparison to [DefaultQuantization](../default/README.md) algorithm because some layers can be reverted back to the original precision. The algorithm requires annotated dataset and cannot be used with the [Simplified mode](@ref pot_docs_simplified_mode). > **NOTE**: In case of GNA `target_device`, POT moves INT8 weights to INT16 to stay in the pre-defined range of the accuracy drop. Thus, the algorithm works for the `performance` (INT8) preset only. For the `accuracy` preset, this algorithm is not performed, but the parameters tuning is available (if `tune_hyperparams` option is enabled). 
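For orientation, the accuracy constraint described in the overview above is typically expressed in the configuration file along these lines. This is an illustrative sketch only: the numeric values are placeholders, and the complete parameter list is documented in the Parameters section that follows.

```json
"compression": {
    "algorithms": [
        {
            "name": "AccuracyAwareQuantization",
            "params": {
                "maximal_drop": 0.01,       // allow at most 1% accuracy drop after quantization
                "drop_type": "absolute",    // interpret the drop as an absolute metric difference
                "ranking_subset_size": 300, // samples used to rank layers by their accuracy impact
                "max_iter_num": 30          // upper bound on layers reverted to the original precision
            }
        }
    ]
}
```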
@@ -55,27 +55,18 @@ Default value is `0.5`. to the floating-point precision. It can bring additional performance and accuracy boost but increase overall quantization time. Default value is `False`. - Below is a fragment of the configuration file that shows overall structure of parameters for this algorithm. +## Examples + +A template and full specification for AccuracyAwareQuantization algorithm can be found: + * [Template](https://github.com/openvinotoolkit/openvino/blob/master/tools/pot/configs/accuracy_aware_quantization_template.json) + * [Full specification](https://github.com/openvinotoolkit/openvino/blob/master/tools/pot/configs/accuracy_aware_quantization_spec.json) + +Example of using POT API with Accuracy-aware algorithm: + * [Quantization of Object Detection model with control of accuracy](https://github.com/openvinotoolkit/openvino/tree/master/tools/pot/openvino/tools/pot/api/samples/object_detection) + + ## See also +* [Optimization with Simplified mode](@ref pot_docs_simplified_mode) +* [Use POT Command-line for Model Zoo models](@ref pot_compression_cli_README) +* [POT API](@ref pot_compression_api_README) +* [Post-Training Optimization Best Practices](@ref pot_docs_BestPractices) -``` -"name": "AccuracyAwareQuantization", // compression algorithm name - "params": { - "ranking_subset_size": 300, // A size of a subset which is used to rank layers by their contribution to the accuracy drop - "max_iter_num": 30, // Maximum number of iterations of the algorithm (maximum of layers that may be reverted back to full-precision) - "maximal_drop": 0.005, // Maximum accuracy drop which has to be achieved after the quantization - "drop_type": "absolute", // Drop type of the accuracy metric: relative or absolute (default) - "use_prev_if_drop_increase": false, // Whether to use NN snapshot from the previous algorithm iteration in case if drop increases - "base_algorithm": "DefaultQuantization", // Base algorithm that is used to quantize model at the beginning - "convert_to_mixed_preset": false, // Whether to convert the model to mixed mode if the accuracy criteria - // of the symmetrically quantized model are not satisfied - "metrics": [ // An optional list of metrics that are taken into account during optimization - // If not specified, all metrics defined in engine config are used - { - "name": "accuracy", // Metric name to optimize - "baseline_value": 0.72 // Baseline metric value of the original model - } - ], - "metric_subset_ratio": 0.5 // A part of the validation set that is used to compare element-wise full-precision and - // quantized models in case of predefined metric values of the original model - } -``` diff --git a/tools/pot/openvino/tools/pot/algorithms/quantization/default/README.md b/tools/pot/openvino/tools/pot/algorithms/quantization/default/README.md index 3bcf2e9551c..376e59dc31f 100644 --- a/tools/pot/openvino/tools/pot/algorithms/quantization/default/README.md +++ b/tools/pot/openvino/tools/pot/algorithms/quantization/default/README.md @@ -112,81 +112,30 @@ mode on the existing HW. - `"outlier_prob"` - outlier probability used in the "quantile" estimator - `"use_layerwise_tuning"` - enables layer-wise fine-tuning of model parameters (biases, Convolution/MatMul weights and FakeQuantize scales) by minimizing the mean squared error between original and quantized layer outputs. Enabling this option may increase compressed model accuracy, but will result in increased execution time and memory consumption. 
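To show how the parameters above fit together, a compact illustrative fragment of the `"compression"` section is sketched below. The ignored node name and the numeric values are placeholders; the template and full specification linked in the Examples section cover the complete set of options.

```json
"compression": {
    "algorithms": [
        {
            "name": "DefaultQuantization",
            "params": {
                "preset": "performance",           // symmetric quantization of weights and activations
                "stat_subset_size": 300,           // number of samples used to collect activation statistics
                "ignored": {
                    "scope": ["<node_name_to_skip>"]   // placeholder: nodes excluded from quantization
                },
                "activations": {
                    "range_estimator": {
                        "max": {
                            "type": "quantile",
                            "outlier_prob": 0.0001     // filter outliers when estimating the upper bound
                        }
                    }
                },
                "use_layerwise_tuning": false
            }
        }
    ]
}
```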
- - Below is a fragment of the configuration file that shows overall structure of parameters for this algorithm. -``` -"compression": { - "model_type": "None", // An optional parameter, needed for additional patterns in the model, - default value is None (supported only "Transformer" now) - "inplace_statistic": true, // An optional parameter, needed for change method collect statistics, - reduces the amount of memory consumed, but increases the calibration time - "algorithms": [ - "name": "DefaultQuantization", // optimization algorithm name - "params": { - /* Preset is a collection of optimization algorithm parameters that will specify to the algorithm - to improve which metric the algorithm needs to concentrate. Each optimization algorithm supports - [performance, mixed, accuracy] presets which control the quantization mode (symmetric, mixed(weights symmetric and activations asymmetric), and fully asymmetric respectively)*/ - "preset": "mixed", - "stat_subset_size": 300, // Size of subset to calculate activations statistics that can be used - // For quantization parameters calculation. - "ignored": { - "scope": [ - "" // List of nodes that are excluded from optimization - ], - "operations": [ // List of types that are excluded from optimization - { - "type": "", // Type of ignored operation - "attributes": { // If attributes are defined they will be considered during the ignorance - "": "" // Lists of values to filter by - } - } - ] - }, - /* Manually specified quantization parameters */ - /* Quantization parameters for weights */ - "weights": { // Weights quantization parameters used by MinMaxAlgorithm - "bits": 8, // Bit-width, default is 8 - "mode": "symmetric", // Quantization mode, default is "symmetric" - "level_low": 0, // Minimum level in the integer range in which we quantize to, default is 0 for unsigned range, -2^(bit-1) - for signed - "level_high": 255, // Maximum level in the integer range in which we quantize to, default is 2^bits-1 for unsigned range, 2^(bit-1)-1 - for signed - "granularity": "perchannel", // Quantization scale granularity: ["pertensor" (default), "perchannel"] - "range_estimator": { // Range estimator that is used to get the quantization ranges and filter outliers based on the statistics - "max": { // Parameters to estimate top quantization border - "type": "quantile", // Estimator type: ["max" (default), "quantile"] - "outlier_prob": 0.0001 // Outlier probability used in the "quantile" estimator - }, - "min": { // Parameters to estimate bottom quantization border (used only in asymmetric mode) - "type": "quantile", // Estimator type: ["max" (default), "quantile"] - "outlier_prob": 0.0001 // Outlier probability used in the "quantile" estimator - } - - } - }, - /* Quantization parameters for activations */ - "activations": { - "bits": 8, // Number of quantization bits - "mode": "symmetric", // Quantization mode - "granularity": "pertensor", // Granularity: one scale for output tensor - "range_estimator": { // Range estimator that is used to get the quantization ranges and filter outliers based on the statistics - "preset": "quantile", - /* OR */ - /* minimum of quantization range */ - /* maximum of quantization range */ - "max": { // Parameters to estimate top quantization border - "aggregator": "mean", // Batch aggregation type: ["mean" (default), "max", "min", "median", "mean_no_outliers", "median_no_outliers", "hl_estimator"] - "type": "quantile", // Estimator type: ["max" (default), "quantile"] - "outlier_prob": 0.0001 // Outlier probability used in 
the "quantile" estimator - }, - "min": { // Parameters to estimate top quantization border - "aggregator": "mean", // Batch aggregation type: ["mean" (default), "max", "min", "median", "mean_no_outliers", "median_no_outliers", "hl_estimator"] - "type": "quantile", // Estimator type [min, max, abs_max, quantile, abs_quantile] - "outlier_prob": 0.0001 // Outlier probability used in the "quantile" estimator - } - } - } - "use_layerwise_tuning": false // An optional parameter, enables layer-wise fine-tuning, false by default - } - ] - } -``` +## Examples + + A template and full specification for DefaultQuantization algorithm can be found: + * [Template](https://github.com/openvinotoolkit/openvino/blob/master/tools/pot/configs/default_quantization_template.json) + * [Full specification](https://github.com/openvinotoolkit/openvino/blob/master/tools/pot/configs/default_quantization_spec.json) + +Command-line example: +* [Quantization of Image Classification model](https://docs.openvino.ai/latest/pot_configs_examples_README.html) + +API tutorials: +* [Quantization of Image Classification model](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/301-tensorflow-training-openvino) +* [Quantization of Object Detection model from Model Zoo](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/111-detection-quantization) +* [Quantization of Segmentation model for medical data](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/110-ct-segmentation-quantize) +* [Quantization of BERT for Text Classification](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/105-language-quantize-bert) + +API examples: +* [Quantization of 3D segmentation model](https://github.com/openvinotoolkit/openvino/tree/master/tools/pot/openvino/tools/pot/api/samples/3d_segmentation) +* [Quantization of Face Detection model](https://github.com/openvinotoolkit/openvino/tree/master/tools/pot/openvino/tools/pot/api/samples/face_detection) +* [Speech example for GNA device](https://github.com/openvinotoolkit/openvino/tree/master/tools/pot/openvino/tools/pot/api/samples/speech) + +## See also +* [Optimization with Simplified mode](@ref pot_docs_simplified_mode) +* [Use POT Command-line for Model Zoo models](@ref pot_compression_cli_README) +* [POT API](@ref pot_compression_api_README) +* [Post-Training Optimization Best Practices](@ref pot_docs_BestPractices) + diff --git a/tools/pot/openvino/tools/pot/algorithms/quantization/ranger/README.md b/tools/pot/openvino/tools/pot/algorithms/quantization/ranger/README.md index 4426236e2ca..80fe9ff0c44 100644 --- a/tools/pot/openvino/tools/pot/algorithms/quantization/ranger/README.md +++ b/tools/pot/openvino/tools/pot/algorithms/quantization/ranger/README.md @@ -1,4 +1,4 @@ -# Experimental: Deep neural network protection through range supervision ("Ranger") +# Experimental: Deep neural network protection through range supervision ("Ranger") {#pot_ranger_README} # Overview @@ -14,7 +14,7 @@ where $`T_{low}`$ and $`T_{up}`$ are the lower and upper bounds for the particul The process flow follows the diagram [Fig 1](#Schematic). Starting from the internal representation (IR) of an OpenVINO model, the POT Ranger algorithm is called to **add protection layers into the model graph**. This step requires **appropriate threshold values that are automatically extracted from a specified test dataset**. The result is an IR representation of the model with additional "Ranger" layers after each supported activation layer.
The original and the modified model can be called in the same way through the OpenVINO inference engine to evaluate the impact on accuracy, performance, and dependability in the presence of potential soft errors (for example using the *benchmark_app* and *accuracy_checker* functions). **The algorithm is designed to provide efficient protection at negligible performance overhead or accuracy impact in the absence of faults.** Bound extraction is a one-time effort and the protected IR model returned by the Ranger algorithm can be used independently from there on. No changes in the learned parameters of the network are needed. -![Schematic](../../../../docs/ranger/images/scheme3.png) +![Schematic](../../../../../../docs/ranger/images/scheme3.png) *Fig 1: Schematic of Ranger process flow.* @@ -22,7 +22,7 @@ The process flow follows the diagram [Fig 1](#Schematic). Starting from the inte The following example shows a traffic camera image and predicted objects using a Yolov3 pretrained on the Coco dataset. A single weight fault was injected in a randomly chosen convolution layer of Yolo, flipping the most significant bit of the selected network parameter. If range supervision is applied, the original network performance is recovered despite the presence of the fault. -![](../../../../docs/ranger/images/img_combined_2.png) +![](../../../../../../docs/ranger/images/img_combined_2.png) *Fig 2: Example of fault mitigation via range supervision.* diff --git a/tools/pot/openvino/tools/pot/api/README.md b/tools/pot/openvino/tools/pot/api/README.md index 9d2d35afe11..c0a66988a09 100644 --- a/tools/pot/openvino/tools/pot/api/README.md +++ b/tools/pot/openvino/tools/pot/api/README.md @@ -42,7 +42,7 @@ should be implemented according to the custom DL model: The pipeline with implemented model specific interfaces such as `Engine`, `DataLoader` and `Metric` we will call the custom optimization pipeline (see the picture below that shows relationships between classes). -![](./custom_optimization_pipeline.png) +![](../../../../docs/images/api.png) ## Use Cases Before diving into the Python* POT API, it is highly recommended to read [Best Practices](@ref pot_docs_BestPractices) document where various @@ -54,6 +54,17 @@ The POT Python* API for model optimization can be used in the following cases: accuracy in this mode. - You already have the Python* script to validate the accuracy of the model using the [OpenVINO™ Runtime](@ref openvino_docs_OV_Runtime_User_Guide). 
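To make these use cases concrete, below is a minimal sketch of a custom optimization pipeline built from the interfaces described below. It is illustrative only: `ImageFolderLoader`, the dataset path, and the model file names are hypothetical placeholders, and error handling is omitted.

```python
import os
import numpy as np

from openvino.tools.pot import DataLoader, IEEngine, load_model, save_model, create_pipeline


class ImageFolderLoader(DataLoader):
    """Hypothetical loader that reads pre-processed .npy tensors from a folder."""

    def __init__(self, config):
        super().__init__(config)
        self._files = sorted(
            os.path.join(config["data_source"], f)
            for f in os.listdir(config["data_source"]) if f.endswith(".npy")
        )

    def __len__(self):
        return len(self._files)

    def __getitem__(self, index):
        image = np.load(self._files[index])   # NCHW tensor prepared offline
        annotation = (index, None)            # no labels: DefaultQuantization does not need them
        return annotation, image              # ((img_id, img_annotation), image) format expected by IEEngine


model_config = {
    "model_name": "model",                    # placeholder names/paths
    "model": "<PATH_TO_MODEL>/model.xml",
    "weights": "<PATH_TO_MODEL>/model.bin",
}
engine_config = {"device": "CPU"}
algorithms = [{
    "name": "DefaultQuantization",
    "params": {"target_device": "CPU", "preset": "performance", "stat_subset_size": 300},
}]

model = load_model(model_config)                            # read the FP32 IR
data_loader = ImageFolderLoader({"data_source": "<PATH_TO_DATA>"})
engine = IEEngine(engine_config, data_loader=data_loader)   # no Metric: accuracy is not controlled here
pipeline = create_pipeline(algorithms, engine)              # build the optimization pipeline
compressed_model = pipeline.run(model)                      # apply 8-bit quantization
save_model(compressed_model, "<PATH_TO_RESULTS>")           # save the INT8 IR
```

Because no `Metric` is passed, this sketch only fits the DefaultQuantization flow; accuracy-aware methods additionally require a metric implementation, as described in the Metric section below.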
+## Examples + +* API tutorials: + * [Quantization of Image Classification model](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/301-tensorflow-training-openvino) + * [Quantization of Object Detection model from Model Zoo](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/111-detection-quantization) + * [Quantization of BERT for Text Classification](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/105-language-quantize-bert) +* API examples: + * [Quantization of 3D segmentation model](https://github.com/openvinotoolkit/openvino/tree/master/tools/pot/openvino/tools/pot/api/samples/3d_segmentation) + * [Quantization of Face Detection model](https://github.com/openvinotoolkit/openvino/tree/master/tools/pot/openvino/tools/pot/api/samples/face_detection) + * [Speech example for GNA device](https://github.com/openvinotoolkit/openvino/tree/master/tools/pot/openvino/tools/pot/api/samples/speech) + ## API Description Below is a detailed explanation of POT Python* APIs which should be implemented in order to create a custom optimization @@ -62,7 +73,7 @@ pipeline. ### DataLoader ``` -class openvino.tools.pot.api.DataLoader(config) +class openvino.tools.pot.DataLoader(config) ``` The base class for all DataLoaders. @@ -75,7 +86,7 @@ which supports integer indexing in range of 0 to `len(self)` ### Metric ``` -class openvino.tools.pot.api.Metric() +class openvino.tools.pot.Metric() ``` An abstract class representing an accuracy metric. @@ -98,7 +109,7 @@ All subclasses should override the following methods: ### Engine ``` -class openvino.tools.pot.api.Engine(config, data_loader=None, metric=None) +class openvino.tools.pot.Engine(config, data_loader=None, metric=None) ``` Base class for all Engines. @@ -112,7 +123,7 @@ The engine provides model inference, statistics collection for activations and c All subclasses should override the following methods: - `set_model(model)` - sets/resets a model.

*Parameters* - - `model` - `CompressedModel` instance for inference (see details below). + - `model` - `CompressedModel` instance for inference. - `predict(stats_layout=None, sampler=None, metric_per_sample=False, print_progress=False)` - performs model inference on the specified subset of data.

@@ -157,6 +168,46 @@ on the specified subset of data.

} ``` +### Pipeline + +``` +class openvino.tools.pot.Pipeline(engine) +``` +Pipeline class represents the optimization pipeline. + +*Parameters* +- `engine` - instance of `Engine` class for model inference. + +The pipeline can be applied to the DL model by calling `run(model)` method where `model` is the `NXModel` instance. + +#### Create a pipeline + +The POT Python* API provides the utility function to create and configure the pipeline: +``` +openvino.tools.pot.create_pipeline(algo_config, engine) +``` +*Parameters* +- `algo_config` - a list defining optimization algorithms and their parameters included in the optimization pipeline. + The order in which they are applied to the model in the optimization pipeline is determined by the order in the list. + + Example of the algorithm configuration of the pipeline: + ``` + algo_config = [ + { + 'name': 'DefaultQuantization', + 'params': { + 'preset': 'performance', + 'stat_subset_size': 500 + } + }, + ... + ] + ``` +- `engine` - instance of `Engine` class for model inference. + +*Returns* +- instance of the `Pipeline` class. + ## Helpers and Internal Model Representation In order to simplify implementation of optimization pipelines we provide a set of ready-to-use helpers. Here we also describe internal representation of the DL model and how to work with it. @@ -164,7 +215,7 @@ describe internal representation of the DL model and how to work with it. ### IEEngine ``` -class openvino.tools.pot.engines.ie_engine.IEEngine(config, data_loader=None, metric=None) +class openvino.tools.pot.IEEngine(config, data_loader=None, metric=None) ``` IEEngine is a helper which implements Engine class based on [OpenVINO™ Inference Engine Python* API](ie_python_api/api.html). This class support inference in synchronous and asynchronous modes and can be reused as-is in the custom pipeline or @@ -216,11 +267,11 @@ represented as an instance of this class. The cascaded model is stored as a list - `models` - list of models of the cascaded model. - `is_cascade` - returns True if the loaded model is cascaded model. -#### Loading model from IR +### Read model from OpenVINO IR The Python* POT API provides the utility function to load model from the OpenVINO™ Intermediate Representation (IR): ``` -openvino.tools.pot.graph.model_utils.load_model(model_config) +openvino.tools.pot.load_model(model_config) ``` *Parameters* - `model_config` - dictionary describing a model that includes the following attributes: @@ -263,10 +314,10 @@ openvino.tools.pot.graph.model_utils.load_model(model_config) *Returns* - `CompressedModel` instance -#### Saving model to IR +#### Save model to IR The Python* POT API provides the utility function to save model in the OpenVINO™ Intermediate Representation (IR): ``` -openvino.tools.pot.graph.model_utils.save_model(model, save_path, model_name=None, for_stat_collection=False) +openvino.tools.pot.save_model(model, save_path, model_name=None, for_stat_collection=False) ``` *Parameters* - `model` - `CompressedModel` instance. @@ -314,94 +365,3 @@ class openvino.tools.pot.samplers.batch_sampler.BatchSampler(data_loader, batch_ Sampler provides an iterable over the dataset subset if `subset_indices` is specified or over the whole dataset with given `batch_size`. Returns a list of data items. -## Pipeline - -``` -class openvino.tools.pot.pipeline.pipeline.Pipeline(engine) -``` -Pipeline class represents the optimization pipeline. - -*Parameters* -- `engine` - instance of `Engine` class for model inference. 
- -The pipeline can be applied to the DL model by calling `run(model)` method where `model` is the `CompressedModel` instance. - -#### Create a pipeline - -The POT Python* API provides the utility function to create and configure the pipeline: -``` -openvino.tools.pot.pipeline.initializer.create_pipeline(algo_config, engine) -``` -*Parameters* -- `algo_config` - a list defining optimization algorithms and their parameters included in the optimization pipeline. - The order in which they are applied to the model in the optimization pipeline is determined by the order in the list. - - Example of the algorithm configuration of the pipeline: - ``` - algo_config = [ - { - 'name': 'DefaultQuantization', - 'params': { - 'preset': 'performance', - 'stat_subset_size': 500 - } - }, - ... - ] - ``` -- `engine` - instance of `Engine` class for model inference. - -*Returns* -- instance of the `Pipeline` class. - -## Usage Example -Before running the optimization tool it's highly recommended to make sure that -- The model was converted to the OpenVINO™ Intermediate Representation (IR) from the source framework using [Model Optimizer](@ref openvino_docs_MO_DG_Deep_Learning_Model_Optimizer_DevGuide). -- The model can be successfully inferred with OpenVINO™ Inference Engine in floating-point precision. -- The model achieves the same accuracy as in the original training framework. - -As was described above, `DataLoader`, `Metric` and `Engine` interfaces should be implemented in order to create -the custom optimization pipeline for your model. There might be a case you have the Python* validation script for your -model using the [OpenVINO™ Runtime](@ref openvino_docs_OV_Runtime_User_Guide), -which in practice includes loading a dataset, model inference, and calculating the accuracy metric. -So you just need to wrap the existing functions of your validation script in `DataLoader`, `Metric` and `Engine` interfaces. -In another case, you need to implement interfaces from scratch. - -For facilitation of using Python* POT API, we implemented `IEEngine` class providing the model inference of the most models -from the Vision Domain which can be reused for an arbitrary model. - -After `YourDataLoader`, `YourMetric`, `YourEngine` interfaces are implemented, the custom optimization pipeline can be -created and applied to the model as follows: - -``` -# Step 1: Load the model. -model_config = { - 'model_name': 'your_model', - 'model': /your_model.xml, - 'weights': -} -model = load_model(model_config) - -# Step 2: Initialize the data loader. -dataset_config = {} # dictionary with the dataset parameters -data_loader = YourDataLoader(dataset_config) - -# Step 3 (Optional. Required for AccuracyAwareQuantization): Initialize the metric. -metric = YourMetric() - -# Step 4: Initialize the engine for metric calculation and statistics collection. -engine_config = {} # dictionary with the engine parameters -engine = YourEngine(engine_config, data_loader, metric) - -# Step 5: Create a pipeline of compression algorithms. -pipeline = create_pipeline(algorithms, engine) - -# Step 6: Execute the pipeline. -compressed_model = pipeline.run(model) - -# Step 7: Save the compressed model. -save_model(compressed_model, "path_to_save_model") -``` - -For in-depth examples of using Python* POT API, browse the samples included into the OpenVINO™ toolkit installation -and available in the `/api/samples` directory. 
There are currently five samples that demonstrate the implementation of `Engine`, `Metric` and `DataLoader` interfaces for classification, detection and segmentation tasks. From a312dd4a9f2391e6f33ff3248df8891f250ebaa0 Mon Sep 17 00:00:00 2001 From: Nikita Malinin Date: Sun, 20 Feb 2022 09:44:04 +0300 Subject: [PATCH 008/310] [POT] IEEngine output data order (#10527) * IEEngine fix for multiply-output nets * Update docstrings and docs * Codestyle changes * Update docs * Update docstring * Pylint --- tools/pot/openvino/tools/pot/api/README.md | 7 +++---- .../3d_segmentation/3d_segmentation_sample.py | 5 +++-- .../pot/openvino/tools/pot/engines/ie_engine.py | 16 +++++++++------- 3 files changed, 15 insertions(+), 13 deletions(-) diff --git a/tools/pot/openvino/tools/pot/api/README.md b/tools/pot/openvino/tools/pot/api/README.md index c0a66988a09..728711284f3 100644 --- a/tools/pot/openvino/tools/pot/api/README.md +++ b/tools/pot/openvino/tools/pot/api/README.md @@ -222,14 +222,13 @@ This class support inference in synchronous and asynchronous modes and can be re with some modifications, e.g. in case of custom post-processing of inference results. The following methods can be overridden in subclasses: -- `postprocess_output(outputs, metadata)` - processes raw model output using the image metadata obtained during -data loading.

+- `postprocess_output(outputs, metadata)` - Processes model output data using the image metadata obtained during data loading.

*Parameters* - - `outputs` - raw output of the model. + - `outputs` - dictionary of output data per output name. - `metadata` - information about the data used for inference. *Return* - - post-processed model output + - list of the output data in an order expected by the accuracy metric if any is used `IEEngine` supports data returned by `DataLoader` in the format: ``` diff --git a/tools/pot/openvino/tools/pot/api/samples/3d_segmentation/3d_segmentation_sample.py b/tools/pot/openvino/tools/pot/api/samples/3d_segmentation/3d_segmentation_sample.py index 1b8b8d03079..2056d7e215a 100644 --- a/tools/pot/openvino/tools/pot/api/samples/3d_segmentation/3d_segmentation_sample.py +++ b/tools/pot/openvino/tools/pot/api/samples/3d_segmentation/3d_segmentation_sample.py @@ -187,12 +187,13 @@ class SegmentationEngine(IEEngine): """ Processes model raw output for future metric and loss calculation. Uses image metadata that can be passed using dataloader. - :param outputs: network infer result in format of numpy ndarray (batch x image shape) + :param outputs: network infer result in the format of dictionary numpy ndarray + by layer name (batch x image shape) :param metadata: dictionary of image metadata :return: processed numpy ndarray with the same shape as the original output """ processed_outputs = [] - for output, meta in zip(outputs, metadata): + for output, meta in zip(outputs.values(), metadata): # Resize to bounding box size and extend to mask size low = meta['bbox'][0] high = meta['bbox'][1] diff --git a/tools/pot/openvino/tools/pot/engines/ie_engine.py b/tools/pot/openvino/tools/pot/engines/ie_engine.py index 9faffd420a0..bff5a1e0d4d 100644 --- a/tools/pot/openvino/tools/pot/engines/ie_engine.py +++ b/tools/pot/openvino/tools/pot/engines/ie_engine.py @@ -143,8 +143,12 @@ class IEEngine(Engine): @staticmethod def postprocess_output(outputs, _metadata): - """ Processes raw model output using the image metadata obtained during data loading """ - return outputs + """ Processes model output data using the image metadata obtained during data loading + :param outputs: dictionary of output data per output name + :param _metadata: metadata obtained during data loading + :return: list of the output data in an order expected by the accuracy metric if any is used + """ + return list(outputs.values()) def _reset(self): """ Resets collected statistics """ @@ -182,14 +186,12 @@ class IEEngine(Engine): annotations=batch_annotations) # Postprocess network output - outputs = process_raw_output(predictions) - output = outputs[self._output_layers[0]] - outputs[self._output_layers[0]] = self.postprocess_output(output, batch_meta) + processed_outputs = process_raw_output(predictions) + outputs = {name: processed_outputs[name] for name in self._output_layers} + logits = self.postprocess_output(outputs, batch_meta) # Update metrics if batch_annotations: - # TODO: Create some kind of an order for the correct metric calculation - logits = [outputs[name] for name in self._output_layers] # output_layers are in a random order self._update_metrics(output=logits, annotations=batch_annotations, need_metrics_per_sample=need_metrics_per_sample) From 982942fa5dcaab987837cf26e7b7989290f92881 Mon Sep 17 00:00:00 2001 From: Maxim Vafin Date: Sun, 20 Feb 2022 12:39:52 +0300 Subject: [PATCH 009/310] Fix typo in CropAndResize translator (#10541) --- src/frontends/tensorflow/src/op/crop_and_resize.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/frontends/tensorflow/src/op/crop_and_resize.cpp 
b/src/frontends/tensorflow/src/op/crop_and_resize.cpp index 3a75f1f353f..b9eb400123e 100644 --- a/src/frontends/tensorflow/src/op/crop_and_resize.cpp +++ b/src/frontends/tensorflow/src/op/crop_and_resize.cpp @@ -44,8 +44,8 @@ OutputVector translate_crop_and_resize_op(const NodeContext& node) { auto image_depth = spatial_shape[3]; auto const_boxes = dynamic_pointer_cast(ng_boxes.get_node_shared_ptr()); - auto const_box_ind = dynamic_pointer_cast(ng_boxes.get_node_shared_ptr()); - auto const_crop_size = dynamic_pointer_cast(ng_boxes.get_node_shared_ptr()); + auto const_box_ind = dynamic_pointer_cast(ng_box_ind.get_node_shared_ptr()); + auto const_crop_size = dynamic_pointer_cast(ng_size.get_node_shared_ptr()); TENSORFLOW_OP_VALIDATION(node, const_boxes && const_box_ind && const_crop_size, From a52c755d21086405e9e1787e2e92380212e39418 Mon Sep 17 00:00:00 2001 From: Maxim Shevtsov Date: Sun, 20 Feb 2022 20:56:15 +0300 Subject: [PATCH 010/310] refactor the perf counters to get really on-demand (rather than on every inference) (#10526) * refactor the perf counters to get really on-demand (rather than on every inference) * removed the (now) un-needed needPerfCounters flag --- src/plugins/auto_batch/auto_batch.cpp | 75 ++++++++++----------------- src/plugins/auto_batch/auto_batch.hpp | 14 ++--- 2 files changed, 31 insertions(+), 58 deletions(-) diff --git a/src/plugins/auto_batch/auto_batch.cpp b/src/plugins/auto_batch/auto_batch.cpp index a4442f896ed..b5b2665442b 100644 --- a/src/plugins/auto_batch/auto_batch.cpp +++ b/src/plugins/auto_batch/auto_batch.cpp @@ -57,11 +57,9 @@ AutoBatchInferRequest::AutoBatchInferRequest(const std::vector>& outputs, AutoBatchExecutableNetwork::WorkerInferRequest& workerRequest, int batch_id, - int num_batch, - bool needPerfCounters) + int num_batch) : IInferRequestInternal(inputs, outputs), _myBatchedRequestWrapper(workerRequest), - _needPerfCounters(needPerfCounters), _batchId(batch_id), _batchSize(num_batch) { ShareBlobsWithBatchRequest(); @@ -71,11 +69,9 @@ AutoBatchInferRequest::AutoBatchInferRequest(const InputsDataMap& networkInputs, const OutputsDataMap& networkOutputs, AutoBatchExecutableNetwork::WorkerInferRequest& workerRequest, int batch_id, - int num_batch, - bool needPerfCounters) + int num_batch) : IInferRequestInternal(networkInputs, networkOutputs), _myBatchedRequestWrapper(workerRequest), - _needPerfCounters(needPerfCounters), _batchId(batch_id), _batchSize(num_batch) { ShareBlobsWithBatchRequest(); @@ -316,13 +312,8 @@ void AutoBatchInferRequest::CopyOutputsIfNeeded() { } } -std::map AutoBatchInferRequest::GetPerformanceCounts() const { - return _perfMap; -} - AutoBatchAsyncInferRequest::AutoBatchAsyncInferRequest( const AutoBatchInferRequest::Ptr& inferRequest, - const bool needPerfCounters, InferenceEngine::SoIInferRequestInternal& inferRequestWithoutBatch, const ITaskExecutor::Ptr& callbackExecutor) : AsyncInferRequestThreadSafeDefault(inferRequest, nullptr, callbackExecutor), @@ -345,27 +336,26 @@ AutoBatchAsyncInferRequest::AutoBatchAsyncInferRequest( }; AutoBatchAsyncInferRequest* _this = nullptr; }; - _pipeline = { - {/*TaskExecutor*/ std::make_shared(this), /*task*/ [this, needPerfCounters] { - if (this->_inferRequest->_exceptionPtr) // if the exception happened in the batch1 fallback - std::rethrow_exception(this->_inferRequest->_exceptionPtr); - auto& batchReq = this->_inferRequest->_myBatchedRequestWrapper; - if (batchReq._exceptionPtr) // when the batchN execution failed - std::rethrow_exception(batchReq._exceptionPtr); - // in the 
case of non-batched execution the blobs were set explicitly - if (AutoBatchInferRequest::eExecutionFlavor::BATCH_EXECUTED == this->_inferRequest->_wasBatchedRequestUsed) - this->_inferRequest->CopyOutputsIfNeeded(); - if (needPerfCounters) { - try { - if (AutoBatchInferRequest::eExecutionFlavor::BATCH_EXECUTED == - this->_inferRequest->_wasBatchedRequestUsed) - this->_inferRequest->_perfMap = batchReq._inferRequestBatched->GetPerformanceCounts(); - else - this->_inferRequest->_perfMap = this->_inferRequestWithoutBatch->GetPerformanceCounts(); - } catch (...) { - } - } - }}}; + _pipeline = {{/*TaskExecutor*/ std::make_shared(this), /*task*/ [this] { + if (this->_inferRequest->_exceptionPtr) // if the exception happened in the batch1 fallback + std::rethrow_exception(this->_inferRequest->_exceptionPtr); + auto& batchReq = this->_inferRequest->_myBatchedRequestWrapper; + if (batchReq._exceptionPtr) // when the batchN execution failed + std::rethrow_exception(batchReq._exceptionPtr); + // in the case of non-batched execution the blobs were set explicitly + if (AutoBatchInferRequest::eExecutionFlavor::BATCH_EXECUTED == + this->_inferRequest->_wasBatchedRequestUsed) + this->_inferRequest->CopyOutputsIfNeeded(); + }}}; +} + +std::map AutoBatchAsyncInferRequest::GetPerformanceCounts() + const { + CheckState(); + if (AutoBatchInferRequest::eExecutionFlavor::BATCH_EXECUTED == _inferRequest->_wasBatchedRequestUsed) + return _inferRequest->_myBatchedRequestWrapper._inferRequestBatched->GetPerformanceCounts(); + else + return _inferRequestWithoutBatch->GetPerformanceCounts(); } void AutoBatchAsyncInferRequest::Infer_ThreadUnsafe() { @@ -381,14 +371,12 @@ AutoBatchExecutableNetwork::AutoBatchExecutableNetwork( const InferenceEngine::SoExecutableNetworkInternal& networkWithBatch, const InferenceEngine::SoExecutableNetworkInternal& networkWithoutBatch, const DeviceInformation& networkDevice, - const std::unordered_map& config, - const bool needPerfCounters) + const std::unordered_map& config) : InferenceEngine::ExecutableNetworkThreadSafeDefault(nullptr, std::make_shared()), _network{networkWithBatch}, _networkWithoutBatch{networkWithoutBatch}, - _config{config}, - _needPerfCounters{needPerfCounters} { + _config{config} { // WA for gcc 4.8 ( fails compilation with member init-list) _device = networkDevice; auto time_out = config.find(CONFIG_KEY(AUTO_BATCH_TIMEOUT)); @@ -423,8 +411,7 @@ InferenceEngine::IInferRequestInternal::Ptr AutoBatchExecutableNetwork::CreateIn networkOutputs, workerRequestPtrAndId.first, workerRequestPtrAndId.second, - _device.batchForDevice, - _needPerfCounters); + _device.batchForDevice); } InferenceEngine::IInferRequestInternal::Ptr AutoBatchExecutableNetwork::CreateInferRequestImpl( @@ -440,8 +427,7 @@ InferenceEngine::IInferRequestInternal::Ptr AutoBatchExecutableNetwork::CreateIn outputs, workerRequestPtrAndId.first, workerRequestPtrAndId.second, - _device.batchForDevice, - _needPerfCounters); + _device.batchForDevice); } std::pair AutoBatchExecutableNetwork::GetWorkerInferRequest() { @@ -537,7 +523,6 @@ InferenceEngine::IInferRequestInternal::Ptr AutoBatchExecutableNetwork::CreateIn _networkWithoutBatch._so}; return std::make_shared( std::static_pointer_cast(syncRequestImpl), - _needPerfCounters, inferRequestWithoutBatch, _callbackExecutor); } @@ -845,11 +830,6 @@ InferenceEngine::IExecutableNetworkInternal::Ptr AutoBatchInferencePlugin::LoadN metaDevice.batchForDevice = 1; } - const auto perfConfig = fullConfig.find(PluginConfigParams::KEY_PERF_COUNT); - const auto 
perfConfigInTargetPlugin = - core->GetConfig(deviceName, PluginConfigParams::KEY_PERF_COUNT).as() == PluginConfigParams::YES; - const bool enablePerfCounters = perfConfigInTargetPlugin || ((fullConfig.end() != perfConfig) && - (perfConfig->second == PluginConfigParams::YES)); auto report_footprint = [](std::shared_ptr pCore, std::string device) -> size_t { size_t footprint = 0; // TODO: use the per-network metric (22.2) rather than plugin-level @@ -901,8 +881,7 @@ InferenceEngine::IExecutableNetworkInternal::Ptr AutoBatchInferencePlugin::LoadN return std::make_shared(executableNetworkWithBatch, executableNetworkWithoutBatch, metaDevice, - networkConfig, - enablePerfCounters); + networkConfig); } InferenceEngine::IExecutableNetworkInternal::Ptr AutoBatchInferencePlugin::LoadExeNetworkImpl( diff --git a/src/plugins/auto_batch/auto_batch.hpp b/src/plugins/auto_batch/auto_batch.hpp index fd1fc0ec535..e5aff7f3c0f 100644 --- a/src/plugins/auto_batch/auto_batch.hpp +++ b/src/plugins/auto_batch/auto_batch.hpp @@ -49,8 +49,7 @@ public: const InferenceEngine::SoExecutableNetworkInternal& networkForDevice, const InferenceEngine::SoExecutableNetworkInternal& networkForDeviceWithoutBatch, const DeviceInformation& networkDevices, - const std::unordered_map& config, - const bool needPerfCounters = false); + const std::unordered_map& config); void SetConfig(const std::map& config) override; InferenceEngine::Parameter GetConfig(const std::string& name) const override; @@ -90,16 +89,13 @@ public: const InferenceEngine::OutputsDataMap& networkOutputs, AutoBatchExecutableNetwork::WorkerInferRequest& workerRequestPtr, int batch_id, - int num_batch, - bool _needPerfCounters = false); + int num_batch); explicit AutoBatchInferRequest(const std::vector>& inputs, const std::vector>& outputs, AutoBatchExecutableNetwork::WorkerInferRequest& workerRequestPtr, int batch_id, - int num_batch, - bool _needPerfCounters = false); + int num_batch); - std::map GetPerformanceCounts() const override; // Batch-Device impl specific: sets the data (blobs from the device request to the batched device request) void SetBlobsToAnotherRequest(InferenceEngine::SoIInferRequestInternal& req); void CopyInputsIfNeeded(); @@ -111,10 +107,8 @@ public: BATCH_EXECUTED, TIMEOUT_EXECUTED } _wasBatchedRequestUsed = eExecutionFlavor::NOT_EXECUTED; - std::map _perfMap; protected: - bool _needPerfCounters = false; void CopyBlobIfNeeded(InferenceEngine::Blob::CPtr src, InferenceEngine::Blob::Ptr dst, bool bInput); void ShareBlobsWithBatchRequest(); size_t _batchId; @@ -126,11 +120,11 @@ public: using Ptr = std::shared_ptr; explicit AutoBatchAsyncInferRequest(const AutoBatchInferRequest::Ptr& inferRequest, - const bool needPerfCounters, InferenceEngine::SoIInferRequestInternal& inferRequestWithoutBatch, const InferenceEngine::ITaskExecutor::Ptr& callbackExecutor); void Infer_ThreadUnsafe() override; virtual ~AutoBatchAsyncInferRequest(); + std::map GetPerformanceCounts() const override; InferenceEngine::SoIInferRequestInternal _inferRequestWithoutBatch; AutoBatchInferRequest::Ptr _inferRequest; From 7c93902dac5534f13a13e71ac333bc8d841390a3 Mon Sep 17 00:00:00 2001 From: Ilya Znamenskiy Date: Mon, 21 Feb 2022 06:03:23 +0300 Subject: [PATCH 011/310] [GPU] Fix issues with floating point fusings support for cldnn / onednn fully connected kernels (#10519) * [GPU] Fix of floating point fusings inside fc kernels * [GPU] Fix for related tests --- .../graph/graph_optimizer/prepare_primitive_fusing.cpp | 8 ++------ 
.../tests/fusions/fully_connected_fusion_test.cpp | 6 +++--- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp index 9e3212f938e..8a0579af82b 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp @@ -554,13 +554,9 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) { }; auto fc_supports_fusings = [](fully_connected_node& node) -> bool { - auto out_fmt = node.get_output_layout().format; - auto in_l = node.get_dependency(0).get_output_layout(); - auto in_dt = in_l.data_type; - auto in_b = in_l.size.batch[0]; + auto in_dt = node.get_dependency(0).get_output_layout().data_type; - return (data_type_traits::is_i8_u8(in_dt) || (data_type_traits::is_floating_point(in_dt) && in_b > 1)) && - out_fmt != format::yxfb; + return data_type_traits::is_i8_u8(in_dt); }; auto gemm_supports_fusings = [](gemm_node& node) -> bool { diff --git a/src/plugins/intel_gpu/tests/fusions/fully_connected_fusion_test.cpp b/src/plugins/intel_gpu/tests/fusions/fully_connected_fusion_test.cpp index 1a91ae56e84..3ebde11eb6f 100644 --- a/src/plugins/intel_gpu/tests/fusions/fully_connected_fusion_test.cpp +++ b/src/plugins/intel_gpu/tests/fusions/fully_connected_fusion_test.cpp @@ -359,9 +359,9 @@ INSTANTIATE_TEST_SUITE_P(fusings_gpu, fc_int8_scale_activation_quantize_i8, ::te fully_connected_test_params{ CASE_FC_U8S8_3D_2, 2, 5 }, fully_connected_test_params{ CASE_FC_U8S8_3D_3, 2, 5 }, - fully_connected_test_params{ CASE_FC_FP32_3D_1, 2, 5 }, - fully_connected_test_params{ CASE_FC_FP32_3D_2, 2, 5 }, - fully_connected_test_params{ CASE_FC_FP32_3D_3, 2, 5 }, + fully_connected_test_params{ CASE_FC_FP32_3D_1, 3, 5 }, + fully_connected_test_params{ CASE_FC_FP32_3D_2, 3, 5 }, + fully_connected_test_params{ CASE_FC_FP32_3D_3, 3, 5 }, })); #ifdef ENABLE_ONEDNN_FOR_GPU From ea3bd087c4e12bfc464c84a8e8121c279963832f Mon Sep 17 00:00:00 2001 From: Xuejun Zhai Date: Mon, 21 Feb 2022 11:21:52 +0800 Subject: [PATCH 012/310] [CVS-78727][python version] bug fix for -d AUTO:CPU,GPU the return device should be AUTO only (#10506) Signed-off-by: xuejun --- tools/benchmark_tool/openvino/tools/benchmark/utils/utils.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/benchmark_tool/openvino/tools/benchmark/utils/utils.py b/tools/benchmark_tool/openvino/tools/benchmark/utils/utils.py index f0d34d304d8..ce39d5d49f4 100644 --- a/tools/benchmark_tool/openvino/tools/benchmark/utils/utils.py +++ b/tools/benchmark_tool/openvino/tools/benchmark/utils/utils.py @@ -246,6 +246,8 @@ def can_measure_as_static(app_input_info): def parse_devices(device_string): if device_string in ['MULTI', 'HETERO']: return list() + if device_string.find("AUTO") != -1: + return ['AUTO'] devices = device_string if ':' in devices: devices = devices.partition(':')[2] From 575ded54a9ed6117d7bc92a416cda91334a2114e Mon Sep 17 00:00:00 2001 From: Andrei Molotkov Date: Mon, 21 Feb 2022 09:30:00 +0300 Subject: [PATCH 013/310] [GPU] Move adding biases to the end convolution_bfyx_to_bfyx_f16 kernel (#10533) --- .../convolution_gpu_bfyx_to_bfyx_f16.cl | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/plugins/intel_gpu/src/kernel_selector/core/cl_kernels/convolution_gpu_bfyx_to_bfyx_f16.cl 
b/src/plugins/intel_gpu/src/kernel_selector/core/cl_kernels/convolution_gpu_bfyx_to_bfyx_f16.cl index f3ba4f9cc77..7f2515136af 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/core/cl_kernels/convolution_gpu_bfyx_to_bfyx_f16.cl +++ b/src/plugins/intel_gpu/src/kernel_selector/core/cl_kernels/convolution_gpu_bfyx_to_bfyx_f16.cl @@ -90,17 +90,7 @@ KERNEL(convolution_bfyx_to_bfyx_f16)( const uint filter_offset = f_block * filter_os_pitch; #endif -#if BIAS_TERM - uint bias_offset = f_block * FEATURE_SLICE_SIZE; - -# if GROUPED && !DEPTHWISE_SEPARABLE_OPT - bias_offset += split_idx * BIAS_LENGTH; -# endif - - MAKE_VECTOR_TYPE(INPUT0_TYPE, OUTPUT_X_BLOCK_SIZE) dst = (MAKE_VECTOR_TYPE(INPUT0_TYPE, OUTPUT_X_BLOCK_SIZE))(DT_BIAS_BLOCK_READ(biases, bias_offset)); -#else MAKE_VECTOR_TYPE(INPUT0_TYPE, OUTPUT_X_BLOCK_SIZE) dst = INPUT0_VAL_ZERO; -#endif INPUT0_TYPE line_cache[INPUT0_FEATURE_NUM * INPUT_BLOCK_SIZE]; for (int ic = 0; ic < INPUT0_FEATURE_NUM; ic++) @@ -151,6 +141,16 @@ KERNEL(convolution_bfyx_to_bfyx_f16)( } } +#if BIAS_TERM + uint bias_offset = f_block * FEATURE_SLICE_SIZE; + +# if GROUPED && !DEPTHWISE_SEPARABLE_OPT + bias_offset += split_idx * BIAS_LENGTH; +# endif + + dst += (MAKE_VECTOR_TYPE(INPUT0_TYPE, OUTPUT_X_BLOCK_SIZE))(DT_BIAS_BLOCK_READ(biases, bias_offset)); +#endif + OUTPUT_PACKED_TYPE res; #ifndef HAS_FUSED_OPS res = TO_OUTPUT_PACKED_TYPE(ACTIVATION(dst, ACTIVATION_PARAMS)); From a7fff7447c93fb2ddd41838369c2fdf5ec118f1a Mon Sep 17 00:00:00 2001 From: Andrew Kwangwoong Park Date: Mon, 21 Feb 2022 15:35:44 +0900 Subject: [PATCH 014/310] Fix to extract scores for each class in consideration of background label's id (#10500) Signed-off-by: Andrew Kwangwoong Park --- .../src/graph/impls/cpu/detection_output.cpp | 35 +++++++++++-------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/detection_output.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/detection_output.cpp index b2412d56e78..f5dbd39e59c 100644 --- a/src/plugins/intel_gpu/src/graph/impls/cpu/detection_output.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/cpu/detection_output.cpp @@ -268,7 +268,7 @@ struct detection_output_impl : typed_primitive_impl { const auto& args = instance.argument; // Per image -> For each label: Pair (score, prior index) - std::vector>>> final_detections; + std::vector>>> final_detections; for (int image = 0; image < num_of_images; ++image) { const std::vector>& bboxes_per_image = all_bboxes[image]; std::vector>>& conf_per_image = confidences[image]; @@ -323,15 +323,15 @@ struct detection_output_impl : typed_primitive_impl { comp_score_descend>); score_index_pairs.resize(args.keep_top_k); - std::vector>> new_indices(args.num_classes); + std::map>> new_indices; for (int j = 0; j < static_cast(score_index_pairs.size()); ++j) { int label = score_index_pairs[j].second.first; int idx = score_index_pairs[j].second.second; - new_indices[label].emplace_back(score_index_pairs[j].first, idx); + new_indices[label].push_back(std::make_pair(score_index_pairs[j].first, idx)); } - final_detections.emplace_back(new_indices); + final_detections.push_back(new_indices); } else { - std::vector>> new_indices(args.num_classes); + std::map>> new_indices; for (auto it = indices.begin(); it != indices.end(); ++it) { int label = it->first; const std::vector& labelIndices = it->second; @@ -340,23 +340,23 @@ struct detection_output_impl : typed_primitive_impl { int idx = labelIndices[j]; for (const auto& s : scores) { if (s.second == idx) { - 
new_indices[label].emplace_back(s.first, idx); + new_indices[label].push_back(std::make_pair(s.first, idx)); } } } } - final_detections.emplace_back(new_indices); + final_detections.push_back(new_indices); } } int count = 0; for (int image = 0; image < num_of_images; ++image) { const std::vector>& bboxes_per_image = all_bboxes[image]; - auto& final_detections_per_image = final_detections[image]; - for (int label = 0; label < static_cast(final_detections_per_image.size()); ++label) { + for (auto it = final_detections[image].begin(); it != final_detections[image].end(); ++it) { + int label = it->first; int loc_label = args.share_location ? 0 : label; const std::vector& bboxes = bboxes_per_image[loc_label]; - const std::vector>& label_detections = final_detections_per_image[label]; + std::vector>& label_detections = it->second; for (std::pair score_prior : label_detections) { out_ptr[count * DETECTION_OUTPUT_ROW_SIZE] = (dtype)static_cast(image); out_ptr[count * DETECTION_OUTPUT_ROW_SIZE + 1] = @@ -609,7 +609,7 @@ struct detection_output_impl : typed_primitive_impl { const int num_of_priors, std::vector>>>& scoreIndexPairs) { const int num_classes = instance.argument.num_classes; - + const int background_label_id = instance.argument.background_label_id; const int num_of_images = static_cast(confidences.size()); auto input_confidence = instance.confidence_memory(); const float confidence_threshold = instance.argument.confidence_threshold; @@ -645,10 +645,14 @@ struct detection_output_impl : typed_primitive_impl { confidence_ptr_float += idx; __m128 threshold = _mm_load_ps1(&confidence_threshold); for (int prior = 0; prior < num_of_priors; ++prior) { - int cls = 0; + int idx_start = (background_label_id == 0 ? 1 : 0); + int cls = idx_start; float max_score = 0; int max_cls = 0; for (; cls + 3 < num_classes; cls += 4) { + if ((background_label_id == 0) && (cls == idx_start)) { + confidence_ptr_float += 1; + } __m128 scores = _mm_loadu_ps(confidence_ptr_float); confidence_ptr_float += 4; __m128i mask128 = _mm_castps_si128(_mm_cmpgt_ps(scores, threshold)); @@ -657,9 +661,10 @@ struct detection_output_impl : typed_primitive_impl { } int mask = _mm_movemask_ps(_mm_castsi128_ps(mask128)); if (mask & 1) { - label_to_scores[cls + 0].emplace_back(_mm_cvtss_f32(scores), prior); - if (_mm_cvtss_f32(scores) > max_score && cls + 0 != 0) { - max_score = _mm_cvtss_f32(scores); max_cls = cls + 0; + float s = _mm_cvtss_f32(scores); + label_to_scores[cls + 0].emplace_back(s, prior); + if ((cls == idx_start) || (s > max_score)) { + max_score = s; max_cls = cls + 0; } } if (mask & 2) { From 0ee6959537a3000d1c101378d60c0de25ff6b7ee Mon Sep 17 00:00:00 2001 From: Roman Lyamin Date: Mon, 21 Feb 2022 09:41:24 +0300 Subject: [PATCH 015/310] [GPU] Replacing get_shape() with get_partial_shape() (#10525) --- .../src/plugin/transformations_pipeline.cpp | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp index 0b79a7f1d2c..7fbc43833b1 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp @@ -94,8 +94,10 @@ namespace { template static bool disableReduceDecomposition(const std::shared_ptr node) { if (auto op = std::dynamic_pointer_cast(node)) { - bool fp16_batch_not_1 = op->get_element_type() == ngraph::element::f16 && op->input(0).get_shape()[0] != 1; - return !fp16_batch_not_1; + if 
(op->input(0).get_partial_shape()[0].is_static()) { + bool fp16_batch_not_1 = op->get_element_type() == ngraph::element::f16 && op->input(0).get_partial_shape()[0] != 1; + return !fp16_batch_not_1; + } } return false; } @@ -174,8 +176,8 @@ void TransformationsPipeline::apply(std::shared_ptr func) { pass_config->set_callback( [](const_node_ptr &node) -> bool { - return node->input_value(0).get_shape().size() <= 5lu && - node->input_value(0).get_shape().size() == node->get_output_shape(0).size(); + return node->input_value(0).get_partial_shape().size() <= 5lu && + node->input_value(0).get_partial_shape().size() == node->get_output_partial_shape(0).size(); }); pass_config->set_callback func) { if (mvn != nullptr && node->get_input_size() == 2) { if (auto axesNode = dynamic_cast(mvn->get_input_node_ptr(1))) { auto axesVal = axesNode->cast_vector(); - auto& mvnShape = mvn->get_output_shape(0); + auto& mvnShape = mvn->get_output_partial_shape(0); for (int32_t& axis : axesVal) axis = axis < 0 ? axis + mvnShape.size() : axis; std::sort(axesVal.begin(), axesVal.end()); From 73a6d50dbc4e35d54ff1f00c4f662f764a8fb4e5 Mon Sep 17 00:00:00 2001 From: Taylor Yeonbok Lee Date: Mon, 21 Feb 2022 15:42:08 +0900 Subject: [PATCH 016/310] [GPU] Fixed batch size again to 8 as a workaround of compiler restriction. (#10502) --- src/plugins/intel_gpu/src/runtime/kernels_cache.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/plugins/intel_gpu/src/runtime/kernels_cache.cpp b/src/plugins/intel_gpu/src/runtime/kernels_cache.cpp index b44c0a1cfed..2df73474e44 100644 --- a/src/plugins/intel_gpu/src/runtime/kernels_cache.cpp +++ b/src/plugins/intel_gpu/src/runtime/kernels_cache.cpp @@ -148,7 +148,7 @@ bool kernels_cache::is_cache_enabled() const { } size_t kernels_cache::get_max_kernels_per_batch() const { - return 9; + return 8; } From e89c7ed8e5bf17ca1cd8d869fe922cbd563ff350 Mon Sep 17 00:00:00 2001 From: Ivan Tikhonov Date: Mon, 21 Feb 2022 09:55:26 +0300 Subject: [PATCH 017/310] Describe MakeStateful transformation in MO help (#10536) * Update --transform help for MakeStateful transformation * add quotes --- tools/mo/openvino/tools/mo/utils/cli_parser.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/mo/openvino/tools/mo/utils/cli_parser.py b/tools/mo/openvino/tools/mo/utils/cli_parser.py index 8af8dd87a6d..0b75e063fd4 100644 --- a/tools/mo/openvino/tools/mo/utils/cli_parser.py +++ b/tools/mo/openvino/tools/mo/utils/cli_parser.py @@ -365,8 +365,10 @@ def get_common_cli_parser(parser: argparse.ArgumentParser = None): 'Usage: "--transform transformation_name1[args],transformation_name2..." ' + 'where [args] is key=value pairs separated by semicolon. 
' + 'Examples: "--transform LowLatency2" or ' + - ' "--transform LowLatency2[use_const_initializer=False]" ' + - 'Available transformations: "LowLatency2"', + ' "--transform LowLatency2[use_const_initializer=False]" or ' + + ' "--transform \"MakeStateful[param_res_names=' + '{\'input_name_1\':\'output_name_1\',\'input_name_2\':\'output_name_2\'}]\"" ' + + 'Available transformations: "LowLatency2", "MakeStateful"', default="") common_group.add_argument('--disable_fusing', help='Turn off fusing of linear operations to Convolution', From f7a85c59fe2ce93058b3799fed7ec13ed5a90c2b Mon Sep 17 00:00:00 2001 From: Nikita Demashov Date: Mon, 21 Feb 2022 10:01:37 +0300 Subject: [PATCH 018/310] [LPT] Disable Move Fake Quantize on shuffle channels pattern (#10389) * added shuffle channels check * refactoring --- .../src/move_fake_quantize.cpp | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/common/low_precision_transformations/src/move_fake_quantize.cpp b/src/common/low_precision_transformations/src/move_fake_quantize.cpp index a98af1da53a..37e6c643fee 100644 --- a/src/common/low_precision_transformations/src/move_fake_quantize.cpp +++ b/src/common/low_precision_transformations/src/move_fake_quantize.cpp @@ -176,6 +176,18 @@ bool MoveFakeQuantize::canBeTransformed(const TransformationContext& context, st if (q_dq && (convert_q->get_output_size() != 1 || layer->get_output_size() != 1)) { return false; } + bool only_split = true; + const size_t id = concat->get_input_node_ptr(0)->get_instance_id(); + for (size_t i = 1; i < concat->get_input_size(); ++i) { + if (!is_type(concat->get_input_node_ptr(i)) || + concat->get_input_node_ptr(i)->get_instance_id() != id) { + only_split = false; + break; + } + } + if (only_split) { + return false; + } return true; } From 2cc66296249e8f30ec4b439aeda48309e43ca096 Mon Sep 17 00:00:00 2001 From: Tingqian Li Date: Mon, 21 Feb 2022 15:02:31 +0800 Subject: [PATCH 019/310] [CPU] Avoid using xmm0 for input to store_emitter (#6566) --- src/plugins/intel_cpu/src/nodes/roi_pooling.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/plugins/intel_cpu/src/nodes/roi_pooling.cpp b/src/plugins/intel_cpu/src/nodes/roi_pooling.cpp index 9c7ec610b4d..5db94de3f15 100644 --- a/src/plugins/intel_cpu/src/nodes/roi_pooling.cpp +++ b/src/plugins/intel_cpu/src/nodes/roi_pooling.cpp @@ -100,7 +100,7 @@ private: const int step = vlen / sizeof(float); Vmm vmm_mask = Vmm(0); - Vmm vmm_zero = Vmm(0); + Vmm vmm_zero = Vmm(2); // avoid using xmm0 (reserved as mask reg in sse41-instruction blendvps) Xmm xmm_yf = Xmm(0); Vmm vmm_yf = Vmm(0); From e5d6f1836683dc4ee9beb91c8d9703e87d13b52d Mon Sep 17 00:00:00 2001 From: Ivan Tikhonov Date: Mon, 21 Feb 2022 10:14:08 +0300 Subject: [PATCH 020/310] [TF FE] Fix BatchToSpace op translator (#10511) * use shape value, not rank in batch_to_space conversion * codestyle * resolve review comment --- src/frontends/tensorflow/src/op/space_to_batch_nd.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/frontends/tensorflow/src/op/space_to_batch_nd.cpp b/src/frontends/tensorflow/src/op/space_to_batch_nd.cpp index 12365d2f51f..8de61f179e2 100644 --- a/src/frontends/tensorflow/src/op/space_to_batch_nd.cpp +++ b/src/frontends/tensorflow/src/op/space_to_batch_nd.cpp @@ -36,7 +36,13 @@ OutputVector translate_batch_nd_and_space_nd_op(const NodeContext& node) { } auto N = input_pshape.rank().get_length(); - auto M = block_shape_pshape.rank().get_length(); + + // TODO: support dynamic shape + 
TENSORFLOW_OP_VALIDATION(node, + block_shape_pshape[0].is_static(), + "First dimension of block_shape input should be static."); + auto M = static_cast(block_shape_pshape[0].get_length()); + auto padded_crops = make_shared(crops, make_shared(crops.get_element_type(), Shape{2}, std::vector{1, 0}), From 31f517a3b45976b35547652df3c0ae375548824b Mon Sep 17 00:00:00 2001 From: Maxim Andronov Date: Mon, 21 Feb 2022 10:16:29 +0300 Subject: [PATCH 021/310] [CPU] Fix error message for shape infer (#10522) --- src/plugins/intel_cpu/src/nodes/eltwise.cpp | 12 ++++++++++++ .../src/utils/shape_inference/shape_inference.cpp | 2 +- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/src/plugins/intel_cpu/src/nodes/eltwise.cpp b/src/plugins/intel_cpu/src/nodes/eltwise.cpp index 0dc689644e0..43c4d83213b 100644 --- a/src/plugins/intel_cpu/src/nodes/eltwise.cpp +++ b/src/plugins/intel_cpu/src/nodes/eltwise.cpp @@ -1841,6 +1841,18 @@ std::vector MKLDNNEltwiseNode::shapeInfer() const { ov::PartialShape::broadcast_merge_into(outShape, getParentEdgesAtPort(i)[0]->getMemory().GetShape().toPartialShape(), ov::op::AutoBroadcastType::NUMPY); } + + if (outShape.is_dynamic()) { + std::ostringstream errorMessage; + errorMessage << "Can't compute static output shape for Eltwise node with name: " << getName(); + errorMessage << ". Input shapes = ( "; + for (size_t i = 0; i < getParentEdges().size(); i++) { + errorMessage << i << " port = " << getParentEdgesAtPort(i)[0]->getMemory().GetShape().toString() << ", "; + } + errorMessage << "). Output shape = ( " << outShape << " )"; + OPENVINO_ASSERT(false, errorMessage.str()); + } + return {outShape.get_shape()}; } diff --git a/src/plugins/intel_cpu/src/utils/shape_inference/shape_inference.cpp b/src/plugins/intel_cpu/src/utils/shape_inference/shape_inference.cpp index 467fd405eeb..11378e53be2 100644 --- a/src/plugins/intel_cpu/src/utils/shape_inference/shape_inference.cpp +++ b/src/plugins/intel_cpu/src/utils/shape_inference/shape_inference.cpp @@ -252,7 +252,7 @@ public: if (partial_shape.is_dynamic()) { std::ostringstream errorMessage; errorMessage << "Can't compute static output shape on " << i - << " port for node with name: " << op->get_name(); + << " port for " << op->get_type_name() << " node with name: " << op->get_name(); errorMessage << ". 
Input shapes = ( "; for (size_t in = 0; in < op->get_input_size(); in++) { errorMessage << in << " port = " << op->get_input_partial_shape(in) << ", "; From 33ab7f90636c7687dfc990cc7781eb4868ea5219 Mon Sep 17 00:00:00 2001 From: Ivan Tikhonov Date: Mon, 21 Feb 2022 10:27:11 +0300 Subject: [PATCH 022/310] remove redundant node_context.hpp files, fix handling nodes with several output ports (#10484) --- src/frontends/paddle/src/node_context.hpp | 142 ------------------ .../tensorflow/src/decoder_proto.cpp | 6 +- src/frontends/tensorflow/src/node_context.hpp | 108 ------------- 3 files changed, 5 insertions(+), 251 deletions(-) delete mode 100644 src/frontends/paddle/src/node_context.hpp delete mode 100644 src/frontends/tensorflow/src/node_context.hpp diff --git a/src/frontends/paddle/src/node_context.hpp b/src/frontends/paddle/src/node_context.hpp deleted file mode 100644 index eb7a88e5bb7..00000000000 --- a/src/frontends/paddle/src/node_context.hpp +++ /dev/null @@ -1,142 +0,0 @@ -// Copyright (C) 2018-2022 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once -#include "exceptions.hpp" -#include "openvino/core/any.hpp" -#include "openvino/core/node.hpp" - -namespace ov { -namespace frontend { -namespace paddle { - -using InPortName = std::string; -using OutPortName = std::string; -using TensorName = std::string; -using NamedOutputs = std::map; -using NamedInputs = std::map; - -class DecoderBase { -public: - /// \brief Get attribute value by name and requested type - /// - /// \param name Attribute name - /// \param type_info Attribute type information - /// \return Shared pointer to appropriate value if it exists, 'nullptr' otherwise - virtual ov::Any get_attribute(const std::string& name, const std::type_info& type_info) const = 0; - - virtual std::vector get_output_names() const = 0; - - virtual size_t get_output_size() const = 0; - - /// \brief Get output port type - /// - /// Current API assumes that output port has only one output type. - /// If decoder supports multiple types for specified port, it shall throw general - /// exception - /// - /// \param port_name Port name for the node - /// - /// \return Type of specified output port - virtual ov::element::Type get_out_port_type(const std::string& port_name) const = 0; - - virtual std::string get_op_type() const = 0; -}; - -/// Keep necessary data for a single node in the original FW graph to facilitate -/// conversion process in the rules code. -class NodeContext { - const DecoderBase& decoder; - const NamedInputs& name_map; - -public: - NodeContext(const DecoderBase& _decoder, const NamedInputs& _name_map) : decoder(_decoder), name_map(_name_map) {} - - /// Returns node attribute by name. 
Returns 'def' value if attribute does not exist - template - T get_attribute(const std::string& name, const T& def) const { - auto res = decoder.get_attribute(name, typeid(T)); - if (!res.empty()) { - return res.as(); - } else { - return def; - } - } - - template - T get_attribute(const std::string& name) const { - auto res = decoder.get_attribute(name, typeid(T)); - FRONT_END_GENERAL_CHECK(!res.empty(), "Attribute with name '", name, "' does not exist"); - return res.as(); - } - - template - bool has_attribute(const std::string& name) const { - return !decoder.get_attribute(name, typeid(T)).empty(); - } - - /// Detects if there is at least one input attached with a given name - bool has_ng_input(const std::string& name) const { - auto found = name_map.find(name); - if (found != name_map.end()) - return !found->second.empty(); - return false; - } - - /// Returns exactly one input with a given name; throws if there is no inputs or - /// there are more than one input - Output get_ng_input(const std::string& name) const { - FRONT_END_GENERAL_CHECK(name_map.at(name).size() == 1); - return name_map.at(name).at(0); - } - - /// Returns all inputs with a given name - OutputVector get_ng_inputs(const std::string& name) const { - return name_map.at(name); - } - - /// Returns all inputs in order they appear in map. This is used for FrameworkNode - /// creation - OutputVector get_all_ng_inputs() const { - OutputVector res; - for (const auto& entry : name_map) { - res.insert(res.end(), entry.second.begin(), entry.second.end()); - } - return res; - } - - std::vector get_output_names() const { - return decoder.get_output_names(); - } - - ov::element::Type get_out_port_type(const std::string& port_name) const { - return decoder.get_out_port_type(port_name); - } - - std::string get_op_type() const { - return decoder.get_op_type(); - } - - NamedOutputs default_single_output_mapping(const std::shared_ptr& node, - const std::vector& required_paddle_out_names) const; -}; - -inline NamedOutputs NodeContext::default_single_output_mapping( - const std::shared_ptr& node, - const std::vector& required_paddle_out_names) const { - NamedOutputs named_outputs; - const auto& outputs = node->outputs(); - const auto& paddle_op_output_names = this->get_output_names(); - FRONT_END_GENERAL_CHECK(outputs.size() == 1, "OV node must have exactly one output"); - for (const auto& paddle_name : paddle_op_output_names) { - if (std::find(required_paddle_out_names.begin(), required_paddle_out_names.end(), paddle_name) != - required_paddle_out_names.end()) - named_outputs[paddle_name] = {outputs[0]}; - } - return named_outputs; -} - -} // namespace paddle -} // namespace frontend -} // namespace ov diff --git a/src/frontends/tensorflow/src/decoder_proto.cpp b/src/frontends/tensorflow/src/decoder_proto.cpp index 0c3f50f4c70..c8fd8982341 100644 --- a/src/frontends/tensorflow/src/decoder_proto.cpp +++ b/src/frontends/tensorflow/src/decoder_proto.cpp @@ -148,7 +148,11 @@ void DecoderProto::get_input_node(size_t input_port_idx, auto delim_pos = producer_port_name.find(':'); if (delim_pos != std::string::npos) { producer_name = producer_port_name.substr(0, delim_pos); - producer_output_port_index = std::stoi(producer_port_name.substr(delim_pos)); + auto port_id = producer_port_name.substr(delim_pos + 1); + FRONT_END_GENERAL_CHECK(!port_id.empty() && std::all_of(port_id.begin(), port_id.end(), ::isdigit), + "Port id is not specified or not a number. 
Value: ", + port_id); + producer_output_port_index = std::stoi(port_id); return; } producer_name = producer_port_name; diff --git a/src/frontends/tensorflow/src/node_context.hpp b/src/frontends/tensorflow/src/node_context.hpp deleted file mode 100644 index 170cce7ec2f..00000000000 --- a/src/frontends/tensorflow/src/node_context.hpp +++ /dev/null @@ -1,108 +0,0 @@ -// Copyright (C) 2018-2022 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once -#include - -#include "exceptions.hpp" -#include "place.hpp" -#include "tensor.pb.h" -#include "types.pb.h" - -namespace ov { -namespace frontend { -namespace tensorflow { - -using InPortName = size_t; -using OutPortName = size_t; -using NamedOutputs = std::map; -using NamedInputs = std::map; - -/// Keep necessary data for a single node in the original FW graph to facilitate -/// conversion process in the rules code. -class NodeContext { - const DecoderBase& m_decoder; - const NamedInputs& m_name_map; - -public: - NodeContext(const DecoderBase& decoder, const NamedInputs& name_map) : m_decoder(decoder), m_name_map(name_map) {} - - /// Returns node attribute by name. Returns 'def' value if attribute does not exist - template - T get_attribute(const std::string& name, const T& def) const { - auto res = m_decoder.get_attribute(name, typeid(T)); - if (!res.empty()) { - return res.as(); - } - return def; - } - - /// Returns node attribute by name - template - T get_attribute(const std::string& name) const { - auto res = m_decoder.get_attribute(name, typeid(T)); - FRONT_END_GENERAL_CHECK(!res.empty(), "Attribute with name '", name, "' does not exist"); - return res.as(); - } - - /// Check if an attribute of a given name exists - template - bool has_attribute(const std::string& name) const { - return !m_decoder.get_attribute(name, typeid(T)).empty(); - } - - /// Detects if there is at least one input attached with a given name - bool has_input(const size_t& port_index) const { - auto found = m_name_map.find(port_index); - if (found != m_name_map.end()) - return !found->second.empty(); - return false; - } - - /// Returns exactly one input with a given name; throws if there is no inputs or - /// there are more than one input - Output get_input(const size_t& port_index) const { - FRONT_END_GENERAL_CHECK(m_name_map.at(port_index).size() == 1); - return m_name_map.at(port_index).at(0); - } - - /// Returns all inputs with a given name - OutputVector get_inputs(const size_t& port_index) const { - return m_name_map.at(port_index); - } - - /// Returns all inputs in order they appear in map. 
This is used for FrameworkNode - /// creation - OutputVector get_all_inputs() const { - OutputVector res; - for (const auto& entry : m_name_map) { - res.insert(res.end(), entry.second.begin(), entry.second.end()); - } - return res; - } - - /// Get a number of inputs - size_t get_input_size() const { - return m_name_map.size(); - } - - /// Get operation type - std::string get_op_type() const { - return m_decoder.get_op_type(); - } - - /// Get a node name - std::string get_name() const { - return m_decoder.get_op_name(); - } - - /// Get a decoder - const DecoderBase* get_decoder() const { - return &m_decoder; - } -}; - -} // namespace tensorflow -} // namespace frontend -} // namespace ov From 1fa5d4476973a11ccdfdd92cd9c91e2eaff34638 Mon Sep 17 00:00:00 2001 From: Maxim Andronov Date: Mon, 21 Feb 2022 11:30:24 +0300 Subject: [PATCH 023/310] [CPU] WA for MergeTransposeAndReorder after conv + sum (#10466) --- src/plugins/intel_cpu/src/graph_optimizer.cpp | 19 +++++- .../include/fuse_transpose_reorder.hpp | 5 ++ .../src/fuse_transpose_reorder.cpp | 59 +++++++++++++++++++ 3 files changed, 82 insertions(+), 1 deletion(-) diff --git a/src/plugins/intel_cpu/src/graph_optimizer.cpp b/src/plugins/intel_cpu/src/graph_optimizer.cpp index 110cc0f41aa..fadac2d363e 100644 --- a/src/plugins/intel_cpu/src/graph_optimizer.cpp +++ b/src/plugins/intel_cpu/src/graph_optimizer.cpp @@ -1862,9 +1862,26 @@ void MKLDNNGraphOptimizer::MergeTransposeAndReorder(MKLDNNGraph &graph) { auto& graphNodes = graph.GetNodes(); auto isSuitableParentNode = [](MKLDNNNodePtr node) { + // WA: to avoid broken memory pointer for conv + sum + auto prevNodeIsConvSum = [](MKLDNNNodePtr node) -> bool { + const auto parent = node->getParentEdgesAtPort(0)[0]->getParent(); + if (parent->getType() == Convolution) { + for (const auto& fusedNode : parent->getFusedWith()) { + if (fusedNode->getAlgorithm() == EltwiseAdd) { + const auto addNode = std::dynamic_pointer_cast(fusedNode); + if (addNode && addNode->isSpecialConvolutionAddFusing()) { + return true; + } + } + } + } + return false; + }; + return node->getType() == Transpose && node->getChildEdges().size() == 1 - && !node->isDynamicNode(); // TODO [DS]: enable for dynamic shapes when inPlace in the dynamic case is available (CVS-74863) + && !node->isDynamicNode() // TODO [DS]: enable for dynamic shapes when inPlace in the dynamic case is available (CVS-74863) + && !prevNodeIsConvSum(node); }; auto isSuitableChildNode = [](MKLDNNNodePtr node) { diff --git a/src/tests/functional/plugin/cpu/subgraph_tests/include/fuse_transpose_reorder.hpp b/src/tests/functional/plugin/cpu/subgraph_tests/include/fuse_transpose_reorder.hpp index a9b59b362f3..eaf6172159a 100644 --- a/src/tests/functional/plugin/cpu/subgraph_tests/include/fuse_transpose_reorder.hpp +++ b/src/tests/functional/plugin/cpu/subgraph_tests/include/fuse_transpose_reorder.hpp @@ -46,4 +46,9 @@ protected: void CreateGraph() override; }; +class FuseTransposeAndReorderTest3 : public FuseTransposeAndReorderTest { +protected: + void CreateGraph() override; +}; + } // namespace SubgraphTestsDefinitions diff --git a/src/tests/functional/plugin/cpu/subgraph_tests/src/fuse_transpose_reorder.cpp b/src/tests/functional/plugin/cpu/subgraph_tests/src/fuse_transpose_reorder.cpp index 4135c439ce5..908925821cc 100644 --- a/src/tests/functional/plugin/cpu/subgraph_tests/src/fuse_transpose_reorder.cpp +++ b/src/tests/functional/plugin/cpu/subgraph_tests/src/fuse_transpose_reorder.cpp @@ -238,6 +238,65 @@ TEST_P(FuseTransposeAndReorderTest2, 
CompareWithRefs) { INSTANTIATE_TEST_SUITE_P(smoke_Basic, FuseTransposeAndReorderTest2, fuseTransposeAndReorderCommonParams, FuseTransposeAndReorderTest::getTestCaseName); +/* FuseTransposeAndReorderTest3 graph + Parameter + \ + \ + Convolution (nhwc) + \ + \ Parameter + \ / + Add + | + Transpose (0,2,3,1) + | + Result +*/ + +void FuseTransposeAndReorderTest3::CreateGraph() { + IE_ASSERT(inputShape.size() == 4); + + auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(inPrec); + + auto memFmt = nhwc; + ngraph::op::PadType padType = ngraph::op::PadType::SAME_UPPER; + InferenceEngine::SizeVector kernel{3, 3}, stride{1, 1}, dilation{1, 1}; + std::vector padBegin{0, 0}, padEnd{0, 0}; + size_t convOutChannels = 32; + + auto params = ngraph::builder::makeParams(ngPrc, {inputShape}); + IE_ASSERT(inputShape[1] >= 8 && (inputShape[1] % 8 == 0)); + + auto convolutionNode = ngraph::builder::makeConvolution(params.front(), ngPrc, kernel, stride, padBegin, + padEnd, dilation, padType, convOutChannels); + convolutionNode->get_rt_info() = makeCPUInfo({memFmt}, {memFmt}, {}); + + auto sndAddIn = std::make_shared(ngPrc, convolutionNode->get_output_shape(0)); + params.push_back(sndAddIn); + auto add = std::make_shared(convolutionNode->output(0), sndAddIn); + + auto order = std::vector{0, 2, 3, 1}; + auto constOrder = ngraph::builder::makeConstant(ngraph::element::i64, {order.size()}, order); + auto transpose = std::make_shared(add, constOrder); + transpose->get_rt_info() = makeCPUInfo({memFmt}, {memFmt}, {}); + + ngraph::ResultVector results{std::make_shared(transpose)}; + function = std::make_shared(results, params, "TransposeReorder"); +} + +TEST_P(FuseTransposeAndReorderTest3, CompareWithRefs) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + + Run(); + CheckTransposeCount(1); +} + +const auto convSumTranposeParams = ::testing::Combine(::testing::Values(SizeVector{1, 16, 32, 35}), + ::testing::Values(Precision::FP32) +); + +INSTANTIATE_TEST_SUITE_P(smoke_Basic, FuseTransposeAndReorderTest3, convSumTranposeParams, FuseTransposeAndReorderTest::getTestCaseName); + TEST(smoke_Basic, FuseDynamicTransposeAndReorderTest) { auto model = ov::builder::preprocess::create_preprocess_1input(ov::element::u8, ov::PartialShape{1, 3, 224, 224}); auto p = ov::preprocess::PrePostProcessor(model); From 430e898c339f5f0ad29e2e5fe2317bec98401ae8 Mon Sep 17 00:00:00 2001 From: Mateusz Tabaka Date: Mon, 21 Feb 2022 09:37:17 +0100 Subject: [PATCH 024/310] Add bf16, f64, i4, u4, i16, u16 types to Equal's evaluate (#10508) * Add f64 type to Equal's evaluate Required by t2t-vit models. Ticket: 79610. 
* add also i16 u16 because prior_box tests fail with "Check eval_status failed at" * code style * add i4, u4, bf16 to equal's evaluate --- src/core/src/op/equal.cpp | 8 +++++++- src/core/src/validation_util.cpp | 3 ++- src/core/tests/constant_folding.cpp | 20 +++++++++++++++++++- 3 files changed, 28 insertions(+), 3 deletions(-) diff --git a/src/core/src/op/equal.cpp b/src/core/src/op/equal.cpp index 0fada0dc463..4ea2ccef238 100644 --- a/src/core/src/op/equal.cpp +++ b/src/core/src/op/equal.cpp @@ -35,14 +35,20 @@ bool evaluate_equal(const HostTensorPtr& arg0, out->set_broadcast(broadcast_spec, arg0, arg1, element::boolean); switch (arg0->get_element_type()) { NGRAPH_TYPE_CASE(evaluate_equal, boolean, arg0, arg1, out, broadcast_spec); + NGRAPH_TYPE_CASE(evaluate_equal, i4, arg0, arg1, out, broadcast_spec); NGRAPH_TYPE_CASE(evaluate_equal, i8, arg0, arg1, out, broadcast_spec); - NGRAPH_TYPE_CASE(evaluate_equal, u8, arg0, arg1, out, broadcast_spec); + NGRAPH_TYPE_CASE(evaluate_equal, i16, arg0, arg1, out, broadcast_spec); NGRAPH_TYPE_CASE(evaluate_equal, i32, arg0, arg1, out, broadcast_spec); NGRAPH_TYPE_CASE(evaluate_equal, i64, arg0, arg1, out, broadcast_spec); + NGRAPH_TYPE_CASE(evaluate_equal, u4, arg0, arg1, out, broadcast_spec); + NGRAPH_TYPE_CASE(evaluate_equal, u8, arg0, arg1, out, broadcast_spec); + NGRAPH_TYPE_CASE(evaluate_equal, u16, arg0, arg1, out, broadcast_spec); NGRAPH_TYPE_CASE(evaluate_equal, u32, arg0, arg1, out, broadcast_spec); NGRAPH_TYPE_CASE(evaluate_equal, u64, arg0, arg1, out, broadcast_spec); + NGRAPH_TYPE_CASE(evaluate_equal, bf16, arg0, arg1, out, broadcast_spec); NGRAPH_TYPE_CASE(evaluate_equal, f16, arg0, arg1, out, broadcast_spec); NGRAPH_TYPE_CASE(evaluate_equal, f32, arg0, arg1, out, broadcast_spec); + NGRAPH_TYPE_CASE(evaluate_equal, f64, arg0, arg1, out, broadcast_spec); default: rc = false; break; diff --git a/src/core/src/validation_util.cpp b/src/core/src/validation_util.cpp index 762b1b33fd8..7f4667791ca 100644 --- a/src/core/src/validation_util.cpp +++ b/src/core/src/validation_util.cpp @@ -1223,7 +1223,8 @@ bool are_equal(const HostTensorPtr& lhs, const HostTensorPtr& rhs, size_t max_el return false; auto mask = std::make_shared(element::boolean, lhs_shape); const auto& param = std::make_shared(lhs_et, lhs_shape); - op::v1::Equal(param, param, ngraph::op::AutoBroadcastType::NUMPY).evaluate({mask}, {lhs, rhs}); + bool eval_status = op::v1::Equal(param, param, ngraph::op::AutoBroadcastType::NUMPY).evaluate({mask}, {lhs, rhs}); + OPENVINO_ASSERT(eval_status); auto equal = op::Constant(mask).cast_vector(); return std::all_of(equal.begin(), equal.end(), [](bool i) { return i; diff --git a/src/core/tests/constant_folding.cpp b/src/core/tests/constant_folding.cpp index 9f8e3395393..9ffb9b2fd85 100644 --- a/src/core/tests/constant_folding.cpp +++ b/src/core/tests/constant_folding.cpp @@ -283,6 +283,11 @@ TEST(constant_folding, constant_unary_binary) { auto j = make_shared(element::i8, Shape{2}, values_j); auto k = make_shared(element::u8, Shape{2}, values_k); auto doubles = make_shared(element::f64, Shape{2}, std::vector{4.0, 9.0}); + auto doubles2 = make_shared(element::f64, Shape{2}, std::vector{4.0, 1.0}); + auto shorts = make_shared(element::i16, Shape{3}, std::vector{14, -3, -3}); + auto shorts2 = make_shared(element::i16, Shape{1}, std::vector{-3}); + auto unsigned_shorts = make_shared(element::u16, Shape{3}, std::vector{14, 300, 14}); + auto unsigned_shorts2 = make_shared(element::u16, Shape{1}, std::vector{300}); auto add = make_shared(a, b); auto 
sub = make_shared(a, b); @@ -312,6 +317,10 @@ TEST(constant_folding, constant_unary_binary) { auto doubles_sqrt = make_shared(doubles); auto sub_int8 = make_shared(j, j); auto sub_uint8 = make_shared(k, k); + auto equal_doubles = make_shared(doubles, doubles2, op::AutoBroadcastType::NUMPY); + auto equal_shorts = make_shared(shorts, shorts2, op::AutoBroadcastType::NUMPY); + auto equal_unsigned_shorts = + make_shared(unsigned_shorts, unsigned_shorts2, op::AutoBroadcastType::NUMPY); auto neg_sqrt = make_shared(c); @@ -342,7 +351,10 @@ TEST(constant_folding, constant_unary_binary) { logical_xor_autob_numpy, doubles_sqrt, sub_int8, - sub_uint8}, + sub_uint8, + equal_doubles, + equal_shorts, + equal_unsigned_shorts}, ParameterVector{}); auto func_error = make_shared(NodeVector{neg_sqrt}, ParameterVector{}); @@ -378,6 +390,9 @@ TEST(constant_folding, constant_unary_binary) { vector doubles_sqrt_expected{2.0, 3.0}; vector sub_int8_expected{0, 0}; vector sub_uint8_expected{0, 0}; + vector equal_doubles_expected{1, 0}; + vector equal_shorts_expected{0, 1, 1}; + vector equal_unsigned_shorts_expected{0, 1, 0}; ASSERT_EQ(get_result_constant(func, 0), add_expected); ASSERT_EQ(get_result_constant(func, 1), sub_expected); @@ -407,6 +422,9 @@ TEST(constant_folding, constant_unary_binary) { ASSERT_EQ(get_result_constant(func, 25), doubles_sqrt_expected); ASSERT_EQ(get_result_constant(func, 26), sub_int8_expected); ASSERT_EQ(get_result_constant(func, 27), sub_uint8_expected); + ASSERT_EQ(get_result_constant(func, 28), equal_doubles_expected); + ASSERT_EQ(get_result_constant(func, 29), equal_shorts_expected); + ASSERT_EQ(get_result_constant(func, 30), equal_unsigned_shorts_expected); ASSERT_NO_THROW(pass_manager.run_passes(func_error)); } From cd77b33f3a0d4dd582b4196eef847f26d09df844 Mon Sep 17 00:00:00 2001 From: Yuan Xu Date: Mon, 21 Feb 2022 17:11:08 +0800 Subject: [PATCH 025/310] docker installation updates for 22/1 (#10341) * Add Overview page * Revert "Add Overview page" * update * update install flows * update * Update docs/install_guides/installing-openvino-docker-linux.md Co-authored-by: Ilya Naumov * Update docs/install_guides/installing-openvino-docker-linux.md Co-authored-by: Ilya Naumov * update structure * small changes * improve structure * Update docs/install_guides/installing-openvino-docker-linux.md Co-authored-by: Ilya Naumov * integrate comments * remove outdated note * Update docs/install_guides/installing-openvino-docker-windows.md Co-authored-by: Ilya Naumov * Update installing-openvino-docker-windows.md * Update docs/install_guides/installing-openvino-docker-linux.md Co-authored-by: Ilya Naumov * Update docs/install_guides/installing-openvino-docker-linux.md Co-authored-by: Ilya Naumov * Update docs/install_guides/installing-openvino-docker-linux.md Co-authored-by: Ilya Naumov * integrate comments * adding an issue: Permission Errors for `/dev/shm` * Update docs/install_guides/troubleshooting.md Co-authored-by: Ilya Naumov * update comments * fix mistake * fix mistake * fix a link Co-authored-by: Ilya Naumov --- .../installing-openvino-docker-linux.md | 371 ++++++------------ .../installing-openvino-docker-windows.md | 188 +++++---- 2 files changed, 227 insertions(+), 332 deletions(-) diff --git a/docs/install_guides/installing-openvino-docker-linux.md b/docs/install_guides/installing-openvino-docker-linux.md index 3ea708abbd3..c40e36b8069 100644 --- a/docs/install_guides/installing-openvino-docker-linux.md +++ b/docs/install_guides/installing-openvino-docker-linux.md @@ -1,68 +1,65 @@ 
-# Install Intel® Distribution of OpenVINO™ toolkit for Linux* from a Docker* Image {#openvino_docs_install_guides_installing_openvino_docker_linux} +# Install Intel® Distribution of OpenVINO™ toolkit for Linux from a Docker Image {#openvino_docs_install_guides_installing_openvino_docker_linux} -The Intel® Distribution of OpenVINO™ toolkit quickly deploys applications and solutions that emulate human vision. Based on Convolutional Neural Networks (CNN), the toolkit extends computer vision (CV) workloads across Intel® hardware, maximizing performance. The Intel® Distribution of OpenVINO™ toolkit includes the Intel® Deep Learning Deployment Toolkit. +This guide provides steps on creating a Docker image with Intel® Distribution of OpenVINO™ toolkit for Linux and using the image on different devices. -This guide provides device specifics for a Docker* image creation with Intel® Distribution of OpenVINO™ toolkit for Linux* and its further usage. +## System Requirements -## System Requirements +@sphinxdirective +.. tab:: Target Operating Systems -**Target Operating Systems** + * Ubuntu 18.04 long-term support (LTS), 64-bit + * Ubuntu 20.04 long-term support (LTS), 64-bit + * Red Hat Enterprise Linux 8, 64-bit -- Ubuntu\* 18.04 long-term support (LTS), 64-bit -- Ubuntu\* 20.04 long-term support (LTS), 64-bit -- CentOS\* 7 -- Red Hat\* Enterprise Linux* 8 (64 bit) +.. tab:: Host Operating Systems -**Host Operating Systems** + * Linux + * Windows Subsystem for Linux 2 (WSL2) on CPU or GPU + * macOS on CPU only + + To launch a Linux image on WSL2 when trying to run inferences on a GPU, make sure that the following requirements are met: -- Linux + - Only Windows 10 with 21H2 update or above installed and Windows 11 are supported. + - Intel GPU driver on Windows host with version 30.0.100.9684 or above need be installed. Please see [this article](https://www.intel.com/content/www/us/en/artificial-intelligence/harness-the-power-of-intel-igpu-on-your-machine.html#articleparagraph_983312434) for more details. + - From 2022.1 release, the Docker images contain preinstalled recommended version of OpenCL Runtime with WSL2 support. -## Prebuilt images +@endsphinxdirective -Prebuilt images are available on: +## Installation Flow + +There are two ways to install OpenVINO with Docker. You can choose either of them according to your needs: +* Use a prebuilt image. Do the following steps: + 1. Get a prebuilt image from provided sources. + 2. Run the image on different devices. To run inferences on Intel® Vision Accelerator Design with Intel® Movidius™ VPUs, configure the Docker image first before you run the image. + 3. (Optional) Run samples in the Docker image. +* If you want to customize your image, you can also build a Docker image manually by using the following steps: + 1. Prepare a Dockerfile. + 2. Configure the Docker image. + 3. Run the image on different devices. + 4. (Optional) Run samples in the Docker image. 
+ +## Getting a Prebuilt Image from Provided Sources + +You can find prebuilt images on: - [Docker Hub](https://hub.docker.com/u/openvino) -- [Red Hat* Quay.io](https://quay.io/organization/openvino) -- [Red Hat* Ecosystem Catalog](https://catalog.redhat.com/software/containers/intel/openvino-runtime/606ff4d7ecb5241699188fb3) +- [Red Hat Quay.io](https://quay.io/organization/openvino) +- [Red Hat Ecosystem Catalog (runtime image)](https://catalog.redhat.com/software/containers/intel/openvino-runtime/606ff4d7ecb5241699188fb3) +- [Red Hat Ecosystem Catalog (development image)](https://catalog.redhat.com/software/containers/intel/openvino-dev/613a450dc9bc35f21dc4a1f7) +- [Azure Marketplace](https://azuremarketplace.microsoft.com/en-us/marketplace/apps/intel_corporation.openvino) -## Build a Docker* Image +## Preparing a Dockerfile -You can use [available Dockerfiles](https://github.com/openvinotoolkit/docker_ci/tree/master/dockerfiles) or generate a Dockerfile with your setting via [DockerHub CI Framework](https://github.com/openvinotoolkit/docker_ci). -The Framework can generate a Dockerfile, build, test, and deploy an image with the Intel® Distribution of OpenVINO™ toolkit. -You can also try our [Tutorials](https://github.com/openvinotoolkit/docker_ci/tree/master/docs/tutorials) which demonstrate the usage of Docker containers with Intel® Distribution of OpenVINO™ toolkit. You can find device specific steps to configure an Intel® Distribution of OpenVINO™ toolkit Dockerfile below. +You can use the [available Dockerfiles on GitHub](https://github.com/openvinotoolkit/docker_ci/tree/master/dockerfiles) or generate a Dockerfile with your settings via [DockerHub CI Framework](https://github.com/openvinotoolkit/docker_ci) which can generate a Dockerfile, build, test and deploy an image with the the Intel® Distribution of OpenVINO™ toolkit. +You can also try our [Tutorials](https://github.com/openvinotoolkit/docker_ci/tree/master/docs/tutorials) which demonstrate the usage of Docker containers with OpenVINO. -## Use Docker* Image for CPU +## Configuring the Image for Different Devices -- Kernel reports the same information for all containers as for native application, for example, CPU, memory information. -- All instructions that are available to host process available for process in container, including, for example, AVX2, AVX512. No restrictions. -- Docker\* does not use virtualization or emulation. The process in Docker* is just a regular Linux process, but it is isolated from external world on kernel level. Performance penalty is small. +If you want to run inferences on a CPU or Intel® Neural Compute Stick 2, no extra configuration is needed. Go to Running the image on different devices for the next step. -### Configure a Docker* Image for CPU +### Configuring Docker Image for GPU -You don't need to do specific steps to configure an Intel® Distribution of OpenVINO™ toolkit Dockerfile for CPU. You can use [available Dockerfiles](https://github.com/openvinotoolkit/docker_ci/tree/master/dockerfiles) or generate a Dockerfile with your setting via [DockerHub CI Framework](https://github.com/openvinotoolkit/docker_ci). - -### Run the Docker* Image for CPU - -Run the image with the following command: - -```sh -docker run -it --rm -``` - -## Use a Docker* Image for GPU - -### Configure a Docker* Image for GPU - -> **NOTE**: Only Intel® integrated graphics are supported. - -**Prerequisites:** - -- GPU is not available in container by default, you must attach it to the container. 
-- Kernel driver must be installed on the host. -- Intel® OpenCL™ runtime package must be included into the container. -- In the container, non-root user must be in the `video` and `render` groups. To add a user to the render group, follow the [Configuration Guide for the Intel® Graphics Compute Runtime for OpenCL™ on Ubuntu* 20.04](https://github.com/openvinotoolkit/docker_ci/blob/master/configure_gpu_ubuntu20.md). - -To configure a OpenVINO Docker* image with access to GPU, add the following commands to a Dockerfile: +By default, the distributed Docker image for OpenVINO has the the recommended version of Intel® Graphics Compute Runtime for oneAPI Level Zero and OpenCL Driver for the operating system installed inside. If you want to build an image with a custom version of OpenCL Runtime included, you need to modify the Dockerfile using the lines below (the 19.41.14441 version is used as an example) and build the image manually: **Ubuntu 18.04/20.04**: @@ -84,22 +81,7 @@ RUN apt-get update && \ rm /tmp/opencl ``` -or you can use the installation script `install_NEO_OCL_driver.sh` if you previously installed OpenVINO in the Dockerfile, where `INTEL_OPENCL` is the variable to store the default version of Intel® Graphics Compute Runtime for OpenCL™ Driver: - -```sh -WORKDIR /tmp/opencl -RUN useradd -ms /bin/bash -G video,users openvino && \ - chown openvino -R /home/openvino - -# Please use `20.35.17767` for 10th generation Intel® Core™ processor (formerly Ice Lake) or 11th generation Intel® Core™ processor (formerly Tiger Lake) -ARG INTEL_OPENCL=19.41.14441 - -WORKDIR ${INTEL_OPENVINO_DIR}/install_dependencies -RUN ./install_NEO_OCL_driver.sh --no_numa -y --install_driver ${INTEL_OPENCL} && \ - rm -rf /var/lib/apt/lists/* -``` - -**CentOS 7/RHEL 8**: +**RHEL 8**: ```sh WORKDIR /tmp/opencl @@ -108,7 +90,7 @@ RUN useradd -ms /bin/bash -G video,users openvino && \ RUN groupmod -g 44 video RUN yum update -y && yum install -y https://dl.fedoraproject.org/pub/epel/epel-release-latest-8.noarch.rpm && \ - yum update -y && yum install -y ocl-icd ocl-icd-devel && \ + yum update -y && yum install -y ocl-icd ocl-icd-devel && \ yum clean all && rm -rf /var/cache/yum && \ curl -L https://sourceforge.net/projects/intel-compute-runtime/files/19.41.14441/centos-7/intel-gmmlib-19.3.2-1.el7.x86_64.rpm/download -o intel-gmmlib-19.3.2-1.el7.x86_64.rpm && \ curl -L https://sourceforge.net/projects/intel-compute-runtime/files/19.41.14441/centos-7/intel-gmmlib-devel-19.3.2-1.el7.x86_64.rpm/download -o intel-gmmlib-devel-19.3.2-1.el7.x86_64.rpm && \ @@ -122,245 +104,142 @@ RUN yum update -y && yum install -y https://dl.fedoraproject.org/pub/epel/epel-r yum remove -y epel-release ``` -or you can use the installation script `install_NEO_OCL_driver.sh` if you previously installed OpenVINO in the Dockerfile, where `INTEL_OPENCL` is the variable to store the default version of Intel® Graphics Compute Runtime for OpenCL™ Driver: +### Configuring Docker Image for Intel® Vision Accelerator Design with Intel® Movidius™ VPUs -```sh -WORKDIR /tmp/opencl -RUN useradd -ms /bin/bash -G video,users openvino && \ - chown openvino -R /home/openvino -RUN groupmod -g 44 video +> **NOTE**: When building the Docker image, create a user in the Dockerfile that has the same UID (User Identifier) and GID (Group Identifier) as the user which that runs hddldaemon on the host, and then run the application in the Docker image with this user. This step is necessary to run the container as a non-root user. 
-# Please use `20.35.17767` for 10th generation Intel® Core™ processor (formerly Ice Lake) or 11th generation Intel® Core™ processor (formerly Tiger Lake) -ARG INTEL_OPENCL=19.41.14441 +To use the Docker container for inference on Intel® Vision Accelerator Design with Intel® Movidius™ VPUs, do the following steps: -WORKDIR ${INTEL_OPENVINO_DIR}/install_dependencies -RUN ./install_NEO_OCL_driver.sh --no_numa -y --install_driver ${INTEL_OPENCL} && \ - yum clean all && rm -rf /var/cache/yum && \ - yum remove -y epel-release +1. Set up the environment on the host machine to be used for running Docker. It is required to execute `hddldaemon`, which is responsible for communication between the HDDL plugin and the board. To learn how to set up the environment (the OpenVINO package or HDDL package must be pre-installed), see [Configuration guide for HDDL device](https://github.com/openvinotoolkit/docker_ci/blob/master/install_guide_vpu_hddl.md) or [Configurations for Intel® Vision Accelerator Design with Intel® Movidius™ VPUs on Linux](installing-openvino-config-ivad-vpu.md). +2. Run `hddldaemon` on the host in a separate terminal session using the following command: + ```sh + $HDDL_INSTALL_DIR/hddldaemon + ``` + +## Running the Docker Image on Different Devices + +### Running the Image on CPU + +Run the Docker image with the following command: +``` +docker run -it --rm ``` -### Run the Docker* Image for GPU +Note the following things: + +- Kernel reports the same information for all containers as for native application, for example, CPU, memory information. +- All instructions that are available to host process available for process in container, including, for example, AVX2, AVX512. No restrictions. +- Docker does not use virtualization or emulation. The process in Docker is just a regular Linux process, but it is isolated from external world on kernel level. Performance loss is minor. + + +### Running the Image on GPU + +> **NOTE**: Only Intel® integrated graphics are supported. + +Note the following things: + +- GPU is not available in the container by default. You must attach it to the container. +- Kernel driver must be installed on the host. +- In the container, non-root user must be in the `video` and `render` groups. To add a user to the render group, follow the [Configuration Guide for the Intel® Graphics Compute Runtime for OpenCL™ on Ubuntu 20.04](https://github.com/openvinotoolkit/docker_ci/blob/master/configure_gpu_ubuntu20.md). To make GPU available in the container, attach the GPU to the container using `--device /dev/dri` option and run the container: -```sh -docker run -it --rm --device /dev/dri -``` -> **NOTE**: If your host system is Ubuntu 20, follow the [Configuration Guide for the Intel® Graphics Compute Runtime for OpenCL™ on Ubuntu* 20.04](https://github.com/openvinotoolkit/docker_ci/blob/master/configure_gpu_ubuntu20.md). +* Ubuntu 18 or RHEL 8: + ```sh + docker run -it --rm --device /dev/dri + ``` + > **NOTE**: If your host system is Ubuntu 20, follow the [Configuration Guide for the Intel® Graphics Compute Runtime for OpenCL™ on Ubuntu* 20.04](https://github.com/openvinotoolkit/docker_ci/blob/master/configure_gpu_ubuntu20.md). -## Use a Docker* Image for Intel® Neural Compute Stick 2 +* WSL2: + ```sh + docker run -it --rm --device /dev/dxg --volume /usr/lib/wsl:/usr/lib/wsl + ``` + > **NOTE**: To launch a Linux image on WSL2, make sure that the additional requirements in System Requirements are met. 
-### Configure and Run the Docker* Image for Intel® Neural Compute Stick 2 -**Known limitations:** +### Running the Image on Intel® Neural Compute Stick 2 -- Intel® Neural Compute Stick 2 device changes its VendorID and DeviceID during execution and each time looks for a host system as a brand new device. It means it cannot be mounted as usual. -- UDEV events are not forwarded to the container by default it does not know about device reconnection. -- Only one device per host is supported. - -Use one of the following options as **Possible solutions for Intel® Neural Compute Stick 2:** - -#### Option 1 - -1. Get rid of UDEV by rebuilding `libusb` without UDEV support in the Docker* image (add the following commands to a `Dockerfile`): - - **Ubuntu 18.04/20.04**: -```sh -ARG BUILD_DEPENDENCIES="autoconf \ - automake \ - build-essential \ - libtool \ - unzip \ - udev" -RUN apt-get update && \ - apt-get install -y --no-install-recommends ${BUILD_DEPENDENCIES} && \ - rm -rf /var/lib/apt/lists/* - -WORKDIR /opt -RUN curl -L https://github.com/libusb/libusb/archive/v1.0.22.zip --output v1.0.22.zip && \ - unzip v1.0.22.zip - -WORKDIR /opt/libusb-1.0.22 -RUN ./bootstrap.sh && \ - ./configure --disable-udev --enable-shared && \ - make -j4 - -WORKDIR /opt/libusb-1.0.22/libusb -RUN /bin/mkdir -p '/usr/local/lib' && \ - /bin/bash ../libtool --mode=install /usr/bin/install -c libusb-1.0.la '/usr/local/lib' && \ - /bin/mkdir -p '/usr/local/include/libusb-1.0' && \ - /usr/bin/install -c -m 644 libusb.h '/usr/local/include/libusb-1.0' && \ - /bin/mkdir -p '/usr/local/lib/pkgconfig' - -WORKDIR /opt/libusb-1.0.22/ -RUN /usr/bin/install -c -m 644 libusb-1.0.pc '/usr/local/lib/pkgconfig' && \ - cp /opt/intel/openvino_2022/runtime/3rdparty/97-myriad-usbboot.rules /etc/udev/rules.d/ && \ - ldconfig -``` - - **CentOS 7**: -```sh -ARG BUILD_DEPENDENCIES="autoconf \ - automake \ - libtool \ - unzip \ - udev" - -RUN yum update -y && yum install -y ${BUILD_DEPENDENCIES} && \ - yum group install -y "Development Tools" && \ - yum clean all && rm -rf /var/cache/yum - -WORKDIR /opt -RUN curl -L https://github.com/libusb/libusb/archive/v1.0.22.zip --output v1.0.22.zip && \ - unzip v1.0.22.zip && rm -rf v1.0.22.zip - -WORKDIR /opt/libusb-1.0.22 -RUN ./bootstrap.sh && \ - ./configure --disable-udev --enable-shared && \ - make -j4 - -WORKDIR /opt/libusb-1.0.22/libusb -RUN /bin/mkdir -p '/usr/local/lib' && \ - /bin/bash ../libtool --mode=install /usr/bin/install -c libusb-1.0.la '/usr/local/lib' && \ - /bin/mkdir -p '/usr/local/include/libusb-1.0' && \ - /usr/bin/install -c -m 644 libusb.h '/usr/local/include/libusb-1.0' && \ - /bin/mkdir -p '/usr/local/lib/pkgconfig' && \ - printf "\nexport LD_LIBRARY_PATH=\${LD_LIBRARY_PATH}:/usr/local/lib\n" >> /opt/intel/openvino_2022/setupvars.sh - -WORKDIR /opt/libusb-1.0.22/ -RUN /usr/bin/install -c -m 644 libusb-1.0.pc '/usr/local/lib/pkgconfig' && \ - cp /opt/intel/openvino_2022/runtime/3rdparty/97-myriad-usbboot.rules /etc/udev/rules.d/ && \ - ldconfig -``` -2. Run the Docker* image: +Run the Docker image with the following command: ```sh docker run -it --rm --device-cgroup-rule='c 189:* rmw' -v /dev/bus/usb:/dev/bus/usb ``` -#### Option 2 -Run container in the privileged mode, enable the Docker network configuration as host, and mount all devices to the container: +While the command above is not working, you can also run container in the privileged mode, enable the Docker network configuration as host, and mount all devices to the container. 
Run the following command: ```sh docker run -it --rm --privileged -v /dev:/dev --network=host ``` -> **NOTES**: -> -> - It is not secure. -> - Conflicts with Kubernetes* and other tools that use orchestration and private networks may occur. -## Use a Docker* Image for Intel® Vision Accelerator Design with Intel® Movidius™ VPUs +> **NOTE**: This option is not recommended, as conflicts with Kubernetes and other tools that use orchestration and private networks may occur. Please use it with caution and only for troubleshooting purposes. -### Configure Docker* Image for Intel® Vision Accelerator Design with Intel® Movidius™ VPUs -To use the Docker container for inference on Intel® Vision Accelerator Design with Intel® Movidius™ VPUs: +#### Known Limitations -1. Set up the environment on the host machine, that is going to be used for running Docker*. -It is required to execute `hddldaemon`, which is responsible for communication between the HDDL plugin and the board. -To learn how to set up the environment (the OpenVINO package or HDDL package must be pre-installed), see [Configuration guide for HDDL device](https://github.com/openvinotoolkit/docker_ci/blob/master/install_guide_vpu_hddl.md) or [Configuration Guide for Intel® Vision Accelerator Design with Intel® Movidius™ VPUs](installing-openvino-config-ivad-vpu.md). -2. Prepare the Docker* image (add the following commands to a Dockerfile). - - **Ubuntu 18.04**: -```sh -WORKDIR /tmp -RUN apt-get update && \ - apt-get install -y --no-install-recommends \ - libboost-filesystem1.65-dev \ - libboost-thread1.65-dev \ - libjson-c3 libxxf86vm-dev && \ - rm -rf /var/lib/apt/lists/* && rm -rf /tmp/* -``` - - **Ubuntu 20.04**: -```sh -WORKDIR /tmp -RUN apt-get update && \ - apt-get install -y --no-install-recommends \ - libboost-filesystem-dev \ - libboost-thread-dev \ - libjson-c4 \ - libxxf86vm-dev && \ - rm -rf /var/lib/apt/lists/* && rm -rf /tmp/* -``` - - **CentOS 7**: -```sh -WORKDIR /tmp -RUN yum update -y && yum install -y \ - boost-filesystem \ - boost-thread \ - boost-program-options \ - boost-system \ - boost-chrono \ - boost-date-time \ - boost-regex \ - boost-atomic \ - json-c \ - libXxf86vm-devel && \ - yum clean all && rm -rf /var/cache/yum -``` -3. Run `hddldaemon` on the host in a separate terminal session using the following command: -```sh -$HDDL_INSTALL_DIR/hddldaemon -``` +- Intel® Neural Compute Stick 2 device changes its VendorID and DeviceID during execution and each time looks for a host system as a brand new device. It means it cannot be mounted as usual. +- UDEV events are not forwarded to the container by default, and it does not know about the device reconnection. The prebuilt Docker images and provided Dockerfiles include `libusb` rebuilt without UDEV support. +- Only one NCS2 device connected to the host can be used when running inference in a container. -### Run the Docker* Image for Intel® Vision Accelerator Design with Intel® Movidius™ VPUs -To run the built Docker* image for Intel® Vision Accelerator Design with Intel® Movidius™ VPUs, use the following command: +### Running the Image on Intel® Vision Accelerator Design with Intel® Movidius™ VPUs +> **NOTE**: To run inferences on Intel® Vision Accelerator Design with Intel® Movidius™ VPUs, make sure that you have configured the Docker image first. 
+ +Use the following command: ```sh docker run -it --rm --device=/dev/ion:/dev/ion -v /var/tmp:/var/tmp ``` -> **NOTES**: -> -> - The device `/dev/ion` needs to be shared to be able to use ion buffers among the plugin, `hddldaemon` and the kernel. -> - Since separate inference tasks share the same HDDL service communication interface (the service creates mutexes and a socket file in `/var/tmp`), `/var/tmp` needs to be mounted and shared among them. - -In some cases, the ion driver is not enabled (for example, due to a newer kernel version or iommu (Input-Output Memory Management Unit) incompatibility). `lsmod | grep myd_ion` returns empty output. To resolve, use the following command: - +If your application runs inference of a network with a big size (>4MB) of input/output, the HDDL plugin will use shared memory. In this case, you must mount `/dev/shm` as volume: ```sh -docker run -it --rm --net=host -v /var/tmp:/var/tmp –-ipc=host +docker run -it --rm --device=/dev/ion:/dev/ion -v /var/tmp:/var/tmp -v /dev/shm:/dev/shm ``` -> **NOTES**: -> -> - When building Docker images, create a user in the Dockerfile that has the same UID(User Identifier) and GID(Group Identifier) as the user which runs hddldaemon on the host. -> - Run the application in the Docker image with this user. -> - Alternatively, you can start hddldaemon with the root user on host, but this approach is not recommended. +Note the following things: +* The device `/dev/ion` needs to be shared to be able to use ion buffers among the plugin, `hddldaemon` and the kernel. +* Since separate inference tasks share the same HDDL service communication interface (the service creates mutexes and a socket file in `/var/tmp`), `/var/tmp` needs to be mounted and shared among them. -### Run Demos in the Docker* Image -To run the Classification Demo Using SqueezeNet on a specific inference device, run the following commands with the root privileges (additional third-party dependencies will be installed): +#### If the ion Driver is Not Enabled + +In some cases, the ion driver is not enabled (for example, due to a newer kernel version or iommu (Input-Output Memory Management Unit) incompatibility). `lsmod | grep myd_ion` returns empty output. To resolve this issue, use the following command: +```sh +docker run -it --rm --ipc=host --net=host -v /var/tmp:/var/tmp +``` +If that still does not solve the issue, try starting `hddldaemon` with the root user on host. However, this approach is not recommended. Please use with caution. 
+ + +## Running Samples in Docker Image + +To run the `Hello Classification Sample` on a specific inference device, run the following commands: **CPU**: ```sh -docker run -itu root:root --rm -/bin/bash -c "apt update && apt install sudo && samples/scripts/run_sample_squeezenet.sh -d CPU" +docker run -it --rm +/bin/bash -c "cd ~ && omz_downloader --name googlenet-v1 --precisions FP16 && omz_converter --name googlenet-v1 --precision FP16 && curl -O https://storage.openvinotoolkit.org/data/test_data/images/car_1.bmp && python3 /opt/intel/openvino/samples/python/hello_classification/hello_classification.py public/googlenet-v1/FP16/googlenet-v1.xml car_1.bmp CPU" ``` **GPU**: ```sh -docker run -itu root:root --rm --device /dev/dri:/dev/dri -/bin/bash -c "apt update && apt install sudo && samples/scripts/run_sample_squeezenet.sh -d GPU" +docker run -itu root:root --rm --device /dev/dri:/dev/dri +/bin/bash -c "omz_downloader --name googlenet-v1 --precisions FP16 && omz_converter --name googlenet-v1 --precision FP16 && curl -O https://storage.openvinotoolkit.org/data/test_data/images/car_1.bmp && python3 samples/python/hello_classification/hello_classification.py public/googlenet-v1/FP16/googlenet-v1.xml car_1.bmp GPU" ``` **MYRIAD**: ```sh docker run -itu root:root --rm --device-cgroup-rule='c 189:* rmw' -v /dev/bus/usb:/dev/bus/usb -/bin/bash -c "apt update && apt install sudo && samples/scripts/run_sample_squeezenet.sh -d MYRIAD" +/bin/bash -c "omz_downloader --name googlenet-v1 --precisions FP16 && omz_converter --name googlenet-v1 --precision FP16 && curl -O https://storage.openvinotoolkit.org/data/test_data/images/car_1.bmp && python3 samples/python/hello_classification/hello_classification.py public/googlenet-v1/FP16/googlenet-v1.xml car_1.bmp MYRIAD" ``` **HDDL**: ```sh -docker run -itu root:root --rm --device=/dev/ion:/dev/ion -v /var/tmp:/var/tmp -/bin/bash -c "apt update && apt install sudo && samples/scripts/run_sample_squeezenet.sh -d HDDL" +docker run -itu root:root --rm --device=/dev/ion:/dev/ion -v /var/tmp:/var/tmp -v /dev/shm:/dev/shm +/bin/bash -c "omz_downloader --name googlenet-v1 --precisions FP16 && omz_converter --name googlenet-v1 --precision FP16 && curl -O https://storage.openvinotoolkit.org/data/test_data/images/car_1.bmp && umask 000 && python3 samples/python/hello_classification/hello_classification.py public/googlenet-v1/FP16/googlenet-v1.xml car_1.bmp HDDL" ``` -## Troubleshooting - -If you got proxy issues, please setup proxy settings for Docker. See the Proxy section in the [Install the DL Workbench from Docker Hub* ](@ref workbench_docs_Workbench_DG_Run_Locally) topic. - ## Additional Resources - [DockerHub CI Framework](https://github.com/openvinotoolkit/docker_ci) for Intel® Distribution of OpenVINO™ toolkit. The Framework can generate a Dockerfile, build, test, and deploy an image with the Intel® Distribution of OpenVINO™ toolkit. You can reuse available Dockerfiles, add your layer and customize the image of OpenVINO™ for your needs. 
- - Intel® Distribution of OpenVINO™ toolkit home page: [https://software.intel.com/en-us/openvino-toolkit](https://software.intel.com/en-us/openvino-toolkit) - - Intel® Neural Compute Stick 2 Get Started: [https://software.intel.com/en-us/neural-compute-stick/get-started](https://software.intel.com/en-us/neural-compute-stick/get-started) diff --git a/docs/install_guides/installing-openvino-docker-windows.md b/docs/install_guides/installing-openvino-docker-windows.md index de667d1eeb7..3910eb09489 100644 --- a/docs/install_guides/installing-openvino-docker-windows.md +++ b/docs/install_guides/installing-openvino-docker-windows.md @@ -1,112 +1,115 @@ -# Install Intel® Distribution of OpenVINO™ toolkit for Windows* from Docker* Image {#openvino_docs_install_guides_installing_openvino_docker_windows} +# Install Intel® Distribution of OpenVINO™ toolkit for Windows from Docker Image {#openvino_docs_install_guides_installing_openvino_docker_windows} -The Intel® Distribution of OpenVINO™ toolkit quickly deploys applications and solutions that emulate human vision. Based on Convolutional Neural Networks (CNN), the toolkit extends computer vision (CV) workloads across Intel® hardware, maximizing performance. The Intel® Distribution of OpenVINO™ toolkit includes the Intel® Deep Learning Deployment Toolkit. +This guide provides steps for creating a Docker image with Intel® Distribution of OpenVINO™ toolkit for Windows and using the Docker image on different devices. -This guide provides device specifics for a Docker* image creation with Intel® Distribution of OpenVINO™ toolkit for Windows* and its further usage. +## System Requirements -## System Requirements +@sphinxdirective +.. tab:: Target Operating Systems -**Target Operating Systems** + * Windows Server Core OS + * Windows base OS -- Windows Server Core* +.. tab:: Host Operating Systems -**Host Operating Systems** + * Windows 10, 64-bit Pro, Enterprise or Education (1607 Anniversary Update, Build 14393 or later) editions + * Windows Server 2016 or higher + +.. tab:: Additional Requirements for GPU -- Windows 10*, 64-bit Pro, Enterprise or Education (1607 Anniversary Update, Build 14393 or later) editions -- Windows Server* 2016 or higher + To use GPU Acceleration in Windows containers, make sure that the following requirements for Windows host, OpenVINO and Docker are met: -## Prebuilt Images + - [Windows requirements](https://docs.microsoft.com/en-us/virtualization/windowscontainers/deploy-containers/gpu-acceleration): + - The container host must be running Windows Server 2019 or Windows 10 of version 1809 or higher. + - The container base image must be `mcr.microsoft.com/windows:1809` or higher. Windows Server Core and Nano Server container images are not currently supported. + - The container host must be running Docker Engine 19.03 or higher. + - The container host must have GPU running display drivers of version WDDM 2.5 or higher. + - [GPU requirement for OpenVINO](https://docs.openvino.ai/latest/openvino_docs_install_guides_installing_openvino_windows.html#Install-GPU): Intel Graphics Driver for Windows of version 15.65 or higher. + - [Docker isolation mode requirement](https://docs.microsoft.com/en-us/virtualization/windowscontainers/manage-containers/hyperv-container): + - Windows host and container version tags must match. 
+        - [Windows host and container isolation process support](https://docs.microsoft.com/en-us/virtualization/windowscontainers/deploy-containers/version-compatibility)

-Prebuilt images are available on [Docker Hub](https://hub.docker.com/u/openvino).
+@endsphinxdirective

-## Build a Docker* Image

-You can use [available Dockerfiles](https://github.com/openvinotoolkit/docker_ci/tree/master/dockerfiles) or generate a Dockerfile with your setting via [DockerHub CI Framework](https://github.com/openvinotoolkit/docker_ci).
-The Framework can generate a Dockerfile, build, test, and deploy an image with the Intel® Distribution of OpenVINO™ toolkit. You can find device specific steps to configure an Intel® Distribution of OpenVINO™ toolkit Dockerfile below.
+## Installation Flow

-## Configure and Run the Docker* Image for CPU
+There are two ways to install OpenVINO with Docker. You can choose either of them according to your needs:
+* Use a prebuilt image. Do the following steps:
+  1. Get a prebuilt image from provided sources.
+  2. Run the image on different devices.
+* If you want to customize your image, you can also build a Docker image manually by using the following steps:
+  1. Prepare a Dockerfile.
+  2. Configure the Docker image.
+  3. Run the image on different devices.

-## Install Additional Dependencies
+## Getting a Prebuilt Image from Provided Sources

-### Install CMake
+You can find prebuilt images on:

-To add CMake to the image, add the following commands to the Dockerfile:
+- [Docker Hub](https://hub.docker.com/u/openvino)
+- [Azure Marketplace](https://azuremarketplace.microsoft.com/en-us/marketplace/apps/intel_corporation.openvino)

-```bat
-RUN powershell.exe -Command `
-    Invoke-WebRequest -URI https://cmake.org/files/v3.14/cmake-3.14.7-win64-x64.msi -OutFile %TMP%\\cmake-3.14.7-win64-x64.msi ; `
-    Start-Process %TMP%\\cmake-3.14.7-win64-x64.msi -ArgumentList '/quiet /norestart' -Wait ; `
-    Remove-Item %TMP%\\cmake-3.14.7-win64-x64.msi -Force
+## Preparing a Dockerfile

-RUN SETX /M PATH "C:\Program Files\CMake\Bin;%PATH%"
-```
+You can use the [available Dockerfiles on GitHub](https://github.com/openvinotoolkit/docker_ci/tree/master/dockerfiles) or generate a Dockerfile with your settings via [DockerHub CI Framework](https://github.com/openvinotoolkit/docker_ci), which can generate a Dockerfile, build, test, and deploy an image with the Intel® Distribution of OpenVINO™ toolkit.

-In case of proxy issues, please add the `ARG HTTPS_PROXY` and `-Proxy %%HTTPS_PROXY%` settings to the `powershell.exe` command to the Dockerfile. Then build a Docker image:
+## Configuring the Docker Image for Different Devices

-```bat
-docker build . -t `
---build-arg HTTPS_PROXY=
-```
+### Installing Additional Dependencies for CPU

-### Install Microsoft Visual Studio* Build Tools
+#### Installing CMake

-You can add Microsoft Visual Studio Build Tools* to a Windows* OS Docker image using the [offline](https://docs.microsoft.com/en-us/visualstudio/install/create-an-offline-installation-of-visual-studio?view=vs-2019) or [online](https://docs.microsoft.com/en-us/visualstudio/install/build-tools-container?view=vs-2019) installers for Build Tools.
-Microsoft Visual Studio Build Tools* are licensed as a supplement your existing Microsoft Visual Studio* license.
-Any images built with these tools should be for your personal use or for use in your organization in accordance with your existing Visual Studio* and Windows* licenses.
+   To add CMake to the image, add the following commands to the Dockerfile:
+   ```bat
+   RUN powershell.exe -Command `
+       Invoke-WebRequest -URI https://cmake.org/files/v3.14/cmake-3.14.7-win64-x64.msi -OutFile %TMP%\\cmake-3.14.7-win64-x64.msi ; `
+       Start-Process %TMP%\\cmake-3.14.7-win64-x64.msi -ArgumentList '/quiet /norestart' -Wait ; `
+       Remove-Item %TMP%\\cmake-3.14.7-win64-x64.msi -Force

-To add MSBuild 2019 to the image, add the following commands to the Dockerfile:
+       RUN SETX /M PATH "C:\Program Files\CMake\Bin;%PATH%"
+   ```

-```bat
-RUN powershell.exe -Command Invoke-WebRequest -URI https://aka.ms/vs/16/release/vs_buildtools.exe -OutFile %TMP%\\vs_buildtools.exe
+   In case of proxy issues, please add the `ARG HTTPS_PROXY` and `-Proxy %%HTTPS_PROXY%` settings to the `powershell.exe` command to the Dockerfile. Then build a Docker image:
+   ```bat
+   docker build . -t `
+   --build-arg HTTPS_PROXY=
+   ```
+   
+#### Installing Microsoft Visual Studio Build Tools

-RUN %TMP%\\vs_buildtools.exe --quiet --norestart --wait --nocache `
-    --installPath "C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools" `
-    --add Microsoft.VisualStudio.Workload.MSBuildTools `
-    --add Microsoft.VisualStudio.Workload.UniversalBuildTools `
-    --add Microsoft.VisualStudio.Workload.VCTools --includeRecommended `
-    --remove Microsoft.VisualStudio.Component.Windows10SDK.10240 `
-    --remove Microsoft.VisualStudio.Component.Windows10SDK.10586 `
-    --remove Microsoft.VisualStudio.Component.Windows10SDK.14393 `
-    --remove Microsoft.VisualStudio.Component.Windows81SDK || IF "%ERRORLEVEL%"=="3010" EXIT 0 && powershell set-executionpolicy remotesigned
-```
+   You can add Microsoft Visual Studio Build Tools to a Windows OS Docker image using the [offline](https://docs.microsoft.com/en-us/visualstudio/install/create-an-offline-installation-of-visual-studio?view=vs-2019) or [online](https://docs.microsoft.com/en-us/visualstudio/install/build-tools-container?view=vs-2019) installers for Build Tools.
+   
+   Microsoft Visual Studio Build Tools are licensed as a supplement to your existing Microsoft Visual Studio license.
+   
+   Any images built with these tools should be for your personal use or for use in your organization in accordance with your existing Visual Studio and Windows licenses.

-In case of proxy issues, please use the [offline installer for Build Tools](https://docs.microsoft.com/en-us/visualstudio/install/create-an-offline-installation-of-visual-studio?view=vs-2019).
+ To add MSBuild 2019 to the image, add the following commands to the Dockerfile: + ```bat + RUN powershell.exe -Command Invoke-WebRequest -URI https://aka.ms/vs/16/release/vs_buildtools.exe -OutFile %TMP%\\vs_buildtools.exe -## Run the Docker* Image for CPU + RUN %TMP%\\vs_buildtools.exe --quiet --norestart --wait --nocache ` + --installPath "C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools" ` + --add Microsoft.VisualStudio.Workload.MSBuildTools ` + --add Microsoft.VisualStudio.Workload.UniversalBuildTools ` + --add Microsoft.VisualStudio.Workload.VCTools --includeRecommended ` + --remove Microsoft.VisualStudio.Component.Windows10SDK.10240 ` + --remove Microsoft.VisualStudio.Component.Windows10SDK.10586 ` + --remove Microsoft.VisualStudio.Component.Windows10SDK.14393 ` + --remove Microsoft.VisualStudio.Component.Windows81SDK || IF "%ERRORLEVEL%"=="3010" EXIT 0 && powershell set-executionpolicy remotesigned + ``` -To start the interactive session, run the following command allows inference on the CPU: + In case of proxy issues, please use the [offline installer for Build Tools](https://docs.microsoft.com/en-us/visualstudio/install/create-an-offline-installation-of-visual-studio?view=vs-2019). -```bat -docker run -it --rm -``` +### Configuring the Image for GPU -If you want to try some demos then run image with the root privileges (some additional 3-rd party dependencies will be installed): - -```bat -docker run -itu ContainerAdministrator --rm cmd /S /C "cd samples\scripts && run_sample_squeezenet.bat -d CPU" -``` - -## Configure and Run the Docker* Image for GPU - -GPU Acceleration in Windows containers feature requires to meet Windows host, OpenVINO toolkit and Docker* requirements: - -- [Windows requirements](https://docs.microsoft.com/en-us/virtualization/windowscontainers/deploy-containers/gpu-acceleration): - - The container host must be running Windows Server 2019 or Windows 10 of version 1809 or higher. - - The container base image must be `mcr.microsoft.com/windows:1809` or higher. Windows Server Core and Nano Server container images are not currently supported. - - The container host must be running Docker Engine 19.03 or higher. - - The container host must have GPU running display drivers of version WDDM 2.5 or higher. -- [OpenVINO™ GPU requirement](https://docs.openvino.ai/latest/openvino_docs_install_guides_installing_openvino_windows.html#Install-GPU): - - Intel Graphics Driver for Windows of version 15.65 or higher. -- [Docker isolation mode requirement](https://docs.microsoft.com/en-us/virtualization/windowscontainers/manage-containers/hyperv-container): - - Windows host and container version tags must match. - - [Windows host and container isolation process support](https://docs.microsoft.com/en-us/virtualization/windowscontainers/deploy-containers/version-compatibility) - -## Build a Docker* Image for Your Host System +> **NOTE**: Since GPU is not supported in prebuilt images or [default Dockerfiles](https://github.com/openvinotoolkit/docker_ci/tree/master/dockerfiles), you must make sure +the Additional Requirements for GPU in System Requirements are met, and do the following steps to build the image manually. 1. Reuse one of [available Dockerfiles](https://github.com/openvinotoolkit/docker_ci/tree/master/dockerfiles). You can also use your own Dockerfile. 2. Check your [Windows host and container isolation process compatibility](https://docs.microsoft.com/en-us/virtualization/windowscontainers/deploy-containers/version-compatibility). -3. 
Find the appropriate Windows container base image on [DockerHub*](https://hub.docker.com/_/microsoft-windows) and set up your host/container version in the `FROM` Dockerfile instruction. - For example, in [openvino_c_dev_2021.dockerfile](https://github.com/openvinotoolkit/docker_ci/blob/master/dockerfiles/winserver2019/openvino_c_dev_2021.dockerfile), change: +3. Find the appropriate Windows container base image on [DockerHub](https://hub.docker.com/_/microsoft-windows) and set up your host/container version in the `FROM` Dockerfile instruction. + For example, in the `openvino_c_dev_.dockerfile`, change: ```bat FROM mcr.microsoft.com/windows/servercore:ltsc2019 AS ov_base ``` @@ -114,7 +117,7 @@ GPU Acceleration in Windows containers feature requires to meet Windows host, Op ```bat FROM mcr.microsoft.com/windows:20H2 ``` -4. Build the Docker image +4. Build the Docker image by running the following command: ```bat docker build --build-arg package_url= -f -t . ``` @@ -124,7 +127,25 @@ GPU Acceleration in Windows containers feature requires to meet Windows host, Op copy C:\Windows\System32\OpenCL.dll C:\tmp ``` -## Run the Docker* Image for GPU +## Running the Docker Image on Different Devices + +### Running the Image on CPU + +To start the interactive session, run the following command: +```bat +docker run -it --rm +``` + +If you want to try some samples, run the image with the following command: +```bat +docker run -it --rm +cmd /S /C "omz_downloader --name googlenet-v1 --precisions FP16 && omz_converter --name googlenet-v1 --precision FP16 && curl -kO https://storage.openvinotoolkit.org/data/test_data/images/car_1.bmp && python samples\python\hello_classification\hello_classification.py public\googlenet-v1\FP16\googlenet-v1.xml car_1.bmp CPU" +``` + +### Running the Image on GPU + +> **NOTE**: Since GPU is not supported in prebuilt images or [default Dockerfiles](https://github.com/openvinotoolkit/docker_ci/tree/master/dockerfiles), you must make sure +the Additional Requirements for GPU in System Requirements are met, and configure and build the image manually before you can run inferences on a GPU. 1. To try inference on a GPU, run the image with the following command: ```bat @@ -138,18 +159,13 @@ GPU Acceleration in Windows containers feature requires to meet Windows host, Op ```bat copy C:\tmp\OpenCL.dll C:\Windows\System32\ && reg add "HKLM\SOFTWARE\Khronos\OpenCL\Vendors" /v "C:\Windows\System32\DriverStore\FileRepository\iigd_dch.inf_amd64_518f2921ba495409\ocl\bin\x64\intelocl64.dll" /t REG_DWORD /d 0 ``` -3. For example, run the `run_sample_squeezenet` demo with the command below: + For example, run the `Hello Classification Python` sample with the following command: ```bat - cd samples\scripts && run_sample_squeezenet.bat -d GPU + omz_downloader --name googlenet-v1 --precisions FP16 && omz_converter --name googlenet-v1 --precision FP16 && curl -kO https://storage.openvinotoolkit.org/data/test_data/images/car_1.bmp && python samples\python\hello_classification\hello_classification.py public\googlenet-v1\FP16\googlenet-v1.xml car_1.bmp GPU ``` - > **NOTE**: Addittional third-party dependencies will be installed. -## Troubleshooting - -If you got proxy issues, please setup proxy settings for Docker. See the Proxy section in the [Install the DL Workbench from Docker Hub* ](@ref workbench_docs_Workbench_DG_Run_Locally) topic. ## Additional Resources - [DockerHub CI Framework](https://github.com/openvinotoolkit/docker_ci) for Intel® Distribution of OpenVINO™ toolkit. 
The Framework can generate a Dockerfile, build, test, and deploy an image with the Intel® Distribution of OpenVINO™ toolkit. You can reuse available Dockerfiles, add your layer and customize the image of OpenVINO™ for your needs. - - Intel® Distribution of OpenVINO™ toolkit home page: [https://software.intel.com/en-us/openvino-toolkit](https://software.intel.com/en-us/openvino-toolkit) From 828d9d810a82ff5fa4189ffd4003861d8321619b Mon Sep 17 00:00:00 2001 From: Yuan Xu Date: Mon, 21 Feb 2022 17:15:15 +0800 Subject: [PATCH 026/310] updating apt, yum, conda installation for 22/1 (#10219) * Add Overview page * update yum installation * update apt installation * update conda installation * Revert "Add Overview page" * Update docs/install_guides/installing-openvino-apt.md Co-authored-by: Sergey Lyubimtsev * update Ubuntu version format * update as per review comments * integrate comments * update version format * add a configurations chapter * update * Update docs/install_guides/installing-openvino-yum.md Co-authored-by: Andrey Zaytsev * Update docs/install_guides/installing-openvino-conda.md Co-authored-by: Andrey Zaytsev * Update docs/install_guides/installing-openvino-yum.md Co-authored-by: Andrey Zaytsev * Update docs/install_guides/installing-openvino-yum.md Co-authored-by: Andrey Zaytsev * Update docs/install_guides/installing-openvino-yum.md Co-authored-by: Andrey Zaytsev * Update docs/install_guides/installing-openvino-apt.md Co-authored-by: Andrey Zaytsev * Update docs/install_guides/installing-openvino-yum.md Co-authored-by: Andrey Zaytsev * Update docs/install_guides/installing-openvino-yum.md Co-authored-by: Andrey Zaytsev * update comments * Update docs/install_guides/installing-openvino-apt.md Co-authored-by: Andrey Zaytsev * Update docs/install_guides/installing-openvino-yum.md Co-authored-by: Andrey Zaytsev * Update docs/install_guides/installing-openvino-yum.md Co-authored-by: Andrey Zaytsev * update references to OpenVINO Runtime User Guide Co-authored-by: Sergey Lyubimtsev Co-authored-by: Andrey Zaytsev --- .../install_guides/installing-openvino-apt.md | 226 ++++++++---------- .../installing-openvino-conda.md | 66 ++--- .../installing-openvino-linux.md | 6 +- .../install_guides/installing-openvino-yum.md | 191 +++++++-------- 4 files changed, 219 insertions(+), 270 deletions(-) diff --git a/docs/install_guides/installing-openvino-apt.md b/docs/install_guides/installing-openvino-apt.md index e46b34804c9..c2e45d71ad0 100644 --- a/docs/install_guides/installing-openvino-apt.md +++ b/docs/install_guides/installing-openvino-apt.md @@ -1,164 +1,144 @@ -# Install Intel® Distribution of OpenVINO™ toolkit for Linux* Using APT Repository {#openvino_docs_install_guides_installing_openvino_apt} +# Install Intel® Distribution of OpenVINO™ Toolkit for Linux Using APT Repository {#openvino_docs_install_guides_installing_openvino_apt} -This guide provides installation steps for Intel® Distribution of OpenVINO™ toolkit for Linux* distributed through the APT repository. +This guide provides installation steps for Intel® Distribution of OpenVINO™ toolkit for Linux distributed through the APT repository. -> **IMPORTANT**: By downloading and using this container and the included software, you agree to the terms and conditions of the [software license agreements](https://software.intel.com/content/dam/develop/external/us/en/documents/intel-openvino-license-agreements.pdf). Please, review the content inside the `/licensing` folder for more details. 
+From 2022.1 release, the OpenVINO™ Development Tools can only be installed via PyPI. If you want to develop or optimize your models with OpenVINO, see [Install OpenVINO Development Tools](installing-model-dev-tools.md) for detailed steps. -> **NOTE**: Intel® Graphics Compute Runtime for OpenCL™ is not a part of OpenVINO™ APT distribution. You can install it from the [Intel® Graphics Compute Runtime for OpenCL™ GitHub repo](https://github.com/intel/compute-runtime). +> **IMPORTANT**: By downloading and using this container and the included software, you agree to the terms and conditions of the [software license agreements](https://software.intel.com/content/dam/develop/external/us/en/documents/intel-openvino-license-agreements.pdf). Please review the content inside the `/licensing` folder for more details. ## System Requirements -The complete list of supported hardware is available in the [Release Notes](https://software.intel.com/content/www/us/en/develop/articles/openvino-relnotes.html#inpage-nav-8). +The complete list of supported hardware is available in the [Release Notes](https://software.intel.com/content/www/us/en/develop/articles/openvino-relnotes.html). **Operating Systems** -- Ubuntu 18.04.x long-term support (LTS), 64-bit -- Ubuntu 20.04.0 long-term support (LTS), 64-bit +- Ubuntu 18.04 long-term support (LTS), 64-bit +- Ubuntu 20.04 long-term support (LTS), 64-bit -## Included with Runtime Package +## Install OpenVINO Runtime -The following components are installed with the OpenVINO runtime package: +### Step 1: Set Up the OpenVINO Toolkit APT Repository -| Component | Description| -|-----------|------------| -| [OpenVINO™ Runtime](../OV_Runtime_UG/OpenVINO_Runtime_User_Guide.md)| The engine that runs a deep learning model. It includes a set of libraries for an easy inference integration into your applications. | -| [OpenCV*](https://docs.opencv.org/master/) | OpenCV* community version compiled for Intel® hardware. | -| Deep Learning Streamer (DL Streamer) | Streaming analytics framework, based on GStreamer, for constructing graphs of media analytics components. For the DL Streamer documentation, see [DL Streamer Samples](@ref gst_samples_README), [API Reference](https://openvinotoolkit.github.io/dlstreamer_gst/), [Elements](https://github.com/openvinotoolkit/dlstreamer_gst/wiki/Elements), [Tutorial](https://github.com/openvinotoolkit/dlstreamer_gst/wiki/DL-Streamer-Tutorial). | +1. Install the GPG key for the repository -## Included with Developer Package + a. Download the [GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB](https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB). You can also use the following command: + ```sh + wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB + ``` + b. Add this key to the system keyring: + ```sh + sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB + ``` + > **NOTE**: You might need to install GnuPG: `sudo apt-get install gnupg` + +2. 
Add the repository via the following command: -The following components are installed with the OpenVINO developer package: + * On Ubuntu 18 + ```sh + echo "deb https://apt.repos.intel.com/openvino/2022/bionic all main" | sudo tee /etc/apt/sources.list.d/intel-openvino-2022.list + ``` + + * On Ubuntu 20 + ```sh + echo "deb https://apt.repos.intel.com/openvino/2022/focal all main" | sudo tee /etc/apt/sources.list.d/intel-openvino-2022.list + ``` -| Component | Description| -|-----------|------------| -| [Model Optimizer](../MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md) | This tool imports, converts, and optimizes models that were trained in popular frameworks to a format usable by Intel tools, especially the Inference Engine. 
Popular frameworks include Caffe\*, TensorFlow\*, MXNet\*, and ONNX\*. | -| [OpenVINO™ Runtime](../OV_Runtime_UG/OpenVINO_Runtime_User_Guide.md) | The engine that runs a deep learning model. It includes a set of libraries for an easy inference integration into your applications.| -| [OpenCV*](https://docs.opencv.org/master/) | OpenCV\* community version compiled for Intel® hardware | -| [Sample Applications](../OV_Runtime_UG/Samples_Overview.md) | A set of simple console applications demonstrating how to use the Inference Engine in your applications. | -| [Demo Applications](@ref omz_demos) | A set of console applications that demonstrate how you can use the Inference Engine in your applications to solve specific use cases. | -| Additional Tools | A set of tools to work with your models including [Accuracy Checker utility](@ref omz_tools_accuracy_checker), [Post-Training Optimization Tool Guide](@ref pot_README), [Model Downloader](@ref omz_tools_downloader) and other | -| [Documentation for Pre-Trained Models ](@ref omz_models_group_intel) | Documentation for the pre-trained models available in the [Open Model Zoo repo](https://github.com/openvinotoolkit/open_model_zoo). | -| Deep Learning Streamer (DL Streamer) | Streaming analytics framework, based on GStreamer\*, for constructing graphs of media analytics components. For the DL Streamer documentation, see [DL Streamer Samples](@ref gst_samples_README), [API Reference](https://openvinotoolkit.github.io/dlstreamer_gst/), [Elements](https://github.com/openvinotoolkit/dlstreamer_gst/wiki/Elements), [Tutorial](https://github.com/openvinotoolkit/dlstreamer_gst/wiki/DL-Streamer-Tutorial). | +3. Update the list of packages via the update command: + ```sh + sudo apt update + ``` + +4. Verify that the APT repository is properly set up. Run the apt-cache command to see a list of all available OpenVINO packages and components: + ```sh + apt-cache search openvino + ``` + +### Step 2: Install OpenVINO Runtime Using the APT Package Manager -## Install Packages +OpenVINO will be installed in: `/opt/intel/openvino_..` + +A symlink will be created: `/opt/intel/openvino_` -### Set up the OpenVINO™ Toolkit APT Repository - -#### Install the GPG key for the Repository - -1. Download the public key from [https://apt.repos.intel.com/openvino/2021/GPG-PUB-KEY-INTEL-OPENVINO-2021](https://apt.repos.intel.com/openvino/2021/GPG-PUB-KEY-INTEL-OPENVINO-2021) and save it to a file. -2. Add this key to the system keyring: -```sh -sudo apt-key add -``` -> **NOTE**: You might need to install GnuPG: `sudo apt-get install gnupg` - -3. 
Check the list of APT keys running the following command: -```sh -sudo apt-key list -``` - -#### Add the Repository +#### To Install the Latest Version Run the following command: ```sh -echo "deb https://apt.repos.intel.com/openvino/2021 all main" | sudo tee /etc/apt/sources.list.d/intel-openvino-2021.list +sudo apt install openvino ``` -#### Update the List of Packages - -Run the `update` command: -```sh -sudo apt update -``` - -#### Verify that the APT Repository is Properly Set Up - -Run the `apt-cache` command to see a list of all available OpenVINO packages and components: -```sh -apt-cache search openvino -``` -See the example commands below: - -* **Runtime Packages** - - On Ubuntu 18.04: - ```sh - sudo apt-cache search intel-openvino-runtime-ubuntu18 - ``` - On Ubuntu 20.04: - ```sh - sudo apt-cache search intel-openvino-runtime-ubuntu20 - ``` -* **Developer Packages** - - On Ubuntu 18.04: - ```sh - sudo apt-cache search intel-openvino-dev-ubuntu18 - ``` - On Ubuntu 20.04: - ```sh - sudo apt-cache search intel-openvino-dev-ubuntu20 - ``` - -### Install Runtime or Developer Packages using the APT Package Manager -Intel® OpenVINO™ Toolkit will be installed in: `/opt/intel/openvino_..` - -A symlink will be created: `/opt/intel/openvino_` - #### To Install a Specific Version -1. Get a list of OpenVINO packages available for installation: -```sh -sudo apt-cache search intel-openvino-runtime-ubuntu18 -``` -2. Install a specific version of an OpenVINO package: -```sh -sudo apt install intel-openvino--ubuntu-.. -``` -See the example commands below: -* **Runtime Package**
- On Ubuntu 18.04: - ```sh - sudo apt install intel-openvino-runtime-ubuntu18-2021.1.105 - ``` - On Ubuntu 20.04: - ```sh - sudo apt install intel-openvino-runtime-ubuntu20-2021.1.105 - ``` -* **Developer Package**
- On Ubuntu 18.04: - ```sh - sudo apt install intel-openvino-dev-ubuntu18-2021.1.105 - ``` - On Ubuntu 20.04: - ```sh - sudo apt install intel-openvino-dev-ubuntu20-2021.1.105 - ``` -#### To check for Installed Packages and Versions +1. Get a list of OpenVINO packages available for installation: + ```sh + sudo apt-cache search openvino + ``` +2. Install a specific version of an OpenVINO package: + ```sh + sudo apt install openvino-.. + ``` + For example: + ```sh + sudo apt install openvino-2022.1.0 + ``` -To get a list of installed OpenVINO packages: +#### To Check for Installed Packages and Versions +Run the following command: ```sh apt list --installed | grep openvino ``` -#### To Uninstall a Specific Version +#### To Uninstall the Latest Version -To uninstall a specific package: +Run the following command: ```sh -sudo apt autoremove intel-openvino--ubuntu-.. +sudo apt autoremove openvino ``` +#### To Uninstall a Specific Version -**Additional Resources** +Run the following command: +```sh +sudo apt autoremove openvino-.. +``` -- Intel® Distribution of OpenVINO™ toolkit home page: [https://software.intel.com/en-us/openvino-toolkit](https://software.intel.com/en-us/openvino-toolkit). -- OpenVINO™ toolkit online documentation: [https://docs.openvino.ai](https://docs.openvino.ai). +### Step 3 (Optional): Install OpenCV from APT + +OpenCV is necessary to run C++ demos from Open Model Zoo. Some C++ samples and demos also use OpenCV as a dependency. OpenVINO provides a package to install OpenCV from APT: + +#### To Install the Latest Version of OpenCV + +Run the following command: +```sh +sudo apt install openvino-opencv +``` + +#### To Install a Specific Version of OpenCV + +Run the following command: +```sh +sudo apt install openvino-opencv-.. +``` + +### Step 4 (Optional): Install Software Dependencies + +After you have installed OpenVINO Runtime, if you decided to [install OpenVINO Development Tools](installing-model-dev-tools.md), make sure that you install external software dependencies first. + +Refer to Install External Software Dependencies for detailed steps. + + +## Configurations for Non-CPU Devices + +If you are using Intel® Processor Graphics, Intel® Vision Accelerator Design with Intel® Movidius™ VPUs or Intel® Neural Compute Stick 2, please follow the configuration steps in [Configurations for GPU](configurations-for-intel-gpu.md), [Configurations for VPU](installing-openvino-config-ivad-vpu.md) or [Configurations for NCS2](configurations-for-ncs2.md) accordingly. + + +## Additional Resources + +- Intel® Distribution of OpenVINO™ toolkit home page: . +- OpenVINO™ toolkit online documentation: . - [Model Optimizer Developer Guide](../MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md). -- [OpenVINO™ Runtime User Guide](../OV_Runtime_UG/OpenVINO_Runtime_User_Guide.md). +- [OpenVINO Runtime User Guide](../OV_Runtime_UG/OpenVINO_Runtime_User_Guide). - For more information on Sample Applications, see the [Inference Engine Samples Overview](../OV_Runtime_UG/Samples_Overview.md). - For IoT Libraries & Code Samples see the [Intel® IoT Developer Kit](https://github.com/intel-iot-devkit). 
- diff --git a/docs/install_guides/installing-openvino-conda.md b/docs/install_guides/installing-openvino-conda.md index 76bb49956ed..a6e7b55e5ef 100644 --- a/docs/install_guides/installing-openvino-conda.md +++ b/docs/install_guides/installing-openvino-conda.md @@ -1,57 +1,37 @@ -# Install Intel® Distribution of OpenVINO™ toolkit from Anaconda* Cloud {#openvino_docs_install_guides_installing_openvino_conda} +# Install Intel® Distribution of OpenVINO™ toolkit from Anaconda Cloud {#openvino_docs_install_guides_installing_openvino_conda} -This guide provides installation steps for Intel® Distribution of OpenVINO™ toolkit distributed through the Anaconda* Cloud. +This guide provides installation steps for Intel® Distribution of OpenVINO™ toolkit for Linux distributed through the Anaconda Cloud. -> **NOTE**: Only runtime packages are available from Anaconda* Cloud. - -## Introduction - -OpenVINO™ toolkit is a comprehensive toolkit for quickly developing applications and solutions that solve a variety of tasks including emulation of human vision, automatic speech recognition, natural language processing, recommendation systems, and many others. Based on latest generations of artificial neural networks, including Convolutional Neural Networks (CNNs), recurrent and attention-based networks, the toolkit extends computer vision and non-vision workloads across Intel® hardware, maximizing performance. It accelerates applications with high-performance, AI and deep learning inference deployed from edge to cloud. - -The Intel® Distribution of OpenVINO™ toolkit\*: -- Enables CNN-based deep learning inference on the edge -- Supports heterogeneous execution across Intel® CPU, Intel® Integrated Graphics, Intel® Neural Compute Stick 2, and Intel® Vision Accelerator Design with Intel® Movidius™ VPUs -- Speeds time-to-market via an easy-to-use library of computer vision functions and pre-optimized kernels -The **runtime package** includes the following components installed by default: - -| Component | Description | -|-----------------------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| [OpenVINO™ Runtime](../OV_Runtime_UG/OpenVINO_Runtime_User_Guide.md) | This is the engine that runs the deep learning model. It includes a set of libraries for an easy inference integration into your applications. | +From 2022.1 release, the OpenVINO™ Development Tools can only be installed via PyPI. If you want to develop or optimize your models with OpenVINO, see [Install OpenVINO Development Tools](installing-model-dev-tools.md) for detailed steps. 
## System Requirements **Software** - - [Anaconda* distribution](https://www.anaconda.com/products/individual/) + - [Anaconda distribution](https://www.anaconda.com/products/individual/) **Operating Systems** -| Supported Operating System | [Python* Version (64-bit)](https://www.python.org/) | +| Supported Operating System | [Python Version (64-bit)](https://www.python.org/) | | :------------------------------------------------------------| :---------------------------------------------------| -| Ubuntu* 18.04 long-term support (LTS), 64-bit | 3.6, 3.7 | -| Ubuntu* 20.04 long-term support (LTS), 64-bit | 3.6, 3.7 | -| CentOS* 7.6, 64-bit | 3.6, 3.7 | -| macOS* 10.15.x | 3.6, 3.7 | -| Windows 10*, 64-bit | 3.6, 3.7 | +| Ubuntu 18.04 long-term support (LTS), 64-bit | 3.6, 3.7, 3.8, 3.9 | +| Ubuntu 20.04 long-term support (LTS), 64-bit | 3.6, 3.7, 3.8, 3.9 | +| Red Hat Enterprise Linux 8, 64-bit | 3.6, 3.7, 3.8, 3.9 | +| macOS 10.15 | 3.6, 3.7, 3.8, 3.9 | +| Windows 10, 64-bit | 3.6, 3.7, 3.8, 3.9 | -## Install the Runtime Package using the Anaconda* Package Manager +## Install OpenVINO Runtime Using the Anaconda Package Manager -1. Set up the Anaconda* environment:  +1. Set up the Anaconda environment (taking Python 3.7 for example):  ```sh conda create --name py37 python=3.7 - ``` - ```sh conda activate py37 ``` 2. Update Anaconda environment to the latest version: ```sh conda update --all ``` -3. Install pre-requisites: - ```sh - conda install numpy - ``` -4. Install the Intel® Distribution of OpenVINO™ Toolkit: +3. Install the Intel® Distribution of OpenVINO™ toolkit: - Ubuntu* 20.04 ```sh conda install openvino-ie4py-ubuntu20 -c intel @@ -60,28 +40,28 @@ The **runtime package** includes the following components installed by default: ```sh conda install openvino-ie4py-ubuntu18 -c intel ``` - - CentOS* 7.6 + - Red Hat Enterprise Linux 8, 64-bit ```sh - conda install openvino-ie4py-centos7 -c intel + conda install openvino-ie4py-rhel8 -c intel ``` - - Windows* 10 and macOS* + - Windows 10 and macOS ```sh conda install openvino-ie4py -c intel ``` -5. Verify the package is installed: +4. Verify the package is installed: ```sh - python -c "from openvino.inference_engine import IECore" + python -c "from openvino.runtime import Core" ``` If installation was successful, you will not see any error messages (no console output). Now you can start developing your application. + ## Additional Resources -- Intel® Distribution of OpenVINO™ toolkit home page: [https://software.intel.com/en-us/openvino-toolkit](https://software.intel.com/en-us/openvino-toolkit). -- OpenVINO™ toolkit online documentation: [https://docs.openvino.ai](https://docs.openvino.ai). +- Intel® Distribution of OpenVINO™ toolkit home page: . +- OpenVINO™ toolkit online documentation: . - [Model Optimizer Developer Guide](../MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md). -- [OpenVINO™ Runtime User Guide](../OV_Runtime_UG/OpenVINO_Runtime_User_Guide.md). +- [OpenVINO Runtime User Guide](../OV_Runtime_UG/OpenVINO_Runtime_User_Guide). - For more information on Sample Applications, see the [Inference Engine Samples Overview](../OV_Runtime_UG/Samples_Overview.md). 
-- Intel® Distribution of OpenVINO™ toolkit Anaconda* home page: [https://anaconda.org/intel/openvino-ie4py](https://anaconda.org/intel/openvino-ie4py) - +- Intel® Distribution of OpenVINO™ toolkit Anaconda home page: [https://anaconda.org/intel/openvino-ie4py](https://anaconda.org/intel/openvino-ie4py) diff --git a/docs/install_guides/installing-openvino-linux.md b/docs/install_guides/installing-openvino-linux.md index fb8a7e3bf04..4ea4225234e 100644 --- a/docs/install_guides/installing-openvino-linux.md +++ b/docs/install_guides/installing-openvino-linux.md @@ -1,4 +1,4 @@ -# Install and Configure Intel® Distribution of OpenVINO™ toolkit for Linux {#openvino_docs_install_guides_installing_openvino_linux} +# Install and Configure Intel® Distribution of OpenVINO™ Toolkit for Linux {#openvino_docs_install_guides_installing_openvino_linux} > **NOTE**: Since the OpenVINO™ 2022.1 release, the following development tools: Model Optimizer, Post-Training Optimization Tool, Model Downloader and other Open Model Zoo tools, Accuracy Checker, and Annotation Converter are not part of the installer. These tools are now only available on [pypi.org](https://pypi.org/project/openvino-dev/). @@ -7,8 +7,8 @@ @sphinxdirective .. tab:: Operating Systems - * Ubuntu 18.04.x long-term support (LTS), 64-bit - * Ubuntu 20.04.x long-term support (LTS), 64-bit + * Ubuntu 18.04 long-term support (LTS), 64-bit + * Ubuntu 20.04 long-term support (LTS), 64-bit .. note:: Since the OpenVINO™ 2022.1 release, CentOS 7.6, 64-bit is not longer supported. diff --git a/docs/install_guides/installing-openvino-yum.md b/docs/install_guides/installing-openvino-yum.md index dc17b4792aa..eea119a72b5 100644 --- a/docs/install_guides/installing-openvino-yum.md +++ b/docs/install_guides/installing-openvino-yum.md @@ -1,144 +1,133 @@ -# Install Intel® Distribution of OpenVINO™ toolkit for Linux* Using YUM Repository {#openvino_docs_install_guides_installing_openvino_yum} +# Install Intel® Distribution of OpenVINO™ Toolkit for Linux Using YUM Repository {#openvino_docs_install_guides_installing_openvino_yum} -This guide provides installation steps for the Intel® Distribution of OpenVINO™ toolkit for Linux* distributed through the YUM repository. +This guide provides installation steps for Intel® Distribution of OpenVINO™ toolkit for Linux distributed through the YUM repository. -> **IMPORTANT**: By downloading and using this container and the included software, you agree to the terms and conditions of the [software license agreements](https://software.intel.com/content/dam/develop/external/us/en/documents/intel-openvino-license-agreements.pdf). Please, review the content inside the `/licensing` folder for more details. +From 2022.1 release, the OpenVINO™ Development Tools can only be installed via PyPI. If you want to develop or optimize your models with OpenVINO, see [Install OpenVINO Development Tools](installing-model-dev-tools.md) for detailed steps. -> **NOTE**: Intel® Graphics Compute Runtime for OpenCL™ is not a part of OpenVINO™ YUM distribution. You can install it from the [Intel® Graphics Compute Runtime for OpenCL™ GitHub repo](https://github.com/intel/compute-runtime). - -> **NOTE**: Only runtime packages are available via the YUM repository. +> **IMPORTANT**: By downloading and using this container and the included software, you agree to the terms and conditions of the [software license agreements](https://software.intel.com/content/dam/develop/external/us/en/documents/intel-openvino-license-agreements.pdf). 
Please review the content inside the `/licensing` folder for more details. ## System Requirements -The complete list of supported hardware is available in the [Release Notes](https://software.intel.com/content/www/us/en/develop/articles/openvino-relnotes.html#inpage-nav-8). +The complete list of supported hardware is available in the [Release Notes](https://software.intel.com/content/www/us/en/develop/articles/openvino-relnotes.html). -**Operating Systems** +**Operating systems** -- CentOS 7.6, 64-bit +- Red Hat Enterprise Linux 8, 64-bit -## Included with Runtime Package +## Install OpenVINO Runtime -The following components are installed with the OpenVINO runtime package: +### Step 1: Set Up the Repository -| Component | Description| -|-----------|------------| -| [OpenVINO™ Runtime](../OV_Runtime_UG/OpenVINO_Runtime_User_Guide.md)| The engine that runs a deep learning model. It includes a set of libraries for an easy inference integration into your applications. | -| [OpenCV*](https://docs.opencv.org/master/) | OpenCV* community version compiled for Intel® hardware. | -| Deep Learning Stream (DL Streamer) | Streaming analytics framework, based on GStreamer, for constructing graphs of media analytics components. For the DL Streamer documentation, see [DL Streamer Samples](@ref gst_samples_README), [API Reference](https://openvinotoolkit.github.io/dlstreamer_gst/), [Elements](https://github.com/openvinotoolkit/dlstreamer_gst/wiki/Elements), [Tutorial](https://github.com/openvinotoolkit/dlstreamer_gst/wiki/DL-Streamer-Tutorial). | - -## Install Packages - -## Set up the Repository - -> **NOTE**: You must be logged in as root to set up and install the repository. -
-Configure YUM with the OpenVINO repository to install OpenVINO. You have two options for this, using the `yum-config-manager` or manually by creating a text file and pointing YUM to the file. - -* **OPTION 1:** Import the `.repo` file using the `yum-config-manager`: - 1. `yum-utils` must be installed on your system. If it’s not currently installed, run the command: - ```sh - sudo yum install yum-utils +1. Create the YUM repo file in the `/tmp` directory as a normal user: ``` - 2. Add repository using the `yum-config-manager`: - ```sh - sudo yum-config-manager --add-repo https://yum.repos.intel.com/openvino/2021/setup/intel-openvino-2021.repo - ``` - 3. Import the gpg public key for the repository: - ```sh - sudo rpm --import https://yum.repos.intel.com/openvino/2021/setup/RPM-GPG-KEY-INTEL-OPENVINO-2021 - ``` - -* **OPTION 2:** Create the repository file manually: - - 1. Create the YUM repo file in the /tmp directory as a normal user: - ```sh - tee > /tmp/openvino-2021.repo << EOF - [intel-openvino-2021] - name=Intel(R) Distribution of OpenVINO 2021 - baseurl=https://yum.repos.intel.com/openvino/2021 + tee > /tmp/openvino-2022.repo << EOF + [OpenVINO] + name=Intel(R) Distribution of OpenVINO 2022 + baseurl=https://yum.repos.intel.com/openvino/2022 enabled=1 gpgcheck=1 - gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-INTEL-OPENVINO-2021 + repo_gpgcheck=1 + gpgkey=https://yum.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB EOF ``` - 2. Move the newly created openvino-2021.repo file to the YUM configuration directory /etc/yum.repos.d: - ```sh - sudo mv /tmp/openvino-2021.repo /etc/yum.repos.d - ``` - 3. Import the gpg public key for the repository: +2. Move the new openvino-2022.repo file to the YUM configuration directory `/etc/yum.repos.d`: ```sh - sudo rpm --import https://yum.repos.intel.com/openvino/2021/setup/RPM-GPG-KEY-INTEL-OPENVINO-2021 + sudo mv /tmp/openvino-2022.repo /etc/yum.repos.d ``` +3. Verify that the new repo is properly setup by running the following command: + ```sh + yum repolist | grep -i openvino + ``` + You will see the available list of packages. -### Verify that the new repo is properly setup -Run the following command: -```sh -yum repolist | grep -i openvino + +To list available OpenVINO packages, use the following command: +``` +yum list 'openvino*' ``` -Results: -```sh -intel-openvino-2021 Intel(R) Distribution of OpenVINO 2021 -``` - -### To list available OpenVINO packages -Use the following command: -```sh -yum list intel-openvino* -``` +### Step 2: Install OpenVINO Runtime Using the YUM Package Manager ---- - -## Install Runtime Packages Using the YUM Package Manager +Intel® Distribution of OpenVINO™ toolkit will be installed in: `/opt/intel/openvino_..` -Intel® OpenVINO will be installed in: `/opt/intel/openvino_..` -
A symlink will be created: `/opt/intel/openvino_` ---- +You can select one of the following procedures according to your need: -### To install the latest version -To install the full runtime version of the OpenVINO package: +#### To Install the Latest Version + +Run the following command: ```sh -sudo yum install intel-openvino-runtime-centos7 +sudo yum install openvino ``` ---- +#### To Install a Specific Version -### To install a specific version -To install the full runtime version of the OpenVINO package: +Run the following command: ```sh -sudo yum install intel-openvino-runtime-centos7-.. +sudo yum install openvino-.. ``` + For example: - ```sh -sudo yum install intel-openvino-runtime-centos7-2021.3.394 - ``` - ---- - -### To check for installed packages and version - -To check a specific version of an OpenVINO package: -```sh -yum list installed intel-openvino* +sudo yum install openvino-2022.1.0 ``` ---- +#### To Check for Installed Packages and Version -### To Uninstall a specific version - -To uninstall a specific full runtime package: +Run the following command: ```sh -sudo yum autoremove intel-openvino-runtime-centos-.. +yum list installed 'openvino*' ``` -**Additional Resources** -- Intel® Distribution of OpenVINO™ toolkit home page: [https://software.intel.com/en-us/openvino-toolkit](https://software.intel.com/en-us/openvino-toolkit) -- OpenVINO™ toolkit online documentation: [https://docs.openvino.ai](https://docs.openvino.ai) +#### To Uninstall the Latest Version + +Run the following command: +```sh +sudo yum autoremove openvino +``` + +#### To Uninstall a Specific Version + +Run the following command: +```sh +sudo yum autoremove openvino-.. +``` + +### Step 3 (Optional): Install OpenCV from YUM + +OpenCV is necessary to run C++ demos from Open Model Zoo. Some C++ samples and demos also use OpenCV as a dependency. OpenVINO provides a package to install OpenCV from YUM: + +#### To Install the Latest Version of OpenCV + +Run the following command: +```sh +sudo yum install openvino-opencv +``` + +#### To Install a Specific Version of OpenCV + +Run the following command: +```sh +sudo yum install openvino-opencv-.. +``` + +### Step 4 (Optional): Install Software Dependencies + +After you have installed OpenVINO Runtime, if you decided to [install OpenVINO Model Development Tools](installing-model-dev-tools.md), make sure that you install external software dependencies first. + +Refer to Install External Software Dependencies for detailed steps. + +## Configurations for Non-CPU Devices + +If you are using Intel® Processor Graphics, Intel® Vision Accelerator Design with Intel® Movidius™ VPUs or Intel® Neural Compute Stick 2, please follow the configuration steps in [Configurations for GPU](configurations-for-intel-gpu.md), [Configurations for VPU](installing-openvino-config-ivad-vpu.md) or [Configurations for NCS2](configurations-for-ncs2.md) accordingly. + + +## Additional Resources + +- Intel® Distribution of OpenVINO™ toolkit home page: . +- OpenVINO™ toolkit online documentation: . - [Model Optimizer Developer Guide](../MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md). -- [OpenVINO™ Runtime User Guide](../OV_Runtime_UG/OpenVINO_Runtime_User_Guide.md). +- [OpenVINO Runtime User Guide](../OV_Runtime_UG/OpenVINO_Runtime_User_Guide). - For more information on Sample Applications, see the [Inference Engine Samples Overview](../OV_Runtime_UG/Samples_Overview.md). - For IoT Libraries & Code Samples see the [Intel® IoT Developer Kit](https://github.com/intel-iot-devkit). 
- From 206442fb19122ad700041fa429036b54a6307bd6 Mon Sep 17 00:00:00 2001 From: Jan Iwaszkiewicz Date: Mon, 21 Feb 2022 10:45:55 +0100 Subject: [PATCH 027/310] [PYTHON] Add OV Types support to parameter and constant from opsets (#10489) * Add OV Types to parameter and constant node factory, refactor tests and error handling * Fix name mismatch in docstring * Fix docs and hints --- .../python/src/openvino/runtime/exceptions.py | 10 +-- .../python/src/openvino/runtime/opset1/ops.py | 19 +++-- .../src/openvino/runtime/utils/types.py | 73 +++++++++---------- .../python/tests/test_ngraph/test_basic.py | 35 ++++++--- .../python/tests/test_ngraph/test_einsum.py | 2 +- .../python/tests/test_ngraph/test_ops.py | 24 ++++++ .../tests/test_ngraph/test_ops_fused.py | 4 +- .../python/tests/test_onnx/test_ops_unary.py | 4 +- 8 files changed, 107 insertions(+), 64 deletions(-) diff --git a/src/bindings/python/src/openvino/runtime/exceptions.py b/src/bindings/python/src/openvino/runtime/exceptions.py index 622c9d1d6d8..8a6f8760310 100644 --- a/src/bindings/python/src/openvino/runtime/exceptions.py +++ b/src/bindings/python/src/openvino/runtime/exceptions.py @@ -1,16 +1,16 @@ # Copyright (C) 2018-2022 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -"""openvino exceptions hierarchy. All exceptions are descendants of NgraphError.""" +"""openvino exceptions hierarchy. All exceptions are descendants of OVError.""" -class NgraphError(Exception): - """Base class for Ngraph exceptions.""" +class OVError(Exception): + """Base class for OV exceptions.""" -class UserInputError(NgraphError): +class UserInputError(OVError): """User provided unexpected input.""" -class NgraphTypeError(NgraphError, TypeError): +class OVTypeError(OVError, TypeError): """Type mismatch error.""" diff --git a/src/bindings/python/src/openvino/runtime/opset1/ops.py b/src/bindings/python/src/openvino/runtime/opset1/ops.py index 347870c2c35..7507acb2a3f 100644 --- a/src/bindings/python/src/openvino/runtime/opset1/ops.py +++ b/src/bindings/python/src/openvino/runtime/opset1/ops.py @@ -2,12 +2,12 @@ # SPDX-License-Identifier: Apache-2.0 """Factory functions for all openvino ops.""" -from typing import Callable, Iterable, List, Optional, Set, Union +from typing import List, Optional, Union import numpy as np from functools import partial -from openvino.runtime import Node, PartialShape, Shape +from openvino.runtime import Node, PartialShape, Type from openvino.runtime.op import Constant, Parameter, tensor_iterator from openvino.runtime.opset_utils import _get_node_factory from openvino.runtime.utils.decorators import binary_op, nameable_op, unary_op @@ -312,7 +312,11 @@ def concat(nodes: List[NodeInput], axis: int, name: Optional[str] = None) -> Nod @nameable_op -def constant(value: NumericData, dtype: NumericType = None, name: Optional[str] = None) -> Constant: +def constant( + value: NumericData, + dtype: Union[NumericType, Type] = None, + name: Optional[str] = None, +) -> Constant: """Create a Constant node from provided value. :param value: One of: array of values or scalar to initialize node with. 
@@ -1544,7 +1548,6 @@ def matmul( :param transpose_b: should the second matrix be transposed returns MatMul operation node """ - print("transpose_a", transpose_a, "transpose_b", transpose_b) return _get_node_factory_opset1().create( "MatMul", as_nodes(data_a, data_b), {"transpose_a": transpose_a, "transpose_b": transpose_b} ) @@ -1792,11 +1795,13 @@ def pad( @nameable_op def parameter( - shape: TensorShape, dtype: NumericType = np.float32, name: Optional[str] = None + shape: TensorShape, dtype: Union[NumericType, Type] = np.float32, name: Optional[str] = None ) -> Parameter: """Return an openvino Parameter object.""" - element_type = get_element_type(dtype) - return Parameter(element_type, PartialShape(shape)) + return Parameter(get_element_type(dtype) + if isinstance(dtype, (type, np.dtype)) + else dtype, + PartialShape(shape)) @binary_op diff --git a/src/bindings/python/src/openvino/runtime/utils/types.py b/src/bindings/python/src/openvino/runtime/utils/types.py index 94d68cf9d91..450b8bf4192 100644 --- a/src/bindings/python/src/openvino/runtime/utils/types.py +++ b/src/bindings/python/src/openvino/runtime/utils/types.py @@ -8,9 +8,8 @@ from typing import List, Union import numpy as np -from openvino.runtime.exceptions import NgraphTypeError -from openvino.runtime import Node, Shape, Output -from openvino.runtime import Type as NgraphType +from openvino.runtime.exceptions import OVTypeError +from openvino.runtime import Node, Shape, Output, Type from openvino.runtime.op import Constant log = logging.getLogger(__name__) @@ -22,19 +21,19 @@ ScalarData = Union[int, float] NodeInput = Union[Node, NumericData] openvino_to_numpy_types_map = [ - (NgraphType.boolean, np.bool), - (NgraphType.f16, np.float16), - (NgraphType.f32, np.float32), - (NgraphType.f64, np.float64), - (NgraphType.i8, np.int8), - (NgraphType.i16, np.int16), - (NgraphType.i32, np.int32), - (NgraphType.i64, np.int64), - (NgraphType.u8, np.uint8), - (NgraphType.u16, np.uint16), - (NgraphType.u32, np.uint32), - (NgraphType.u64, np.uint64), - (NgraphType.bf16, np.uint16), + (Type.boolean, np.bool), + (Type.f16, np.float16), + (Type.f32, np.float32), + (Type.f64, np.float64), + (Type.i8, np.int8), + (Type.i16, np.int16), + (Type.i32, np.int32), + (Type.i64, np.int64), + (Type.u8, np.uint8), + (Type.u16, np.uint16), + (Type.u32, np.uint32), + (Type.u64, np.uint64), + (Type.bf16, np.uint16), ] openvino_to_numpy_types_str_map = [ @@ -53,23 +52,23 @@ openvino_to_numpy_types_str_map = [ ] -def get_element_type(data_type: NumericType) -> NgraphType: +def get_element_type(data_type: NumericType) -> Type: """Return an ngraph element type for a Python type or numpy.dtype.""" if data_type is int: log.warning("Converting int type of undefined bitwidth to 32-bit ngraph integer.") - return NgraphType.i32 + return Type.i32 if data_type is float: log.warning("Converting float type of undefined bitwidth to 32-bit ngraph float.") - return NgraphType.f32 + return Type.f32 - ng_type = next( - (ng_type for (ng_type, np_type) in openvino_to_numpy_types_map if np_type == data_type), None + ov_type = next( + (ov_type for (ov_type, np_type) in openvino_to_numpy_types_map if np_type == data_type), None ) - if ng_type: - return ng_type + if ov_type: + return ov_type - raise NgraphTypeError("Unidentified data type %s", data_type) + raise OVTypeError("Unidentified data type %s", data_type) def get_element_type_str(data_type: NumericType) -> str: @@ -82,27 +81,27 @@ def get_element_type_str(data_type: NumericType) -> str: log.warning("Converting float type of 
undefined bitwidth to 32-bit ngraph float.") return "f32" - ng_type = next( - (ng_type for (ng_type, np_type) in openvino_to_numpy_types_str_map if np_type == data_type), + ov_type = next( + (ov_type for (ov_type, np_type) in openvino_to_numpy_types_str_map if np_type == data_type), None, ) - if ng_type: - return ng_type + if ov_type: + return ov_type - raise NgraphTypeError("Unidentified data type %s", data_type) + raise OVTypeError("Unidentified data type %s", data_type) -def get_dtype(ngraph_type: NgraphType) -> np.dtype: - """Return a numpy.dtype for an ngraph element type.""" +def get_dtype(openvino_type: Type) -> np.dtype: + """Return a numpy.dtype for an openvino element type.""" np_type = next( - (np_type for (ng_type, np_type) in openvino_to_numpy_types_map if ng_type == ngraph_type), + (np_type for (ov_type, np_type) in openvino_to_numpy_types_map if ov_type == openvino_type), None, ) if np_type: return np.dtype(np_type) - raise NgraphTypeError("Unidentified data type %s", ngraph_type) + raise OVTypeError("Unidentified data type %s", openvino_type) def get_ndarray(data: NumericData) -> np.ndarray: @@ -121,11 +120,11 @@ def get_shape(data: NumericData) -> TensorShape: return [] -def make_constant_node(value: NumericData, dtype: NumericType = None) -> Constant: - """Return an ngraph Constant node with the specified value.""" +def make_constant_node(value: NumericData, dtype: Union[NumericType, Type] = None) -> Constant: + """Return an openvino Constant node with the specified value.""" ndarray = get_ndarray(value) - if dtype: - element_type = get_element_type(dtype) + if dtype is not None: + element_type = get_element_type(dtype) if isinstance(dtype, (type, np.dtype)) else dtype else: element_type = get_element_type(ndarray.dtype) diff --git a/src/bindings/python/tests/test_ngraph/test_basic.py b/src/bindings/python/tests/test_ngraph/test_basic.py index 2adae148073..a355bb80bee 100644 --- a/src/bindings/python/tests/test_ngraph/test_basic.py +++ b/src/bindings/python/tests/test_ngraph/test_basic.py @@ -18,17 +18,19 @@ from openvino.runtime import Tensor from openvino.pyopenvino import DescriptorTensor from openvino.runtime.op import Parameter from tests.runtime import get_runtime +from openvino.runtime.utils.types import get_dtype from tests.test_ngraph.util import run_op_node def test_ngraph_function_api(): shape = [2, 2] parameter_a = ops.parameter(shape, dtype=np.float32, name="A") - parameter_b = ops.parameter(shape, dtype=np.float32, name="B") + parameter_b = ops.parameter(shape, dtype=Type.f32, name="B") parameter_c = ops.parameter(shape, dtype=np.float32, name="C") model = (parameter_a + parameter_b) * parameter_c assert parameter_a.element_type == Type.f32 + assert parameter_b.element_type == Type.f32 assert parameter_a.partial_shape == PartialShape([2, 2]) parameter_a.layout = ov.Layout("NC") assert parameter_a.layout == ov.Layout("NC") @@ -74,6 +76,17 @@ def test_ngraph_function_api(): np.uint16, np.uint32, np.uint64, + Type.f16, + Type.f32, + Type.f64, + Type.i8, + Type.i16, + Type.i32, + Type.i64, + Type.u8, + Type.u16, + Type.u32, + Type.u64, ], ) def test_simple_computation_on_ndarrays(dtype): @@ -86,17 +99,19 @@ def test_simple_computation_on_ndarrays(dtype): model = (parameter_a + parameter_b) * parameter_c computation = runtime.computation(model, parameter_a, parameter_b, parameter_c) - value_a = np.array([[1, 2], [3, 4]], dtype=dtype) - value_b = np.array([[5, 6], [7, 8]], dtype=dtype) - value_c = np.array([[2, 3], [4, 5]], dtype=dtype) - result = 
computation(value_a, value_b, value_c) - assert np.allclose(result, np.array([[12, 24], [40, 60]], dtype=dtype)) + np_dtype = get_dtype(dtype) if isinstance(dtype, Type) else dtype - value_a = np.array([[9, 10], [11, 12]], dtype=dtype) - value_b = np.array([[13, 14], [15, 16]], dtype=dtype) - value_c = np.array([[5, 4], [3, 2]], dtype=dtype) + value_a = np.array([[1, 2], [3, 4]], dtype=np_dtype) + value_b = np.array([[5, 6], [7, 8]], dtype=np_dtype) + value_c = np.array([[2, 3], [4, 5]], dtype=np_dtype) result = computation(value_a, value_b, value_c) - assert np.allclose(result, np.array([[110, 96], [78, 56]], dtype=dtype)) + assert np.allclose(result, np.array([[12, 24], [40, 60]], dtype=np_dtype)) + + value_a = np.array([[9, 10], [11, 12]], dtype=np_dtype) + value_b = np.array([[13, 14], [15, 16]], dtype=np_dtype) + value_c = np.array([[5, 4], [3, 2]], dtype=np_dtype) + result = computation(value_a, value_b, value_c) + assert np.allclose(result, np.array([[110, 96], [78, 56]], dtype=np_dtype)) def test_serialization(): diff --git a/src/bindings/python/tests/test_ngraph/test_einsum.py b/src/bindings/python/tests/test_ngraph/test_einsum.py index 2ace1e58079..146969282ca 100644 --- a/src/bindings/python/tests/test_ngraph/test_einsum.py +++ b/src/bindings/python/tests/test_ngraph/test_einsum.py @@ -34,7 +34,7 @@ def einsum_op_exec(input_shapes: list, equation: str, data_type: np.dtype, ng_inputs = [] np_inputs = [] for i in range(num_inputs): - input_i = np.random.random_integers(10, size=input_shapes[i]).astype(data_type) + input_i = np.random.randint(1, 10 + 1, size=input_shapes[i]).astype(data_type) np_inputs.append(input_i) ng_inputs.append(ov.parameter(input_i.shape, dtype=data_type)) diff --git a/src/bindings/python/tests/test_ngraph/test_ops.py b/src/bindings/python/tests/test_ngraph/test_ops.py index 1871f9c8730..c9eaff88ff7 100644 --- a/src/bindings/python/tests/test_ngraph/test_ops.py +++ b/src/bindings/python/tests/test_ngraph/test_ops.py @@ -482,6 +482,30 @@ def test_constant(): assert np.allclose(result, expected) +def test_constant_opset_ov_type(): + parameter_list = [] + function = Model([ov.constant(np.arange(9).reshape(3, 3), Type.f32)], parameter_list, "test") + + runtime = get_runtime() + computation = runtime.computation(function, *parameter_list) + result = computation()[0] + + expected = np.arange(9).reshape(3, 3) + assert np.allclose(result, expected) + + +def test_constant_opset_numpy_type(): + parameter_list = [] + function = Model([ov.constant(np.arange(9).reshape(3, 3), np.float32)], parameter_list, "test") + + runtime = get_runtime() + computation = runtime.computation(function, *parameter_list) + result = computation()[0] + + expected = np.arange(9).reshape(3, 3) + assert np.allclose(result, expected) + + def test_concat(): element_type = Type.f32 diff --git a/src/bindings/python/tests/test_ngraph/test_ops_fused.py b/src/bindings/python/tests/test_ngraph/test_ops_fused.py index 82a202ef358..5e9c122ebe5 100644 --- a/src/bindings/python/tests/test_ngraph/test_ops_fused.py +++ b/src/bindings/python/tests/test_ngraph/test_ops_fused.py @@ -48,7 +48,7 @@ def test_fake_quantize(): input_high_value = np.float32(23) output_low_value = np.float32(2) output_high_value = np.float32(16) - levels = np.float32(4) + levels = np.int32(4) data_shape = [1, 2, 3, 4] bound_shape = [] @@ -114,7 +114,7 @@ def test_depth_to_space(): dtype=np.float32, ) mode = "blocks_first" - block_size = np.float32(2) + block_size = np.int32(2) data_shape = [1, 4, 2, 3] parameter_data = 
ov.parameter(data_shape, name="Data", dtype=np.float32) diff --git a/src/bindings/python/tests/test_onnx/test_ops_unary.py b/src/bindings/python/tests/test_onnx/test_ops_unary.py index ae79735a96a..6ecc0035d6a 100644 --- a/src/bindings/python/tests/test_onnx/test_ops_unary.py +++ b/src/bindings/python/tests/test_onnx/test_ops_unary.py @@ -7,7 +7,7 @@ import onnx.mapping import pytest from onnx.helper import make_graph, make_model, make_node, make_tensor_value_info -from openvino.runtime.exceptions import NgraphTypeError +from openvino.runtime.exceptions import OVTypeError from tests.runtime import get_runtime from tests.test_onnx.utils import get_node_model, import_onnx_model, run_model, run_node @@ -425,7 +425,7 @@ def test_cast_errors(): graph = make_graph([node], "compute_graph", input_tensors, output_tensors) model = make_model(graph, producer_name="NgraphBackend") - with pytest.raises((RuntimeError, NgraphTypeError)): + with pytest.raises((RuntimeError, OVTypeError)): import_onnx_model(model) # unsupported output tensor data type: From 5dbf2f7088db7d7d929ba9e5c92a0d3fc3d473c0 Mon Sep 17 00:00:00 2001 From: Mikhail Ryzhov Date: Mon, 21 Feb 2022 14:05:36 +0300 Subject: [PATCH 028/310] [GNA] Compact mode ordering fix (#10408) * Compact mode ordering fix * Fixed comment --- src/plugins/intel_gna/gna_graph_compiler.cpp | 8 ++++++-- src/plugins/intel_gna/gna_plugin.cpp | 9 +++++---- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/src/plugins/intel_gna/gna_graph_compiler.cpp b/src/plugins/intel_gna/gna_graph_compiler.cpp index 8b0024a0210..8cb40bb83e1 100644 --- a/src/plugins/intel_gna/gna_graph_compiler.cpp +++ b/src/plugins/intel_gna/gna_graph_compiler.cpp @@ -921,7 +921,9 @@ void GNAGraphCompiler::PoolingPrimitive(InferenceEngine::CNNLayerPtr layer) { num_data_bytes_in = num_rows * num_columns * inputs->getPrecision().size(); } - connectInput(layer, ptr_inputs, num_data_bytes_in); + auto fused_to_layer = connectInput(layer, ptr_inputs, num_data_bytes_in); + // Pooling will be fused with the previous layer and we need to use it's order id + layer->userValue.v_int = fused_to_layer.input->userValue.v_int; connectOutput(layer, ptr_outputs, num_data_bytes_out); } @@ -2056,7 +2058,9 @@ case name:\ ptr_outputs, ptr_pwl_segments_target); - connectInput(layer, ptr_inputs, num_data_bytes_in); + auto fused_to_layer = connectInput(layer, ptr_inputs, num_data_bytes_in); + // PWL will be fused with the previous layer and we need to use it's order id + layer->userValue.v_int = fused_to_layer.input->userValue.v_int; connectOutput(layer, ptr_outputs, num_data_bytes_out); if (ptr_pwl_segments_target != nullptr) { diff --git a/src/plugins/intel_gna/gna_plugin.cpp b/src/plugins/intel_gna/gna_plugin.cpp index 6af143e79e9..7d095ec4793 100644 --- a/src/plugins/intel_gna/gna_plugin.cpp +++ b/src/plugins/intel_gna/gna_plugin.cpp @@ -856,7 +856,12 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) { std::vector sortedNoMem; std::unordered_map> memoryPairs; // find all memory layers pairs and mark which one used as outputs + uint16_t id = 0; for (auto &layer : sortedNet) { + // set order id for layers to use it in compact mode + IE_SUPPRESS_DEPRECATED_START + layer->userValue.v_int = id++; + IE_SUPPRESS_DEPRECATED_END auto generic = dynamic_cast(layer.get()); if (generic == nullptr) { sortedNoMem.push_back(layer); @@ -909,11 +914,7 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) { } // Creating Layer primitives - uint16_t id = 0; for (auto & layer : sortedNoMem) { - 
IE_SUPPRESS_DEPRECATED_START - layer->userValue.v_int = id++; - IE_SUPPRESS_DEPRECATED_END graphCompiler.CreateLayerPrimitive(layer); } From 11bf5400188c0801b52abb16e0a64ae5c6c93794 Mon Sep 17 00:00:00 2001 From: Maksim Derbasov Date: Mon, 21 Feb 2022 14:24:27 +0300 Subject: [PATCH 029/310] Simple patch for fix random bool vector generation (#10493) * Dirty patch for fix bool generation * Bernoulli distribution for bool --- .../include/ngraph_functions/utils/data_utils.hpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/tests/ngraph_helpers/ngraph_functions/include/ngraph_functions/utils/data_utils.hpp b/src/tests/ngraph_helpers/ngraph_functions/include/ngraph_functions/utils/data_utils.hpp index 8e7dc75d6ad..ee7d6ed7184 100644 --- a/src/tests/ngraph_helpers/ngraph_functions/include/ngraph_functions/utils/data_utils.hpp +++ b/src/tests/ngraph_helpers/ngraph_functions/include/ngraph_functions/utils/data_utils.hpp @@ -35,6 +35,12 @@ generateVector(size_t vec_len, res[i] = static_cast(dist(gen)); } return res; + } else if (std::is_same()) { + std::bernoulli_distribution dist; + for (size_t i = 0; i < vec_len; i++) { + res[i] = static_cast(dist(gen)); + } + return res; } else { // chose values between this range to avoid type overrun (e.g. in case of I8 precision) std::uniform_int_distribution dist(static_cast(startFrom), static_cast(upTo)); From 0ce255e56aa92514cee2ba22671315b77a0c3adc Mon Sep 17 00:00:00 2001 From: Alexey Lebedev Date: Mon, 21 Feb 2022 15:35:07 +0300 Subject: [PATCH 030/310] [tools][benchmark_app] update readme (#10518) * Save work * update readme * Name refactoring * Remove duplicated readme * Add note about default hint --- tools/benchmark_tool/README.md | 149 ++++++++++---- .../openvino/tools/benchmark/README.md | 193 ------------------ .../openvino/tools/benchmark/main.py | 11 +- .../openvino/tools/benchmark/parameters.py | 10 +- 4 files changed, 122 insertions(+), 241 deletions(-) delete mode 100644 tools/benchmark_tool/openvino/tools/benchmark/README.md diff --git a/tools/benchmark_tool/README.md b/tools/benchmark_tool/README.md index e4d8ba48458..9bacb159065 100644 --- a/tools/benchmark_tool/README.md +++ b/tools/benchmark_tool/README.md @@ -14,7 +14,7 @@ usually, the resulting values are not very performance-portable, so the values from one machine or device are not necessarily optimal for another. From this perspective, the most portable way is experimenting only the performance hints. To learn more, refer to the section below. -> **NOTE**: By default, Inference Engine samples, tools and demos expect input with BGR channels order. If you trained your model to work with RGB order, you need to manually rearrange the default channels order in the sample or demo application or reconvert your model using the Model Optimizer tool with `--reverse_input_channels` argument specified. For more information about the argument, refer to **When to Reverse Input Channels** section of [Converting a Model to Intermediate Representation (IR)](../../docs/MO_DG/prepare_model/convert_model/Converting_Model.md). +> **NOTE**: By default, OpenVINO samples, tools and demos expect input with BGR channels order. If you trained your model to work with RGB order, you need to manually rearrange the default channels order in the sample or demo application or reconvert your model using the Model Optimizer tool with `--reverse_input_channels` argument specified. 
For more information about the argument, refer to **When to Reverse Input Channels** section of [Converting a Model to Intermediate Representation (IR)](../../docs/MO_DG/prepare_model/convert_model/Converting_Model.md). ### Latency and Throughput-focused Inference Modes In many cases the primary performance metric is the time (in milliseconds) for an individual inference request. @@ -81,7 +81,9 @@ Options: -h, --help Show this help message and exit. -i PATH_TO_INPUT, --path_to_input PATH_TO_INPUT Optional. Path to a folder with images and/or binaries - or to specific image or binary file. + or to specific image or binary file. To map input files + to the specific input use next syntax: + "input1:image_path1/folder_path1,input2:image_path2/folder_path2" -m PATH_TO_MODEL, --path_to_model PATH_TO_MODEL Required. Path to an .xml/.onnx/.prototxt file with a trained model or to a .blob file with a trained @@ -104,8 +106,10 @@ Options: none). Performance hint allows the OpenVINO device to select the right network-specific settings. 'throughput': device performance mode will be set to - THROUGHPUT. 'latency': device performance mode will be - set to LATENCY. 'none': no device performance mode + THROUGHPUT, default value for -api sync. + 'latency': device performance mode will be + set to LATENCY, default value for -api async. + 'none': no device performance mode will be set. Using explicit 'nstreams' or other device-specific options, please set hint to 'none' -api {sync,async}, --api_type {sync,async} @@ -128,19 +132,37 @@ Options: -shape SHAPE Optional. Set shape for input. For example, "input1[1,3,224,224],input2[1,4]" or "[1,3,224,224]" in case of one input size. + -data_shape DATA_SHAPE + Optional. Define shape of data to infer dynamic + model. To process images with original shapes + this parameter can be ommited, but it's required + in other cases to benchmark dynamic model. + For example "[shape1],[shape2], ..." can be + used to set several data shapes in case one input + or to set shape1 to input1, shape2 to input2 + and so on in case several inputs. Input mapping + is also supported: "input1[shape1,shape2],input2[shape3,shape4]". -layout LAYOUT Optional. Prompts how network layouts should be treated by application. For example, "input1[NCHW],input2[NC]" or "[NCHW]" in case of one - input size. + input size. Also can be defined partially - + "input1[N...],input2[N...C]" -nstreams NUMBER_STREAMS, --number_streams NUMBER_STREAMS - Optional. Number of streams to use for inference on the CPU/GPU/MYX in throughput mode - (for HETERO and MULTI device cases use format :,: or just ). - Default value is determined automatically for a device. - Please note that although the automatic selection usually provides a reasonable performance, - it still may be non-optimal for some cases, especially for very small networks. + Optional. Number of streams to use for inference on the CPU/GPU/MYX in throughput mode + (for HETERO and MULTI device cases use format :,: or just ). + Default value is determined automatically for a device. + Please note that although the automatic selection usually provides a reasonable performance, + it still may be non-optimal for some cases, especially for very small networks. -nthreads NUMBER_THREADS, --number_threads NUMBER_THREADS Number of threads to use for inference on the CPU (including HETERO and MULTI cases). + --latency_percentile LATENCY_PERCENTILE + Optional. Defines the percentile to be reported in latency metric. + The valid range is [1, 100]. 
The default value is 50 (median). + -enforcebf16 ENFORCEBF16, --enforce_bfloat16 ENFORCEBF16 + Optional. By default floating point operations execution in bfloat16 precision are enforced if supported by platform. + True - enable bfloat16 regardless of platform support. + False - disable bfloat16 regardless of platform support. -pin {YES,NO,NUMA,HYBRID_AWARE}, --infer_threads_pinning {YES,NO,NUMA,HYBRID_AWARE} Optional. Enable threads->cores ('YES' which is OpenVINO runtime's default for conventional CPUs), threads->(NUMA)nodes ('NUMA'), @@ -152,9 +174,42 @@ Options: graph information serialized. -pc [PERF_COUNTS], --perf_counts [PERF_COUNTS] Optional. Report performance counters. - -ip "U8"/"FP16"/"FP32" Optional. Specifies precision for all input layers of the network. - -op "U8"/"FP16"/"FP32" Optional. Specifies precision for all output layers of the network. - -iop Optional. Specifies precision for input and output layers by name. Example: -iop "input:FP16, output:FP16". Notice that quotes are required. Overwrites precision from ip and op options for specified layers. + -pcseq PCSEQ --pcseq PCSEQ + Optional. Report latencies for each shape in -data_shape sequence. + -inference_only INFERENCE_ONLY, --inference_only INFERENCE_ONLY + Optional. If true inputs filling only once before measurements. + True - fill inputs once before the measurements loop, default value for static models + False - fill inputs each time before inference, default value for dynamic models + -report_type REPORT_TYPE, --report_type REPORT_TYPE + Optional. Enable collecting statistics report. + "--report_type no_counters" report contains configuration options specified, resulting FPS and latency. + "--report_type average_counters" + "report extends \"no_counters\" report and additionally includes average PM " + "counters values for each layer from the network. \"detailed_counters\" report " + "extends \"average_counters\" report and additionally includes per-layer PM " + "counters and latency for each executed infer request. + -dump_config DUMP_CONFIG + Optional. Path to JSON file to dump OpenVINO parameters, which were set by application. + -load_config LOAD_CONFIG + Optional. Path to JSON file to load custom OpenVINO parameters. + Please note, command line parameters have higher priority then parameters from configuration file. + -cdir CACHE_DIR -cache_dir + Optional. Enable model caching to specified directory. + -lfile LOAD_FROM_FILE --load_from_file LOAD_FROM_FILE + Optional. Loads model from file directly without read_network. + -qb QUANTIZATION_BITS --quantization_bits QUANTIZATION_BITS + Optional. Weight bits for quantization: 8 (I8) or 16 (I16) + -iscale INPUT_SCALE --input_scale INPUT_SCALE + Optional. Scale values to be used for the input image per channel. + Values to be provided in the [R, G, B] format. Can be defined for desired input of the model. + Example: -iscale data[255,255,255],info[255,255,255] + -imean INPUT_MEAN --input_mean INPUT_MEAN + Optional. Mean values to be used for the input image per channel. + Values to be provided in the [R, G, B] format. Can be defined for desired input of the model. + Example: -imean data[255,255,255],info[255,255,255] + -ip "u8"/"f16"/"f32" Optional. Specifies precision for all input layers of the network. + -op "u8"/"f16"/"f32" Optional. Specifies precision for all output layers of the network. + -iop Optional. Specifies precision for input and output layers by name. Example: -iop "input:FP16, output:FP16". Notice that quotes are required. 
Overwrites precision from ip and op options for specified layers. ``` Running the application with the empty list of options yields the usage message given above and an error message. @@ -166,7 +221,7 @@ If a model has mixed input types, input folder should contain all required files To run the tool, you can use [public](@ref omz_models_group_public) or [Intel's](@ref omz_models_group_intel) pre-trained models from the Open Model Zoo. The models can be downloaded using the [Model Downloader](@ref omz_tools_downloader). -> **NOTE**: Before running the tool with a trained model, make sure the model is converted to the Inference Engine format (\*.xml + \*.bin) using the [Model Optimizer tool](../../docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md). +> **NOTE**: Before running the tool with a trained model, make sure the model is converted to the OpenVINO format (\*.xml + \*.bin) using the [Model Optimizer tool](../../docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md). ## Examples of Running the Tool @@ -181,7 +236,7 @@ This section provides step-by-step instructions on how to run the Benchmark Tool ```sh python3 downloader.py --name googlenet-v1 -o ``` -2. Convert the model to the Inference Engine IR format. Run Model Optimizer with the path to the model, model format (which must be FP32 for CPU and FPG) and output directory to generate the IR files: +2. Convert the model to the OpenVINO IR format. Run Model Optimizer with the path to the model, model format (which must be FP32 for CPU and FPG) and output directory to generate the IR files: ```sh mo --input_model /public/googlenet-v1/googlenet-v1.caffemodel --data_type FP32 --output_dir ``` @@ -200,33 +255,51 @@ The application outputs number of executed iterations, total duration of executi Additionally, if you set the `-pc` parameter, the application outputs performance counters. If you set `-exec_graph_path`, the application reports executable graph information serialized. -Below are fragments of sample output for CPU and GPU devices: -* For CPU: +Below are fragments of sample output for static and dynamic models: +* For static model: ``` - [Step 8/9] Measuring performance (Start inference asynchronously, 60000 ms duration, 4 inference requests in parallel using 4 streams) - Progress: |................................| 100.00% - - [Step 9/9] Dumping statistics report - Progress: |................................| 100.00% - - Count: 4408 iterations - Duration: 60153.52 ms - Latency: 51.8244 ms - Throughput: 73.28 FPS - ``` -* For GPU: - ``` - [Step 10/11] Measuring performance (Start inference asynchronously, 5 inference requests using 1 streams for CPU, limits: 120000 ms duration) - Progress: |................................| 100% - + [Step 10/11] Measuring performance (Start inference asynchronously, 4 inference requests using 4 streams for CPU, inference only: True, limits: 60000 ms duration) + [ INFO ] Benchmarking in inference only mode (inputs filling are not included in measurement loop). 
+ [ INFO ] First inference took 5.00 ms [Step 11/11] Dumping statistics report - Count: 98075 iterations - Duration: 120011.03 ms - Latency: 5.65 ms - Throughput: 817.22 FPS + Count: 29936 iterations + Duration: 60010.13 ms + Latency: + Median: 7.30 ms + AVG: 7.97 ms + MIN: 5.02 ms + MAX: 29.26 ms + Throughput: 498.85 FPS + ``` +* For dynamic model: + ``` + [Step 10/11] Measuring performance (Start inference asynchronously, 4 inference requests using 4 streams for CPU, inference only: False, limits: 60000 ms duration) + [ INFO ] Benchmarking in full mode (inputs filling are included in measurement loop). + [ INFO ] First inference took 5.10 ms + [Step 11/11] Dumping statistics report + Count: 13596 iterations + Duration: 60028.12 ms + Latency: + AVG: 17.53 ms + MIN: 2.88 ms + MAX: 63.54 ms + Latency for each data shape group: + data: {1, 3, 128, 128} + AVG: 5.09 ms + MIN: 2.88 ms + MAX: 23.30 ms + data: {1, 3, 224, 224} + AVG: 10.67 ms + MIN: 5.97 ms + MAX: 31.79 ms + data: {1, 3, 448, 448} + AVG: 36.84 ms + MIN: 24.76 ms + MAX: 63.54 ms + Throughput: 226.49 FPS ``` ## See Also -* [Using Inference Engine Samples](../../docs/OV_Runtime_UG/Samples_Overview.md) +* [Using OpenVINO Samples](../../docs/OV_Runtime_UG/Samples_Overview.md) * [Model Optimizer](../../docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md) * [Model Downloader](@ref omz_tools_downloader) diff --git a/tools/benchmark_tool/openvino/tools/benchmark/README.md b/tools/benchmark_tool/openvino/tools/benchmark/README.md deleted file mode 100644 index 36b0303dd18..00000000000 --- a/tools/benchmark_tool/openvino/tools/benchmark/README.md +++ /dev/null @@ -1,193 +0,0 @@ -# Benchmark Python* Application - -This topic demonstrates how to run the Benchmark Application demo, which performs inference using convolutional networks. - -## How It Works - -Upon start-up, the application reads command-line parameters and loads a network and images/binary files to the Inference Engine -plugin, which is chosen depending on a specified device. The number of infer requests and execution approach depend -on the mode defined with the `-api` command-line parameter. - -> **NOTE**: By default, Inference Engine samples and demos expect input with BGR channels order. If you trained your model to work with RGB order, you need to manually rearrange the default channels order in the sample or demo application or reconvert your model using the Model Optimizer tool with `--reverse_input_channels` argument specified. For more information about the argument, refer to **When to Reverse Input Channels** section of [Converting a Model](../../../../../docs/MO_DG/prepare_model/convert_model/Converting_Model.md). - -### Synchronous API - -For synchronous mode, the primary metric is latency. The application creates one infer request and executes the `Infer` method. A number of executions is defined by one of the two values: -* Number of iterations defined with the `-niter` command-line argument -* Time duration specified with the `-t` command-line argument -* Both of them (execution will continue until both conditions are met) -* Predefined duration if `-niter` and `-t` are not specified. Predefined duration value depends on device. - -During the execution, the application collects two types of metrics: -* Latency for each infer request executed with `Infer` method -* Duration of all executions - -Reported latency value is calculated as mean value of all collected latencies. 
Reported throughput value is a derivative from reported latency and additionally depends on batch size. - -### Asynchronous API -For asynchronous mode, the primary metric is throughput in frames per second (FPS). The application creates a certain number of infer requests and executes the `StartAsync` method. A number of executions is defined by one of the two values: -* Number of iterations defined with the `-niter` command-line argument -* Time duration specified with the `-t` command-line argument -* Both of them (execution will continue until both conditions are met) -* Predefined duration if `-niter` and `-t` are not specified. Predefined duration value depends on device. - -The infer requests are executed asynchronously. Callback is used to wait for previous execution to complete. The application measures all infer requests executions and reports the throughput metric based on batch size and total execution duration. - -## Running - -Before running the Benchmark tool, install the requirements: -```sh -pip install -r requirements.txt -``` - -Notice that the benchmark_app usually produces optimal performance for any device out of the box. - -**So in most cases you don't need to play the app options explicitly and the plain device name is enough**, e.g.: -``` -$benchmark_app -m -i -d CPU -``` - -But it is still may be non-optimal for some cases, especially for very small networks. More details can read in [Introduction to Performance Topics](../../../../../docs/OV_Runtime_UG/Intro_to_Performance.md). - -Running the application with the `-h` or `--help`' option yields the following usage message: - -``` -usage: benchmark_app.py [-h] [-i PATH_TO_INPUT] -m PATH_TO_MODEL - [-d TARGET_DEVICE] - [-l PATH_TO_EXTENSION] [-c PATH_TO_CLDNN_CONFIG] - [-api {sync,async}] [-niter NUMBER_ITERATIONS] - [-b BATCH_SIZE] - [-stream_output [STREAM_OUTPUT]] [-t TIME] - [-progress [PROGRESS]] [-nstreams NUMBER_STREAMS] - [-nthreads NUMBER_THREADS] [-pin {YES,NO}] - [--exec_graph_path EXEC_GRAPH_PATH] - [-pc [PERF_COUNTS]] - -Options: - -h, --help Show this help message and exit. - -i PATHS_TO_INPUT [PATHS_TO_INPUT ...], --paths_to_input PATHS_TO_INPUT [PATHS_TO_INPUT ...] - Optional. Path to a folder with images and/or binaries - or to specific image or binary file. - -m PATH_TO_MODEL, --path_to_model PATH_TO_MODEL - Required. Path to an .xml file with a trained model. - -d TARGET_DEVICE, --target_device TARGET_DEVICE - Optional. Specify a target device to infer on (the - list of available devices is shown below). Default - value is CPU. Use '-d HETERO:' format to specify HETERO plugin. Use '-d - MULTI:' format to - specify MULTI plugin. Use "-d GPU.X" format to specify - device id for GPU devices. The application looks for a - suitable plugin for the specified device. - -l PATH_TO_EXTENSION, --path_to_extension PATH_TO_EXTENSION - Optional. Required for CPU custom layers. Absolute - path to a shared library with the kernels - implementations. - -c PATH_TO_CLDNN_CONFIG, --path_to_cldnn_config PATH_TO_CLDNN_CONFIG - Optional. Required for GPU custom kernels. Absolute - path to an .xml file with the kernels description. - -api {sync,async}, --api_type {sync,async} - Optional. Enable using sync/async API. Default value - is async. - -niter NUMBER_ITERATIONS, --number_iterations NUMBER_ITERATIONS - Optional. Number of iterations. If not specified, the - number of iterations is calculated depending on a - device. - -b BATCH_SIZE, --batch_size BATCH_SIZE - Optional. Batch size value. 
If not specified, the - batch size value is determined from IR - -stream_output [STREAM_OUTPUT] - Optional. Print progress as a plain text. When - specified, an interactive progress bar is replaced - with a multiline output. - -t TIME, --time TIME Optional. Time in seconds to execute topology. - -progress [PROGRESS] Optional. Show progress bar (can affect performance - measurement). Default values is "False". - -shape SHAPE Optional. Set shape for input. For example, - "input1[1,3,224,224],input2[1,4]" or "[1,3,224,224]" - in case of one input size. - -layout LAYOUT Optional. Prompts how network layouts should be - treated by application. For example, - "input1[NCHW],input2[NC]" or "[NCHW]" in case of one - input size. - -nstreams NUMBER_STREAMS, --number_streams NUMBER_STREAMS - Optional. Number of streams to use for inference on the CPU/GPU/MYRIAD - (for HETERO and MULTI device cases use format :,: or just ). - Default value is determined automatically for a device. - Please note that although the automatic selection usually provides a reasonable performance, - it still may be non-optimal for some cases, especially for very small networks. - Also, using nstreams>1 is inherently throughput-oriented option, while for the best-latency - estimations the number of streams should be set to 1. - -enforcebf16 [{true,false}], --enforce_bfloat16 [{true,false}] - Optional. By default floating point operations execution in bfloat16 precision are enforced if supported by platform. - 'true' - enable bfloat16 regardless of platform support - 'false' - disable bfloat16 regardless of platform support. - -nthreads NUMBER_THREADS, --number_threads NUMBER_THREADS - Number of threads to use for inference on the CPU - (including HETERO and MULTI cases). - -pin {YES,NO,NUMA}, --infer_threads_pinning {YES,NO,NUMA} - Optional. Enable threads->cores ('YES' is default - value), threads->(NUMA)nodes ('NUMA') or completely - disable ('NO')CPU threads pinning for CPU-involved - inference. - --exec_graph_path EXEC_GRAPH_PATH - Optional. Path to a file where to store executable - graph information serialized. - -pc [PERF_COUNTS], --perf_counts [PERF_COUNTS] - Optional. Report performance counters. - -dump_config DUMP_CONFIG - Optional. Path to JSON file to dump IE parameters, - which were set by application. - -load_config LOAD_CONFIG - Optional. Path to JSON file to load custom IE - parameters. Please note, command line parameters have - higher priority then parameters from configuration - file. - -cdir CACHE_DIR, --cache_dir CACHE_DIR - Optional. Enable model caching to specified directory - -lfile [LOAD_FROM_FILE], --load_from_file [LOAD_FROM_FILE] - Optional. Loads model from file directly without - read_network. -``` - -Running the application with the empty list of options yields the usage message given above and an error message. - -Application supports topologies with one or more inputs. If a topology is not data sensitive, you can skip the input parameter. In this case, inputs are filled with random values. -If a model has only image input(s), please a provide folder with images or a path to an image as input. -If a model has some specific input(s) (not images), please prepare a binary file(s), which is filled with data of appropriate precision and provide a path to them as input. -If a model has mixed input types, input folder should contain all required files. Image inputs are filled with image files one by one. Binary inputs are filled with binary inputs one by one. 
- -To run the tool, you can use [public](@ref omz_models_group_public) or [Intel's](@ref omz_models_group_intel) pre-trained models from the Open Model Zoo. The models can be downloaded using the [Model Downloader](@ref omz_tools_downloader). - -> **NOTE**: Before running the demo with a trained model, make sure the model is converted to the Inference Engine format (\*.xml + \*.bin) using the [Model Optimizer tool](../../../../../docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md). - -For example, to do inference of an image using a trained network with multiple outputs on CPU, run the following command: - -``` -python3 benchmark_app.py -i /inputImage.bmp -m /multiple-output.xml -d CPU -``` - -## Demo Output - -The application outputs number of executed iterations, total duration of execution, latency and throughput. -Additionally, if you set the `-pc` parameter, the application outputs performance counters. -If you set `-exec_graph_path`, the application reports executable graph information serialized. - -``` -[Step 8/9] Measuring performance (Start inference asynchronously, 60000 ms duration, 4 inference requests in parallel using 4 streams) -Progress: |................................| 100.00% - -[Step 9/9] Dumping statistics report -Progress: |................................| 100.00% - -Count: 4408 iterations -Duration: 60153.52 ms -Latency: 51.8244 ms -Throughput: 73.28 FPS - -``` - -## See Also -* [Using Inference Engine Samples](../../../../../docs/OV_Runtime_UG/Samples_Overview.md) -* [Model Optimizer](../../../../../docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md) -* [Model Downloader](https://github.com/openvinotoolkit/open_model_zoo/tree/2018/model_downloader) diff --git a/tools/benchmark_tool/openvino/tools/benchmark/main.py b/tools/benchmark_tool/openvino/tools/benchmark/main.py index 549fcfcae16..78037e8ecd6 100644 --- a/tools/benchmark_tool/openvino/tools/benchmark/main.py +++ b/tools/benchmark_tool/openvino/tools/benchmark/main.py @@ -388,11 +388,6 @@ def run(args): elif benchmark.inference_only and not allow_inference_only_or_sync: raise Exception("Benchmarking dynamic model available with input filling in measurement loop only!") - if benchmark.inference_only: - logger.info("Benchmarking in inference only mode (inputs filling are not included in measurement loop).") - else: - logger.info("Benchmarking in full mode (inputs filling are included in measurement loop).") - # update batch size in case dynamic network with one data_shape if benchmark.inference_only and batch_size.is_dynamic: batch_size = Dimension(data_queue.batch_sizes[data_queue.current_group_id]) @@ -441,6 +436,12 @@ def run(args): output_string = process_help_inference_string(benchmark, device_number_streams) next_step(additional_info=output_string) + + if benchmark.inference_only: + logger.info("Benchmarking in inference only mode (inputs filling are not included in measurement loop).") + else: + logger.info("Benchmarking in full mode (inputs filling are included in measurement loop).") + progress_bar_total_count = 10000 if benchmark.niter and not benchmark.duration_seconds: progress_bar_total_count = benchmark.niter diff --git a/tools/benchmark_tool/openvino/tools/benchmark/parameters.py b/tools/benchmark_tool/openvino/tools/benchmark/parameters.py index e7523e20383..f3ff5815eec 100644 --- a/tools/benchmark_tool/openvino/tools/benchmark/parameters.py +++ b/tools/benchmark_tool/openvino/tools/benchmark/parameters.py @@ -84,7 +84,7 @@ def parse_args(): help='Optional. 
' 'Optional if network shapes are all static (original ones or set by -shape).' 'Required if at least one input shape is dynamic and input images are not provided.' - 'Set shape for input tensors. For example, "input1[1,3,224,224],input2[1,4]" or "[1,3,224,224] in case of one input size.') + 'Set shape for input tensors. For example, "input1[1,3,224,224][1,3,448,448],input2[1,4][1,8]" or "[1,3,224,224][1,3,448,448] in case of one input size.') args.add_argument('-layout', type=str, required=False, default='', help='Optional. ' 'Prompts how network layouts should be treated by application. ' @@ -102,8 +102,8 @@ def parse_args(): help='Optional. Defines the percentile to be reported in latency metric. The valid range is [1, 100]. The default value is 50 (median).') args.add_argument('-enforcebf16', '--enforce_bfloat16', type=str2bool, required=False, default=False, nargs='?', const=True, choices=[True, False], help='Optional. By default floating point operations execution in bfloat16 precision are enforced if supported by platform. ' - '\'true\' - enable bfloat16 regardless of platform support. ' - '\'false\' - disable bfloat16 regardless of platform support.') + '\'True\' - enable bfloat16 regardless of platform support. ' + '\'False\' - disable bfloat16 regardless of platform support.') args.add_argument('-nthreads', '--number_threads', type=int, required=False, default=None, help='Number of threads to use for inference on the CPU, GNA ' '(including HETERO and MULTI cases).') @@ -133,9 +133,9 @@ def parse_args(): args.add_argument('-report_folder', '--report_folder', type=str, required=False, default='', help="Optional. Path to a folder where statistics report is stored.") args.add_argument('-dump_config', type=str, required=False, default='', - help="Optional. Path to JSON file to dump IE parameters, which were set by application.") + help="Optional. Path to JSON file to dump OpenVINO parameters, which were set by application.") args.add_argument('-load_config', type=str, required=False, default='', - help="Optional. Path to JSON file to load custom IE parameters." + help="Optional. Path to JSON file to load custom OpenVINO parameters." " Please note, command line parameters have higher priority then parameters from configuration file.") args.add_argument('-qb', '--quantization_bits', type=int, required=False, default=None, choices=[8, 16], help="Optional. 
Weight bits for quantization: 8 (I8) or 16 (I16) ") From 68e873c6c8b3d7dc4c07cea1020a4972e149e5ff Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Mon, 21 Feb 2022 16:01:47 +0300 Subject: [PATCH 031/310] Config and hetero (#10555) * Updated properties documentation * Fixed doc refernce * merged snipet files * fixed build * Updated Hetero docs * Self-review Co-authored-by: Anton Pankratv --- docs/OV_Runtime_UG/Bfloat16Inference.md | 2 +- .../OV_Runtime_UG/InferenceEngine_QueryAPI.md | 235 ----------- .../OpenVINO_Runtime_User_Guide.md | 25 +- docs/OV_Runtime_UG/PropertiesAPI.md | 243 ++++++++++++ .../supported_plugins/Device_Plugins.md | 9 +- .../OV_Runtime_UG/supported_plugins/HETERO.md | 367 +++++++----------- docs/OV_Runtime_UG/supported_plugins/MULTI.md | 4 +- docs/snippets/GPU_Metric1.cpp | 6 +- docs/snippets/ov_hetero.cpp | 53 +++ docs/snippets/ov_hetero.py | 55 +++ docs/snippets/ov_properties_api.cpp | 47 +++ samples/cpp/hello_query_device/README.md | 2 +- samples/python/hello_query_device/README.md | 4 +- 13 files changed, 561 insertions(+), 491 deletions(-) delete mode 100644 docs/OV_Runtime_UG/InferenceEngine_QueryAPI.md create mode 100644 docs/OV_Runtime_UG/PropertiesAPI.md create mode 100644 docs/snippets/ov_hetero.cpp create mode 100644 docs/snippets/ov_hetero.py create mode 100644 docs/snippets/ov_properties_api.cpp diff --git a/docs/OV_Runtime_UG/Bfloat16Inference.md b/docs/OV_Runtime_UG/Bfloat16Inference.md index 8b0e8ff6779..7339e24a042 100644 --- a/docs/OV_Runtime_UG/Bfloat16Inference.md +++ b/docs/OV_Runtime_UG/Bfloat16Inference.md @@ -26,7 +26,7 @@ There are two ways to check if CPU device can support bfloat16 computations for 1. Query the instruction set using one of these system commands: * `lscpu | grep avx512_bf16` * `cat /proc/cpuinfo | grep avx512_bf16` -2. Use the [Query API](InferenceEngine_QueryAPI.md) with `METRIC_KEY(OPTIMIZATION_CAPABILITIES)`, which should return `BF16` in the list of CPU optimization options: +2. Use the [Properties API](PropertiesAPI.md) with `METRIC_KEY(OPTIMIZATION_CAPABILITIES)`, which should return `BF16` in the list of CPU optimization options: @snippet snippets/Bfloat16Inference0.cpp part0 diff --git a/docs/OV_Runtime_UG/InferenceEngine_QueryAPI.md b/docs/OV_Runtime_UG/InferenceEngine_QueryAPI.md deleted file mode 100644 index eacdffe186d..00000000000 --- a/docs/OV_Runtime_UG/InferenceEngine_QueryAPI.md +++ /dev/null @@ -1,235 +0,0 @@ -# Introduction to Inference Engine Device Query API {#openvino_docs_IE_DG_InferenceEngine_QueryAPI} - -## Inference Engine Query API (C++) - -@sphinxdirective -.. raw:: html - -
C++
-@endsphinxdirective - -The OpenVINO™ toolkit supports inferencing with several types of devices (processors or accelerators). -This section provides a high-level description of the process of querying of different device properties and configuration values at runtime. Refer to the [Hello Query Device С++ Sample](../../samples/cpp/hello_query_device/README.md) sources and the [Multi-Device Plugin documentation](supported_plugins/MULTI.md) for examples of using the Inference Engine Query API in user applications. - -### Using the Inference Engine Query API in Your Code - -The `InferenceEngine::Core` class provides the following API to query device information, set or get different device configuration properties: - -* `InferenceEngine::Core::GetAvailableDevices` - Provides a list of available devices. If there are more than one instance of a specific device, the devices are enumerated with `.suffix` where `suffix` is a unique string identifier. The device name can be passed to all methods of the `InferenceEngine::Core` class that work with devices, for example `InferenceEngine::Core::LoadNetwork`. -* `InferenceEngine::Core::GetMetric` - Provides information about specific device. - `InferenceEngine::Core::GetConfig` - Gets the current value of a specific configuration key. -* `InferenceEngine::Core::SetConfig` - Sets a new value for the configuration key. - -The `InferenceEngine::ExecutableNetwork` class is also extended to support the Query API: - -* `InferenceEngine::ExecutableNetwork::GetMetric` -* `InferenceEngine::ExecutableNetwork::GetConfig` -* `InferenceEngine::ExecutableNetwork::SetConfig` - -### Query API in the Core Class - -#### GetAvailableDevices - -@snippet snippets/InferenceEngine_QueryAPI0.cpp part0 - -The function returns a list of available devices, for example: - -``` -MYRIAD.1.2-ma2480 -MYRIAD.1.4-ma2480 -CPU -GPU.0 -GPU.1 -``` - -Each device name can then be passed to: - -* `InferenceEngine::Core::LoadNetwork` to load the network to a specific device. -* `InferenceEngine::Core::GetMetric` to get common or device specific metrics. -* All other methods of the `InferenceEngine::Core` class that accept `deviceName`. - -#### GetConfig() - -The code below demonstrates how to understand whether the `HETERO` device dumps GraphViz `.dot` files with split graphs during the split stage: - -@snippet snippets/InferenceEngine_QueryAPI1.cpp part1 - -For documentation about common configuration keys, refer to `ie_plugin_config.hpp`. Device specific configuration keys can be found in corresponding plugin folders. - -#### GetMetric() - -* To extract device properties such as available device, device name, supported configuration keys, and others, use the `InferenceEngine::Core::GetMetric` method: - -@snippet snippets/InferenceEngine_QueryAPI2.cpp part2 - -A returned value appears as follows: `Intel(R) Core(TM) i7-8700 CPU @ 3.20GHz`. - -> **NOTE**: All metrics have a type, which is specified during metric instantiation. The list of common device-agnostic metrics can be found in `ie_plugin_config.hpp`. Device specific metrics (for example, for HDDL or MYRIAD devices) can be found in corresponding plugin folders. 
- -### Query API in the ExecutableNetwork Class - -#### GetMetric() - -The method is used to get an executable network specific metric such as `METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS)`: - -@snippet snippets/InferenceEngine_QueryAPI3.cpp part3 - -Or the current temperature of the `MYRIAD` device: - -@snippet snippets/InferenceEngine_QueryAPI4.cpp part4 - -#### GetConfig() - -The method is used to get information about configuration values the executable network has been created with: - -@snippet snippets/InferenceEngine_QueryAPI5.cpp part5 - -#### SetConfig() - -The only device that supports this method is [Multi-Device](supported_plugins/MULTI.md). - -## Inference Engine Query API (Python) - -@sphinxdirective -.. raw:: html - -
Python
-@endsphinxdirective - -This section provides a high-level description of the process of querying of different device properties and configuration values. Refer to the [Hello Query Device Python Sample](../../samples/python/hello_query_device/README.md) sources and the [Multi-Device Plugin documentation](supported_plugins/MULTI.md) for examples of using the Inference Engine Query API in user applications. - -### Using the Inference Engine Query API in Your Code - -The Inference Engine [Core](api/ie_python_api/_autosummary/openvino.inference_engine.IECore.html#openvino-inference-engine-iecore) class provides the following API to query device information, set or get different device configuration properties: - -* [ie_api.IECore.available_devices](api/ie_python_api/_autosummary/openvino.inference_engine.IECore.html#openvino.inference_engine.IECore.available_devices) - Provides a list of available devices. If there are more than one instance of a specific device, the devices are enumerated with .suffix where suffix is a unique string identifier. The device name can be passed to all methods of the IECore class that work with devices, for example [ie_api.IECore.load_network](api/ie_python_api/_autosummary/openvino.inference_engine.IECore.html#openvino.inference_engine.IECore.load_network). -* [ie_api.ieCore.get_metric](api/ie_python_api/_autosummary/openvino.inference_engine.IECore.html#openvino.inference_engine.IECore.get_metric) - Provides information about specific device. -* [ie_api.IECore.get_config](api/ie_python_api/_autosummary/openvino.inference_engine.IECore.html#openvino.inference_engine.IECore.get_config) - Gets the current value of a specific configuration key. -* [ie_api.IECore.set_config](api/ie_python_api/_autosummary/openvino.inference_engine.IECore.html#openvino.inference_engine.IECore.set_config) - Sets a new value for the configuration key. - -The [ie_api.ExecutableNetwork](api/ie_python_api/_autosummary/openvino.inference_engine.ExecutableNetwork.html) class is also extended to support the Query API: -* [ie_api.ExecutableNetwork.get_metric](api/ie_python_api/_autosummary/openvino.inference_engine.ExecutableNetwork.html#openvino.inference_engine.ExecutableNetwork.get_metric) -* [ie_api.ExecutableNetwork.get_config](latest/api/ie_python_api/_autosummary/openvino.inference_engine.ExecutableNetwork.html#openvino.inference_engine.ExecutableNetwork.get_config) -* There is no method to call for set_config, but the equivalent action is described below. - -### Query API in the IECore Class - -#### Get Available Devices - -```python -from openvino.inference_engine import IECore - -ie = IECore() -print(ie.available_devices) -``` - -This code prints a list of available devices, for example: - -``` -MYRIAD.1.2-ma2480 -MYRIAD.1.4-ma2480 -FPGA.0 -FPGA.1 -CPU -GPU.0 -GPU.1 -``` - -Each device name can then be passed to: - -* `IECore.load_network` to load the network to a specific device. -* `IECore.get_metric` to get common or device specific metrics. -* All other methods of the `IECore` class that accept a device name. 
- -#### Get Metric - -To extract device properties such as available device, device name, supported configuration keys, and others, use the [IECore.get_metric](api/ie_python_api/_autosummary/openvino.inference_engine.IECore.html#openvino.inference_engine.IECore.get_metric) method: - -```python -from openvino.inference_engine import IECore - -ie = IECore() -ie.get_metric(device_name="CPU", metric_name="FULL_DEVICE_NAME") -``` - -A returned value appears as follows: `Intel(R) Core(TM) i7-8700 CPU @ 3.20GHz`. - -To list all supported metrics for a device: - -```python -from openvino.inference_engine import IECore - -ie = IECore() -ie.get_metric(device_name="GPU", metric_name="SUPPORTED_METRICS") -``` - -#### Get Configuration - -The code below uses the [IECore.get_config](api/ie_python_api/_autosummary/openvino.inference_engine.IECore.html#openvino.inference_engine.IECore.get_config) method and demonstrates how to understand whether the HETERO device dumps .dot files with split graphs during the split stage: - -```python -from openvino.inference_engine import IECore - -ie = IECore() -ie.get_config(device_name="HETERO", config_name="HETERO_DUMP_GRAPH_DOT") -``` - -To list all supported configuration keys for a device: - -```python -from openvino.inference_engine import IECore - -ie = IECore() -ie.get_metric(device_name=device, metric_name="SUPPORTED_CONFIG_KEYS") -``` - -For documentation about common configuration keys, refer to `ie_plugin_config.hpp`. Device specific configuration keys can be found in corresponding plugin folders. - - -### Query API in the ExecutableNetwork Class - -#### Get Metric - -To get the name of the loaded network: - -```python -from openvino.inference_engine import IECore - -ie = IECore() -net = ie.read_network(model=path_to_xml_file) -exec_net = ie.load_network(network=net, device_name=device) -exec_net.get_metric("NETWORK_NAME") -``` - -Use `exec_net.get_metric("SUPPORTED_METRICS")` to list all supported metrics for an ExecutableNetwork instance. - - -#### Get Configuration - -The [IECore.get_config](api/ie_python_api/_autosummary/openvino.inference_engine.IECore.html#openvino.inference_engine.IECore.get_config) method is used to get information about configuration values the executable network has been created with: - -```python -from openvino.inference_engine import IECore - -ie = IECore() -net = ie.read_network(model=path_to_xml_file) -exec_net = ie.load_network(network=net, device_name="CPU") -exec_net.get_config("CPU_THREADS_NUM") -``` - -Or the current temperature of MYRIAD device: - -```python -from openvino.inference_engine import IECore - -ie = IECore() -net = ie.read_network(model=path_to_xml_file) -exec_net = ie.load_network(network=net, device_name="MYRIAD") -exec_net.get_config("DEVICE_THERMAL") -``` - -Use `exec_net.get_metric("SUPPORTED_CONFIG_KEYS")` to list all supported configuration keys. - -#### Set Configuration - -The only device that supports this method in the ExecutableNetwork class is the [Multi-Device](supported_plugins/MULTI.md), where you can change the priorities of the devices for the Multi plugin in real time: `exec_net.set_config({{"MULTI_DEVICE_PRIORITIES", "GPU,CPU"}})`. See the Multi-Device documentation for more details. 
\ No newline at end of file diff --git a/docs/OV_Runtime_UG/OpenVINO_Runtime_User_Guide.md b/docs/OV_Runtime_UG/OpenVINO_Runtime_User_Guide.md index 01617f8b934..bbe9c956f45 100644 --- a/docs/OV_Runtime_UG/OpenVINO_Runtime_User_Guide.md +++ b/docs/OV_Runtime_UG/OpenVINO_Runtime_User_Guide.md @@ -8,19 +8,28 @@ :maxdepth: 1 :hidden: - openvino_2_0_transition_guide openvino_docs_IE_DG_Integrate_with_customer_application_new_API openvino_docs_OV_Runtime_UG_Model_Representation - ngraph_transformation - openvino_docs_deployment_optimization_guide_dldt_optimization_guide - openvino_docs_IE_DG_Device_Plugins - openvino_docs_IE_DG_Int8Inference - openvino_docs_IE_DG_Bfloat16Inference - openvino_docs_IE_DG_DynamicBatching + openvino_docs_IE_DG_ShapeInference + openvino_docs_IE_DG_Device_Plugins + + + openvino_docs_IE_DG_DynamicBatching + + openvino_docs_IE_DG_supported_plugins_AUTO + + openvino_docs_IE_DG_supported_plugins_MULTI + openvino_docs_OV_UG_Hetero_execution + openvino_docs_IE_DG_network_state_intro + openvino_2_0_transition_guide + + openvino_docs_deployment_optimization_guide_dldt_optimization_guide openvino_docs_IE_DG_Model_caching_overview openvino_docs_IE_DG_Extensibility_DG_Intro - openvino_docs_IE_DG_network_state_intro + openvino_docs_IE_DG_Int8Inference + openvino_docs_IE_DG_Bfloat16Inference + ngraph_transformation openvino_docs_OV_Runtime_API_Changes @endsphinxdirective diff --git a/docs/OV_Runtime_UG/PropertiesAPI.md b/docs/OV_Runtime_UG/PropertiesAPI.md new file mode 100644 index 00000000000..435e0e3cd88 --- /dev/null +++ b/docs/OV_Runtime_UG/PropertiesAPI.md @@ -0,0 +1,243 @@ +# Introduction to OpenVINO™ Device Properties API {#openvino_docs_IE_DG_InferenceEngine_QueryAPI} + +## OpenVINO™ Properties API (C++) + +@sphinxdirective +.. raw:: html + +
C++
+@endsphinxdirective
+
+The OpenVINO™ toolkit supports inferencing with several types of devices (processors or accelerators).
+This section provides a high-level description of the process of querying different device properties and configuration values at runtime. Refer to the [Hello Query Device C++ Sample](../../samples/cpp/hello_query_device/README.md) sources and the [Multi-Device Plugin documentation](supported_plugins/MULTI.md) for examples of using the OpenVINO™ Properties API in user applications.
+
+### Using the OpenVINO™ Properties API in Your Code
+
+The `ov::Core` class provides the following API to query device information, set or get different device configuration properties:
+
+* `ov::Core::get_available_devices` - Provides a list of available devices. If there is more than one instance of a specific device, the devices are enumerated with `.suffix` where `suffix` is a unique string identifier. The device name can be passed to all methods of the `ov::Core` class that work with devices, for example `ov::Core::compile_model`.
+* `ov::Core::get_property` - Gets the current value of a specific property.
+* `ov::Core::set_property` - Sets a new value for the property.
+
+The `ov::CompiledModel` class is also extended to support the Properties API:
+
+* `ov::CompiledModel::get_property`
+* `ov::CompiledModel::set_property`
+
+### Properties API in the Core Class
+
+#### get_available_devices
+
+@snippet snippets/ov_properties_api.cpp part0
+
+The function returns a list of available devices, for example:
+
+```
+MYRIAD.1.2-ma2480
+MYRIAD.1.4-ma2480
+CPU
+GPU.0
+GPU.1
+```
+
+Each device name can then be passed to:
+
+* `ov::Core::compile_model` to load the network to a specific device.
+* `ov::Core::get_property` to get common or device specific properties.
+* All other methods of the `ov::Core` class that accept `deviceName`.
+
+#### ov::Core methods
+
+`ov::Core` methods like:
+
+* `ov::Core::compile_model`
+* `ov::Core::import_model`
+* `ov::Core::query_model`
+* `ov::Core::create_context`
+
+accept a variadic list of properties as their last arguments. Each property in such a parameter list is passed as a function call that binds the property value to the declared property type:
+
+@snippet snippets/ov_properties_api.cpp part3
+
+#### get_property()
+
+For documentation about common configuration keys, refer to `openvino/runtime/properties.hpp`. Device specific configuration keys can be found in corresponding plugin folders.
+
+* The code below demonstrates how to query the `HETERO` device for the priority of devices that will be used to infer the model:
+
+@snippet snippets/ov_properties_api.cpp part1
+
+* To extract device properties such as available device, device name, supported configuration keys, and others, use the `ov::Core::get_property` method:
+
+@snippet snippets/ov_properties_api.cpp part2
+
+A returned value appears as follows: `Intel(R) Core(TM) i7-8700 CPU @ 3.20GHz`.
+
+> **NOTE**: All properties have a type, which is specified during property declaration. The list of common device-agnostic properties can be found in `openvino/runtime/properties.hpp`. Device specific properties (for example, for HDDL or MYRIAD devices) can be found in corresponding plugin folders.
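+
+For illustration only — this is an editorial sketch rather than one of the documentation snippets referenced above — the calls described in this section can be combined into a small standalone program. It assumes nothing beyond an installed OpenVINO Runtime and at least one visible device:
+
+```cpp
+#include <openvino/runtime/core.hpp>
+
+#include <iostream>
+#include <string>
+
+int main() {
+    ov::Core core;
+    // Enumerate every device visible to OpenVINO Runtime and print its full name,
+    // a read-only property declared in openvino/runtime/properties.hpp.
+    for (const std::string& device : core.get_available_devices()) {
+        const std::string full_name = core.get_property(device, ov::device::full_name);
+        std::cout << device << ": " << full_name << std::endl;
+    }
+    return 0;
+}
+```
+
+Enumerated names such as `GPU.0` or `GPU.1` printed by this sketch can be passed to `ov::Core::compile_model` in exactly the same way.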
+
+### Properties API in the CompiledModel Class
+
+#### get_property()
+
+The method is used to get the configuration values the compiled model has been created with, or a compiled-model-specific property such as `ov::optimal_number_of_infer_requests`:
+
+@snippet snippets/ov_properties_api.cpp part4
+
+Or the current temperature of the `MYRIAD` device:
+
+@snippet snippets/ov_properties_api.cpp part5
+
+Or the number of threads that would be used for inference on the `CPU` device:
+
+@snippet snippets/ov_properties_api.cpp part6
+
+#### set_property()
+
+The only device that supports this method is [Multi-Device](supported_plugins/MULTI.md).
+
+## OpenVINO™ Properties API (Python)
+
+@sphinxdirective
+.. raw:: html
+
+    <div id="switcher-python" class="switcher-anchor">
Python
+@endsphinxdirective
+
+This section provides a high-level description of the process of querying different device properties and configuration values. Refer to the [Hello Query Device Python Sample](../../samples/python/hello_query_device/README.md) sources and the [Multi-Device Plugin documentation](supported_plugins/MULTI.md) for examples of using the OpenVINO™ Properties API in user applications.
+
+### Using the OpenVINO™ Properties API in Your Code
+
+The OpenVINO™ [Core](api/ie_python_api/_autosummary/openvino.inference_engine.IECore.html#openvino-inference-engine-iecore) class provides the following API to query device information, set or get different device configuration properties:
+
+* [ie_api.IECore.available_devices](api/ie_python_api/_autosummary/openvino.inference_engine.IECore.html#openvino.inference_engine.IECore.available_devices) - Provides a list of available devices. If there is more than one instance of a specific device, the devices are enumerated with .suffix where suffix is a unique string identifier. The device name can be passed to all methods of the IECore class that work with devices, for example [ie_api.IECore.load_network](api/ie_python_api/_autosummary/openvino.inference_engine.IECore.html#openvino.inference_engine.IECore.load_network).
+* [ie_api.IECore.get_property](api/ie_python_api/_autosummary/openvino.inference_engine.IECore.html#openvino.inference_engine.IECore.get_property) - Provides information about a specific device.
+* [ie_api.IECore.get_config](api/ie_python_api/_autosummary/openvino.inference_engine.IECore.html#openvino.inference_engine.IECore.get_config) - Gets the current value of a specific configuration key.
+* [ie_api.IECore.set_config](api/ie_python_api/_autosummary/openvino.inference_engine.IECore.html#openvino.inference_engine.IECore.set_config) - Sets a new value for the configuration key.
+
+The [ie_api.CompiledModel](api/ie_python_api/_autosummary/openvino.inference_engine.CompiledModel.html) class is also extended to support the Properties API:
+* [ie_api.CompiledModel.get_property](api/ie_python_api/_autosummary/openvino.inference_engine.CompiledModel.html#openvino.inference_engine.CompiledModel.get_property)
+* [ie_api.CompiledModel.get_config](api/ie_python_api/_autosummary/openvino.inference_engine.CompiledModel.html#openvino.inference_engine.CompiledModel.get_config)
+* There is no method to call for set_config, but the equivalent action is described below.
+
+### Properties API in the IECore Class
+
+#### Get Available Devices
+
+```python
+from openvino.inference_engine import IECore
+
+ie = IECore()
+print(ie.available_devices)
+```
+
+This code prints a list of available devices, for example:
+
+```
+MYRIAD.1.2-ma2480
+MYRIAD.1.4-ma2480
+FPGA.0
+FPGA.1
+CPU
+GPU.0
+GPU.1
+```
+
+Each device name can then be passed to:
+
+* `IECore.load_network` to load the network to a specific device.
+* `IECore.get_property` to get common or device specific properties.
+* All other methods of the `IECore` class that accept a device name, as in the short example below.
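+
+As a quick illustration — an editorial sketch, not one of the documented samples — an enumerated device name can be passed straight to `IECore.load_network`; the model path below is a placeholder:
+
+```python
+from openvino.inference_engine import IECore
+
+ie = IECore()
+# "model.xml" is a placeholder for any IR produced by Model Optimizer.
+net = ie.read_network(model="model.xml")
+# Any name reported by ie.available_devices, e.g. "CPU" or an enumerated "GPU.1", works here.
+exec_net = ie.load_network(network=net, device_name=ie.available_devices[0])
+```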
+ +#### Get Metric + +To extract device properties such as available device, device name, supported configuration keys, and others, use the [IECore.get_property](api/ie_python_api/_autosummary/openvino.inference_engine.IECore.html#openvino.inference_engine.IECore.get_property) method: + +```python +from openvino.inference_engine import IECore + +ie = IECore() +ie.get_property(device_name="CPU", property_name="FULL_DEVICE_NAME") +``` + +A returned value appears as follows: `Intel(R) Core(TM) i7-8700 CPU @ 3.20GHz`. + +To list all supported properties for a device: + +```python +from openvino.inference_engine import IECore + +ie = IECore() +ie.get_property(device_name="GPU", property_name="SUPPORTED_METRICS") +``` + +#### Get Configuration + +The code below uses the [IECore.get_config](api/ie_python_api/_autosummary/openvino.inference_engine.IECore.html#openvino.inference_engine.IECore.get_config) method and demonstrates how to understand whether the HETERO device dumps .dot files with split graphs during the split stage: + +```python +from openvino.inference_engine import IECore + +ie = IECore() +ie.get_config(device_name="HETERO", config_name="HETERO_DUMP_GRAPH_DOT") +``` + +To list all supported configuration keys for a device: + +```python +from openvino.inference_engine import IECore + +ie = IECore() +ie.get_property(device_name=device, property_name="SUPPORTED_CONFIG_KEYS") +``` + +For documentation about common configuration keys, refer to `ie_plugin_config.hpp`. Device specific configuration keys can be found in corresponding plugin folders. + + +### Properties API in the CompiledModel Class + +#### Get Metric + +To get the name of the loaded network: + +```python +from openvino.inference_engine import IECore + +ie = IECore() +net = ie.read_network(model=path_to_xml_file) +exec_net = ie.load_network(network=net, device_name=device) +exec_net.get_property("NETWORK_NAME") +``` + +Use `exec_net.get_property("SUPPORTED_METRICS")` to list all supported properties for an CompiledModel instance. + + +#### Get Configuration + +The [IECore.get_config](api/ie_python_api/_autosummary/openvino.inference_engine.IECore.html#openvino.inference_engine.IECore.get_config) method is used to get information about configuration values the compiled model has been created with: + +```python +from openvino.inference_engine import IECore + +ie = IECore() +net = ie.read_network(model=path_to_xml_file) +exec_net = ie.load_network(network=net, device_name="CPU") +exec_net.get_config("CPU_THREADS_NUM") +``` + +Or the current temperature of MYRIAD device: + +```python +from openvino.inference_engine import IECore + +ie = IECore() +net = ie.read_network(model=path_to_xml_file) +exec_net = ie.load_network(network=net, device_name="MYRIAD") +exec_net.get_config("DEVICE_THERMAL") +``` + +Use `exec_net.get_property("SUPPORTED_CONFIG_KEYS")` to list all supported configuration keys. + +#### Set Configuration + +The only device that supports this method in the CompiledModel class is the [Multi-Device](supported_plugins/MULTI.md), where you can change the priorities of the devices for the Multi plugin in real time: `exec_net.set_config({{"MULTI_DEVICE_PRIORITIES", "GPU,CPU"}})`. See the Multi-Device documentation for more details. 
\ No newline at end of file diff --git a/docs/OV_Runtime_UG/supported_plugins/Device_Plugins.md b/docs/OV_Runtime_UG/supported_plugins/Device_Plugins.md index 250b69e6371..ada0e6ce977 100644 --- a/docs/OV_Runtime_UG/supported_plugins/Device_Plugins.md +++ b/docs/OV_Runtime_UG/supported_plugins/Device_Plugins.md @@ -11,10 +11,7 @@ openvino_docs_IE_DG_supported_plugins_GPU openvino_docs_IE_DG_supported_plugins_VPU openvino_docs_IE_DG_supported_plugins_GNA - openvino_docs_IE_DG_supported_plugins_AUTO - openvino_docs_IE_DG_supported_plugins_HETERO - openvino_docs_IE_DG_supported_plugins_MULTI - + @endsphinxdirective Inference Engine uses a plugin architecture. Inference Engine plugin is a software component that contains complete implementation for inference on a certain Intel® hardware device: CPU, GPU, VPU, GNA, etc. Each plugin implements the unified API and provides additional hardware-specific APIs. @@ -27,8 +24,8 @@ The Inference Engine provides capabilities to infer deep learning models on the |[CPU plugin](CPU.md) |Intel® Xeon® with Intel® Advanced Vector Extensions 2 (Intel® AVX2), Intel® Advanced Vector Extensions 512 (Intel® AVX-512), and AVX512_BF16, Intel® Core™ Processors with Intel® AVX2, Intel® Atom® Processors with Intel® Streaming SIMD Extensions (Intel® SSE) | |[VPU plugins](VPU.md) (available in the Intel® Distribution of OpenVINO™ toolkit) |Intel® Neural Compute Stick 2 powered by the Intel® Movidius™ Myriad™ X, Intel® Vision Accelerator Design with Intel® Movidius™ VPUs | |[GNA plugin](GNA.md) (available in the Intel® Distribution of OpenVINO™ toolkit) |Intel® Speech Enabling Developer Kit, Amazon Alexa* Premium Far-Field Developer Kit, Intel® Pentium® Silver J5005 Processor, Intel® Pentium® Silver N5000 Processor, Intel® Celeron® J4005 Processor, Intel® Celeron® J4105 Processor, Intel® Celeron® Processor N4100, Intel® Celeron® Processor N4000, Intel® Core™ i3-8121U Processor, Intel® Core™ i7-1065G7 Processor, Intel® Core™ i7-1060G7 Processor, Intel® Core™ i5-1035G4 Processor, Intel® Core™ i5-1035G7 Processor, Intel® Core™ i5-1035G1 Processor, Intel® Core™ i5-1030G7 Processor, Intel® Core™ i5-1030G4 Processor, Intel® Core™ i3-1005G1 Processor, Intel® Core™ i3-1000G1 Processor, Intel® Core™ i3-1000G4 Processor| -|[Multi-Device plugin](MULTI.md) |Multi-Device plugin enables simultaneous inference of the same network on several Intel® devices in parallel | -|[Auto-Device plugin](AUTO.md) |Auto-Device plugin enables selecting Intel® device for inference automatically | +|[Multi-Device plugin](MULTI.md) |Multi-Device plugin enables simultaneous inference of the same network on several Intel® devices in parallel | +|[Auto-Device plugin](AUTO.md) |Auto-Device plugin enables selecting Intel® device for inference automatically | |[Heterogeneous plugin](HETERO.md) |Heterogeneous plugin enables automatic inference splitting between several Intel® devices (for example if a device doesn't [support certain layers](#supported-layers)). | Devices similar to the ones we have used for benchmarking can be accessed using [Intel® DevCloud for the Edge](https://devcloud.intel.com/edge/), a remote development environment with access to Intel® hardware and the latest versions of the Intel® Distribution of the OpenVINO™ Toolkit. [Learn more](https://devcloud.intel.com/edge/get_started/devcloud/) or [Register here](https://inteliot.force.com/DevcloudForEdge/s/). 
diff --git a/docs/OV_Runtime_UG/supported_plugins/HETERO.md b/docs/OV_Runtime_UG/supported_plugins/HETERO.md index 85cf286df95..04823782c55 100644 --- a/docs/OV_Runtime_UG/supported_plugins/HETERO.md +++ b/docs/OV_Runtime_UG/supported_plugins/HETERO.md @@ -1,256 +1,157 @@ -# Heterogeneous Plugin {#openvino_docs_IE_DG_supported_plugins_HETERO} +# Heterogeneous execution {#openvino_docs_OV_UG_Hetero_execution} -## Introducing the Heterogeneous Plugin (C++) +## Introducing the Heterogeneous execution -@sphinxdirective -.. raw:: html +The heterogeneous execution enables computing the inference of one model on several devices. The purposes of executing models in heterogeneous mode are to: -
C++
-@endsphinxdirective
-
-The heterogeneous plugin enables computing the inference of one network on several devices. The purposes of executing networks in heterogeneous mode are to:
-
-* Utilize the power of accelerators to process the heaviest parts of the network and to execute unsupported layers on fallback devices like the CPU
+* Utilize the power of accelerators to process the heaviest parts of the model and to execute unsupported operations on fallback devices like the CPU
 * Utilize all available hardware more efficiently during one inference
 
-The execution through heterogeneous plugin can be divided into two independent steps:
+The execution through heterogeneous mode can be divided into two independent steps:
 
-1. Setting of hardware affinity to layers
-2. Loading a network to the Heterogeneous plugin, splitting the network to parts, and executing them through the plugin
+1. Setting of hardware affinity to operations (ov::Core::query_model is used internally by the Hetero device)
+2. Compiling a model on the Heterogeneous device, which splits the model into parts, compiles them on the specified devices (via ov::device::priorities), and executes them through the Heterogeneous mode. The model is split into subgraphs according to the affinities, where a set of connected operations with the same affinity forms a dedicated subgraph. Each subgraph is compiled on a dedicated device, producing multiple ov::CompiledModel objects, which are connected via automatically allocated intermediate tensors.
 
-These steps are decoupled. The setting of affinity can be done automatically using the fallback policy or in manual mode.
+These steps are decoupled. The setting of affinities can be done automatically using the `automatic fallback` policy or in `manual` mode:
 
-The fallback automatic policy causes "greedy" behavior and assigns all layers that can be executed on certain device according to the priorities you specify (for example, HETERO:GPU,CPU).
-Automatic policy does not take into account plugin peculiarities such as the inability to infer some layers without other special layers placed before or after that layer. The plugin is responsible for solving such cases. If the device plugin does not support the subgraph topology constructed by the HETERO plugin, then you should set affinity manually.
+- The automatic fallback policy causes "greedy" behavior and assigns all operations that can be executed on a certain device according to the priorities you specify (for example, `ov::device::priorities("GPU,CPU")`).
+The automatic policy does not take into account device peculiarities such as the inability to infer some operations without other special operations placed before or after them. The plugin is responsible for solving such cases. If the device plugin does not support the subgraph topology constructed by the HETERO device, then you should set affinity manually.
+- The manual policy assumes explicitly setting affinities for all operations in the model using the runtime information ov::Node::get_rt_info.
 
-### Details of Splitting Network and Execution
-During loading of the network to the Heterogeneous plugin, the network is divided into separate parts and loaded to dedicated plugins.
-Intermediate blobs between these subgraphs are allocated automatically in the most efficient way.
+### Defining and Configuring the Hetero Device
+Following the OpenVINO™ convention of labeling devices, the Hetero execution uses the name `"HETERO"`.
Configuration options for the Hetero device:
+
+| Parameter name | C++ property | Parameter values | Default | Description |
+| -------------- | ---------------- | ---------------- | --- | --- |
+| "MULTI_DEVICE_PRIORITIES" | `ov::device::priorities` | comma-separated device names with no spaces | N/A | Prioritized list of devices |
+
+### Automatic and manual policies for assigning affinities
+
+The `automatic fallback` policy decides which operation goes to which device automatically, according to the operation support in the dedicated devices (`GPU`, `CPU`, `MYRIAD`, etc.); the query model step is called implicitly by the Hetero device during model compilation:
+
+@sphinxdirective
+
+.. tab:: C++
+
+    .. doxygensnippet:: docs/snippets/ov_hetero.cpp
+       :language: cpp
+       :fragment: [compile_model]
+
+.. tab:: Python
+
+    .. doxygensnippet:: docs/snippets/ov_hetero.py
+       :language: python
+       :fragment: [compile_model]
+
+@endsphinxdirective
+
+Another way to annotate a model is to set all affinities `manually`, using ov::Node::get_rt_info with the key `"affinity"`:
+
+@sphinxdirective
+
+.. tab:: C++
+
+    .. doxygensnippet:: docs/snippets/ov_hetero.cpp
+       :language: cpp
+       :fragment: [set_manual_affinities]
+
+.. tab:: Python
+
+    .. doxygensnippet:: docs/snippets/ov_hetero.py
+       :language: python
+       :fragment: [set_manual_affinities]
+
+@endsphinxdirective
+
+The fallback policy does not work if at least one operation has an initialized `"affinity"`. If you want to adjust automatically set affinities, get the automatic affinities first and then fix them (usually, to minimize the total number of subgraphs and optimize memory transfers):
+
+@sphinxdirective
+
+.. tab:: C++
+
+    .. doxygensnippet:: docs/snippets/ov_hetero.cpp
+       :language: cpp
+       :fragment: [fix_automatic_affinities]
+
+.. tab:: Python
+
+    .. doxygensnippet:: docs/snippets/ov_hetero.py
+       :language: python
+       :fragment: [fix_automatic_affinities]
+
+@endsphinxdirective
+
+> **NOTE**: ov::Core::query_model does not depend on affinities set by a user. Instead, it queries for operation support based on device capabilities.
+
+### Configure Fallback Devices
+If you want different devices in Hetero execution to have different device-specific configuration options, you can use the special helper property ov::device::properties:
+
+@sphinxdirective
+
+.. tab:: C++
+
+    .. doxygensnippet:: docs/snippets/ov_hetero.cpp
+       :language: cpp
+       :fragment: [configure_fallback_devices]
+
+.. tab:: Python
+
+    .. doxygensnippet:: docs/snippets/ov_hetero.py
+       :language: python
+       :fragment: [configure_fallback_devices]
+
+@endsphinxdirective
+
+In the example above, the `CPU` device is configured to collect profiling data, while only the `GPU` device is set to perform inference in `f16` precision; the CPU keeps its default execution precision.
+
+### Handling Difficult Topologies
+
+Some topologies are not friendly to heterogeneous execution on some devices, or cannot be executed at all in this mode.
+For example, models having activation operations that are not supported on the primary device are split by the Hetero device into multiple sets of subgraphs, which leads to suboptimal execution.
+If transmitting data from one subgraph of the model to another in heterogeneous mode takes more time than the normal execution does, heterogeneous execution may not make sense.
+In this case, you can define the heaviest part manually and set the affinity to avoid sending data back and forth many times during one inference.
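+
+As a minimal illustration of such manual pinning, here is a Python sketch using the `openvino.runtime` API. The `"sample.xml"` path mirrors the snippets above, while the assumption that the heavy block's operation names start with `"block1"` is purely hypothetical:
+
+```python
+from openvino.runtime import Core
+
+core = Core()
+model = core.read_model("sample.xml")
+
+# Keep the heavy block on the accelerator and everything else on the CPU,
+# so intermediate data crosses the device boundary as rarely as possible.
+for op in model.get_ops():
+    affinity = "GPU" if op.get_friendly_name().startswith("block1") else "CPU"
+    op.get_rt_info()["affinity"] = affinity
+
+compiled_model = core.compile_model(model, "HETERO:GPU,CPU")
+```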
+
+### Analyzing Performance of Heterogeneous Execution
+After enabling the OPENVINO_HETERO_VISUALIZE environment variable, you can dump GraphViz* `.dot` files with annotations of operations per device.
+
+The Heterogeneous device can generate two files:
+
+* `hetero_affinity_.dot` - annotation of affinities per operation.
+* `hetero_subgraphs_.dot` - annotation of affinities per graph.
+
+You can use the GraphViz* utility or a file converter to view the images. On the Ubuntu* operating system, you can use xdot:
+
+* `sudo apt-get install xdot`
+* `xdot hetero_subgraphs.dot`
+
+You can use performance data (in sample applications, it is the `-pc` option) to get performance data for each subgraph.
+
+Here is an example of the output for Googlenet v1 running on HDDL with fallback to CPU:
+
+```
+subgraph1: 1. input preprocessing (mean data/HDDL):EXECUTED layerType: realTime: 129 cpu: 129 execType:
+subgraph1: 2. input transfer to DDR:EXECUTED layerType: realTime: 201 cpu: 0 execType:
+subgraph1: 3. HDDL execute time:EXECUTED layerType: realTime: 3808 cpu: 0 execType:
+subgraph1: 4. output transfer from DDR:EXECUTED layerType: realTime: 55 cpu: 0 execType:
+subgraph1: 5. HDDL output postprocessing:EXECUTED layerType: realTime: 7 cpu: 7 execType:
+subgraph1: 6. copy to IE blob:EXECUTED layerType: realTime: 2 cpu: 2 execType:
+subgraph2: out_prob: NOT_RUN layerType: Output realTime: 0 cpu: 0 execType: unknown
+subgraph2: prob: EXECUTED layerType: SoftMax realTime: 10 cpu: 10 execType: ref
+Total time: 4212 microseconds
+```
 
 ### Sample Usage
 
-Inference Engine sample programs can use the Heterogeneous plugin used with the `-d` option:
+OpenVINO™ sample programs can use the Heterogeneous execution with the `-d` option:
 
 ```sh
 ./hello_classification /squeezenet1.1.xml /picture.jpg HETERO:GPU,CPU
 ```
 where:
-- `HETERO` stands for the Heterogeneous plugin
+- `HETERO` stands for the Heterogeneous execution
 - `GPU,CPU` points to fallback policy with priority on GPU and fallback to CPU
 
 You can point more than two devices: `-d HETERO:MYRIAD,GPU,CPU`
-
-### Annotation of Layers per Device and Default Fallback Policy
-
-Default fallback policy decides which layer goes to which device automatically according to the support in dedicated plugins (GPU, CPU, MYRIAD).
-
-Another way to annotate a network is to set affinity manually using `ngraph::Node::get_rt_info` with key `affinity`:
-
-@snippet snippets/HETERO0.cpp part0
-
-The fallback policy does not work if even one layer has an initialized affinity. The sequence should be to call automating affinity settings and then fix manually.
-
-> **NOTE**: If you set affinity manually, be careful because currently Inference Engine plugins don't support constant (`Constant`->`Result`) and empty (`Parameter`->`Result`) networks. Please avoid such subgraphs when you set affinity manually.
-
-@snippet snippets/HETERO1.cpp part1
-
-If you rely on the default affinity distribution, you can avoid calling InferenceEngine::Core::QueryNetwork and just call InferenceEngine::Core::LoadNetwork instead:
-
-@snippet snippets/HETERO2.cpp part2
-
-> **NOTE**: `InferenceEngine::Core::QueryNetwork` does not depend on affinities set by a user. Instead, it queries for layer support based on device capabilities.
-
-### Handling Difficult Topologies
-
-Some topologies are not friendly to heterogeneous execution on some devices or cannot be executed at all with this plugin
-Examples are networks having activation layers that are not supported on the primary device.
-If transmitting data from one part of a network to another part in heterogeneous mode takes more time than in normal mode, it may not make sense to execute them in heterogeneous mode. -In this case, you can define the heaviest part manually and set the affinity to avoid sending data back and forth many times during one inference. - -### Execution Precision -Precision for inference in the heterogeneous plugin is defined by: -* Precision of IR -* Ability of final plugins to execute in precision defined in IR - -For example, if you want to execute GPU with CPU fallback with FP16 on GPU, you need to use only FP16 IR. - -### Analyzing Performance Heterogeneous Execution -After enabling the KEY_HETERO_DUMP_GRAPH_DOT config key (shown in code snippet below), you can dump GraphViz* `.dot` files with annotations of devices per layer. - -The Heterogeneous plugin can generate two files: - -* `hetero_affinity_.dot` - annotation of affinities per layer. This file is written to the disk only if default fallback policy was executed -* `hetero_subgraphs_.dot` - annotation of affinities per graph. This file is written to the disk during execution of `ICNNNetwork::LoadNetwork()` for the Heterogeneous plugin - -@snippet snippets/HETERO3.cpp part3 - -You can use the GraphViz* utility or a file converter to view the images. On the Ubuntu* operating system, you can use xdot: - -* `sudo apt-get install xdot` -* `xdot hetero_subgraphs.dot` - -You can use performance data (in sample applications, it is the option `-pc`) to get the performance data on each subgraph. - -Here is an example of the output for Googlenet v1 running on HDDL with fallback to CPU: - -``` -subgraph1: 1. input preprocessing (mean data/HDDL):EXECUTED layerType: realTime: 129 cpu: 129 execType: -subgraph1: 2. input transfer to DDR:EXECUTED layerType: realTime: 201 cpu: 0 execType: -subgraph1: 3. HDDL execute time:EXECUTED layerType: realTime: 3808 cpu: 0 execType: -subgraph1: 4. output transfer from DDR:EXECUTED layerType: realTime: 55 cpu: 0 execType: -subgraph1: 5. HDDL output postprocessing:EXECUTED layerType: realTime: 7 cpu: 7 execType: -subgraph1: 6. copy to IE blob:EXECUTED layerType: realTime: 2 cpu: 2 execType: -subgraph2: out_prob: NOT_RUN layerType: Output realTime: 0 cpu: 0 execType: unknown -subgraph2: prob: EXECUTED layerType: SoftMax realTime: 10 cpu: 10 execType: ref -Total time: 4212 microseconds -``` -### See Also -[Supported Devices](Supported_Devices.md) - -## Introducing the Heterogeneous Plugin (Python) - -@sphinxdirective -.. raw:: html - -
Python
-@endsphinxdirective - -The heterogeneous plugin enables computing the inference of one network on several devices. The purposes of executing networks in heterogeneous mode are to: - -* Utilize the power of accelerators to process the heaviest parts of the network and to execute unsupported layers on fallback devices like the CPU -* Utilize all available hardware more efficiently during one inference - -The execution through heterogeneous plugin can be divided into two independent steps: - -1. Setting of hardware affinity to layers -2. Loading a network to the Heterogeneous plugin, splitting the network to parts, and executing them through the plugin - -These steps are decoupled. The setting of affinity can be done automatically using the fallback policy or in manual mode. - -The fallback automatic policy causes "greedy" behavior and assigns all layers that can be executed on certain device according to the priorities you specify (for example, HETERO:GPU,CPU). -Automatic policy does not take into account plugin peculiarities such as the inability to infer some layers without other special layers placed before or after that layer. The plugin is responsible for solving such cases. If the device plugin does not support the subgraph topology constructed by the HETERO plugin, then you should set affinity manually. - -Some of the topologies are not well-supported for heterogeneous execution on some devices or cannot be executed in this mode at all. Examples of such networks are those having activation layers which are not supported on the primary device. If transmitting data from one part of a network to another part in heterogeneous mode takes more time than in normal mode, it may not make sense to execute them in heterogeneous mode. In this case, you can define the most compute intense part manually and set the affinity to avoid sending data back and forth many times during one inference. - -### Use Default Layer Affinities - -To use the default affinities, call `load_network` with the "HETERO" device, with an optional list of devices to consider. - -```python -from openvino.inference_engine import IECore - -ie = IECore() -net = ie.read_network(model=path_to_model) -exec_net = ie.load_network(network=net, device_name='HETERO:GPU,CPU') -``` - - -### Annotation of Layers per Device and Default Fallback Policy - -Default fallback policy decides which layer goes to which device automatically according to the support in dedicated plugins (GPU, CPU, MYRIAD). - -Another way to annotate a network is to set affinity manually using code. - -### Set Affinity of All Layers to CPU -```python -import ngraph as ng -from openvino.inference_engine import IECore - -ie = IECore() -# Read a network in IR or ONNX format -net = ie.read_network(path_to_model) -# Create an Ngraph (graph) function from the network -ng_func = ng.function_from_cnn(net) -for node in ng_func.get_ordered_ops(): - rt_info = node.get_rt_info() - rt_info["affinity"] = "CPU" -``` - - -The fallback policy does not work if even one layer has an initialized affinity. The sequence should be calling the default affinity settings and then setting the layers manually. - -> **NOTE**: If you set affinity manually, be aware that currently Inference Engine plugins do not support constant (*Constant -> Result*) and empty (*Parameter -> Result*) networks. Please avoid these subgraphs when you set affinity manually. 
- -### Example - Manually Setting Layer Affinities - -```python -import ngraph as ng -from openvino.inference_engine import IECore - -ie = IECore() -# Read a network in IR or ONNX format -net = ie.read_network(path_to_model) -ng_func = ng.function_from_cnn(net) - -for node in ng_func.get_ordered_ops(): - rt_info = node.get_rt_info() - rt_info["affinity"] = "CPU" - -# Load the network on the target device -exec_net = ie.load_network(network=net, device_name='HETERO:FPGA,CPU') -``` - -> **NOTE**: `ie.query_network` does not depend on affinities set by a user, but queries for layer support based on device capabilities. - -### Details of Splitting Network and Execution - -During the loading of the network to the heterogeneous plugin, the network is divided into separate parts and loaded to dedicated plugins. Intermediate blobs between these sub graphs are allocated automatically in the most efficient way. - -### Execution Precision - -The precision for inference in the heterogeneous plugin is defined by: -* Precision of IR -* Ability of final plugins to execute in precision defined in IR - -For example, if you want to execute GPU with CPU fallback with FP16 on GPU, you need to use only FP16 IR. - -OpenVINO samples can be used with the following command: -```sh -./hello_classification /squeezenet1.1.xml /picture.jpg HETERO:GPU,CPU -``` - -where `HETERO` stands for the heterogeneous plugin. - -You can point to more than two devices, for example: `-d HETERO:MYRIAD,GPU,CPU` - -### Analyzing Heterogeneous Execution - -After enabling the KEY_HETERO_DUMP_GRAPH_DOT config key, you can dump GraphViz* .dot files with annotations of devices per layer. - -The heterogeneous plugin can generate two files: - -* `hetero_affinity_.dot` - annotation of affinities per layer. This file is written to the disk only if the default fallback policy was executed -* `hetero_subgraphs_.dot` - annotation of affinities per graph. This file is written to the disk during execution of `ICNNNetwork::LoadNetwork()` for the heterogeneous plugin - -#### To Generate the .dot Files - -```python -ie = IECore() -ie.set_config( config={'HETERO_DUMP_GRAPH_DOT' : 'YES'}, device_name='HETERO') -``` - -You can use the GraphViz* utility or a file converter to view the images. On the Ubuntu* operating system, you can use xdot: - -* `sudo apt-get install xdot` -* `xdot hetero_subgraphs.dot` - -You can use performance data (in sample applications, it is the option `-pc`) to get the performance data on each subgraph. - -Here is an example of the output for Googlenet v1 running on HDDL with fallback to CPU: - -``` -subgraph1: 1. input preprocessing (mean data/HDDL):EXECUTED layerType: realTime: 129 cpu: 129 execType: -subgraph1: 2. input transfer to DDR:EXECUTED layerType: realTime: 201 cpu: 0 execType: -subgraph1: 3. HDDL execute time:EXECUTED layerType: realTime: 3808 cpu: 0 execType: -subgraph1: 4. output transfer from DDR:EXECUTED layerType: realTime: 55 cpu: 0 execType: -subgraph1: 5. HDDL output postprocessing:EXECUTED layerType: realTime: 7 cpu: 7 execType: -subgraph1: 6. 
copy to IE blob:EXECUTED layerType: realTime: 2 cpu: 2 execType: -subgraph2: out_prob: NOT_RUN layerType: Output realTime: 0 cpu: 0 execType: unknown -subgraph2: prob: EXECUTED layerType: SoftMax realTime: 10 cpu: 10 execType: ref -Total time: 4212 microseconds -``` - - ### See Also [Supported Devices](Supported_Devices.md) diff --git a/docs/OV_Runtime_UG/supported_plugins/MULTI.md b/docs/OV_Runtime_UG/supported_plugins/MULTI.md index ff8c73a8131..64659111a3c 100644 --- a/docs/OV_Runtime_UG/supported_plugins/MULTI.md +++ b/docs/OV_Runtime_UG/supported_plugins/MULTI.md @@ -32,7 +32,7 @@ Following the OpenVINO™ convention of labeling devices, the Multi-Device plugi | "MULTI_DEVICE_PRIORITIES" | comma-separated device names with no spaces | N/A | Prioritized list of devices | You can set the configuration directly as a string, or use the metric key `MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES from the `multi/multi_device_config.hpp` file, which defines the same string. - + Basically, there are three ways to specify the devices to be use by the "MULTI": @snippet snippets/MULTI0.cpp part0 @@ -86,7 +86,7 @@ Note that while the performance of accelerators combines really well with Multi- See the [Using the Multi-Device with OpenVINO samples and benchmarking the performance](#using-the-multi-device-with-openvino-samples-and-benchmarking-the-performance) section below. ### Querying the Optimal Number of Inference Requests -You can use the new GetMetric API to query the optimal number of requests. Similarly, when using the Multi-Device you don't need to sum over included devices yourself, you can query metric directly: +You can use the [Properties API](../PropertiesAPI.md) API to query the optimal number of requests. Similarly, when using the Multi-Device you don't need to sum over included devices yourself, you can query property directly: @snippet snippets/MULTI5.cpp part5 diff --git a/docs/snippets/GPU_Metric1.cpp b/docs/snippets/GPU_Metric1.cpp index 64e959233e5..3cfae3a5a2e 100644 --- a/docs/snippets/GPU_Metric1.cpp +++ b/docs/snippets/GPU_Metric1.cpp @@ -9,16 +9,16 @@ uint32_t n_streams = 2; int64_t available_device_mem_size = 3221225472; ov::AnyMap options = { ov::hint::model(model), // Required. Set the address of the target network. If this is not set, the MAX_BATCH_SIZE returns 1. - ov::num_streams(n_streams), // Optional. Set only when you want to estimate max batch size for a specific throughtput streams. Default is 1 or throughtput streams set by SetConfig. + ov::num_streams(n_streams), // Optional. Set only when you want to estimate max batch size for a specific throughtput streams. Default is 1 or throughtput streams set by set_property. ov::intel_gpu::hint::available_device_mem(available_device_mem_size) // Optional. Set only when you want to limit the available device mem size. }; uint32_t max_batch_size = core.get_property("GPU", ov::max_batch_size, options); //! [part1] //! [part2] -// This is not entirely GPU-specific metric (so METRIC_KEY is used rather than GPU_METRIC_KEY below), +// This is not entirely GPU-specific property (so common `ov::` property is used rather than `ov::intel_gpu::` below), // but the GPU is the only device that supports that at the moment. -// For the GPU, the metric already accommodates limitation for the on-device memory that the MAX_BATCH_SIZE poses. +// For the GPU, the property already accommodates limitation for the on-device memory that the MAX_BATCH_SIZE poses. // so OPTIMAL_BATCH_SIZE is always less than MAX_BATCH_SIZE. 
Unlike the latter it is also aligned to the power of 2. uint32_t optimal_batch_size = core.get_property("GPU", ov::optimal_batch_size, options); //! [part2] diff --git a/docs/snippets/ov_hetero.cpp b/docs/snippets/ov_hetero.cpp new file mode 100644 index 00000000000..52874aea2bc --- /dev/null +++ b/docs/snippets/ov_hetero.cpp @@ -0,0 +1,53 @@ +#include + +int main() { +ov::Core core; +auto model = core.read_model("sample.xml"); +//! [set_manual_affinities] +for (auto && op : model->get_ops()) { + op->get_rt_info()["affinity"] = "CPU"; +} +//! [set_manual_affinities] + +//! [fix_automatic_affinities] +// This example demonstrates how to perform default affinity initialization and then +// correct affinity manually for some layers +const std::string device = "HETERO:GPU,CPU"; + +// query_model result contains mapping of supported operations to devices +auto supported_ops = core.query_model(model, device); + +// update default affinities manually for specific operations +supported_ops["operation_name"] = "CPU"; + +// set affinities to a model +for (auto&& node : model->get_ops()) { + auto& affinity = supported_ops[node->get_friendly_name()]; + // Store affinity mapping using op runtime information + node->get_rt_info()["affinity"] = affinity; +} + +// load model with manually set affinities +auto compiled_model = core.compile_model(model, device); +//! [fix_automatic_affinities] + +//! [compile_model] +{ + auto compiled_model = core.compile_model(model, "HETERO:GPU,CPU"); + // or with ov::device::priorities with multiple args + compiled_model = core.compile_model(model, "HETERO", ov::device::priorities("GPU", "CPU")); + // or with ov::device::priorities with a single argument + compiled_model = core.compile_model(model, "HETERO", ov::device::priorities("GPU,CPU")); +} +//! [compile_model] +{ +//! [configure_fallback_devices] + auto compiled_model = core.compile_model(model, "HETERO", + ov::device::priorities("GPU", "CPU"), // GPU with fallback to CPU + ov::device::properties("CPU", ov::enable_profiling(true)), // profiling is enabled only for CPU + ov::device::properties("GPU", ov::hint::inference_precision(ov::element::f16)) // FP16 inference precision only for GPU + ); +//! [configure_fallback_devices] +} +return 0; +} diff --git a/docs/snippets/ov_hetero.py b/docs/snippets/ov_hetero.py new file mode 100644 index 00000000000..4f42847efe0 --- /dev/null +++ b/docs/snippets/ov_hetero.py @@ -0,0 +1,55 @@ +#include + +int main() { +ov::Core core; +auto model = core.read_model("sample.xml"); +//! [set_manual_affinities] +for (auto && op : model->get_ops()) { + op->get_rt_info()["affinity"] = "CPU"; +} +//! [set_manual_affinities] + +//! [fix_automatic_affinities] +// This example demonstrates how to perform default affinity initialization and then +// correct affinity manually for some layers +const std::string device = "HETERO:GPU,CPU"; + +// query_model result contains mapping of supported operations to devices +auto supported_ops = core.query_model(model, device); + +// update default affinities manually for specific operations +supported_ops["operation_name"] = "CPU"; + +// set affinities to a model +for (auto&& node : model->get_ops()) { + auto& affinity = supported_ops[node->get_friendly_name()]; + // Store affinity mapping using op runtime information + node->get_rt_info()["affinity"] = affinity; +} + +// load model with manually set affinities +auto compiled_model = core.compile_model(model, device); +//! [fix_automatic_affinities] + +//! 
[compile_model] +{ + auto compiled_model = core.compile_model(model, "HETERO:GPU,CPU"); +} +{ + auto compiled_model = core.compile_model(model, "HETERO", ov::device::priorities("GPU", "CPU")); +} +{ + auto compiled_model = core.compile_model(model, "HETERO", ov::device::priorities("GPU,CPU")); +} +//! [compile_model] +{ +//! [configure_fallback_devices] + auto compiled_model = core.compile_model(model, "HETERO", + ov::device::priorities("GPU", "CPU"), // GPU with fallback to CPU + ov::device::properties("CPU", ov::enable_profiling(true)), // profiling is enabled only for CPU + ov::device::properties("GPU", ov::hint::inference_precision(ov::element::f16)) // FP16 inference precision only for GPU + ); +//! [configure_fallback_devices] +} +return 0; +} diff --git a/docs/snippets/ov_properties_api.cpp b/docs/snippets/ov_properties_api.cpp new file mode 100644 index 00000000000..2e3d761cc2b --- /dev/null +++ b/docs/snippets/ov_properties_api.cpp @@ -0,0 +1,47 @@ +#include + +int main() { +//! [part0] +ov::Core core; +auto available_devices = core.get_available_devices(); +//! [part0] + +//! [part1] +auto device_priorites = core.get_property("HETERO", ov::device::priorities); +//! [part1] + +//! [part2] +auto cpu_device_name = core.get_property("GPU", ov::device::full_name); +//! [part2] + +//! [part3] +auto model = core.read_model("sample.xml"); +{ + auto compiled_model = core.compile_model(model, "CPU", + ov::hint::performance_mode(ov::hint::PerformanceMode::THROUGHPUT), + ov::hint::inference_precision(ov::element::f32)); +} +//! [part3] + +//! [part4] +{ + auto compiled_model = core.compile_model(model, "CPU"); + auto nireq = compiled_model.get_property(ov::optimal_number_of_infer_requests); +} +//! [part4] + +//! [part5] +{ + auto compiled_model = core.compile_model(model, "MYRIAD"); + auto temperature = compiled_model.get_property(ov::device::thermal); +} +//! [part5] + +//! [part6] +{ + auto compiled_model = core.compile_model(model, "CPU"); + auto nthreads = compiled_model.get_property(ov::inference_num_threads); +} +//! [part6] +return 0; +} diff --git a/samples/cpp/hello_query_device/README.md b/samples/cpp/hello_query_device/README.md index 8b9ee2b12d6..a3cd7ae034f 100644 --- a/samples/cpp/hello_query_device/README.md +++ b/samples/cpp/hello_query_device/README.md @@ -1,6 +1,6 @@ # Hello Query Device C++ Sample {#openvino_inference_engine_samples_hello_query_device_README} -This sample demonstrates how to execute an query OpenVINO™ Runtime devices, prints their metrics and default configuration values, using [Query Device API feature](../../../docs/OV_Runtime_UG/InferenceEngine_QueryAPI.md). +This sample demonstrates how to execute an query OpenVINO™ Runtime devices, prints their metrics and default configuration values, using [Properties API](../../../docs/OV_Runtime_UG/PropertiesAPI.md). The following C++ API is used in the application: diff --git a/samples/python/hello_query_device/README.md b/samples/python/hello_query_device/README.md index a37852be755..67087932a69 100644 --- a/samples/python/hello_query_device/README.md +++ b/samples/python/hello_query_device/README.md @@ -1,6 +1,6 @@ # Hello Query Device Python* Sample {#openvino_inference_engine_ie_bridges_python_sample_hello_query_device_README} -This sample demonstrates how to show OpenVINO™ Runtime devices and prints their metrics and default configuration values using [Query Device API feature](../../../docs/OV_Runtime_UG/InferenceEngine_QueryAPI.md). 
+This sample demonstrates how to show OpenVINO™ Runtime devices and prints their metrics and default configuration values using [Query Device API feature](../../../docs/OV_Runtime_UG/PropertiesAPI.md). The following Python API is used in the application: @@ -28,7 +28,7 @@ python hello_query_device.py ## Sample Output -The application prints all available devices with their supported metrics and default values for configuration parameters. +The application prints all available devices with their supported metrics and default values for configuration parameters. For example: ``` From f53f09f020fdc6fb61be2f868b49008b92397e01 Mon Sep 17 00:00:00 2001 From: Gorokhov Dmitriy Date: Mon, 21 Feb 2022 16:09:29 +0300 Subject: [PATCH 032/310] [CPU] Fixed legacy post ops behavior (#10542) --- src/plugins/intel_cpu/src/nodes/eltwise.cpp | 6 ++++-- src/plugins/intel_cpu/src/nodes/fake_quantize.cpp | 4 +++- src/plugins/intel_cpu/thirdparty/mkl-dnn | 2 +- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/plugins/intel_cpu/src/nodes/eltwise.cpp b/src/plugins/intel_cpu/src/nodes/eltwise.cpp index 43c4d83213b..4273d8234c9 100644 --- a/src/plugins/intel_cpu/src/nodes/eltwise.cpp +++ b/src/plugins/intel_cpu/src/nodes/eltwise.cpp @@ -2083,7 +2083,6 @@ void MKLDNNEltwiseNode::appendPostOpsImpl(mkldnn::post_ops& ops, const VectorDim } } else { const size_t chIdx = postOpDims.size() > 1 ? getFusingAxis() : 0; - constexpr int bufferAlignment = 16; // always align for legacy scale/shift post ops // since legacy depthwise post ops mechanism requires broadcasted data we need to reinitilize it in case of changed shape if (depthwiseData.empty() || depthwiseDataSize != 2 * postOpDims[chIdx]) { depthwiseData.clear(); @@ -2106,7 +2105,10 @@ void MKLDNNEltwiseNode::appendPostOpsImpl(mkldnn::post_ops& ops, const VectorDim } depthwiseDataSize = 2 * postOpDims[chIdx]; - depthwiseData.resize(rnd_up(depthwiseData.size(), bufferAlignment), 0); + // always align for legacy scale/shift post ops + constexpr int bufferAlignment = 16; + int bufferPaddingSize = rnd_up(postOpDims[chIdx], bufferAlignment) - postOpDims[chIdx]; + depthwiseData.resize(depthwiseDataSize + bufferPaddingSize, 0); } if (depthwiseData.empty()) diff --git a/src/plugins/intel_cpu/src/nodes/fake_quantize.cpp b/src/plugins/intel_cpu/src/nodes/fake_quantize.cpp index 7b3a478745f..dc7fcc988e8 100644 --- a/src/plugins/intel_cpu/src/nodes/fake_quantize.cpp +++ b/src/plugins/intel_cpu/src/nodes/fake_quantize.cpp @@ -1791,7 +1791,9 @@ void MKLDNNFakeQuantizeNode::initializePostOpDataLegacy(const VectorDims &dims, quantizationData.insert(quantizationData.end(), outputScale.begin(), outputScale.end()); quantizationData.insert(quantizationData.end(), outputShift.begin(), outputShift.end()); quantizationDataSize = quantizationData.size(); - quantizationData.resize(rnd_up(quantizationData.size(), bufferAlignment), 0); + + int bufferPaddingSize = rnd_up(outputShift.size(), bufferAlignment) - outputShift.size(); + quantizationData.resize(quantizationDataSize + bufferPaddingSize, 0); } isPostOpDataInitialized = true; diff --git a/src/plugins/intel_cpu/thirdparty/mkl-dnn b/src/plugins/intel_cpu/thirdparty/mkl-dnn index 7e92a2dfe89..82ca2f931c1 160000 --- a/src/plugins/intel_cpu/thirdparty/mkl-dnn +++ b/src/plugins/intel_cpu/thirdparty/mkl-dnn @@ -1 +1 @@ -Subproject commit 7e92a2dfe893d65f46b5e267dcf14972e18fb814 +Subproject commit 82ca2f931c1d588b67d154d873136d4af1ffb3a8 From ae42bf1e86fa78da67629eba2ca869a33e9448a9 Mon Sep 17 00:00:00 2001 From: Irina Efode Date: 
Mon, 21 Feb 2022 16:22:01 +0300 Subject: [PATCH 033/310] [IE TESTS] Functional test review. Part1 (#10328) * [IE TESTS] Move Preprocess&Chacing tests to plugin. Add Cachinf tests for OV2.0 * Conformance * Fix * Apply Ilya's comments * Update caching_tests.cpp * Fixes * Update mkldnn_plugin.cpp * try to skip * try to fix * Fix cpu * tmp --- .../behavior/ov_plugin/caching_tests.cpp | 25 ++ .../{caching => plugin}/caching_tests.cpp | 2 +- .../preprocessing.cpp | 2 +- .../set_preprocess.cpp | 2 +- src/plugins/intel_cpu/src/plugin.cpp | 2 +- .../src/behavior/ov_plugin/caching_tests.cpp | 40 ++++ .../{caching => plugin}/caching_tests.cpp | 2 +- .../set_preprocess.cpp | 2 +- .../behavior/ov_plugin/caching_tests.cpp | 96 ++++++++ .../{caching => plugin}/caching_tests.cpp | 2 +- .../preprocessing.cpp | 2 +- .../set_preprocess.cpp | 2 +- .../skip_tests_config.cpp | 4 + .../cpu/single_layer_tests/activation.cpp | 2 +- .../single_layer_tests/adaptive_pooling.cpp | 2 +- .../cpu/single_layer_tests/batch_to_space.cpp | 2 +- .../cpu/single_layer_tests/broadcast.cpp | 2 +- .../plugin/cpu/single_layer_tests/concat.cpp | 2 +- .../cpu/single_layer_tests/conversion.cpp | 2 +- .../cpu/single_layer_tests/convolution.cpp | 4 +- .../convolution_backprop_data.cpp | 2 +- .../cpu/single_layer_tests/ctc_Loss.cpp | 2 +- .../single_layer_tests/ctc_greedy_decoder.cpp | 2 +- .../ctc_greedy_decoder_seq_len.cpp | 2 +- .../plugin/cpu/single_layer_tests/cum_sum.cpp | 2 +- .../deformable_convolution.cpp | 2 +- .../cpu/single_layer_tests/depth_to_space.cpp | 2 +- .../plugin/cpu/single_layer_tests/eltwise.cpp | 2 +- .../embedding_bag_offsets_sum.cpp | 2 +- .../embedding_bag_packed_sum.cpp | 2 +- .../embedding_segments_sum.cpp | 2 +- .../extract_image_patches.cpp | 2 +- .../cpu/single_layer_tests/fake_quantize.cpp | 2 +- .../plugin/cpu/single_layer_tests/gather.cpp | 2 +- .../single_layer_tests/group_convolution.cpp | 4 +- .../group_convolution_backprop_data.cpp | 2 +- .../cpu/single_layer_tests/gru_cell.cpp | 2 +- .../cpu/single_layer_tests/gru_sequence.cpp | 2 +- .../cpu/single_layer_tests/interpolate.cpp | 2 +- .../cpu/single_layer_tests/log_softmax.cpp | 2 +- .../plugin/cpu/single_layer_tests/lrn.cpp | 2 +- .../cpu/single_layer_tests/lstm_cell.cpp | 2 +- .../cpu/single_layer_tests/lstm_sequence.cpp | 2 +- .../plugin/cpu/single_layer_tests/matmul.cpp | 2 +- .../plugin/cpu/single_layer_tests/mvn.cpp | 2 +- .../non_max_suppression.cpp | 2 +- .../plugin/cpu/single_layer_tests/nonzero.cpp | 2 +- .../cpu/single_layer_tests/normalize.cpp | 2 +- .../plugin/cpu/single_layer_tests/one_hot.cpp | 2 +- .../plugin/cpu/single_layer_tests/pad.cpp | 2 +- .../plugin/cpu/single_layer_tests/pooling.cpp | 4 +- .../cpu/single_layer_tests/prior_box.cpp | 2 +- .../prior_box_clustered.cpp | 2 +- .../cpu/single_layer_tests/proposal.cpp | 2 +- .../cpu/single_layer_tests/reduce_ops.cpp | 2 +- .../cpu/single_layer_tests/region_yolo.cpp | 2 +- .../cpu/single_layer_tests/rnn_cell.cpp | 2 +- .../cpu/single_layer_tests/rnn_sequence.cpp | 2 +- .../cpu/single_layer_tests/roi_pooling.cpp | 2 +- .../cpu/single_layer_tests/roialign.cpp | 2 +- .../single_layer_tests/scatter_ND_update.cpp | 2 +- .../scatter_elements_update.cpp | 2 +- .../cpu/single_layer_tests/scatter_update.cpp | 2 +- .../cpu/single_layer_tests/shape_ops.cpp | 2 +- .../single_layer_tests/shuffle_channels.cpp | 2 +- .../plugin/cpu/single_layer_tests/slice.cpp | 2 +- .../plugin/cpu/single_layer_tests/softmax.cpp | 2 +- .../cpu/single_layer_tests/space_to_batch.cpp | 2 +- 
.../cpu/single_layer_tests/space_to_depth.cpp | 2 +- .../cpu/single_layer_tests/strided_slice.cpp | 2 +- .../plugin/cpu/single_layer_tests/tile.cpp | 2 +- .../plugin/cpu/single_layer_tests/topk.cpp | 2 +- .../cpu/single_layer_tests/transpose.cpp | 2 +- .../cpu/single_layer_tests/variadic_split.cpp | 2 +- .../subgraph_tests/src/conv_sum_broadcast.cpp | 2 +- .../cpu/subgraph_tests/src/reshape_fc.cpp | 2 +- .../subgraph_tests/src/seq_native_order.cpp | 4 +- .../behavior/ov_plugin/caching_tests.cpp | 28 +++ .../{caching => plugin}/caching_tests.cpp | 2 +- .../behavior/ov_plugin/caching_tests.cpp | 32 +++ .../{caching => plugin}/caching_tests.cpp | 2 +- .../set_preprocess.cpp | 2 +- .../skip_tests_config.cpp | 2 + .../behavior/ov_plugin/caching_tests.cpp | 49 ++++ .../{caching => plugin}/caching_tests.cpp | 2 +- .../set_preprocess.cpp | 2 +- .../behavior/ov_plugin/caching_tests.hpp | 51 +++++ .../{caching => plugin}/caching_tests.hpp | 0 .../preprocessing.hpp | 0 .../set_preprocess.hpp | 0 .../src/behavior/ov_plugin/caching_tests.cpp | 215 ++++++++++++++++++ .../{caching => plugin}/caching_tests.cpp | 2 +- .../shared_test_classes/base/ov_subgraph.hpp | 2 +- .../src/base/ov_subgraph.cpp | 4 +- 94 files changed, 628 insertions(+), 86 deletions(-) create mode 100644 docs/template_plugin/tests/functional/shared_tests_instances/behavior/ov_plugin/caching_tests.cpp rename docs/template_plugin/tests/functional/shared_tests_instances/behavior/{caching => plugin}/caching_tests.cpp (95%) rename docs/template_plugin/tests/functional/shared_tests_instances/behavior/{preprocessing => plugin}/preprocessing.cpp (97%) rename docs/template_plugin/tests/functional/shared_tests_instances/behavior/{preprocessing => plugin}/set_preprocess.cpp (98%) create mode 100644 src/tests/functional/plugin/conformance/test_runner/api_conformance_runner/src/behavior/ov_plugin/caching_tests.cpp rename src/tests/functional/plugin/conformance/test_runner/api_conformance_runner/src/behavior/{caching => plugin}/caching_tests.cpp (96%) rename src/tests/functional/plugin/conformance/test_runner/api_conformance_runner/src/behavior/{preprocessing => plugin}/set_preprocess.cpp (99%) create mode 100644 src/tests/functional/plugin/cpu/shared_tests_instances/behavior/ov_plugin/caching_tests.cpp rename src/tests/functional/plugin/cpu/shared_tests_instances/behavior/{caching => plugin}/caching_tests.cpp (99%) rename src/tests/functional/plugin/cpu/shared_tests_instances/behavior/{preprocessing => plugin}/preprocessing.cpp (97%) rename src/tests/functional/plugin/cpu/shared_tests_instances/behavior/{preprocessing => plugin}/set_preprocess.cpp (99%) create mode 100644 src/tests/functional/plugin/gna/shared_tests_instances/behavior/ov_plugin/caching_tests.cpp rename src/tests/functional/plugin/gna/shared_tests_instances/behavior/{caching => plugin}/caching_tests.cpp (95%) create mode 100644 src/tests/functional/plugin/gpu/shared_tests_instances/behavior/ov_plugin/caching_tests.cpp rename src/tests/functional/plugin/gpu/shared_tests_instances/behavior/{caching => plugin}/caching_tests.cpp (96%) rename src/tests/functional/plugin/gpu/shared_tests_instances/behavior/{preprocessing => plugin}/set_preprocess.cpp (98%) create mode 100644 src/tests/functional/plugin/myriad/shared_tests_instances/behavior/ov_plugin/caching_tests.cpp rename src/tests/functional/plugin/myriad/shared_tests_instances/behavior/{caching => plugin}/caching_tests.cpp (97%) rename src/tests/functional/plugin/myriad/shared_tests_instances/behavior/{preprocessing => 
plugin}/set_preprocess.cpp (98%) create mode 100644 src/tests/functional/plugin/shared/include/behavior/ov_plugin/caching_tests.hpp rename src/tests/functional/plugin/shared/include/behavior/{caching => plugin}/caching_tests.hpp (100%) rename src/tests/functional/plugin/shared/include/behavior/{preprocessing => plugin}/preprocessing.hpp (100%) rename src/tests/functional/plugin/shared/include/behavior/{preprocessing => plugin}/set_preprocess.hpp (100%) create mode 100644 src/tests/functional/plugin/shared/src/behavior/ov_plugin/caching_tests.cpp rename src/tests/functional/plugin/shared/src/behavior/{caching => plugin}/caching_tests.cpp (99%) diff --git a/docs/template_plugin/tests/functional/shared_tests_instances/behavior/ov_plugin/caching_tests.cpp b/docs/template_plugin/tests/functional/shared_tests_instances/behavior/ov_plugin/caching_tests.cpp new file mode 100644 index 00000000000..636631e3e19 --- /dev/null +++ b/docs/template_plugin/tests/functional/shared_tests_instances/behavior/ov_plugin/caching_tests.cpp @@ -0,0 +1,25 @@ +// Copyright (C) 2018-2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "behavior/ov_plugin/caching_tests.hpp" + +using namespace ov::test::behavior; + +namespace { + static const std::vector precisionsTemplate = { + ov::element::f32, + }; + + static const std::vector batchSizesTemplate = { + 1, 2 + }; + + INSTANTIATE_TEST_SUITE_P(smoke_Behavior_CachingSupportCase_Template, CompileModelCacheTestBase, + ::testing::Combine( + ::testing::ValuesIn(CompileModelCacheTestBase::getStandardFunctions()), + ::testing::ValuesIn(precisionsTemplate), + ::testing::ValuesIn(batchSizesTemplate), + ::testing::Values(CommonTestUtils::DEVICE_TEMPLATE)), + CompileModelCacheTestBase::getTestCaseName); +} // namespace diff --git a/docs/template_plugin/tests/functional/shared_tests_instances/behavior/caching/caching_tests.cpp b/docs/template_plugin/tests/functional/shared_tests_instances/behavior/plugin/caching_tests.cpp similarity index 95% rename from docs/template_plugin/tests/functional/shared_tests_instances/behavior/caching/caching_tests.cpp rename to docs/template_plugin/tests/functional/shared_tests_instances/behavior/plugin/caching_tests.cpp index 5e0a9d72935..700663bb5a0 100644 --- a/docs/template_plugin/tests/functional/shared_tests_instances/behavior/caching/caching_tests.cpp +++ b/docs/template_plugin/tests/functional/shared_tests_instances/behavior/plugin/caching_tests.cpp @@ -2,7 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "behavior/caching/caching_tests.hpp" +#include "behavior/plugin/caching_tests.hpp" using namespace LayerTestsDefinitions; diff --git a/docs/template_plugin/tests/functional/shared_tests_instances/behavior/preprocessing/preprocessing.cpp b/docs/template_plugin/tests/functional/shared_tests_instances/behavior/plugin/preprocessing.cpp similarity index 97% rename from docs/template_plugin/tests/functional/shared_tests_instances/behavior/preprocessing/preprocessing.cpp rename to docs/template_plugin/tests/functional/shared_tests_instances/behavior/plugin/preprocessing.cpp index bfdaf270353..af612c11697 100644 --- a/docs/template_plugin/tests/functional/shared_tests_instances/behavior/preprocessing/preprocessing.cpp +++ b/docs/template_plugin/tests/functional/shared_tests_instances/behavior/plugin/preprocessing.cpp @@ -2,7 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "behavior/preprocessing/preprocessing.hpp" +#include "behavior/plugin/preprocessing.hpp" #ifdef ENABLE_GAPI_PREPROCESSING diff --git 
a/docs/template_plugin/tests/functional/shared_tests_instances/behavior/preprocessing/set_preprocess.cpp b/docs/template_plugin/tests/functional/shared_tests_instances/behavior/plugin/set_preprocess.cpp similarity index 98% rename from docs/template_plugin/tests/functional/shared_tests_instances/behavior/preprocessing/set_preprocess.cpp rename to docs/template_plugin/tests/functional/shared_tests_instances/behavior/plugin/set_preprocess.cpp index 5e516a8da35..ffc55943b0b 100644 --- a/docs/template_plugin/tests/functional/shared_tests_instances/behavior/preprocessing/set_preprocess.cpp +++ b/docs/template_plugin/tests/functional/shared_tests_instances/behavior/plugin/set_preprocess.cpp @@ -2,7 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "behavior/preprocessing/set_preprocess.hpp" +#include "behavior/plugin/set_preprocess.hpp" #ifdef ENABLE_GAPI_PREPROCESSING diff --git a/src/plugins/intel_cpu/src/plugin.cpp b/src/plugins/intel_cpu/src/plugin.cpp index b2c4a7f36cd..93c29e01884 100644 --- a/src/plugins/intel_cpu/src/plugin.cpp +++ b/src/plugins/intel_cpu/src/plugin.cpp @@ -854,7 +854,7 @@ Parameter Engine::GetMetric(const std::string& name, const std::map range = std::make_tuple(1, 1, 1); diff --git a/src/tests/functional/plugin/conformance/test_runner/api_conformance_runner/src/behavior/ov_plugin/caching_tests.cpp b/src/tests/functional/plugin/conformance/test_runner/api_conformance_runner/src/behavior/ov_plugin/caching_tests.cpp new file mode 100644 index 00000000000..77eaaa1acf3 --- /dev/null +++ b/src/tests/functional/plugin/conformance/test_runner/api_conformance_runner/src/behavior/ov_plugin/caching_tests.cpp @@ -0,0 +1,40 @@ +// Copyright (C) 2018-2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "behavior/ov_plugin/caching_tests.hpp" +#include +#include +#include "conformance.hpp" + +namespace { +using namespace ov::test::behavior; +using namespace ngraph; + +static const std::vector ovElemTypesTemplate = { + ov::element::f64, + ov::element::f32, + ov::element::f16, + ov::element::i64, + ov::element::i32, + ov::element::i16, + ov::element::i8, + ov::element::u64, + ov::element::u32, + ov::element::u16, + ov::element::u8, + ov::element::boolean, +}; + +static const std::vector ovBatchSizesTemplate = { + 1, 2 +}; + +INSTANTIATE_TEST_SUITE_P(smoke_Behavior_CachingSupportCase, CompileModelCacheTestBase, + ::testing::Combine( + ::testing::ValuesIn(CompileModelCacheTestBase::getStandardFunctions()), + ::testing::ValuesIn(ovElemTypesTemplate), + ::testing::ValuesIn(ovBatchSizesTemplate), + ::testing::Values(ov::test::conformance::targetDevice)), + CompileModelCacheTestBase::getTestCaseName); +} // namespace diff --git a/src/tests/functional/plugin/conformance/test_runner/api_conformance_runner/src/behavior/caching/caching_tests.cpp b/src/tests/functional/plugin/conformance/test_runner/api_conformance_runner/src/behavior/plugin/caching_tests.cpp similarity index 96% rename from src/tests/functional/plugin/conformance/test_runner/api_conformance_runner/src/behavior/caching/caching_tests.cpp rename to src/tests/functional/plugin/conformance/test_runner/api_conformance_runner/src/behavior/plugin/caching_tests.cpp index cbebb8fb136..0489692e643 100644 --- a/src/tests/functional/plugin/conformance/test_runner/api_conformance_runner/src/behavior/caching/caching_tests.cpp +++ b/src/tests/functional/plugin/conformance/test_runner/api_conformance_runner/src/behavior/plugin/caching_tests.cpp @@ -2,7 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 // -#include 
"behavior/caching/caching_tests.hpp" +#include "behavior/plugin/caching_tests.hpp" #include #include #include "conformance.hpp" diff --git a/src/tests/functional/plugin/conformance/test_runner/api_conformance_runner/src/behavior/preprocessing/set_preprocess.cpp b/src/tests/functional/plugin/conformance/test_runner/api_conformance_runner/src/behavior/plugin/set_preprocess.cpp similarity index 99% rename from src/tests/functional/plugin/conformance/test_runner/api_conformance_runner/src/behavior/preprocessing/set_preprocess.cpp rename to src/tests/functional/plugin/conformance/test_runner/api_conformance_runner/src/behavior/plugin/set_preprocess.cpp index 5ea5cc8f1aa..d16c3a0b0a2 100644 --- a/src/tests/functional/plugin/conformance/test_runner/api_conformance_runner/src/behavior/preprocessing/set_preprocess.cpp +++ b/src/tests/functional/plugin/conformance/test_runner/api_conformance_runner/src/behavior/plugin/set_preprocess.cpp @@ -2,7 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "behavior/preprocessing/set_preprocess.hpp" +#include "behavior/plugin/set_preprocess.hpp" #include "api_conformance_helpers.hpp" namespace { diff --git a/src/tests/functional/plugin/cpu/shared_tests_instances/behavior/ov_plugin/caching_tests.cpp b/src/tests/functional/plugin/cpu/shared_tests_instances/behavior/ov_plugin/caching_tests.cpp new file mode 100644 index 00000000000..c6906840b01 --- /dev/null +++ b/src/tests/functional/plugin/cpu/shared_tests_instances/behavior/ov_plugin/caching_tests.cpp @@ -0,0 +1,96 @@ +// Copyright (C) 2018-2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "behavior/ov_plugin/caching_tests.hpp" +#include +#include + +using namespace ov::test::behavior; +using namespace ngraph; + +namespace { + static const std::vector precisionsCPU = { + ngraph::element::f32, + ngraph::element::f16, + ngraph::element::i32, + ngraph::element::i64, + ngraph::element::i8, + ngraph::element::u8, + ngraph::element::i16, + ngraph::element::u16, + }; + + static const std::vector batchSizesCPU = { + 1, 2 + }; + + static const std::vector precisionsCPUInternal = { + ngraph::element::f32 + }; + + static const std::vector batchSizesCPUInternal = { + 1 + }; + + static std::shared_ptr simple_function_non_max_supression_internal(ngraph::element::Type, size_t) { + auto boxes = std::make_shared(element::f32, Shape{1, 1000, 4}); + auto scores = std::make_shared(element::f32, Shape{1, 1, 1000}); + auto max_output_boxes_per_class = ov::op::v0::Constant::create(element::i32, Shape{1}, {10}); + auto iou_threshold = ov::op::v0::Constant::create(element::f32, Shape{1}, {0.75}); + auto score_threshold = ov::op::v0::Constant::create(element::f32, Shape{1}, {0.7}); + auto nms = std::make_shared(boxes, scores, max_output_boxes_per_class, + iou_threshold, score_threshold, 0, true, element::i32); + auto res = std::make_shared(nms); + auto func = std::make_shared(NodeVector{nms}, ParameterVector{boxes, scores}); + return func; + } + + static std::shared_ptr simple_function_matrix_nms_internal(ngraph::element::Type, size_t) { + auto boxes = std::make_shared(element::f32, Shape{1, 1000, 4}); + auto scores = std::make_shared(element::f32, Shape{1, 1, 1000}); + ov::op::v8::MatrixNms::Attributes attr; + // convert_precision does not support internal op 'NmsStaticShapeIE' + attr.output_type = element::i32; + auto nms = std::make_shared>(boxes, scores, attr); + auto res = std::make_shared(nms); + auto func = std::make_shared(NodeVector{nms}, ParameterVector{boxes, scores}); + return func; + } 
+ + static std::shared_ptr simple_function_multiclass_nms_internal(ngraph::element::Type, size_t) { + auto boxes = std::make_shared(element::f32, Shape{1, 1000, 4}); + auto scores = std::make_shared(element::f32, Shape{1, 1, 1000}); + ov::op::v8::MulticlassNms::Attributes attr; + attr.output_type = element::i32; + auto nms = std::make_shared>(boxes, scores, attr); + auto res = std::make_shared(nms); + auto func = std::make_shared(NodeVector{nms}, ParameterVector{boxes, scores}); + return func; + } + + static std::vector internal_functions_cpu() { + std::vector funcs = { + ovModelWithName { simple_function_non_max_supression_internal, "NonMaxSuppressionIEInternal"}, + ovModelWithName { simple_function_matrix_nms_internal, "NmsStaticShapeIE_MatrixNms"}, + ovModelWithName { simple_function_multiclass_nms_internal, "NmsStaticShapeIE_MulticlassNms"}, + }; + return funcs; + } + + INSTANTIATE_TEST_SUITE_P(smoke_CachingSupportCase_CPU, CompileModelCacheTestBase, + ::testing::Combine( + ::testing::ValuesIn(CompileModelCacheTestBase::getStandardFunctions()), + ::testing::ValuesIn(precisionsCPU), + ::testing::ValuesIn(batchSizesCPU), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + CompileModelCacheTestBase::getTestCaseName); + + INSTANTIATE_TEST_SUITE_P(smoke_CachingSupportCase_CPU_Internal, CompileModelCacheTestBase, + ::testing::Combine( + ::testing::ValuesIn(internal_functions_cpu()), + ::testing::ValuesIn(precisionsCPUInternal), + ::testing::ValuesIn(batchSizesCPUInternal), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + CompileModelCacheTestBase::getTestCaseName); +} // namespace diff --git a/src/tests/functional/plugin/cpu/shared_tests_instances/behavior/caching/caching_tests.cpp b/src/tests/functional/plugin/cpu/shared_tests_instances/behavior/plugin/caching_tests.cpp similarity index 99% rename from src/tests/functional/plugin/cpu/shared_tests_instances/behavior/caching/caching_tests.cpp rename to src/tests/functional/plugin/cpu/shared_tests_instances/behavior/plugin/caching_tests.cpp index 29259221c57..3ce624dcc09 100644 --- a/src/tests/functional/plugin/cpu/shared_tests_instances/behavior/caching/caching_tests.cpp +++ b/src/tests/functional/plugin/cpu/shared_tests_instances/behavior/plugin/caching_tests.cpp @@ -2,7 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "behavior/caching/caching_tests.hpp" +#include "behavior/plugin/caching_tests.hpp" #include #include diff --git a/src/tests/functional/plugin/cpu/shared_tests_instances/behavior/preprocessing/preprocessing.cpp b/src/tests/functional/plugin/cpu/shared_tests_instances/behavior/plugin/preprocessing.cpp similarity index 97% rename from src/tests/functional/plugin/cpu/shared_tests_instances/behavior/preprocessing/preprocessing.cpp rename to src/tests/functional/plugin/cpu/shared_tests_instances/behavior/plugin/preprocessing.cpp index b127ddaca3d..5cd18a7dfd5 100644 --- a/src/tests/functional/plugin/cpu/shared_tests_instances/behavior/preprocessing/preprocessing.cpp +++ b/src/tests/functional/plugin/cpu/shared_tests_instances/behavior/plugin/preprocessing.cpp @@ -2,7 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "behavior/preprocessing/preprocessing.hpp" +#include "behavior/plugin/preprocessing.hpp" #ifdef ENABLE_GAPI_PREPROCESSING diff --git a/src/tests/functional/plugin/cpu/shared_tests_instances/behavior/preprocessing/set_preprocess.cpp b/src/tests/functional/plugin/cpu/shared_tests_instances/behavior/plugin/set_preprocess.cpp similarity index 99% rename from 
src/tests/functional/plugin/cpu/shared_tests_instances/behavior/preprocessing/set_preprocess.cpp rename to src/tests/functional/plugin/cpu/shared_tests_instances/behavior/plugin/set_preprocess.cpp index e4ee22722d9..248eb27329f 100644 --- a/src/tests/functional/plugin/cpu/shared_tests_instances/behavior/preprocessing/set_preprocess.cpp +++ b/src/tests/functional/plugin/cpu/shared_tests_instances/behavior/plugin/set_preprocess.cpp @@ -2,7 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "behavior/preprocessing/set_preprocess.hpp" +#include "behavior/plugin/set_preprocess.hpp" #ifdef ENABLE_GAPI_PREPROCESSING diff --git a/src/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp b/src/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp index 0ab01ee68c1..f0834a5fb55 100644 --- a/src/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp +++ b/src/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp @@ -182,6 +182,10 @@ std::vector disabledTestPatterns() { R"(.*LoopLayerCPUTest.*trip_count=0.*)", R"(.*LoopForDiffShapesLayerCPUTest.*exec_cond=0.*)", R"(.*LoopForDiffShapesLayerCPUTest.*trip_count=0.*)", + // [ INFO ] Can't compile network without cache for .. with precision .. + R"(.*CompileModelCacheTestBase.*CompareWithRefImpl.*KSOFunction.*)", + R"(.*CompileModelCacheTestBase.*CompareWithRefImpl.*NonMaxSuppression.*)", + R"(.*CompileModelCacheTestBase.*CompareWithRefImpl.*Nms.*)", }; #define FIX_62820 0 diff --git a/src/tests/functional/plugin/cpu/single_layer_tests/activation.cpp b/src/tests/functional/plugin/cpu/single_layer_tests/activation.cpp index 2f7bd014ca8..ff5aaeedfaf 100644 --- a/src/tests/functional/plugin/cpu/single_layer_tests/activation.cpp +++ b/src/tests/functional/plugin/cpu/single_layer_tests/activation.cpp @@ -126,7 +126,7 @@ TEST_P(ActivationLayerCPUTest, CompareWithRefs) { SKIP_IF_CURRENT_TEST_IS_DISABLED() run(); - CheckPluginRelatedResults(executableNetwork, "Eltwise"); + CheckPluginRelatedResults(compiledModel, "Eltwise"); } diff --git a/src/tests/functional/plugin/cpu/single_layer_tests/adaptive_pooling.cpp b/src/tests/functional/plugin/cpu/single_layer_tests/adaptive_pooling.cpp index eb8b412282a..60c0b61feb4 100644 --- a/src/tests/functional/plugin/cpu/single_layer_tests/adaptive_pooling.cpp +++ b/src/tests/functional/plugin/cpu/single_layer_tests/adaptive_pooling.cpp @@ -171,7 +171,7 @@ private: TEST_P(AdaPoolLayerCPUTest, CompareWithRefs) { SKIP_IF_CURRENT_TEST_IS_DISABLED() run(); - CheckPluginRelatedResults(executableNetwork, "AdaptivePooling"); + CheckPluginRelatedResults(compiledModel, "AdaptivePooling"); } namespace { diff --git a/src/tests/functional/plugin/cpu/single_layer_tests/batch_to_space.cpp b/src/tests/functional/plugin/cpu/single_layer_tests/batch_to_space.cpp index 123ef0d54a3..28fd1baa200 100644 --- a/src/tests/functional/plugin/cpu/single_layer_tests/batch_to_space.cpp +++ b/src/tests/functional/plugin/cpu/single_layer_tests/batch_to_space.cpp @@ -87,7 +87,7 @@ TEST_P(BatchToSpaceCPULayerTest, CompareWithRefs) { SKIP_IF_CURRENT_TEST_IS_DISABLED() run(); - CheckPluginRelatedResults(executableNetwork, "BatchToSpace"); + CheckPluginRelatedResults(compiledModel, "BatchToSpace"); }; namespace { diff --git a/src/tests/functional/plugin/cpu/single_layer_tests/broadcast.cpp b/src/tests/functional/plugin/cpu/single_layer_tests/broadcast.cpp index 1d049f4bd44..ce0b339f4a8 100644 --- a/src/tests/functional/plugin/cpu/single_layer_tests/broadcast.cpp +++ 
b/src/tests/functional/plugin/cpu/single_layer_tests/broadcast.cpp @@ -190,7 +190,7 @@ TEST_P(BroadcastLayerCPUTest, CompareWithRefs) { SKIP_IF_CURRENT_TEST_IS_DISABLED() run(); - CheckPluginRelatedResults(executableNetwork, "Broadcast"); + CheckPluginRelatedResults(compiledModel, "Broadcast"); } namespace { diff --git a/src/tests/functional/plugin/cpu/single_layer_tests/concat.cpp b/src/tests/functional/plugin/cpu/single_layer_tests/concat.cpp index 14241b64755..948a71d0a18 100644 --- a/src/tests/functional/plugin/cpu/single_layer_tests/concat.cpp +++ b/src/tests/functional/plugin/cpu/single_layer_tests/concat.cpp @@ -91,7 +91,7 @@ TEST_P(ConcatLayerCPUTest, CompareWithRefs) { SKIP_IF_CURRENT_TEST_IS_DISABLED() run(); - CheckPluginRelatedResults(executableNetwork, "Concatenation"); + CheckPluginRelatedResults(compiledModel, "Concatenation"); } namespace { diff --git a/src/tests/functional/plugin/cpu/single_layer_tests/conversion.cpp b/src/tests/functional/plugin/cpu/single_layer_tests/conversion.cpp index f4058cefeab..e00b6c325c8 100644 --- a/src/tests/functional/plugin/cpu/single_layer_tests/conversion.cpp +++ b/src/tests/functional/plugin/cpu/single_layer_tests/conversion.cpp @@ -75,7 +75,7 @@ TEST_P(ConvertCPULayerTest, CompareWithRefs) { run(); - CheckPluginRelatedResults(executableNetwork, "Convert"); + CheckPluginRelatedResults(compiledModel, "Convert"); } std::vector inShapes_4D = { diff --git a/src/tests/functional/plugin/cpu/single_layer_tests/convolution.cpp b/src/tests/functional/plugin/cpu/single_layer_tests/convolution.cpp index d17c74f3d21..00e2da6c101 100755 --- a/src/tests/functional/plugin/cpu/single_layer_tests/convolution.cpp +++ b/src/tests/functional/plugin/cpu/single_layer_tests/convolution.cpp @@ -216,9 +216,9 @@ TEST_P(ConvolutionLayerCPUTest, CompareWithRefs) { run(); if (isBias) { - checkBiasFusing(executableNetwork); + checkBiasFusing(compiledModel); } - CheckPluginRelatedResults(executableNetwork, "Convolution"); + CheckPluginRelatedResults(compiledModel, "Convolution"); } namespace { diff --git a/src/tests/functional/plugin/cpu/single_layer_tests/convolution_backprop_data.cpp b/src/tests/functional/plugin/cpu/single_layer_tests/convolution_backprop_data.cpp index c97707a0aef..5061d430568 100755 --- a/src/tests/functional/plugin/cpu/single_layer_tests/convolution_backprop_data.cpp +++ b/src/tests/functional/plugin/cpu/single_layer_tests/convolution_backprop_data.cpp @@ -260,7 +260,7 @@ TEST_P(DeconvolutionLayerCPUTest, CompareWithRefs) { } run(); - CheckPluginRelatedResults(executableNetwork, "Deconvolution"); + CheckPluginRelatedResults(compiledModel, "Deconvolution"); } namespace { diff --git a/src/tests/functional/plugin/cpu/single_layer_tests/ctc_Loss.cpp b/src/tests/functional/plugin/cpu/single_layer_tests/ctc_Loss.cpp index 5efcbe54086..b7567626e82 100644 --- a/src/tests/functional/plugin/cpu/single_layer_tests/ctc_Loss.cpp +++ b/src/tests/functional/plugin/cpu/single_layer_tests/ctc_Loss.cpp @@ -189,7 +189,7 @@ TEST_P(CTCLossLayerCPUTest, CompareWithRefs) { SKIP_IF_CURRENT_TEST_IS_DISABLED(); run(); - CheckPluginRelatedResults(executableNetwork, "CTCLoss"); + CheckPluginRelatedResults(compiledModel, "CTCLoss"); } namespace { diff --git a/src/tests/functional/plugin/cpu/single_layer_tests/ctc_greedy_decoder.cpp b/src/tests/functional/plugin/cpu/single_layer_tests/ctc_greedy_decoder.cpp index 960c17d663a..3f17dfc89fd 100644 --- a/src/tests/functional/plugin/cpu/single_layer_tests/ctc_greedy_decoder.cpp +++ 
b/src/tests/functional/plugin/cpu/single_layer_tests/ctc_greedy_decoder.cpp @@ -132,7 +132,7 @@ protected: TEST_P(CTCGreedyDecoderLayerCPUTest, CompareWithRefs) { SKIP_IF_CURRENT_TEST_IS_DISABLED(); run(); - CheckPluginRelatedResults(executableNetwork, "CTCGreedyDecoder"); + CheckPluginRelatedResults(compiledModel, "CTCGreedyDecoder"); } namespace { diff --git a/src/tests/functional/plugin/cpu/single_layer_tests/ctc_greedy_decoder_seq_len.cpp b/src/tests/functional/plugin/cpu/single_layer_tests/ctc_greedy_decoder_seq_len.cpp index 09944c1bba8..60d4eef7a80 100644 --- a/src/tests/functional/plugin/cpu/single_layer_tests/ctc_greedy_decoder_seq_len.cpp +++ b/src/tests/functional/plugin/cpu/single_layer_tests/ctc_greedy_decoder_seq_len.cpp @@ -172,7 +172,7 @@ protected: TEST_P(CTCGreedyDecoderSeqLenLayerCPUTest, CompareWithRefs) { SKIP_IF_CURRENT_TEST_IS_DISABLED(); run(); - CheckPluginRelatedResults(executableNetwork, "CTCGreedyDecoderSeqLen"); + CheckPluginRelatedResults(compiledModel, "CTCGreedyDecoderSeqLen"); } namespace { diff --git a/src/tests/functional/plugin/cpu/single_layer_tests/cum_sum.cpp b/src/tests/functional/plugin/cpu/single_layer_tests/cum_sum.cpp index 8be9059ef60..ed7c8d27ea4 100644 --- a/src/tests/functional/plugin/cpu/single_layer_tests/cum_sum.cpp +++ b/src/tests/functional/plugin/cpu/single_layer_tests/cum_sum.cpp @@ -70,7 +70,7 @@ TEST_P(CumSumLayerCPUTest, CompareWithRefs) { SKIP_IF_CURRENT_TEST_IS_DISABLED() run(); - CheckPluginRelatedResults(executableNetwork, "CumSum"); + CheckPluginRelatedResults(compiledModel, "CumSum"); } const ngraph::element::TypeVector inputPrecision = { diff --git a/src/tests/functional/plugin/cpu/single_layer_tests/deformable_convolution.cpp b/src/tests/functional/plugin/cpu/single_layer_tests/deformable_convolution.cpp index 391939b9fea..75a08f3094f 100644 --- a/src/tests/functional/plugin/cpu/single_layer_tests/deformable_convolution.cpp +++ b/src/tests/functional/plugin/cpu/single_layer_tests/deformable_convolution.cpp @@ -187,7 +187,7 @@ protected: TEST_P(DefConvLayerCPUTest, CompareWithRefs) { SKIP_IF_CURRENT_TEST_IS_DISABLED() run(); - CheckPluginRelatedResults(executableNetwork, "DeformableConvolution"); + CheckPluginRelatedResults(compiledModel, "DeformableConvolution"); } namespace { diff --git a/src/tests/functional/plugin/cpu/single_layer_tests/depth_to_space.cpp b/src/tests/functional/plugin/cpu/single_layer_tests/depth_to_space.cpp index 975200d0eab..4540015525e 100644 --- a/src/tests/functional/plugin/cpu/single_layer_tests/depth_to_space.cpp +++ b/src/tests/functional/plugin/cpu/single_layer_tests/depth_to_space.cpp @@ -81,7 +81,7 @@ TEST_P(DepthToSpaceLayerCPUTest, CompareWithRefs) { SKIP_IF_CURRENT_TEST_IS_DISABLED() run(); - CheckPluginRelatedResults(executableNetwork, "DepthToSpace"); + CheckPluginRelatedResults(compiledModel, "DepthToSpace"); } namespace { diff --git a/src/tests/functional/plugin/cpu/single_layer_tests/eltwise.cpp b/src/tests/functional/plugin/cpu/single_layer_tests/eltwise.cpp index ef2852c53ea..07b99f82d89 100644 --- a/src/tests/functional/plugin/cpu/single_layer_tests/eltwise.cpp +++ b/src/tests/functional/plugin/cpu/single_layer_tests/eltwise.cpp @@ -168,7 +168,7 @@ TEST_P(EltwiseLayerCPUTest, CompareWithRefs) { SKIP_IF_CURRENT_TEST_IS_DISABLED() run(); - CheckPluginRelatedResults(executableNetwork, "Eltwise"); + CheckPluginRelatedResults(compiledModel, "Eltwise"); } namespace { diff --git a/src/tests/functional/plugin/cpu/single_layer_tests/embedding_bag_offsets_sum.cpp 
b/src/tests/functional/plugin/cpu/single_layer_tests/embedding_bag_offsets_sum.cpp index 70f25af4ff4..826e2854ec7 100644 --- a/src/tests/functional/plugin/cpu/single_layer_tests/embedding_bag_offsets_sum.cpp +++ b/src/tests/functional/plugin/cpu/single_layer_tests/embedding_bag_offsets_sum.cpp @@ -99,7 +99,7 @@ public: TEST_P(EmbeddingBagOffsetsSumLayerCPUTest, CompareWithRefs) { SKIP_IF_CURRENT_TEST_IS_DISABLED() run(); - CheckPluginRelatedResults(executableNetwork, "embeddingBagOffsetsSum"); + CheckPluginRelatedResults(compiledModel, "embeddingBagOffsetsSum"); } namespace { diff --git a/src/tests/functional/plugin/cpu/single_layer_tests/embedding_bag_packed_sum.cpp b/src/tests/functional/plugin/cpu/single_layer_tests/embedding_bag_packed_sum.cpp index d744f661a5b..d77b4b0e669 100644 --- a/src/tests/functional/plugin/cpu/single_layer_tests/embedding_bag_packed_sum.cpp +++ b/src/tests/functional/plugin/cpu/single_layer_tests/embedding_bag_packed_sum.cpp @@ -89,7 +89,7 @@ protected: TEST_P(EmbeddingBagPackedSumLayerCPUTest, CompareWithRefs) { SKIP_IF_CURRENT_TEST_IS_DISABLED() run(); - CheckPluginRelatedResults(executableNetwork, "embeddingBagPackedSum"); + CheckPluginRelatedResults(compiledModel, "embeddingBagPackedSum"); } namespace { diff --git a/src/tests/functional/plugin/cpu/single_layer_tests/embedding_segments_sum.cpp b/src/tests/functional/plugin/cpu/single_layer_tests/embedding_segments_sum.cpp index 2870409062a..d92c4fe5852 100644 --- a/src/tests/functional/plugin/cpu/single_layer_tests/embedding_segments_sum.cpp +++ b/src/tests/functional/plugin/cpu/single_layer_tests/embedding_segments_sum.cpp @@ -103,7 +103,7 @@ protected: TEST_P(EmbeddingSegmentsSumLayerCPUTest, CompareWithRefs) { SKIP_IF_CURRENT_TEST_IS_DISABLED() run(); - CheckPluginRelatedResults(executableNetwork, "embeddingSegmentsSum"); + CheckPluginRelatedResults(compiledModel, "embeddingSegmentsSum"); } namespace { diff --git a/src/tests/functional/plugin/cpu/single_layer_tests/extract_image_patches.cpp b/src/tests/functional/plugin/cpu/single_layer_tests/extract_image_patches.cpp index 31f7ee19b80..fdf15b71703 100755 --- a/src/tests/functional/plugin/cpu/single_layer_tests/extract_image_patches.cpp +++ b/src/tests/functional/plugin/cpu/single_layer_tests/extract_image_patches.cpp @@ -69,7 +69,7 @@ protected: TEST_P(ExtractImagePatchesLayerCPUTest, CompareWithRefs) { SKIP_IF_CURRENT_TEST_IS_DISABLED() run(); - CheckPluginRelatedResults(executableNetwork, "ExtractImagePatches"); + CheckPluginRelatedResults(compiledModel, "ExtractImagePatches"); } namespace { diff --git a/src/tests/functional/plugin/cpu/single_layer_tests/fake_quantize.cpp b/src/tests/functional/plugin/cpu/single_layer_tests/fake_quantize.cpp index 8bdd3d0a93a..d0e7c2822df 100644 --- a/src/tests/functional/plugin/cpu/single_layer_tests/fake_quantize.cpp +++ b/src/tests/functional/plugin/cpu/single_layer_tests/fake_quantize.cpp @@ -152,7 +152,7 @@ TEST_P(FakeQuantizeLayerCPUTest, CompareWithRefs) { SKIP_IF_CURRENT_TEST_IS_DISABLED() run(); - CheckPluginRelatedResults(executableNetwork, layerName); + CheckPluginRelatedResults(compiledModel, layerName); } diff --git a/src/tests/functional/plugin/cpu/single_layer_tests/gather.cpp b/src/tests/functional/plugin/cpu/single_layer_tests/gather.cpp index 1cce9d6f790..9111cfff571 100644 --- a/src/tests/functional/plugin/cpu/single_layer_tests/gather.cpp +++ b/src/tests/functional/plugin/cpu/single_layer_tests/gather.cpp @@ -150,7 +150,7 @@ TEST_P(GatherLayerTestCPU, CompareWithRefs) { 
SKIP_IF_CURRENT_TEST_IS_DISABLED() run(); - CheckPluginRelatedResults(executableNetwork, "Gather"); + CheckPluginRelatedResults(compiledModel, "Gather"); } namespace { diff --git a/src/tests/functional/plugin/cpu/single_layer_tests/group_convolution.cpp b/src/tests/functional/plugin/cpu/single_layer_tests/group_convolution.cpp index ac63c5e814c..8ac53f86621 100644 --- a/src/tests/functional/plugin/cpu/single_layer_tests/group_convolution.cpp +++ b/src/tests/functional/plugin/cpu/single_layer_tests/group_convolution.cpp @@ -204,9 +204,9 @@ TEST_P(GroupConvolutionLayerCPUTest, CompareWithRefs) { run(); if (isBias) { - checkBiasFusing(executableNetwork); + checkBiasFusing(compiledModel); } - CheckPluginRelatedResults(executableNetwork, "Convolution"); + CheckPluginRelatedResults(compiledModel, "Convolution"); } namespace { diff --git a/src/tests/functional/plugin/cpu/single_layer_tests/group_convolution_backprop_data.cpp b/src/tests/functional/plugin/cpu/single_layer_tests/group_convolution_backprop_data.cpp index 87268c2135c..f3ce24010bd 100755 --- a/src/tests/functional/plugin/cpu/single_layer_tests/group_convolution_backprop_data.cpp +++ b/src/tests/functional/plugin/cpu/single_layer_tests/group_convolution_backprop_data.cpp @@ -262,7 +262,7 @@ TEST_P(GroupDeconvolutionLayerCPUTest, CompareWithRefs) { } run(); - CheckPluginRelatedResults(executableNetwork, "Deconvolution"); + CheckPluginRelatedResults(compiledModel, "Deconvolution"); } namespace { diff --git a/src/tests/functional/plugin/cpu/single_layer_tests/gru_cell.cpp b/src/tests/functional/plugin/cpu/single_layer_tests/gru_cell.cpp index 5334fef0c83..1c244c8d2c6 100644 --- a/src/tests/functional/plugin/cpu/single_layer_tests/gru_cell.cpp +++ b/src/tests/functional/plugin/cpu/single_layer_tests/gru_cell.cpp @@ -107,7 +107,7 @@ TEST_P(GRUCellCPUTest, CompareWithRefs) { SKIP_IF_CURRENT_TEST_IS_DISABLED() run(); - CheckPluginRelatedResults(executableNetwork, "RNNCell"); + CheckPluginRelatedResults(compiledModel, "RNNCell"); } namespace { diff --git a/src/tests/functional/plugin/cpu/single_layer_tests/gru_sequence.cpp b/src/tests/functional/plugin/cpu/single_layer_tests/gru_sequence.cpp index 836b1fa5006..c2c035697ee 100644 --- a/src/tests/functional/plugin/cpu/single_layer_tests/gru_sequence.cpp +++ b/src/tests/functional/plugin/cpu/single_layer_tests/gru_sequence.cpp @@ -190,7 +190,7 @@ TEST_P(GRUSequenceCPUTest, CompareWithRefs) { SKIP_IF_CURRENT_TEST_IS_DISABLED() run(); - CheckPluginRelatedResults(executableNetwork, "RNNSeq"); + CheckPluginRelatedResults(compiledModel, "RNNSeq"); } namespace { diff --git a/src/tests/functional/plugin/cpu/single_layer_tests/interpolate.cpp b/src/tests/functional/plugin/cpu/single_layer_tests/interpolate.cpp index 2e64e4bb3c2..e2d298acfe7 100644 --- a/src/tests/functional/plugin/cpu/single_layer_tests/interpolate.cpp +++ b/src/tests/functional/plugin/cpu/single_layer_tests/interpolate.cpp @@ -265,7 +265,7 @@ TEST_P(InterpolateLayerCPUTest, CompareWithRefs) { SKIP_IF_CURRENT_TEST_IS_DISABLED() run(); - CheckPluginRelatedResults(executableNetwork, "Interpolate"); + CheckPluginRelatedResults(compiledModel, "Interpolate"); } namespace { diff --git a/src/tests/functional/plugin/cpu/single_layer_tests/log_softmax.cpp b/src/tests/functional/plugin/cpu/single_layer_tests/log_softmax.cpp index f8d4733480e..6a8cc1fda0f 100644 --- a/src/tests/functional/plugin/cpu/single_layer_tests/log_softmax.cpp +++ b/src/tests/functional/plugin/cpu/single_layer_tests/log_softmax.cpp @@ -76,7 +76,7 @@ 
TEST_P(LogSoftmaxLayerCPUTest, CompareWithRefs) { SKIP_IF_CURRENT_TEST_IS_DISABLED() run(); - CheckPluginRelatedResults(executableNetwork, "logSoftmax"); + CheckPluginRelatedResults(compiledModel, "logSoftmax"); } namespace { diff --git a/src/tests/functional/plugin/cpu/single_layer_tests/lrn.cpp b/src/tests/functional/plugin/cpu/single_layer_tests/lrn.cpp index 31c5623b7a2..098dad8cc86 100644 --- a/src/tests/functional/plugin/cpu/single_layer_tests/lrn.cpp +++ b/src/tests/functional/plugin/cpu/single_layer_tests/lrn.cpp @@ -66,7 +66,7 @@ TEST_P(LRNLayerCPUTest, CompareWithRefs) { SKIP_IF_CURRENT_TEST_IS_DISABLED() run(); - CheckPluginRelatedResults(executableNetwork, "LRN"); + CheckPluginRelatedResults(compiledModel, "LRN"); } const std::vector inputPrecisions = { diff --git a/src/tests/functional/plugin/cpu/single_layer_tests/lstm_cell.cpp b/src/tests/functional/plugin/cpu/single_layer_tests/lstm_cell.cpp index 6b5a2f70e1f..e4e85a776d7 100644 --- a/src/tests/functional/plugin/cpu/single_layer_tests/lstm_cell.cpp +++ b/src/tests/functional/plugin/cpu/single_layer_tests/lstm_cell.cpp @@ -106,7 +106,7 @@ TEST_P(LSTMCellLayerCPUTest, CompareWithRefs) { SKIP_IF_CURRENT_TEST_IS_DISABLED() run(); - CheckPluginRelatedResults(executableNetwork, "RNNCell"); + CheckPluginRelatedResults(compiledModel, "RNNCell"); } namespace { diff --git a/src/tests/functional/plugin/cpu/single_layer_tests/lstm_sequence.cpp b/src/tests/functional/plugin/cpu/single_layer_tests/lstm_sequence.cpp index 480353bdf14..2de06f59a05 100644 --- a/src/tests/functional/plugin/cpu/single_layer_tests/lstm_sequence.cpp +++ b/src/tests/functional/plugin/cpu/single_layer_tests/lstm_sequence.cpp @@ -192,7 +192,7 @@ TEST_P(LSTMSequenceCPUTest, CompareWithRefs) { SKIP_IF_CURRENT_TEST_IS_DISABLED() run(); - CheckPluginRelatedResults(executableNetwork, "RNNSeq"); + CheckPluginRelatedResults(compiledModel, "RNNSeq"); } namespace { diff --git a/src/tests/functional/plugin/cpu/single_layer_tests/matmul.cpp b/src/tests/functional/plugin/cpu/single_layer_tests/matmul.cpp index 0157b4fdfac..fb311d6eaf2 100644 --- a/src/tests/functional/plugin/cpu/single_layer_tests/matmul.cpp +++ b/src/tests/functional/plugin/cpu/single_layer_tests/matmul.cpp @@ -175,7 +175,7 @@ TEST_P(MatMulLayerCPUTest, CompareWithRefs) { SKIP_IF_CURRENT_TEST_IS_DISABLED() run(); - CheckPluginRelatedResults(executableNetwork, cpuNodeType); + CheckPluginRelatedResults(compiledModel, cpuNodeType); } namespace { diff --git a/src/tests/functional/plugin/cpu/single_layer_tests/mvn.cpp b/src/tests/functional/plugin/cpu/single_layer_tests/mvn.cpp index e0c0cdac992..7f876c3f672 100644 --- a/src/tests/functional/plugin/cpu/single_layer_tests/mvn.cpp +++ b/src/tests/functional/plugin/cpu/single_layer_tests/mvn.cpp @@ -111,7 +111,7 @@ TEST_P(MvnLayerCPUTest, CompareWithRefs) { SKIP_IF_CURRENT_TEST_IS_DISABLED() run(); - CheckPluginRelatedResults(executableNetwork, "MVN"); + CheckPluginRelatedResults(compiledModel, "MVN"); } namespace { diff --git a/src/tests/functional/plugin/cpu/single_layer_tests/non_max_suppression.cpp b/src/tests/functional/plugin/cpu/single_layer_tests/non_max_suppression.cpp index 773fb6d50a5..b2b91fb181e 100644 --- a/src/tests/functional/plugin/cpu/single_layer_tests/non_max_suppression.cpp +++ b/src/tests/functional/plugin/cpu/single_layer_tests/non_max_suppression.cpp @@ -398,7 +398,7 @@ private: TEST_P(NmsLayerCPUTest, CompareWithRefs) { SKIP_IF_CURRENT_TEST_IS_DISABLED() run(); - // CheckPluginRelatedResults(executableNetwork, "NonMaxSuppression"); + // 
CheckPluginRelatedResults(compiledModel, "NonMaxSuppression"); }; const std::vector inShapeParams = { diff --git a/src/tests/functional/plugin/cpu/single_layer_tests/nonzero.cpp b/src/tests/functional/plugin/cpu/single_layer_tests/nonzero.cpp index 4ed982630aa..bd9525d3962 100644 --- a/src/tests/functional/plugin/cpu/single_layer_tests/nonzero.cpp +++ b/src/tests/functional/plugin/cpu/single_layer_tests/nonzero.cpp @@ -96,7 +96,7 @@ protected: TEST_P(NonZeroLayerCPUTest, CompareWithRefs) { SKIP_IF_CURRENT_TEST_IS_DISABLED() run(); - CheckPluginRelatedResults(executableNetwork, "NonZero"); + CheckPluginRelatedResults(compiledModel, "NonZero"); } namespace { diff --git a/src/tests/functional/plugin/cpu/single_layer_tests/normalize.cpp b/src/tests/functional/plugin/cpu/single_layer_tests/normalize.cpp index 70787642283..4c3cdfa8f4f 100755 --- a/src/tests/functional/plugin/cpu/single_layer_tests/normalize.cpp +++ b/src/tests/functional/plugin/cpu/single_layer_tests/normalize.cpp @@ -101,7 +101,7 @@ TEST_P(NormalizeL2LayerCPUTest, CompareWithRefs) { run(); - CheckPluginRelatedResults(executableNetwork, "NormalizeL2"); + CheckPluginRelatedResults(compiledModel, "NormalizeL2"); } namespace { diff --git a/src/tests/functional/plugin/cpu/single_layer_tests/one_hot.cpp b/src/tests/functional/plugin/cpu/single_layer_tests/one_hot.cpp index f35bd76bb5a..9aa6f2120a3 100644 --- a/src/tests/functional/plugin/cpu/single_layer_tests/one_hot.cpp +++ b/src/tests/functional/plugin/cpu/single_layer_tests/one_hot.cpp @@ -153,7 +153,7 @@ TEST_P(OneHotLayerCPUTest, CompareWithRefs) { SKIP_IF_CURRENT_TEST_IS_DISABLED() run(); - CheckPluginRelatedResults(executableNetwork, "OneHot"); + CheckPluginRelatedResults(compiledModel, "OneHot"); } namespace { diff --git a/src/tests/functional/plugin/cpu/single_layer_tests/pad.cpp b/src/tests/functional/plugin/cpu/single_layer_tests/pad.cpp index 989d4b2f14b..298ec19c410 100644 --- a/src/tests/functional/plugin/cpu/single_layer_tests/pad.cpp +++ b/src/tests/functional/plugin/cpu/single_layer_tests/pad.cpp @@ -78,7 +78,7 @@ TEST_P(PadLayerCPUTest, CompareWithRefs) { run(); - CheckPluginRelatedResults(executableNetwork, "Pad"); + CheckPluginRelatedResults(compiledModel, "Pad"); } namespace { diff --git a/src/tests/functional/plugin/cpu/single_layer_tests/pooling.cpp b/src/tests/functional/plugin/cpu/single_layer_tests/pooling.cpp index fefd5bdee78..fe94768f52d 100644 --- a/src/tests/functional/plugin/cpu/single_layer_tests/pooling.cpp +++ b/src/tests/functional/plugin/cpu/single_layer_tests/pooling.cpp @@ -208,14 +208,14 @@ TEST_P(PoolingLayerCPUTest, CompareWithRefs) { SKIP_IF_CURRENT_TEST_IS_DISABLED() run(); - CheckPluginRelatedResults(executableNetwork, "Pooling"); + CheckPluginRelatedResults(compiledModel, "Pooling"); } TEST_P(MaxPoolingV8LayerCPUTest, CompareWithRefs) { SKIP_IF_CURRENT_TEST_IS_DISABLED() run(); - CheckPluginRelatedResults(executableNetwork, "Pooling"); + CheckPluginRelatedResults(compiledModel, "Pooling"); } namespace { diff --git a/src/tests/functional/plugin/cpu/single_layer_tests/prior_box.cpp b/src/tests/functional/plugin/cpu/single_layer_tests/prior_box.cpp index 042f3126438..279b1bd7cc4 100644 --- a/src/tests/functional/plugin/cpu/single_layer_tests/prior_box.cpp +++ b/src/tests/functional/plugin/cpu/single_layer_tests/prior_box.cpp @@ -154,7 +154,7 @@ protected: TEST_P(PriorBoxLayerCPUTest, CompareWithRefs) { SKIP_IF_CURRENT_TEST_IS_DISABLED() run(); - CheckPluginRelatedResults(executableNetwork, "PriorBox"); + 
CheckPluginRelatedResults(compiledModel, "PriorBox"); } namespace { diff --git a/src/tests/functional/plugin/cpu/single_layer_tests/prior_box_clustered.cpp b/src/tests/functional/plugin/cpu/single_layer_tests/prior_box_clustered.cpp index 822552650a8..3193a3968f7 100644 --- a/src/tests/functional/plugin/cpu/single_layer_tests/prior_box_clustered.cpp +++ b/src/tests/functional/plugin/cpu/single_layer_tests/prior_box_clustered.cpp @@ -141,7 +141,7 @@ protected: TEST_P(PriorBoxClusteredLayerCPUTest, CompareWithRefs) { SKIP_IF_CURRENT_TEST_IS_DISABLED() run(); - CheckPluginRelatedResults(executableNetwork, "PriorBoxClustered"); + CheckPluginRelatedResults(compiledModel, "PriorBoxClustered"); } namespace { diff --git a/src/tests/functional/plugin/cpu/single_layer_tests/proposal.cpp b/src/tests/functional/plugin/cpu/single_layer_tests/proposal.cpp index 7b0d39ba8a1..5f7786df11a 100644 --- a/src/tests/functional/plugin/cpu/single_layer_tests/proposal.cpp +++ b/src/tests/functional/plugin/cpu/single_layer_tests/proposal.cpp @@ -194,7 +194,7 @@ TEST_P(ProposalLayerCPUTest, CompareWithRefs) { SKIP_IF_CURRENT_TEST_IS_DISABLED() run(); - CheckPluginRelatedResults(executableNetwork, "Proposal"); + CheckPluginRelatedResults(compiledModel, "Proposal"); } namespace { diff --git a/src/tests/functional/plugin/cpu/single_layer_tests/reduce_ops.cpp b/src/tests/functional/plugin/cpu/single_layer_tests/reduce_ops.cpp index 6d712302ffb..a42a046055a 100644 --- a/src/tests/functional/plugin/cpu/single_layer_tests/reduce_ops.cpp +++ b/src/tests/functional/plugin/cpu/single_layer_tests/reduce_ops.cpp @@ -183,7 +183,7 @@ TEST_P(ReduceCPULayerTest, CompareWithRefs) { run(); - CheckPluginRelatedResults(executableNetwork, "Reduce"); + CheckPluginRelatedResults(compiledModel, "Reduce"); } namespace { const std::vector inpOutPrc = {ElementType::bf16, ElementType::f32}; diff --git a/src/tests/functional/plugin/cpu/single_layer_tests/region_yolo.cpp b/src/tests/functional/plugin/cpu/single_layer_tests/region_yolo.cpp index 9e9e8f8bfc5..2d197d37b25 100644 --- a/src/tests/functional/plugin/cpu/single_layer_tests/region_yolo.cpp +++ b/src/tests/functional/plugin/cpu/single_layer_tests/region_yolo.cpp @@ -92,7 +92,7 @@ protected: TEST_P(RegionYoloCPULayerTest, CompareWithRefs) { SKIP_IF_CURRENT_TEST_IS_DISABLED() run(); - CheckPluginRelatedResults(executableNetwork, "RegionYolo"); + CheckPluginRelatedResults(compiledModel, "RegionYolo"); } namespace { diff --git a/src/tests/functional/plugin/cpu/single_layer_tests/rnn_cell.cpp b/src/tests/functional/plugin/cpu/single_layer_tests/rnn_cell.cpp index cf61d3df7dd..0ec1d3f9789 100644 --- a/src/tests/functional/plugin/cpu/single_layer_tests/rnn_cell.cpp +++ b/src/tests/functional/plugin/cpu/single_layer_tests/rnn_cell.cpp @@ -102,7 +102,7 @@ TEST_P(RNNCellCPUTest, CompareWithRefs) { SKIP_IF_CURRENT_TEST_IS_DISABLED() run(); - CheckPluginRelatedResults(executableNetwork, "RNNCell"); + CheckPluginRelatedResults(compiledModel, "RNNCell"); } namespace { diff --git a/src/tests/functional/plugin/cpu/single_layer_tests/rnn_sequence.cpp b/src/tests/functional/plugin/cpu/single_layer_tests/rnn_sequence.cpp index 951a238d11e..31963f9fa05 100644 --- a/src/tests/functional/plugin/cpu/single_layer_tests/rnn_sequence.cpp +++ b/src/tests/functional/plugin/cpu/single_layer_tests/rnn_sequence.cpp @@ -168,7 +168,7 @@ TEST_P(RNNSequenceCPUTest, CompareWithRefs) { SKIP_IF_CURRENT_TEST_IS_DISABLED() run(); - CheckPluginRelatedResults(executableNetwork, "RNNSeq"); + 
CheckPluginRelatedResults(compiledModel, "RNNSeq"); } namespace { diff --git a/src/tests/functional/plugin/cpu/single_layer_tests/roi_pooling.cpp b/src/tests/functional/plugin/cpu/single_layer_tests/roi_pooling.cpp index 51fa791e3db..da931f930f4 100644 --- a/src/tests/functional/plugin/cpu/single_layer_tests/roi_pooling.cpp +++ b/src/tests/functional/plugin/cpu/single_layer_tests/roi_pooling.cpp @@ -220,7 +220,7 @@ protected: TEST_P(ROIPoolingCPULayerTest, CompareWithRefs) { SKIP_IF_CURRENT_TEST_IS_DISABLED() run(); - CheckPluginRelatedResults(executableNetwork, "ROIPooling"); + CheckPluginRelatedResults(compiledModel, "ROIPooling"); } namespace { diff --git a/src/tests/functional/plugin/cpu/single_layer_tests/roialign.cpp b/src/tests/functional/plugin/cpu/single_layer_tests/roialign.cpp index 60a1e9fa564..bf6d6aa9956 100644 --- a/src/tests/functional/plugin/cpu/single_layer_tests/roialign.cpp +++ b/src/tests/functional/plugin/cpu/single_layer_tests/roialign.cpp @@ -162,7 +162,7 @@ protected: TEST_P(ROIAlignLayerCPUTest, CompareWithRefs) { SKIP_IF_CURRENT_TEST_IS_DISABLED() run(); - CheckPluginRelatedResults(executableNetwork, "ROIAlign"); + CheckPluginRelatedResults(compiledModel, "ROIAlign"); } namespace { diff --git a/src/tests/functional/plugin/cpu/single_layer_tests/scatter_ND_update.cpp b/src/tests/functional/plugin/cpu/single_layer_tests/scatter_ND_update.cpp index 69e96e07471..802f37e45d0 100644 --- a/src/tests/functional/plugin/cpu/single_layer_tests/scatter_ND_update.cpp +++ b/src/tests/functional/plugin/cpu/single_layer_tests/scatter_ND_update.cpp @@ -117,7 +117,7 @@ protected: TEST_P(ScatterNDUpdateLayerCPUTest, CompareWithRefs) { SKIP_IF_CURRENT_TEST_IS_DISABLED() run(); - CheckPluginRelatedResults(executableNetwork, "ScatterUpdate"); + CheckPluginRelatedResults(compiledModel, "ScatterUpdate"); } const std::vector scatterParams = { diff --git a/src/tests/functional/plugin/cpu/single_layer_tests/scatter_elements_update.cpp b/src/tests/functional/plugin/cpu/single_layer_tests/scatter_elements_update.cpp index 56a098d011a..6d6f29578b4 100644 --- a/src/tests/functional/plugin/cpu/single_layer_tests/scatter_elements_update.cpp +++ b/src/tests/functional/plugin/cpu/single_layer_tests/scatter_elements_update.cpp @@ -122,7 +122,7 @@ protected: TEST_P(ScatterElementsUpdateLayerCPUTest, CompareWithRefs) { SKIP_IF_CURRENT_TEST_IS_DISABLED() run(); - CheckPluginRelatedResults(executableNetwork, "ScatterUpdate"); + CheckPluginRelatedResults(compiledModel, "ScatterUpdate"); } const std::vector axes = { -3, -2, -1, 0, 1, 2 }; diff --git a/src/tests/functional/plugin/cpu/single_layer_tests/scatter_update.cpp b/src/tests/functional/plugin/cpu/single_layer_tests/scatter_update.cpp index 90085bfacf9..ed98b2e8fc6 100644 --- a/src/tests/functional/plugin/cpu/single_layer_tests/scatter_update.cpp +++ b/src/tests/functional/plugin/cpu/single_layer_tests/scatter_update.cpp @@ -82,7 +82,7 @@ protected: TEST_P(ScatterUpdateLayerCPUTest, CompareWithRefs) { SKIP_IF_CURRENT_TEST_IS_DISABLED() run(); - CheckPluginRelatedResults(executableNetwork, "ScatterUpdate"); + CheckPluginRelatedResults(compiledModel, "ScatterUpdate"); } const std::vector scatterParams = { diff --git a/src/tests/functional/plugin/cpu/single_layer_tests/shape_ops.cpp b/src/tests/functional/plugin/cpu/single_layer_tests/shape_ops.cpp index d26fd7e781a..e2297f06f3c 100644 --- a/src/tests/functional/plugin/cpu/single_layer_tests/shape_ops.cpp +++ b/src/tests/functional/plugin/cpu/single_layer_tests/shape_ops.cpp @@ -164,7 +164,7 @@ 
TEST_P(ShapeOpsCPUTest, CompareWithRefs) { SKIP_IF_CURRENT_TEST_IS_DISABLED() run(); - CheckPluginRelatedResults(executableNetwork, "Reshape"); + CheckPluginRelatedResults(compiledModel, "Reshape"); } namespace reshapeTest { diff --git a/src/tests/functional/plugin/cpu/single_layer_tests/shuffle_channels.cpp b/src/tests/functional/plugin/cpu/single_layer_tests/shuffle_channels.cpp index c779572d784..daa0e3ccee0 100644 --- a/src/tests/functional/plugin/cpu/single_layer_tests/shuffle_channels.cpp +++ b/src/tests/functional/plugin/cpu/single_layer_tests/shuffle_channels.cpp @@ -76,7 +76,7 @@ TEST_P(ShuffleChannelsLayerCPUTest, CompareWithRefs) { SKIP_IF_CURRENT_TEST_IS_DISABLED() run(); - CheckPluginRelatedResults(executableNetwork, "ShuffleChannels"); + CheckPluginRelatedResults(compiledModel, "ShuffleChannels"); } namespace { diff --git a/src/tests/functional/plugin/cpu/single_layer_tests/slice.cpp b/src/tests/functional/plugin/cpu/single_layer_tests/slice.cpp index e3b0e30805e..7535fe0462a 100644 --- a/src/tests/functional/plugin/cpu/single_layer_tests/slice.cpp +++ b/src/tests/functional/plugin/cpu/single_layer_tests/slice.cpp @@ -82,7 +82,7 @@ TEST_P(Slice8LayerCPUTest, CompareWithRefs) { SKIP_IF_CURRENT_TEST_IS_DISABLED() run(); - CheckPluginRelatedResults(executableNetwork, "Slice8"); + CheckPluginRelatedResults(compiledModel, "Slice8"); } namespace { diff --git a/src/tests/functional/plugin/cpu/single_layer_tests/softmax.cpp b/src/tests/functional/plugin/cpu/single_layer_tests/softmax.cpp index 151e6b7af91..e3e72626a17 100644 --- a/src/tests/functional/plugin/cpu/single_layer_tests/softmax.cpp +++ b/src/tests/functional/plugin/cpu/single_layer_tests/softmax.cpp @@ -82,7 +82,7 @@ protected: TEST_P(SoftMaxLayerCPUTest, CompareWithRefs) { SKIP_IF_CURRENT_TEST_IS_DISABLED() run(); - CheckPluginRelatedResults(executableNetwork, "Softmax"); + CheckPluginRelatedResults(compiledModel, "Softmax"); } namespace { diff --git a/src/tests/functional/plugin/cpu/single_layer_tests/space_to_batch.cpp b/src/tests/functional/plugin/cpu/single_layer_tests/space_to_batch.cpp index df70773eec9..35f2b0ba681 100644 --- a/src/tests/functional/plugin/cpu/single_layer_tests/space_to_batch.cpp +++ b/src/tests/functional/plugin/cpu/single_layer_tests/space_to_batch.cpp @@ -81,7 +81,7 @@ TEST_P(SpaceToBatchCPULayerTest, CompareWithRefs) { SKIP_IF_CURRENT_TEST_IS_DISABLED() run(); - CPUTestsBase::CheckPluginRelatedResults(executableNetwork, "SpaceToBatch"); + CPUTestsBase::CheckPluginRelatedResults(compiledModel, "SpaceToBatch"); }; namespace { diff --git a/src/tests/functional/plugin/cpu/single_layer_tests/space_to_depth.cpp b/src/tests/functional/plugin/cpu/single_layer_tests/space_to_depth.cpp index 4e453b0eed0..9c384de0bdd 100644 --- a/src/tests/functional/plugin/cpu/single_layer_tests/space_to_depth.cpp +++ b/src/tests/functional/plugin/cpu/single_layer_tests/space_to_depth.cpp @@ -82,7 +82,7 @@ TEST_P(SpaceToDepthLayerCPUTest, CompareWithRefs) { SKIP_IF_CURRENT_TEST_IS_DISABLED() run(); - CPUTestsBase::CheckPluginRelatedResults(executableNetwork, "SpaceToDepth"); + CPUTestsBase::CheckPluginRelatedResults(compiledModel, "SpaceToDepth"); } namespace { diff --git a/src/tests/functional/plugin/cpu/single_layer_tests/strided_slice.cpp b/src/tests/functional/plugin/cpu/single_layer_tests/strided_slice.cpp index eb315ff436d..07d2f6d71d1 100644 --- a/src/tests/functional/plugin/cpu/single_layer_tests/strided_slice.cpp +++ b/src/tests/functional/plugin/cpu/single_layer_tests/strided_slice.cpp @@ -85,7 +85,7 @@ 
TEST_P(StridedSliceLayerCPUTest, CompareWithRefs) { SKIP_IF_CURRENT_TEST_IS_DISABLED() run(); - CheckPluginRelatedResults(executableNetwork, "StridedSlice"); + CheckPluginRelatedResults(compiledModel, "StridedSlice"); } namespace { diff --git a/src/tests/functional/plugin/cpu/single_layer_tests/tile.cpp b/src/tests/functional/plugin/cpu/single_layer_tests/tile.cpp index 834ddb20f5f..7254981ee4d 100644 --- a/src/tests/functional/plugin/cpu/single_layer_tests/tile.cpp +++ b/src/tests/functional/plugin/cpu/single_layer_tests/tile.cpp @@ -141,7 +141,7 @@ TEST_P(TileLayerCPUTest, CompareWithRefs) { SKIP_IF_CURRENT_TEST_IS_DISABLED() run(); - CheckPluginRelatedResults(executableNetwork, "Tile"); + CheckPluginRelatedResults(compiledModel, "Tile"); } namespace { diff --git a/src/tests/functional/plugin/cpu/single_layer_tests/topk.cpp b/src/tests/functional/plugin/cpu/single_layer_tests/topk.cpp index b3bc99692de..f4a0fdeb73c 100644 --- a/src/tests/functional/plugin/cpu/single_layer_tests/topk.cpp +++ b/src/tests/functional/plugin/cpu/single_layer_tests/topk.cpp @@ -212,7 +212,7 @@ TEST_P(TopKLayerCPUTest, CompareWithRefs) { SKIP_IF_CURRENT_TEST_IS_DISABLED() run(); - CheckPluginRelatedResults(executableNetwork, "TopK"); + CheckPluginRelatedResults(compiledModel, "TopK"); } namespace { diff --git a/src/tests/functional/plugin/cpu/single_layer_tests/transpose.cpp b/src/tests/functional/plugin/cpu/single_layer_tests/transpose.cpp index ca4aa944648..226f1a8f080 100644 --- a/src/tests/functional/plugin/cpu/single_layer_tests/transpose.cpp +++ b/src/tests/functional/plugin/cpu/single_layer_tests/transpose.cpp @@ -85,7 +85,7 @@ TEST_P(TransposeLayerCPUTest, CompareWithRefs) { SKIP_IF_CURRENT_TEST_IS_DISABLED() run(); - CheckPluginRelatedResults(executableNetwork, "Transpose"); + CheckPluginRelatedResults(compiledModel, "Transpose"); } namespace { diff --git a/src/tests/functional/plugin/cpu/single_layer_tests/variadic_split.cpp b/src/tests/functional/plugin/cpu/single_layer_tests/variadic_split.cpp index 36839a660d2..5cf94dd4496 100644 --- a/src/tests/functional/plugin/cpu/single_layer_tests/variadic_split.cpp +++ b/src/tests/functional/plugin/cpu/single_layer_tests/variadic_split.cpp @@ -78,7 +78,7 @@ TEST_P(VariadicSplitLayerCPUTest, CompareWithRefs) { SKIP_IF_CURRENT_TEST_IS_DISABLED() run(); - CheckPluginRelatedResults(executableNetwork, "Split"); + CheckPluginRelatedResults(compiledModel, "Split"); } namespace { diff --git a/src/tests/functional/plugin/cpu/subgraph_tests/src/conv_sum_broadcast.cpp b/src/tests/functional/plugin/cpu/subgraph_tests/src/conv_sum_broadcast.cpp index 7888ec37075..4a82b0adcf2 100644 --- a/src/tests/functional/plugin/cpu/subgraph_tests/src/conv_sum_broadcast.cpp +++ b/src/tests/functional/plugin/cpu/subgraph_tests/src/conv_sum_broadcast.cpp @@ -114,7 +114,7 @@ TEST_P(ConcatConvSumInPlaceTest, CompareWithRefs) { run(); - CheckPluginRelatedResults(executableNetwork, "Convolution"); + CheckPluginRelatedResults(compiledModel, "Convolution"); } namespace { diff --git a/src/tests/functional/plugin/cpu/subgraph_tests/src/reshape_fc.cpp b/src/tests/functional/plugin/cpu/subgraph_tests/src/reshape_fc.cpp index 3ee56628003..9c7c047e358 100644 --- a/src/tests/functional/plugin/cpu/subgraph_tests/src/reshape_fc.cpp +++ b/src/tests/functional/plugin/cpu/subgraph_tests/src/reshape_fc.cpp @@ -96,7 +96,7 @@ TEST_P(ReshapeFcCPUTest, CompareWithRefs) { SKIP_IF_CURRENT_TEST_IS_DISABLED() run(); - CheckPluginRelatedResults(executableNetwork, "FullyConnected"); + 
CheckPluginRelatedResults(compiledModel, "FullyConnected"); } const std::vector reshFcParams = { diff --git a/src/tests/functional/plugin/cpu/subgraph_tests/src/seq_native_order.cpp b/src/tests/functional/plugin/cpu/subgraph_tests/src/seq_native_order.cpp index 7eb9ce2ce70..8f8aa384c27 100644 --- a/src/tests/functional/plugin/cpu/subgraph_tests/src/seq_native_order.cpp +++ b/src/tests/functional/plugin/cpu/subgraph_tests/src/seq_native_order.cpp @@ -249,8 +249,8 @@ TEST_P(SequenceCPUTest, CompareWithRefs) { SKIP_IF_CURRENT_TEST_IS_DISABLED() run(); - CheckNumberOfNodesWithType(executableNetwork, "RNNSeq", 1); - CheckNumberOfNodesWithType(executableNetwork, "Transpose", 0); + CheckNumberOfNodesWithType(compiledModel, "RNNSeq", 1); + CheckNumberOfNodesWithType(compiledModel, "Transpose", 0); } const std::vector nodeType = { diff --git a/src/tests/functional/plugin/gna/shared_tests_instances/behavior/ov_plugin/caching_tests.cpp b/src/tests/functional/plugin/gna/shared_tests_instances/behavior/ov_plugin/caching_tests.cpp new file mode 100644 index 00000000000..fbacc45010b --- /dev/null +++ b/src/tests/functional/plugin/gna/shared_tests_instances/behavior/ov_plugin/caching_tests.cpp @@ -0,0 +1,28 @@ +// Copyright (C) 2018-2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "behavior/ov_plugin/caching_tests.hpp" + +using namespace ov::test::behavior; + +namespace { + static const std::vector precisionsGNA = { + ngraph::element::f32, + // integer weights are not supported by GNA so far + // ngraph::element::u8, + // ngraph::element::i16, + }; + + static const std::vector batchSizesGNA = { + 1, 2 + }; + + INSTANTIATE_TEST_SUITE_P(smoke_CachingSupportCase_GNA, CompileModelCacheTestBase, + ::testing::Combine( + ::testing::ValuesIn(CompileModelCacheTestBase::getStandardFunctions()), + ::testing::ValuesIn(precisionsGNA), + ::testing::ValuesIn(batchSizesGNA), + ::testing::Values(CommonTestUtils::DEVICE_GNA)), + CompileModelCacheTestBase::getTestCaseName); +} // namespace diff --git a/src/tests/functional/plugin/gna/shared_tests_instances/behavior/caching/caching_tests.cpp b/src/tests/functional/plugin/gna/shared_tests_instances/behavior/plugin/caching_tests.cpp similarity index 95% rename from src/tests/functional/plugin/gna/shared_tests_instances/behavior/caching/caching_tests.cpp rename to src/tests/functional/plugin/gna/shared_tests_instances/behavior/plugin/caching_tests.cpp index cfb05b9dcbe..f287aea04aa 100644 --- a/src/tests/functional/plugin/gna/shared_tests_instances/behavior/caching/caching_tests.cpp +++ b/src/tests/functional/plugin/gna/shared_tests_instances/behavior/plugin/caching_tests.cpp @@ -2,7 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "behavior/caching/caching_tests.hpp" +#include "behavior/plugin/caching_tests.hpp" using namespace LayerTestsDefinitions; diff --git a/src/tests/functional/plugin/gpu/shared_tests_instances/behavior/ov_plugin/caching_tests.cpp b/src/tests/functional/plugin/gpu/shared_tests_instances/behavior/ov_plugin/caching_tests.cpp new file mode 100644 index 00000000000..94d1aba355d --- /dev/null +++ b/src/tests/functional/plugin/gpu/shared_tests_instances/behavior/ov_plugin/caching_tests.cpp @@ -0,0 +1,32 @@ +// Copyright (C) 2018-2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "behavior/ov_plugin/caching_tests.hpp" + +using namespace ov::test::behavior; + +namespace { + static const std::vector precisionsGPU = { + ngraph::element::f32, + ngraph::element::f16, + ngraph::element::i32, + 
ngraph::element::i64, + ngraph::element::i8, + ngraph::element::u8, + ngraph::element::i16, + ngraph::element::u16, + }; + + static const std::vector batchSizesGPU = { + 1, 2 + }; + + INSTANTIATE_TEST_SUITE_P(smoke_CachingSupportCase_GPU, CompileModelCacheTestBase, + ::testing::Combine( + ::testing::ValuesIn(CompileModelCacheTestBase::getStandardFunctions()), + ::testing::ValuesIn(precisionsGPU), + ::testing::ValuesIn(batchSizesGPU), + ::testing::Values(CommonTestUtils::DEVICE_GPU)), + CompileModelCacheTestBase::getTestCaseName); +} // namespace diff --git a/src/tests/functional/plugin/gpu/shared_tests_instances/behavior/caching/caching_tests.cpp b/src/tests/functional/plugin/gpu/shared_tests_instances/behavior/plugin/caching_tests.cpp similarity index 96% rename from src/tests/functional/plugin/gpu/shared_tests_instances/behavior/caching/caching_tests.cpp rename to src/tests/functional/plugin/gpu/shared_tests_instances/behavior/plugin/caching_tests.cpp index 0fa4a0e4921..076f0de87a2 100644 --- a/src/tests/functional/plugin/gpu/shared_tests_instances/behavior/caching/caching_tests.cpp +++ b/src/tests/functional/plugin/gpu/shared_tests_instances/behavior/plugin/caching_tests.cpp @@ -2,7 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "behavior/caching/caching_tests.hpp" +#include "behavior/plugin/caching_tests.hpp" using namespace LayerTestsDefinitions; diff --git a/src/tests/functional/plugin/gpu/shared_tests_instances/behavior/preprocessing/set_preprocess.cpp b/src/tests/functional/plugin/gpu/shared_tests_instances/behavior/plugin/set_preprocess.cpp similarity index 98% rename from src/tests/functional/plugin/gpu/shared_tests_instances/behavior/preprocessing/set_preprocess.cpp rename to src/tests/functional/plugin/gpu/shared_tests_instances/behavior/plugin/set_preprocess.cpp index b15a6027800..ff4e2c22c7c 100644 --- a/src/tests/functional/plugin/gpu/shared_tests_instances/behavior/preprocessing/set_preprocess.cpp +++ b/src/tests/functional/plugin/gpu/shared_tests_instances/behavior/plugin/set_preprocess.cpp @@ -3,7 +3,7 @@ // #include -#include "behavior/preprocessing/set_preprocess.hpp" +#include "behavior/plugin/set_preprocess.hpp" #ifdef ENABLE_GAPI_PREPROCESSING diff --git a/src/tests/functional/plugin/gpu/shared_tests_instances/skip_tests_config.cpp b/src/tests/functional/plugin/gpu/shared_tests_instances/skip_tests_config.cpp index 31e9e54a0a3..682f065e616 100644 --- a/src/tests/functional/plugin/gpu/shared_tests_instances/skip_tests_config.cpp +++ b/src/tests/functional/plugin/gpu/shared_tests_instances/skip_tests_config.cpp @@ -97,5 +97,7 @@ std::vector disabledTestPatterns() { R"(.*RangeLayerTest.*)", // Issue: 76197 R"(.*registerPluginsXMLUnicodePath.*)", + // Not supported yet + R"(.*CompileModelCacheTestBase.*)", }; } diff --git a/src/tests/functional/plugin/myriad/shared_tests_instances/behavior/ov_plugin/caching_tests.cpp b/src/tests/functional/plugin/myriad/shared_tests_instances/behavior/ov_plugin/caching_tests.cpp new file mode 100644 index 00000000000..3a28919bad2 --- /dev/null +++ b/src/tests/functional/plugin/myriad/shared_tests_instances/behavior/ov_plugin/caching_tests.cpp @@ -0,0 +1,49 @@ +// Copyright (C) 2018-2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "behavior/ov_plugin/caching_tests.hpp" + +using namespace ov::test::behavior; + +namespace { + static const std::vector nightly_precisionsMyriad = { + ngraph::element::f32, + ngraph::element::f16, + ngraph::element::i32, + ngraph::element::i8, + ngraph::element::u8, + 
}; + + static const std::vector smoke_precisionsMyriad = { + ngraph::element::f32, + }; + + static const std::vector batchSizesMyriad = { + 1, 2 + }; + + static std::vector smoke_functions() { + auto funcs = CompileModelCacheTestBase::getStandardFunctions(); + if (funcs.size() > 1) { + funcs.erase(funcs.begin() + 1, funcs.end()); + } + return funcs; + } + + INSTANTIATE_TEST_SUITE_P(smoke_CachingSupportCase_Myriad, CompileModelCacheTestBase, + ::testing::Combine( + ::testing::ValuesIn(smoke_functions()), + ::testing::ValuesIn(smoke_precisionsMyriad), + ::testing::ValuesIn(batchSizesMyriad), + ::testing::Values(CommonTestUtils::DEVICE_MYRIAD)), + CompileModelCacheTestBase::getTestCaseName); + + INSTANTIATE_TEST_SUITE_P(nightly_CachingSupportCase_Myriad, CompileModelCacheTestBase, + ::testing::Combine( + ::testing::ValuesIn(CompileModelCacheTestBase::getStandardFunctions()), + ::testing::ValuesIn(nightly_precisionsMyriad), + ::testing::ValuesIn(batchSizesMyriad), + ::testing::Values(CommonTestUtils::DEVICE_MYRIAD)), + CompileModelCacheTestBase::getTestCaseName); +} // namespace diff --git a/src/tests/functional/plugin/myriad/shared_tests_instances/behavior/caching/caching_tests.cpp b/src/tests/functional/plugin/myriad/shared_tests_instances/behavior/plugin/caching_tests.cpp similarity index 97% rename from src/tests/functional/plugin/myriad/shared_tests_instances/behavior/caching/caching_tests.cpp rename to src/tests/functional/plugin/myriad/shared_tests_instances/behavior/plugin/caching_tests.cpp index 2dbfc4aac48..a818f3c6207 100644 --- a/src/tests/functional/plugin/myriad/shared_tests_instances/behavior/caching/caching_tests.cpp +++ b/src/tests/functional/plugin/myriad/shared_tests_instances/behavior/plugin/caching_tests.cpp @@ -2,7 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "behavior/caching/caching_tests.hpp" +#include "behavior/plugin/caching_tests.hpp" using namespace LayerTestsDefinitions; diff --git a/src/tests/functional/plugin/myriad/shared_tests_instances/behavior/preprocessing/set_preprocess.cpp b/src/tests/functional/plugin/myriad/shared_tests_instances/behavior/plugin/set_preprocess.cpp similarity index 98% rename from src/tests/functional/plugin/myriad/shared_tests_instances/behavior/preprocessing/set_preprocess.cpp rename to src/tests/functional/plugin/myriad/shared_tests_instances/behavior/plugin/set_preprocess.cpp index 4726b2d01b7..f3130d04e5b 100644 --- a/src/tests/functional/plugin/myriad/shared_tests_instances/behavior/preprocessing/set_preprocess.cpp +++ b/src/tests/functional/plugin/myriad/shared_tests_instances/behavior/plugin/set_preprocess.cpp @@ -3,7 +3,7 @@ // #include -#include "behavior/preprocessing/set_preprocess.hpp" +#include "behavior/plugin/set_preprocess.hpp" #ifdef ENABLE_GAPI_PREPROCESSING diff --git a/src/tests/functional/plugin/shared/include/behavior/ov_plugin/caching_tests.hpp b/src/tests/functional/plugin/shared/include/behavior/ov_plugin/caching_tests.hpp new file mode 100644 index 00000000000..d60a23a9709 --- /dev/null +++ b/src/tests/functional/plugin/shared/include/behavior/ov_plugin/caching_tests.hpp @@ -0,0 +1,51 @@ +// Copyright (C) 2018-2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +#include "shared_test_classes/base/ov_subgraph.hpp" +#include "ngraph/function.hpp" + +#include +#include + +namespace ov { +namespace test { +namespace behavior { + +using ovModelGenerator = std::function(ov::element::Type, std::size_t)>; +using ovModelWithName = std::tuple; + +using 
compileModelCacheParams = std::tuple< + ovModelWithName, // openvino model with friendly name + ov::element::Type, // element type + size_t, // batch size + std::string // device name +>; + +class CompileModelCacheTestBase : public testing::WithParamInterface, + virtual public SubgraphBaseTest { + std::string m_cacheFolderName; + std::string m_functionName; + ov::element::Type m_precision; + size_t m_batchSize; + +public: + static std::string getTestCaseName(testing::TestParamInfo obj); + + void SetUp() override; + void TearDown() override; + void run() override; + + bool importExportSupported(ov::Core &core) const; + // Default functions and precisions that can be used as test parameters + static std::vector getStandardFunctions(); +}; + +} // namespace behavior +} // namespace test +} // namespace ov diff --git a/src/tests/functional/plugin/shared/include/behavior/caching/caching_tests.hpp b/src/tests/functional/plugin/shared/include/behavior/plugin/caching_tests.hpp similarity index 100% rename from src/tests/functional/plugin/shared/include/behavior/caching/caching_tests.hpp rename to src/tests/functional/plugin/shared/include/behavior/plugin/caching_tests.hpp diff --git a/src/tests/functional/plugin/shared/include/behavior/preprocessing/preprocessing.hpp b/src/tests/functional/plugin/shared/include/behavior/plugin/preprocessing.hpp similarity index 100% rename from src/tests/functional/plugin/shared/include/behavior/preprocessing/preprocessing.hpp rename to src/tests/functional/plugin/shared/include/behavior/plugin/preprocessing.hpp diff --git a/src/tests/functional/plugin/shared/include/behavior/preprocessing/set_preprocess.hpp b/src/tests/functional/plugin/shared/include/behavior/plugin/set_preprocess.hpp similarity index 100% rename from src/tests/functional/plugin/shared/include/behavior/preprocessing/set_preprocess.hpp rename to src/tests/functional/plugin/shared/include/behavior/plugin/set_preprocess.hpp diff --git a/src/tests/functional/plugin/shared/src/behavior/ov_plugin/caching_tests.cpp b/src/tests/functional/plugin/shared/src/behavior/ov_plugin/caching_tests.cpp new file mode 100644 index 00000000000..5a1bab8ea00 --- /dev/null +++ b/src/tests/functional/plugin/shared/src/behavior/ov_plugin/caching_tests.cpp @@ -0,0 +1,215 @@ +// Copyright (C) 2018-2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + + +#include + +#include "behavior/ov_plugin/caching_tests.hpp" + +#include "common_test_utils/file_utils.hpp" +#include "functional_test_utils/skip_tests_config.hpp" + +#include "ngraph_functions/builders.hpp" +#include "ngraph_functions/subgraph_builders.hpp" + +#define GTEST_COUT std::cout << "[ ] [ INFO ] " + +namespace ov { +namespace test { +namespace behavior { + +static std::shared_ptr simple_function_multiply(ov::element::Type type, size_t batchSize) { + // Create Parameter operation with static shape + auto data = std::make_shared(type, ov::Shape{batchSize, 2}); + data->set_friendly_name("Parameter"); + + auto constant = ov::op::v0::Constant::create(type, ov::Shape{1}, {2}); + constant->set_friendly_name("constant"); + auto mul = std::make_shared(data, constant); + mul->set_friendly_name("mul"); + + // Create Result operation + auto res = std::make_shared(mul); + res->set_friendly_name("res"); + + // Create nGraph function + auto func = std::make_shared(ngraph::ResultVector{res}, ngraph::ParameterVector{data}); + func->set_friendly_name("function"); + return func; +} + +static std::shared_ptr simple_function_relu(ov::element::Type type, size_t 
batchSize) { + // Create Parameter operation with static shape + auto data = std::make_shared(type, ov::Shape{batchSize, 2}); + data->set_friendly_name("Parameter"); + + auto relu = std::make_shared(data); + relu->set_friendly_name("relu"); + + // Create Result operation + auto res = std::make_shared(relu); + res->set_friendly_name("res"); + + // Create nGraph function + auto func = std::make_shared(ov::ResultVector{res}, ov::ParameterVector{data}); + func->set_friendly_name("function"); + return func; +} + +std::vector CompileModelCacheTestBase::getStandardFunctions() { + // Wrapper of most part of available builder functions + using ovModelIS = std::function(std::vector inputShape, + ov::element::Type_t type)>; + auto inputShapeWrapper = [](ovModelIS fun, std::vector inputShape) { + return [fun, inputShape](ngraph::element::Type type, std::size_t batchSize) { + auto shape = inputShape; + shape[0] = batchSize; + return fun(shape, type); + }; + }; + + std::vector res; + res.push_back(ovModelWithName { simple_function_multiply, "SimpleFunctionMultiply"}); + res.push_back(ovModelWithName { simple_function_relu, "SimpleFunctionRelu"}); + res.push_back(ovModelWithName { + inputShapeWrapper(ngraph::builder::subgraph::makeConvPoolRelu, {1, 1, 32, 32}), + "ConvPoolRelu"}); + res.push_back(ovModelWithName { + inputShapeWrapper(ngraph::builder::subgraph::makeSplitConvConcat, {1, 4, 20, 20}), + "SplitConvConcat"}); + res.push_back(ovModelWithName { + inputShapeWrapper(ngraph::builder::subgraph::makeKSOFunction, {1, 4, 20, 20}), + "KSOFunction"}); + res.push_back(ovModelWithName { [](ngraph::element::Type type, size_t batchSize) { + return ngraph::builder::subgraph::makeTIwithLSTMcell(type, batchSize); + }, "TIwithLSTMcell1"}); + res.push_back(ovModelWithName { + inputShapeWrapper(ngraph::builder::subgraph::makeSingleConv, {1, 3, 24, 24}), + "SingleConv"}); + res.push_back(ovModelWithName { + inputShapeWrapper(ngraph::builder::subgraph::make2InputSubtract, {1, 3, 24, 24}), + "2InputSubtract"}); + res.push_back(ovModelWithName { + inputShapeWrapper(ngraph::builder::subgraph::makeNestedSplitConvConcat, {1, 4, 20, 20}), + "NestedSplitConvConcat"}); + res.push_back(ovModelWithName { + inputShapeWrapper(ngraph::builder::subgraph::makeSplitConvConcatInputInBranch, {1, 4, 20, 20}), + "SplitConvConcatInputInBranch"}); + res.push_back(ovModelWithName { + inputShapeWrapper(ngraph::builder::subgraph::makeSplitConvConcatNestedInBranch, {1, 4, 20, 20}), + "SplitConvConcatNestedInBranch"}); + res.push_back(ovModelWithName { + inputShapeWrapper(ngraph::builder::subgraph::makeSplitConvConcatNestedInBranchNestedOut, {1, 4, 20, 20}), + "SplitConvConcatNestedInBranchNestedOut"}); + res.push_back(ovModelWithName { + inputShapeWrapper(ngraph::builder::subgraph::makeConvBias, {1, 3, 24, 24}), + "ConvBias"}); + res.push_back(ovModelWithName { + inputShapeWrapper(ngraph::builder::subgraph::makeReadConcatSplitAssign, {1, 1, 2, 4}), + "ReadConcatSplitAssign"}); + res.push_back(ovModelWithName{ + inputShapeWrapper(ngraph::builder::subgraph::makeMatMulBias, {1, 3, 24, 24}), + "MatMulBias" }); + + return res; +} + +bool CompileModelCacheTestBase::importExportSupported(ov::Core& core) const { + auto supportedProperties = core.get_property(targetDevice, ov::supported_properties); + if (std::find(supportedProperties.begin(), supportedProperties.end(), ov::device::capabilities) == supportedProperties.end()) { + return false; + } + auto device_capabilities = core.get_property(targetDevice, ov::device::capabilities); + if 
(std::find(device_capabilities.begin(), device_capabilities.end(), std::string(ov::device::capability::EXPORT_IMPORT)) == device_capabilities.end()) { + return false; + } + return true; +} + +std::string CompileModelCacheTestBase::getTestCaseName(testing::TestParamInfo obj) { + auto param = obj.param; + auto funcName = std::get<1>(std::get<0>(param)); + auto precision = std::get<1>(param); + auto batchSize = std::get<2>(param); + auto deviceName = std::get<3>(param); + return funcName + "_" + ngraph::element::Type(precision).get_type_name() + "_batch" + std::to_string(batchSize) + "_" + deviceName; +} + +void CompileModelCacheTestBase::SetUp() { + ovModelWithName funcPair; + std::tie(funcPair, m_precision, m_batchSize, targetDevice) = GetParam(); + auto fGen = std::get<0>(funcPair); + m_functionName = std::get<1>(funcPair); + try { + function = fGen(m_precision, m_batchSize); + } catch (...) { + GTEST_SKIP(); + } + + std::stringstream ss; + auto hash = std::hash()(GetTestName()); + ss << "testCache_" << std::to_string(hash) << "_" << std::this_thread::get_id() << "_" << GetTimestamp(); + m_cacheFolderName = ss.str(); + core->set_property(ov::cache_dir()); +} + +void CompileModelCacheTestBase::TearDown() { + CommonTestUtils::removeFilesWithExt(m_cacheFolderName, "blob"); + std::remove(m_cacheFolderName.c_str()); + core->set_property(ov::cache_dir()); +} + +void CompileModelCacheTestBase::run() { + SKIP_IF_CURRENT_TEST_IS_DISABLED(); + if (!function) { + GTEST_COUT << "Can't create function " << m_functionName << " with precision " << m_precision.get_type_name() << std::endl; + GTEST_SKIP(); + } else { + std::vector inShapes; + for (const auto& param : function->get_parameters()) { + inShapes.push_back(param->get_shape()); + } + init_input_shapes(static_shapes_to_test_representation(inShapes)); + } + if (!importExportSupported(*core)) { + GTEST_COUT << "Plugin doesn't support import and export - skipping test" << std::endl; + GTEST_SKIP(); + } + configure_model(); + try { + compiledModel = core->compile_model(function, targetDevice, configuration); + generate_inputs(targetStaticShapes.front()); + infer(); + } catch (const Exception &ex) { + GTEST_COUT << "Can't loadNetwork without cache for " << m_functionName << " with precision " << m_precision.get_type_name() << std::endl; + GTEST_COUT << "Exception [" << ex.what() << "]" << std::endl; + GTEST_SKIP(); + } catch (...) { + GTEST_COUT << "Can't compile network without cache for " << m_functionName << " with precision " << m_precision.get_type_name() << std::endl; + GTEST_SKIP(); // skip caching test if such network is not supported by device at all + } + auto originalOutputs = get_plugin_outputs(); + + for (int i = 0; i < 2; i++) { + // Step 2: Load with cache. 
Export or import shall not throw + compiledModel = {}; // Destroy network object + { + core->set_property(ov::cache_dir(m_cacheFolderName)); + ASSERT_NO_THROW(compiledModel = core->compile_model(function, targetDevice, configuration)); + generate_inputs(targetStaticShapes.front()); + ASSERT_NO_THROW(infer()); + } + // cache is created and reused + ASSERT_EQ(CommonTestUtils::listFilesWithExt(m_cacheFolderName, "blob").size(), 1); + compare(originalOutputs, get_plugin_outputs()); + } +} + +TEST_P(CompileModelCacheTestBase, CompareWithRefImpl) { + run(); +} + +} // namespace behavior +} // namespace test +} // namespace ov diff --git a/src/tests/functional/plugin/shared/src/behavior/caching/caching_tests.cpp b/src/tests/functional/plugin/shared/src/behavior/plugin/caching_tests.cpp similarity index 99% rename from src/tests/functional/plugin/shared/src/behavior/caching/caching_tests.cpp rename to src/tests/functional/plugin/shared/src/behavior/plugin/caching_tests.cpp index 7dc8b61217b..bb239adb779 100644 --- a/src/tests/functional/plugin/shared/src/behavior/caching/caching_tests.cpp +++ b/src/tests/functional/plugin/shared/src/behavior/plugin/caching_tests.cpp @@ -6,7 +6,7 @@ #include #include -#include "behavior/caching/caching_tests.hpp" +#include "behavior/plugin/caching_tests.hpp" #include "common_test_utils/file_utils.hpp" #include "ngraph_functions/builders.hpp" #include "ngraph_functions/subgraph_builders.hpp" diff --git a/src/tests/functional/shared_test_classes/include/shared_test_classes/base/ov_subgraph.hpp b/src/tests/functional/shared_test_classes/include/shared_test_classes/base/ov_subgraph.hpp index 6d703a16058..4739fa1045f 100644 --- a/src/tests/functional/shared_test_classes/include/shared_test_classes/base/ov_subgraph.hpp +++ b/src/tests/functional/shared_test_classes/include/shared_test_classes/base/ov_subgraph.hpp @@ -55,7 +55,7 @@ protected: std::vector> targetStaticShapes; ElementType inType = ov::element::undefined, outType = ov::element::undefined; - ov::CompiledModel executableNetwork; + ov::CompiledModel compiledModel; ov::InferRequest inferRequest; constexpr static const double disable_threshold = std::numeric_limits::max(); diff --git a/src/tests/functional/shared_test_classes/src/base/ov_subgraph.cpp b/src/tests/functional/shared_test_classes/src/base/ov_subgraph.cpp index 9d590ec3863..6db97b61d6b 100644 --- a/src/tests/functional/shared_test_classes/src/base/ov_subgraph.cpp +++ b/src/tests/functional/shared_test_classes/src/base/ov_subgraph.cpp @@ -180,7 +180,7 @@ void SubgraphBaseTest::compile_model() { if (functionRefs == nullptr) { functionRefs = ov::clone_model(*function); } - executableNetwork = core->compile_model(function, targetDevice, configuration); + compiledModel = core->compile_model(function, targetDevice, configuration); } void SubgraphBaseTest::init_ref_function(std::shared_ptr &funcRef, const std::vector& targetInputStaticShapes) { @@ -216,7 +216,7 @@ void SubgraphBaseTest::generate_inputs(const std::vector& targetInput } void SubgraphBaseTest::infer() { - inferRequest = executableNetwork.create_infer_request(); + inferRequest = compiledModel.create_infer_request(); for (const auto& input : inputs) { inferRequest.set_tensor(input.first, input.second); } From 09379dca86e33ec57bb7883dd80b75bcf84a81fb Mon Sep 17 00:00:00 2001 From: Yuan Hu Date: Mon, 21 Feb 2022 23:06:51 +0800 Subject: [PATCH 034/310] [AUTOPLUGIN] add device priority if set ov::device::priorities (#10296) * support config key device priority for example: if AUTO:CPU,GPU the 
priority of CPU will be higher than GPU Signed-off-by: Hu, Yuan2 * add test and fix compile and test error Signed-off-by: Hu, Yuan2 * add an info for device priority and add lost [AUTOPLUGIN] on log Signed-off-by: Hu, Yuan2 * parseMetaDevice return all DEVICE of GPU, when use AUTO:GPU Signed-off-by: Hu, Yuan2 * fix compile issue Signed-off-by: Hu, Yuan2 * modify test and add test case, fix code issue Signed-off-by: Hu, Yuan2 * fix a bug and mutli with HETERO test failed Signed-off-by: Hu, Yuan2 * fix mock test faild issue Signed-off-by: Hu, Yuan2 * fix misprint Signed-off-by: Hu, Yuan2 * Disable AUTO:MYRIAD case MYRIAD/CoreThreadingTests.smoke_QueryNetwork/targetDevice=MULTI_config=MULTI_DEVICE_PRIORITIES:MYRIAD_ faild on windows the error is myriadFuncTests-0 INFO: [E:] [BSL] found 0 ioexpander device Signed-off-by: Hu, Yuan2 * use ov::device::priorities key in this PR Signed-off-by: Hu, Yuan2 * fix a logic bug in key_network_priority after enable device priority add test case cover it Signed-off-by: Hu, Yuan2 --- src/plugins/auto/executable_network.hpp | 1 + src/plugins/auto/plugin.cpp | 78 +++-- .../unit/auto/key_network_priority_test.cpp | 284 ++++++++++++------ .../unit/auto/parse_meta_device_test.cpp | 137 ++++++--- src/tests/unit/auto/select_device_test.cpp | 103 ++++++- 5 files changed, 444 insertions(+), 159 deletions(-) diff --git a/src/plugins/auto/executable_network.hpp b/src/plugins/auto/executable_network.hpp index 449e73b4521..8bac3345b5e 100644 --- a/src/plugins/auto/executable_network.hpp +++ b/src/plugins/auto/executable_network.hpp @@ -41,6 +41,7 @@ struct DeviceInformation { int numRequestsPerDevices; std::string defaultDeviceID; DeviceName uniqueName; + unsigned int devicePriority; }; struct AutoContext { diff --git a/src/plugins/auto/plugin.cpp b/src/plugins/auto/plugin.cpp index f10064c07ee..031eb94527d 100644 --- a/src/plugins/auto/plugin.cpp +++ b/src/plugins/auto/plugin.cpp @@ -115,6 +115,10 @@ std::vector MultiDeviceInferencePlugin::ParseMetaDevices(cons return ""; }; + unsigned int devicePriority = 0; + auto prioritiesIter = config.find(ov::device::priorities.name()); + bool enableDevicePriority = (prioritiesIter != config.end()); + auto deviceList = GetCore()->GetAvailableDevices(); for (auto && d : devicesWithRequests) { auto openingBracket = d.find_first_of('('); auto closingBracket = d.find_first_of(')', openingBracket); @@ -130,33 +134,60 @@ std::vector MultiDeviceInferencePlugin::ParseMetaDevices(cons } } - std::string defaultDeviceID = ""; DeviceIDParser parsed{deviceName}; std::string deviceid = parsed.getDeviceID(); - if (deviceid.empty()) { - defaultDeviceID = getDefaultDeviceID(deviceName); - deviceid = defaultDeviceID; - } - - std::string fullDeviceName = ""; - std::string uniqueName = ""; - if (parsed.getDeviceName() == "GPU") { - auto supportedMetrics = GetCore()->GetMetric(deviceName, METRIC_KEY(SUPPORTED_METRICS)).as>(); - if (std::find(supportedMetrics.begin(), supportedMetrics.end(), METRIC_KEY(FULL_DEVICE_NAME)) != supportedMetrics.end()) { - fullDeviceName = GetCore()->GetMetric(deviceName, METRIC_KEY(FULL_DEVICE_NAME)).as(); + std::vector sameTypeDevices; + // if AUTO:GPU case, replace GPU with GPU.0 and GPU.1 + // Disable AUTO:MYRIAD here because of below test case + // MYRIAD/CoreThreadingTests.smoke_QueryNetwork/targetDevice=MULTI_config=MULTI_DEVICE_PRIORITIES:MYRIAD_ + // faild on windows + // the error is + // myriadFuncTests-0 INFO: [E:] [BSL] found 0 ioexpander device + if (deviceid.empty() && deviceName.find("MYRIAD") == 
std::string::npos) { + for (auto&& device : deviceList) { + if (device.find(deviceName) != std::string::npos) { + sameTypeDevices.push_back(std::move(device)); + } } } - - if (fullDeviceName.empty()) { - uniqueName = parsed.getDeviceName() + "_" + deviceid; - } else { - uniqueName = fullDeviceName + "_" + deviceid; + // it's a virtual device like HETERO, TEMPLATE + // or real device with ID like GPU.1 + if (sameTypeDevices.size() == 0) { + sameTypeDevices.push_back(std::move(deviceName)); } - LOG_DEBUG("deviceName:%s, defaultDeviceID:%s, uniqueName:%s", - deviceName.c_str(), defaultDeviceID.c_str(), uniqueName.c_str()); - // create meta device - metaDevices.push_back({ deviceName, getDeviceConfig(deviceName), numRequests, defaultDeviceID, uniqueName}); + for (auto&& deviceNameWithID : sameTypeDevices) { + DeviceIDParser newParsed{deviceNameWithID}; + std::string defaultDeviceID = ""; + if (newParsed.getDeviceID().empty()) { + defaultDeviceID = getDefaultDeviceID(deviceNameWithID); + } else { + defaultDeviceID = newParsed.getDeviceID(); + } + + std::string fullDeviceName = ""; + std::string uniqueName = ""; + if (newParsed.getDeviceName() == "GPU") { + auto supportedMetrics = GetCore()->GetMetric(deviceNameWithID, METRIC_KEY(SUPPORTED_METRICS)).as>(); + if (std::find(supportedMetrics.begin(), supportedMetrics.end(), METRIC_KEY(FULL_DEVICE_NAME)) != supportedMetrics.end()) { + fullDeviceName = GetCore()->GetMetric(deviceNameWithID, METRIC_KEY(FULL_DEVICE_NAME)).as(); + } + } + + if (fullDeviceName.empty()) { + uniqueName = newParsed.getDeviceName() + "_" + defaultDeviceID; + } else { + uniqueName = fullDeviceName + "_" + defaultDeviceID; + } + + LOG_DEBUG("[AUTOPLUGIN]:deviceNameWithID:%s, defaultDeviceID:%s, uniqueName:%s", + deviceNameWithID.c_str(), defaultDeviceID.c_str(), uniqueName.c_str()); + // create meta device + metaDevices.push_back({deviceNameWithID, getDeviceConfig(deviceNameWithID), numRequests, defaultDeviceID, uniqueName, devicePriority}); + } + if (enableDevicePriority) { + devicePriority++; + } } return metaDevices; @@ -312,6 +343,7 @@ IExecutableNetworkInternal::Ptr MultiDeviceInferencePlugin::LoadNetworkImpl(cons iter->config = deviceConfig; strDevices += iter->deviceName; strDevices += ((iter + 1) == supportDevices.end()) ? 
"" : ","; + LOG_INFO("[AUTOPLUGIN]:device:%s, priority:%ld", iter->deviceName.c_str(), iter->devicePriority); } return std::make_shared(modelPath, network, supportDevices, strDevices, this, context, context.needPerfCounters); } @@ -499,6 +531,10 @@ DeviceInformation MultiDeviceInferencePlugin::SelectDevice(const std::vector; using namespace MockMultiDevice; -using PriorityParams = std::tuple; //{priority, deviceUniquName} +using PriorityParams = std::tuple; //{modelpriority, deviceUniquName} using ConfigParams = std::tuple< - std::string, // netPrecision - std::vector // {{priority, expect device uniqueName}} + std::string, // netPrecision + bool, // enable device priority + std::vector // {{modelpriority, expect device uniqueName}} >; class KeyNetworkPriorityTest : public ::testing::TestWithParam { public: @@ -49,9 +50,15 @@ public: public: static std::string getTestCaseName(testing::TestParamInfo obj) { std::string netPrecision; + bool enableDevicePriority; std::vector PriorityConfigs; - std::tie(netPrecision, PriorityConfigs) = obj.param; + std::tie(netPrecision, enableDevicePriority, PriorityConfigs) = obj.param; std::ostringstream result; + if (enableDevicePriority) { + result << "_enableDevicePriority_true"; + } else { + result << "_enableDevicePriority_false"; + } for (auto& item : PriorityConfigs) { result << "_priority_" << std::get<0>(item); result << "_return_" << std::get<1>(item); @@ -73,11 +80,7 @@ public: plugin = std::shared_ptr(origin_plugin); // replace core with mock Icore plugin->SetCore(core); - metaDevices = {{CommonTestUtils::DEVICE_CPU, {}, 2, "", "CPU_01"}, - {CommonTestUtils::DEVICE_GPU, {}, 2, "01", "iGPU_01"}, - {CommonTestUtils::DEVICE_GPU, {}, 2, "01", "dGPU_01"}, - {CommonTestUtils::DEVICE_MYRIAD, {}, 2, "01", "MYRIAD_01" }, - {CommonTestUtils::DEVICE_KEEMBAY, {}, 2, "01", "VPUX_01"}}; + IE_SET_METRIC(OPTIMIZATION_CAPABILITIES, cpuCability, {"FP32", "FP16", "INT8", "BIN"}); IE_SET_METRIC(OPTIMIZATION_CAPABILITIES, gpuCability, {"FP32", "FP16", "BATCHED_BLOB", "BIN"}); IE_SET_METRIC(OPTIMIZATION_CAPABILITIES, myriadCability, {"FP16"}); @@ -99,11 +102,27 @@ public: TEST_P(KeyNetworkPriorityTest, SelectDevice) { // get Parameter + std::string netPrecision; + bool enableDevicePriority; std::vector PriorityConfigs; - std::tie(netPrecision, PriorityConfigs) = this->GetParam(); + std::tie(netPrecision, enableDevicePriority, PriorityConfigs) = this->GetParam(); std::vector resDevInfo; + if (enableDevicePriority) { + metaDevices = {{CommonTestUtils::DEVICE_CPU, {}, 2, "", "CPU_01", 0}, + {CommonTestUtils::DEVICE_GPU, {}, 2, "01", "iGPU_01", 1}, + {CommonTestUtils::DEVICE_GPU, {}, 2, "01", "dGPU_01", 2}, + {CommonTestUtils::DEVICE_MYRIAD, {}, 2, "01", "MYRIAD_01", 3}, + {CommonTestUtils::DEVICE_KEEMBAY, {}, 2, "01", "VPUX_01", 4}}; + } else { + metaDevices = {{CommonTestUtils::DEVICE_CPU, {}, 2, "", "CPU_01", 0}, + {CommonTestUtils::DEVICE_GPU, {}, 2, "01", "iGPU_01", 0}, + {CommonTestUtils::DEVICE_GPU, {}, 2, "01", "dGPU_01", 0}, + {CommonTestUtils::DEVICE_MYRIAD, {}, 2, "01", "MYRIAD_01", 0}, + {CommonTestUtils::DEVICE_KEEMBAY, {}, 2, "01", "VPUX_01", 0}}; + } + EXPECT_CALL(*plugin, SelectDevice(_, _, _)).Times(PriorityConfigs.size()); EXPECT_CALL(*core, GetMetric(_, _, _)).Times(AtLeast(PriorityConfigs.size() * 4)); @@ -119,11 +138,26 @@ TEST_P(KeyNetworkPriorityTest, SelectDevice) { TEST_P(KeyNetworkPriorityTest, MultiThreadsSelectDevice) { // get Parameter std::string netPrecision; + bool enableDevicePriority; std::vector PriorityConfigs; - std::tie(netPrecision, 
PriorityConfigs) = this->GetParam(); + std::tie(netPrecision, enableDevicePriority, PriorityConfigs) = this->GetParam(); std::vector resDevInfo; std::vector> futureVect; + if (enableDevicePriority) { + metaDevices = {{CommonTestUtils::DEVICE_CPU, {}, 2, "", "CPU_01", 0}, + {CommonTestUtils::DEVICE_GPU, {}, 2, "01", "iGPU_01", 1}, + {CommonTestUtils::DEVICE_GPU, {}, 2, "01", "dGPU_01", 2}, + {CommonTestUtils::DEVICE_MYRIAD, {}, 2, "01", "MYRIAD_01", 3}, + {CommonTestUtils::DEVICE_KEEMBAY, {}, 2, "01", "VPUX_01", 4}}; + } else { + metaDevices = {{CommonTestUtils::DEVICE_CPU, {}, 2, "", "CPU_01", 0}, + {CommonTestUtils::DEVICE_GPU, {}, 2, "01", "iGPU_01", 0}, + {CommonTestUtils::DEVICE_GPU, {}, 2, "01", "dGPU_01", 0}, + {CommonTestUtils::DEVICE_MYRIAD, {}, 2, "01", "MYRIAD_01", 0}, + {CommonTestUtils::DEVICE_KEEMBAY, {}, 2, "01", "VPUX_01", 0}}; + } + EXPECT_CALL(*plugin, SelectDevice(_, _, _)).Times(PriorityConfigs.size() * 2); EXPECT_CALL(*core, GetMetric(_, _, _)).Times(AtLeast(PriorityConfigs.size() * 4 * 2)); // selectdevice in multi threads, and UnregisterPriority them all, should not affect the @@ -153,86 +187,164 @@ TEST_P(KeyNetworkPriorityTest, MultiThreadsSelectDevice) { // ConfigParams details // example -// ConfigParams {"FP32", {PriorityParams {0, "dGPU_01"}, +// ConfigParams {"FP32", false, {PriorityParams {0, "dGPU_01"}, // PriorityParams {1, "iGPU_01"}, // PriorityParams {2, "MYRIAD_01"}, // PriorityParams {2, "MYRIAD_01"}}}, -// {netPrecision, PriorityParamsVector{{priority, expect device uniqueName}}} +// {netPrecision, enableDevicePriority, PriorityParamsVector{{modelpriority, expect device uniqueName}}} const std::vector testConfigs = { - ConfigParams {"FP32", {PriorityParams {0, "dGPU_01"}, - PriorityParams {1, "iGPU_01"}, - PriorityParams {2, "CPU_01"}, - PriorityParams {2, "CPU_01"}}}, - ConfigParams {"FP32", {PriorityParams {2, "dGPU_01"}, - PriorityParams {3, "iGPU_01"}, - PriorityParams {4, "CPU_01"}, - PriorityParams {5, "MYRIAD_01"}}}, - ConfigParams {"FP32", {PriorityParams {2, "dGPU_01"}, - PriorityParams {0, "dGPU_01"}, - PriorityParams {2, "iGPU_01"}, - PriorityParams {2, "iGPU_01"}}}, - ConfigParams {"FP32", {PriorityParams {2, "dGPU_01"}, - PriorityParams {0, "dGPU_01"}, - PriorityParams {2, "iGPU_01"}, - PriorityParams {3, "CPU_01"}}}, - ConfigParams {"FP32", {PriorityParams {0, "dGPU_01"}, - PriorityParams {1, "iGPU_01"}, - PriorityParams {2, "CPU_01"}, - PriorityParams {3, "MYRIAD_01"}, - PriorityParams {0, "dGPU_01"}, - PriorityParams {1, "iGPU_01"}, - PriorityParams {2, "CPU_01"}, - PriorityParams {3, "MYRIAD_01"}}}, - ConfigParams {"INT8", {PriorityParams {0, "VPUX_01"}, - PriorityParams {1, "CPU_01"}, - PriorityParams {2, "CPU_01"}, - PriorityParams {2, "CPU_01"}}}, - ConfigParams {"INT8", {PriorityParams {2, "VPUX_01"}, - PriorityParams {3, "CPU_01"}, - PriorityParams {4, "CPU_01"}, - PriorityParams {5, "CPU_01"}}}, - ConfigParams {"INT8", {PriorityParams {2, "VPUX_01"}, - PriorityParams {0, "VPUX_01"}, - PriorityParams {2, "CPU_01"}, - PriorityParams {2, "CPU_01"}}}, - ConfigParams {"INT8", {PriorityParams {2, "VPUX_01"}, - PriorityParams {0, "VPUX_01"}, - PriorityParams {2, "CPU_01"}, - PriorityParams {3, "CPU_01"}}}, - ConfigParams {"INT8", {PriorityParams {0, "VPUX_01"}, - PriorityParams {1, "CPU_01"}, - PriorityParams {2, "CPU_01"}, - PriorityParams {3, "CPU_01"}, - PriorityParams {0, "VPUX_01"}, - PriorityParams {1, "CPU_01"}, - PriorityParams {2, "CPU_01"}, - PriorityParams {3, "CPU_01"}}}, - ConfigParams {"BIN", {PriorityParams {0, 
"dGPU_01"}, - PriorityParams {1, "iGPU_01"}, - PriorityParams {2, "CPU_01"}, - PriorityParams {2, "CPU_01"}}}, - ConfigParams {"BIN", {PriorityParams {2, "dGPU_01"}, - PriorityParams {3, "iGPU_01"}, - PriorityParams {4, "CPU_01"}, - PriorityParams {5, "CPU_01"}}}, - ConfigParams {"BIN", {PriorityParams {2, "dGPU_01"}, - PriorityParams {0, "dGPU_01"}, - PriorityParams {2, "iGPU_01"}, - PriorityParams {2, "iGPU_01"}}}, - ConfigParams {"BIN", {PriorityParams {2, "dGPU_01"}, - PriorityParams {0, "dGPU_01"}, - PriorityParams {2, "iGPU_01"}, - PriorityParams {3, "CPU_01"}}}, - ConfigParams {"BIN", {PriorityParams {0, "dGPU_01"}, - PriorityParams {1, "iGPU_01"}, - PriorityParams {2, "CPU_01"}, - PriorityParams {3, "CPU_01"}, - PriorityParams {0, "dGPU_01"}, - PriorityParams {1, "iGPU_01"}, - PriorityParams {2, "CPU_01"}, - PriorityParams {3, "CPU_01"}}} - }; + ConfigParams {"FP32", false, {PriorityParams {0, "dGPU_01"}, + PriorityParams {1, "iGPU_01"}, + PriorityParams {2, "CPU_01"}, + PriorityParams {2, "CPU_01"}}}, + ConfigParams {"FP32", false, {PriorityParams {2, "dGPU_01"}, + PriorityParams {3, "iGPU_01"}, + PriorityParams {4, "CPU_01"}, + PriorityParams {5, "MYRIAD_01"}}}, + ConfigParams {"FP32", false, {PriorityParams {2, "dGPU_01"}, + PriorityParams {0, "dGPU_01"}, + PriorityParams {2, "iGPU_01"}, + PriorityParams {2, "iGPU_01"}}}, + ConfigParams {"FP32", false, {PriorityParams {2, "dGPU_01"}, + PriorityParams {0, "dGPU_01"}, + PriorityParams {2, "iGPU_01"}, + PriorityParams {3, "CPU_01"}}}, + ConfigParams {"FP32", false, {PriorityParams {0, "dGPU_01"}, + PriorityParams {1, "iGPU_01"}, + PriorityParams {2, "CPU_01"}, + PriorityParams {3, "MYRIAD_01"}, + PriorityParams {0, "dGPU_01"}, + PriorityParams {1, "iGPU_01"}, + PriorityParams {2, "CPU_01"}, + PriorityParams {3, "MYRIAD_01"}}}, + ConfigParams {"INT8", false, {PriorityParams {0, "VPUX_01"}, + PriorityParams {1, "CPU_01"}, + PriorityParams {2, "CPU_01"}, + PriorityParams {2, "CPU_01"}}}, + ConfigParams {"INT8", false, {PriorityParams {2, "VPUX_01"}, + PriorityParams {3, "CPU_01"}, + PriorityParams {4, "CPU_01"}, + PriorityParams {5, "CPU_01"}}}, + ConfigParams {"INT8", false, {PriorityParams {2, "VPUX_01"}, + PriorityParams {0, "VPUX_01"}, + PriorityParams {2, "CPU_01"}, + PriorityParams {2, "CPU_01"}}}, + ConfigParams {"INT8", false, {PriorityParams {2, "VPUX_01"}, + PriorityParams {0, "VPUX_01"}, + PriorityParams {2, "CPU_01"}, + PriorityParams {3, "CPU_01"}}}, + ConfigParams {"INT8", false, {PriorityParams {0, "VPUX_01"}, + PriorityParams {1, "CPU_01"}, + PriorityParams {2, "CPU_01"}, + PriorityParams {3, "CPU_01"}, + PriorityParams {0, "VPUX_01"}, + PriorityParams {1, "CPU_01"}, + PriorityParams {2, "CPU_01"}, + PriorityParams {3, "CPU_01"}}}, + ConfigParams {"BIN", false, {PriorityParams {0, "dGPU_01"}, + PriorityParams {1, "iGPU_01"}, + PriorityParams {2, "CPU_01"}, + PriorityParams {2, "CPU_01"}}}, + ConfigParams {"BIN", false, {PriorityParams {2, "dGPU_01"}, + PriorityParams {3, "iGPU_01"}, + PriorityParams {4, "CPU_01"}, + PriorityParams {5, "CPU_01"}}}, + ConfigParams {"BIN", false, {PriorityParams {2, "dGPU_01"}, + PriorityParams {0, "dGPU_01"}, + PriorityParams {2, "iGPU_01"}, + PriorityParams {2, "iGPU_01"}}}, + ConfigParams {"BIN", false, {PriorityParams {2, "dGPU_01"}, + PriorityParams {0, "dGPU_01"}, + PriorityParams {2, "iGPU_01"}, + PriorityParams {3, "CPU_01"}}}, + ConfigParams {"BIN", false, {PriorityParams {0, "dGPU_01"}, + PriorityParams {1, "iGPU_01"}, + PriorityParams {2, "CPU_01"}, + PriorityParams {3, 
"CPU_01"}, + PriorityParams {0, "dGPU_01"}, + PriorityParams {1, "iGPU_01"}, + PriorityParams {2, "CPU_01"}, + PriorityParams {3, "CPU_01"}}}, + // metaDevices = {{CommonTestUtils::DEVICE_CPU, {}, 2, "", "CPU_01", 0}, + // {CommonTestUtils::DEVICE_GPU, {}, 2, "01", "iGPU_01", 1}, + // {CommonTestUtils::DEVICE_GPU, {}, 2, "01", "dGPU_01", 2}, + // {CommonTestUtils::DEVICE_MYRIAD, {}, 2, "01", "MYRIAD_01", 3}, + // {CommonTestUtils::DEVICE_KEEMBAY, {}, 2, "01", "VPUX_01", 4}}; + // cpu > igpu > dgpu > MYRIAD > VPUX + ConfigParams {"FP32", true, {PriorityParams {0, "CPU_01"}, + PriorityParams {1, "iGPU_01"}, + PriorityParams {2, "dGPU_01"}, + PriorityParams {2, "dGPU_01"}}}, + ConfigParams {"FP32", true, {PriorityParams {2, "CPU_01"}, + PriorityParams {3, "iGPU_01"}, + PriorityParams {4, "dGPU_01"}, + PriorityParams {5, "MYRIAD_01"}}}, + ConfigParams {"FP32", true, {PriorityParams {2, "CPU_01"}, + PriorityParams {0, "CPU_01"}, + PriorityParams {2, "iGPU_01"}, + PriorityParams {2, "iGPU_01"}}}, + ConfigParams {"FP32", true, {PriorityParams {2, "CPU_01"}, + PriorityParams {0, "CPU_01"}, + PriorityParams {2, "iGPU_01"}, + PriorityParams {3, "dGPU_01"}}}, + ConfigParams {"FP32", true, {PriorityParams {0, "CPU_01"}, + PriorityParams {1, "iGPU_01"}, + PriorityParams {2, "dGPU_01"}, + PriorityParams {3, "MYRIAD_01"}, + PriorityParams {0, "CPU_01"}, + PriorityParams {1, "iGPU_01"}, + PriorityParams {2, "dGPU_01"}, + PriorityParams {3, "MYRIAD_01"}}}, + ConfigParams {"INT8", true, {PriorityParams {0, "CPU_01"}, + PriorityParams {1, "VPUX_01"}, + PriorityParams {2, "VPUX_01"}, + PriorityParams {2, "VPUX_01"}}}, + ConfigParams {"INT8", true, {PriorityParams {2, "CPU_01"}, + PriorityParams {3, "VPUX_01"}, + PriorityParams {4, "VPUX_01"}, + PriorityParams {5, "VPUX_01"}}}, + ConfigParams {"INT8", true, {PriorityParams {2, "CPU_01"}, + PriorityParams {0, "CPU_01"}, + PriorityParams {2, "VPUX_01"}, + PriorityParams {2, "VPUX_01"}}}, + ConfigParams {"INT8", true, {PriorityParams {2, "CPU_01"}, + PriorityParams {0, "CPU_01"}, + PriorityParams {2, "VPUX_01"}, + PriorityParams {3, "VPUX_01"}}}, + ConfigParams {"INT8", true, {PriorityParams {0, "CPU_01"}, + PriorityParams {1, "VPUX_01"}, + PriorityParams {2, "VPUX_01"}, + PriorityParams {3, "VPUX_01"}, + PriorityParams {0, "CPU_01"}, + PriorityParams {1, "VPUX_01"}, + PriorityParams {2, "VPUX_01"}, + PriorityParams {3, "VPUX_01"}}}, + ConfigParams {"BIN", true, {PriorityParams {0, "CPU_01"}, + PriorityParams {1, "iGPU_01"}, + PriorityParams {2, "dGPU_01"}, + PriorityParams {2, "dGPU_01"}}}, + ConfigParams {"BIN", true, {PriorityParams {2, "CPU_01"}, + PriorityParams {3, "iGPU_01"}, + PriorityParams {4, "dGPU_01"}, + PriorityParams {5, "dGPU_01"}}}, + ConfigParams {"BIN", true, {PriorityParams {2, "CPU_01"}, + PriorityParams {0, "CPU_01"}, + PriorityParams {2, "iGPU_01"}, + PriorityParams {2, "iGPU_01"}}}, + ConfigParams {"BIN", true, {PriorityParams {2, "CPU_01"}, + PriorityParams {0, "CPU_01"}, + PriorityParams {2, "iGPU_01"}, + PriorityParams {3, "dGPU_01"}}}, + ConfigParams {"BIN", true, {PriorityParams {0, "CPU_01"}, + PriorityParams {1, "iGPU_01"}, + PriorityParams {2, "dGPU_01"}, + PriorityParams {3, "dGPU_01"}, + PriorityParams {0, "CPU_01"}, + PriorityParams {1, "iGPU_01"}, + PriorityParams {2, "dGPU_01"}, + PriorityParams {3, "dGPU_01"}}} +}; INSTANTIATE_TEST_SUITE_P(smoke_Auto_BehaviorTests, KeyNetworkPriorityTest, diff --git a/src/tests/unit/auto/parse_meta_device_test.cpp b/src/tests/unit/auto/parse_meta_device_test.cpp index e4d5631127b..e96d514ecb9 
100644 --- a/src/tests/unit/auto/parse_meta_device_test.cpp +++ b/src/tests/unit/auto/parse_meta_device_test.cpp @@ -37,11 +37,13 @@ using ::testing::InvokeWithoutArgs; using Config = std::map; using namespace MockMultiDevice; -const char cpuFullDeviceName[] = "Intel(R) Core(TM) i7-6700 CPU @ 3.40GHz"; +// const char cpuFullDeviceName[] = "Intel(R) Core(TM) i7-6700 CPU @ 3.40GHz"; const char igpuFullDeviceName[] = "Intel(R) Gen9 HD Graphics (iGPU)"; -// const char dgpuFullDeviceName[] = "Intel(R) Iris(R) Xe MAX Graphics (dGPU)"; -const char myriadFullDeviceName[] = "Intel Movidius Myriad X VPU"; -const char vpuxFullDeviceName[] = ""; +const char dgpuFullDeviceName[] = "Intel(R) Iris(R) Xe MAX Graphics (dGPU)"; +// const char myriadFullDeviceName[] = "Intel Movidius Myriad X VPU"; +// const char vpuxFullDeviceName[] = ""; +const std::vector availableDevs = {"CPU", "GPU.0", "GPU.1", + "MYRIAD.9.2-ma2480", "MYRIAD.9.1-ma2480", "VPUX"}; using ConfigParams = std::tuple< std::string, // Priority devices std::vector, // expect metaDevices @@ -85,22 +87,15 @@ public: ON_CALL(*core, GetMetric(_, StrEq(METRIC_KEY(SUPPORTED_METRICS)), _)) .WillByDefault(RETURN_MOCK_VALUE(metrics)); - ON_CALL(*core, GetMetric(HasSubstr(CommonTestUtils::DEVICE_CPU), - StrEq(METRIC_KEY(FULL_DEVICE_NAME)), _)).WillByDefault(Return(cpuFullDeviceName)); - ON_CALL(*core, GetMetric(HasSubstr(CommonTestUtils::DEVICE_GPU), + ON_CALL(*core, GetMetric(StrEq("GPU.0"), StrEq(METRIC_KEY(FULL_DEVICE_NAME)), _)).WillByDefault(Return(igpuFullDeviceName)); - ON_CALL(*core, GetMetric(HasSubstr(CommonTestUtils::DEVICE_MYRIAD), - StrEq(METRIC_KEY(FULL_DEVICE_NAME)), _)).WillByDefault(Return(myriadFullDeviceName)); - ON_CALL(*core, GetMetric(HasSubstr(CommonTestUtils::DEVICE_KEEMBAY), - StrEq(METRIC_KEY(FULL_DEVICE_NAME)), _)).WillByDefault(Return(vpuxFullDeviceName)); - IE_SET_METRIC(SUPPORTED_CONFIG_KEYS, otherConfigKeys, {CONFIG_KEY(DEVICE_ID)}); - IE_SET_METRIC(SUPPORTED_CONFIG_KEYS, cpuConfigKeys, {}); - ON_CALL(*core, GetMetric(HasSubstr(CommonTestUtils::DEVICE_CPU), - StrEq(METRIC_KEY(SUPPORTED_CONFIG_KEYS)), _)).WillByDefault(RETURN_MOCK_VALUE(cpuConfigKeys)); - ON_CALL(*core, GetMetric(Not(HasSubstr(CommonTestUtils::DEVICE_CPU)), - StrEq(METRIC_KEY(SUPPORTED_CONFIG_KEYS)), _)).WillByDefault(RETURN_MOCK_VALUE(otherConfigKeys)); - ON_CALL(*core, GetConfig(_, StrEq(CONFIG_KEY(DEVICE_ID)))) - .WillByDefault(InvokeWithoutArgs([](){return "01";})); + ON_CALL(*core, GetMetric(StrEq("GPU.1"), + StrEq(METRIC_KEY(FULL_DEVICE_NAME)), _)).WillByDefault(Return(dgpuFullDeviceName)); + IE_SET_METRIC(SUPPORTED_CONFIG_KEYS, configKeys, {}); + ON_CALL(*core, GetMetric(_, StrEq(METRIC_KEY(SUPPORTED_CONFIG_KEYS)), _)) + .WillByDefault(RETURN_MOCK_VALUE(configKeys)); + + ON_CALL(*core, GetAvailableDevices()).WillByDefault(Return(availableDevs)); ON_CALL(*plugin, ParseMetaDevices).WillByDefault([this](const std::string& priorityDevices, const std::map& config) { @@ -119,9 +114,18 @@ public: } } } + + void compareDevicePriority(std::vector& result, std::vector& expect) { + EXPECT_EQ(result.size(), expect.size()); + if (result.size() == expect.size()) { + for (unsigned int i = 0 ; i < result.size(); i++) { + EXPECT_EQ(result[i].devicePriority, expect[i].devicePriority); + } + } + } }; -TEST_P(ParseMetaDeviceTest, ParseMetaDevices) { +TEST_P(ParseMetaDeviceTest, ParseMetaDevicesWithPriority) { // get Parameter std::string priorityDevices; std::vector metaDevices; @@ -131,11 +135,37 @@ TEST_P(ParseMetaDeviceTest, ParseMetaDevices) { EXPECT_CALL(*plugin, 
ParseMetaDevices(_, _)).Times(1); EXPECT_CALL(*core, GetMetric(_, _, _)).Times(AnyNumber()); EXPECT_CALL(*core, GetConfig(_, _)).Times(AnyNumber()); + EXPECT_CALL(*core, GetAvailableDevices()).Times(1); + EXPECT_CALL(*core, GetSupportedConfig(_, _)).Times(metaDevices.size()); if (throwException) { ASSERT_ANY_THROW(plugin->ParseMetaDevices(priorityDevices, {})); } else { - auto result = plugin->ParseMetaDevices(priorityDevices, {}); + auto result = plugin->ParseMetaDevices(priorityDevices, {{ov::device::priorities.name(), priorityDevices}}); compare(result, metaDevices); + compareDevicePriority(result, metaDevices); + } +} + +TEST_P(ParseMetaDeviceTest, ParseMetaDevicesNotWithPriority) { + // get Parameter + std::string priorityDevices; + std::vector metaDevices; + bool throwException; + std::tie(priorityDevices, metaDevices, throwException) = this->GetParam(); + + EXPECT_CALL(*plugin, ParseMetaDevices(_, _)).Times(1); + EXPECT_CALL(*core, GetMetric(_, _, _)).Times(AnyNumber()); + EXPECT_CALL(*core, GetConfig(_, _)).Times(AnyNumber()); + EXPECT_CALL(*core, GetAvailableDevices()).Times(1); + EXPECT_CALL(*core, GetSupportedConfig(_, _)).Times(metaDevices.size()); + if (throwException) { + ASSERT_ANY_THROW(plugin->ParseMetaDevices(priorityDevices, {})); + } else { + auto result = plugin->ParseMetaDevices(priorityDevices, {{}}); + compare(result, metaDevices); + for (unsigned int i = 0 ; i < result.size(); i++) { + EXPECT_EQ(result[i].devicePriority, 0); + } } } @@ -144,24 +174,59 @@ TEST_P(ParseMetaDeviceTest, ParseMetaDevices) { // ConfigParams {devicePriority, expect metaDevices, ifThrowException} const std::vector testConfigs = { + // ConfigParams {"CPU,GPU,MYRIAD,VPUX", + // {{"CPU", {}, -1, "", "CPU_", 0}, + // {"GPU.0", {}, -1, "0", std::string(igpuFullDeviceName) + "_0", 1}, + // {"GPU.1", {}, -1, "1", std::string(dgpuFullDeviceName) + "_1", 1}, + // {"MYRIAD.9.2-ma2480", {}, -1, "9.2-ma2480", "MYRIAD_9.2-ma2480", 2}, + // {"MYRIAD.9.1-ma2480", {}, -1, "9.1-ma2480", "MYRIAD_9.1-ma2480", 2}, + // {"VPUX", {}, -1, "", "VPUX_", 3}}, false}, + // ConfigParams {"VPUX,MYRIAD,GPU,CPU", + // {{"VPUX", {}, -1, "", "VPUX_", 0}, + // {"MYRIAD.9.2-ma2480", {}, -1, "9.2-ma2480", "MYRIAD_9.2-ma2480", 1}, + // {"MYRIAD.9.1-ma2480", {}, -1, "9.1-ma2480", "MYRIAD_9.1-ma2480", 1}, + // {"GPU.0", {}, -1, "0", std::string(igpuFullDeviceName) + "_0", 2}, + // {"GPU.1", {}, -1, "1", std::string(dgpuFullDeviceName) + "_1", 2}, + // {"CPU", {}, -1, "", "CPU_", 3}}, false}, + // ConfigParams {"CPU(1),GPU(2),MYRIAD(3),VPUX(4)", + // {{"CPU", {}, 1, "", "CPU_", 0}, + // {"GPU.0", {}, 2, "0", std::string(igpuFullDeviceName) + "_0", 1}, + // {"GPU.1", {}, 2, "1", std::string(dgpuFullDeviceName) + "_1", 1}, + // {"MYRIAD.9.2-ma2480", {}, 3, "9.2-ma2480", "MYRIAD_9.2-ma2480", 2}, + // {"MYRIAD.9.1-ma2480", {}, 3, "9.1-ma2480", "MYRIAD_9.1-ma2480", 2}, + // {"VPUX", {}, 4, "", "VPUX_", 3}}, false}, + // ConfigParams {"CPU,GPU,MYRIAD,VPUX", - {{"CPU", {}, -1, "", "CPU_"}, - {"GPU", {}, -1, "01", std::string(igpuFullDeviceName) + "_01"}, - {"MYRIAD", {}, -1, "01", "MYRIAD_01"}, - {"VPUX", {}, -1, "01", "VPUX_01"}}, false}, - ConfigParams {"CPU(1),GPU(2),MYRIAD(3),VPUX(4)", - {{"CPU", {}, 1, "", "CPU_"}, - {"GPU", {}, 2, "01", std::string(igpuFullDeviceName) + "_01"}, - {"MYRIAD", {}, 3, "01", "MYRIAD_01"}, - {"VPUX", {}, 4, "01", "VPUX_01"}}, false}, + {{"CPU", {}, -1, "", "CPU_", 0}, + {"GPU.0", {}, -1, "0", std::string(igpuFullDeviceName) + "_0", 1}, + {"GPU.1", {}, -1, "1", std::string(dgpuFullDeviceName) + "_1", 1}, 
+ {"MYRIAD", {}, -1, "", "MYRIAD_", 2}, + {"VPUX", {}, -1, "", "VPUX_", 3}}, false}, + ConfigParams {"VPUX,GPU,CPU", + {{"VPUX", {}, -1, "", "VPUX_", 0}, + {"GPU.0", {}, -1, "0", std::string(igpuFullDeviceName) + "_0", 1}, + {"GPU.1", {}, -1, "1", std::string(dgpuFullDeviceName) + "_1", 1}, + {"CPU", {}, -1, "", "CPU_", 2}}, false}, + ConfigParams {"CPU(1),GPU(2),VPUX(4)", + {{"CPU", {}, 1, "", "CPU_", 0}, + {"GPU.0", {}, 2, "0", std::string(igpuFullDeviceName) + "_0", 1}, + {"GPU.1", {}, 2, "1", std::string(dgpuFullDeviceName) + "_1", 1}, + {"VPUX", {}, 4, "", "VPUX_", 2}}, false}, + ConfigParams {"CPU(-1),GPU,MYRIAD,VPUX", {}, true}, ConfigParams {"CPU(NA),GPU,MYRIAD,VPUX", {}, true}, - ConfigParams {"CPU.02(3),GPU.03,MYRIAD.04,VPUX.05", - {{"CPU.02", {}, 3, "", "CPU_02"}, - {"GPU.03", {}, -1, "", std::string(igpuFullDeviceName) + "_03"}, - {"MYRIAD.04", {}, -1, "", "MYRIAD_04"}, - {"VPUX.05", {}, -1, "", "VPUX_05"}}, false} - }; + + ConfigParams {"CPU(3),GPU.1,MYRIAD.9.2-ma2480,VPUX", + {{"CPU", {}, 3, "", "CPU_", 0}, + {"GPU.1", {}, -1, "1", std::string(dgpuFullDeviceName) + "_1", 1}, + {"MYRIAD.9.2-ma2480", {}, -1, "9.2-ma2480", "MYRIAD_9.2-ma2480", 2}, + {"VPUX", {}, -1, "", "VPUX_", 3}}, false}, + ConfigParams {"VPUX,MYRIAD.9.2-ma2480,GPU.1,CPU(3)", + {{"VPUX", {}, -1, "", "VPUX_", 0}, + {"MYRIAD.9.2-ma2480", {}, -1, "9.2-ma2480", "MYRIAD_9.2-ma2480", 1}, + {"GPU.1", {}, -1, "1", std::string(dgpuFullDeviceName) + "_1", 2}, + {"CPU", {}, 3, "", "CPU_", 3}}, false} +}; INSTANTIATE_TEST_SUITE_P(smoke_Auto_BehaviorTests, ParseMetaDeviceTest, diff --git a/src/tests/unit/auto/select_device_test.cpp b/src/tests/unit/auto/select_device_test.cpp index c8a94da0f4c..f2acfb09a5c 100644 --- a/src/tests/unit/auto/select_device_test.cpp +++ b/src/tests/unit/auto/select_device_test.cpp @@ -38,7 +38,9 @@ using ConfigParams = std::tuple< std::string, // netPrecision std::vector, // metaDevices for select DeviceInformation, // expect DeviceInformation - bool // throw exception + bool, // throw exception + bool, // enableDevicePriority + bool // reverse total device >; const DeviceInformation CPU_INFO = {CommonTestUtils::DEVICE_CPU, {}, 2, "01", "CPU_01"}; @@ -58,6 +60,7 @@ std::map> devicesMap = {{"FP32 {"BATCHED_BLOB", batchedblobDeviceVector} }; const std::vector totalDevices = {DGPU_INFO, IGPU_INFO, MYRIAD_INFO, CPU_INFO, KEEMBAY_INFO}; +const std::vector reverseTotalDevices = {KEEMBAY_INFO, CPU_INFO, MYRIAD_INFO, IGPU_INFO, DGPU_INFO}; const std::vector netPrecisions = {"FP32", "FP16", "INT8", "BIN", "BATCHED_BLOB"}; std::vector testConfigs; @@ -72,7 +75,9 @@ public: std::vector devices; DeviceInformation expect; bool throwExcept; - std::tie(netPrecision, devices, expect, throwExcept) = obj.param; + bool enableDevicePriority; + bool reverse; + std::tie(netPrecision, devices, expect, throwExcept, enableDevicePriority, reverse) = obj.param; std::ostringstream result; result << "_netPrecision_" << netPrecision; for (auto& item : devices) { @@ -84,19 +89,39 @@ public: } else { result << "_throwExcept_false"; } + + if (enableDevicePriority) { + result << "_enableDevicePriority_true"; + } else { + result << "_enableDevicePriority_false"; + } + + if (reverse) { + result << "_reverseTotalDevice_true"; + } else { + result << "_reverseTotalDevice_false"; + } + return result.str(); } // combine select_num devices from devices and make them to ConfigParams // insert the ConfigParams into testConfigs static void combine_device(const std::vector& devices, int start, - int* result, int result_index, const int 
select_num, std::string& netPrecision) { + int* result, int result_index, const int select_num, std::string& netPrecision, + bool enableDevicePriority, bool reverse) { int i = 0; for (i = start; i < devices.size() + 1 - result_index; i++) { result[result_index - 1] = i; if (result_index - 1 == 0) { std::vector metaDevices = {}; + int devicePriority = 0; for (int j = select_num - 1; j >= 0; j--) { - metaDevices.push_back(devices[result[j]]); + auto tmpDevInfo = devices[result[j]]; + if (enableDevicePriority) { + tmpDevInfo.devicePriority = devicePriority; + devicePriority++; + } + metaDevices.push_back(tmpDevInfo); } // Debug the combine_device // for (auto& item : metaDevices) { @@ -107,24 +132,47 @@ public: bool find = false; DeviceInformation expect; if (metaDevices.size() > 1) { - for (auto& item : devicesInfo) { - auto device = std::find_if(metaDevices.begin(), metaDevices.end(), - [&item](const DeviceInformation& d)->bool{return d.uniqueName == item.uniqueName;}); - if (device != metaDevices.end()) { + if (enableDevicePriority) { + std::vector validDevices; + for (auto& item : devicesInfo) { + auto device = std::find_if(metaDevices.begin(), metaDevices.end(), + [&item](const DeviceInformation& d)->bool{return d.uniqueName == item.uniqueName;}); + if (device != metaDevices.end()) { + validDevices.push_back(*device); + } + } + int currentDevicePriority = 100; + for (auto iter = validDevices.begin(); iter != validDevices.end(); iter++) { + if (iter->devicePriority < currentDevicePriority) { + expect = *iter; + currentDevicePriority = iter->devicePriority; + } + } + if (currentDevicePriority != 100) { find = true; - expect = item; - break; + } + } else { + for (auto& item : devicesInfo) { + auto device = std::find_if(metaDevices.begin(), metaDevices.end(), + [&item](const DeviceInformation& d)->bool{return d.uniqueName == item.uniqueName;}); + if (device != metaDevices.end()) { + find = true; + expect = item; + break; + } } } } else if (metaDevices.size() == 1) { - expect = metaDevices[0]; find = true; + expect = metaDevices[0]; } else { find = false; } - testConfigs.push_back(std::make_tuple(netPrecision, metaDevices, expect, !find)); + testConfigs.push_back(std::make_tuple(netPrecision, metaDevices, + expect, !find, enableDevicePriority, reverse)); } else { - combine_device(devices, i + 1, result, result_index - 1, select_num, netPrecision); + combine_device(devices, i + 1, result, result_index - 1, + select_num, netPrecision, enableDevicePriority, reverse); } } } @@ -139,10 +187,31 @@ public: // total test config num is 32*5 = 160 for (auto netPrecision : netPrecisions) { for (int i = 1; i <= totalDevices.size(); i++) { - combine_device(totalDevices, 0, result, i, i, netPrecision); + combine_device(totalDevices, 0, result, i, i, netPrecision, false, false); } // test null device - testConfigs.push_back(ConfigParams{netPrecision, {}, {}, true}); + testConfigs.push_back(ConfigParams{netPrecision, {}, {}, true, false, false}); + } + // reverse totalDevices for test + for (auto netPrecision : netPrecisions) { + for (int i = 1; i <= reverseTotalDevices.size(); i++) { + combine_device(reverseTotalDevices, 0, result, i, i, netPrecision, false, true); + } + } + + // add test for enableDevicePriority + // test case num is 31*5 = 155 + for (auto netPrecision : netPrecisions) { + for (int i = 1; i <= totalDevices.size(); i++) { + combine_device(totalDevices, 0, result, i, i, netPrecision, true, false); + } + } + + // reverse totalDevices for test + for (auto netPrecision : netPrecisions) { + for 
(int i = 1; i <= reverseTotalDevices.size(); i++) { + combine_device(reverseTotalDevices, 0, result, i, i, netPrecision, true, true); + } } delete []result; return testConfigs; @@ -193,7 +262,9 @@ TEST_P(SelectDeviceTest, SelectDevice) { std::vector devices; DeviceInformation expect; bool throwExcept; - std::tie(netPrecision, devices, expect, throwExcept) = this->GetParam(); + bool enableDevicePriority; + bool reverse; + std::tie(netPrecision, devices, expect, throwExcept, enableDevicePriority, reverse) = this->GetParam(); EXPECT_CALL(*plugin, SelectDevice(_, _, _)).Times(1); if (devices.size() >= 1) { From 4decf16927f89ab5c98274bfe4c55fdd1841566b Mon Sep 17 00:00:00 2001 From: Fedor Zharinov Date: Mon, 21 Feb 2022 18:08:05 +0300 Subject: [PATCH 035/310] Set Latency performance mode in case of sync mode. (#10516) --- samples/cpp/benchmark_app/main.cpp | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/samples/cpp/benchmark_app/main.cpp b/samples/cpp/benchmark_app/main.cpp index 6c2326adb35..b4613fac299 100644 --- a/samples/cpp/benchmark_app/main.cpp +++ b/samples/cpp/benchmark_app/main.cpp @@ -124,10 +124,12 @@ ov::hint::PerformanceMode get_performance_hint(const std::string& device, const ov_perf_hint = ov::hint::PerformanceMode::UNDEFINED; } } else { - slog::warn << "PerformanceMode was not explicitly specified in command line. " + ov_perf_hint = + FLAGS_api == "sync" ? ov::hint::PerformanceMode::LATENCY : ov::hint::PerformanceMode::THROUGHPUT; + + slog::warn << "Performance hint was not explicitly specified in command line. " "Device(" - << device << ") performance hint will be set to THROUGHPUT." << slog::endl; - ov_perf_hint = ov::hint::PerformanceMode::THROUGHPUT; + << device << ") performance hint will be set to " << ov_perf_hint << "." << slog::endl; } } else { if (FLAGS_hint != "") { @@ -390,10 +392,10 @@ int main(int argc, char* argv[]) { if ((device_name.find("MULTI") != std::string::npos) && (device_name.find("CPU") != std::string::npos)) { - slog::warn << "Turn on GPU throttling. Multi-device execution with " + slog::warn << "GPU throttling is turned on. Multi-device execution with " "the CPU + GPU performs best with GPU throttling hint, " << "which releases another CPU thread (that is otherwise " - "used by the GPU driver for active polling)" + "used by the GPU driver for active polling)." << slog::endl; device_config[GPU_CONFIG_KEY(PLUGIN_THROTTLE)] = "1"; } From 6bb8701651e893b5e2f853e493bfb8eeff3a1890 Mon Sep 17 00:00:00 2001 From: Mateusz Tabaka Date: Mon, 21 Feb 2022 16:08:28 +0100 Subject: [PATCH 036/310] Add MatMulConstTransposesExtraction transformation (#10412) Transformation insert Transpose for MatMul's weights and sets its transpose_b attribute to true. If executed by MO, it helps to reduce LoadNetwork time on CPU plugin, since ConvertMatMulToFC doesn't have to insert Transpose by itself. 
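For illustration only (this sketch is not part of the patch; the shapes, the 0.5 weight value and the helper name matmul_before_and_after are made up for the example), the rewrite amounts to the following, using the same ngraph opset8 API the unit tests below rely on:

    #include <memory>
    #include <ngraph/ngraph.hpp>
    #include <ngraph/opsets/opset8.hpp>

    using namespace ngraph;

    std::shared_ptr<Function> matmul_before_and_after() {
        // Before: MatMul with constant weights and transpose_b == false
        auto data    = std::make_shared<opset8::Parameter>(element::f32, Shape{1, 3, 4});
        auto weights = opset8::Constant::create(element::f32, Shape{1, 4, 2}, {0.5});
        auto before  = std::make_shared<opset8::MatMul>(data, weights, false, false);

        // After MatMulConstTransposesExtraction: the two innermost weight dims are
        // swapped (const-folded when the weights are a plain Constant) and
        // transpose_b is flipped to true
        auto order = opset8::Constant::create(element::i32, Shape{3}, {0, 2, 1});
        auto w_t   = std::make_shared<opset8::Transpose>(weights, order);
        auto after = std::make_shared<opset8::MatMul>(data, w_t, false, true);

        // Both MatMuls produce the same {1, 3, 2} result; only the layout in which
        // the weights reach downstream passes changes
        return std::make_shared<Function>(NodeVector{before, after}, ParameterVector{data});
    }

Numerically the two forms are equivalent; the gain is that the transpose of constant weights is resolved ahead of time instead of being inserted by ConvertMatMulToFC during LoadNetwork.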
Ticket: 78635 --- .../matmul_const_transposes_extraction.hpp | 25 ++++ .../matmul_const_transposes_extraction.cpp | 51 ++++++++ .../moc_transformations.cpp | 4 +- .../rt_info/primitives_priority_attribute.cpp | 7 +- .../ngraph_reader/fusion_tests.cpp | 2 +- .../ngraph_reader/matmul_tests.cpp | 6 +- .../matmul_const_transposes_extraction.cpp | 88 +++++++++++++ .../matmul_const_transposes_extraction.cpp | 73 +++++++++++ .../matmul_const_transposes_extraction.hpp | 19 +++ .../matmul_const_transposes_extraction.hpp | 47 +++++++ .../matmul_const_transposes_extraction.cpp | 117 ++++++++++++++++++ 11 files changed, 431 insertions(+), 8 deletions(-) create mode 100644 src/common/transformations/include/transformations/common_optimizations/matmul_const_transposes_extraction.hpp create mode 100644 src/common/transformations/src/transformations/common_optimizations/matmul_const_transposes_extraction.cpp create mode 100644 src/tests/functional/inference_engine/transformations/common_optimizations/matmul_const_transposes_extraction.cpp create mode 100644 src/tests/functional/plugin/cpu/shared_tests_instances/subgraph_tests/matmul_const_transposes_extraction.cpp create mode 100644 src/tests/functional/plugin/shared/include/subgraph_tests/matmul_const_transposes_extraction.hpp create mode 100644 src/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/matmul_const_transposes_extraction.hpp create mode 100644 src/tests/functional/shared_test_classes/src/subgraph/matmul_const_transposes_extraction.cpp diff --git a/src/common/transformations/include/transformations/common_optimizations/matmul_const_transposes_extraction.hpp b/src/common/transformations/include/transformations/common_optimizations/matmul_const_transposes_extraction.hpp new file mode 100644 index 00000000000..f528c666e3f --- /dev/null +++ b/src/common/transformations/include/transformations/common_optimizations/matmul_const_transposes_extraction.hpp @@ -0,0 +1,25 @@ +// Copyright (C) 2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +/** + * @ingroup ie_transformation_common_api + * @brief Resolves transpose_b key from MatMul operation if corresponding input is constant or FakeQuantize by inserting Transpose + */ + +namespace ngraph { +namespace pass { + +class TRANSFORMATIONS_API MatMulConstTransposesExtraction: public MatcherPass { +public: + NGRAPH_RTTI_DECLARATION; + MatMulConstTransposesExtraction(); +}; + +} // namespace pass +} // namespace ngraph diff --git a/src/common/transformations/src/transformations/common_optimizations/matmul_const_transposes_extraction.cpp b/src/common/transformations/src/transformations/common_optimizations/matmul_const_transposes_extraction.cpp new file mode 100644 index 00000000000..07cb1bf115f --- /dev/null +++ b/src/common/transformations/src/transformations/common_optimizations/matmul_const_transposes_extraction.cpp @@ -0,0 +1,51 @@ +// Copyright (C) 2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "transformations/common_optimizations/matmul_const_transposes_extraction.hpp" + +#include +#include +#include +#include + +NGRAPH_RTTI_DEFINITION(ngraph::pass::MatMulConstTransposesExtraction, "MatMulConstTransposesExtraction", 0); + +ngraph::pass::MatMulConstTransposesExtraction::MatMulConstTransposesExtraction() { + auto data_pattern = pattern::any_input(); + auto weights_pattern = pattern::wrap_type([](Output node) -> bool { + const auto& pshape = node.get_partial_shape(); + const auto& rank = 
pshape.rank(); + return rank.is_static() && rank.get_length() >= 2 && + std::count(pshape.begin(), pshape.end(), 1) >= rank.get_length() - 2; + }); + auto matmul_pattern = pattern::wrap_type({data_pattern, weights_pattern}); + matcher_pass_callback callback = [=](pattern::Matcher& m) { + auto node = m.get_match_root(); + auto matmul = as_type(node.get()); + if (!matmul || matmul->get_transpose_b()) + return false; + + const auto& pattern_value_map = m.get_pattern_value_map(); + const auto& weights = pattern_value_map.at(weights_pattern); + + std::vector transpose_order(weights.get_partial_shape().size()); + std::iota(transpose_order.begin(), transpose_order.end(), 0); + std::reverse(transpose_order.end() - 2, transpose_order.end()); + std::shared_ptr transpose = std::make_shared(weights, + op::Constant::create(element::i32, {transpose_order.size()}, transpose_order)); + if (ov::is_type(weights.get_node())) { + if (auto constant = get_constant_from_source(transpose)) + transpose = constant; + } + auto new_matmul = std::make_shared(pattern_value_map.at(data_pattern), transpose, matmul->get_transpose_a(), true); + new_matmul->set_friendly_name(matmul->get_friendly_name()); + copy_runtime_info(node, {transpose, new_matmul}); + replace_node(node, new_matmul); + return true; + }; + + auto m = std::make_shared(matmul_pattern, "MatMulConstTransposesExtraction"); + this->register_matcher(m, callback); +} diff --git a/src/common/transformations/src/transformations/common_optimizations/moc_transformations.cpp b/src/common/transformations/src/transformations/common_optimizations/moc_transformations.cpp index 901f5e24244..e75372e89d4 100644 --- a/src/common/transformations/src/transformations/common_optimizations/moc_transformations.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/moc_transformations.cpp @@ -58,7 +58,8 @@ #include #include #include -#include "transformations/common_optimizations/align_eltwise_input_ranks.hpp" +#include +#include NGRAPH_RTTI_DEFINITION(ngraph::pass::MOCTransformations, "MOCTransformations", 0); @@ -155,6 +156,7 @@ bool ngraph::pass::MOCTransformations::run_on_model(const std::shared_ptradd_matcher(); common_fusions->add_matcher(); common_fusions->add_matcher(m_use_shapes); + common_fusions->add_matcher(); common_fusions->set_name("ngraph::pass::CommonFusions"); manager.register_pass(); diff --git a/src/common/transformations/src/transformations/rt_info/primitives_priority_attribute.cpp b/src/common/transformations/src/transformations/rt_info/primitives_priority_attribute.cpp index 5e016dbe6d0..1ce8fc94ff3 100644 --- a/src/common/transformations/src/transformations/rt_info/primitives_priority_attribute.cpp +++ b/src/common/transformations/src/transformations/rt_info/primitives_priority_attribute.cpp @@ -31,11 +31,12 @@ std::string ov::getPrimitivesPriority(const std::shared_ptr& node) } Any PrimitivesPriority::merge(const ngraph::NodeVector& nodes) const { - auto isConvolutionBased = [](const std::shared_ptr& node) -> bool { + auto canBeMerged = [](const std::shared_ptr& node) -> bool { if (std::dynamic_pointer_cast(node) || std::dynamic_pointer_cast(node) || std::dynamic_pointer_cast(node) || - std::dynamic_pointer_cast(node)) { + std::dynamic_pointer_cast(node) || + std::dynamic_pointer_cast(node)) { return true; } return false; @@ -44,7 +45,7 @@ Any PrimitivesPriority::merge(const ngraph::NodeVector& nodes) const { std::set unique_pp; for (auto& node : nodes) { - if (isConvolutionBased(node)) { + if (canBeMerged(node)) { std::string pp = 
getPrimitivesPriority(node); if (!pp.empty()) unique_pp.insert(pp); diff --git a/src/tests/functional/inference_engine/ngraph_reader/fusion_tests.cpp b/src/tests/functional/inference_engine/ngraph_reader/fusion_tests.cpp index a0385eb4780..0b9d412bd96 100644 --- a/src/tests/functional/inference_engine/ngraph_reader/fusion_tests.cpp +++ b/src/tests/functional/inference_engine/ngraph_reader/fusion_tests.cpp @@ -409,7 +409,7 @@ TEST_F(NGraphReaderTests, MatMulBiasFusionNoBroadcast) { - + 1 diff --git a/src/tests/functional/inference_engine/ngraph_reader/matmul_tests.cpp b/src/tests/functional/inference_engine/ngraph_reader/matmul_tests.cpp index 67c67956e05..0d6390920cf 100644 --- a/src/tests/functional/inference_engine/ngraph_reader/matmul_tests.cpp +++ b/src/tests/functional/inference_engine/ngraph_reader/matmul_tests.cpp @@ -641,7 +641,7 @@ TEST_F(NGraphReaderTests, ReadMatMul1DNetwork) { - + 2048 @@ -658,7 +658,7 @@ TEST_F(NGraphReaderTests, ReadMatMul1DNetwork) { - + 1 @@ -687,7 +687,7 @@ TEST_F(NGraphReaderTests, ReadMatMul1DNetwork) { - + 1 diff --git a/src/tests/functional/inference_engine/transformations/common_optimizations/matmul_const_transposes_extraction.cpp b/src/tests/functional/inference_engine/transformations/common_optimizations/matmul_const_transposes_extraction.cpp new file mode 100644 index 00000000000..76c2ce90aa1 --- /dev/null +++ b/src/tests/functional/inference_engine/transformations/common_optimizations/matmul_const_transposes_extraction.cpp @@ -0,0 +1,88 @@ +// Copyright (C) 2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include +#include +#include +#include + +#include "common_test_utils/ngraph_test_utils.hpp" + +using namespace ngraph; + +TEST_F(TransformationTestsF, MatMulConstTransposesExtractionConstantWeights) { + { + auto data = std::make_shared(element::f32, Shape{1, 3, 4}); + auto weights = opset8::Constant::create(element::f32, Shape{1, 3, 2}, {1, 2, 3, 4, 5, 6}); + auto matmul = std::make_shared(data, weights, true); + function = std::make_shared(matmul, ParameterVector{data}); + + manager.register_pass(); + } + + { + auto data = std::make_shared(element::f32, Shape{1, 3, 4}); + auto weights = opset8::Constant::create(element::f32, Shape{1, 2, 3}, {1, 3, 5, 2, 4, 6}); + auto matmul = std::make_shared(data, weights, true, true); + function_ref = std::make_shared(matmul, ParameterVector{data}); + } + comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES); + enable_accuracy_check(); +} + +TEST_F(TransformationTestsF, MatMulConstTransposesExtractionFQOnWeights) { + { + auto data = std::make_shared(element::f32, Shape{1, 4, 3}); + auto weights = opset8::Constant::create(element::f32, Shape{1, 3, 2}, {1, 2, 3, 4, 5, 6}); + auto low = opset8::Constant::create(element::f32, Shape{1}, {0}); + auto high = opset8::Constant::create(element::f32, Shape{1}, {10}); + auto fq = std::make_shared(weights, low, high, low, high, 255); + auto matmul = std::make_shared(data, fq); + function = std::make_shared(matmul, ParameterVector{data}); + + manager.register_pass(); + } + + { + auto data = std::make_shared(element::f32, Shape{1, 4, 3}); + auto weights = opset8::Constant::create(element::f32, Shape{1, 3, 2}, {1, 2, 3, 4, 5, 6}); + auto low = opset8::Constant::create(element::f32, Shape{1}, {0}); + auto high = opset8::Constant::create(element::f32, Shape{1}, {10}); + auto fq = std::make_shared(weights, low, high, low, high, 255); + auto transpose = std::make_shared(fq, op::Constant::create(element::i32, Shape{3}, {0, 2, 1})); + 
auto matmul = std::make_shared(data, transpose, false, true); + function_ref = std::make_shared(matmul, ParameterVector{data}); + } + comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES); + enable_accuracy_check(); +} + +TEST_F(TransformationTestsF, NegativeMatMulConstTransposesExtractionInvalidRank) { + auto data = std::make_shared(element::f32, Shape{1, 3, 4}); + auto weights = opset8::Constant::create(element::f32, Shape{3}, {1, 2, 3}); + auto matmul = std::make_shared(data, weights, true); + function = std::make_shared(matmul, ParameterVector{data}); + manager.register_pass(); + comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES); +} + +TEST_F(TransformationTestsF, NegativeMatMulConstTransposesExtractionTransposeBSet) { + auto data = std::make_shared(element::f32, Shape{1, 3, 4}); + auto weights = opset8::Constant::create(element::f32, Shape{1, 2, 3}, {1, 2, 3, 4, 5, 6}); + auto matmul = std::make_shared(data, weights, true, true); + function = std::make_shared(matmul, ParameterVector{data}); + manager.register_pass(); + comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES); +} + +TEST_F(TransformationTestsF, NegativeMatMulConstTransposesExtractionNonUnitDims) { + auto data = std::make_shared(element::f32, Shape{1, 3, 4}); + auto weights = opset8::Constant::create(element::f32, Shape{2, 3, 2}, {1, 2, 3, 4, 5, 6, 2, 3, 4, 5, 6, 7}); + auto matmul = std::make_shared(data, weights, true); + function = std::make_shared(matmul, ParameterVector{data}); + manager.register_pass(); + comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES); +} diff --git a/src/tests/functional/plugin/cpu/shared_tests_instances/subgraph_tests/matmul_const_transposes_extraction.cpp b/src/tests/functional/plugin/cpu/shared_tests_instances/subgraph_tests/matmul_const_transposes_extraction.cpp new file mode 100644 index 00000000000..4456ca032ff --- /dev/null +++ b/src/tests/functional/plugin/cpu/shared_tests_instances/subgraph_tests/matmul_const_transposes_extraction.cpp @@ -0,0 +1,73 @@ +// Copyright (C) 2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "subgraph_tests/matmul_const_transposes_extraction.hpp" + +using namespace SubgraphTestsDefinitions; + +namespace { +std::vector shape_params = { + {{2, 2}, {2, 3}, false}, + {{5}, {5, 1}, false}, + {{5}, {5, 3}, false}, + {{5, 10}, {10, 7}, false}, + {{5, 10}, {1, 10, 7}, false}, + {{5, 10}, {1, 1, 10, 7}, false}, + {{2, 3, 5, 10}, {10, 7}, false}, + {{2, 3, 5, 10}, {1, 10, 7}, false}, + {{2, 3, 5, 10}, {1, 10, 1}, false}, + {{2, 3, 5, 10}, {1, 1, 10, 7}, false}, + {{2, 3, 5, 10}, {1, 1, 10, 1}, false}, +}; + +INSTANTIATE_TEST_SUITE_P(smoke_MatMulConstTransposesExtractionTest, MatMulConstTransposesExtractionTest, + ::testing::Combine( + ::testing::ValuesIn(shape_params), + ::testing::Values(true), // can be fused + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + MatMulConstTransposesExtractionTest::getTestCaseName); + +std::vector negative_shape_params = { + {{5}, {5}, false}, + {{5}, {3, 5}, true}, + {{5, 5}, {5, 5}, true}, + {{5, 10}, {7, 10}, true}, + {{5, 10}, {2, 10, 7}, false}, + {{5, 10}, {2, 3, 10, 7}, false}, + {{1, 1, 5, 10}, {10}, false}, + {{1, 1, 5, 10}, {7, 10}, true}, + {{1, 1, 5, 10}, {1, 1, 7, 10}, true}, + {{2, 3, 5, 10}, {7, 10}, true}, + {{2, 3, 5, 10}, {3, 7, 10}, true}, + {{2, 3, 5, 10}, {2, 3, 7, 10}, true}, + {{2, 3, 5, 10}, {3, 10, 7}, false}, + {{2, 3, 5, 10}, {1, 3, 10, 7}, false}, + {{2, 3, 5, 10}, {2, 3, 10, 7}, false}, +}; + 
+INSTANTIATE_TEST_SUITE_P(smoke_NegativeMatMulConstTransposesExtractionTest, MatMulConstTransposesExtractionTest, + ::testing::Combine( + ::testing::ValuesIn(negative_shape_params), + ::testing::Values(false), // cannot be fused + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + MatMulConstTransposesExtractionTest::getTestCaseName); + +std::vector shape_params2 = { + {{2, 2}, {2, 2}, false}, + {{5, 10}, {10, 7}, false}, + {{5, 10}, {1, 10, 7}, false}, + {{5, 10}, {1, 1, 10, 7}, false}, + {{2, 3, 5, 10}, {10, 7}, false}, + {{2, 3, 5, 10}, {1, 10, 7}, false}, + {{2, 3, 5, 10}, {1, 1, 10, 7}, false}, +}; + +INSTANTIATE_TEST_SUITE_P(smoke_QuantizedMatMulConstTransposesExtractionTest, QuantizedMatMulConstTransposesExtractionTest, + ::testing::Combine( + ::testing::ValuesIn(shape_params2), + ::testing::Values(true), // can be fused + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + QuantizedMatMulConstTransposesExtractionTest::getTestCaseName); + +} // namespace diff --git a/src/tests/functional/plugin/shared/include/subgraph_tests/matmul_const_transposes_extraction.hpp b/src/tests/functional/plugin/shared/include/subgraph_tests/matmul_const_transposes_extraction.hpp new file mode 100644 index 00000000000..188164f1196 --- /dev/null +++ b/src/tests/functional/plugin/shared/include/subgraph_tests/matmul_const_transposes_extraction.hpp @@ -0,0 +1,19 @@ +// Copyright (C) 2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "shared_test_classes/subgraph/matmul_const_transposes_extraction.hpp" + +namespace SubgraphTestsDefinitions { + +TEST_P(MatMulConstTransposesExtractionTest, CompareWithRefs) { + Run(); +} + +TEST_P(QuantizedMatMulConstTransposesExtractionTest, CompareWithRefs) { + Run(); +} + +} // namespace SubgraphTestsDefinitions diff --git a/src/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/matmul_const_transposes_extraction.hpp b/src/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/matmul_const_transposes_extraction.hpp new file mode 100644 index 00000000000..ab345a20167 --- /dev/null +++ b/src/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/matmul_const_transposes_extraction.hpp @@ -0,0 +1,47 @@ +// Copyright (C) 2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include "shared_test_classes/base/layer_test_utils.hpp" +#include + +namespace SubgraphTestsDefinitions { + +struct MatMulConstTransposesExtractionTestShapeParams { + ngraph::Shape input_shape; + ngraph::Shape weights_shape; + bool trans_b; +}; + +typedef std::tuple< + MatMulConstTransposesExtractionTestShapeParams, + bool, // whether Mul can be fused to MatMul in this case + std::string // Device name + > MatMulConstTransposesExtractionTestParams; + +class MatMulConstTransposesExtractionTest + : public testing::WithParamInterface, + virtual public LayerTestsUtils::LayerTestsCommon { +public: + static std::string getTestCaseName(const testing::TestParamInfo &obj); + +protected: + void SetUp() override; +}; + +class QuantizedMatMulConstTransposesExtractionTest + : public testing::WithParamInterface, + virtual public LayerTestsUtils::LayerTestsCommon { +public: + static std::string getTestCaseName(const testing::TestParamInfo &obj); + +protected: + void SetUp() override; + void TearDown() override; +}; + +} // namespace SubgraphTestsDefinitions diff --git a/src/tests/functional/shared_test_classes/src/subgraph/matmul_const_transposes_extraction.cpp 
b/src/tests/functional/shared_test_classes/src/subgraph/matmul_const_transposes_extraction.cpp new file mode 100644 index 00000000000..dbbcf5524db --- /dev/null +++ b/src/tests/functional/shared_test_classes/src/subgraph/matmul_const_transposes_extraction.cpp @@ -0,0 +1,117 @@ +// Copyright (C) 2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "transformations/common_optimizations/matmul_const_transposes_extraction.hpp" +#include "shared_test_classes/subgraph/matmul_const_transposes_extraction.hpp" +#include "ngraph_functions/builders.hpp" +#include + +namespace SubgraphTestsDefinitions { + +using namespace ngraph; + +std::string MatMulConstTransposesExtractionTest::getTestCaseName(const testing::TestParamInfo &obj) { + MatMulConstTransposesExtractionTestShapeParams shape_params; + std::string device; + std::tie(shape_params, std::ignore, device) = obj.param; + std::ostringstream results; + + results << "input=" << shape_params.input_shape << "_"; + results << "weights=" << shape_params.weights_shape << "_"; + results << "transB=" << std::boolalpha << shape_params.trans_b << "_"; + results << "dev=" << device; + return results.str(); +} + +void MatMulConstTransposesExtractionTest::SetUp() { + MatMulConstTransposesExtractionTestShapeParams shape_params; + element::Type type = element::f32; + bool can_be_fused; + std::tie(shape_params, can_be_fused, targetDevice) = GetParam(); + + const auto& input_shape = shape_params.input_shape; + const auto& weights_shape = shape_params.weights_shape; + + auto param = std::make_shared(type, input_shape); + auto weights = opset8::Constant::create(type, weights_shape, {0.5}); + auto matmul = std::make_shared(param, weights, false, shape_params.trans_b); + function = std::make_shared(matmul, ParameterVector{param}); + + auto transformed_function = clone_function(*function); + pass::Manager manager; + manager.register_pass(); + manager.run_passes(transformed_function); + + bool functions_equal; + auto orig_function = clone_function(*function); + std::tie(functions_equal, std::ignore) = compare_functions(transformed_function, orig_function, true); + if (can_be_fused) { + ASSERT_FALSE(functions_equal); + } else { + ASSERT_TRUE(functions_equal); + } +} + +std::string QuantizedMatMulConstTransposesExtractionTest::getTestCaseName( + const testing::TestParamInfo &obj) { + MatMulConstTransposesExtractionTestShapeParams params; + std::string device; + std::tie(params, std::ignore, device) = obj.param; + std::ostringstream results; + + results << "input=" << params.input_shape << "_" + "weights=" << params.weights_shape << "_" + "dev=" << device; + return results.str(); +} + +void QuantizedMatMulConstTransposesExtractionTest::SetUp() { + MatMulConstTransposesExtractionTestShapeParams params; + bool can_be_fused; + std::tie(params, can_be_fused, targetDevice) = GetParam(); + + const auto& input_shape = params.input_shape; + auto weights_shape = params.weights_shape; + + element::Type type = element::f32; + auto param = std::make_shared(type, input_shape); + std::shared_ptr input; + std::shared_ptr weights = opset8::Constant::create(type, weights_shape, {0.5}); + auto low = opset8::Constant::create(type, {1}, {-2}); + auto high = opset8::Constant::create(type, {1}, {2}); + input = std::make_shared(param, low, high, low, high, 256); + weights = std::make_shared(weights, low, high, low, high, 255); + auto matmul = std::make_shared(input, weights, false, false); + function = std::make_shared(matmul, ParameterVector{param}); + + auto 
transformed_function = clone_function(*function); + pass::Manager manager; + manager.register_pass(); + manager.run_passes(transformed_function); + + bool functions_equal; + auto orig_function = clone_function(*function); + std::tie(functions_equal, std::ignore) = compare_functions(transformed_function, orig_function, true); + if (can_be_fused) { + ASSERT_FALSE(functions_equal); + } else { + ASSERT_TRUE(functions_equal); + } +} + +void QuantizedMatMulConstTransposesExtractionTest::TearDown() { + auto runtime_function = executableNetwork.GetExecGraphInfo().getFunction(); + int ops_found = 0; + for (const auto& node : runtime_function->get_ordered_ops()) { + const auto& layer_type = node->get_rt_info().at(ExecGraphInfoSerialization::LAYER_TYPE).as(); + if (layer_type == "FullyConnected" || layer_type == "MatMul") { + ops_found++; + auto inputs = node->input_values(); + ASSERT_EQ(element::u8, inputs[0].get_element_type()); + ASSERT_EQ(element::i8, inputs[1].get_element_type()); + } + } + ASSERT_GT(ops_found, 0); +} +} // namespace SubgraphTestsDefinitions From b7fede89c8bc3835500bdaed6184ee4690d144a5 Mon Sep 17 00:00:00 2001 From: Egor Duplensky Date: Mon, 21 Feb 2022 18:26:20 +0300 Subject: [PATCH 037/310] [CPU] Fix uninitialized reorder implementation type (valgrind, asan) (#10520) --- src/plugins/intel_cpu/src/nodes/reorder.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/plugins/intel_cpu/src/nodes/reorder.cpp b/src/plugins/intel_cpu/src/nodes/reorder.cpp index a450511235d..73d12389821 100644 --- a/src/plugins/intel_cpu/src/nodes/reorder.cpp +++ b/src/plugins/intel_cpu/src/nodes/reorder.cpp @@ -204,6 +204,10 @@ void MKLDNNReorderNode::createReorderPrimitive(const mkldnn::memory::desc& srcDe void* srcPtr, const mkldnn::memory::desc& dstDesc, void* dstPtr) { + auto selectedPD = getSelectedPrimitiveDescriptor(); + if (!selectedPD) + IE_THROW() << "Preferable primitive descriptor is not set."; + const auto engine = getEngine(); src_blocked = std::make_shared(engine); src_blocked->Create(MKLDNNExtensionUtils::makeDescriptor(srcDesc), srcPtr, false); @@ -211,7 +215,7 @@ void MKLDNNReorderNode::createReorderPrimitive(const mkldnn::memory::desc& srcDe dst_blocked = std::make_shared(engine); dst_blocked->Create(MKLDNNExtensionUtils::makeDescriptor(dstDesc), dstPtr, false); - impl_desc_type impl_type; + impl_desc_type impl_type = selectedPD->getImplementationType(); ReorderKey key = {src_blocked->GetPrimitive().get_desc(), dst_blocked->GetPrimitive().get_desc()}; auto builder = [&engine, &impl_type](const ReorderKey& key) -> std::shared_ptr { From 1d33c37970ee37ea7fab2a83d339930d158c9dc3 Mon Sep 17 00:00:00 2001 From: Vladislav Volkov Date: Mon, 21 Feb 2022 18:47:24 +0300 Subject: [PATCH 038/310] [CPU] Issue in opset name determining (#10479) --- .../include/ngraph_ops/nms_ie_internal.hpp | 2 +- .../ngraph_ops/nms_static_shape_ie.hpp | 2 +- src/core/src/pass/serialize.cpp | 8 +- src/frontends/ir/src/ir_deserializer.cpp | 28 ++- .../move_eltwise_up_data_movement.cpp | 1 - src/plugins/intel_cpu/src/plugin.cpp | 4 +- .../move_eltwise_up_data_movement_test.cpp | 223 ++++++------------ 7 files changed, 97 insertions(+), 171 deletions(-) diff --git a/src/common/transformations/include/ngraph_ops/nms_ie_internal.hpp b/src/common/transformations/include/ngraph_ops/nms_ie_internal.hpp index 6fe4eb3a818..34a5b6ec7ab 100644 --- a/src/common/transformations/include/ngraph_ops/nms_ie_internal.hpp +++ b/src/common/transformations/include/ngraph_ops/nms_ie_internal.hpp @@ -18,7 +18,7 
@@ namespace internal { class TRANSFORMATIONS_API NonMaxSuppressionIEInternal : public Op { public: - OPENVINO_OP("NonMaxSuppressionIEInternal", "util"); + OPENVINO_OP("NonMaxSuppressionIEInternal", "ie_internal_opset"); BWDCMP_RTTI_DECLARATION; NonMaxSuppressionIEInternal() = default; diff --git a/src/common/transformations/include/ngraph_ops/nms_static_shape_ie.hpp b/src/common/transformations/include/ngraph_ops/nms_static_shape_ie.hpp index d5dff78ea9c..cb7ee61fdb5 100644 --- a/src/common/transformations/include/ngraph_ops/nms_static_shape_ie.hpp +++ b/src/common/transformations/include/ngraph_ops/nms_static_shape_ie.hpp @@ -109,7 +109,7 @@ const ::ngraph::Node::type_info_t& NmsStaticShapeIE::get_type_info_st static const std::string name = BaseNmsOpTypeInfoPtr->name; static const ::ngraph::Node::type_info_t type_info_static{ - name.c_str(), BaseNmsOpTypeInfoPtr->version, BaseNmsOpTypeInfoPtr}; + name.c_str(), BaseNmsOpTypeInfoPtr->version, "ie_internal_opset", BaseNmsOpTypeInfoPtr}; return type_info_static; } diff --git a/src/core/src/pass/serialize.cpp b/src/core/src/pass/serialize.cpp index b6ba8b46802..2c86d37d2f9 100644 --- a/src/core/src/pass/serialize.cpp +++ b/src/core/src/pass/serialize.cpp @@ -568,9 +568,7 @@ const std::vector create_edge_mapping(const std::unordered_map& custom_opsets) { OPENVINO_ASSERT(n != nullptr); - if (n->get_type_info().version_id != nullptr) { - return n->get_type_info().version_id; - } + // Try to find opset name from RT info auto opset_it = n->get_rt_info().find("opset"); if (opset_it != n->get_rt_info().end()) { @@ -582,6 +580,10 @@ std::string get_opset_name(const ngraph::Node* n, const std::mapget_type_info().version_id != nullptr) { + return n->get_type_info().version_id; + } + for (const auto& custom_opset : custom_opsets) { std::string name = custom_opset.first; ngraph::OpSet opset = custom_opset.second; diff --git a/src/frontends/ir/src/ir_deserializer.cpp b/src/frontends/ir/src/ir_deserializer.cpp index 4cd2a74701d..1ba4aed8e30 100644 --- a/src/frontends/ir/src/ir_deserializer.cpp +++ b/src/frontends/ir/src/ir_deserializer.cpp @@ -608,6 +608,20 @@ GenericLayerParams XmlDeserializer::parseGenericParams(const pugi::xml_node& nod return params; } +// Symmetric function to translate type name. +// See translate_type_name in src/core/src/pass/serialize.cpp. 
+static const std::string& translate_type_name(const std::string& name) { + static const std::unordered_map translate_type_name_translator = {{"Const", "Constant"}, + {"PReLU", "PRelu"}, + {"ReLU", "Relu"}, + {"SoftMax", "Softmax"}}; + auto found = translate_type_name_translator.find(name); + if (found != end(translate_type_name_translator)) { + return found->second; + } + return name; +} + std::shared_ptr XmlDeserializer::createNode( const std::vector>& inputs, const pugi::xml_node& node, @@ -623,8 +637,10 @@ std::shared_ptr XmlDeserializer::createNode( << " has undefined element type for input with index " << i << "!"; } + const std::string& type_name = translate_type_name(params.type); + std::shared_ptr ngraphNode; - ov::DiscreteTypeInfo type(params.type.c_str(), 0, params.version.c_str()); + ov::DiscreteTypeInfo type(type_name.c_str(), 0, params.version.c_str()); auto extensionIt = m_extensions.find(type); if (extensionIt != m_extensions.end()) { @@ -646,17 +662,15 @@ std::shared_ptr XmlDeserializer::createNode( "RNNCell", "Proposal"}; - if (experimental_ops_added_to_opset.count(params.type) && + if (experimental_ops_added_to_opset.count(type_name) && (params.version == "experimental" || params.version == "extension")) { opsetIt = m_opsets.find("opset6"); } if (!ngraphNode && opsetIt != m_opsets.end()) { - auto const& type = params.type == "Const" ? "Constant" : params.type; - if (params.version == "opset1") { // MVN, ROIPooling and ReorgYolo were missing in opset1 - if (type == "MVN" || type == "ROIPooling" || type == "ReorgYolo") { + if (type_name == "MVN" || type_name == "ROIPooling" || type_name == "ReorgYolo") { opsetIt = m_opsets.find("opset2"); if (opsetIt == m_opsets.end()) { IE_THROW() << "Cannot create " << params.type << " layer " << params.name @@ -667,9 +681,9 @@ std::shared_ptr XmlDeserializer::createNode( auto const& opset = opsetIt->second; - ngraphNode = std::shared_ptr(opset.create_insensitive(type)); + ngraphNode = std::shared_ptr(opset.create_insensitive(type_name)); if (!ngraphNode) { - IE_THROW() << "Opset " << params.version << " doesn't contain the operation with type: " << type; + IE_THROW() << "Opset " << params.version << " doesn't contain the operation with type: " << type_name; } // Share Weights form constant blob if (auto constant = std::dynamic_pointer_cast(ngraphNode)) { diff --git a/src/plugins/intel_cpu/src/ngraph_transformations/move_eltwise_up_data_movement.cpp b/src/plugins/intel_cpu/src/ngraph_transformations/move_eltwise_up_data_movement.cpp index 34e8cd691e3..74eb8dc90cb 100644 --- a/src/plugins/intel_cpu/src/ngraph_transformations/move_eltwise_up_data_movement.cpp +++ b/src/plugins/intel_cpu/src/ngraph_transformations/move_eltwise_up_data_movement.cpp @@ -96,7 +96,6 @@ ov::intel_cpu::MoveEltwiseUpThroughDataMov::MoveEltwiseUpThroughDataMov() { eltwiseInputs[0] = child->input_value(0); auto newEltwise = eltwise->clone_with_new_inputs(eltwiseInputs); ngraph::copy_runtime_info(eltwise, newEltwise); - newEltwise->set_friendly_name(eltwise->get_friendly_name()); ngraph::OutputVector childInputs = child->input_values(); childInputs[0] = newEltwise; diff --git a/src/plugins/intel_cpu/src/plugin.cpp b/src/plugins/intel_cpu/src/plugin.cpp index 93c29e01884..b1e46577633 100644 --- a/src/plugins/intel_cpu/src/plugin.cpp +++ b/src/plugins/intel_cpu/src/plugin.cpp @@ -655,6 +655,7 @@ Engine::LoadExeNetworkImpl(const InferenceEngine::CNNNetwork &network, const std } auto config = orig_config; + CNNNetwork clonedNetwork = 
InferenceEngine::details::cloneNetwork(network); const auto& lptProp = config.find(InferenceEngine::PluginConfigInternalParams::KEY_LP_TRANSFORMS_MODE); const bool enableLPT = (lptProp != config.end() && lptProp->second == PluginConfigParams::YES) /* enabled in the orig_config*/ @@ -822,7 +823,8 @@ Parameter Engine::GetMetric(const std::string& name, const std::map rwProperties {RW_property(ov::num_streams.name()), diff --git a/src/tests/unit/cpu/ngraph_transformations/move_eltwise_up_data_movement_test.cpp b/src/tests/unit/cpu/ngraph_transformations/move_eltwise_up_data_movement_test.cpp index 3dd99f87f27..1358440ec98 100644 --- a/src/tests/unit/cpu/ngraph_transformations/move_eltwise_up_data_movement_test.cpp +++ b/src/tests/unit/cpu/ngraph_transformations/move_eltwise_up_data_movement_test.cpp @@ -14,11 +14,12 @@ using namespace testing; -TEST(MoveEltwiseUpThroughDataMov, SingleUnaryEltwise) { +class MoveEltwiseUpThroughDataMovTest: public TransformationTestsF{}; + +TEST_F(MoveEltwiseUpThroughDataMovTest, SingleUnaryEltwise) { const ngraph::Shape shape{1, 3, 224, 224}; const std::vector input_order = {3, 2, 1, 0}; const int64_t unsqueeze_axis = 2; - std::shared_ptr f(nullptr); { auto input = std::make_shared(ngraph::element::f32, shape); @@ -30,15 +31,9 @@ TEST(MoveEltwiseUpThroughDataMov, SingleUnaryEltwise) { auto sigmoid = std::make_shared(unsqueeze); - f = std::make_shared(ngraph::NodeVector{sigmoid}, ngraph::ParameterVector{input}); + function = std::make_shared(ngraph::NodeVector{sigmoid}, ngraph::ParameterVector{input}); + manager.register_pass(); } - - ngraph::pass::Manager m; - m.register_pass(); - m.register_pass(); - m.run_passes(f); - ASSERT_NO_THROW(check_rt_info(f)); - std::shared_ptr f_ref(nullptr); { auto input = std::make_shared(ngraph::element::f32, shape); @@ -50,19 +45,14 @@ TEST(MoveEltwiseUpThroughDataMov, SingleUnaryEltwise) { auto unsqueeze_const = ngraph::opset8::Constant::create(ngraph::element::i64, ngraph::Shape{}, {unsqueeze_axis}); auto unsqueeze = std::make_shared(transpose, unsqueeze_const); - f_ref = std::make_shared(ngraph::NodeVector{unsqueeze}, ngraph::ParameterVector{input}); + function_ref = std::make_shared(ngraph::NodeVector{unsqueeze}, ngraph::ParameterVector{input}); } - - auto res = compare_functions(f, f_ref); - - ASSERT_TRUE(res.first) << res.second; } -TEST(MoveEltwiseUpThroughDataMov, EltwiseSequence) { +TEST_F(MoveEltwiseUpThroughDataMovTest, EltwiseSequence) { const ngraph::Shape shape{1, 3, 224, 224}; const std::vector input_order = {1, 2, 0, 3}; const int64_t unsqueeze_axis = 1; - std::shared_ptr f(nullptr); { auto input_left = std::make_shared(ngraph::element::f32, shape); auto input_right = std::make_shared(ngraph::element::f32, shape); @@ -79,16 +69,9 @@ TEST(MoveEltwiseUpThroughDataMov, EltwiseSequence) { auto sigmoid = std::make_shared(unsqueeze); - f = std::make_shared(ngraph::NodeVector{sigmoid}, ngraph::ParameterVector{input_left, input_right}); + function = std::make_shared(ngraph::NodeVector{sigmoid}, ngraph::ParameterVector{input_left, input_right}); + manager.register_pass(); } - - ngraph::pass::Manager m; - m.register_pass(); - m.register_pass(); - m.run_passes(f); - ASSERT_NO_THROW(check_rt_info(f)); - - std::shared_ptr f_ref(nullptr); { auto input_left = std::make_shared(ngraph::element::f32, shape); auto input_right = std::make_shared(ngraph::element::f32, shape); @@ -105,60 +88,40 @@ TEST(MoveEltwiseUpThroughDataMov, EltwiseSequence) { auto unsqueeze_const = ngraph::opset8::Constant::create(ngraph::element::i64, 
ngraph::Shape{}, {unsqueeze_axis}); auto unsqueeze = std::make_shared(transpose, unsqueeze_const); - f_ref = std::make_shared(ngraph::NodeVector{unsqueeze}, ngraph::ParameterVector{input_left, input_right}); + function_ref = std::make_shared(ngraph::NodeVector{unsqueeze}, ngraph::ParameterVector{input_left, input_right}); } - - auto res = compare_functions(f, f_ref); - - ASSERT_TRUE(res.first) << res.second; } -TEST(MoveEltwiseUpThroughDataMov, DataMovementTwoConsumers) { +TEST_F(MoveEltwiseUpThroughDataMovTest, DataMovementTwoConsumers) { /* In this case transformation shouldn't apply */ - auto create_graph = [] () -> std::shared_ptr { - const ngraph::Shape shape{1, 3, 224, 224}; - const std::vector input_order = {1, 2, 0, 3}; - const int64_t unsqueeze_axis = 1; + const ngraph::Shape shape{1, 3, 224, 224}; + const std::vector input_order = {1, 2, 0, 3}; + const int64_t unsqueeze_axis = 1; - auto input_left = std::make_shared(ngraph::element::f32, shape); - auto input_right = std::make_shared(ngraph::element::f32, shape); + auto input_left = std::make_shared(ngraph::element::f32, shape); + auto input_right = std::make_shared(ngraph::element::f32, shape); - auto matmul = std::make_shared(input_left, input_right); + auto matmul = std::make_shared(input_left, input_right); - auto transpose_const = ngraph::opset8::Constant::create(ngraph::element::i64, ngraph::Shape{input_order.size()}, input_order); - auto transpose = std::make_shared(matmul, transpose_const); + auto transpose_const = ngraph::opset8::Constant::create(ngraph::element::i64, ngraph::Shape{input_order.size()}, input_order); + auto transpose = std::make_shared(matmul, transpose_const); - auto unsqueeze_const = ngraph::opset8::Constant::create(ngraph::element::i64, ngraph::Shape{}, {unsqueeze_axis}); - auto unsqueeze = std::make_shared(transpose, unsqueeze_const); + auto unsqueeze_const = ngraph::opset8::Constant::create(ngraph::element::i64, ngraph::Shape{}, {unsqueeze_axis}); + auto unsqueeze = std::make_shared(transpose, unsqueeze_const); - auto sigmoid = std::make_shared(unsqueeze); + auto sigmoid = std::make_shared(unsqueeze); - auto relu = std::make_shared(transpose); + auto relu = std::make_shared(transpose); - return std::make_shared(ngraph::NodeVector{sigmoid, relu}, ngraph::ParameterVector{input_left, input_right}); - }; - - std::shared_ptr f = create_graph(); - - ngraph::pass::Manager m; - m.register_pass(); - m.register_pass(); - m.run_passes(f); - ASSERT_NO_THROW(check_rt_info(f)); - - std::shared_ptr f_ref = create_graph(); - - auto res = compare_functions(f, f_ref); - - ASSERT_TRUE(res.first) << res.second; + function = std::make_shared(ngraph::NodeVector{sigmoid, relu}, ngraph::ParameterVector{input_left, input_right}); + manager.register_pass(); } -TEST(MoveEltwiseUpThroughDataMov, SingleBinaryEltwiseWithScalarOnSecondBranch) { +TEST_F(MoveEltwiseUpThroughDataMovTest, SingleBinaryEltwiseWithScalarOnSecondBranch) { const ngraph::Shape shape{1, 3, 224, 224}; const std::vector input_order = {3, 2, 1, 0}; const int64_t unsqueeze_axis = 2; const float scalar_value = 0.5f; - std::shared_ptr f(nullptr); { auto input = std::make_shared(ngraph::element::f32, shape); @@ -170,14 +133,9 @@ TEST(MoveEltwiseUpThroughDataMov, SingleBinaryEltwiseWithScalarOnSecondBranch) { auto add = std::make_shared(unsqueeze, ngraph::opset8::Constant::create(ngraph::element::f32, {}, {scalar_value})); - f = std::make_shared(ngraph::NodeVector{add}, ngraph::ParameterVector{input}); + manager.register_pass(); + function = 
std::make_shared(ngraph::NodeVector{add}, ngraph::ParameterVector{input}); } - ngraph::pass::Manager m; - m.register_pass(); - m.register_pass(); - m.run_passes(f); - ASSERT_NO_THROW(check_rt_info(f)); - std::shared_ptr f_ref(nullptr); { auto input = std::make_shared(ngraph::element::f32, shape); @@ -189,20 +147,15 @@ TEST(MoveEltwiseUpThroughDataMov, SingleBinaryEltwiseWithScalarOnSecondBranch) { auto unsqueeze_const = ngraph::opset8::Constant::create(ngraph::element::i64, ngraph::Shape{}, {unsqueeze_axis}); auto unsqueeze = std::make_shared(transpose, unsqueeze_const); - f_ref = std::make_shared(ngraph::NodeVector{unsqueeze}, ngraph::ParameterVector{input}); + function_ref = std::make_shared(ngraph::NodeVector{unsqueeze}, ngraph::ParameterVector{input}); } - - auto res = compare_functions(f, f_ref); - - ASSERT_TRUE(res.first) << res.second; } -TEST(MoveEltwiseUpThroughDataMov, SingleEltwiseWith5ScalarOnSecondBranch) { +TEST_F(MoveEltwiseUpThroughDataMovTest, SingleEltwiseWith5ScalarOnSecondBranch) { const ngraph::Shape shape{1, 3, 224, 224}; const std::vector input_order = {3, 2, 1, 0}; const int64_t unsqueeze_axis = 2; const float scalar_value = 0.5f; - std::shared_ptr f(nullptr); { auto input = std::make_shared(ngraph::element::f32, shape); @@ -211,14 +164,9 @@ TEST(MoveEltwiseUpThroughDataMov, SingleEltwiseWith5ScalarOnSecondBranch) { auto add = std::make_shared(unsqueeze, ngraph::opset8::Constant::create(ngraph::element::f32, {1, 1, 1, 1, 1}, {scalar_value})); - f = std::make_shared(ngraph::NodeVector{add}, ngraph::ParameterVector{input}); + manager.register_pass(); + function = std::make_shared(ngraph::NodeVector{add}, ngraph::ParameterVector{input}); } - ngraph::pass::Manager m; - m.register_pass(); - m.register_pass(); - m.run_passes(f); - ASSERT_NO_THROW(check_rt_info(f)); - std::shared_ptr f_ref(nullptr); { auto input = std::make_shared(ngraph::element::f32, shape); @@ -227,50 +175,33 @@ TEST(MoveEltwiseUpThroughDataMov, SingleEltwiseWith5ScalarOnSecondBranch) { auto unsqueeze_const = ngraph::opset8::Constant::create(ngraph::element::i64, ngraph::Shape{}, {unsqueeze_axis}); auto unsqueeze = std::make_shared(add, unsqueeze_const); - f_ref = std::make_shared(ngraph::NodeVector{unsqueeze}, ngraph::ParameterVector{input}); + function_ref = std::make_shared(ngraph::NodeVector{unsqueeze}, ngraph::ParameterVector{input}); } - - auto res = compare_functions(f, f_ref); - - ASSERT_TRUE(res.first) << res.second; } -TEST(MoveEltwiseUpThroughDataMov, SingleBinaryEltwiseWithNotScalarOnSecondBranch) { - auto create_graph = [] () -> std::shared_ptr { - const ngraph::Shape shape{1, 3, 224, 224}; - const std::vector input_order = {3, 2, 1, 0}; - const int64_t unsqueeze_axis = 2; - std::shared_ptr f(nullptr); - auto input = std::make_shared(ngraph::element::f32, shape); - - auto transpose_const = ngraph::opset8::Constant::create(ngraph::element::i64, ngraph::Shape{input_order.size()}, input_order); - auto transpose = std::make_shared(input, transpose_const); - - auto unsqueeze_const = ngraph::opset8::Constant::create(ngraph::element::i64, ngraph::Shape{}, {unsqueeze_axis}); - auto unsqueeze = std::make_shared(transpose, unsqueeze_const); - - auto add_scalar = ngraph::opset8::Constant::create(ngraph::element::f32, {1, 1, 1, 3}, {0.5, 0.2, 0.3}); - auto add = std::make_shared(unsqueeze, add_scalar); - - return std::make_shared(ngraph::NodeVector{add}, ngraph::ParameterVector{input}); - }; - std::shared_ptr f = create_graph(); - ngraph::pass::Manager m; - m.register_pass(); - m.register_pass(); - 
m.run_passes(f); - ASSERT_NO_THROW(check_rt_info(f)); - - std::shared_ptr f_ref = create_graph(); - auto res = compare_functions(f, f_ref); - - ASSERT_TRUE(res.first) << res.second; -} - -TEST(MoveEltwiseUpThroughDataMov, SingleUnaryEltwiseDynamicShape) { +TEST_F(MoveEltwiseUpThroughDataMovTest, SingleBinaryEltwiseWithNotScalarOnSecondBranch) { + const ngraph::Shape shape{1, 3, 224, 224}; + const std::vector input_order = {3, 2, 1, 0}; + const int64_t unsqueeze_axis = 2; + + auto input = std::make_shared(ngraph::element::f32, shape); + + auto transpose_const = ngraph::opset8::Constant::create(ngraph::element::i64, ngraph::Shape{input_order.size()}, input_order); + auto transpose = std::make_shared(input, transpose_const); + + auto unsqueeze_const = ngraph::opset8::Constant::create(ngraph::element::i64, ngraph::Shape{}, {unsqueeze_axis}); + auto unsqueeze = std::make_shared(transpose, unsqueeze_const); + + auto add_scalar = ngraph::opset8::Constant::create(ngraph::element::f32, {1, 1, 1, 3}, {0.5, 0.2, 0.3}); + auto add = std::make_shared(unsqueeze, add_scalar); + + function = std::make_shared(ngraph::NodeVector{add}, ngraph::ParameterVector{input}); + manager.register_pass(); +} + +TEST_F(MoveEltwiseUpThroughDataMovTest, SingleUnaryEltwiseDynamicShape) { const std::vector input_order = {3, 2, 1, 0}; const int64_t unsqueeze_axis = 2; - std::shared_ptr f(nullptr); { auto input = std::make_shared(ngraph::element::f32, ngraph::PartialShape::dynamic(3)); @@ -279,15 +210,10 @@ TEST(MoveEltwiseUpThroughDataMov, SingleUnaryEltwiseDynamicShape) { auto sigmoid = std::make_shared(unsqueeze); - f = std::make_shared(ngraph::NodeVector{sigmoid}, ngraph::ParameterVector{input}); + function = std::make_shared(ngraph::NodeVector{sigmoid}, ngraph::ParameterVector{input}); + manager.register_pass(); } - ngraph::pass::Manager m; - m.register_pass(); - m.register_pass(); - m.run_passes(f); - ASSERT_NO_THROW(check_rt_info(f)); - std::shared_ptr f_ref(nullptr); { auto input = std::make_shared(ngraph::element::f32, ngraph::PartialShape::dynamic(3)); @@ -296,36 +222,19 @@ TEST(MoveEltwiseUpThroughDataMov, SingleUnaryEltwiseDynamicShape) { auto unsqueeze_const = ngraph::opset8::Constant::create(ngraph::element::i64, ngraph::Shape{}, {unsqueeze_axis}); auto unsqueeze = std::make_shared(sigmoid, unsqueeze_const); - f_ref = std::make_shared(ngraph::NodeVector{unsqueeze}, ngraph::ParameterVector{input}); + function_ref = std::make_shared(ngraph::NodeVector{unsqueeze}, ngraph::ParameterVector{input}); } - - auto res = compare_functions(f, f_ref); - - ASSERT_TRUE(res.first) << res.second; } -TEST(MoveEltwiseUpThroughDataMov, SingleUnaryEltwiseDynamicRank) { - auto create_graph = [] () -> std::shared_ptr { - const std::vector input_order = {3, 2, 1, 0}; - const int64_t unsqueeze_axis = 2; - std::shared_ptr f(nullptr); - auto input = std::make_shared(ngraph::element::f32, ngraph::PartialShape::dynamic(ngraph::Rank::dynamic())); +TEST_F(MoveEltwiseUpThroughDataMovTest, SingleUnaryEltwiseDynamicRank) { + const std::vector input_order = {3, 2, 1, 0}; + const int64_t unsqueeze_axis = 2; - auto unsqueeze_const = ngraph::opset8::Constant::create(ngraph::element::i64, ngraph::Shape{}, {unsqueeze_axis}); - auto unsqueeze = std::make_shared(input, unsqueeze_const); - auto sigmoid = std::make_shared(unsqueeze); - return std::make_shared(ngraph::NodeVector{sigmoid}, ngraph::ParameterVector{input}); - }; - std::shared_ptr f = create_graph(); - ngraph::pass::Manager m; - m.register_pass(); - m.register_pass(); + auto input = 
std::make_shared(ngraph::element::f32, ngraph::PartialShape::dynamic(ngraph::Rank::dynamic())); - m.run_passes(f); - ASSERT_NO_THROW(check_rt_info(f)); - - std::shared_ptr f_ref = create_graph(); - auto res = compare_functions(f, f_ref); - - ASSERT_TRUE(res.first) << res.second; + auto unsqueeze_const = ngraph::opset8::Constant::create(ngraph::element::i64, ngraph::Shape{}, {unsqueeze_axis}); + auto unsqueeze = std::make_shared(input, unsqueeze_const); + auto sigmoid = std::make_shared(unsqueeze); + function = std::make_shared(ngraph::NodeVector{sigmoid}, ngraph::ParameterVector{input}); + manager.register_pass(); } From 65d15756428ddac382c9aa6082b3ef90b37fd51b Mon Sep 17 00:00:00 2001 From: Nikolay Tyukaev Date: Mon, 21 Feb 2022 18:48:29 +0300 Subject: [PATCH 039/310] DOCS: ovms integration (#10528) * ignore model server pages * merge * fixed link to ovms docs * workbench fix Co-authored-by: azaytsev --- docs/conf.py | 6 ++ docs/documentation.md | 2 +- docs/doxygen-xfail.txt | 5 ++ docs/model_server/README.md | 143 --------------------------------- docs/ovsa/ovsa_get_started.md | 2 +- docs/scripts/create_mapping.py | 1 + 6 files changed, 14 insertions(+), 145 deletions(-) delete mode 100644 docs/model_server/README.md diff --git a/docs/conf.py b/docs/conf.py index 692c2302fda..49222306ee0 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -108,6 +108,12 @@ repositories = { 'github_repo': 'open_model_zoo', 'github_version': 'master', 'host_url': 'https://github.com' + }, + 'ovms': { + 'github_user': 'openvinotoolkit', + 'github_repo': 'model_server', + 'github_version': 'main', + 'host_url': 'https://github.com' } } diff --git a/docs/documentation.md b/docs/documentation.md index 9a89656a6dd..731c7d7aa13 100644 --- a/docs/documentation.md +++ b/docs/documentation.md @@ -63,7 +63,7 @@ :caption: Add-Ons :hidden: - openvino_docs_ovms + ovms_what_is_openvino_model_server ovsa_get_started .. toctree:: diff --git a/docs/doxygen-xfail.txt b/docs/doxygen-xfail.txt index 80435be0e28..d0be8692508 100644 --- a/docs/doxygen-xfail.txt +++ b/docs/doxygen-xfail.txt @@ -60,4 +60,9 @@ openvino_docs_ie_dg_lpt_variadicsplittransformation.rst openvino_docs_ie_plugin_dg_lp_representation.rst openvino_docs_ie_dg_lpt.rst notebooks/notebook_utils-with-output.rst +ovms_extras_nginx-mtls-auth-readme.rst +ovms_client_python_lib_readme.rst +ovms_docs_shape_batch_layout.rst api/api_reference.rst +workbench/docs/workbench_dg/key_concepts.md +workbench/docs/workbench_dg/run_single_inference.md diff --git a/docs/model_server/README.md b/docs/model_server/README.md deleted file mode 100644 index 6bdd36eae29..00000000000 --- a/docs/model_server/README.md +++ /dev/null @@ -1,143 +0,0 @@ -# OpenVINO™ Model Server {#openvino_docs_ovms} - -OpenVINO™ Model Server (OVMS) is a scalable, high-performance solution for serving machine learning models optimized for Intel® architectures. -The server provides an inference service via gRPC or REST API - making it easy to deploy new algorithms and AI experiments using the same -architecture as [TensorFlow* Serving](https://github.com/tensorflow/serving) for any models trained in a framework that is supported -by [OpenVINO](https://software.intel.com/en-us/openvino-toolkit). - -The server implements gRPC and REST API framework with data serialization and deserialization using TensorFlow Serving API, - and OpenVINO™ as the inference execution provider. 
Model repositories may reside on a locally accessible file system (for example, NFS), - Google Cloud Storage\* (GCS), Amazon S3\*, MinIO\*, or Azure Blob Storage\*. - -OVMS is now implemented in C++ and provides much higher scalability compared to its predecessor in the Python version. -You can take advantage of all the power of Xeon® CPU capabilities or AI accelerators and expose it over the network interface. -Read the [release notes](https://github.com/openvinotoolkit/model_server/releases) to find out what's new in the C++ version. - -Review the [Architecture Concept](https://github.com/openvinotoolkit/model_server/blob/main/docs/architecture.md) document for more details. - -A few key features: -- Support for multiple frameworks. Serve models trained in popular formats such as Caffe\*, TensorFlow\*, MXNet\*, and ONNX*. -- Deploy new [model versions](https://github.com/openvinotoolkit/model_server/blob/main/docs/docker_container.md#model-version-policy) without changing client code. -- Support for AI accelerators including [Intel Movidius Myriad VPUs](../OV_Runtime_UG/supported_plugins/VPU.md), -[GPU](../OV_Runtime_UG/supported_plugins/GPU.md), and [HDDL](../OV_Runtime_UG/supported_plugins/HDDL.md). -- The server can be enabled both on [Bare Metal Hosts](https://github.com/openvinotoolkit/model_server/blob/main/docs/host.md) or in -[Docker* containers](https://github.com/openvinotoolkit/model_server/blob/main/docs/docker_container.md). -- [Kubernetes deployments](https://github.com/openvinotoolkit/model_server/blob/main/deploy). The server can be deployed in a Kubernetes cluster allowing the inference service to scale horizontally and ensure high availability. -- [Model reshaping](https://github.com/openvinotoolkit/model_server/blob/main/docs/docker_container.md#model-reshaping). The server supports reshaping models in runtime. -- [Model ensemble](https://github.com/openvinotoolkit/model_server/blob/main/docs/ensemble_scheduler.md) (preview). Connect multiple models to deploy complex processing solutions and reduce overhead of sending data back and forth. - -> **NOTE**: OVMS has been tested on CentOS\* and Ubuntu\*. Publicly released [Docker images](https://hub.docker.com/r/openvino/model_server) are based on CentOS. - -## Build OpenVINO Model Server - -1. Go to the root directory of the repository. - -2. Build the Docker image with the command below: -```bash -make docker_build -``` - -The command generates: -* Image tagged as `openvino/model_server:latest` with CPU, NCS, and HDDL support -* Image tagged as `openvino/model_server:latest-gpu` with CPU, NCS, HDDL, and iGPU support -* `.tar.gz` release package with OVMS binary and necessary libraries in the `./dist` directory. - -The release package is compatible with Linux machines on which `glibc` version is greater than or equal to the build image version. -For debugging, the command also generates an image with a suffix `-build`, namely `openvino/model_server-build:latest`. - -> **NOTE**: Images include OpenVINO 2021.1 release. - - -## Run OpenVINO Model Server - -Find a detailed description of how to use the OpenVINO Model Server in the [OVMS Quick Start Guide](https://github.com/openvinotoolkit/model_server/blob/main/docs/ovms_quickstart.md). 
- - -For more detailed guides on using the Model Server in various scenarios, visit the links below: - -* [Models repository configuration](https://github.com/openvinotoolkit/model_server/blob/main/docs/models_repository.md) - -* [Using a Docker container](https://github.com/openvinotoolkit/model_server/blob/main/docs/docker_container.md) - -* [Landing on bare metal or virtual machine](https://github.com/openvinotoolkit/model_server/blob/main/docs/host.md) - -* [Performance tuning](https://github.com/openvinotoolkit/model_server/blob/main/docs/performance_tuning.md) - -* [Model Ensemble Scheduler](https://github.com/openvinotoolkit/model_server/blob/main/docs/ensemble_scheduler.md) - - -## API Documentation - -### GRPC - -OpenVINO™ Model Server gRPC API is documented in the proto buffer files in [tensorflow_serving_api](https://github.com/tensorflow/serving/tree/r2.2/tensorflow_serving/apis). - -> **NOTE**: The implementations for `Predict`, `GetModelMetadata`, and `GetModelStatus` function calls are currently available. -> These are the most generic function calls and should address most of the usage scenarios. - -[Predict proto](https://github.com/tensorflow/serving/blob/r2.2/tensorflow_serving/apis/predict.proto) defines two message specifications: `PredictRequest` and `PredictResponse` used while calling Prediction endpoint. -* `PredictRequest` specifies information about the model spec, that is name and version, and a map of input data serialized via -[TensorProto](https://github.com/tensorflow/tensorflow/blob/r2.2/tensorflow/core/framework/tensor.proto) to a string format. -* `PredictResponse` includes a map of outputs serialized by -[TensorProto](https://github.com/tensorflow/tensorflow/blob/r2.2/tensorflow/core/framework/tensor.proto) and information about the used model spec. - -[Get Model Metadata proto](https://github.com/tensorflow/serving/blob/r2.2/tensorflow_serving/apis/get_model_metadata.proto) defines three message definitions used while calling Metadata endpoint: - `SignatureDefMap`, `GetModelMetadataRequest`, `GetModelMetadataResponse`. - - A function call `GetModelMetadata` accepts model spec information as input and returns Signature Definition content in the format similar to TensorFlow Serving. - -[Get Model Status proto](https://github.com/tensorflow/serving/blob/r2.2/tensorflow_serving/apis/get_model_status.proto) defines three message definitions used while calling Status endpoint: - `GetModelStatusRequest`, `ModelVersionStatus`, `GetModelStatusResponse` that report all exposed versions including their state in their lifecycle. - -Refer to the [example client code](https://github.com/openvinotoolkit/model_server/blob/main/example_client) to learn how to use this API and submit the requests using the gRPC interface. - -Using the gRPC interface is recommended for optimal performance due to its faster implementation of input data deserialization. It enables you to achieve lower latency, especially with larger input messages like images. - -### REST - -OpenVINO™ Model Server RESTful API follows the documentation from the [TensorFlow Serving REST API](https://www.tensorflow.org/tfx/serving/api_rest). - -Both row and column format of the requests are implemented. - -> **NOTE**: Just like with gRPC, only the implementations for `Predict`, `GetModelMetadata`, and `GetModelStatus` function calls are currently available. - -Only the numerical data types are supported. - -Review the exemplary clients below to find out more how to connect and run inference requests. 
- -REST API is recommended when the primary goal is in reducing the number of client side Python dependencies and simpler application code. - - -## Known Limitations - -* Currently, `Predict`, `GetModelMetadata`, and `GetModelStatus` calls are implemented using the TensorFlow Serving API. -* `Classify`, `Regress`, and `MultiInference` are not included. -* `Output_filter` is not effective in the `Predict` call. All outputs defined in the model are returned to the clients. - -## OpenVINO Model Server Contribution Policy - -* All contributed code must be compatible with the [Apache 2](https://www.apache.org/licenses/LICENSE-2.0) license. - -* All changes have to pass linter, unit, and functional tests. - -* All new features need to be covered by tests. - - -## References - -* [Speed and Scale AI Inference Operations Across Multiple Architectures - webinar recording](https://techdecoded.intel.io/essentials/speed-and-scale-ai-inference-operations-across-multiple-architectures/) - -* [OpenVINO™](https://software.intel.com/en-us/openvino-toolkit) - -* [TensorFlow Serving](https://github.com/tensorflow/serving) - -* [gRPC](https://grpc.io/) - -* [RESTful API](https://restfulapi.net/) - -* [Inference at Scale in Kubernetes](https://www.intel.ai/inference-at-scale-in-kubernetes) - - - ---- -\* Other names and brands may be claimed as the property of others. diff --git a/docs/ovsa/ovsa_get_started.md b/docs/ovsa/ovsa_get_started.md index 84cc5d47224..08ffe9f1869 100644 --- a/docs/ovsa/ovsa_get_started.md +++ b/docs/ovsa/ovsa_get_started.md @@ -11,7 +11,7 @@ In this release, one person performs the role of both the Model Developer and th ## Overview -The OpenVINO™ Security Add-on works with the [OpenVINO™ Model Server](@ref openvino_docs_ovms) on Intel® architecture. Together, the OpenVINO™ Security Add-on and the OpenVINO™ Model Server provide a way for Model Developers and Independent Software Vendors to use secure packaging and secure model execution to enable access control to the OpenVINO™ models, and for model Users to run inference within assigned limits. +The OpenVINO™ Security Add-on works with the [OpenVINO™ Model Server](@ref ovms_what_is_openvino_model_server) on Intel® architecture. Together, the OpenVINO™ Security Add-on and the OpenVINO™ Model Server provide a way for Model Developers and Independent Software Vendors to use secure packaging and secure model execution to enable access control to the OpenVINO™ models, and for model Users to run inference within assigned limits. The OpenVINO™ Security Add-on consists of three components that run in Kernel-based Virtual Machines (KVMs). These components provide a way to run security-sensitive operations in an isolated environment. A brief description of the three components are as follows. Click each triangled line for more information about each. 
diff --git a/docs/scripts/create_mapping.py b/docs/scripts/create_mapping.py index 7fd0bbcb249..4e220591f34 100644 --- a/docs/scripts/create_mapping.py +++ b/docs/scripts/create_mapping.py @@ -11,6 +11,7 @@ REPOSITORIES = [ 'openvino', 'omz', 'pot' + 'ovms' ] From f82533005b5680a4998a687010d38067bb384718 Mon Sep 17 00:00:00 2001 From: Mikhail Nosov Date: Mon, 21 Feb 2022 19:20:23 +0300 Subject: [PATCH 040/310] [OV2.0] Preprocessing documentation (#10451) * [OV2.0] Preprocessing documentation - first draft * Small update * Added ov::Layout overview * Fix code style * Preprocessing details - ~50% done * Corrected links * Fixed comments, added more docs * Minor updates * Couple more links * Fixed comments * Remove 'future' link --- .../OpenVINO_Runtime_User_Guide.md | 1 + docs/OV_Runtime_UG/img/preprocess_not_fit.png | 3 + docs/OV_Runtime_UG/layout_overview.md | 154 ++++++++ docs/OV_Runtime_UG/preprocessing_details.md | 346 ++++++++++++++++++ docs/OV_Runtime_UG/preprocessing_overview.md | 169 +++++++++ docs/glossary.md | 30 +- docs/snippets/ov_layout.cpp | 55 +++ docs/snippets/ov_layout.py | 54 +++ docs/snippets/ov_preprocessing.cpp | 152 ++++++++ docs/snippets/ov_preprocessing.py | 171 +++++++++ src/core/include/openvino/core/layout.hpp | 18 + .../core/preprocess/postprocess_steps.hpp | 36 +- .../core/preprocess/pre_post_process.hpp | 4 - .../core/preprocess/preprocess_steps.hpp | 45 +-- 14 files changed, 1181 insertions(+), 57 deletions(-) create mode 100644 docs/OV_Runtime_UG/img/preprocess_not_fit.png create mode 100644 docs/OV_Runtime_UG/layout_overview.md create mode 100644 docs/OV_Runtime_UG/preprocessing_details.md create mode 100644 docs/OV_Runtime_UG/preprocessing_overview.md create mode 100644 docs/snippets/ov_layout.cpp create mode 100644 docs/snippets/ov_layout.py create mode 100644 docs/snippets/ov_preprocessing.cpp create mode 100644 docs/snippets/ov_preprocessing.py diff --git a/docs/OV_Runtime_UG/OpenVINO_Runtime_User_Guide.md b/docs/OV_Runtime_UG/OpenVINO_Runtime_User_Guide.md index bbe9c956f45..deaea820d7b 100644 --- a/docs/OV_Runtime_UG/OpenVINO_Runtime_User_Guide.md +++ b/docs/OV_Runtime_UG/OpenVINO_Runtime_User_Guide.md @@ -10,6 +10,7 @@ openvino_docs_IE_DG_Integrate_with_customer_application_new_API openvino_docs_OV_Runtime_UG_Model_Representation + openvino_docs_OV_Runtime_UG_Preprocessing_Overview openvino_docs_IE_DG_ShapeInference openvino_docs_IE_DG_Device_Plugins diff --git a/docs/OV_Runtime_UG/img/preprocess_not_fit.png b/docs/OV_Runtime_UG/img/preprocess_not_fit.png new file mode 100644 index 00000000000..32a43476707 --- /dev/null +++ b/docs/OV_Runtime_UG/img/preprocess_not_fit.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8fed5e153636e3e556e000e3e5fc48b9da8f5a1272490550066d647d306ec24f +size 81575 diff --git a/docs/OV_Runtime_UG/layout_overview.md b/docs/OV_Runtime_UG/layout_overview.md new file mode 100644 index 00000000000..a589f16b9f9 --- /dev/null +++ b/docs/OV_Runtime_UG/layout_overview.md @@ -0,0 +1,154 @@ +# Layout API overview {#openvino_docs_OV_Runtime_UG_Layout_Overview} + +## Introduction + +In few words, with layout `NCHW` it is easier to understand what model's shape `{8, 3, 224, 224}` means. Without layout it is just a 4-dimensional tensor. + + +Concept of layout helps you (and your application) to understand what does each particular dimension of input/output tensor mean. 
For example, if your input has shape `{1, 3, 720, 1280}` and layout "NCHW" - it is clear that `N(batch) = 1`, `C(channels) = 3`, `H(height) = 720` and `W(width) = 1280`. Without layout information `{1, 3, 720, 1280}` doesn't give any idea to your application what these number mean and how to resize input image to fit model's expectations. + + +Reasons when you may want to care about input/output layout: + - Perform model modification: + - Apply [preprocessing](./preprocessing_overview.md) steps, like subtract means, divide by scales, resize image, convert RGB<->BGR + - Set/get batch for a model + - Same operations, used during model conversion phase, see [Model Optimizer model conversion](../MO_DG/prepare_model/convert_model/Converting_Model.md) + - Improve readability of a model's input and output + +## Layout syntax + +### Short +The easiest way is to fully specify each dimension with one alphabetical letter + +@sphinxdirective + +.. tab:: C++ + + .. doxygensnippet:: docs/snippets/ov_layout.cpp + :language: cpp + :fragment: [ov:layout:simple] + +.. tab:: Python + + .. doxygensnippet:: docs/snippets/ov_layout.py + :language: python + :fragment: [ov:layout:simple] + +@endsphinxdirective + +This assigns 'N' to first dimension, 'C' to second, 'H' to 3rd and 'W' to 4th + +### Advanced +Advanced syntax allows assigning a word to a dimension. To do this, wrap layout with square brackets `[]` and specify each name separated by comma `,` + +@sphinxdirective + +.. tab:: C++ + + .. doxygensnippet:: docs/snippets/ov_layout.cpp + :language: cpp + :fragment: [ov:layout:complex] + +.. tab:: Python + + .. doxygensnippet:: docs/snippets/ov_layout.py + :language: python + :fragment: [ov:layout:complex] + +@endsphinxdirective + + +### Partially defined layout +If some dimension is not important, it's name can be set to `?` + +@sphinxdirective + +.. tab:: C++ + + .. doxygensnippet:: docs/snippets/ov_layout.cpp + :language: cpp + :fragment: [ov:layout:partially_defined] + +.. tab:: Python + + .. doxygensnippet:: docs/snippets/ov_layout.py + :language: python + :fragment: [ov:layout:partially_defined] + +@endsphinxdirective + + +### Dynamic layout +If number of dimensions is not important, ellipsis `...` can be used to specify variadic number of dimensions. + +@sphinxdirective + +.. tab:: C++ + + .. doxygensnippet:: docs/snippets/ov_layout.cpp + :language: cpp + :fragment: [ov:layout:dynamic] + +.. tab:: Python + + .. doxygensnippet:: docs/snippets/ov_layout.py + :language: python + :fragment: [ov:layout:dynamic] + +@endsphinxdirective + +### Predefined names + +Layout has pre-defined some widely used in computer vision dimension names: +- N/Batch - batch size +- C/Channels - channels dimension +- D/Depth - depth +- H/Height - height +- W/Width - width + +These names are used in [PreProcessing API](./preprocessing_overview.md) and there is a set of helper functions to get appropriate dimension index from layout + +@sphinxdirective + +.. tab:: C++ + + .. doxygensnippet:: docs/snippets/ov_layout.cpp + :language: cpp + :fragment: [ov:layout:predefined] + +.. tab:: Python + + .. doxygensnippet:: docs/snippets/ov_layout.py + :language: python + :fragment: [ov:layout:predefined] + +@endsphinxdirective + + +### Equality + +Layout names are case-insensitive, which means that ```Layout("NCHW") == Layout("nChW") == Layout("[N,c,H,w]")``` + +### Dump layout + +Layout can be converted to string in advanced syntax format. Can be useful for debugging and serialization purposes + +@sphinxdirective + +.. tab:: C++ + + .. 
doxygensnippet:: docs/snippets/ov_layout.cpp + :language: cpp + :fragment: [ov:layout:dump] + +.. tab:: Python + + .. doxygensnippet:: docs/snippets/ov_layout.py + :language: python + :fragment: [ov:layout:dump] + +@endsphinxdirective + +## See also + +* ov::Layout C++ class documentation diff --git a/docs/OV_Runtime_UG/preprocessing_details.md b/docs/OV_Runtime_UG/preprocessing_details.md new file mode 100644 index 00000000000..b7fa4e97161 --- /dev/null +++ b/docs/OV_Runtime_UG/preprocessing_details.md @@ -0,0 +1,346 @@ +# Preprocessing API - details {#openvino_docs_OV_Runtime_UG_Preprocessing_Details} + +## Preprocessing capabilities + +### Addressing particular input/output + +If your model has only one input, then simple ov::preprocess::PrePostProcessor::input() will get a reference to preprocessing builder for this input (tensor, steps, model): + +@sphinxdirective + +.. tab:: C++ + + .. doxygensnippet:: docs/snippets/ov_preprocessing.cpp + :language: cpp + :fragment: [ov:preprocess:input_1] + +.. tab:: Python + + .. doxygensnippet:: docs/snippets/ov_preprocessing.py + :language: python + :fragment: [ov:preprocess:input_1] + +@endsphinxdirective + +In general, when model has multiple inputs/outputs, each one can be addressed by tensor name + +@sphinxdirective + +.. tab:: C++ + + .. doxygensnippet:: docs/snippets/ov_preprocessing.cpp + :language: cpp + :fragment: [ov:preprocess:input_name] + +.. tab:: Python + + .. doxygensnippet:: docs/snippets/ov_preprocessing.py + :language: python + :fragment: [ov:preprocess:input_name] + +@endsphinxdirective + + +Or by it's index + +@sphinxdirective + +.. tab:: C++ + + .. doxygensnippet:: docs/snippets/ov_preprocessing.cpp + :language: cpp + :fragment: [ov:preprocess:input_index] + +.. tab:: Python + + .. doxygensnippet:: docs/snippets/ov_preprocessing.py + :language: python + :fragment: [ov:preprocess:input_index] + +@endsphinxdirective + +C++ references: + * ov::preprocess::InputTensorInfo + * ov::preprocess::OutputTensorInfo + * ov::preprocess::PrePostProcessor + + +### Supported preprocessing operations + +C++ references: +* ov::preprocess::PreProcessSteps + +#### Mean/Scale normalization + +Typical data normalization includes 2 operations for each data item: subtract mean value and divide to standard deviation. This can be done with the following code: + +@sphinxdirective + +.. tab:: C++ + + .. doxygensnippet:: docs/snippets/ov_preprocessing.cpp + :language: cpp + :fragment: [ov:preprocess:mean_scale] + +.. tab:: Python + + .. doxygensnippet:: docs/snippets/ov_preprocessing.py + :language: python + :fragment: [ov:preprocess:mean_scale] + +@endsphinxdirective + + +In Computer Vision area normalization is usually done separately for R, G, B values. To do this, [layout with 'C' dimension](./layout_overview.md) shall be defined. Example: + +@sphinxdirective + +.. tab:: C++ + + .. doxygensnippet:: docs/snippets/ov_preprocessing.cpp + :language: cpp + :fragment: [ov:preprocess:mean_scale_array] + +.. tab:: Python + + .. doxygensnippet:: docs/snippets/ov_preprocessing.py + :language: python + :fragment: [ov:preprocess:mean_scale_array] + +@endsphinxdirective + +C++ references: +* ov::preprocess::PreProcessSteps::mean() +* ov::preprocess::PreProcessSteps::scale() + + +#### Convert precision + +In Computer Vision, image is represented by array of unsigned 8-but integer values (for each color), but model accepts floating point tensors + +To integrate precision conversion into execution graph as a preprocessing step, just do: + +@sphinxdirective + +.. 
tab:: C++ + + .. doxygensnippet:: docs/snippets/ov_preprocessing.cpp + :language: cpp + :fragment: [ov:preprocess:convert_element_type] + +.. tab:: Python + + .. doxygensnippet:: docs/snippets/ov_preprocessing.py + :language: python + :fragment: [ov:preprocess:convert_element_type] + +@endsphinxdirective + +C++ references: + * ov::preprocess::InputTensorInfo::set_element_type() + * ov::preprocess::PreProcessSteps::convert_element_type() + + +#### Convert layout (transpose) + +Transposing of matrices/tensors is a typical operation in Deep Learning - you may have a BMP image 640x480 which is an array of `{480, 640, 3}` elements, but Deep Learning model can require input with shape `{1, 3, 480, 640}` + +Using [layout](./layout_overview.md) of user's tensor and layout of original model conversion can be done implicitly + +@sphinxdirective +.. tab:: C++ + + .. doxygensnippet:: docs/snippets/ov_preprocessing.cpp + :language: cpp + :fragment: [ov:preprocess:convert_layout] + +.. tab:: Python + + .. doxygensnippet:: docs/snippets/ov_preprocessing.py + :language: python + :fragment: [ov:preprocess:convert_layout] + +@endsphinxdirective + + +Or if you prefer manual transpose of axes without usage of [layout](./layout_overview.md) in your code, just do: + +@sphinxdirective + +.. tab:: C++ + + .. doxygensnippet:: docs/snippets/ov_preprocessing.cpp + :language: cpp + :fragment: [ov:preprocess:convert_layout_2] + +.. tab:: Python + + .. doxygensnippet:: docs/snippets/ov_preprocessing.py + :language: python + :fragment: [ov:preprocess:convert_layout_2] + +@endsphinxdirective + +It performs the same transpose, but we believe that approach using source and destination layout can be easier to read and understand + +C++ references: + * ov::preprocess::PreProcessSteps::convert_layout() + * ov::preprocess::InputTensorInfo::set_layout() + * ov::preprocess::InputModelInfo::set_layout() + * ov::Layout + +#### Resize image + +Resizing of image is a typical preprocessing step for computer vision tasks. With preprocessing API this step can also be integrated into execution graph and performed on target device. + +To resize the input image, it is needed to define `H` and `W` dimensions of [layout](./layout_overview.md) + +@sphinxdirective + +.. tab:: C++ + + .. doxygensnippet:: docs/snippets/ov_preprocessing.cpp + :language: cpp + :fragment: [ov:preprocess:resize_1] + +.. tab:: Python + + .. doxygensnippet:: docs/snippets/ov_preprocessing.py + :language: python + :fragment: [ov:preprocess:resize_1] + +@endsphinxdirective + +Or in case if original model has known spatial dimensions (widht+height), target width/height can be omitted + +@sphinxdirective + +.. tab:: C++ + + .. doxygensnippet:: docs/snippets/ov_preprocessing.cpp + :language: cpp + :fragment: [ov:preprocess:resize_2] + +.. tab:: Python + + .. doxygensnippet:: docs/snippets/ov_preprocessing.py + :language: python + :fragment: [ov:preprocess:resize_2] + +@endsphinxdirective + +C++ references: +* ov::preprocess::PreProcessSteps::resize() +* ov::preprocess::ResizeAlgorithm + + +#### Color conversion + +Typical use case is to reverse color channels from RGB to BGR and wise versa. To do this, specify source color format in `tensor` section and perform `convert_color` preprocessing operation. In example below, user has `BGR` image and needs to convert it to `RGB` as required for model's input + +@sphinxdirective + +.. tab:: C++ + + .. doxygensnippet:: docs/snippets/ov_preprocessing.cpp + :language: cpp + :fragment: [ov:preprocess:convert_color_1] + +.. 
tab:: Python + + .. doxygensnippet:: docs/snippets/ov_preprocessing.py + :language: python + :fragment: [ov:preprocess:convert_color_1] + +@endsphinxdirective + +#### Color conversion - NV12/I420 +Preprocessing also support YUV-family source color formats, i.e. NV12 and I420. +In advanced cases such YUV images can be splitted into separate planes, e.g. for NV12 images Y-component may come from one source and UV-component comes from another source. Concatenating such components in user's application manually is not a perfect solution from performance and device utilization perspectives, so there is a way to use Preprocessing API. For such cases there is `NV12_TWO_PLANES` and `I420_THREE_PLANES` source color formats, which will split original `input` to 2 or 3 inputs + +@sphinxdirective + +.. tab:: C++ + + .. doxygensnippet:: docs/snippets/ov_preprocessing.cpp + :language: cpp + :fragment: [ov:preprocess:convert_color_2] + +.. tab:: Python + + .. doxygensnippet:: docs/snippets/ov_preprocessing.py + :language: python + :fragment: [ov:preprocess:convert_color_2] + +@endsphinxdirective + +In this example, original `input` is being split to `input/y` and `input/uv` inputs. You can fill `input/y` from one source, and `input/uv` from another source. Color conversion to `RGB` will be performed using these sources, it is more optimal as there will be no additional copies of NV12 buffers. + +C++ references: +* ov::preprocess::ColorFormat +* ov::preprocess::PreProcessSteps::convert_color + + +### Custom operations + +Preprocessing API also allows adding custom preprocessing steps into execution graph. Custom step is a function which accepts current 'input' node and returns new node after adding preprocessing step + +> **Note:** Custom preprocessing function shall only insert node(s) after input, it will be done during model compilation. This function will NOT be called during execution phase. This may look not trivial and require some knowledge of [OpenVINO™ operations](../ops/opset.md) + +If there is a need to insert some additional operations to execution graph right after input, like some specific crops and/or resizes - Preprocessing API can be a good choice to implement this + +@sphinxdirective + +.. tab:: C++ + + .. doxygensnippet:: docs/snippets/ov_preprocessing.cpp + :language: cpp + :fragment: [ov:preprocess:custom] + +.. tab:: Python + + .. doxygensnippet:: docs/snippets/ov_preprocessing.py + :language: python + :fragment: [ov:preprocess:custom] + +@endsphinxdirective + +C++ references: +* ov::preprocess::PreProcessSteps::custom() +* [Available Operations Sets](../ops/opset.md) + +## Postprocessing + +Postprocessing steps can be added to model outputs. As for preprocessing, these steps will be also integrated into graph and executed on selected device. + +Preprocessing uses flow **User tensor** -> **Steps** -> **Model input** + +Postprocessing is wise versa: **Model output** -> **Steps** -> **User tensor** + +Comparing to preprocessing, there is not so much operations needed to do in post-processing stage, so right now only following postprocessing operations are supported: + - Convert [layout](./layout_overview.md) + - Convert element type + - Custom operations + +Usage of these operations is similar to Preprocessing. Some example is shown below: + +@sphinxdirective + +.. tab:: C++ + + .. doxygensnippet:: docs/snippets/ov_preprocessing.cpp + :language: cpp + :fragment: [ov:preprocess:postprocess] + +.. tab:: Python + + .. 
doxygensnippet:: docs/snippets/ov_preprocessing.py + :language: python + :fragment: [ov:preprocess:postprocess] + +@endsphinxdirective + +C++ references: +* ov::preprocess::PostProcessSteps +* ov::preprocess::OutputModelInfo +* ov::preprocess::OutputTensorInfo diff --git a/docs/OV_Runtime_UG/preprocessing_overview.md b/docs/OV_Runtime_UG/preprocessing_overview.md new file mode 100644 index 00000000000..bd35bdef429 --- /dev/null +++ b/docs/OV_Runtime_UG/preprocessing_overview.md @@ -0,0 +1,169 @@ +# Overview of Preprocessing API {#openvino_docs_OV_Runtime_UG_Preprocessing_Overview} + +@sphinxdirective + +.. toctree:: + :maxdepth: 1 + :hidden: + + openvino_docs_OV_Runtime_UG_Preprocessing_Details + openvino_docs_OV_Runtime_UG_Layout_Overview + +@endsphinxdirective + +## Introduction + +When your input data doesn't perfectly fit the Neural Network model's input tensor, additional operations/steps are needed to transform the data to the format expected by the model. These operations are known as "preprocessing". + +### Example +Consider the following standard example: a deep learning model expects input with shape `{1, 3, 224, 224}`, `FP32` precision, `RGB` color channels order, and requires data normalization (subtract mean and divide by scale factor). But you have just a `640x480` `BGR` image (data is `{480, 640, 3}`). This means that we need some operations which will: + - Convert U8 buffer to FP32 + - Transform to `planar` format: from `{1, 480, 640, 3}` to `{1, 3, 480, 640}` + - Resize image from 640x480 to 224x224 + - Make `BGR->RGB` conversion as model expects `RGB` + - For each pixel, subtract mean values and divide by scale factor + + +![](img/preprocess_not_fit.png) + + +Even though all these steps can be relatively easily implemented manually in the application's code before actual inference, it is possible to do it with the Preprocessing API. Reasons to use this API are: + - Preprocessing API is easy to use + - Preprocessing steps will be integrated into the execution graph and will be performed on the selected device (CPU/GPU/VPU/etc.) rather than always being executed on CPU. This improves utilization of the selected device. + +## Preprocessing API + +Intuitively, Preprocessing API consists of the following parts: + 1. **Tensor:** Declare user's data format, like shape, [layout](./layout_overview.md), precision, color format of actual user's data + 2. **Steps:** Describe sequence of preprocessing steps which need to be applied to user's data + 3. **Model:** Specify Model's data format. Usually, precision and shape are already known for model, only additional information, like [layout](./layout_overview.md) can be specified + +> **Note:** All modifications of the model's graph shall be performed after the model is read from disk and **before** it is loaded on the actual device. + +### PrePostProcessor object + +The `ov::preprocess::PrePostProcessor` class allows specifying preprocessing and postprocessing steps for a model read from disk. + +@sphinxdirective + +.. tab:: C++ + + .. doxygensnippet:: docs/snippets/ov_preprocessing.cpp + :language: cpp + :fragment: [ov:preprocess:create] + +.. tab:: Python + + .. doxygensnippet:: docs/snippets/ov_preprocessing.py + :language: python + :fragment: [ov:preprocess:create] + +@endsphinxdirective + +### Declare user's data format + +To address a particular input of the model/preprocessor, use the `ov::preprocess::PrePostProcessor::input(input_name)` method + +@sphinxdirective + +.. tab:: C++ + + ..
doxygensnippet:: docs/snippets/ov_preprocessing.cpp + :language: cpp + :fragment: [ov:preprocess:tensor] + +.. tab:: Python + + .. doxygensnippet:: docs/snippets/ov_preprocessing.py + :language: python + :fragment: [ov:preprocess:tensor] + +@endsphinxdirective + + +Here we've specified all information about the user's input: + - Precision is U8 (unsigned 8-bit integer) + - Data represents tensor with {1,480,640,3} shape + - [Layout](./layout_overview.md) is "NHWC". It means that 'height=480, width=640, channels=3' + - Color format is `BGR` + +### Declare model's layout + +Model's input already has information about precision and shape. Preprocessing API is not intended to modify this. The only thing that may be specified is the input data's [layout](./layout_overview.md) + +@sphinxdirective + +.. tab:: C++ + + .. doxygensnippet:: docs/snippets/ov_preprocessing.cpp + :language: cpp + :fragment: [ov:preprocess:model] + +.. tab:: Python + + .. doxygensnippet:: docs/snippets/ov_preprocessing.py + :language: python + :fragment: [ov:preprocess:model] + +@endsphinxdirective + + +Now, if the model's input has `{1,3,224,224}` shape, preprocessing will be able to identify that the model's `height=224`, `width=224`, `channels=3`. Height/width information is necessary for 'resize', and `channels` is needed for mean/scale normalization + +### Preprocessing steps + +Now we can define the sequence of preprocessing steps: + +@sphinxdirective + +.. tab:: C++ + + .. doxygensnippet:: docs/snippets/ov_preprocessing.cpp + :language: cpp + :fragment: [ov:preprocess:steps] + +.. tab:: Python + + .. doxygensnippet:: docs/snippets/ov_preprocessing.py + :language: python + :fragment: [ov:preprocess:steps] + +@endsphinxdirective + +Here: + - Convert U8 to FP32 precision + - Convert current color format (BGR) to RGB + - Resize to model's height/width. **Note** that if model accepts dynamic size, e.g. {?, 3, ?, ?}, `resize` will not know how to resize the picture, so in this case you should specify target height/width on this step. See also ov::preprocess::PreProcessSteps::resize() + - Subtract mean values from each channel. At this step the color format is already RGB, so `100.5` will be subtracted from each `Red` component, `101` from each `Green` component, and `101.5` from each `Blue` one. + - Divide each pixel's data by the appropriate scale value. In this example each `Red` component will be divided by 50, `Green` by 51, and `Blue` by 52 respectively + - **Note:** the last `convert_layout` step is commented out as it is not necessary to specify the last layout conversion. PrePostProcessor will do such a conversion automatically + +### Integrate steps into model + +We've finished declaring the preprocessing steps, now it is time to build them into the model. For debugging purposes it is possible to print the `PrePostProcessor` configuration on screen: + +@sphinxdirective + +.. tab:: C++ + + .. doxygensnippet:: docs/snippets/ov_preprocessing.cpp + :language: cpp + :fragment: [ov:preprocess:build] + +.. tab:: Python + + .. doxygensnippet:: docs/snippets/ov_preprocessing.py + :language: python + :fragment: [ov:preprocess:build] + +@endsphinxdirective + + +After this, `model` will accept U8 input with `{1, 480, 640, 3}` shape and `BGR` channels order. All conversion steps will be integrated into the execution graph.
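For illustration only, here is a minimal hypothetical sketch of how the rebuilt `model` might then be compiled and fed with a raw image. It is not part of the original snippets: the device name `"CPU"` and the `bgr_data` pointer to the user's 480x640 BGR image buffer are placeholders/assumptions.

```cpp
// Hypothetical usage sketch: `core` and `model` come from the previous steps,
// `bgr_data` is a placeholder pointer to the user's 480x640 BGR (U8) image data.
ov::CompiledModel compiled_model = core.compile_model(model, "CPU");
ov::InferRequest infer_request = compiled_model.create_infer_request();

// Wrap the raw image into a tensor matching the declared user format: u8, {1, 480, 640, 3}, "NHWC"
ov::Tensor input_tensor(ov::element::u8, ov::Shape{1, 480, 640, 3}, bgr_data);
infer_request.set_input_tensor(input_tensor);
infer_request.infer();
```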
Now you can load model on device and pass your image to model as is, without any data manipulation on application's side + + +## See Also + +* [Preprocessing Details](./preprocessing_details.md) +* [Layout API overview](./layout_overview.md) +* ov::preprocess::PrePostProcessor C++ class documentation diff --git a/docs/glossary.md b/docs/glossary.md index d5b17d7af8c..5e168add190 100644 --- a/docs/glossary.md +++ b/docs/glossary.md @@ -56,21 +56,21 @@ Glossary of terms used in the OpenVINO™ -| Term | Description | -| :--- | :--- | -| Batch | Number of images to analyze during one call of infer. Maximum batch size is a property of the network and it is set before loading of the network to the plugin. In NHWC, NCHW and NCDHW image data layout representation, the N refers to the number of images in the batch | -| Tensor | Memory container used for storing inputs, outputs of the network, weights and biases of the layers | -| Device (Affinitity) | A preferred Intel(R) hardware device to run the inference (CPU, GPU, etc.) | -| Extensibility mechanism, Custom layers | The mechanism that provides you with capabilities to extend the OpenVINO™ Runtime and Model Optimizer so that they can work with topologies containing layers that are not yet supported | -| ov::Model | A class of the Model that OpenVINO™ Runtime reads from IR. Consists of topology, weights and biases | -| ov::CompiledModel | An instance of the loaded network which allows the OpenVINO™ Runtime to request (several) infer requests and perform inference synchronously or asynchronously | -| InferRequest | A class that represents the end point of inference on the model loaded to the plugin and represented by executable network. Inputs are set here, outputs should be requested from this interface as well | -| ov::ProfileInfo | Represents basic inference profiling information per layer | -| OpenVINO™ Runtime | A C++ library with a set of classes that you can use in your application to infer input data (images) and get the result | -| OpenVINO™ API | The basic default API for all supported devices, which allows you to load a model from Intermediate Representation, set input and output formats and execute the model on various devices | -| OpenVINO™ Core | OpenVINO™ Core is a software component that manages inference on certain Intel(R) hardware devices: CPU, GPU, MYRIAD, GNA, etc. | -| ov::Layout | Image data layout refers to the representation of images batch. Layout shows a sequence of 4D or 5D tensor data in memory. A typical NCHW format represents pixel in horizontal direction, rows by vertical dimension, planes by channel and images into batch | -| ov::element::Type | Represents data element type. For example, f32 is 32-bit floating point, f16 is 16-bit floating point. Element type can be changed before loading the network to the plugin | +| Term | Description | +| :--- |:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Batch | Number of images to analyze during one call of infer. Maximum batch size is a property of the network and it is set before loading of the network to the plugin. 
In NHWC, NCHW and NCDHW image data layout representation, the N refers to the number of images in the batch | +| Tensor | Memory container used for storing inputs, outputs of the network, weights and biases of the layers | +| Device (Affinitity) | A preferred Intel(R) hardware device to run the inference (CPU, GPU, etc.) | +| Extensibility mechanism, Custom layers | The mechanism that provides you with capabilities to extend the OpenVINO™ Runtime and Model Optimizer so that they can work with topologies containing layers that are not yet supported | +| ov::Model | A class of the Model that OpenVINO™ Runtime reads from IR. Consists of topology, weights and biases | +| ov::CompiledModel | An instance of the loaded network which allows the OpenVINO™ Runtime to request (several) infer requests and perform inference synchronously or asynchronously | +| InferRequest | A class that represents the end point of inference on the model loaded to the plugin and represented by executable network. Inputs are set here, outputs should be requested from this interface as well | +| ov::ProfileInfo | Represents basic inference profiling information per layer | +| OpenVINO™ Runtime | A C++ library with a set of classes that you can use in your application to infer input data (images) and get the result | +| OpenVINO™ API | The basic default API for all supported devices, which allows you to load a model from Intermediate Representation, set input and output formats and execute the model on various devices | +| OpenVINO™ Core | OpenVINO™ Core is a software component that manages inference on certain Intel(R) hardware devices: CPU, GPU, MYRIAD, GNA, etc. | +| ov::Layout | Image data layout refers to the representation of images batch. Layout shows a sequence of 4D or 5D tensor data in memory. A typical NCHW format represents pixel in horizontal direction, rows by vertical dimension, planes by channel and images into batch. See also [Layout API Overview](./OV_Runtime_UG/layout_overview.md) | +| ov::element::Type | Represents data element type. For example, f32 is 32-bit floating point, f16 is 16-bit floating point. Element type can be changed before loading the network to the plugin | ## See Also diff --git a/docs/snippets/ov_layout.cpp b/docs/snippets/ov_layout.cpp new file mode 100644 index 00000000000..689096ce32d --- /dev/null +++ b/docs/snippets/ov_layout.cpp @@ -0,0 +1,55 @@ +// Copyright (C) 2018-2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// +#include + +int main() { + ov::Layout layout; + //! [ov:layout:simple] + layout = ov::Layout("NHWC"); + //! [ov:layout:simple] + //! [ov:layout:complex] + // Each dimension has name separated by comma, layout is wrapped with square brackets + layout = ov::Layout("[time,temperature,humidity]"); + //! [ov:layout:complex] + //! [ov:layout:partially_defined] + // First dimension is batch, 4th is 'channels'. Others are not important for us + layout = ov::Layout("N??C"); + // Or the same using advanced syntax + layout = ov::Layout("[n,?,?,c]"); + //! [ov:layout:partially_defined] + //! [ov:layout:dynamic] + // First dimension is 'batch' others are whatever + layout = ov::Layout("N..."); + + // Second dimension is 'channels' others are whatever + layout = ov::Layout("?C..."); + + // Last dimension is 'channels' others are whatever + layout = ov::Layout("...C"); + //! [ov:layout:dynamic] + + //! 
[ov:layout:predefined] + // returns 0 for batch + ov::layout::batch_idx("NCDHW"); + + // returns 1 for channels + ov::layout::channels_idx("NCDHW"); + + // returns 2 for depth + ov::layout::depth_idx("NCDHW"); + + // returns -2 for height + ov::layout::height_idx("...HW"); + + // returns -1 for width + ov::layout::width_idx("...HW"); + //! [ov:layout:predefined] + + //! [ov:layout:dump] + layout = ov::Layout("NCHW"); + std::cout << layout.to_string(); // prints [N,C,H,W] + //! [ov:layout:dump] + + return 0; +} diff --git a/docs/snippets/ov_layout.py b/docs/snippets/ov_layout.py new file mode 100644 index 00000000000..689937b6c87 --- /dev/null +++ b/docs/snippets/ov_layout.py @@ -0,0 +1,54 @@ +# Copyright (C) 2018-2022 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +# ! [ov:layout:simple] +from openvino.runtime import Layout +layout = Layout('NCHW') +# ! [ov:layout:simple] +# ! [ov:layout:complex] +# Each dimension has name separated by comma +# Layout is wrapped with square brackets +layout = Layout('[time,temperature,humidity]') +# ! [ov:layout:complex] +# ! [ov:layout:partially_defined] +# First dimension is batch, 4th is 'channels'. +# Others are not important for us +layout = Layout('N??C') + +# Or the same using advanced syntax +layout = Layout('[n,?,?,c]') +# ! [ov:layout:partially_defined] +# ! [ov:layout:dynamic] +# First dimension is 'batch' others are whatever +layout = Layout('N...') + +# Second dimension is 'channels' others are whatever +layout = Layout('?C...') + +# Last dimension is 'channels' others are whatever +layout = Layout('...C') +# ! [ov:layout:dynamic] + +# ! [ov:layout:predefined] +from openvino.runtime import layout_helpers +# returns 0 for batch +layout_helpers.batch_idx(Layout('NCDHW')) + +# returns 1 for channels +layout_helpers.channels_idx(Layout('NCDHW')) + +# returns 2 for depth +layout_helpers.depth_idx(Layout('NCDHW')) + +# returns -2 for height +layout_helpers.height_idx(Layout('...HW')) + +# returns -1 for width +layout_helpers.width_idx(Layout('...HW')) +# ! [ov:layout:predefined] + +# ! [ov:layout:dump] +layout = Layout('NCHW') +print(layout) # prints [N,C,H,W] +# ! [ov:layout:dump] diff --git a/docs/snippets/ov_preprocessing.cpp b/docs/snippets/ov_preprocessing.cpp new file mode 100644 index 00000000000..42c277c567a --- /dev/null +++ b/docs/snippets/ov_preprocessing.cpp @@ -0,0 +1,152 @@ +// Copyright (C) 2018-2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// +#include +#include +#include + +void ppp_input_1(ov::preprocess::PrePostProcessor& ppp) { + //! [ov:preprocess:input_1] + ppp.input() // no index/name is needed if model has one input + .preprocess().scale(50.f); + + ppp.output() // same for output + .postprocess().convert_element_type(ov::element::u8); + //! [ov:preprocess:input_1] + //! [ov:preprocess:mean_scale] + ppp.input("input").preprocess().mean(128).scale(127); + //! [ov:preprocess:mean_scale] + //! [ov:preprocess:mean_scale_array] + // Suppose model's shape is {1, 3, 224, 224} + ppp.input("input").model().set_layout("NCHW"); // N=1, C=3, H=224, W=224 + // Mean/Scale has 3 values which matches with C=3 + ppp.input("input").preprocess() + .mean({103.94, 116.78, 123.68}).scale({57.21, 57.45, 57.73}); + //! [ov:preprocess:mean_scale_array] + //! 
[ov:preprocess:convert_element_type] + // First define data type for your tensor + ppp.input("input").tensor().set_element_type(ov::element::u8); + + // Then define preprocessing step + ppp.input("input").preprocess().convert_element_type(ov::element::f32); + + // If conversion is needed to `model's` element type, 'f32' can be omitted + ppp.input("input").preprocess().convert_element_type(); + //! [ov:preprocess:convert_element_type] + //! [ov:preprocess:convert_layout] + // First define layout for your tensor + ppp.input("input").tensor().set_layout("NHWC"); + + // Then define layout of model + ppp.input("input").model().set_layout("NCHW"); + + std::cout << ppp; // Will print 'implicit layout conversion step' + //! [ov:preprocess:convert_layout] + //! [ov:preprocess:convert_layout_2] + ppp.input("input").tensor().set_shape({1, 480, 640, 3}); + // Model expects shape {1, 3, 480, 640} + ppp.input("input").preprocess().convert_layout({0, 3, 1, 2}); + // 0 -> 0; 3 -> 1; 1 -> 2; 2 -> 3 + //! [ov:preprocess:convert_layout_2] + + //! [ov:preprocess:resize_1] + ppp.input("input").tensor().set_shape({1, 3, 960, 1280}); + ppp.input("input").model().set_layout("??HW"); + ppp.input("input").preprocess().resize(ov::preprocess::ResizeAlgorithm::RESIZE_LINEAR, 480, 640); + //! [ov:preprocess:resize_1] + //! [ov:preprocess:resize_2] + ppp.input("input").tensor().set_shape({1, 3, 960, 1280}); + ppp.input("input").model().set_layout("??HW"); // Model accepts {1, 3, 480, 640} shape + // Resize to model's dimension + ppp.input("input").preprocess().resize(ov::preprocess::ResizeAlgorithm::RESIZE_LINEAR); + //! [ov:preprocess:resize_2] + + //! [ov:preprocess:convert_color_1] + ppp.input("input").tensor().set_color_format(ov::preprocess::ColorFormat::BGR); + ppp.input("input").preprocess().convert_color(ov::preprocess::ColorFormat::RGB); + //! [ov:preprocess:convert_color_1] + //! [ov:preprocess:convert_color_2] + // This will split original `input` to 2 separate inputs: `input/y' and 'input/uv' + ppp.input("input").tensor().set_color_format(ov::preprocess::ColorFormat::NV12_TWO_PLANES); + ppp.input("input").preprocess().convert_color(ov::preprocess::ColorFormat::RGB); + std::cout << ppp; // Dump preprocessing steps to see what will happen + //! [ov:preprocess:convert_color_2] +} + +void ppp_input_2(ov::preprocess::PrePostProcessor& ppp) { + //! [ov:preprocess:input_index] + auto &input_1 = ppp.input(1); // Gets 2nd input in a model + auto &output_1 = ppp.output(2); // Get output with index=2 (3rd one) in a model + //! [ov:preprocess:input_index] +} + +void ppp_input_name(ov::preprocess::PrePostProcessor& ppp) { + //! [ov:preprocess:input_name] + auto &input_image = ppp.input("image"); + auto &output_result = ppp.output("result"); + //! [ov:preprocess:input_name] +} + +int main() { + std::string model_path; + std::string input_name; + //! [ov:preprocess:create] + ov::Core core; + std::shared_ptr model = core.read_model(model_path); + ov::preprocess::PrePostProcessor ppp(model); + //! [ov:preprocess:create] + + //! [ov:preprocess:tensor] + ov::preprocess::InputInfo& input = ppp.input(input_name); + input.tensor() + .set_element_type(ov::element::u8) + .set_shape({1, 480, 640, 3}) + .set_layout("NHWC") + .set_color_format(ov::preprocess::ColorFormat::BGR); + //! [ov:preprocess:tensor] + //! [ov:preprocess:model] + // `model's input` already `knows` it's shape and data type, no need to specify them here + input.model().set_layout("NCHW"); + //! [ov:preprocess:model] + //! 
[ov:preprocess:steps] + input.preprocess() + .convert_element_type(ov::element::f32) + .convert_color(ov::preprocess::ColorFormat::RGB) + .resize(ov::preprocess::ResizeAlgorithm::RESIZE_LINEAR) + .mean({100.5, 101, 101.5}) + .scale({50., 51., 52.}); + // Not needed, such conversion will be added implicitly + // .convert_layout("NCHW"); + //! [ov:preprocess:steps] + //! [ov:preprocess:custom] + ppp.input("input_image").preprocess() + .custom([](const ov::Output& node) { + // Custom nodes can be inserted as Pre-processing steps + return std::make_shared(node); + }); + //! [ov:preprocess:custom] + //! [ov:preprocess:postprocess] + // Model's output has 'NCHW' layout + ppp.output("result_image").model().set_layout("NCHW"); + + // Set target user's tensor to U8 type + 'NHWC' layout + // Precision & layout conversions will be done implicitly + ppp.output("result_image").tensor() + .set_layout("NHWC") + .set_element_type(ov::element::u8); + + // Also it is possible to insert some custom operations + ppp.output("result_image").postprocess() + .custom([](const ov::Output& node) { + // Custom nodes can be inserted as Post-processing steps + return std::make_shared(node); + }); + //! [ov:preprocess:postprocess] + //! [ov:preprocess:build] + std::cout << "Dump preprocessor: " << ppp << std::endl; + model = ppp.build(); + //! [ov:preprocess:build] + + OPENVINO_ASSERT(model, "Model is invalid"); + return 0; +} diff --git a/docs/snippets/ov_preprocessing.py b/docs/snippets/ov_preprocessing.py new file mode 100644 index 00000000000..db27f14a171 --- /dev/null +++ b/docs/snippets/ov_preprocessing.py @@ -0,0 +1,171 @@ +# Copyright (C) 2018-2022 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +from openvino.preprocess import ResizeAlgorithm, ColorFormat +from openvino.runtime import Layout, Type + + +xml_path = '' +input_name = '' +# ! [ov:preprocess:create] +from openvino.preprocess import PrePostProcessor +from openvino.runtime import Core + +core = Core() +model = core.read_model(model=xml_path) +ppp = PrePostProcessor(model) +# ! [ov:preprocess:create] + +# ! [ov:preprocess:tensor] +from openvino.preprocess import ColorFormat +from openvino.runtime import Layout, Type +ppp.input(input_name).tensor() \ + .set_element_type(Type.u8) \ + .set_shape([1, 480, 640, 3]) \ + .set_layout(Layout('NHWC')) \ + .set_color_format(ColorFormat.BGR) +# ! [ov:preprocess:tensor] +# ! [ov:preprocess:model] +# `model's input` already `knows` it's shape and data type, no need to specify them here +ppp.input(input_name).model().set_layout(Layout('NCHW')) +# ! [ov:preprocess:model] +# ! [ov:preprocess:steps] +from openvino.preprocess import ResizeAlgorithm +ppp.input(input_name).preprocess() \ + .convert_element_type(Type.f32) \ + .convert_color(ColorFormat.RGB) \ + .resize(ResizeAlgorithm.RESIZE_LINEAR) \ + .mean([100.5, 101, 101.5]) \ + .scale([50., 51., 52.]) +# .convert_layout(Layout('NCHW')); # Not needed, such conversion will be added implicitly +# ! [ov:preprocess:steps] +# ! [ov:preprocess:build] +print(f'Dump preprocessor: {ppp}') +model = ppp.build() +# ! [ov:preprocess:build] + +# ! [ov:preprocess:input_index] +ppp.input(1) # Gets 2nd input in a model +ppp.output(2) # Gets output with index=2 (3rd one) in a model +# ! [ov:preprocess:input_index] + + +# ! [ov:preprocess:input_name] +ppp.input('image') +ppp.output('result') +# ! [ov:preprocess:input_name] + +# ! [ov:preprocess:input_1] +# no index/name is needed if model has one input +ppp.input().preprocess().scale(50.) 
+ +# same for output +ppp.output() \ + .postprocess().convert_element_type(Type.u8) +# ! [ov:preprocess:input_1] +# ! [ov:preprocess:mean_scale] +ppp.input('input').preprocess().mean(128).scale(127) +# ! [ov:preprocess:mean_scale] +# ! [ov:preprocess:mean_scale_array] +# Suppose model's shape is {1, 3, 224, 224} +# N=1, C=3, H=224, W=224 +ppp.input('input').model().set_layout(Layout('NCHW')) +# Mean/Scale has 3 values which matches with C=3 +ppp.input('input').preprocess() \ + .mean([103.94, 116.78, 123.68]).scale([57.21, 57.45, 57.73]) +# ! [ov:preprocess:mean_scale_array] +# ! [ov:preprocess:convert_element_type] +# First define data type for your tensor +ppp.input('input').tensor().set_element_type(Type.u8) + +# Then define preprocessing step +ppp.input('input').preprocess().convert_element_type(Type.f32) + +# If conversion is needed to `model's` element type, 'f32' can be omitted +ppp.input('input').preprocess().convert_element_type() +# ! [ov:preprocess:convert_element_type] +# ! [ov:preprocess:convert_layout] +# First define layout for your tensor +ppp.input('input').tensor().set_layout(Layout('NHWC')) + +# Then define layout of model +ppp.input('input').model().set_layout(Layout('NCHW')) + +print(ppp) # Will print 'implicit layout conversion step' +# ! [ov:preprocess:convert_layout] +# ! [ov:preprocess:convert_layout_2] +ppp.input('input').tensor().set_shape([1, 480, 640, 3]) + +# Model expects shape {1, 3, 480, 640} +ppp.input('input').preprocess()\ + .convert_layout([0, 3, 1, 2]) +# 0 -> 0; 3 -> 1; 1 -> 2; 2 -> 3 +# ! [ov:preprocess:convert_layout_2] + +# ! [ov:preprocess:resize_1] +ppp.input('input').tensor().set_shape([1, 3, 960, 1280]) +ppp.input('input').model().set_layout(Layout('??HW')) +ppp.input('input').preprocess()\ + .resize(ResizeAlgorithm.RESIZE_LINEAR, 480, 640) +# ! [ov:preprocess:resize_1] +# ! [ov:preprocess:resize_2] +ppp.input('input').tensor().set_shape([1, 3, 960, 1280]) +# Model accepts {1, 3, 480, 640} shape, thus last dimensions are 'H' and 'W' +ppp.input('input').model().set_layout(Layout('??HW')) +# Resize to model's dimension +ppp.input('input').preprocess().resize(ResizeAlgorithm.RESIZE_LINEAR) +# ! [ov:preprocess:resize_2] +# ! [ov:preprocess:convert_color_1] +ppp.input('input').tensor().set_color_format(ColorFormat.BGR) + +ppp.input('input').preprocess().convert_color(ColorFormat.RGB) +# ! [ov:preprocess:convert_color_1] +# ! [ov:preprocess:convert_color_2] +# This will split original `input` to 2 separate inputs: `input/y' and 'input/uv' +ppp.input('input').tensor()\ + .set_color_format(ColorFormat.NV12_TWO_PLANES) + +ppp.input('input').preprocess()\ + .convert_color(ColorFormat.RGB) +print(ppp) # Dump preprocessing steps to see what will happen +# ! [ov:preprocess:convert_color_2] + +# ! [ov:preprocess:custom] +# It is possible to insert some custom operations +import openvino.runtime.opset8 as ops +from openvino.runtime import Output +from openvino.runtime.utils.decorators import custom_preprocess_function + +@custom_preprocess_function +def custom_abs(output: Output): + # Custom nodes can be inserted as Preprocessing steps + return ops.abs(output) + +ppp.input("input_image").preprocess() \ + .custom(custom_abs) +# ! [ov:preprocess:custom] + +# ! 
[ov:preprocess:postprocess] +# Model's output has 'NCHW' layout +ppp.output('result_image').model().set_layout(Layout('NCHW')) + +# Set target user's tensor to U8 type + 'NHWC' layout +# Precision & layout conversions will be done implicitly +ppp.output('result_image').tensor()\ + .set_layout(Layout("NHWC"))\ + .set_element_type(Type.u8) + +# Also it is possible to insert some custom operations +import openvino.runtime.opset8 as ops +from openvino.runtime import Output +from openvino.runtime.utils.decorators import custom_preprocess_function + +@custom_preprocess_function +def custom_abs(output: Output): + # Custom nodes can be inserted as Post-processing steps + return ops.abs(output) + +ppp.output("result_image").postprocess()\ + .custom(custom_abs) +# ! [ov:preprocess:postprocess] diff --git a/src/core/include/openvino/core/layout.hpp b/src/core/include/openvino/core/layout.hpp index f15bb69065c..12135cefb1a 100644 --- a/src/core/include/openvino/core/layout.hpp +++ b/src/core/include/openvino/core/layout.hpp @@ -16,6 +16,23 @@ namespace ov { +/// \brief ov::Layout represents the text information of tensor's dimensions/axes. E.g. layout `NCHW` means that 4D +/// tensor `{-1, 3, 480, 640}` will have: +/// - 0: `N = -1`: batch dimension is dynamic +/// - 1: `C = 3`: number of channels is '3' +/// - 2: `H = 480`: image height is 480 +/// - 3: `W = 640`: image width is 640 +/// +/// Examples: `ov::Layout` can be specified for: +/// - Preprocessing purposes. E.g. +/// - To apply normalization (means/scales) it is usually required to set 'C' dimension in a layout. +/// - To resize the image to specified width/height it is needed to set 'H' and 'W' dimensions in a layout +/// - To transpose image - source and target layout can be set (see +/// `ov::preprocess::PreProcessSteps::convert_layout`) +/// - To set/get model's batch (see `ov::get_batch`/`ov::set_batch') it is required in general to specify 'N' dimension +/// in layout for appropriate inputs +/// +/// Refer also to `ov::layout` namespace for various additional helper functions of `ov::Layout` class OPENVINO_API Layout { public: /// \brief Constructs a dynamic Layout with no layout information. @@ -61,6 +78,7 @@ public: /// \brief String representation of Layout std::string to_string() const; + /// \brief Returns 'true' if layout has no information, i.e. equals to Layout() bool empty() const { return *this == Layout(); } diff --git a/src/core/include/openvino/core/preprocess/postprocess_steps.hpp b/src/core/include/openvino/core/preprocess/postprocess_steps.hpp index dbf1dccdb9c..fd14d3ff845 100644 --- a/src/core/include/openvino/core/preprocess/postprocess_steps.hpp +++ b/src/core/include/openvino/core/preprocess/postprocess_steps.hpp @@ -42,37 +42,39 @@ public: /// \brief Add 'convert layout' operation to specified layout. /// - /// \details Adds appropriate 'transpose' operation between model layout and user's desired layout. - /// Current implementation requires source and destination layout to have same number of dimensions - /// - /// \example Example: when model data has output in 'NCHW' layout ([1, 3, 224, 224]) but user needs - /// interleaved output image ('NHWC', [1, 224, 224, 3]). Post-processing may look like this: - /// - /// \code{.cpp} auto proc = PrePostProcessor(function); - /// proc.output().model(OutputTensorInfo().set_layout("NCHW"); // model output is NCHW - /// proc.output().postprocess().convert_layout("NHWC"); // User needs output as NHWC - /// \endcode - /// /// \param dst_layout New layout after conversion. 
If not specified - destination layout is obtained from /// appropriate tensor output properties. /// /// \return Reference to 'this' to allow chaining with other calls in a builder-like manner. + /// + /// Adds appropriate 'transpose' operation between model layout and user's desired layout. + /// Current implementation requires source and destination layout to have same number of dimensions + /// + /// Example: when model data has output in 'NCHW' layout ([1, 3, 224, 224]) but user needs + /// interleaved output image ('NHWC', [1, 224, 224, 3]). Post-processing may look like this: + /// + /// \code{.cpp} + /// + /// auto proc = PrePostProcessor(function); + /// proc.output().model(OutputTensorInfo().set_layout("NCHW"); // model output is NCHW + /// proc.output().postprocess().convert_layout("NHWC"); // User needs output as NHWC + /// \endcode PostProcessSteps& convert_layout(const Layout& dst_layout = {}); /// \brief Add convert layout operation by direct specification of transposed dimensions. /// - /// \example Example: model produces output with shape [1, 3, 480, 640] and user's needs + /// \param dims Dimensions array specifying places for new axis. If not empty, array size (N) must match to input + /// shape rank. Array values shall contain all values from 0 to N-1. If empty, no actual conversion will be added. + /// + /// \return Reference to 'this' to allow chaining with other calls in a builder-like manner. + /// + /// Example: model produces output with shape [1, 3, 480, 640] and user's needs /// interleaved output image [1, 480, 640, 3]. Post-processing may look like this: /// /// \code{.cpp} auto proc = PrePostProcessor(function); /// proc.output().postprocess().convert_layout({0, 2, 3, 1}); /// function = proc.build(); /// \endcode - /// - /// \param dims Dimensions array specifying places for new axis. If not empty, array size (N) must match to input - /// shape rank. Array values shall contain all values from 0 to N-1. If empty, no actual conversion will be added. - /// - /// \return Reference to 'this' to allow chaining with other calls in a builder-like manner. PostProcessSteps& convert_layout(const std::vector& dims); /// \brief Signature for custom postprocessing operation. Custom postprocessing operation takes one output node and diff --git a/src/core/include/openvino/core/preprocess/pre_post_process.hpp b/src/core/include/openvino/core/preprocess/pre_post_process.hpp index b4bd49614af..09a401511f9 100644 --- a/src/core/include/openvino/core/preprocess/pre_post_process.hpp +++ b/src/core/include/openvino/core/preprocess/pre_post_process.hpp @@ -15,10 +15,6 @@ class Model; namespace preprocess { /// \brief Main class for adding pre- and post- processing steps to existing ov::Model -/// API has Builder-like style to allow chaining calls in client's code, like -/// \code{.cpp} -/// auto proc = PrePostProcessor(function).input().input(); -/// \endcode /// /// This is a helper class for writing easy pre- and post- processing operations on ov::Model object assuming that /// any preprocess operation takes one input and produces one output. diff --git a/src/core/include/openvino/core/preprocess/preprocess_steps.hpp b/src/core/include/openvino/core/preprocess/preprocess_steps.hpp index 2809221aee3..91529179d36 100644 --- a/src/core/include/openvino/core/preprocess/preprocess_steps.hpp +++ b/src/core/include/openvino/core/preprocess/preprocess_steps.hpp @@ -117,44 +117,48 @@ public: /// \brief Add 'convert layout' operation to specified layout. 
/// - /// \details Adds appropriate 'transpose' operation between user layout and target layout. - /// Current implementation requires source and destination layout to have same number of dimensions - /// - /// \example Example: when user data has 'NHWC' layout (example is RGB image, [1, 224, 224, 3]) but model expects - /// planar input image ('NCHW', [1, 3, 224, 224]). Preprocessing may look like this: - /// - /// \code{.cpp} auto proc = PrePostProcessor(function); - /// proc.input().tensor().set_layout("NHWC"); // User data is NHWC - /// proc.input().preprocess().convert_layout("NCHW")) // model expects input as NCHW - /// \endcode - /// /// \param dst_layout New layout after conversion. If not specified - destination layout is obtained from /// appropriate model input properties. /// /// \return Reference to 'this' to allow chaining with other calls in a builder-like manner. + /// + /// Adds appropriate 'transpose' operation between user layout and target layout. + /// Current implementation requires source and destination layout to have same number of dimensions + /// + /// Example: when user data has 'NHWC' layout (example is RGB image, [1, 224, 224, 3]) but model expects + /// planar input image ('NCHW', [1, 3, 224, 224]). Preprocessing may look like this: + /// + /// \code{.cpp} + /// auto proc = PrePostProcessor(model); + /// proc.input().tensor().set_layout("NHWC"); // User data is NHWC + /// proc.input().preprocess().convert_layout("NCHW")) // model expects input as NCHW + /// \endcode PreProcessSteps& convert_layout(const Layout& dst_layout = {}); /// \brief Add convert layout operation by direct specification of transposed dimensions. /// - /// \example Example: when user data has input RGB image {1x480x640x3} but model expects - /// planar input image ('NCHW', [1, 3, 480, 640]). Preprocessing may look like this: - /// - /// \code{.cpp} - /// auto proc = PrePostProcessor(function); - /// proc.input().preprocess().convert_layout({0, 3, 1, 2}); - /// /// \param dims Dimensions array specifying places for new axis. If not empty, array size (N) must match to input /// shape rank. Array values shall contain all values from 0 to N-1. If empty, no actual conversion will be added. /// /// \return Reference to 'this' to allow chaining with other calls in a builder-like manner. + /// + /// Example: when user data has input RGB image {1x480x640x3} but model expects + /// planar input image ('NCHW', [1, 3, 480, 640]). Preprocessing may look like this: + /// + /// \code{.cpp} + /// auto proc = PrePostProcessor(function); + /// proc.input().preprocess().convert_layout({0, 3, 1, 2}); + /// \endcode PreProcessSteps& convert_layout(const std::vector& dims); /// \brief Reverse channels operation. /// - /// \details Adds appropriate operation which reverses channels layout. Operation requires layout having 'C' + /// \return Reference to 'this' to allow chaining with other calls in a builder-like manner. + /// + /// Adds appropriate operation which reverses channels layout. Operation requires layout having 'C' /// dimension Operation convert_color (RGB<->BGR) does reversing of channels also, but only for NHWC layout /// - /// \example Example: when user data has 'NCHW' layout (example is [1, 3, 224, 224] RGB order) but model expects + /// Example: when user data has 'NCHW' layout (example is [1, 3, 224, 224] RGB order) but model expects /// BGR planes order. 
Preprocessing may look like this: /// /// \code{.cpp} @@ -163,7 +167,6 @@ public: /// proc.input().preprocess().reverse_channels(); /// \endcode /// - /// \return Reference to 'this' to allow chaining with other calls in a builder-like manner. PreProcessSteps& reverse_channels(); }; From d26fd3aa224d2903021f4cc12702b5d1f74de1f4 Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Mon, 21 Feb 2022 22:39:26 +0300 Subject: [PATCH 041/310] Ability to fully override OUTPUT_DIR (#10524) --- .../IEDevScriptsConfig.cmake | 20 ++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/cmake/developer_package/IEDevScriptsConfig.cmake b/cmake/developer_package/IEDevScriptsConfig.cmake index 3946ab990e4..e7f342a1b6c 100644 --- a/cmake/developer_package/IEDevScriptsConfig.cmake +++ b/cmake/developer_package/IEDevScriptsConfig.cmake @@ -158,16 +158,22 @@ else () endif() add_definitions(-DIE_BUILD_POSTFIX=\"${IE_BUILD_POSTFIX}\") +macro(ov_set_if_not_defined var value) + if(NOT DEFINED ${var}) + set(${var} ${value}) + endif() +endmacro() + if(NOT UNIX) - set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${OUTPUT_ROOT}/${BIN_FOLDER}) - set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${OUTPUT_ROOT}/${BIN_FOLDER}) + ov_set_if_not_defined(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${OUTPUT_ROOT}/${BIN_FOLDER}) + ov_set_if_not_defined(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${OUTPUT_ROOT}/${BIN_FOLDER}) else() - set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${OUTPUT_ROOT}/${BIN_FOLDER}/lib) - set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${OUTPUT_ROOT}/${BIN_FOLDER}/lib) + ov_set_if_not_defined(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${OUTPUT_ROOT}/${BIN_FOLDER}/lib) + ov_set_if_not_defined(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${OUTPUT_ROOT}/${BIN_FOLDER}/lib) endif() -set(CMAKE_COMPILE_PDB_OUTPUT_DIRECTORY ${OUTPUT_ROOT}/${BIN_FOLDER}) -set(CMAKE_PDB_OUTPUT_DIRECTORY ${OUTPUT_ROOT}/${BIN_FOLDER}) -set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${OUTPUT_ROOT}/${BIN_FOLDER}) +ov_set_if_not_defined(CMAKE_COMPILE_PDB_OUTPUT_DIRECTORY ${OUTPUT_ROOT}/${BIN_FOLDER}) +ov_set_if_not_defined(CMAKE_PDB_OUTPUT_DIRECTORY ${OUTPUT_ROOT}/${BIN_FOLDER}) +ov_set_if_not_defined(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${OUTPUT_ROOT}/${BIN_FOLDER}) if(APPLE) set(CMAKE_MACOSX_RPATH ON) From e7145bd343e3e8dae59f5e26c48ff3fe34690c0a Mon Sep 17 00:00:00 2001 From: Maxim Gordeev Date: Mon, 21 Feb 2022 23:29:38 +0300 Subject: [PATCH 042/310] [IE Samples] Changed input's tensor preprocessing for speech sample (#10552) * Changed input's tensor preprocessing * improved processing --- samples/cpp/speech_sample/main.cpp | 39 ++++++++++++++++++++++++++---- 1 file changed, 34 insertions(+), 5 deletions(-) diff --git a/samples/cpp/speech_sample/main.cpp b/samples/cpp/speech_sample/main.cpp index dc5fad5c4b6..fd484be240e 100644 --- a/samples/cpp/speech_sample/main.cpp +++ b/samples/cpp/speech_sample/main.cpp @@ -506,13 +506,42 @@ int main(int argc, char* argv[]) { inferRequest.frameIndex = -1; continue; } + ptrInputBlobs.clear(); + if (FLAGS_iname.empty()) { + for (auto& input : cInputInfo) { + ptrInputBlobs.push_back(inferRequest.inferRequest.get_tensor(input)); + } + } else { + std::vector inputNameBlobs = convert_str_to_vector(FLAGS_iname); + for (const auto& input : inputNameBlobs) { + ov::Tensor blob = inferRequests.begin()->inferRequest.get_tensor(input); + if (!blob) { + std::string errMessage("No blob with name : " + input); + throw std::logic_error(errMessage); + } + ptrInputBlobs.push_back(blob); + } + } + + /** Iterate over all the input blobs **/ + for (size_t i = 0; i < numInputFiles; ++i) { + ov::Tensor minput = 
ptrInputBlobs[i]; + if (!minput) { + std::string errMessage("We expect ptrInputBlobs[" + std::to_string(i) + + "] to be inherited from Tensor, " + + "but in fact we were not able to cast input to Tensor"); + throw std::logic_error(errMessage); + } + memcpy(minput.data(), inputFrame[i], minput.get_byte_size()); + // Used to infer fewer frames than the batch size + if (batchSize != numFramesThisBatch) { + memset(minput.data() + numFramesThisBatch * numFrameElementsInput[i], + 0, + (batchSize - numFramesThisBatch) * numFrameElementsInput[i]); + } + } // ----------------------------------------------------------------------------------------------------- int index = static_cast(frameIndex) - (FLAGS_cw_l + FLAGS_cw_r); - for (int i = 0; i < executableNet.inputs().size(); i++) { - inferRequest.inferRequest.set_input_tensor( - i, - ov::Tensor(ov::element::f32, executableNet.inputs()[i].get_shape(), inputFrame[i])); - } /* Starting inference in asynchronous mode*/ inferRequest.inferRequest.start_async(); inferRequest.frameIndex = index < 0 ? -2 : index; From d7ad1bd9cd1e75b98446b672bd6bf7ec2b5249bd Mon Sep 17 00:00:00 2001 From: Mikhail Letavin Date: Tue, 22 Feb 2022 00:40:26 +0300 Subject: [PATCH 043/310] [GPU] Extra graph transformation passes in case of Dynamic Batch for correct optimization behavior (#10561) --- src/plugins/intel_gpu/src/plugin/program.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/plugins/intel_gpu/src/plugin/program.cpp b/src/plugins/intel_gpu/src/plugin/program.cpp index a57842ae8bf..6ff9e179a93 100644 --- a/src/plugins/intel_gpu/src/plugin/program.cpp +++ b/src/plugins/intel_gpu/src/plugin/program.cpp @@ -8,6 +8,7 @@ #include "openvino/core/graph_util.hpp" #include "intel_gpu/plugin/itt.hpp" #include "intel_gpu/runtime/debug_configuration.hpp" +#include "intel_gpu/plugin/transformations_pipeline.hpp" using namespace InferenceEngine; using namespace InferenceEngine::details; @@ -198,6 +199,11 @@ Program::Program(InferenceEngine::CNNNetwork& network, std::shared_ptrreshape(new_shapes); + { + auto deviceInfo = engine->get_device_info(); + TransformationsPipeline transformations(config, deviceInfo); + transformations.apply(new_func); + } // reshape network input/output maps accordingly // for correct network compilation From 5be402750ad458a390c4dc225d906648a4b7c9ac Mon Sep 17 00:00:00 2001 From: Edward Shogulin Date: Tue, 22 Feb 2022 02:02:11 +0300 Subject: [PATCH 044/310] [LPT] FuseConvert transformation extension (#10558) * [LPT] FuseConvert transformation extension * [LPT] Tests * [LPT] Cleanup & tests refactoring --- .../include/low_precision/fake_quantize.hpp | 1 - .../fake_quantize_decomposition.hpp | 1 - .../low_precision/fuse_fake_quantize.hpp | 30 --- .../src/fake_quantize.cpp | 3 +- .../src/fuse_convert.cpp | 8 +- .../src/fuse_fake_quantize.cpp | 193 ------------------ .../src/low_precision.cpp | 1 - .../fuse_convert_transformation.cpp | 59 +++++- ...ntize_with_multi_inputs_transformation.cpp | 4 +- ...sformations_after_split_transformation.cpp | 1 - .../common/dequantization_operations.hpp | 12 ++ .../common/fake_quantize_on_data.hpp | 6 + .../fuse_convert_function.hpp | 1 + .../src/fuse_convert_function.cpp | 12 +- 14 files changed, 87 insertions(+), 245 deletions(-) delete mode 100644 src/common/low_precision_transformations/include/low_precision/fuse_fake_quantize.hpp delete mode 100644 src/common/low_precision_transformations/src/fuse_fake_quantize.cpp diff --git a/src/common/low_precision_transformations/include/low_precision/fake_quantize.hpp 
b/src/common/low_precision_transformations/include/low_precision/fake_quantize.hpp index 1df89215758..e04626f057c 100644 --- a/src/common/low_precision_transformations/include/low_precision/fake_quantize.hpp +++ b/src/common/low_precision_transformations/include/low_precision/fake_quantize.hpp @@ -7,7 +7,6 @@ #include #include #include "layer_transformation.hpp" -#include "low_precision/fuse_fake_quantize.hpp" namespace ngraph { namespace pass { diff --git a/src/common/low_precision_transformations/include/low_precision/fake_quantize_decomposition.hpp b/src/common/low_precision_transformations/include/low_precision/fake_quantize_decomposition.hpp index bf6bdbce879..171e2515a75 100644 --- a/src/common/low_precision_transformations/include/low_precision/fake_quantize_decomposition.hpp +++ b/src/common/low_precision_transformations/include/low_precision/fake_quantize_decomposition.hpp @@ -7,7 +7,6 @@ #include #include #include "layer_transformation.hpp" -#include "low_precision/fuse_fake_quantize.hpp" namespace ngraph { namespace pass { diff --git a/src/common/low_precision_transformations/include/low_precision/fuse_fake_quantize.hpp b/src/common/low_precision_transformations/include/low_precision/fuse_fake_quantize.hpp deleted file mode 100644 index fc5aa7ce130..00000000000 --- a/src/common/low_precision_transformations/include/low_precision/fuse_fake_quantize.hpp +++ /dev/null @@ -1,30 +0,0 @@ -// Copyright (C) 2018-2022 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include -#include -#include "low_precision/layer_transformation.hpp" - -namespace ngraph { -namespace pass { -namespace low_precision { - -class LP_TRANSFORMATIONS_API FuseFakeQuantizeTransformation : public LayerTransformation { -public: - NGRAPH_RTTI_DECLARATION; - FuseFakeQuantizeTransformation(const Params& params); - bool transform(TransformationContext& context, ngraph::pattern::Matcher &m) override; - bool isPrecisionPreserved(std::shared_ptr layer) const noexcept override; - -private: - std::shared_ptr handle( - TransformationContext& context, - const std::shared_ptr& fakeQuantize) const; -}; - -} // namespace low_precision -} // namespace pass -} // namespace ngraph diff --git a/src/common/low_precision_transformations/src/fake_quantize.cpp b/src/common/low_precision_transformations/src/fake_quantize.cpp index 25787b894c9..72628c3b999 100644 --- a/src/common/low_precision_transformations/src/fake_quantize.cpp +++ b/src/common/low_precision_transformations/src/fake_quantize.cpp @@ -175,7 +175,8 @@ std::shared_ptr FakeQuantizeTransformation::fuseElementwis return nullptr; } - const auto data = fq::getDataNode(eltwise); + // issue #79980 + const auto data = eltwise->get_input_size() == 1ul ? 
eltwise->get_input_node_shared_ptr(0) : fq::getDataNode(eltwise); const size_t outputIdx = NetworkHelper::getParentOutputIndex(data, eltwise); const auto newFakeQuantize = ov::as_type_ptr(fakeQuantize->clone_with_new_inputs({ diff --git a/src/common/low_precision_transformations/src/fuse_convert.cpp b/src/common/low_precision_transformations/src/fuse_convert.cpp index a6b0c713981..003dc5098f2 100644 --- a/src/common/low_precision_transformations/src/fuse_convert.cpp +++ b/src/common/low_precision_transformations/src/fuse_convert.cpp @@ -23,8 +23,14 @@ FuseConvertTransformation::FuseConvertTransformation(const Params& params) : Lay auto multiply = pattern::wrap_type({ pattern::wrap_type(), pattern::wrap_type() }); auto subtract = pattern::wrap_type({ pattern::wrap_type(), pattern::wrap_type() }); auto add = pattern::wrap_type({ pattern::wrap_type(), pattern::wrap_type() }); + auto fakeQuantize = pattern::wrap_type({ + pattern::wrap_type({pattern::wrap_type()}), + pattern::any_input(), + pattern::any_input(), + pattern::any_input(), + pattern::any_input()}); auto matcher = std::make_shared( - std::make_shared(OutputVector{ multiply, subtract, add }), + std::make_shared(OutputVector{ multiply, subtract, add, fakeQuantize }), "FuseConvertTransformation"); ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) { diff --git a/src/common/low_precision_transformations/src/fuse_fake_quantize.cpp b/src/common/low_precision_transformations/src/fuse_fake_quantize.cpp deleted file mode 100644 index fa9ba5b1c27..00000000000 --- a/src/common/low_precision_transformations/src/fuse_fake_quantize.cpp +++ /dev/null @@ -1,193 +0,0 @@ -// Copyright (C) 2018-2022 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "low_precision/fuse_fake_quantize.hpp" -#include -#include -#include -#include "low_precision/common/ie_lpt_exception.hpp" -#include "low_precision/network_helper.hpp" - -namespace ngraph { -namespace pass { -namespace low_precision { - -NGRAPH_RTTI_DEFINITION(ngraph::pass::low_precision::FuseFakeQuantizeTransformation, "FuseFakeQuantizeTransformation", 0); - -FuseFakeQuantizeTransformation::FuseFakeQuantizeTransformation(const Params& params) : LayerTransformation(params) { - auto matcher = pattern::wrap_type(); - - ngraph::graph_rewrite_callback callback = [this](pattern::Matcher& m) { - auto op = m.get_match_root(); - if (transformation_callback(op)) { - return false; - } - return transform(*context, m); - }; - - auto m = std::make_shared(matcher, "FuseFakeQuantizeTransformation"); - this->register_matcher(m, callback); -} - -bool FuseFakeQuantizeTransformation::transform(TransformationContext& context, ngraph::pattern::Matcher &m) { - auto fakeQuantize = ov::as_type_ptr(m.get_match_root()); - if (!fakeQuantize) - return false; - - do { - fakeQuantize = handle(context, fakeQuantize); - } while (fakeQuantize != nullptr); - return true; -} - -namespace fuse_fq { -namespace { - -std::shared_ptr updateShape(std::shared_ptr op, const PartialShape& targetPShape) { - assert(targetPShape.is_static()); - assert(op->get_output_partial_shape(0).is_static()); - const Shape targetShape = targetPShape.to_shape(); - const Shape shape = op->get_output_shape(0); - - if ((shape.size() < targetShape.size()) && (shape.size() > 1ul)) { - op = fold( - op, - std::make_shared(ngraph::element::i32, Shape{ 1 }, std::vector({ 0ul }))); - } - return op; -} - -std::shared_ptr getDataNode(const std::shared_ptr& eltwise) { - if (!ov::is_type(eltwise->get_input_node_shared_ptr(0))) { - return 
eltwise->get_input_node_shared_ptr(0); - } - - if (!ov::is_type(eltwise->get_input_node_shared_ptr(1))) { - return eltwise->get_input_node_shared_ptr(1); - } - - return nullptr; -} - -std::shared_ptr getConstant(const std::shared_ptr& eltwise) { - if (eltwise->get_input_size() != 2) { - return nullptr; - } - - std::shared_ptr constant = ov::as_type_ptr(eltwise->get_input_node_shared_ptr(1)); - if (constant != nullptr) { - return constant; - } - - return ov::as_type_ptr(eltwise->get_input_node_shared_ptr(0)); -} - -bool eltwiseWithConstant(const std::shared_ptr& eltwise) { - std::shared_ptr constant = getConstant(eltwise); - if (constant == nullptr) { - return false; - } - - Shape shape = constant->get_shape(); - if ((!shape.empty()) && (shape_size(shape) != 1ul)) { - const auto eltwisePShape = eltwise->get_output_partial_shape(0); - if (eltwisePShape.rank().is_dynamic()) { - return false; - } - - const size_t eltwiseOutRank = eltwisePShape.rank().get_length(); - if ((eltwiseOutRank - shape.size()) > 1) { - return false; - } - - if ((eltwiseOutRank - shape.size()) == 1ul) { - shape.insert(shape.begin(), 1ul); - } - - for (size_t i = 2ul; i < shape.size(); ++i) { - if (shape[i] != 1ul) { - return false; - } - } - } - - return getDataNode(eltwise) != nullptr; -} - -} // namespace -} // namespace fuse_fq - -std::shared_ptr FuseFakeQuantizeTransformation::handle( - TransformationContext& context, - const std::shared_ptr& fakeQuantize) const { - const std::shared_ptr eltwise = fakeQuantize->get_input_node_shared_ptr(0); - - std::shared_ptr inputLowConst = fakeQuantize->get_input_node_shared_ptr(1); - std::shared_ptr inputHightConst = fakeQuantize->get_input_node_shared_ptr(2); - - std::shared_ptr constant = fuse_fq::getConstant(eltwise); - if (ov::is_type(eltwise) && fuse_fq::eltwiseWithConstant(eltwise)) { - const auto value = constant->get_output_element_type(0) == eltwise->get_output_element_type(0) ? - constant : - foldConvert(constant, eltwise->get_output_element_type(0)); - - inputLowConst = fuse_fq::updateShape(fold(inputLowConst, value), fakeQuantize->get_output_partial_shape(0)); - inputHightConst = fuse_fq::updateShape(fold(inputHightConst, value), fakeQuantize->get_output_partial_shape(0)); - } else if (ov::is_type(eltwise) && fuse_fq::eltwiseWithConstant(eltwise)) { - const auto value = constant->get_output_element_type(0) == eltwise->get_output_element_type(0) ? - constant : - foldConvert(constant, eltwise->get_output_element_type(0)); - - inputLowConst = fuse_fq::updateShape(fold(inputLowConst, value), fakeQuantize->get_output_partial_shape(0)); - inputHightConst = fuse_fq::updateShape(fold(inputHightConst, value), fakeQuantize->get_output_partial_shape(0)); - } else if (ov::is_type(eltwise) && fuse_fq::eltwiseWithConstant(eltwise)) { - const auto value = constant->get_output_element_type(0) == eltwise->get_output_element_type(0) ? - constant : - foldConvert(constant, eltwise->get_output_element_type(0)); - - inputLowConst = fuse_fq::updateShape(fold(inputLowConst, value), fakeQuantize->get_output_partial_shape(0)); - inputHightConst = fuse_fq::updateShape(fold(inputHightConst, value), fakeQuantize->get_output_partial_shape(0)); - } else if (ov::is_type(eltwise) && fuse_fq::eltwiseWithConstant(eltwise)) { - if (ov::is_type(fuse_fq::getDataNode(eltwise)) || - ov::is_type(fuse_fq::getDataNode(eltwise))) { - return nullptr; - } - - const auto value = constant->get_output_element_type(0) == eltwise->get_output_element_type(0) ? 
- constant : - foldConvert(constant, eltwise->get_output_element_type(0)); - - inputLowConst = fuse_fq::updateShape(fold(inputLowConst, value), fakeQuantize->get_output_partial_shape(0)); - inputHightConst = fuse_fq::updateShape(fold(inputHightConst, value), fakeQuantize->get_output_partial_shape(0)); - } else if (ov::is_type(eltwise)) { - // issue #40611 - if ((eltwise->get_input_element_type(0) == element::i32) && (eltwise->get_output_element_type(0) == element::f32)) { - return nullptr; - } - } else { - return nullptr; - } - - const auto data = fuse_fq::getDataNode(eltwise); - const size_t outputIdx = NetworkHelper::getParentOutputIndex(data, eltwise); - - std::shared_ptr newFakeQuantize = ov::as_type_ptr(fakeQuantize->clone_with_new_inputs({ - data->output(outputIdx), - inputLowConst, - inputHightConst, - fakeQuantize->input_value(3), - fakeQuantize->input_value(4) })); - - replace_node(fakeQuantize, newFakeQuantize); - NetworkHelper::copyInfo(fakeQuantize, newFakeQuantize); - return newFakeQuantize; -} - -bool FuseFakeQuantizeTransformation::isPrecisionPreserved(std::shared_ptr layer) const noexcept { - return false; -} - -} // namespace low_precision -} // namespace pass -} // namespace ngraph diff --git a/src/common/low_precision_transformations/src/low_precision.cpp b/src/common/low_precision_transformations/src/low_precision.cpp index 038003bfa8c..e91373b1e0f 100644 --- a/src/common/low_precision_transformations/src/low_precision.cpp +++ b/src/common/low_precision_transformations/src/low_precision.cpp @@ -74,7 +74,6 @@ #include "low_precision/convert.hpp" #include "low_precision/fold_fake_quantize.hpp" #include "low_precision/fuse_convert.hpp" -#include "low_precision/fuse_fake_quantize.hpp" #include "low_precision/fuse_subtract_to_fake_quantize.hpp" #include "low_precision/fuse_multiply_to_fake_quantize.hpp" #include "low_precision/multiply_to_group_convolution.hpp" diff --git a/src/tests/functional/inference_engine/lp_transformations/fuse_convert_transformation.cpp b/src/tests/functional/inference_engine/lp_transformations/fuse_convert_transformation.cpp index 4d766f589c1..972ac81c8e4 100644 --- a/src/tests/functional/inference_engine/lp_transformations/fuse_convert_transformation.cpp +++ b/src/tests/functional/inference_engine/lp_transformations/fuse_convert_transformation.cpp @@ -30,12 +30,14 @@ public: public: ngraph::element::Type inputPrecision; ngraph::builder::subgraph::DequantizationOperations dequantization; + ngraph::builder::subgraph::FakeQuantizeOnData fakeQuantize; }; class Expected { public: ngraph::element::Type inputPrecision; ngraph::builder::subgraph::DequantizationOperations dequantization; + ngraph::builder::subgraph::FakeQuantizeOnData fakeQuantize; }; bool constInput; @@ -58,6 +60,7 @@ public: inputShape, testValues.actual.inputPrecision, testValues.actual.dequantization, + testValues.actual.fakeQuantize, testValues.constInput); SimpleLowPrecisionTransformer transformer; @@ -68,6 +71,7 @@ public: inputShape, testValues.expected.inputPrecision, testValues.expected.dequantization, + testValues.expected.fakeQuantize, testValues.constInput); } @@ -77,9 +81,13 @@ public: std::ostringstream result; result << - inputShape << "_" << - testValues.actual.inputPrecision << "_" << - testValues.actual.dequantization << "_" << + "IS_" << inputShape << "_" << + "AIP_" << testValues.actual.inputPrecision << "_" << + "ADEQ_" << testValues.actual.dequantization << "_" << + "AFQ_" << testValues.actual.fakeQuantize << "_" << + "EIP_" << testValues.expected.inputPrecision << 
"_" << + "EDEQ_" << testValues.expected.dequantization << "_" << + "EFQ_" << testValues.expected.fakeQuantize << "_" << testValues.constInput; return result.str(); } @@ -111,7 +119,8 @@ const std::vector testValues = { { ngraph::element::f32 }, {1.f}, {0.45f} - } + }, + {} }, { ngraph::element::u8, @@ -119,7 +128,8 @@ const std::vector testValues = { {}, DequantizationOperations::Subtract({1.f}, ngraph::element::f32).setConstantPrecision(ngraph::element::f32), {0.45f} - } + }, + {} } }, // fuse to multiply @@ -132,7 +142,8 @@ const std::vector testValues = { { ngraph::element::f32 }, {}, {0.45f} - } + }, + {} }, { ngraph::element::u8, @@ -140,7 +151,8 @@ const std::vector testValues = { {}, {}, DequantizationOperations::Multiply({0.45f}, ngraph::element::f32).setConstantPrecision(ngraph::element::f32) - } + }, + {} } }, // Convert with unexpected precision @@ -149,11 +161,13 @@ const std::vector testValues = { LayerTransformation::createParamsU8I8(), { ngraph::element::f32, - {{ ngraph::element::i32 }, {}, {3.f}} + {{ ngraph::element::i32 }, {}, {3.f}}, + {} }, { ngraph::element::f32, - {{ ngraph::element::i32 }, {}, {3.f}} + {{ ngraph::element::i32 }, {}, {3.f}}, + {} } }, }; @@ -173,6 +187,27 @@ const std::vector inputShapes = { }; const std::vector testValuesWithConstant = { + // Constant + // | + // Convert Const Const Const Const + // \ \ | / / + // \ \ | / / + // FakeQuantize + // + { + true, + LayerTransformation::createParamsU8I8(), + { + ngraph::element::u8, + {{ngraph::element::f32}, {}, {}}, + { 256, {}, {0.f}, {0.1f}, {0.f}, {0.1f}, ov::element::f32} + }, + { + ngraph::element::f32, + {}, + { 256, {}, {0.f}, {0.1f}, {0.f}, {0.1f}, ov::element::f32} + } + }, // fuse to const { true, @@ -183,7 +218,8 @@ const std::vector testValuesWithConstant = { ngraph::element::f32 }, {1.f}, {0.45f} - } + }, + {} }, { ngraph::element::f32, @@ -191,7 +227,8 @@ const std::vector testValuesWithConstant = {}, {1.f}, {0.45f} - } + }, + {} } }, }; diff --git a/src/tests/functional/inference_engine/lp_transformations/fuse_fake_quantize_with_multi_inputs_transformation.cpp b/src/tests/functional/inference_engine/lp_transformations/fuse_fake_quantize_with_multi_inputs_transformation.cpp index 0df3320562a..2f949a05c1c 100644 --- a/src/tests/functional/inference_engine/lp_transformations/fuse_fake_quantize_with_multi_inputs_transformation.cpp +++ b/src/tests/functional/inference_engine/lp_transformations/fuse_fake_quantize_with_multi_inputs_transformation.cpp @@ -12,7 +12,7 @@ #include #include -#include +#include #include "lpt_ngraph_functions/common/fake_quantize_on_data.hpp" #include "lpt_ngraph_functions/common/dequantization_operations.hpp" @@ -62,7 +62,7 @@ public: testValues.actual.fakeQuantizeOnData); SimpleLowPrecisionTransformer transformer; - transformer.add(testValues.params); + transformer.add(testValues.params); transformer.transform(actualFunction); referenceFunction = ngraph::builder::subgraph::FuseFakeQuantizeFunction::get( diff --git a/src/tests/functional/inference_engine/lp_transformations/transformations_after_split_transformation.cpp b/src/tests/functional/inference_engine/lp_transformations/transformations_after_split_transformation.cpp index 89ce7756452..d054d36f912 100644 --- a/src/tests/functional/inference_engine/lp_transformations/transformations_after_split_transformation.cpp +++ b/src/tests/functional/inference_engine/lp_transformations/transformations_after_split_transformation.cpp @@ -35,7 +35,6 @@ // cleanup transformations #include "low_precision/fuse_convert.hpp" 
-#include "low_precision/fuse_fake_quantize.hpp" #include "low_precision/fuse_subtract_to_fake_quantize.hpp" #include "low_precision/fuse_multiply_to_fake_quantize.hpp" #include "low_precision/multiply_to_group_convolution.hpp" diff --git a/src/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/common/dequantization_operations.hpp b/src/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/common/dequantization_operations.hpp index bfd8c44eadd..0930f48337d 100644 --- a/src/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/common/dequantization_operations.hpp +++ b/src/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/common/dequantization_operations.hpp @@ -117,10 +117,16 @@ public: }; inline std::ostream& operator<<(std::ostream& out, const DequantizationOperations::Convert& convert) { + if (convert.empty()) { + return out << "{}"; + } return out << "_" << (convert.outPrecision != element::undefined ? convert.outPrecision.get_type_name() : ""); } inline std::ostream& operator<<(std::ostream& out, const DequantizationOperations::Subtract& subtract) { + if (subtract.empty()) { + return out << "{}"; + } return out << "_" << subtract.values << "_" << subtract.outPrecision << "_" << @@ -132,6 +138,9 @@ inline std::ostream& operator<<(std::ostream& out, const DequantizationOperation } inline std::ostream& operator<<(std::ostream& out, const DequantizationOperations::Multiply& multiply) { + if (multiply.empty()) { + return out << "{}"; + } return out << "_" << multiply.values << "_" << multiply.outPrecision << "_" << @@ -142,6 +151,9 @@ inline std::ostream& operator<<(std::ostream& out, const DequantizationOperation } inline std::ostream& operator<<(std::ostream& out, const DequantizationOperations& data) { + if (data.empty()) { + return out << "{}"; + } return out << "_" << data.convert << "_" << data.subtract << "_" << data.multiply; } diff --git a/src/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/common/fake_quantize_on_data.hpp b/src/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/common/fake_quantize_on_data.hpp index ce0a816b90d..6612c978a15 100644 --- a/src/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/common/fake_quantize_on_data.hpp +++ b/src/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/common/fake_quantize_on_data.hpp @@ -54,6 +54,9 @@ inline std::ostream& operator<<(std::ostream& os, const std::vector& valu } inline std::ostream& operator<<(std::ostream& out, const FakeQuantizeOnData& data) { + if (data.empty()) { + return out << "{}"; + } return out << "_" << data.quantizationLevel << data.constantShape << "_" << data.inputLowValues << "_" << data.inputHighValues << "_" << data.outputLowValues << "_" << data.outputHighValues << "_" << (data.outputPrecision == ngraph::element::undefined ? "" : data.outputPrecision.get_type_name()); @@ -89,6 +92,9 @@ public: }; inline std::ostream& operator<<(std::ostream& out, const FakeQuantizeOnDataWithConstant& data) { + if (data.empty()) { + return out << "{}"; + } return out << "_" << data.quantizationLevel << (data.constantShapes.empty() ? 
ngraph::Shape{} : data.constantShapes[0]) << "_" << data.inputLowValues << "_" << data.inputHighValues << "_" << diff --git a/src/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/fuse_convert_function.hpp b/src/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/fuse_convert_function.hpp index 793948fd991..ecb203d6672 100644 --- a/src/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/fuse_convert_function.hpp +++ b/src/tests/ngraph_helpers/lpt_ngraph_functions/include/lpt_ngraph_functions/fuse_convert_function.hpp @@ -20,6 +20,7 @@ public: const ngraph::PartialShape& inputShape, const ngraph::element::Type inputPrecision, const ngraph::builder::subgraph::DequantizationOperations& dequantization, + const ngraph::builder::subgraph::FakeQuantizeOnData& fakeQuantize, const bool constInput); static std::shared_ptr getWithFQ( diff --git a/src/tests/ngraph_helpers/lpt_ngraph_functions/src/fuse_convert_function.cpp b/src/tests/ngraph_helpers/lpt_ngraph_functions/src/fuse_convert_function.cpp index 48bce4c1bad..6f20e1bea9d 100644 --- a/src/tests/ngraph_helpers/lpt_ngraph_functions/src/fuse_convert_function.cpp +++ b/src/tests/ngraph_helpers/lpt_ngraph_functions/src/fuse_convert_function.cpp @@ -16,6 +16,7 @@ std::shared_ptr FuseConvertFunction::get( const ngraph::PartialShape& inputShape, const ngraph::element::Type inputPrecision, const ngraph::builder::subgraph::DequantizationOperations& dequantization, + const ngraph::builder::subgraph::FakeQuantizeOnData& fakeQuantize, const bool constInput) { std::shared_ptr parent; std::shared_ptr input; @@ -28,14 +29,19 @@ std::shared_ptr FuseConvertFunction::get( parent = input; } - const std::shared_ptr dequantizationOp = makeDequantization(parent, dequantization); - dequantizationOp->set_friendly_name("output"); + parent = makeDequantization(parent, dequantization); + + if (!fakeQuantize.empty()) { + parent = makeFakeQuantize(parent, fakeQuantize.outputPrecision, fakeQuantize); + } + + parent->set_friendly_name("output"); auto parameters = constInput ? ngraph::ParameterVector{}: ngraph::ParameterVector{ input }; - ngraph::ResultVector results{ std::make_shared(dequantizationOp) }; + ngraph::ResultVector results{std::make_shared(parent)}; return std::make_shared(results, parameters, "FuseConvertFunction"); } From aea0532d768fb0d2f0ca28b2f1179d2362f55e5b Mon Sep 17 00:00:00 2001 From: Andrey Zaytsev Date: Tue, 22 Feb 2022 02:15:58 +0300 Subject: [PATCH 045/310] Fixed POT docs (#10574) --- tools/pot/README.md | 26 +++++++++++++------------- tools/pot/configs/README.md | 6 +++--- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/tools/pot/README.md b/tools/pot/README.md index aeed38b3732..4076119f3a6 100644 --- a/tools/pot/README.md +++ b/tools/pot/README.md @@ -5,7 +5,7 @@ .. toctree:: :maxdepth: 1 :hidden: - + pot_InstallationGuide pot_docs_LowPrecisionOptimizationGuide pot_compression_algorithms_quantization_README @@ -25,10 +25,10 @@ special methods without model retraining or fine-tuning, for example, post-train require a training dataset or a pipeline. To apply post-training algorithms from the POT, you need: * A floating-point precision model, FP32 or FP16, converted into the OpenVINO™ Intermediate Representation (IR) format and run on CPU with the OpenVINO™. -* A representative calibration dataset representing a use case scenario, for example, 300 images. +* A representative calibration dataset representing a use case scenario, for example, 300 images. 
Figure below shows the optimization workflow: -![](docs/images/workflow_simple.png) +![](docs/images/workflow_simple.png) ### Features @@ -44,7 +44,7 @@ For benchmarking results collected for the models optimized with POT tool, see [ POT is opensourced on GitHub as a part of OpenVINO and available at https://github.com/openvinotoolkit/openvino/tools/pot. Further documentation presumes that you are familiar with basic Deep Learning concepts, such as model inference, -dataset preparation, model optimization, as well as with the OpenVINO™ toolkit and its components, such as [Model Optimizer](@ref openvino_docs_MO_DG_Deep_Learning_Model_Optimizer_DevGuide) +dataset preparation, model optimization, as well as with the OpenVINO™ toolkit and its components, such as [Model Optimizer](@ref openvino_docs_MO_DG_Deep_Learning_Model_Optimizer_DevGuide) and [Accuracy Checker Tool](@ref omz_tools_accuracy_checker). ## Get started @@ -54,27 +54,27 @@ To install POT, follow the [Installation Guide](docs/InstallationGuide.md). ### Usage options -![](docs/images/use_cases.png) +![](docs/images/use_cases.png) The POT provides three basic usage options: * **Command-line interface (CLI)**: * [**Simplified mode**](@ref pot_docs_simplified_mode): use this option if the model belongs to the Computer Vision domain and you do have an unannotated dataset for optimization. Note that this optimization method can cause a deviation of model accuracy. - * [**Model Zoo flow**](@ref pot_compression_cli_README): this option is recommended if the model is imported from OpenVINO™ -[Model Zoo](https://github.com/openvinotoolkit/open_model_zoo) or there is a valid [Accuracy Checker Tool](@ref omz_tools_accuracy_checker_README) -configuration file for the model that allows validating model accuracy using [Accuracy Checker Tool](@ref omz_tools_accuracy_checker_README). + * [**Model Zoo flow**](@ref pot_compression_cli_README): this option is recommended if the model is imported from OpenVINO™ +[Model Zoo](https://github.com/openvinotoolkit/open_model_zoo) or there is a valid [Accuracy Checker Tool](@ref omz_tools_accuracy_checker) +configuration file for the model that allows validating model accuracy using [Accuracy Checker Tool](@ref omz_tools_accuracy_checker). * [**Python\* API**](@ref pot_compression_api_README): this option allows integrating the optimization methods implemented in POT into -a Python* inference script that uses [OpenVINO Python* API](https://docs.openvino.ai/latest/openvino_inference_engine_ie_bridges_python_docs_api_overview.html). +a Python* inference script that uses [OpenVINO Python* API](https://docs.openvino.ai/latest/openvino_inference_engine_ie_bridges_python_docs_api_overview.html). -POT is also integrated into [Deep Learning Workbench](@ref workbench_docs_Workbench_DG_Introduction) (DL Workbench), a web-based graphical environment -that enables you to optimize, tune, analyze, visualize, and compare performance of deep learning models. +POT is also integrated into [Deep Learning Workbench](@ref workbench_docs_Workbench_DG_Introduction) (DL Workbench), a web-based graphical environment +that enables you to optimize, tune, analyze, visualize, and compare performance of deep learning models. 
### Examples OpenVINO provides several examples to demonstrate the POT optimization workflow: * Command-line example: - * [Quantization of Image Classification model](https://docs.openvino.ai/latest/pot_configs_examples_README.html) + * [Quantization of Image Classification model](https://docs.openvino.ai/latest/pot_configs_examples_README.html) * API tutorials: * [Quantization of Image Classification model](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/301-tensorflow-training-openvino) * [Quantization of Object Detection model from Model Zoo](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/111-detection-quantization) @@ -91,5 +91,5 @@ OpenVINO provides several examples to demonstrate the POT optimization workflow: * [Low Precision Optimization Guide](docs/LowPrecisionOptimizationGuide.md) * [Post-Training Optimization Best Practices](docs/BestPractices.md) -* [POT Frequently Asked Questions](docs/FrequentlyAskedQuestions.md) +* [POT Frequently Asked Questions](docs/FrequentlyAskedQuestions.md) * [INT8 Quantization by Using Web-Based Interface of the DL Workbench](https://docs.openvino.ai/latest/workbench_docs_Workbench_DG_Int_8_Quantization.html) diff --git a/tools/pot/configs/README.md b/tools/pot/configs/README.md index 7e24a55049c..d92b73bbef8 100644 --- a/tools/pot/configs/README.md +++ b/tools/pot/configs/README.md @@ -33,7 +33,7 @@ The main parameter is `"type"` which can take two possible options: `"accuracy_c - **Simplified mode** engines. These engines can be used only with `DefaultQuantization` algorithm to get a fully quantized model. They do not use the Accuracy Checker tool and annotation. In the case, of this mode the following parameters are applicable: - `"data_source"` Specifies the path to the directory​ where to calibration data is stored. - `"layout"` - (Optional) Layout of input data. Supported values: [`"NCHW"`, `"NHWC"`, `"CHW"`, `"CWH"`]​. -- **Accuracy Checker** engine. It relies on the [Deep Learning Accuracy Validation Framework](@ref omz_tools_accuracy_checker_README) (Accuracy Checker) when inferencing DL models and working with datasets. +- **Accuracy Checker** engine. It relies on the [Deep Learning Accuracy Validation Framework](@ref omz_tools_accuracy_checker) (Accuracy Checker) when inferencing DL models and working with datasets. The benefit of this mode is you can compute accuracy in case you have annotations. When this mode is selected, you can use the accuracy aware algorithms family. There are two options to define engine parameters in this mode: - Refer to the existing Accuracy Checker configuration file which is represented by the YAML file. It can be a file used for full-precision model validation. In this case, you should define only the `"config"` parameter containing a path to the AccuracyChecker configuration file. @@ -49,8 +49,8 @@ This section defines optimization algorithms and their parameters. For more deta ## Examples of the Configuration File -For a quick start, many examples of configuration files are provided [here](https://github.com/openvinotoolkit/openvino/blob/master/tools/pot/configs/examples). There you can find ready-to-use configurations for the models from various domains: Computer Vision (Image - Classification, Object Detection, Segmentation), Natural Language Processing, Recommendation Systems. We basically +For a quick start, many examples of configuration files are provided [here](https://github.com/openvinotoolkit/openvino/blob/master/tools/pot/configs/examples). 
There you can find ready-to-use configurations for the models from various domains: Computer Vision (Image + Classification, Object Detection, Segmentation), Natural Language Processing, Recommendation Systems. We basically put configuration files for the models which require non-default configuration settings in order to get accurate results. For details on how to run the Post-Training Optimization Tool with a sample configuration file, see the [example](@ref pot_configs_examples_README). From 33062bef7a06770ebbc3eee6bd67956bf79297c7 Mon Sep 17 00:00:00 2001 From: Andrew Kwangwoong Park Date: Tue, 22 Feb 2022 11:35:04 +0900 Subject: [PATCH 046/310] [GPU] Fix permute performance degradation (#10559) * [GPU] Fix permute performance degradation Signed-off-by: Andrew Kwangwoong Park * add description for update Signed-off-by: Andrew Kwangwoong Park --- .../src/graph/include/permute_inst.h | 12 +++++ .../intel_gpu/src/graph/layout_optimizer.cpp | 54 +++++-------------- 2 files changed, 26 insertions(+), 40 deletions(-) diff --git a/src/plugins/intel_gpu/src/graph/include/permute_inst.h b/src/plugins/intel_gpu/src/graph/include/permute_inst.h index f1c4b6e9c80..65db4644c5f 100644 --- a/src/plugins/intel_gpu/src/graph/include/permute_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/permute_inst.h @@ -24,6 +24,18 @@ public: program_node& input() const { return get_dependency(0); } std::vector get_permute_order() const { return get_primitive()->permute_order; } + bool is_rotating_except_batch() const { + // Target transform: Rotate feature dim to back to be taken as inner-most axis + // ex) 0(b), 4(f), 1(z), 2(y), 3(x) + // ex) 0(b), 3(f), 1(y), 2(x) + auto& order = get_primitive()->permute_order; + if ((int32_t) order[1] != order.size() - 1) return false; + if ((int32_t) order[0] != 0) return false; + for (int32_t i = 2; i < (int32_t) order.size(); ++i) { + if ((int32_t)order[i] != (i - 1)) return false; + } + return true; + } }; using permute_node = typed_program_node; diff --git a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp index e732095bdb3..646ee81a433 100644 --- a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp +++ b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp @@ -375,23 +375,11 @@ bool layout_optimizer::can_fuse_reorder(program_node& prev, program_node& next, return true; if (next.is_type()) { - auto is_rotating_except_batch = [](const std::vector& order) { - // Target transform: Rotate feature dim to back to be taken as inner-most axis - // ex) 0(b), 4(f), 1(z), 2(y), 3(x) - // ex) 0(b), 3(f), 1(y), 2(x) - if ((int32_t) order[1] != order.size() - 1) return false; - if ((int32_t) order[0] != 0) return false; - for (int32_t i = 2; i < (int32_t) order.size(); ++i) { - if ((int32_t)order[i] != (i - 1)) return false; - } - return true; - }; - auto& permute_order = next.as().get_primitive()->permute_order; if ((fmt_prev == format::b_fs_yx_fsv4 || fmt_prev == format::b_fs_yx_fsv32 || fmt_prev == format::b_fs_zyx_fsv32 || fmt_prev == format::b_fs_yx_fsv16 || fmt_prev == format::b_fs_zyx_fsv16 || fmt_prev == format::bs_fs_yx_bsv16_fsv16) && permute_order[1] == 2 - && (!is_rotating_except_batch(permute_order))) { + && (!next.as().is_rotating_except_batch())) { return false; } return true; @@ -439,23 +427,11 @@ bool layout_optimizer::can_fuse_reorder_to_prev(program_node& prev, program_node return true; if (prev.is_type()) { - auto is_rotating_except_batch = [](const std::vector& order) { - // Target transform: Rotate 
feature dim to back to be taken as inner-most axis - // ex) 0(b), 4(f), 1(z), 2(y), 3(x) - // ex) 0(b), 3(f), 1(y), 2(x) - if ((int32_t) order[1] != order.size() - 1) return false; - if ((int32_t) order[0] != 0) return false; - for (int32_t i = 2; i < (int32_t) order.size(); ++i) { - if ((int32_t)order[i] != (i - 1)) return false; - } - return true; - }; - auto& permute_order = prev.as().get_primitive()->permute_order; if ((fmt_prev == format::b_fs_yx_fsv4 || fmt_prev == format::b_fs_yx_fsv32 || fmt_prev == format::b_fs_zyx_fsv32 || fmt_prev == format::b_fs_yx_fsv16 || fmt_prev == format::b_fs_zyx_fsv16 || fmt_prev == format::bs_fs_yx_bsv16_fsv16) && permute_order[1] == 2 - && (!is_rotating_except_batch(permute_order))) { + && (!prev.as().is_rotating_except_batch())) { return false; } return true; @@ -1707,6 +1683,17 @@ format layout_optimizer::get_preferred_format(program_node& node) { } else { expected = format::any; } + } else if (node.is_type()) { + if (node.get_dependencies().size() == 1 && node.get_dependencies().front()->is_type()) { + auto& conv_node = node.get_dependencies().front()->as(); + const auto& fmt = get_preferred_format(conv_node); + // if the preferred format of the previous conv of permute is fs_b_yx_fsv32, + // it is better to set to b_fs_yx_fsv32 that supports tiled permute (permute_tile_8x8_4x4_fsv) + // because fs_b_yx_fsv32 is only supported by permute_ref. + if (node.as().is_rotating_except_batch() && fmt == format::fs_b_yx_fsv32) { + expected = format::b_fs_yx_fsv32; + } + } } return expected; @@ -1716,21 +1703,8 @@ bool layout_optimizer::all_users_simple_format_until_output(program_node& origin if (cur_node.is_output()) return true; if (cur_depth > max_depth) return false; - auto is_rotating_except_batch = [](const std::vector& order) { - // Target transform: Rotate feature dim to back to be taken as inner-most axis - // ex) 0(b), 4(f), 1(z), 2(y), 3(x) - // ex) 0(b), 3(f), 1(y), 2(x) - if ((int32_t) order[1] != order.size() - 1) return false; - if ((int32_t) order[0] != 0) return false; - for (int32_t i = 2; i < (int32_t) order.size(); ++i) { - if ((int32_t)order[i] != (i - 1)) return false; - } - return true; - }; - if (cur_node.is_type()) { - auto& permute_order = cur_node.as().get_primitive()->permute_order; - if (!is_rotating_except_batch(permute_order)) + if (!cur_node.as().is_rotating_except_batch()) return false; } From 1891967ad36f5ca3f90621b474729ba1dd7bf80d Mon Sep 17 00:00:00 2001 From: Liubov Talamanova Date: Tue, 22 Feb 2022 09:11:43 +0300 Subject: [PATCH 047/310] [POT] Add quantization templates to wheel (#10557) --- tools/pot/setup.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tools/pot/setup.py b/tools/pot/setup.py index 1c855e6d50f..bcc6c6bdd2b 100644 --- a/tools/pot/setup.py +++ b/tools/pot/setup.py @@ -25,6 +25,11 @@ class InstallCmd(install): # install requires self.do_egg_install() + def_quant_path = os.path.join("configs", "default_quantization_template.json") + aa_quant_path = os.path.join("configs", "accuracy_aware_quantization_template.json") + copyfile(def_quant_path, os.path.join(self.install_purelib, prefix, "pot", def_quant_path)) + copyfile(aa_quant_path, os.path.join(self.install_purelib, prefix, "pot", aa_quant_path)) + version_txt = os.path.join(prefix, "pot", "version.txt") if os.path.exists(version_txt): copyfile(os.path.join(version_txt), From 746b77c74ad0250d87ee1c28bc4896af8d193488 Mon Sep 17 00:00:00 2001 From: Taylor Yeonbok Lee Date: Tue, 22 Feb 2022 15:34:46 +0900 Subject: [PATCH 048/310] [GPU] Revised 
unique ID setting scheme. (#10548) * Revised unique ID setting scheme. Previously it was using program id to distinguish the loop body networks' id. However, it results in a cl cache miss for the same network loaded multiple times, because program ids are different. Now revised it to use parent primitive id instead of program_id for unique id of nodes in body networks. * Revised adding unique_id to entry points to have a temporary number as the unique id * Revert the canceled change * Added test to check whether two networks loaded from the same function create the same cl cache --- .../graph/graph_optimizer/compile_graph.cpp | 3 +- .../graph_optimizer/post_input_reorder.cpp | 2 +- .../remove_redundant_reorders.cpp | 2 +- .../src/graph/include/program_node.h | 16 +++++++-- .../src/graph/kernel_selector_helper.cpp | 2 +- src/plugins/intel_gpu/src/graph/program.cpp | 1 + .../intel_gpu/src/graph/program_node.cpp | 3 +- .../functional/plugin/gpu/behavior/cache.cpp | 33 +++++++++++++++++++ 8 files changed, 53 insertions(+), 9 deletions(-) diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/compile_graph.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/compile_graph.cpp index c749edc9784..5359ca294a3 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/compile_graph.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/compile_graph.cpp @@ -20,9 +20,8 @@ using namespace cldnn; void compile_graph::run(program& p) { OV_ITT_SCOPED_TASK(itt::domains::CLDNN, "CLDNN::pass::CompileGraph"); - size_t order_idx = 0; for (auto& node : p.get_processing_order()) { - node->set_unique_id(std::to_string(order_idx++)); + node->set_unique_id(); if (!node->is_type()) { node->get_output_layout(); } diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/post_input_reorder.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/post_input_reorder.cpp index a8ff3c4d739..d2ab615804d 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/post_input_reorder.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/post_input_reorder.cpp @@ -62,7 +62,7 @@ void post_input_reorder::run(program& p) { input_layout.size, input_layout.data_padding); auto& reorder = add_reorder(p, input, node, current_layout); - reorder.set_unique_id(node->get_unique_id() + "_input_reorder"); + reorder.set_unique_id(); reorder.get_output_layout(false); node->set_output_layout(previous_layout, false); reorder.set_selected_impl(reorder.type()->choose_impl(reorder)); diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/remove_redundant_reorders.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/remove_redundant_reorders.cpp index eb6b8ca1ea6..a4476bf6b17 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/remove_redundant_reorders.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/remove_redundant_reorders.cpp @@ -35,7 +35,7 @@ void remove_redundant_reorders::run(program& p) { if (!update_implementations) return; - node.set_unique_id(node.get_unique_id() + "_reorder"); + node.set_unique_id(); auto new_impl = node.type()->choose_impl(node); node.set_selected_impl(std::move(new_impl)); }; diff --git a/src/plugins/intel_gpu/src/graph/include/program_node.h b/src/plugins/intel_gpu/src/graph/include/program_node.h index 6c80fc6958a..bc0d0c0607f 100644 --- a/src/plugins/intel_gpu/src/graph/include/program_node.h +++ b/src/plugins/intel_gpu/src/graph/include/program_node.h @@ -7,6 +7,7 @@ #include "intel_gpu/primitives/primitive.hpp" #include "intel_gpu/primitives/activation.hpp" #include
"intel_gpu/primitives/implementation_desc.hpp" +#include "intel_gpu/graph/program.hpp" #include "kernel_selector_helper.h" #include "meta_utils.h" @@ -17,6 +18,7 @@ #include #include #include +#include namespace cldnn { @@ -350,11 +352,19 @@ public: bool need_lockable_memory() const; - std::string get_unique_id() const { return unique_id; } - void set_unique_id(std::string id) { unique_id = id; } + size_t get_unique_id() const { return unique_id; } + + void set_unique_id() { + unique_id = cur_id++; + } + + static void reset_unique_id() { + cur_id = 0; + } protected: - std::string unique_id; + size_t unique_id = 0; + static thread_local size_t cur_id; std::shared_ptr desc; program& myprog; diff --git a/src/plugins/intel_gpu/src/graph/kernel_selector_helper.cpp b/src/plugins/intel_gpu/src/graph/kernel_selector_helper.cpp index cab06f3e87e..66bc73c0afe 100644 --- a/src/plugins/intel_gpu/src/graph/kernel_selector_helper.cpp +++ b/src/plugins/intel_gpu/src/graph/kernel_selector_helper.cpp @@ -839,7 +839,7 @@ void set_params(const program_node& node, kernel_selector::params& params) { const auto& program = node.get_program(); const auto& device_info = program.get_engine().get_device_info(); - params.uniqueID = std::to_string(program.get_id()) + "_" + node.get_unique_id(); + params.uniqueID = std::to_string(node.get_unique_id()); params.engineInfo.bSubGroupSupport = device_info.supports_subgroups; params.engineInfo.bSubGroupShortSupport = device_info.supports_subgroups_short; params.engineInfo.bSubGroupCharSupport = device_info.supports_subgroups_char; diff --git a/src/plugins/intel_gpu/src/graph/program.cpp b/src/plugins/intel_gpu/src/graph/program.cpp index 2518bd8903a..73fcf10162e 100644 --- a/src/plugins/intel_gpu/src/graph/program.cpp +++ b/src/plugins/intel_gpu/src/graph/program.cpp @@ -107,6 +107,7 @@ program::program(engine& engine_ref, prepare_nodes(topology); _kernels_cache = std::unique_ptr(new kernels_cache(_engine, prog_id, kernel_selector::KernelBase::get_db().get_batch_header_str())); + program_node::reset_unique_id(); if (no_optimizations) { init_graph(); } else { diff --git a/src/plugins/intel_gpu/src/graph/program_node.cpp b/src/plugins/intel_gpu/src/graph/program_node.cpp index 61861e42bee..5d1dc6a6aba 100644 --- a/src/plugins/intel_gpu/src/graph/program_node.cpp +++ b/src/plugins/intel_gpu/src/graph/program_node.cpp @@ -3,7 +3,6 @@ // #include "program_node.h" -#include "intel_gpu/graph/program.hpp" #include "program_helpers.h" #include "primitive_inst.h" @@ -24,6 +23,8 @@ using namespace cldnn; +thread_local size_t program_node::cur_id = 0; + program_node::program_node(std::shared_ptr prim, program& prog) : desc(prim), myprog(prog), org_id(prim ? 
(prim->id) : 0) { if (prim) diff --git a/src/tests/functional/plugin/gpu/behavior/cache.cpp b/src/tests/functional/plugin/gpu/behavior/cache.cpp index 2d64e8812b9..853e2612497 100644 --- a/src/tests/functional/plugin/gpu/behavior/cache.cpp +++ b/src/tests/functional/plugin/gpu/behavior/cache.cpp @@ -49,6 +49,39 @@ TEST_F(CompiledKernelsCacheTest, CanCreateCacheDirAndDumpBinaries) { } } +TEST_F(CompiledKernelsCacheTest, TwoNetworksWithSameModelCreatesSameCache) { + std::shared_ptr ie = PluginCache::get().ie(); + // Create two CNNNetwork from same ngraph::Function + InferenceEngine::CNNNetwork cnnNet1(function); + InferenceEngine::CNNNetwork cnnNet2(function); + std::map config = {{ CONFIG_KEY(CACHE_DIR), cache_path }}; + try { + // Load 1st CNNNetwork + auto execNet1 = ie->LoadNetwork(cnnNet1, "GPU", config); + auto n_cache_files = CommonTestUtils::listFilesWithExt(cache_path, "cl_cache").size(); + + // Check that directory with cached kernels exists after loading network + ASSERT_TRUE(CommonTestUtils::directoryExists(cache_path)) << "Directory with cached kernels doesn't exist"; + // Load 2nd CNNNetwork + auto execNet2 = ie->LoadNetwork(cnnNet2, "GPU", config); + + // Check that two loaded networks with same function creates same caches + ASSERT_EQ(CommonTestUtils::removeFilesWithExt(cache_path, "cl_cache"), n_cache_files); + + // Remove directory and check that it doesn't exist anymore + ASSERT_EQ(CommonTestUtils::removeDir(cache_path), 0); + ASSERT_FALSE(CommonTestUtils::directoryExists(cache_path)); + } catch (std::exception& ex) { + // Cleanup in case of any exception + if (CommonTestUtils::directoryExists(cache_path)) { + ASSERT_GE(CommonTestUtils::removeFilesWithExt(cache_path, "cl_cache"), 0); + ASSERT_EQ(CommonTestUtils::removeDir(cache_path), 0); + } + FAIL() << ex.what() << std::endl; + } +} + + #ifdef OPENVINO_ENABLE_UNICODE_PATH_SUPPORT TEST_F(CompiledKernelsCacheTest, CanCreateCacheDirAndDumpBinariesUnicodePath) { From 4075f8ed51f3f297f51cc9f0922156cf1265cdc2 Mon Sep 17 00:00:00 2001 From: Egor Duplensky Date: Tue, 22 Feb 2022 11:38:02 +0300 Subject: [PATCH 049/310] [CPU] Fix ScaleShift and FQ merge optimization (#9244) --- src/plugins/intel_cpu/src/graph_optimizer.cpp | 83 +++-- src/plugins/intel_cpu/src/node.cpp | 9 +- .../cpu/single_layer_tests/convolution.cpp | 14 + .../plugin/cpu/single_layer_tests/matmul.cpp | 2 + .../subgraph_tests/src/conv_sum_broadcast.cpp | 96 ++--- .../plugin/cpu/test_utils/cpu_test_utils.cpp | 16 +- .../plugin/cpu/test_utils/cpu_test_utils.hpp | 12 +- .../cpu/test_utils/fusing_test_utils.cpp | 13 +- .../cpu/test_utils/fusing_test_utils.hpp | 330 ++++++++++-------- 9 files changed, 319 insertions(+), 256 deletions(-) diff --git a/src/plugins/intel_cpu/src/graph_optimizer.cpp b/src/plugins/intel_cpu/src/graph_optimizer.cpp index fadac2d363e..827692487b1 100644 --- a/src/plugins/intel_cpu/src/graph_optimizer.cpp +++ b/src/plugins/intel_cpu/src/graph_optimizer.cpp @@ -337,25 +337,37 @@ void MKLDNNGraphOptimizer::FuseDeconvolutionAndSimpleOperation(MKLDNNGraph &grap void MKLDNNGraphOptimizer::FuseMultiplyAndAdd(MKLDNNGraph &graph) { auto& graphNodes = graph.GetNodes(); - auto isSuitableSecondInput = [](MKLDNNNodePtr node, VectorDims dataDims) { + auto isSuitableSecondInput = [](const MKLDNNNodePtr& node, VectorDims dataDims) { if (node->getType() != Input || !node->isConstant()) return false; - auto secondInputDims = node->getOutputShapeAtPort(0).getStaticDims(); + const auto secondInputDims = node->getOutputShapeAtPort(0).getStaticDims(); if 
(secondInputDims.size() != dataDims.size() || secondInputDims.size() < 2) return false; - if (secondInputDims[0] != 1 || !dimsEqualWeak(secondInputDims[1], dataDims[1])) + auto getChannelAxis = [](const VectorDims& dims) { + auto channelAxis = -1; + for (int i = 0; i < dims.size(); i ++) { + if (dims[i] != 1) { + if (channelAxis != -1) // more than one axis is != 1 + return -1; + else + channelAxis = i; + } + } + return channelAxis; + }; + + const auto channelAxis = getChannelAxis(secondInputDims); + if (channelAxis == -1) return false; - for (size_t i = 2; i < secondInputDims.size(); i++) { - if (secondInputDims[i] != 1) - return false; - } + if (secondInputDims[0] != 1 || !dimsEqualWeak(secondInputDims[channelAxis], dataDims[channelAxis])) + return false; return true; }; - auto isSuitableParentNode = [&](MKLDNNNodePtr node) { + auto isSuitableParentNode = [&](const MKLDNNNodePtr& node) { if (node->getAlgorithm() != EltwiseMultiply || !node->getFusedWith().empty() || node->getParentEdges().size() != 2 || node->getChildEdges().size() != 1) return false; @@ -363,7 +375,7 @@ void MKLDNNGraphOptimizer::FuseMultiplyAndAdd(MKLDNNGraph &graph) { return isSuitableSecondInput(node->getParentEdgesAtPort(1)[0]->getParent(), node->getInputShapeAtPort(0).getDims()); }; - auto isSuitableChildNode = [&](MKLDNNNodePtr parentNode, MKLDNNNodePtr childNode) { + auto isSuitableChildNode = [&](const MKLDNNNodePtr& parentNode, const MKLDNNNodePtr& childNode) { if (childNode->getAlgorithm() != EltwiseAdd || !childNode->getFusedWith().empty() || childNode->getParentEdges().size() != 2) return false; @@ -430,7 +442,7 @@ void MKLDNNGraphOptimizer::FuseMultiplyAndAdd(MKLDNNGraph &graph) { graph.RemoveEdge(remEdge); } - auto parentEltwise = parentNode; + auto& parentEltwise = parentNode; MKLDNNEdgePtr newEdge(new MKLDNNEdge(parent, parentEltwise, inNum, parentEltwise->getParentEdges().size())); auto &graphEdges = graph.GetEdges(); graphEdges.push_back(newEdge); @@ -1701,36 +1713,37 @@ void MKLDNNGraphOptimizer::FuseClampAndFakeQuantize(MKLDNNGraph &graph) { void MKLDNNGraphOptimizer::FusePerformedAsScaleShiftAndFakeQuantize(MKLDNNGraph &graph) { auto& graphNodes = graph.GetNodes(); - auto getConstPort = [](const MKLDNNNodePtr node) -> int { - if (node->getParentEdgesAtPort(0)[0]->getParent()->getType() == Input && node->getParentEdgesAtPort(0)[0]->getParent()->isConstant()) { - return 0; - } else if (node->getParentEdgesAtPort(1)[0]->getParent()->getType() == Input && node->getParentEdgesAtPort(1)[0]->getParent()->isConstant()) { - return 1; - } else { + auto getNonConstPort = [](const MKLDNNNodePtr& node) { + std::vector nonConstPorts; + for (size_t i = 0; i < node->getParentEdges().size(); i++) { + const auto& parent = node->getParentEdgeAt(i)->getParent(); + if (!(parent->getType() == Input && parent->isConstant())) + nonConstPorts.push_back(i); + } + // there are more than 1 nonconst port or missed + if (nonConstPorts.size() != 1) return -1; - } + + return nonConstPorts[0]; }; - auto isSuitableScaleShiftNode = [getConstPort](MKLDNNNodePtr node) { - if (one_of(node->getAlgorithm(), EltwiseAdd, EltwiseSubtract, EltwiseMultiply, EltwiseDivide, EltwiseMulAdd)) { - MKLDNNNode *parent = nullptr; - if (node->getAlgorithm() != EltwiseMulAdd) { - const auto constPort = getConstPort(node); - if (constPort == -1) { - return false; - } - parent = node->getParentEdgesAtPort(1 - constPort)[0]->getParent().get(); - } - return node->getType() == Eltwise && node->getChildEdges().size() == 1 && 
node->canBePerformedAsScaleShift(parent); - } - return false; + auto isSuitableScaleShiftNode = [getNonConstPort](const MKLDNNNodePtr& node) { + if (!one_of(node->getAlgorithm(), EltwiseAdd, EltwiseSubtract, EltwiseMultiply, EltwiseDivide, EltwiseMulAdd)) + return false; + + const auto nonConstPort = getNonConstPort(node); + if (nonConstPort == -1) + return false; + + const MKLDNNNodePtr eltwiseInput = node->getParentEdgeAt(nonConstPort)->getParent(); + return node->getChildEdges().size() == 1 && node->canBePerformedAsScaleShift(eltwiseInput.get()); }; - auto isSuitableFakeQuantizeNode = [](MKLDNNNodePtr node) { + auto isSuitableFakeQuantizeNode = [](const MKLDNNNodePtr& node) { return node->getType() == FakeQuantize && node->getAlgorithm() != FQBinarization; }; - auto fuseScaleShiftAndFakeQuantizeNodes = [getConstPort](MKLDNNNodePtr parent, MKLDNNNodePtr child) { + auto fuseScaleShiftAndFakeQuantizeNodes = [getNonConstPort](const MKLDNNNodePtr& parent, const MKLDNNNodePtr& child) { auto fakeQuantizeNode = std::dynamic_pointer_cast(child); if (fakeQuantizeNode == nullptr) IE_THROW() << "Cannot cast " << child->getName() << " to FakeQuantize node"; @@ -1742,11 +1755,13 @@ void MKLDNNGraphOptimizer::FusePerformedAsScaleShiftAndFakeQuantize(MKLDNNGraph IE_THROW() << "Cannot cast " << parent->getName() << " to Eltwise node"; } - std::tie(scalesBuffer, shiftsBuffer) = parentEltwise->getScalesAndShifts(parent->getParentEdgesAtPort(1 - getConstPort(parent))[0]->getParent().get()); + const MKLDNNNodePtr eltwiseInput = parentEltwise->getParentEdgeAt(getNonConstPort(parent))->getParent(); + std::tie(scalesBuffer, shiftsBuffer) = parentEltwise->getScalesAndShifts(eltwiseInput.get()); const auto &outputShape = child->getOutputShapeAtPort(0); VectorDims outputDims = outputShape.getDims(); - const size_t channelPos = outputDims.size() > 1 ? 1 : 0; + const size_t channelPos = parent->getParentEdgeAt(0)->getParent()->getFusingAxis(); + if (outputShape.isDynamic()) { if (outputDims[channelPos] == Shape::UNDEFINED_DIM) { if (scalesBuffer.size() > 1) { diff --git a/src/plugins/intel_cpu/src/node.cpp b/src/plugins/intel_cpu/src/node.cpp index 02bbfe7f625..15945a2ecff 100644 --- a/src/plugins/intel_cpu/src/node.cpp +++ b/src/plugins/intel_cpu/src/node.cpp @@ -1263,11 +1263,12 @@ MKLDNNNode* MKLDNNNode::NodesFactory::create(const std::shared_ptr } bool MKLDNNNode::canBePerformedAsScaleShift(const MKLDNNNode *parentNode) const { - size_t fusingPort = 0; - // @todo graph optimizer can provide parentNode as nullptr. Should be avoided - const size_t channelAxis = parentNode ? parentNode->getFusingAxis() : MKLDNNNode::getFusingAxis(); + IE_ASSERT(parentNode); - for (size_t i = (parentNode == nullptr ? 
1 : 0); i < getParentEdges().size(); i++) { + size_t fusingPort = 0; + const size_t channelAxis = parentNode->getFusingAxis(); + + for (size_t i = 0; i < getParentEdges().size(); i++) { MKLDNNNode *node = getParentEdgesAtPort(i)[0]->getParent().get(); if (node == nullptr) { IE_THROW() << "Cannot get parent node for " << getName() << " on " << i << " port"; diff --git a/src/tests/functional/plugin/cpu/single_layer_tests/convolution.cpp b/src/tests/functional/plugin/cpu/single_layer_tests/convolution.cpp index 00e2da6c101..bbf30caeba1 100755 --- a/src/tests/functional/plugin/cpu/single_layer_tests/convolution.cpp +++ b/src/tests/functional/plugin/cpu/single_layer_tests/convolution.cpp @@ -1156,6 +1156,20 @@ INSTANTIATE_TEST_SUITE_P(smoke_Conv_3D_FP32, ConvolutionLayerCPUTest, ::testing::Values(cpuEmptyPluginConfig)), ConvolutionLayerCPUTest::getTestCaseName); +INSTANTIATE_TEST_SUITE_P(smoke_Conv_3D_FP32_fusingScaleShiftAndFakeQuantizePerChannel, ConvolutionLayerCPUTest, + ::testing::Combine( + ::testing::Combine( + convParams_ExplicitPadding_3D, + ::testing::Values(ElementType::f32), + ::testing::Values(ElementType::undefined), + ::testing::Values(ElementType::undefined), + ::testing::ValuesIn(inputShapes3d), + ::testing::Values(CommonTestUtils::DEVICE_CPU)), + ::testing::ValuesIn(filterCPUInfoForDevice(CPUParams_3D)), + ::testing::Values(fusingScaleShiftAndFakeQuantizePerChannel), + ::testing::Values(cpuEmptyPluginConfig)), + ConvolutionLayerCPUTest::getTestCaseName); + INSTANTIATE_TEST_SUITE_P(smoke_Conv_3D_BF16, ConvolutionLayerCPUTest, ::testing::Combine( ::testing::Combine( diff --git a/src/tests/functional/plugin/cpu/single_layer_tests/matmul.cpp b/src/tests/functional/plugin/cpu/single_layer_tests/matmul.cpp index fb311d6eaf2..49f6a649d1f 100644 --- a/src/tests/functional/plugin/cpu/single_layer_tests/matmul.cpp +++ b/src/tests/functional/plugin/cpu/single_layer_tests/matmul.cpp @@ -427,6 +427,7 @@ std::vector fusingParamsSet3D_smoke { fusingBias, fusingMultiplyPerChannel, fusingFakeQuantizePerChannel, + fusingScaleShiftAndFakeQuantizePerChannel, }; std::vector fusingParamsSet3D_nightly { @@ -866,6 +867,7 @@ std::vector matmulFusingParams { * so Relu cannot be fused in this case. 
Should be analysed */ // fusingFakeQuantizePerChannelRelu, fusingFakeQuantizePerTensorRelu, + fusingScaleShiftAndFakeQuantizePerChannel, }; const auto matMulParams = ::testing::Combine(::testing::ValuesIn(IS), diff --git a/src/tests/functional/plugin/cpu/subgraph_tests/src/conv_sum_broadcast.cpp b/src/tests/functional/plugin/cpu/subgraph_tests/src/conv_sum_broadcast.cpp index 4a82b0adcf2..33c54170556 100644 --- a/src/tests/functional/plugin/cpu/subgraph_tests/src/conv_sum_broadcast.cpp +++ b/src/tests/functional/plugin/cpu/subgraph_tests/src/conv_sum_broadcast.cpp @@ -119,72 +119,72 @@ TEST_P(ConcatConvSumInPlaceTest, CompareWithRefs) { namespace { const auto fusingMulAddFQMullAdd = fusingSpecificParams{ std::make_shared(std::vector{ - {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params) { - ngraph::Shape newShape = generatePerChannelShape(inpNode); - auto constNode = ngraph::builder::makeConstant(ngPrc, newShape, std::vector{}, true); - return std::make_shared(inpNode, constNode); + {[](postNodeConfig& cfg) { + ngraph::Shape newShape = generatePerChannelShape(cfg.input); + auto constNode = ngraph::builder::makeConstant(cfg.type, newShape, std::vector{}, true); + return std::make_shared(cfg.input, constNode); }, "Multiply(PerChannel)"}, - {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params) { - ngraph::Shape newShape = generatePerChannelShape(inpNode); - auto constNode = ngraph::builder::makeConstant(ngPrc, newShape, std::vector{}, true); - return std::make_shared(inpNode, constNode); + {[](postNodeConfig& cfg) { + ngraph::Shape newShape = generatePerChannelShape(cfg.input); + auto constNode = ngraph::builder::makeConstant(cfg.type, newShape, std::vector{}, true); + return std::make_shared(cfg.input, constNode); }, "Add(PerChannel)"}, - {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ - auto localPrc = inpNode->get_element_type(); - ngraph::Shape newShape = generatePerChannelShape(inpNode); - return ngraph::builder::makeFakeQuantize(inpNode, localPrc, 256, newShape); + {[](postNodeConfig& cfg){ + auto localPrc = cfg.input->get_element_type(); + ngraph::Shape newShape = generatePerChannelShape(cfg.input); + return ngraph::builder::makeFakeQuantize(cfg.input, localPrc, 256, newShape); }, "FakeQuantize(PerChannel)"}, - {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params) { - ngraph::Shape newShape = generatePerChannelShape(inpNode); - auto constNode = ngraph::builder::makeConstant(ngPrc, newShape, std::vector{}, true); - return std::make_shared(inpNode, constNode); + {[](postNodeConfig& cfg) { + ngraph::Shape newShape = generatePerChannelShape(cfg.input); + auto constNode = ngraph::builder::makeConstant(cfg.type, newShape, std::vector{}, true); + return std::make_shared(cfg.input, constNode); }, "Multiply(PerChannel)"}, - {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params) { - ngraph::Shape newShape = generatePerChannelShape(inpNode); - auto constNode = ngraph::builder::makeConstant(ngPrc, newShape, std::vector{}, true); - return std::make_shared(inpNode, constNode); + {[](postNodeConfig& cfg) { + ngraph::Shape newShape = generatePerChannelShape(cfg.input); + auto constNode = ngraph::builder::makeConstant(cfg.type, newShape, std::vector{}, true); + return std::make_shared(cfg.input, constNode); }, "Add(PerChannel)"}}), {"Add"} }; const auto fusingDivSubFQ = 
fusingSpecificParams{ std::make_shared(std::vector{ - {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ - ngraph::Shape secondMultInShape = generatePerChannelShape(inpNode); - auto secondMultInput = ngraph::builder::makeConstant(ngPrc, secondMultInShape, std::vector{}, true); - return std::make_shared(inpNode, secondMultInput); + {[](postNodeConfig& cfg){ + ngraph::Shape secondMultInShape = generatePerChannelShape(cfg.input); + auto secondMultInput = ngraph::builder::makeConstant(cfg.type, secondMultInShape, std::vector{}, true); + return std::make_shared(cfg.input, secondMultInput); }, "Divide(PerChannel)"}, - {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ - ngraph::Shape secondMultInShape = generatePerChannelShape(inpNode); - auto secondMultInput = ngraph::builder::makeConstant(ngPrc, secondMultInShape, std::vector{}, true); - return std::make_shared(inpNode, secondMultInput); + {[](postNodeConfig& cfg){ + ngraph::Shape secondMultInShape = generatePerChannelShape(cfg.input); + auto secondMultInput = ngraph::builder::makeConstant(cfg.type, secondMultInShape, std::vector{}, true); + return std::make_shared(cfg.input, secondMultInput); }, "Subtract(PerChannel)"}, - {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ - auto localPrc = inpNode->get_element_type(); - ngraph::Shape newShape = generatePerChannelShape(inpNode); - return ngraph::builder::makeFakeQuantize(inpNode, localPrc, 256, newShape); + {[](postNodeConfig& cfg){ + auto localPrc = cfg.input->get_element_type(); + ngraph::Shape newShape = generatePerChannelShape(cfg.input); + return ngraph::builder::makeFakeQuantize(cfg.input, localPrc, 256, newShape); }, "FakeQuantize(PerChannel)"}}), {"FakeQuantize"} }; const auto fusingSigmoidFQFQ = fusingSpecificParams{ std::make_shared(std::vector{ - {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ - return ngraph::builder::makeActivation(inpNode, ngPrc, ngraph::helpers::Sigmoid); + {[](postNodeConfig& cfg){ + return ngraph::builder::makeActivation(cfg.input, cfg.type, ngraph::helpers::Sigmoid); }, "Sigmoid"}, - {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ - auto localPrc = inpNode->get_element_type(); - ngraph::Shape newShape = generatePerChannelShape(inpNode); - return ngraph::builder::makeFakeQuantize(inpNode, localPrc, 256, newShape); + {[](postNodeConfig& cfg){ + auto localPrc = cfg.input->get_element_type(); + ngraph::Shape newShape = generatePerChannelShape(cfg.input); + return ngraph::builder::makeFakeQuantize(cfg.input, localPrc, 256, newShape); }, "FakeQuantize(PerChannel)"}, - {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ - auto localPrc = inpNode->get_element_type(); - ngraph::Shape newShape = generatePerChannelShape(inpNode); - return ngraph::builder::makeFakeQuantize(inpNode, localPrc, 256, newShape); + {[](postNodeConfig& cfg){ + auto localPrc = cfg.input->get_element_type(); + ngraph::Shape newShape = generatePerChannelShape(cfg.input); + return ngraph::builder::makeFakeQuantize(cfg.input, localPrc, 256, newShape); }, "FakeQuantize(PerChannel)"}}), {"Sigmoid", "FakeQuantize", "FakeQuantize"} }; const auto fusingClampFQ = fusingSpecificParams{ std::make_shared(std::vector{ - {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ - 
return ngraph::builder::makeActivation(inpNode, ngPrc, ngraph::helpers::Clamp, {}, {3.0f, 6.0f}); + {[](postNodeConfig& cfg){ + return ngraph::builder::makeActivation(cfg.input, cfg.type, ngraph::helpers::Clamp, {}, {3.0f, 6.0f}); }, "Clamp"}, - {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ - auto localPrc = inpNode->get_element_type(); - ngraph::Shape newShape = generatePerChannelShape(inpNode); - return ngraph::builder::makeFakeQuantize(inpNode, localPrc, 256, newShape); + {[](postNodeConfig& cfg){ + auto localPrc = cfg.input->get_element_type(); + ngraph::Shape newShape = generatePerChannelShape(cfg.input); + return ngraph::builder::makeFakeQuantize(cfg.input, localPrc, 256, newShape); }, "FakeQuantize(PerChannel)"}}), {"FakeQuantize"} }; diff --git a/src/tests/functional/plugin/cpu/test_utils/cpu_test_utils.cpp b/src/tests/functional/plugin/cpu/test_utils/cpu_test_utils.cpp index 502779f21b2..ce4a4edd8cc 100644 --- a/src/tests/functional/plugin/cpu/test_utils/cpu_test_utils.cpp +++ b/src/tests/functional/plugin/cpu/test_utils/cpu_test_utils.cpp @@ -114,24 +114,24 @@ std::string CPUTestsBase::impls2str(const std::vector &priority) { return str; } -void CPUTestsBase::CheckPluginRelatedResults(InferenceEngine::ExecutableNetwork &execNet, std::string nodeType) const { +void CPUTestsBase::CheckPluginRelatedResults(InferenceEngine::ExecutableNetwork &execNet, const std::string& nodeType) const { if (nodeType.empty()) return; ASSERT_TRUE(!selectedType.empty()) << "Node type is not defined."; InferenceEngine::CNNNetwork execGraphInfo = execNet.GetExecGraphInfo(); auto function = execGraphInfo.getFunction(); - CheckPluginRelatedResultsImpl(function, std::move(nodeType)); + CheckPluginRelatedResultsImpl(function, nodeType); } -void CPUTestsBase::CheckPluginRelatedResults(ov::CompiledModel &execNet, std::string nodeType) const { +void CPUTestsBase::CheckPluginRelatedResults(const ov::CompiledModel &execNet, const std::string& nodeType) const { if (nodeType.empty()) return; ASSERT_TRUE(!selectedType.empty()) << "Node type is not defined."; auto function = execNet.get_runtime_model(); - CheckPluginRelatedResultsImpl(function, std::move(nodeType)); + CheckPluginRelatedResultsImpl(function, nodeType); } -void CPUTestsBase::CheckPluginRelatedResultsImpl(std::shared_ptr function, std::string nodeType) const { +void CPUTestsBase::CheckPluginRelatedResultsImpl(const std::shared_ptr& function, const std::string& nodeType) const { ASSERT_NE(nullptr, function); for (const auto &node : function->get_ops()) { const auto & rtInfo = node->get_rt_info(); @@ -140,7 +140,7 @@ void CPUTestsBase::CheckPluginRelatedResultsImpl(std::shared_ptrsecond.as(); }; - auto getExecValueOutputsLayout = [] (std::shared_ptr node) -> std::string { + auto getExecValueOutputsLayout = [] (const std::shared_ptr& node) -> std::string { auto rtInfo = node->get_rt_info(); auto it = rtInfo.find(ExecGraphInfoSerialization::OUTPUT_LAYOUTS); IE_ASSERT(rtInfo.end() != it); @@ -261,7 +261,9 @@ std::string CPUTestsBase::getPrimitiveType() const { } CPUTestsBase::CPUInfo -CPUTestsBase::makeCPUInfo(std::vector inFmts, std::vector outFmts, std::vector priority) { +CPUTestsBase::makeCPUInfo(const std::vector& inFmts, + const std::vector& outFmts, + const std::vector& priority) { CPUInfo cpuInfo; if (!inFmts.empty()) { diff --git a/src/tests/functional/plugin/cpu/test_utils/cpu_test_utils.hpp b/src/tests/functional/plugin/cpu/test_utils/cpu_test_utils.hpp index 14d5fa04711..a95d2b3857f 100644 --- 
a/src/tests/functional/plugin/cpu/test_utils/cpu_test_utils.hpp +++ b/src/tests/functional/plugin/cpu/test_utils/cpu_test_utils.hpp @@ -124,9 +124,9 @@ public: static cpu_memory_format_t cpu_str2fmt(const char *str); static std::string fmts2str(const std::vector &fmts, const std::string &prefix); static std::string impls2str(const std::vector &priority); - static CPUInfo makeCPUInfo(std::vector inFmts, - std::vector outFmts, - std::vector priority); + static CPUInfo makeCPUInfo(const std::vector& inFmts, + const std::vector& outFmts, + const std::vector& priority); //TODO: change to setter method static std::string makeSelectedTypeStr(std::string implString, ngraph::element::Type_t elType); @@ -136,11 +136,11 @@ public: const std::shared_ptr &lastNode, std::string name); - void CheckPluginRelatedResults(InferenceEngine::ExecutableNetwork &execNet, std::string nodeType) const; - void CheckPluginRelatedResults(ov::CompiledModel &execNet, std::string nodeType) const; + void CheckPluginRelatedResults(InferenceEngine::ExecutableNetwork &execNet, const std::string& nodeType) const; + void CheckPluginRelatedResults(const ov::CompiledModel &execNet, const std::string& nodeType) const; protected: - virtual void CheckPluginRelatedResultsImpl(std::shared_ptr function, std::string nodeType) const; + virtual void CheckPluginRelatedResultsImpl(const std::shared_ptr& function, const std::string& nodeType) const; /** * @brief This function modifies the initial single layer test graph to add any necessary modifications that are specific to the cpu test scope. * @param ngPrc Graph precision. diff --git a/src/tests/functional/plugin/cpu/test_utils/fusing_test_utils.cpp b/src/tests/functional/plugin/cpu/test_utils/fusing_test_utils.cpp index 4b16a5e131a..2b0e2106974 100644 --- a/src/tests/functional/plugin/cpu/test_utils/fusing_test_utils.cpp +++ b/src/tests/functional/plugin/cpu/test_utils/fusing_test_utils.cpp @@ -36,7 +36,7 @@ CpuTestWithFusing::modifyGraph(const ngraph::element::Type &ngPrc, ngraph::Param return retNode; } -void CpuTestWithFusing::CheckFusingResults(std::shared_ptr function, std::string nodeType) const { +void CpuTestWithFusing::CheckFusingResults(const std::shared_ptr& function, const std::string& nodeType) const { ASSERT_NE(nullptr, function); bool isNodeFound = false; for (const auto & op : function->get_ops()) { @@ -55,7 +55,7 @@ void CpuTestWithFusing::CheckFusingResults(std::shared_ptr func std::string opFriendlyName = op->get_friendly_name(); auto pos = originalLayersNames.find(opFriendlyName); ASSERT_TRUE(pos != std::string::npos) << "Operation name " << op->get_friendly_name() << " has not been found in originalLayersNames!"; - for (auto fusedOp : fusedOps) { + for (const auto& fusedOp : fusedOps) { pos = originalLayersNames.find(fusedOp, checkFusingPosition ? 
pos : 0); ASSERT_TRUE(pos != std::string::npos) << "Fused op " << fusedOp << " has not been found!"; } @@ -64,7 +64,7 @@ void CpuTestWithFusing::CheckFusingResults(std::shared_ptr func ASSERT_TRUE(isNodeFound) << "Node type name: \"" << nodeType << "\" has not been found."; } -void CpuTestWithFusing::CheckPluginRelatedResultsImpl(std::shared_ptr function, std::string nodeType) const { +void CpuTestWithFusing::CheckPluginRelatedResultsImpl(const std::shared_ptr& function, const std::string& nodeType) const { CPUTestsBase::CheckPluginRelatedResultsImpl(function, nodeType); CheckFusingResults(function, nodeType); } @@ -87,8 +87,11 @@ std::shared_ptr postNodesMgr::addPostOps(const ngraph::element::Type &ngPrc, ngraph::ParameterVector ¶ms, const std::shared_ptr &lastNode) const { std::shared_ptr tmpNode = lastNode; - for (auto postNode : _postNodes) { - tmpNode = postNode.makeNode(tmpNode, ngPrc, params); + postNodeConfig cfg{lastNode, tmpNode, ngPrc, params}; + + for (const auto& postNode : _postNodes) { + cfg.input = tmpNode; + tmpNode = postNode.makeNode(cfg); } return tmpNode; } diff --git a/src/tests/functional/plugin/cpu/test_utils/fusing_test_utils.hpp b/src/tests/functional/plugin/cpu/test_utils/fusing_test_utils.hpp index b51c986d9be..9a1a0e55678 100644 --- a/src/tests/functional/plugin/cpu/test_utils/fusing_test_utils.hpp +++ b/src/tests/functional/plugin/cpu/test_utils/fusing_test_utils.hpp @@ -10,8 +10,15 @@ namespace CPUTestUtils { +struct postNodeConfig { + const std::shared_ptr target; + std::shared_ptr input; + const ngraph::element::Type& type; + ngraph::ParameterVector& params; +}; + struct postNodeBuilder { - std::function(std::shared_ptr, const ngraph::element::Type&, ngraph::ParameterVector&)> makeNode; + std::function(postNodeConfig& cfg)> makeNode; std::string name; }; @@ -26,7 +33,7 @@ public: class postFunctionMgr : public postOpMgr { public: - postFunctionMgr(std::shared_ptr function) : _pFunction(function) {} + postFunctionMgr(std::shared_ptr function) : _pFunction(std::move(function)) {} std::shared_ptr addPostOps(const ngraph::element::Type &ngPrc, ngraph::ParameterVector ¶ms, const std::shared_ptr &lastNode) const override; @@ -65,10 +72,10 @@ protected: ngraph::ParameterVector ¶ms, const std::shared_ptr &lastNode) override; - void CheckPluginRelatedResultsImpl(std::shared_ptr function, std::string nodeType) const override; + void CheckPluginRelatedResultsImpl(const std::shared_ptr& function, const std::string& nodeType) const override; private: - void CheckFusingResults(std::shared_ptr function, std::string nodeType) const; + void CheckFusingResults(const std::shared_ptr& function, const std::string& nodeType) const; protected: std::shared_ptr postOpMgrPtr; @@ -98,292 +105,311 @@ static ngraph::Shape generatePerChannelShape(const std::shared_ptr const auto emptyFusingSpec = fusingSpecificParams{nullptr, {}}; const auto fusingRelu = fusingSpecificParams{std::make_shared(std::vector{ - {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ - return ngraph::builder::makeActivation(inpNode, ngPrc, ngraph::helpers::Relu); + {[](postNodeConfig& cfg){ + return ngraph::builder::makeActivation(cfg.input, cfg.type, ngraph::helpers::Relu); }, "Relu"}}), {"Relu"}}; const auto fusingElu = fusingSpecificParams{std::make_shared(std::vector{ - {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ - return ngraph::builder::makeActivation(inpNode, ngPrc, ngraph::helpers::Elu, {}, {2.0f}); + 
{[](postNodeConfig& cfg){ + return ngraph::builder::makeActivation(cfg.input, cfg.type, ngraph::helpers::Elu, {}, {2.0f}); }, "Elu"}}), {"Elu"}}; const auto fusingGelu = fusingSpecificParams{std::make_shared(std::vector{ - {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ - return ngraph::builder::makeActivation(inpNode, ngPrc, ngraph::helpers::Gelu); + {[](postNodeConfig& cfg){ + return ngraph::builder::makeActivation(cfg.input, cfg.type, ngraph::helpers::Gelu); }, "Gelu"}}), {"Gelu"}}; const auto fusingSigmoid = fusingSpecificParams{std::make_shared(std::vector{ - {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ - return ngraph::builder::makeActivation(inpNode, ngPrc, ngraph::helpers::Sigmoid); + {[](postNodeConfig& cfg){ + return ngraph::builder::makeActivation(cfg.input, cfg.type, ngraph::helpers::Sigmoid); }, "Sigmoid"}}), {"Sigmoid"}}; const auto fusingClamp = fusingSpecificParams{std::make_shared(std::vector{ - {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ - return ngraph::builder::makeActivation(inpNode, ngPrc, ngraph::helpers::Clamp, {}, {3.0f, 6.0f}); + {[](postNodeConfig& cfg){ + return ngraph::builder::makeActivation(cfg.input, cfg.type, ngraph::helpers::Clamp, {}, {3.0f, 6.0f}); }, "Clamp"}}), {"Clamp"}}; const auto fusingTanh = fusingSpecificParams{std::make_shared(std::vector{ - {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ - return ngraph::builder::makeActivation(inpNode, ngPrc, ngraph::helpers::Tanh); + {[](postNodeConfig& cfg){ + return ngraph::builder::makeActivation(cfg.input, cfg.type, ngraph::helpers::Tanh); }, "Tanh"}}), {"Tanh"}}; const auto fusingAbs = fusingSpecificParams{std::make_shared(std::vector{ - {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ - return ngraph::builder::makeActivation(inpNode, ngPrc, ngraph::helpers::Abs); + {[](postNodeConfig& cfg){ + return ngraph::builder::makeActivation(cfg.input, cfg.type, ngraph::helpers::Abs); }, "Abs"}}), {"Abs"}}; const auto fusingSqrt = fusingSpecificParams{std::make_shared(std::vector{ - {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ - return ngraph::builder::makeActivation(inpNode, ngPrc, ngraph::helpers::Sqrt); + {[](postNodeConfig& cfg){ + return ngraph::builder::makeActivation(cfg.input, cfg.type, ngraph::helpers::Sqrt); }, "Sqrt"}}), {"Sqrt"}}; const auto fusingPReluPerChannel = fusingSpecificParams{std::make_shared(std::vector{ - {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ - ngraph::Shape newShape = generatePerChannelShape(inpNode); + {[](postNodeConfig& cfg){ + ngraph::Shape newShape = generatePerChannelShape(cfg.target); auto data = NGraphFunctions::Utils::generateVector(ngraph::shape_size(newShape)); - return ngraph::builder::makeActivation(inpNode, ngPrc, ngraph::helpers::LeakyRelu, newShape, data); + return ngraph::builder::makeActivation(cfg.input, cfg.type, ngraph::helpers::LeakyRelu, newShape, data); }, "PRelu(PerChannel)"}}), {"PRelu"}}; const auto fusingPReluPerTensor = fusingSpecificParams{std::make_shared(std::vector{ - {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ + {[](postNodeConfig& cfg){ ngraph::Shape shape(1, 1); auto data = 
NGraphFunctions::Utils::generateVector(ngraph::shape_size(shape)); - return ngraph::builder::makeActivation(inpNode, ngPrc, ngraph::helpers::LeakyRelu, shape, data); + return ngraph::builder::makeActivation(cfg.input, cfg.type, ngraph::helpers::LeakyRelu, shape, data); }, "PRelu(PerTensor)"}}), {"PRelu"}}; const auto fusingSwish = fusingSpecificParams{std::make_shared(std::vector{ - {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ - return ngraph::builder::makeActivation(inpNode, ngPrc, ngraph::helpers::Swish, {}, {1.0f}); + {[](postNodeConfig& cfg){ + return ngraph::builder::makeActivation(cfg.input, cfg.type, ngraph::helpers::Swish, {}, {1.0f}); }, "Swish"}}), {"Swish"}}; const auto fusingSoftPlus = fusingSpecificParams{std::make_shared(std::vector{ - {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ - return ngraph::builder::makeActivation(inpNode, ngPrc, ngraph::helpers::SoftPlus, {}, {}); + {[](postNodeConfig& cfg){ + return ngraph::builder::makeActivation(cfg.input, cfg.type, ngraph::helpers::SoftPlus, {}, {}); }, "SoftPlus"}}), {"SoftPlus"}}; const auto fusingHSwish = fusingSpecificParams{std::make_shared(std::vector{ - {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ - return ngraph::builder::makeActivation(inpNode, ngPrc, ngraph::helpers::HSwish, {}, {}); + {[](postNodeConfig& cfg){ + return ngraph::builder::makeActivation(cfg.input, cfg.type, ngraph::helpers::HSwish, {}, {}); }, "HSwish"}}), {"HSwish"}}; const auto fusingMish = fusingSpecificParams{std::make_shared(std::vector{ - {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ - return ngraph::builder::makeActivation(inpNode, ngPrc, ngraph::helpers::Mish, {}, {}); + {[](postNodeConfig& cfg){ + return ngraph::builder::makeActivation(cfg.input, cfg.type, ngraph::helpers::Mish, {}, {}); }, "Mish"}}), {"Mish"}}; const auto fusingHSigmoid = fusingSpecificParams{std::make_shared(std::vector{ - {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ - return ngraph::builder::makeActivation(inpNode, ngPrc, ngraph::helpers::HSigmoid); + {[](postNodeConfig& cfg){ + return ngraph::builder::makeActivation(cfg.input, cfg.type, ngraph::helpers::HSigmoid); }, "HSigmoid"}}), {"HSigmoid"}}; const auto fusingReluAdd = fusingSpecificParams{std::make_shared(std::vector{ - {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ - return ngraph::builder::makeActivation(inpNode, ngPrc, ngraph::helpers::Relu); + {[](postNodeConfig& cfg){ + return ngraph::builder::makeActivation(cfg.input, cfg.type, ngraph::helpers::Relu); }, "Relu"}, - {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ - ngraph::Shape newShape = generatePerChannelShape(inpNode); - auto constNode = ngraph::builder::makeConstant(ngPrc, newShape, std::vector{}, true); - return std::make_shared(inpNode, constNode); + {[](postNodeConfig& cfg){ + ngraph::Shape newShape = generatePerChannelShape(cfg.target); + auto constNode = ngraph::builder::makeConstant(cfg.type, newShape, std::vector{}, true); + return std::make_shared(cfg.input, constNode); }, "Add(PerChannel)"}}), {"Relu", "Add"}}; const auto fusingReluScaleShift = fusingSpecificParams{std::make_shared(std::vector{ - {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& 
params){ - return ngraph::builder::makeActivation(inpNode, ngPrc, ngraph::helpers::Relu); + {[](postNodeConfig& cfg){ + return ngraph::builder::makeActivation(cfg.input, cfg.type, ngraph::helpers::Relu); }, "Relu"}, - {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ - ngraph::Shape newShape = generatePerChannelShape(inpNode); - auto constNode = ngraph::builder::makeConstant(ngPrc, newShape, std::vector{}, true); - return std::make_shared(inpNode, constNode); + {[](postNodeConfig& cfg){ + ngraph::Shape newShape = generatePerChannelShape(cfg.target); + auto constNode = ngraph::builder::makeConstant(cfg.type, newShape, std::vector{}, true); + return std::make_shared(cfg.input, constNode); }, "Multiply(PerChannel)"}, - {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ - ngraph::Shape newShape = generatePerChannelShape(inpNode); - auto constNode = ngraph::builder::makeConstant(ngPrc, newShape, std::vector{}, true); - return std::make_shared(inpNode, constNode); + {[](postNodeConfig& cfg){ + ngraph::Shape newShape = generatePerChannelShape(cfg.target); + auto constNode = ngraph::builder::makeConstant(cfg.type, newShape, std::vector{}, true); + return std::make_shared(cfg.input, constNode); }, "Add(PerChannel)"}}), {"Relu", "Add"}}; const auto fusingScaleShift = fusingSpecificParams{ std::make_shared(std::vector{ - {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params) { - ngraph::Shape newShape = generatePerChannelShape(inpNode); - auto constNode = ngraph::builder::makeConstant(ngPrc, newShape, std::vector{}, true); - return std::make_shared(inpNode, constNode); + {[](postNodeConfig& cfg) { + ngraph::Shape newShape = generatePerChannelShape(cfg.target); + auto constNode = ngraph::builder::makeConstant(cfg.type, newShape, std::vector{}, true); + return std::make_shared(cfg.input, constNode); }, "Multiply(PerChannel)"}, - {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params) { - ngraph::Shape newShape = generatePerChannelShape(inpNode); - auto constNode = ngraph::builder::makeConstant(ngPrc, newShape, std::vector{}, true); - return std::make_shared(inpNode, constNode); + {[](postNodeConfig& cfg) { + ngraph::Shape newShape = generatePerChannelShape(cfg.target); + auto constNode = ngraph::builder::makeConstant(cfg.type, newShape, std::vector{}, true); + return std::make_shared(cfg.input, constNode); }, "Add(PerChannel)"}}), {"Add"} }; +const auto fusingScaleShiftAndFakeQuantizePerChannel = fusingSpecificParams{ std::make_shared(std::vector{ + {[](postNodeConfig& cfg) { + ngraph::Shape newShape = generatePerChannelShape(cfg.target); + auto constNode = ngraph::builder::makeConstant(cfg.type, newShape, std::vector{}, true); + return std::make_shared(cfg.input, constNode); + }, "Multiply(PerChannel)"}, + {[](postNodeConfig& cfg) { + ngraph::Shape newShape = generatePerChannelShape(cfg.target); + auto constNode = ngraph::builder::makeConstant(cfg.type, newShape, std::vector{}, true); + return std::make_shared(cfg.input, constNode); + }, "Add(PerChannel)"}, + {[](postNodeConfig& cfg){ + auto localPrc = cfg.input->get_element_type(); + ngraph::Shape newShape = generatePerChannelShape(cfg.target); + // auto newShape = ngraph::Shape(cfg.inputNode->get_output_partial_shape(0).size(), 1); + return ngraph::builder::makeFakeQuantize(cfg.input, localPrc, 256, newShape); + }, "FakeQuantize(PerChannel)"}}), {"FakeQuantize"}}; + 
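// A minimal usage sketch, assuming the usual CpuTestWithFusing flow (the SetUp()
// snippet below is an illustration only; the member names postOpMgrPtr/fusedOps are
// taken from this header, the rest of the test scaffolding is assumed):
//
//     fusingSpecificParams fusingParams = fusingScaleShiftAndFakeQuantizePerChannel;
//     std::tie(postOpMgrPtr, fusedOps) = fusingParams;   // typically done in a test's SetUp()
//
// modifyGraph() then builds a postNodeConfig from the last node, the current input,
// the graph precision and the parameter vector and hands it to every makeNode lambda
// (see fusing_test_utils.cpp above), while CheckFusingResults() verifies that each
// entry of fusedOps occurs in the fused node's originalLayersNames.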
const auto fusingFakeQuantizePerTensor = fusingSpecificParams{ std::make_shared(std::vector{ - {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ - auto localPrc = inpNode->get_element_type(); - ngraph::Shape newShape(inpNode->get_output_partial_shape(0).size(), 1); - return ngraph::builder::makeFakeQuantize(inpNode, localPrc, 256, newShape); + {[](postNodeConfig& cfg){ + auto localPrc = cfg.input->get_element_type(); + ngraph::Shape newShape(cfg.input->get_output_partial_shape(0).size(), 1); + return ngraph::builder::makeFakeQuantize(cfg.input, localPrc, 256, newShape); }, "FakeQuantize(PerTensor)"}}), {"FakeQuantize"} }; const auto fusingFakeQuantizePerChannel = fusingSpecificParams{std::make_shared(std::vector{ - {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ - auto localPrc = inpNode->get_element_type(); - ngraph::Shape newShape = generatePerChannelShape(inpNode); - return ngraph::builder::makeFakeQuantize(inpNode, localPrc, 256, newShape); + {[](postNodeConfig& cfg){ + auto localPrc = cfg.input->get_element_type(); + ngraph::Shape newShape = generatePerChannelShape(cfg.target); + return ngraph::builder::makeFakeQuantize(cfg.input, localPrc, 256, newShape); }, "FakeQuantize(PerChannel)"}}), {"FakeQuantize"}}; const auto fusingFakeQuantizePerChannelRelu = fusingSpecificParams{std::make_shared(std::vector{ - {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ - auto localPrc = inpNode->get_element_type(); - ngraph::Shape newShape = generatePerChannelShape(inpNode); - return ngraph::builder::makeFakeQuantize(inpNode, localPrc, 256, newShape); + {[](postNodeConfig& cfg){ + auto localPrc = cfg.input->get_element_type(); + ngraph::Shape newShape = generatePerChannelShape(cfg.target); + return ngraph::builder::makeFakeQuantize(cfg.input, localPrc, 256, newShape); }, "FakeQuantize(PerChannel)"}, - {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ - return ngraph::builder::makeActivation(inpNode, ngPrc, ngraph::helpers::Relu); + {[](postNodeConfig& cfg){ + return ngraph::builder::makeActivation(cfg.input, cfg.type, ngraph::helpers::Relu); }, "Relu"}}), {"FakeQuantize", "Relu"}}; const auto fusingFQPerChannelSigmoidFQPerChannel = fusingSpecificParams{std::make_shared(std::vector{ - {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ - auto localPrc = inpNode->get_element_type(); - auto shape = inpNode->get_output_partial_shape(0); + {[](postNodeConfig& cfg){ + auto localPrc = cfg.input->get_element_type(); + auto shape = cfg.input->get_output_partial_shape(0); if (shape.size() == 1) IE_THROW() << "If shape.size() == 1 then Granularity can be PerTensor only"; ngraph::Shape newShape(shape.size(), 1); newShape[1] = shape[1].get_length(); - return ngraph::builder::makeFakeQuantize(inpNode, localPrc, 256, newShape); + return ngraph::builder::makeFakeQuantize(cfg.input, localPrc, 256, newShape); }, "FakeQuantize(PerChannel)"}, - {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ - return ngraph::builder::makeActivation(inpNode, ngPrc, ngraph::helpers::Sigmoid); + {[](postNodeConfig& cfg){ + return ngraph::builder::makeActivation(cfg.input, cfg.type, ngraph::helpers::Sigmoid); }, "Sigmoid"}, - {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ - auto localPrc = 
inpNode->get_element_type(); - auto shape = inpNode->get_output_partial_shape(0); + {[](postNodeConfig& cfg){ + auto localPrc = cfg.input->get_element_type(); + auto shape = cfg.input->get_output_partial_shape(0); if (shape.size() == 1) IE_THROW() << "If shape.size() == 1 then Granularity can be PerTensor only"; ngraph::Shape newShape(shape.size(), 1); newShape[1] = shape[1].get_length(); - return ngraph::builder::makeFakeQuantize(inpNode, localPrc, 256, newShape); + return ngraph::builder::makeFakeQuantize(cfg.input, localPrc, 256, newShape); }, "FakeQuantize(PerChannel)"}}), {"FakeQuantize", "Sigmoid", "FakeQuantize"}}; const auto fusingFakeQuantizePerTensorRelu = fusingSpecificParams{std::make_shared(std::vector{ - {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params) { - auto localPrc = inpNode->get_element_type(); - auto newShape = ngraph::Shape(inpNode->get_output_partial_shape(0).size(), 1); - return ngraph::builder::makeFakeQuantize(inpNode, localPrc, 256, newShape); + {[](postNodeConfig& cfg) { + auto localPrc = cfg.input->get_element_type(); + auto newShape = ngraph::Shape(cfg.input->get_output_partial_shape(0).size(), 1); + return ngraph::builder::makeFakeQuantize(cfg.input, localPrc, 256, newShape); }, "FakeQuantize(PerTensor)"}, - {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ - return ngraph::builder::makeActivation(inpNode, ngPrc, ngraph::helpers::Relu); + {[](postNodeConfig& cfg){ + return ngraph::builder::makeActivation(cfg.input, cfg.type, ngraph::helpers::Relu); }, "Relu"}}), {"FakeQuantize", "Relu"}}; const auto fusingSum = fusingSpecificParams{std::make_shared(std::vector{ - {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ - auto shape = inpNode->get_output_partial_shape(0); - ngraph::ParameterVector newParams = ngraph::builder::makeDynamicParams(ngPrc, {shape}); - params.insert(params.end(), newParams.begin(), newParams.end()); + {[](postNodeConfig& cfg){ + auto shape = cfg.input->get_output_partial_shape(0); + ngraph::ParameterVector newParams = ngraph::builder::makeDynamicParams(cfg.type, {shape}); + cfg.params.insert(cfg.params.end(), newParams.begin(), newParams.end()); auto newParamOuts = ngraph::helpers::convert2OutputVector( ngraph::helpers::castOps2Nodes(newParams)); - return std::make_shared(inpNode, newParamOuts[0]); + return std::make_shared(cfg.input, newParamOuts[0]); }, "Add(Parameters)"}}), {"Add"}}; const auto fusingSumEluFQ = fusingSpecificParams{std::make_shared(std::vector{ - {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ - auto shape = inpNode->get_output_partial_shape(0); - ngraph::ParameterVector newParams = ngraph::builder::makeDynamicParams(ngPrc, {shape}); - params.insert(params.end(), newParams.begin(), newParams.end()); + {[](postNodeConfig& cfg){ + auto shape = cfg.input->get_output_partial_shape(0); + ngraph::ParameterVector newParams = ngraph::builder::makeDynamicParams(cfg.type, {shape}); + cfg.params.insert(cfg.params.end(), newParams.begin(), newParams.end()); auto newParamOuts = ngraph::helpers::convert2OutputVector( ngraph::helpers::castOps2Nodes(newParams)); - return std::make_shared(inpNode, newParamOuts[0]); + return std::make_shared(cfg.input, newParamOuts[0]); }, "Add(Parameters)"}, - {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ - return ngraph::builder::makeActivation(inpNode, 
ngPrc, ngraph::helpers::Elu, {}, {2.0f}); + {[](postNodeConfig& cfg){ + return ngraph::builder::makeActivation(cfg.input, cfg.type, ngraph::helpers::Elu, {}, {2.0f}); }, "Elu"}, - {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params) { - auto localPrc = inpNode->get_element_type(); - auto newShape = ngraph::Shape(inpNode->get_output_partial_shape(0).size(), 1); - return ngraph::builder::makeFakeQuantize(inpNode, localPrc, 256, newShape); + {[](postNodeConfig& cfg) { + auto localPrc = cfg.input->get_element_type(); + auto newShape = ngraph::Shape(cfg.input->get_output_partial_shape(0).size(), 1); + return ngraph::builder::makeFakeQuantize(cfg.input, localPrc, 256, newShape); }, "FakeQuantize(PerTensor)"}}), {"Add", "Elu", "FakeQuantize"}}; const auto fusingMultiplyPerTensor = fusingSpecificParams{std::make_shared(std::vector{ - {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ + {[](postNodeConfig& cfg){ ngraph::Shape secondMultInShape(1, 1); - auto secondMultInput = ngraph::builder::makeConstant(ngPrc, secondMultInShape, std::vector{}, true); - return std::make_shared(inpNode, secondMultInput); + auto secondMultInput = ngraph::builder::makeConstant(cfg.type, secondMultInShape, std::vector{}, true); + return std::make_shared(cfg.input, secondMultInput); }, "Multiply(PerTensor)"}}), {"Multiply"}}; const auto fusingMultiplyPerChannel = fusingSpecificParams{std::make_shared(std::vector{ - {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ - ngraph::Shape secondMultInShape = generatePerChannelShape(inpNode); - auto secondMultInput = ngraph::builder::makeConstant(ngPrc, secondMultInShape, std::vector{}, true); - return std::make_shared(inpNode, secondMultInput); + {[](postNodeConfig& cfg){ + ngraph::Shape secondMultInShape = generatePerChannelShape(cfg.target); + auto secondMultInput = ngraph::builder::makeConstant(cfg.type, secondMultInShape, std::vector{}, true); + return std::make_shared(cfg.input, secondMultInput); }, "Multiply(PerChannel)"}}), {"Multiply"}}; const auto fusingAddPerTensor = fusingSpecificParams{std::make_shared(std::vector{ - {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ + {[](postNodeConfig& cfg){ ngraph::Shape secondMultInShape(1, 1); - auto secondMultInput = ngraph::builder::makeConstant(ngPrc, secondMultInShape, std::vector{}, true); - return std::make_shared(inpNode, secondMultInput); + auto secondMultInput = ngraph::builder::makeConstant(cfg.type, secondMultInShape, std::vector{}, true); + return std::make_shared(cfg.input, secondMultInput); }, "Add(PerTensor)"}}), {"Add"}}; const auto fusingAddPerChannel = fusingSpecificParams{std::make_shared(std::vector{ - {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ - ngraph::Shape secondMultInShape = generatePerChannelShape(inpNode); - auto secondMultInput = ngraph::builder::makeConstant(ngPrc, secondMultInShape, std::vector{}, true); - return std::make_shared(inpNode, secondMultInput); + {[](postNodeConfig& cfg){ + ngraph::Shape secondMultInShape = generatePerChannelShape(cfg.target); + auto secondMultInput = ngraph::builder::makeConstant(cfg.type, secondMultInShape, std::vector{}, true); + return std::make_shared(cfg.input, secondMultInput); }, "Add(PerChannel)"}}), {"Add"}}; const auto fusingSubtractPerTensor = fusingSpecificParams{std::make_shared(std::vector{ - {[](std::shared_ptr 
inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ + {[](postNodeConfig& cfg){ ngraph::Shape secondMultInShape(1, 1); - auto secondMultInput = ngraph::builder::makeConstant(ngPrc, secondMultInShape, std::vector{}, true); - return std::make_shared(inpNode, secondMultInput); + auto secondMultInput = ngraph::builder::makeConstant(cfg.type, secondMultInShape, std::vector{}, true); + return std::make_shared(cfg.input, secondMultInput); }, "Subtract(PerTensor)"}}), {"Subtract"}}; const auto fusingSubtractPerChannel = fusingSpecificParams{std::make_shared(std::vector{ - {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ - ngraph::Shape secondMultInShape = generatePerChannelShape(inpNode); - auto secondMultInput = ngraph::builder::makeConstant(ngPrc, secondMultInShape, std::vector{}, true); - return std::make_shared(inpNode, secondMultInput); + {[](postNodeConfig& cfg){ + ngraph::Shape secondMultInShape = generatePerChannelShape(cfg.target); + auto secondMultInput = ngraph::builder::makeConstant(cfg.type, secondMultInShape, std::vector{}, true); + return std::make_shared(cfg.input, secondMultInput); }, "Subtract(PerChannel)"}}), {"Subtract"}}; const auto fusingDividePerTensor = fusingSpecificParams{std::make_shared(std::vector{ - {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ + {[](postNodeConfig& cfg){ ngraph::Shape secondMultInShape(1, 1); - auto secondMultInput = ngraph::builder::makeConstant(ngPrc, secondMultInShape, std::vector{}, true); - return std::make_shared(inpNode, secondMultInput); + auto secondMultInput = ngraph::builder::makeConstant(cfg.type, secondMultInShape, std::vector{}, true); + return std::make_shared(cfg.input, secondMultInput); }, "Divide(PerTensor)"}}), {"Divide"}}; const auto fusingDividePerChannel = fusingSpecificParams{std::make_shared(std::vector{ - {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ - ngraph::Shape secondMultInShape = generatePerChannelShape(inpNode); - auto secondMultInput = ngraph::builder::makeConstant(ngPrc, secondMultInShape, std::vector{}, true); - return std::make_shared(inpNode, secondMultInput); + {[](postNodeConfig& cfg){ + ngraph::Shape secondMultInShape = generatePerChannelShape(cfg.target); + auto secondMultInput = ngraph::builder::makeConstant(cfg.type, secondMultInShape, std::vector{}, true); + return std::make_shared(cfg.input, secondMultInput); }, "Divide(PerChannel)"}}), {"Divide"}}; const auto fusingPRelu1D = fusingSpecificParams{std::make_shared(std::vector{ - {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ - auto shape = inpNode->get_output_partial_shape(0); + {[](postNodeConfig& cfg){ + auto shape = cfg.input->get_output_partial_shape(0); ngraph::Shape newShape({static_cast(shape[1].get_length())}); auto data = NGraphFunctions::Utils::generateVector(ngraph::shape_size(newShape)); - return ngraph::builder::makeActivation(inpNode, ngPrc, ngraph::helpers::LeakyRelu, newShape, data); + return ngraph::builder::makeActivation(cfg.input, cfg.type, ngraph::helpers::LeakyRelu, newShape, data); }, "PRelu1D"}}), {"PRelu"}}; + const auto fusingPRelu1DScaleShift = fusingSpecificParams{std::make_shared(std::vector{ - {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){ - auto shape = inpNode->get_output_partial_shape(0); + {[](postNodeConfig& cfg){ + auto shape = 
cfg.input->get_output_partial_shape(0); ngraph::Shape newShape({static_cast(shape[1].get_length())}); auto data = NGraphFunctions::Utils::generateVector(ngraph::shape_size(newShape)); - return ngraph::builder::makeActivation(inpNode, ngPrc, ngraph::helpers::LeakyRelu, newShape, data); + return ngraph::builder::makeActivation(cfg.input, cfg.type, ngraph::helpers::LeakyRelu, newShape, data); }, "PRelu1D"}, - {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params) { - ngraph::Shape newShape = generatePerChannelShape(inpNode); - auto constNode = ngraph::builder::makeConstant(ngPrc, newShape, std::vector{}, true); - return std::make_shared(inpNode, constNode); + {[](postNodeConfig& cfg) { + ngraph::Shape newShape = generatePerChannelShape(cfg.input); + auto constNode = ngraph::builder::makeConstant(cfg.type, newShape, std::vector{}, true); + return std::make_shared(cfg.input, constNode); }, "Multiply(PerChannel)"}, - {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params) { - ngraph::Shape newShape = generatePerChannelShape(inpNode); - auto constNode = ngraph::builder::makeConstant(ngPrc, newShape, std::vector{}, true); - return std::make_shared(inpNode, constNode); + {[](postNodeConfig& cfg) { + ngraph::Shape newShape = generatePerChannelShape(cfg.input); + auto constNode = ngraph::builder::makeConstant(cfg.type, newShape, std::vector{}, true); + return std::make_shared(cfg.input, constNode); }, "Add(PerChannel)"}}), {"Add"} }; const auto fusingBias = fusingSpecificParams{std::make_shared(std::vector{ - {[](std::shared_ptr inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params) { - size_t last_dim = inpNode->get_output_partial_shape(0).rbegin()->get_length(); - auto bias = ngraph::builder::makeConstant(ngPrc, ngraph::Shape{last_dim}, std::vector{}, true); - return std::make_shared(inpNode, bias); - }, "fusingBias"}}), {"Add"}}; + {[](postNodeConfig& cfg) { + size_t last_dim = cfg.input->get_output_partial_shape(0).rbegin()->get_length(); + auto bias = ngraph::builder::makeConstant(cfg.type, ngraph::Shape{last_dim}, std::vector{}, true); + return std::make_shared(cfg.input, bias); + }, "fusingBias"}}), {"Add"}}; } // namespace CPUTestUtils From 4afd8667cfd41cbee3dfcc72d4fd3763fcc8e194 Mon Sep 17 00:00:00 2001 From: Evgenya Stepyreva Date: Tue, 22 Feb 2022 12:05:18 +0300 Subject: [PATCH 050/310] DO detachment (#10577) --- .../dimension_tracking.hpp | 11 +--- .../dimension_tracking.cpp | 28 +++++++- src/plugins/auto_batch/auto_batch.cpp | 5 +- .../dimension_tracking.cpp | 65 ++++++++++++++++++- 4 files changed, 95 insertions(+), 14 deletions(-) diff --git a/src/common/transformations/include/transformations/common_optimizations/dimension_tracking.hpp b/src/common/transformations/include/transformations/common_optimizations/dimension_tracking.hpp index 123bf121f8e..a1137a70233 100644 --- a/src/common/transformations/include/transformations/common_optimizations/dimension_tracking.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/dimension_tracking.hpp @@ -17,7 +17,6 @@ namespace ov { namespace pass { class TRANSFORMATIONS_API FindBatch; -class TRANSFORMATIONS_API FindBatchDontTrack; } // namespace pass } // namespace ov @@ -25,15 +24,10 @@ class TRANSFORMATIONS_API FindBatchDontTrack; class ov::pass::FindBatch: public ov::pass::ModelPass { public: OPENVINO_RTTI("FindBatch"); - FindBatch(bool track = true) : track(track) {} + FindBatch(bool detach_detection_output 
= false, bool track = true) : track(track), detach_do(detach_detection_output) {} bool run_on_model(const std::shared_ptr& m) override; protected: - bool track = true; -}; - -class ov::pass::FindBatchDontTrack: public ov::pass::FindBatch { -public: - FindBatchDontTrack() : FindBatch(false) {} + bool track = true, detach_do = false; }; namespace ov { @@ -48,5 +42,6 @@ namespace batch_util { const std::map, ov::PartialShape>& parameter_to_shape, bool leave_batch_dynamic = true); bool check_batch_tracks_through_all_the_nodes(const std::shared_ptr& m); P2Btype find_batch(const std::shared_ptr &m); + bool detach_detection_output(const std::shared_ptr& f); } // namespace batch_util } // namespace ov diff --git a/src/common/transformations/src/transformations/common_optimizations/dimension_tracking.cpp b/src/common/transformations/src/transformations/common_optimizations/dimension_tracking.cpp index edf96b55df1..197f7e38ecf 100644 --- a/src/common/transformations/src/transformations/common_optimizations/dimension_tracking.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/dimension_tracking.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include "dimension_tracker.hpp" @@ -221,16 +222,41 @@ bool ov::batch_util::check_batch_tracks_through_all_the_nodes(const std::shared_ return failed_to_propagate_batch; } +bool ov::batch_util::detach_detection_output(const std::shared_ptr& f) { + ResultVector new_outputs, outputs_to_delete; + for (auto& result_node : f->get_results()) { + auto do_node = result_node->input_value(0).get_node_shared_ptr(); + if (ov::is_type(do_node)) // cases with do->convert->result + do_node = do_node->get_input_node_shared_ptr(0); + if (ov::is_type(do_node) || ov::is_type(do_node)) { + for (auto& new_result_src : do_node->input_values()) { + auto new_result = std::make_shared(new_result_src); + ngraph::copy_runtime_info(result_node, new_result); + new_outputs.push_back(new_result); + } + outputs_to_delete.push_back(result_node); + } + } + for (auto& result : outputs_to_delete) + f->remove_result(result); + f->add_results(new_outputs); + return !new_outputs.empty() || !outputs_to_delete.empty(); +} + bool ov::pass::FindBatch::run_on_model(const std::shared_ptr& m) { auto te = std::make_shared(); ov::DimensionTracker dt(te); + bool model_has_changed = false; + if (detach_do) + model_has_changed |= batch_util::detach_detection_output(m); + const auto& parameters = m->get_parameters(); std::map, PartialShape> parameter_to_shape; for (const auto& parameter : parameters) { auto shape = parameter->get_partial_shape(); if (shape.rank().is_dynamic()) - return false; + return model_has_changed; parameter_to_shape[parameter] = shape; } diff --git a/src/plugins/auto_batch/auto_batch.cpp b/src/plugins/auto_batch/auto_batch.cpp index b5b2665442b..872d0e3bb17 100644 --- a/src/plugins/auto_batch/auto_batch.cpp +++ b/src/plugins/auto_batch/auto_batch.cpp @@ -775,10 +775,7 @@ InferenceEngine::IExecutableNetworkInternal::Ptr AutoBatchInferencePlugin::LoadN // find the batch dim ov::pass::Manager m; m.register_pass(); - if (check_dims) - m.register_pass(); - else - m.register_pass(); + m.register_pass(true, check_dims); m.run_passes(function); // do not reshape/re-batch originally batched networks and when there are no inputs with the N* layouts // input(s) should have the batch dim as the first dim or none (current limitation of the auto-batching impl) diff --git 
a/src/tests/functional/inference_engine/transformations/common_optimizations/dimension_tracking.cpp b/src/tests/functional/inference_engine/transformations/common_optimizations/dimension_tracking.cpp index df5b6343c1b..ca65499ef84 100644 --- a/src/tests/functional/inference_engine/transformations/common_optimizations/dimension_tracking.cpp +++ b/src/tests/functional/inference_engine/transformations/common_optimizations/dimension_tracking.cpp @@ -224,7 +224,7 @@ TEST(TransformationTests, AutoBatch_FindBatch_NegativeTracking) { ov::pass::Manager m; m.register_pass(); - m.register_pass(); + m.register_pass(false, false); m.run_passes(f); ASSERT_NO_THROW(check_rt_info(f)); @@ -238,6 +238,69 @@ TEST(TransformationTests, AutoBatch_FindBatch_NegativeTracking) { ASSERT_TRUE(!ov::DimensionTracker::get_label(out_shape[0])) << out_shape; } +TEST(TransformationTests, AutoBatch_FindBatch_AutoBatch_LabelPropagation_DO_detachment) { + const auto& data = std::make_shared(ov::element::f32, ov::Shape{1, 4, 10, 10}); + + const auto& constant_0 = std::make_shared(ov::element::f32, ov::Shape{1, 1, 1, 1}); + const auto& mul_0 = std::make_shared(data, constant_0); + + const auto& filters = std::make_shared(ov::element::f32, ov::Shape{1, 4, 1, 1}); + const auto& conv = std::make_shared( + mul_0, filters, ov::Strides{1, 1}, ov::CoordinateDiff{0, 0}, ov::CoordinateDiff{0, 0}, ov::Strides{1, 1}); + + const auto& box_logits_reshape = std::make_shared( + ov::element::i64, ov::Shape{2}, std::vector{0, -1}); + const auto& box_logits = std::make_shared(conv, box_logits_reshape, true); + + const auto& four_times = std::make_shared(box_logits, std::make_shared( + ov::element::i64, ov::Shape{2}, std::vector{1, 4})); + + const auto& third_input_reshape = std::make_shared( + ov::element::i64, ov::Shape{3}, std::vector{0, 1, -1}); + const auto& third_input = std::make_shared(four_times, third_input_reshape, true); + + ngraph::op::DetectionOutput::Attributes attr; + attr.num_classes = 4; + attr.background_label_id = 0; + attr.top_k = 75; + attr.variance_encoded_in_target = true; + attr.keep_top_k = {50}; + attr.code_type = std::string{"caffe.PriorBoxParameter.CORNER"}; + attr.share_location = true; + attr.nms_threshold = 0.5f; + attr.confidence_threshold = 0.5f; + attr.clip_after_nms = false; + attr.clip_before_nms = false; + attr.decrease_label_id = false; + attr.normalized = true; + attr.input_height = 1; + attr.input_width = 1; + attr.objectness_score = 0.4f; + + const auto& detection = std::make_shared(four_times, four_times, third_input, attr); + const auto& convert = std::make_shared(detection, ov::element::f32); + + const auto& f = std::make_shared(ov::NodeVector{convert}, ov::ParameterVector{data}); + + ov::pass::Manager m; + m.register_pass(); + m.register_pass(true); + m.run_passes(f); + ASSERT_NO_THROW(check_rt_info(f)); + + const auto& shape = data->get_partial_shape(); + ASSERT_TRUE(ov::DimensionTracker::get_label(shape[0])) << shape; + ASSERT_TRUE(!ov::DimensionTracker::get_label(shape[1])) << shape; + ASSERT_TRUE(!ov::DimensionTracker::get_label(shape[2])) << shape; + ASSERT_TRUE(!ov::DimensionTracker::get_label(shape[3])) << shape; + ASSERT_EQ(f->get_results().size(), 3); + for (const auto& result : f->get_results()) { + const auto& out_shape = result->get_output_partial_shape(0); + ASSERT_TRUE(ov::DimensionTracker::get_label(out_shape[0])) << out_shape; + ASSERT_TRUE(!ov::DimensionTracker::get_label(out_shape[1])) << out_shape; + } +} + TEST(partial_shape, cout_with_label) { ov::Dimension a = 5; 
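// Note: set_label() below attaches tracking label 100500 to the dimension; such
// labels are what FindBatch propagates through the model and what the
// DO-detachment test above inspects on the surviving outputs.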
ov::DimensionTracker::set_label(a, 100500); From 100fff83bf211fb44f9476ce3fe620a55506c87c Mon Sep 17 00:00:00 2001 From: Nikolay Tyukaev Date: Tue, 22 Feb 2022 12:05:55 +0300 Subject: [PATCH 051/310] dynamic title tag (#10575) * dynamic title tag * dynamic title tag --- docs/_static/js/custom.js | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/docs/_static/js/custom.js b/docs/_static/js/custom.js index 8b027164517..bfc54b881be 100644 --- a/docs/_static/js/custom.js +++ b/docs/_static/js/custom.js @@ -27,6 +27,7 @@ var wapSection = 'openvinotoolkit'; $(document).ready(function () { createVersions(); + updateTitleTag(); init_col_sections(); init_switchers(); handleSwitcherParam(); @@ -44,6 +45,13 @@ function getPageUrlWithVersion(version) { return encodeURI(newURL); } +function updateTitleTag() { + var title = $('title'); + var currentVersion = getCurrentVersion(); + var newTitle = (title.text() + ' — Version(' + currentVersion + ')').replace(/\s+/g, ' ').trim(); + title.text(newTitle); +} + function getCurrentVersion() { var protocol = window.location.protocol + "//"; var index = window.location.href.indexOf(protocol); From 5247fdfcaf7d47e160491b2de673bd5a85f852fb Mon Sep 17 00:00:00 2001 From: Maxim Shevtsov Date: Tue, 22 Feb 2022 12:15:19 +0300 Subject: [PATCH 052/310] avoiding layouts (#10560) --- src/plugins/auto_batch/auto_batch.cpp | 128 ++++++++++++++++++++++---- src/plugins/auto_batch/auto_batch.hpp | 18 +++- 2 files changed, 122 insertions(+), 24 deletions(-) diff --git a/src/plugins/auto_batch/auto_batch.cpp b/src/plugins/auto_batch/auto_batch.cpp index 872d0e3bb17..53c3ef7fca4 100644 --- a/src/plugins/auto_batch/auto_batch.cpp +++ b/src/plugins/auto_batch/auto_batch.cpp @@ -30,19 +30,19 @@ using namespace InferenceEngine; std::vector supported_configKeys = {CONFIG_KEY(AUTO_BATCH_DEVICE_CONFIG), CONFIG_KEY(AUTO_BATCH_TIMEOUT)}; template -Blob::Ptr create_shared_blob_on_top_of_batched_blob(Blob::Ptr batched_blob, size_t batch_id, size_t batch_num) { +Blob::Ptr create_shared_blob_on_top_of_batched_blob(Blob::Ptr batched_blob, + std::string name, + const std::set& batched_names, + size_t batch_id, + size_t batch_num) { typedef typename PrecisionTrait::value_type TYPE; typedef typename std::add_pointer::type TYPEPTR; auto ptr = batched_blob->buffer().as(); auto sizePerBatch = batched_blob->size() / batch_num; - auto layout = batched_blob->getTensorDesc().getLayout(); SizeVector dims = batched_blob->getTensorDesc().getDims(); // for performance reason (copy avoidance) current impl of the auto-batching supports only batching by 0th dim - if (layout == InferenceEngine::Layout::NC || layout == InferenceEngine::Layout::NCDHW || - layout == InferenceEngine::Layout::NCHW || layout == InferenceEngine::Layout::NHWC || - layout == InferenceEngine::Layout::NDHWC) { + if (batched_names.count(name)) { dims[0] = 1; - assert(batched_blob->getTensorDesc().getPrecision() == precision); return make_shared_blob({precision, dims, batched_blob->getTensorDesc().getLayout()}, ptr + sizePerBatch * batch_id, sizePerBatch); @@ -57,27 +57,32 @@ AutoBatchInferRequest::AutoBatchInferRequest(const std::vector>& outputs, AutoBatchExecutableNetwork::WorkerInferRequest& workerRequest, int batch_id, - int num_batch) + int num_batch, + const std::set& batchedInputs, + const std::set& batchedOutputs) : IInferRequestInternal(inputs, outputs), _myBatchedRequestWrapper(workerRequest), _batchId(batch_id), _batchSize(num_batch) { - ShareBlobsWithBatchRequest(); + ShareBlobsWithBatchRequest(batchedInputs, 
batchedOutputs); } AutoBatchInferRequest::AutoBatchInferRequest(const InputsDataMap& networkInputs, const OutputsDataMap& networkOutputs, AutoBatchExecutableNetwork::WorkerInferRequest& workerRequest, int batch_id, - int num_batch) + int num_batch, + const std::set& batchedInputs, + const std::set& batchedOutputs) : IInferRequestInternal(networkInputs, networkOutputs), _myBatchedRequestWrapper(workerRequest), _batchId(batch_id), _batchSize(num_batch) { - ShareBlobsWithBatchRequest(); + ShareBlobsWithBatchRequest(batchedInputs, batchedOutputs); } -void AutoBatchInferRequest::ShareBlobsWithBatchRequest() { +void AutoBatchInferRequest::ShareBlobsWithBatchRequest(const std::set& batchedInputs, + const std::set& batchedOutputs) { // Allocate all input blobs for (const auto& it : _networkInputs) { auto blob = _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first); @@ -86,78 +91,104 @@ void AutoBatchInferRequest::ShareBlobsWithBatchRequest() { case InferenceEngine::Precision::FP32: res = create_shared_blob_on_top_of_batched_blob( _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first), + it.first, + batchedInputs, _batchId, _batchSize); break; case InferenceEngine::Precision::I32: res = create_shared_blob_on_top_of_batched_blob( _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first), + it.first, + batchedInputs, _batchId, _batchSize); break; case InferenceEngine::Precision::I8: res = create_shared_blob_on_top_of_batched_blob( _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first), + it.first, + batchedInputs, _batchId, _batchSize); break; case InferenceEngine::Precision::I16: res = create_shared_blob_on_top_of_batched_blob( _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first), + it.first, + batchedInputs, _batchId, _batchSize); break; case InferenceEngine::Precision::U16: res = create_shared_blob_on_top_of_batched_blob( _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first), + it.first, + batchedInputs, _batchId, _batchSize); break; case InferenceEngine::Precision::U32: res = create_shared_blob_on_top_of_batched_blob( _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first), + it.first, + batchedInputs, _batchId, _batchSize); break; case InferenceEngine::Precision::FP64: res = create_shared_blob_on_top_of_batched_blob( _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first), + it.first, + batchedInputs, _batchId, _batchSize); break; case InferenceEngine::Precision::FP16: res = create_shared_blob_on_top_of_batched_blob( _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first), + it.first, + batchedInputs, _batchId, _batchSize); break; case InferenceEngine::Precision::BF16: res = create_shared_blob_on_top_of_batched_blob( _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first), + it.first, + batchedInputs, _batchId, _batchSize); break; case InferenceEngine::Precision::U64: res = create_shared_blob_on_top_of_batched_blob( _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first), + it.first, + batchedInputs, _batchId, _batchSize); break; case InferenceEngine::Precision::I64: res = create_shared_blob_on_top_of_batched_blob( _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first), + it.first, + batchedInputs, _batchId, _batchSize); break; case InferenceEngine::Precision::U8: res = create_shared_blob_on_top_of_batched_blob( _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first), + it.first, + batchedInputs, _batchId, _batchSize); break; case InferenceEngine::Precision::BOOL: res = 
create_shared_blob_on_top_of_batched_blob( _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first), + it.first, + batchedInputs, _batchId, _batchSize); break; @@ -174,78 +205,104 @@ void AutoBatchInferRequest::ShareBlobsWithBatchRequest() { case InferenceEngine::Precision::FP32: res = create_shared_blob_on_top_of_batched_blob( _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first), + it.first, + batchedOutputs, _batchId, _batchSize); break; case InferenceEngine::Precision::I32: res = create_shared_blob_on_top_of_batched_blob( _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first), + it.first, + batchedOutputs, _batchId, _batchSize); break; case InferenceEngine::Precision::I8: res = create_shared_blob_on_top_of_batched_blob( _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first), + it.first, + batchedOutputs, _batchId, _batchSize); break; case InferenceEngine::Precision::I16: res = create_shared_blob_on_top_of_batched_blob( _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first), + it.first, + batchedOutputs, _batchId, _batchSize); break; case InferenceEngine::Precision::U16: res = create_shared_blob_on_top_of_batched_blob( _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first), + it.first, + batchedOutputs, _batchId, _batchSize); break; case InferenceEngine::Precision::U32: res = create_shared_blob_on_top_of_batched_blob( _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first), + it.first, + batchedOutputs, _batchId, _batchSize); break; case InferenceEngine::Precision::FP64: res = create_shared_blob_on_top_of_batched_blob( _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first), + it.first, + batchedOutputs, _batchId, _batchSize); break; case InferenceEngine::Precision::FP16: res = create_shared_blob_on_top_of_batched_blob( _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first), + it.first, + batchedOutputs, _batchId, _batchSize); break; case InferenceEngine::Precision::BF16: res = create_shared_blob_on_top_of_batched_blob( _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first), + it.first, + batchedOutputs, _batchId, _batchSize); break; case InferenceEngine::Precision::U64: res = create_shared_blob_on_top_of_batched_blob( _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first), + it.first, + batchedOutputs, _batchId, _batchSize); break; case InferenceEngine::Precision::I64: res = create_shared_blob_on_top_of_batched_blob( _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first), + it.first, + batchedOutputs, _batchId, _batchSize); break; case InferenceEngine::Precision::U8: res = create_shared_blob_on_top_of_batched_blob( _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first), + it.first, + batchedOutputs, _batchId, _batchSize); break; case InferenceEngine::Precision::BOOL: res = create_shared_blob_on_top_of_batched_blob( _myBatchedRequestWrapper._inferRequestBatched->GetBlob(it.first), + it.first, + batchedOutputs, _batchId, _batchSize); break; @@ -371,12 +428,16 @@ AutoBatchExecutableNetwork::AutoBatchExecutableNetwork( const InferenceEngine::SoExecutableNetworkInternal& networkWithBatch, const InferenceEngine::SoExecutableNetworkInternal& networkWithoutBatch, const DeviceInformation& networkDevice, - const std::unordered_map& config) + const std::unordered_map& config, + const std::set& batchedInputs, + const std::set& batchedOutputs) : InferenceEngine::ExecutableNetworkThreadSafeDefault(nullptr, std::make_shared()), _network{networkWithBatch}, 
_networkWithoutBatch{networkWithoutBatch}, - _config{config} { + _config{config}, + _batchedInputs(batchedInputs), + _batchedOutputs(batchedOutputs) { // WA for gcc 4.8 ( fails compilation with member init-list) _device = networkDevice; auto time_out = config.find(CONFIG_KEY(AUTO_BATCH_TIMEOUT)); @@ -411,7 +472,9 @@ InferenceEngine::IInferRequestInternal::Ptr AutoBatchExecutableNetwork::CreateIn networkOutputs, workerRequestPtrAndId.first, workerRequestPtrAndId.second, - _device.batchForDevice); + _device.batchForDevice, + _batchedInputs, + _batchedOutputs); } InferenceEngine::IInferRequestInternal::Ptr AutoBatchExecutableNetwork::CreateInferRequestImpl( @@ -427,7 +490,9 @@ InferenceEngine::IInferRequestInternal::Ptr AutoBatchExecutableNetwork::CreateIn outputs, workerRequestPtrAndId.first, workerRequestPtrAndId.second, - _device.batchForDevice); + _device.batchForDevice, + _batchedInputs, + _batchedOutputs); } std::pair AutoBatchExecutableNetwork::GetWorkerInferRequest() { @@ -761,6 +826,7 @@ InferenceEngine::IExecutableNetworkInternal::Ptr AutoBatchInferencePlugin::LoadN deviceConfigNoAutoBatch[CONFIG_KEY(ALLOW_AUTO_BATCHING)] = CONFIG_VALUE(NO); std::set batched_inputs; + std::set batched_outputs; // check that the auto-batching is applicable in general try { // if applicable, the Auto-Batching is implicitly enabled via the performance hints @@ -768,7 +834,7 @@ InferenceEngine::IExecutableNetworkInternal::Ptr AutoBatchInferencePlugin::LoadN const bool bTputInPlg = core->GetConfig(deviceName, CONFIG_KEY(PERFORMANCE_HINT)).as() == tput; const auto& mode = deviceConfig.find(CONFIG_KEY(PERFORMANCE_HINT)); const bool bTputInLoadCfg = (mode != deviceConfig.end() && mode->second == tput); - // if the auto-batching is enabled implicitly, we shall check the dims carefully, to avoid outstanding failures + // if the auto-batching is enabled implicitly, check the dims carefully, to avoid outstanding failures const bool check_dims = (bTputInPlg || bTputInLoadCfg); CNNNetwork clonedNetwork(InferenceEngine::details::cloneNetwork(network)); auto function = clonedNetwork.getFunction(); @@ -778,7 +844,7 @@ InferenceEngine::IExecutableNetworkInternal::Ptr AutoBatchInferencePlugin::LoadN m.register_pass(true, check_dims); m.run_passes(function); // do not reshape/re-batch originally batched networks and when there are no inputs with the N* layouts - // input(s) should have the batch dim as the first dim or none (current limitation of the auto-batching impl) + // input(s) should have the batch dim as the first dim (current limitation of the auto-batching impl) const auto& params = function->get_parameters(); for (size_t input_id = 0; input_id < params.size(); input_id++) { const auto& input = params[input_id]; @@ -801,8 +867,28 @@ InferenceEngine::IExecutableNetworkInternal::Ptr AutoBatchInferencePlugin::LoadN << "Auto-batching operates only networks with inputs/outputs batched by 0th dimension"; } } - if (!batched_inputs.size()) - IE_THROW(NotImplemented) << "Auto-batching supports only networks with inputs featuring batched dim!"; + const auto& results = function->get_results(); + for (size_t output_id = 0; output_id < results.size(); output_id++) { + const auto& output = results[output_id]; + const auto& shape = output->get_output_partial_shape(0); + // check the batch dim: either 0th (and the original batch size of 1) or none + if (shape.size() && ov::DimensionTracker::get_label(shape[0])) { + if (shape[0] != 1) + IE_THROW(NotImplemented) << "Auto-batching does not reshape/re-batch originally batched 
networks!"; + const auto& node = output->input_value(0); + batched_outputs.insert(ngraph::op::util::get_ie_output_name( + ov::Output(node.get_node(), node.get_index()))); + } else { + // if the 0-th dim is not for the batch, then we support only the case when NONE dimension is batch + for (size_t s = 1; s < shape.size(); s++) + if (ov::DimensionTracker::get_label(shape[s])) + IE_THROW(NotImplemented) + << "Auto-batching operates only networks with outputs batched by 0th dimension"; + } + } + if (!batched_inputs.size() || !batched_outputs.size()) + IE_THROW(NotImplemented) + << "Auto-batching supports only networks with inputs/outputs featuring batched dim!"; } catch (...) { metaDevice.batchForDevice = 1; } @@ -878,7 +964,9 @@ InferenceEngine::IExecutableNetworkInternal::Ptr AutoBatchInferencePlugin::LoadN return std::make_shared(executableNetworkWithBatch, executableNetworkWithoutBatch, metaDevice, - networkConfig); + networkConfig, + batched_inputs, + batched_outputs); } InferenceEngine::IExecutableNetworkInternal::Ptr AutoBatchInferencePlugin::LoadExeNetworkImpl( diff --git a/src/plugins/auto_batch/auto_batch.hpp b/src/plugins/auto_batch/auto_batch.hpp index e5aff7f3c0f..0994d4377f7 100644 --- a/src/plugins/auto_batch/auto_batch.hpp +++ b/src/plugins/auto_batch/auto_batch.hpp @@ -49,7 +49,9 @@ public: const InferenceEngine::SoExecutableNetworkInternal& networkForDevice, const InferenceEngine::SoExecutableNetworkInternal& networkForDeviceWithoutBatch, const DeviceInformation& networkDevices, - const std::unordered_map& config); + const std::unordered_map& config, + const std::set& batchedIntputs, + const std::set& batchedOutputs); void SetConfig(const std::map& config) override; InferenceEngine::Parameter GetConfig(const std::string& name) const override; @@ -80,6 +82,9 @@ protected: bool _needPerfCounters = false; std::atomic_size_t _numRequestsCreated = {0}; std::atomic_int _timeOut = {0}; // in ms + + const std::set _batchedInputs; + const std::set _batchedOutputs; }; class AutoBatchInferRequest : public InferenceEngine::IInferRequestInternal { @@ -89,12 +94,16 @@ public: const InferenceEngine::OutputsDataMap& networkOutputs, AutoBatchExecutableNetwork::WorkerInferRequest& workerRequestPtr, int batch_id, - int num_batch); + int num_batch, + const std::set& batchedIntputs, + const std::set& batchedOutputs); explicit AutoBatchInferRequest(const std::vector>& inputs, const std::vector>& outputs, AutoBatchExecutableNetwork::WorkerInferRequest& workerRequestPtr, int batch_id, - int num_batch); + int num_batch, + const std::set& batchedIntputs, + const std::set& batchedOutputs); // Batch-Device impl specific: sets the data (blobs from the device request to the batched device request) void SetBlobsToAnotherRequest(InferenceEngine::SoIInferRequestInternal& req); @@ -110,7 +119,8 @@ public: protected: void CopyBlobIfNeeded(InferenceEngine::Blob::CPtr src, InferenceEngine::Blob::Ptr dst, bool bInput); - void ShareBlobsWithBatchRequest(); + void ShareBlobsWithBatchRequest(const std::set& batchedIntputs, + const std::set& batchedOutputs); size_t _batchId; size_t _batchSize; }; From 472ebc0cd91d9dba0909c8207700c8b0de690784 Mon Sep 17 00:00:00 2001 From: Ivan Tikhonov Date: Tue, 22 Feb 2022 12:20:32 +0300 Subject: [PATCH 053/310] [TF FE] Add translators for ScatterND, Conv3DBackpropInputV2 ops (#10550) * Add translators for ScatterND, ConvBackpropInputV2 ops * add a new line --- .../tensorflow/src/op/conv_3d_backprop.cpp | 104 ++++++++++++++++++ .../tensorflow/src/op/scatter_nd.cpp | 29 +++++ 
src/frontends/tensorflow/src/op_table.cpp | 4 + 3 files changed, 137 insertions(+) create mode 100644 src/frontends/tensorflow/src/op/conv_3d_backprop.cpp create mode 100644 src/frontends/tensorflow/src/op/scatter_nd.cpp diff --git a/src/frontends/tensorflow/src/op/conv_3d_backprop.cpp b/src/frontends/tensorflow/src/op/conv_3d_backprop.cpp new file mode 100644 index 00000000000..c29c881bd9d --- /dev/null +++ b/src/frontends/tensorflow/src/op/conv_3d_backprop.cpp @@ -0,0 +1,104 @@ +// Copyright (C) 2018-2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "op_table.hpp" +#include "openvino/opsets/opset8.hpp" + +using namespace std; +using namespace ov::opset8; + +namespace ov { +namespace frontend { +namespace tensorflow { +namespace op { + +OutputVector translate_conv_3d_backprop_input_v2_op(const NodeContext& node) { + auto ng_filter = node.get_input(1); + auto ng_out_backprop = node.get_input(2); + + // TODO: refactor me to be less redundant with other convolution ops + auto tf_strides = node.get_attribute>("strides"); + auto tf_dilations = node.get_attribute>("dilations"); + auto tf_padding_type = node.get_attribute("padding"); + auto tf_data_format = node.get_attribute("data_format"); + + TENSORFLOW_OP_VALIDATION(node, + tf_data_format == "NDHWC" || tf_data_format == "NCDHW", + "Conv3DBackpropInputV2 data format is neither NDHWC nor NCDHW. " + "Provided data format: ", + tf_data_format); + + std::vector tf_input_sizes; + get_const_input(node, 0, &tf_input_sizes); + + if (std::any_of(tf_input_sizes.begin(), tf_input_sizes.end(), [](int32_t size) { + return size <= 0; + })) { + FRONT_END_THROW("Conv3DBackpropInputV2 input sizes must be positive integers"); + } + + bool is_ndhwc = (tf_data_format == "NDHWC"); + + ov::Strides ng_strides(3); + ov::Strides ng_dilations(3); + ov::Shape ng_image_shape(3); + ov::Shape ng_kernel_shape(3); + ov::Shape ng_batch_shape(5); + + convert_nhwc_to_hw(is_ndhwc, tf_strides, ng_strides); + convert_nhwc_to_hw(is_ndhwc, tf_dilations, ng_dilations); + convert_nhwc_to_hw(is_ndhwc, tf_input_sizes, ng_image_shape); + convert_nhwc_to_nchw(node.get_name(), is_ndhwc, ng_out_backprop); + if (is_ndhwc) { + ng_batch_shape = {static_cast(tf_input_sizes[0]), + static_cast(tf_input_sizes[4]), + static_cast(tf_input_sizes[1]), + static_cast(tf_input_sizes[2]), + static_cast(tf_input_sizes[3])}; + } else { + ng_batch_shape = {static_cast(tf_input_sizes[0]), + static_cast(tf_input_sizes[1]), + static_cast(tf_input_sizes[2]), + static_cast(tf_input_sizes[3]), + static_cast(tf_input_sizes[4])}; + } + + auto& ng_filter_shape = ng_filter.get_shape(); + ng_kernel_shape[0] = ng_filter_shape[0]; + ng_kernel_shape[1] = ng_filter_shape[1]; + ng_kernel_shape[2] = ng_filter_shape[2]; + transpose_3d<4, 3, 0, 1, 2>(ng_filter); + + ov::CoordinateDiff ng_padding_below; + ov::CoordinateDiff ng_padding_above; + + make_padding(tf_padding_type, + ng_image_shape, + ng_kernel_shape, + ng_strides, + ng_dilations, + ng_padding_below, + ng_padding_above); + + auto ng_output_shape = make_shared(element::i64, + Shape{ng_batch_shape.size() - 2}, + vector(ng_batch_shape.begin() + 2, ng_batch_shape.end())); + + auto res_node = make_shared(ng_out_backprop, + ng_filter, + ng_output_shape, + ng_strides, + ng_padding_below, + ng_padding_above, + ng_dilations); + auto res = res_node->output(0); + + convert_nchw_to_nhwc(node.get_name(), is_ndhwc, res); + set_node_name(node.get_name(), res.get_node_shared_ptr()); + return {res}; +} +} // namespace op +} // namespace tensorflow +} // 
namespace frontend +} // namespace ov diff --git a/src/frontends/tensorflow/src/op/scatter_nd.cpp b/src/frontends/tensorflow/src/op/scatter_nd.cpp new file mode 100644 index 00000000000..9eb95b7538a --- /dev/null +++ b/src/frontends/tensorflow/src/op/scatter_nd.cpp @@ -0,0 +1,29 @@ +// Copyright (C) 2018-2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "op_table.hpp" +#include "openvino/opsets/opset8.hpp" + +using namespace std; +using namespace ov::opset8; + +namespace ov { +namespace frontend { +namespace tensorflow { +namespace op { +OutputVector translate_scatter_nd_op(const NodeContext& node) { + auto input_indices = node.get_input(0); + auto updates = node.get_input(1); + auto shape = node.get_input(2); + + auto input_data = make_shared(updates.get_element_type(), Shape{1}, 0); + auto broadcast = make_shared(input_data, shape); + auto res = make_shared(broadcast, input_indices, updates); + set_node_name(node.get_name(), res); + return res->outputs(); +} +} // namespace op +} // namespace tensorflow +} // namespace frontend +} // namespace ov diff --git a/src/frontends/tensorflow/src/op_table.cpp b/src/frontends/tensorflow/src/op_table.cpp index aeeb0983278..12c26217117 100644 --- a/src/frontends/tensorflow/src/op_table.cpp +++ b/src/frontends/tensorflow/src/op_table.cpp @@ -33,6 +33,7 @@ OP_CONVERTER(translate_const_op); OP_CONVERTER(translate_conv_2d_op); OP_CONVERTER(translate_conv_2d_backprop_input_op); OP_CONVERTER(translate_conv_3d_op); +OP_CONVERTER(translate_conv_3d_backprop_input_v2_op); OP_CONVERTER(translate_cumsum_op); OP_CONVERTER(translate_crop_and_resize_op); OP_CONVERTER(translate_depth_to_space_op); @@ -73,6 +74,7 @@ OP_CONVERTER(translate_reverse_op); OP_CONVERTER(translate_roll_op); OP_CONVERTER(translate_round_op); OP_CONVERTER(translate_rsqrt_op); +OP_CONVERTER(translate_scatter_nd_op); OP_CONVERTER(translate_select_op); OP_CONVERTER(translate_shape_op); OP_CONVERTER(translate_size_op); @@ -167,6 +169,7 @@ const std::map get_supported_ops() { {"Conv2D", translate_conv_2d_op}, {"Conv2DBackpropInput", translate_conv_2d_backprop_input_op}, {"Conv3D", translate_conv_3d_op}, + {"Conv3DBackpropInputV2", translate_conv_3d_backprop_input_v2_op}, {"CropAndResize", translate_crop_and_resize_op}, {"Cumsum", translate_cumsum_op}, {"DepthToSpace", translate_depth_to_space_op}, @@ -220,6 +223,7 @@ const std::map get_supported_ops() { {"Roll", translate_roll_op}, {"Round", translate_round_op}, {"Rsqrt", translate_rsqrt_op}, + {"ScatterNd", translate_scatter_nd_op}, {"Select", translate_select_op}, {"SelectV2", translate_select_op}, {"Shape", translate_shape_op}, From 3f56438d061f35e862d4a8cd0a9027f3d59757e5 Mon Sep 17 00:00:00 2001 From: Egor Duplensky Date: Tue, 22 Feb 2022 12:42:24 +0300 Subject: [PATCH 054/310] [CPU] Align return types handling for all the new API parameters (#10363) --- .../test_compiled_model.py | 4 ++-- src/plugins/intel_cpu/src/exec_network.cpp | 15 +++++++----- src/plugins/intel_cpu/src/plugin.cpp | 24 +++++++++++-------- 3 files changed, 25 insertions(+), 18 deletions(-) diff --git a/src/bindings/python/tests/test_inference_engine/test_compiled_model.py b/src/bindings/python/tests/test_inference_engine/test_compiled_model.py index 60b5f44e8fc..e0bb872bf4d 100644 --- a/src/bindings/python/tests/test_inference_engine/test_compiled_model.py +++ b/src/bindings/python/tests/test_inference_engine/test_compiled_model.py @@ -29,8 +29,8 @@ def test_get_property(device): pytest.skip("Can't run on ARM plugin due-to CPU dependent 
test") func = core.read_model(model=test_net_xml, weights=test_net_bin) exec_net = core.compile_model(func, device) - config = exec_net.get_property("PERF_COUNT") - assert config == "NO" + profiling_enabled = exec_net.get_property("PERF_COUNT") + assert not profiling_enabled def test_get_runtime_model(device): diff --git a/src/plugins/intel_cpu/src/exec_network.cpp b/src/plugins/intel_cpu/src/exec_network.cpp index b2f20cd9020..5116c57989a 100644 --- a/src/plugins/intel_cpu/src/exec_network.cpp +++ b/src/plugins/intel_cpu/src/exec_network.cpp @@ -26,6 +26,7 @@ #include "cpp_interfaces/interface/ie_iplugin_internal.hpp" #include "ie_icore.hpp" #include "openvino/runtime/properties.hpp" +#include "openvino/util/common_util.hpp" #include #include @@ -310,7 +311,8 @@ InferenceEngine::Parameter MKLDNNExecNetwork::GetMetric(const std::string &name) if (name == ov::model_name) { // @todo Does not seem ok to 'dump()' the whole graph everytime in order to get a name - return graph.dump()->get_friendly_name(); + const std::string modelName = graph.dump()->get_friendly_name(); + return decltype(ov::model_name)::value_type(modelName); } else if (name == ov::optimal_number_of_infer_requests) { const auto streams = config.streamExecutorConfig._streams; return decltype(ov::optimal_number_of_infer_requests)::value_type(streams); // ov::optimal_number_of_infer_requests has no negative values @@ -332,19 +334,20 @@ InferenceEngine::Parameter MKLDNNExecNetwork::GetMetric(const std::string &name) return ov::Affinity::NONE; } else if (name == ov::inference_num_threads) { const auto num_threads = config.streamExecutorConfig._threads; - return num_threads; + return decltype(ov::inference_num_threads)::value_type(num_threads); } else if (name == ov::enable_profiling.name()) { const bool perfCount = config.collectPerfCounters; - return perfCount ? "YES" : "NO"; + return decltype(ov::enable_profiling)::value_type(perfCount); } else if (name == ov::hint::inference_precision) { const auto enforceBF16 = config.enforceBF16; - return enforceBF16 ? ov::element::bf16 : ov::element::f32; + const auto inference_precision = enforceBF16 ? ov::element::bf16 : ov::element::f32; + return decltype(ov::hint::inference_precision)::value_type(inference_precision); } else if (name == ov::hint::performance_mode) { - const auto perfHint = config.perfHintsConfig.ovPerfHint; + const auto perfHint = ov::util::from_string(config.perfHintsConfig.ovPerfHint, ov::hint::performance_mode); return perfHint; } else if (name == ov::hint::num_requests) { const auto perfHintNumRequests = config.perfHintsConfig.ovPerfHintNumRequests; - return perfHintNumRequests; + return decltype(ov::hint::num_requests)::value_type(perfHintNumRequests); } /* Internally legacy parameters are used with new API as part of migration procedure. 
* This fallback can be removed as soon as migration completed */ diff --git a/src/plugins/intel_cpu/src/plugin.cpp b/src/plugins/intel_cpu/src/plugin.cpp index b1e46577633..afe9058d4a7 100644 --- a/src/plugins/intel_cpu/src/plugin.cpp +++ b/src/plugins/intel_cpu/src/plugin.cpp @@ -106,6 +106,7 @@ #include #include #include "openvino/runtime/core.hpp" +#include "openvino/util/common_util.hpp" #include #include "performance_heuristics.hpp" @@ -568,6 +569,7 @@ void Engine::ApplyPerformanceHints(std::map &config, c if (mode_name == CONFIG_VALUE(LATENCY)) { config[CONFIG_KEY(CPU_THROUGHPUT_STREAMS)] = CONFIG_VALUE(CPU_THROUGHPUT_NUMA); + config[ov::num_streams.name()] = ov::util::to_string(ov::NumStreams(ov::NumStreams::NUMA)); } else if (mode_name == CONFIG_VALUE(THROUGHPUT)) { const auto isa = dnnl::get_effective_cpu_isa(); float isaSpecificThreshold = 1.0f; @@ -625,6 +627,7 @@ void Engine::ApplyPerformanceHints(std::map &config, c engConfig.perfHintsConfig.ovPerfHintNumRequests); } config[CONFIG_KEY(CPU_THROUGHPUT_STREAMS)] = std::to_string(num_streams); + config[ov::num_streams.name()] = ov::util::to_string(ov::NumStreams(num_streams)); } } @@ -745,11 +748,11 @@ Parameter Engine::GetConfig(const std::string& name, const std::map(); + const auto perfHint = ov::util::from_string(engConfig.perfHintsConfig.ovPerfHint, ov::hint::performance_mode); + return perfHint; } else if (name == ov::hint::num_requests) { const auto perfHintNumRequests = engConfig.perfHintsConfig.ovPerfHintNumRequests; return decltype(ov::hint::num_requests)::value_type(perfHintNumRequests); @@ -840,12 +843,13 @@ Parameter Engine::GetMetric(const std::string& name, const std::map availableDevices = { "" }; - return availableDevices; + return decltype(ov::available_devices)::value_type(availableDevices); } else if (name == ov::device::capabilities) { std::vector capabilities; if (dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core_bf16)) @@ -857,13 +861,13 @@ Parameter Engine::GetMetric(const std::string& name, const std::map range = std::make_tuple(1, 1, 1); - return range; + return decltype(ov::range_for_async_infer_requests)::value_type(range); } else if (name == ov::range_for_streams) { const std::tuple range = std::make_tuple(1, parallel_get_max_threads()); - return range; + return decltype(ov::range_for_streams)::value_type(range); } /* Internally legacy parameters are used with new API as part of migration procedure. 
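 * (Illustrative note: ApplyPerformanceHints above now mirrors the selected streams setting into both
 *  the legacy CPU_THROUGHPUT_STREAMS key and ov::num_streams, and a call such as
 *  `core.get_property("CPU", ov::hint::performance_mode)` returns the ov::hint::PerformanceMode enum
 *  rather than a string; `core` here is just a placeholder ov::Core instance.)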
* This fallback can be removed as soon as migration completed */ From 171ad9536fce215e745aa91cdcaf5f6947ba0f94 Mon Sep 17 00:00:00 2001 From: Mikhail Letavin Date: Tue, 22 Feb 2022 12:45:32 +0300 Subject: [PATCH 055/310] [GPU] Disable unrolling by default for LSTMsequence and TensorIterator having length>=16 (#10406) --- .../intel_gpu/src/plugin/transformations_pipeline.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp index 7fbc43833b1..ddcdaf5e71d 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp @@ -233,6 +233,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { } else if (const auto &lstm_seq = std::dynamic_pointer_cast(node)) { return lstm_seq->get_clip() == 0.0f && lstm_seq->get_activations() == std::vector{"sigmoid", "tanh", "tanh"} && + max_seq_len < 16 && !ngraph::op::util::is_seq_len_provided(lstm_seq->get_input_node_shared_ptr(3), max_seq_len); } @@ -459,10 +460,9 @@ void TransformationsPipeline::apply(std::shared_ptr func) { [this](const std::shared_ptr &node) -> bool { auto sub_graph_op = std::dynamic_pointer_cast(node); int64_t num_iter = sub_graph_op->get_num_iterations(); - if (num_iter == 1) { - return false; - } - return !config.enable_loop_unrolling; + if (!config.enable_loop_unrolling) + return num_iter != 1; + return num_iter >= 16; }); manager.register_pass(); From d57fb75ba63be8f59f8a5449b5f76a8f7ce35e9f Mon Sep 17 00:00:00 2001 From: Irina Efode Date: Tue, 22 Feb 2022 12:58:07 +0300 Subject: [PATCH 056/310] migration to OV2.0 (#10562) --- .../include/matchers/base_matcher.hpp | 12 +- .../include/matchers/convolutions.hpp | 8 +- .../include/matchers/matchers_manager.hpp | 12 +- .../include/matchers/single_op.hpp | 24 ++-- .../subgraphs_dumper/include/op_cloner.hpp | 4 +- .../subgraphs_dumper/include/ops_cache.hpp | 10 +- .../src/matchers/base_matcher.cpp | 2 +- .../src/matchers/convolutions.cpp | 20 ++-- .../src/matchers/matchers_manager.cpp | 12 +- .../src/matchers/single_op.cpp | 94 +++++++-------- .../subgraphs_dumper/src/op_cloner.cpp | 108 +++++++++--------- .../subgraphs_dumper/src/ops_cache.cpp | 48 ++++---- .../tests/matchers/convolutions_matcher.cpp | 76 ++++++------ .../tests/matchers/generic_single_op.cpp | 70 ++++++------ .../tests/matchers/matchers_config.cpp | 8 +- 15 files changed, 250 insertions(+), 258 deletions(-) diff --git a/src/tests/functional/plugin/conformance/subgraphs_dumper/include/matchers/base_matcher.hpp b/src/tests/functional/plugin/conformance/subgraphs_dumper/include/matchers/base_matcher.hpp index 4b6ce7d5adc..5954319880e 100644 --- a/src/tests/functional/plugin/conformance/subgraphs_dumper/include/matchers/base_matcher.hpp +++ b/src/tests/functional/plugin/conformance/subgraphs_dumper/include/matchers/base_matcher.hpp @@ -37,7 +37,7 @@ public: bool is_fallback_config; bool ignore_matching = false; - virtual bool op_in_config(const std::shared_ptr &node) = 0; + virtual bool op_in_config(const std::shared_ptr &node) = 0; virtual ~iMatcherConfig() = default; }; @@ -54,8 +54,8 @@ public: MatcherConfig(bool ignore_matching) : iMatcherConfig({}, {}, sizeof...(OPTypes) == 0, ignore_matching) {} - bool op_in_config(const std::shared_ptr &node) override { - std::initializer_list vals{(ngraph::is_type(node))...}; + bool op_in_config(const std::shared_ptr &node) override { + 
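+        // True when the node's type is one of the OPTypes this config is parameterized with.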
std::initializer_list vals{(ov::is_type(node))...}; return std::any_of(vals.begin(), vals.end(), [](bool i) { return i; }); }; }; @@ -66,8 +66,8 @@ class Matcher { friend class MatchersManager; public: - virtual bool match(const std::shared_ptr &node, - const std::shared_ptr &ref, + virtual bool match(const std::shared_ptr &node, + const std::shared_ptr &ref, const LayerTestsUtils::OPInfo &op_info) const = 0; virtual ~Matcher() = default; @@ -75,7 +75,7 @@ public: protected: virtual void configure(const pugi::xml_document &cfg) = 0; - iMatcherConfig::Ptr get_config(const std::shared_ptr &node) const; + iMatcherConfig::Ptr get_config(const std::shared_ptr &node) const; std::vector default_configs; diff --git a/src/tests/functional/plugin/conformance/subgraphs_dumper/include/matchers/convolutions.hpp b/src/tests/functional/plugin/conformance/subgraphs_dumper/include/matchers/convolutions.hpp index 29e0cd574e8..dff594200e1 100644 --- a/src/tests/functional/plugin/conformance/subgraphs_dumper/include/matchers/convolutions.hpp +++ b/src/tests/functional/plugin/conformance/subgraphs_dumper/include/matchers/convolutions.hpp @@ -9,11 +9,11 @@ class ConvolutionsMatcher : public SingleOpMatcher { public: ConvolutionsMatcher(); - bool match_inputs(const std::shared_ptr &node, - const std::shared_ptr &ref, + bool match_inputs(const std::shared_ptr &node, + const std::shared_ptr &ref, const LayerTestsUtils::OPInfo &op_info) const override; - bool match(const std::shared_ptr &node, - const std::shared_ptr &ref, + bool match(const std::shared_ptr &node, + const std::shared_ptr &ref, const LayerTestsUtils::OPInfo &op_info) const override; protected: diff --git a/src/tests/functional/plugin/conformance/subgraphs_dumper/include/matchers/matchers_manager.hpp b/src/tests/functional/plugin/conformance/subgraphs_dumper/include/matchers/matchers_manager.hpp index 27446072cae..e48f117adc1 100644 --- a/src/tests/functional/plugin/conformance/subgraphs_dumper/include/matchers/matchers_manager.hpp +++ b/src/tests/functional/plugin/conformance/subgraphs_dumper/include/matchers/matchers_manager.hpp @@ -19,20 +19,20 @@ public: explicit MatchersManager(const std::string &cfg_path = {}); - bool match_all(const std::shared_ptr &node, - const std::shared_ptr &ref, + bool match_all(const std::shared_ptr &node, + const std::shared_ptr &ref, const LayerTestsUtils::OPInfo &op_info); - bool match_any(const std::shared_ptr &node, - const std::shared_ptr &ref, + bool match_any(const std::shared_ptr &node, + const std::shared_ptr &ref, const LayerTestsUtils::OPInfo &op_info); // TODO: Implement default xml config file generation by Matchers void generate_config() {} private: - std::vector run_matchers(const std::shared_ptr &node, - const std::shared_ptr &ref, + std::vector run_matchers(const std::shared_ptr &node, + const std::shared_ptr &ref, const LayerTestsUtils::OPInfo &op_info); // TODO: No copy constructor for xml_document // pugi::xml_document m_cfg; diff --git a/src/tests/functional/plugin/conformance/subgraphs_dumper/include/matchers/single_op.hpp b/src/tests/functional/plugin/conformance/subgraphs_dumper/include/matchers/single_op.hpp index 490e582bae5..06e15cf99e2 100644 --- a/src/tests/functional/plugin/conformance/subgraphs_dumper/include/matchers/single_op.hpp +++ b/src/tests/functional/plugin/conformance/subgraphs_dumper/include/matchers/single_op.hpp @@ -17,24 +17,24 @@ class SingleOpMatcher : public Matcher { public: SingleOpMatcher(); - bool match(const std::shared_ptr &node, - const std::shared_ptr &ref, + bool 
match(const std::shared_ptr &node, + const std::shared_ptr &ref, const LayerTestsUtils::OPInfo &op_info) const override; - bool same_op_type(const std::shared_ptr &node, - const std::shared_ptr &ref, + bool same_op_type(const std::shared_ptr &node, + const std::shared_ptr &ref, const LayerTestsUtils::OPInfo &op_info) const; - virtual bool match_inputs(const std::shared_ptr &node, - const std::shared_ptr &ref, + virtual bool match_inputs(const std::shared_ptr &node, + const std::shared_ptr &ref, const LayerTestsUtils::OPInfo &op_info) const; - bool match_outputs(const std::shared_ptr &node, - const std::shared_ptr &ref, + bool match_outputs(const std::shared_ptr &node, + const std::shared_ptr &ref, const LayerTestsUtils::OPInfo &op_info) const; - bool same_attrs(const std::shared_ptr &node, - const std::shared_ptr &ref, + bool same_attrs(const std::shared_ptr &node, + const std::shared_ptr &ref, const LayerTestsUtils::OPInfo &op_info) const; - bool match_ports(const std::shared_ptr &node, - const std::shared_ptr &ref, + bool match_ports(const std::shared_ptr &node, + const std::shared_ptr &ref, const LayerTestsUtils::OPInfo &op_info) const; protected: diff --git a/src/tests/functional/plugin/conformance/subgraphs_dumper/include/op_cloner.hpp b/src/tests/functional/plugin/conformance/subgraphs_dumper/include/op_cloner.hpp index a7fb23567c7..651105864bf 100644 --- a/src/tests/functional/plugin/conformance/subgraphs_dumper/include/op_cloner.hpp +++ b/src/tests/functional/plugin/conformance/subgraphs_dumper/include/op_cloner.hpp @@ -14,9 +14,9 @@ namespace SubgraphsDumper { struct ClonersMap { - using clone_fn = std::function(const std::shared_ptr &, + using clone_fn = std::function(const std::shared_ptr &, LayerTestsUtils::OPInfo &meta)>; - using cloners_map_type = std::map; + using cloners_map_type = std::map; static float constant_size_threshold_mb; static const cloners_map_type cloners; diff --git a/src/tests/functional/plugin/conformance/subgraphs_dumper/include/ops_cache.hpp b/src/tests/functional/plugin/conformance/subgraphs_dumper/include/ops_cache.hpp index 79901e33b62..35d9dcc0247 100644 --- a/src/tests/functional/plugin/conformance/subgraphs_dumper/include/ops_cache.hpp +++ b/src/tests/functional/plugin/conformance/subgraphs_dumper/include/ops_cache.hpp @@ -17,15 +17,15 @@ namespace SubgraphsDumper { class OPCache { public: OPCache() : num_neighbours_to_cache(0), manager(MatchersManager()), - m_ops_cache(std::map, LayerTestsUtils::OPInfo>()) {} + m_ops_cache(std::map, LayerTestsUtils::OPInfo>()) {} static std::unique_ptr make_cache() { return std::unique_ptr(new OPCache()); } - void update_ops_cache(const std::shared_ptr &op, const std::string &source_model = {}); + void update_ops_cache(const std::shared_ptr &op, const std::string &source_model = {}); - void update_ops_cache(const std::shared_ptr &func, const bool extract_body = true, const std::string &source_model = {}); + void update_ops_cache(const std::shared_ptr &func, const bool extract_body = true, const std::string &source_model = {}); void serialize_cached_ops(const std::string &serialization_dir); @@ -36,7 +36,7 @@ public: float get_size_of_cached_ops(); protected: - std::map, LayerTestsUtils::OPInfo> m_ops_cache; + std::map, LayerTestsUtils::OPInfo> m_ops_cache; MatchersManager manager; size_t num_neighbours_to_cache = 0; enum SerializationStatus { @@ -44,7 +44,7 @@ protected: FAILED = 1, RETRY = 2, }; - SerializationStatus serialize_function(const std::pair, LayerTestsUtils::OPInfo> &op_info, + SerializationStatus 
serialize_function(const std::pair, LayerTestsUtils::OPInfo> &op_info, const std::string &serialization_dir); }; } // namespace SubgraphsDumper diff --git a/src/tests/functional/plugin/conformance/subgraphs_dumper/src/matchers/base_matcher.cpp b/src/tests/functional/plugin/conformance/subgraphs_dumper/src/matchers/base_matcher.cpp index acb09dc7f02..fc40d4dccba 100644 --- a/src/tests/functional/plugin/conformance/subgraphs_dumper/src/matchers/base_matcher.cpp +++ b/src/tests/functional/plugin/conformance/subgraphs_dumper/src/matchers/base_matcher.cpp @@ -6,7 +6,7 @@ #include "common_test_utils/common_utils.hpp" -SubgraphsDumper::iMatcherConfig::Ptr SubgraphsDumper::Matcher::get_config(const std::shared_ptr &node) const { +SubgraphsDumper::iMatcherConfig::Ptr SubgraphsDumper::Matcher::get_config(const std::shared_ptr &node) const { for (const auto &cfg : default_configs) { if (cfg->op_in_config(node)) { return cfg; diff --git a/src/tests/functional/plugin/conformance/subgraphs_dumper/src/matchers/convolutions.cpp b/src/tests/functional/plugin/conformance/subgraphs_dumper/src/matchers/convolutions.cpp index d346bad4942..668c07ddd8a 100644 --- a/src/tests/functional/plugin/conformance/subgraphs_dumper/src/matchers/convolutions.cpp +++ b/src/tests/functional/plugin/conformance/subgraphs_dumper/src/matchers/convolutions.cpp @@ -8,15 +8,15 @@ using namespace SubgraphsDumper; ConvolutionsMatcher::ConvolutionsMatcher() { default_configs = { std::make_shared>(std::vector{}, std::vector{0, 1}) + ov::op::v1::Convolution, + ov::op::v1::ConvolutionBackpropData, + ov::op::v1::GroupConvolution, + ov::op::v1::GroupConvolutionBackpropData>>(std::vector{}, std::vector{0, 1}) }; } -bool ConvolutionsMatcher::match(const std::shared_ptr &node, - const std::shared_ptr &ref, +bool ConvolutionsMatcher::match(const std::shared_ptr &node, + const std::shared_ptr &ref, const LayerTestsUtils::OPInfo &op_info) const { const auto &cfg = get_config(node); if (match_only_configured_ops() && cfg->is_fallback_config) { @@ -31,8 +31,8 @@ bool ConvolutionsMatcher::match(const std::shared_ptr &node, same_attrs(node, ref, op_info) && match_ports(node, ref, op_info); } -bool ConvolutionsMatcher::match_inputs(const std::shared_ptr &node, - const std::shared_ptr &ref, +bool ConvolutionsMatcher::match_inputs(const std::shared_ptr &node, + const std::shared_ptr &ref, const LayerTestsUtils::OPInfo &op_info) const { if (node->get_input_size() != ref->get_input_size()) { return false; @@ -46,8 +46,8 @@ bool ConvolutionsMatcher::match_inputs(const std::shared_ptr &node if (!(rankIsEqual && elemTypeIsEqual && is_dynamic)) { return false; } - bool has_groups = std::dynamic_pointer_cast(node) != nullptr || - std::dynamic_pointer_cast(node); + bool has_groups = std::dynamic_pointer_cast(node) != nullptr || + std::dynamic_pointer_cast(node); size_t kernel_size_offset = has_groups ? 
3 : 2; auto ref_weights_shape = ref->get_input_tensor(1).get_shape(); auto cur_weights_shape = node->get_input_tensor(1).get_shape(); diff --git a/src/tests/functional/plugin/conformance/subgraphs_dumper/src/matchers/matchers_manager.cpp b/src/tests/functional/plugin/conformance/subgraphs_dumper/src/matchers/matchers_manager.cpp index 33ad300b469..9c2a930e837 100644 --- a/src/tests/functional/plugin/conformance/subgraphs_dumper/src/matchers/matchers_manager.cpp +++ b/src/tests/functional/plugin/conformance/subgraphs_dumper/src/matchers/matchers_manager.cpp @@ -6,8 +6,8 @@ using namespace SubgraphsDumper; -bool MatchersManager::match_any(const std::shared_ptr &node, - const std::shared_ptr &ref, +bool MatchersManager::match_any(const std::shared_ptr &node, + const std::shared_ptr &ref, const LayerTestsUtils::OPInfo &op_info) { for (const auto &it : m_matchers) { if (it.second->match(node, ref, op_info)) return true; @@ -15,8 +15,8 @@ bool MatchersManager::match_any(const std::shared_ptr &node, return false; } -bool MatchersManager::match_all(const std::shared_ptr &node, - const std::shared_ptr &ref, +bool MatchersManager::match_all(const std::shared_ptr &node, + const std::shared_ptr &ref, const LayerTestsUtils::OPInfo &op_info) { const auto matches = this->run_matchers(node, ref, op_info); return std::all_of(matches.begin(), matches.end(), [](bool i) { return i; }); @@ -31,8 +31,8 @@ MatchersManager::MatchersManager(const std::string &cfg_path) { } } -std::vector MatchersManager::run_matchers(const std::shared_ptr &node, - const std::shared_ptr &ref, +std::vector MatchersManager::run_matchers(const std::shared_ptr &node, + const std::shared_ptr &ref, const LayerTestsUtils::OPInfo &op_info) { std::vector matches; for (const auto &it : m_matchers) { diff --git a/src/tests/functional/plugin/conformance/subgraphs_dumper/src/matchers/single_op.cpp b/src/tests/functional/plugin/conformance/subgraphs_dumper/src/matchers/single_op.cpp index dfb1b417734..06f779fe4c2 100644 --- a/src/tests/functional/plugin/conformance/subgraphs_dumper/src/matchers/single_op.cpp +++ b/src/tests/functional/plugin/conformance/subgraphs_dumper/src/matchers/single_op.cpp @@ -10,10 +10,10 @@ using namespace SubgraphsDumper; template -bool compare_constants_data(const std::shared_ptr &op, - const std::shared_ptr &ref) { - size_t elements_count = ngraph::shape_size(op->get_shape()); - if (elements_count != ngraph::shape_size(ref->get_shape())) { +bool compare_constants_data(const std::shared_ptr &op, + const std::shared_ptr &ref) { + size_t elements_count = ov::shape_size(op->get_shape()); + if (elements_count != ov::shape_size(ref->get_shape())) { return false; } const auto &op_data = op->cast_vector(); @@ -29,37 +29,37 @@ bool compare_constants_data(const std::shared_ptr &op, } // TODO: Move to some utils? 
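// (The dispatcher below switches on the constant's element type and compares the two constants'
//  cast_vector<T>() contents element-wise after a shape_size check.)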
-bool compare_constants_data(const std::shared_ptr &op, - const std::shared_ptr &ref) { +bool compare_constants_data(const std::shared_ptr &op, + const std::shared_ptr &ref) { switch (op->get_element_type()) { - case ngraph::element::Type_t::boolean: + case ov::element::Type_t::boolean: return compare_constants_data(op, ref); - case ngraph::element::Type_t::bf16: - return compare_constants_data(op, ref); - case ngraph::element::Type_t::f16: - return compare_constants_data(op, ref); - case ngraph::element::Type_t::f32: + case ov::element::Type_t::bf16: + return compare_constants_data(op, ref); + case ov::element::Type_t::f16: + return compare_constants_data(op, ref); + case ov::element::Type_t::f32: return compare_constants_data(op, ref); - case ngraph::element::Type_t::f64: + case ov::element::Type_t::f64: return compare_constants_data(op, ref); - case ngraph::element::Type_t::i8: + case ov::element::Type_t::i8: return compare_constants_data(op, ref); - case ngraph::element::Type_t::i16: + case ov::element::Type_t::i16: return compare_constants_data(op, ref); - case ngraph::element::Type_t::i32: + case ov::element::Type_t::i32: return compare_constants_data(op, ref); - case ngraph::element::Type_t::i64: + case ov::element::Type_t::i64: return compare_constants_data(op, ref); // TODO cast_vector doesn't support u1 now -// case ngraph::element::Type_t::u1: +// case ov::element::Type_t::u1: // return compare_constants_data(op, ref); - case ngraph::element::Type_t::u8: + case ov::element::Type_t::u8: return compare_constants_data(op, ref); - case ngraph::element::Type_t::u16: + case ov::element::Type_t::u16: return compare_constants_data(op, ref); - case ngraph::element::Type_t::u32: + case ov::element::Type_t::u32: return compare_constants_data(op, ref); - case ngraph::element::Type_t::u64: + case ov::element::Type_t::u64: return compare_constants_data(op, ref); default: std::cout << "Can't compare constants" << op << " with " << ref << "\n" << "Unsupported data type"; @@ -67,15 +67,15 @@ bool compare_constants_data(const std::shared_ptr &op, } } -bool SingleOpMatcher::same_op_type(const std::shared_ptr &node, - const std::shared_ptr &ref, +bool SingleOpMatcher::same_op_type(const std::shared_ptr &node, + const std::shared_ptr &ref, const LayerTestsUtils::OPInfo &op_info) const { return node->get_type_info().name == ref->get_type_info().name && node->get_type_info().version == ref->get_type_info().version; } -bool SingleOpMatcher::match_inputs(const std::shared_ptr &node, - const std::shared_ptr &ref, +bool SingleOpMatcher::match_inputs(const std::shared_ptr &node, + const std::shared_ptr &ref, const LayerTestsUtils::OPInfo &op_info) const { if (node->get_input_size() != ref->get_input_size()) { return false; @@ -96,8 +96,8 @@ bool SingleOpMatcher::match_inputs(const std::shared_ptr &node, } bool -SingleOpMatcher::match_outputs(const std::shared_ptr &node, - const std::shared_ptr &ref, +SingleOpMatcher::match_outputs(const std::shared_ptr &node, + const std::shared_ptr &ref, const LayerTestsUtils::OPInfo &op_info) const { if (node->get_output_size() != ref->get_output_size()) { return false; @@ -121,14 +121,14 @@ SingleOpMatcher::match_outputs(const std::shared_ptr &node, return true; } -bool SingleOpMatcher::same_attrs(const std::shared_ptr &node, - const std::shared_ptr &ref, +bool SingleOpMatcher::same_attrs(const std::shared_ptr &node, + const std::shared_ptr &ref, const LayerTestsUtils::OPInfo &op_info) const { return attributes::compare(node.get(), ref.get(), 
Comparator::CmpValues::ATTRIBUTES).valid; } -bool SingleOpMatcher::match_ports(const std::shared_ptr &node, - const std::shared_ptr &ref, +bool SingleOpMatcher::match_ports(const std::shared_ptr &node, + const std::shared_ptr &ref, const LayerTestsUtils::OPInfo &op_info) const { const auto &cfg = get_config(node); const std::vector &ignored_ports = cfg->ignored_ports; @@ -140,8 +140,8 @@ bool SingleOpMatcher::match_ports(const std::shared_ptr &node, const auto &cur_node_input = node->input_value(port_id); const auto &ref_node_input = ref->input_value(port_id); - const auto &cur_const_input = ngraph::get_constant_from_source(cur_node_input); - const auto &ref_const_input = ngraph::get_constant_from_source(ref_node_input); + const auto &cur_const_input = ov::get_constant_from_source(cur_node_input); + const auto &ref_const_input = ov::get_constant_from_source(ref_node_input); // Check that both OP an reference port inputs are constant and have same data if (cur_const_input && ref_const_input && @@ -155,8 +155,8 @@ bool SingleOpMatcher::match_ports(const std::shared_ptr &node, return true; } -bool SingleOpMatcher::match(const std::shared_ptr &node, - const std::shared_ptr &ref, +bool SingleOpMatcher::match(const std::shared_ptr &node, + const std::shared_ptr &ref, const LayerTestsUtils::OPInfo &op_info) const { for (const auto& input_node : node->inputs()) { if (input_node.get_partial_shape().is_dynamic()) { @@ -181,19 +181,19 @@ bool SingleOpMatcher::match(const std::shared_ptr &node, SingleOpMatcher::SingleOpMatcher() { default_configs = { std::make_shared>(std::vector{}, std::vector{0}), - std::make_shared>(std::vector{}, - std::vector{0, 1, 2, 3, 4}), + std::make_shared>(std::vector{}, + std::vector{0, 1, 2, 3, 4}), std::make_shared>(std::vector{}, std::vector{0, 1}), + ov::op::v0::MatMul, + ov::op::v1::Add, + ov::op::v1::Multiply, + ov::op::v1::Subtract, + ov::op::v1::Power>>(std::vector{}, std::vector{0, 1}), std::make_shared>(true) + ov::op::v1::Convolution, + ov::op::v1::ConvolutionBackpropData, + ov::op::v1::GroupConvolution, + ov::op::v1::GroupConvolutionBackpropData>>(true) }; } \ No newline at end of file diff --git a/src/tests/functional/plugin/conformance/subgraphs_dumper/src/op_cloner.cpp b/src/tests/functional/plugin/conformance/subgraphs_dumper/src/op_cloner.cpp index 2680b7f84d0..fbfd74abc1c 100644 --- a/src/tests/functional/plugin/conformance/subgraphs_dumper/src/op_cloner.cpp +++ b/src/tests/functional/plugin/conformance/subgraphs_dumper/src/op_cloner.cpp @@ -11,7 +11,7 @@ namespace { template -void get_port_range(const std::shared_ptr &const_node, LayerTestsUtils::PortInfo &port_info) { +void get_port_range(const std::shared_ptr &const_node, LayerTestsUtils::PortInfo &port_info) { std::vector data = const_node->cast_vector(); if (!data.empty()) { auto min_max = std::minmax_element(data.begin(), data.end()); @@ -21,48 +21,48 @@ void get_port_range(const std::shared_ptr &const_node, Lay } -void get_port_range(const std::shared_ptr &constant_input, LayerTestsUtils::PortInfo &port_info) { +void get_port_range(const std::shared_ptr &constant_input, LayerTestsUtils::PortInfo &port_info) { switch (constant_input->get_element_type()) { - case ngraph::element::Type_t::boolean: + case ov::element::Type_t::boolean: get_port_range(constant_input, port_info); break; - case ngraph::element::Type_t::bf16: - get_port_range(constant_input, port_info); + case ov::element::Type_t::bf16: + get_port_range(constant_input, port_info); break; - case ngraph::element::Type_t::f16: - 
get_port_range(constant_input, port_info); + case ov::element::Type_t::f16: + get_port_range(constant_input, port_info); break; - case ngraph::element::Type_t::f32: + case ov::element::Type_t::f32: get_port_range(constant_input, port_info); break; - case ngraph::element::Type_t::f64: + case ov::element::Type_t::f64: get_port_range(constant_input, port_info); break; - case ngraph::element::Type_t::i8: + case ov::element::Type_t::i8: get_port_range(constant_input, port_info); break; - case ngraph::element::Type_t::i16: + case ov::element::Type_t::i16: get_port_range(constant_input, port_info); break; - case ngraph::element::Type_t::i32: + case ov::element::Type_t::i32: get_port_range(constant_input, port_info); break; - case ngraph::element::Type_t::i64: + case ov::element::Type_t::i64: get_port_range(constant_input, port_info); break; - case ngraph::element::Type_t::u1: + case ov::element::Type_t::u1: get_port_range(constant_input, port_info); break; - case ngraph::element::Type_t::u8: + case ov::element::Type_t::u8: get_port_range(constant_input, port_info); break; - case ngraph::element::Type_t::u16: + case ov::element::Type_t::u16: get_port_range(constant_input, port_info); break; - case ngraph::element::Type_t::u32: + case ov::element::Type_t::u32: get_port_range(constant_input, port_info); break; - case ngraph::element::Type_t::u64: + case ov::element::Type_t::u64: get_port_range(constant_input, port_info); break; default: @@ -70,36 +70,36 @@ void get_port_range(const std::shared_ptr &constant_input, } } -std::shared_ptr clone(const std::shared_ptr &node, LayerTestsUtils::OPInfo &meta) { - ngraph::OutputVector op_inputs; +std::shared_ptr clone(const std::shared_ptr &node, LayerTestsUtils::OPInfo &meta) { + ov::OutputVector op_inputs; bool static_inputs = true; for (size_t i = 0; i < node->get_input_size(); ++i) { const auto input = node->input(i).get_source_output(); static_inputs &= input.get_partial_shape().is_static(); auto port_info = LayerTestsUtils::PortInfo(); - const auto constant = ngraph::get_constant_from_source(input); + const auto constant = ov::get_constant_from_source(input); if (constant != nullptr) { get_port_range(constant, port_info); float weights_size = - static_cast(ngraph::shape_size(constant->get_shape()) * + static_cast(ov::shape_size(constant->get_shape()) * constant->get_element_type().size()) / (1024 * 1024); if (weights_size > ClonersMap::constant_size_threshold_mb) { std::cout << "Constant with size " << weights_size << " detected on port " << i << " of OP " << node << std::endl << "The constant will be replaced with parameter and initial data ranges meta info" << std::endl; - auto param = std::make_shared(constant->get_element_type(), - constant->get_shape()); + auto param = std::make_shared(constant->get_element_type(), + constant->get_shape()); op_inputs.push_back(param); } else { - const auto clone = std::make_shared(constant->get_element_type(), + const auto clone = std::make_shared(constant->get_element_type(), constant->get_shape(), constant->get_data_ptr()); op_inputs.push_back(clone); } } else { - auto param = std::make_shared(input.get_element_type(), + auto param = std::make_shared(input.get_element_type(), input.get_partial_shape()); op_inputs.push_back(param); } @@ -109,17 +109,17 @@ std::shared_ptr clone(const std::shared_ptr &node, L return op_clone; } -std::shared_ptr clone_weightable_node(const std::shared_ptr &node, +std::shared_ptr clone_weightable_node(const std::shared_ptr &node, const std::vector &weight_ports, LayerTestsUtils::OPInfo 
&meta) { - ngraph::OutputVector op_inputs; + ov::OutputVector op_inputs; for (size_t i = 0; i < node->get_input_size(); ++i) { const auto input = node->input(i).get_source_output(); - const auto constant_input = ngraph::get_constant_from_source(input); + const auto constant_input = ov::get_constant_from_source(input); auto port_info = LayerTestsUtils::PortInfo(); // Input is Parameter or dynamic data pass if (!constant_input) { - auto param = std::make_shared(input.get_element_type(), + auto param = std::make_shared(input.get_element_type(), input.get_partial_shape()); op_inputs.push_back(param); meta.ports_info[i] = port_info; @@ -129,18 +129,18 @@ std::shared_ptr clone_weightable_node(const std::shared_ptr(ngraph::shape_size(constant_input->get_shape()) * + static_cast(ov::shape_size(constant_input->get_shape()) * constant_input->get_element_type().size()) / (1024 * 1024); if (weights_size > ClonersMap::constant_size_threshold_mb) { std::cout << "Constant with size " << weights_size << " detected on port " << i << " of OP " << node << std::endl << "The constant will be replaced with parameter and initial data ranges meta info" << std::endl; - auto param = std::make_shared(constant_input->get_element_type(), - constant_input->get_shape()); + auto param = std::make_shared(constant_input->get_element_type(), + constant_input->get_shape()); op_inputs.push_back(param); } else { - const auto clone = std::make_shared(constant_input->get_element_type(), + const auto clone = std::make_shared(constant_input->get_element_type(), constant_input->get_shape(), constant_input->get_data_ptr()); op_inputs.push_back(clone); @@ -149,8 +149,8 @@ std::shared_ptr clone_weightable_node(const std::shared_ptr(constant_input->get_element_type(), - constant_input->get_shape()); + auto param = std::make_shared(constant_input->get_element_type(), + constant_input->get_shape()); port_info.convert_to_const = true; meta.ports_info[i] = port_info; op_inputs.push_back(param); @@ -160,52 +160,52 @@ std::shared_ptr clone_weightable_node(const std::shared_ptr -clone(const std::shared_ptr &node, LayerTestsUtils::OPInfo &meta) { +std::shared_ptr +clone(const std::shared_ptr &node, LayerTestsUtils::OPInfo &meta) { return clone_weightable_node(node, {1}, meta); } -std::shared_ptr -clone(const std::shared_ptr &node, LayerTestsUtils::OPInfo &meta) { +std::shared_ptr +clone(const std::shared_ptr &node, LayerTestsUtils::OPInfo &meta) { return clone_weightable_node(node, {1}, meta); } -std::shared_ptr -clone(const std::shared_ptr &node, LayerTestsUtils::OPInfo &meta) { +std::shared_ptr +clone(const std::shared_ptr &node, LayerTestsUtils::OPInfo &meta) { return clone_weightable_node(node, {1}, meta); } -std::shared_ptr -clone(const std::shared_ptr &node, LayerTestsUtils::OPInfo &meta) { +std::shared_ptr +clone(const std::shared_ptr &node, LayerTestsUtils::OPInfo &meta) { return clone_weightable_node(node, {1}, meta); } -std::shared_ptr -clone(const std::shared_ptr &node, LayerTestsUtils::OPInfo &meta) { +std::shared_ptr +clone(const std::shared_ptr &node, LayerTestsUtils::OPInfo &meta) { return clone_weightable_node(node, {0, 1}, meta); } -std::shared_ptr clone(const std::shared_ptr &node, LayerTestsUtils::OPInfo &meta) { +std::shared_ptr clone(const std::shared_ptr &node, LayerTestsUtils::OPInfo &meta) { return clone_weightable_node(node, {0, 1}, meta); } -std::shared_ptr -clone(const std::shared_ptr &node, LayerTestsUtils::OPInfo &meta) { +std::shared_ptr +clone(const std::shared_ptr &node, LayerTestsUtils::OPInfo &meta) { return 
clone_weightable_node(node, {0, 1}, meta); } -std::shared_ptr -clone(const std::shared_ptr &node, LayerTestsUtils::OPInfo &meta) { +std::shared_ptr +clone(const std::shared_ptr &node, LayerTestsUtils::OPInfo &meta) { return clone_weightable_node(node, {0, 1}, meta); } -std::shared_ptr clone(const std::shared_ptr &node, LayerTestsUtils::OPInfo &meta) { +std::shared_ptr clone(const std::shared_ptr &node, LayerTestsUtils::OPInfo &meta) { return clone_weightable_node(node, {0, 1}, meta); } template -std::shared_ptr clone_node(const std::shared_ptr &node, LayerTestsUtils::OPInfo &meta) { - return clone(ngraph::as_type_ptr(node), meta); +std::shared_ptr clone_node(const std::shared_ptr &node, LayerTestsUtils::OPInfo &meta) { + return clone(ov::as_type_ptr(node), meta); } } // namespace diff --git a/src/tests/functional/plugin/conformance/subgraphs_dumper/src/ops_cache.cpp b/src/tests/functional/plugin/conformance/subgraphs_dumper/src/ops_cache.cpp index 85770913eeb..af72f961e34 100644 --- a/src/tests/functional/plugin/conformance/subgraphs_dumper/src/ops_cache.cpp +++ b/src/tests/functional/plugin/conformance/subgraphs_dumper/src/ops_cache.cpp @@ -13,7 +13,7 @@ using namespace SubgraphsDumper; -void OPCache::update_ops_cache(const std::shared_ptr &op, +void OPCache::update_ops_cache(const std::shared_ptr &op, const std::string &source_model) { const bool op_found = [&] { for (auto &&it : m_ops_cache) { @@ -28,7 +28,7 @@ void OPCache::update_ops_cache(const std::shared_ptr &op, const auto &clone_fn = SubgraphsDumper::ClonersMap::cloners.at(op->get_type_info()); LayerTestsUtils::OPInfo meta(source_model); try { - const std::shared_ptr op_clone = clone_fn(op, meta); + const std::shared_ptr op_clone = clone_fn(op, meta); op_clone->set_friendly_name(op_clone->get_friendly_name() + "_cached"); m_ops_cache.insert({op_clone, meta}); } catch (std::exception &e) { @@ -37,33 +37,33 @@ void OPCache::update_ops_cache(const std::shared_ptr &op, } } -void OPCache::update_ops_cache(const std::shared_ptr &func, const bool extract_body, const std::string &source_model) { +void OPCache::update_ops_cache(const std::shared_ptr &func, const bool extract_body, const std::string &source_model) { size_t cached_ops_count = m_ops_cache.size(); for (const auto &op : func->get_ordered_ops()) { - if (ngraph::is_type(op) || - ngraph::is_type(op) || - ngraph::is_type(op) || + if (ov::is_type(op) || + ov::is_type(op) || + ov::is_type(op) || // ReadValue and Assign have to be handled in pair // Will be handled as part of 48838 - ngraph::is_type(op) || - ngraph::is_type(op) + ov::is_type(op) || + ov::is_type(op) ) { continue; } if (extract_body) { - if (ngraph::is_type(op)) { - auto if_op = std::dynamic_pointer_cast(op); - std::vector> bodies; + if (ov::is_type(op)) { + auto if_op = std::dynamic_pointer_cast(op); + std::vector> bodies; for (size_t i = 0; i < if_op->get_internal_subgraphs_size(); i++) { auto if_body = if_op->get_function(i); update_ops_cache(if_body, extract_body, source_model); } - } else if (ngraph::is_type(op)) { - auto loop = std::dynamic_pointer_cast(op); + } else if (ov::is_type(op)) { + auto loop = std::dynamic_pointer_cast(op); auto loop_body = loop->get_function(); update_ops_cache(loop_body, extract_body, source_model); - } else if (ngraph::is_type(op)) { - auto ti = std::dynamic_pointer_cast(op); + } else if (ov::is_type(op)) { + auto ti = std::dynamic_pointer_cast(op); auto ti_body = ti->get_body(); update_ops_cache(ti_body, extract_body, source_model); } @@ -123,10 +123,10 @@ float 
OPCache::get_size_of_cached_ops() { float size = 0; for (const auto &op : m_ops_cache) { for (size_t i = 0; i < op.first->get_input_size(); ++i) { - const auto constant = std::dynamic_pointer_cast( + const auto constant = std::dynamic_pointer_cast( op.first->get_input_node_shared_ptr(i)); if (constant != nullptr) { - size += static_cast(ngraph::shape_size(constant->get_shape()) * + size += static_cast(ov::shape_size(constant->get_shape()) * constant->get_element_type().size()) / (1024 * 1024); } } @@ -135,7 +135,7 @@ float OPCache::get_size_of_cached_ops() { } OPCache::SerializationStatus -OPCache::serialize_function(const std::pair, LayerTestsUtils::OPInfo> &op, +OPCache::serialize_function(const std::pair, LayerTestsUtils::OPInfo> &op, const std::string &serialization_dir) { try { if (op.first->get_friendly_name() == "Relu_8793_cached") { @@ -144,19 +144,19 @@ OPCache::serialize_function(const std::pair, Layer std::cout << "Serializing function wrapping op " << op.first << std::endl; std::cout << "Taken from model: " << op.second.source_model << std::endl; - ngraph::ParameterVector params; + ov::ParameterVector params; for (size_t i = 0; i < op.first->get_input_size(); ++i) { - if (ngraph::op::is_parameter(op.first->get_input_node_ptr(i))) { - auto param = std::dynamic_pointer_cast( + if (ov::op::util::is_parameter(op.first->get_input_node_ptr(i))) { + auto param = std::dynamic_pointer_cast( op.first->get_input_node_shared_ptr(i)); params.push_back(param); } } - ngraph::ResultVector results; + ov::ResultVector results; for (auto &out : op.first->outputs()) { - results.push_back(std::make_shared(out)); + results.push_back(std::make_shared(out)); } - auto function = std::make_shared(results, params); + auto function = std::make_shared(results, params); // TODO: How to define element type for multi-output ops auto op_el_type = op.first->get_output_element_type(0).get_type_name(); diff --git a/src/tests/functional/plugin/conformance/subgraphs_dumper/tests/matchers/convolutions_matcher.cpp b/src/tests/functional/plugin/conformance/subgraphs_dumper/tests/matchers/convolutions_matcher.cpp index 141d18c9952..f8b1cafb907 100644 --- a/src/tests/functional/plugin/conformance/subgraphs_dumper/tests/matchers/convolutions_matcher.cpp +++ b/src/tests/functional/plugin/conformance/subgraphs_dumper/tests/matchers/convolutions_matcher.cpp @@ -7,10 +7,6 @@ #include "ngraph/ops.hpp" #include "functional_test_utils/include/functional_test_utils/layer_test_utils/op_info.hpp" -using namespace ngraph::op; -using namespace ngraph; -using ngraph::element::Type_t; - class ConvolutionMatcherTest : public ::testing::Test { protected: void SetUp() override { @@ -23,58 +19,58 @@ protected: }; -// Check that two convolutions with different input shapes but same kernel size are match each other +// Check that two convolutions with different input ov::Shapes but same kernel size are match each other TEST_F(ConvolutionMatcherTest, ConvsSameKernelSize) { - const auto param = std::make_shared(Type_t::f32, Shape({1, 3, 10, 10})); - const auto weights = std::make_shared(Type_t::f32, Shape({10, 3, 3, 3}), 1); - const auto op1 = std::make_shared(param, weights, Strides(0, 0), CoordinateDiff(0, 0), - CoordinateDiff(0, 0), Strides(0, 0)); + const auto param = std::make_shared(ov::element::Type_t::f32, ov::Shape({1, 3, 10, 10})); + const auto weights = std::make_shared(ov::element::Type_t::f32, ov::Shape({10, 3, 3, 3}), 1); + const auto op1 = std::make_shared(param, weights, ov::Strides(0, 0), ov::CoordinateDiff(0, 0), + 
ov::CoordinateDiff(0, 0), ov::Strides(0, 0)); - const auto param2 = std::make_shared(Type_t::f32, Shape({1, 5, 20, 20})); - const auto weights2 = std::make_shared(Type_t::f32, Shape({10, 5, 3, 3}), 1); - const auto op2 = std::make_shared(param2, weights2, Strides(0, 0), CoordinateDiff(0, 0), - CoordinateDiff(0, 0), Strides(0, 0)); + const auto param2 = std::make_shared(ov::element::Type_t::f32, ov::Shape({1, 5, 20, 20})); + const auto weights2 = std::make_shared(ov::element::Type_t::f32, ov::Shape({10, 5, 3, 3}), 1); + const auto op2 = std::make_shared(param2, weights2, ov::Strides(0, 0), ov::CoordinateDiff(0, 0), + ov::CoordinateDiff(0, 0), ov::Strides(0, 0)); ASSERT_TRUE(matcher.match(op1, op2, op_info)); } -// Check that two convolutions with different input shapes but same kernel size are match each other +// Check that two convolutions with different input ov::Shapes but same kernel size are match each other TEST_F(ConvolutionMatcherTest, ConvsDifferentKernelSize) { - const auto param = std::make_shared(Type_t::f32, Shape({1, 3, 10, 10})); - const auto weights = std::make_shared(Type_t::f32, Shape({10, 3, 3, 5}), 1); - const auto op1 = std::make_shared(param, weights, Strides(0, 0), CoordinateDiff(0, 0), - CoordinateDiff(0, 0), Strides(0, 0)); + const auto param = std::make_shared(ov::element::Type_t::f32, ov::Shape({1, 3, 10, 10})); + const auto weights = std::make_shared(ov::element::Type_t::f32, ov::Shape({10, 3, 3, 5}), 1); + const auto op1 = std::make_shared(param, weights, ov::Strides(0, 0), ov::CoordinateDiff(0, 0), + ov::CoordinateDiff(0, 0), ov::Strides(0, 0)); - const auto param2 = std::make_shared(Type_t::f32, Shape({1, 5, 20, 20})); - const auto weights2 = std::make_shared(Type_t::f32, Shape({10, 5, 3, 3}), 1); - const auto op2 = std::make_shared(param2, weights2, Strides(0, 0), CoordinateDiff(0, 0), - CoordinateDiff(0, 0), Strides(0, 0)); + const auto param2 = std::make_shared(ov::element::Type_t::f32, ov::Shape({1, 5, 20, 20})); + const auto weights2 = std::make_shared(ov::element::Type_t::f32, ov::Shape({10, 5, 3, 3}), 1); + const auto op2 = std::make_shared(param2, weights2, ov::Strides(0, 0), ov::CoordinateDiff(0, 0), + ov::CoordinateDiff(0, 0), ov::Strides(0, 0)); ASSERT_FALSE(matcher.match(op1, op2, op_info)); } -// Check that two group convolutions with different input shapes but same kernel size are match each other +// Check that two group convolutions with different input ov::Shapes but same kernel size are match each other TEST_F(ConvolutionMatcherTest, GroupConvsSameKernelSize) { - const auto param = std::make_shared(Type_t::f32, Shape({1, 4, 10, 10})); - const auto weights = std::make_shared(Type_t::f32, Shape({2, 10, 2, 3, 3}), 1); - const auto op1 = std::make_shared(param, weights, Strides(0, 0), CoordinateDiff(0, 0), - CoordinateDiff(0, 0), Strides(0, 0)); + const auto param = std::make_shared(ov::element::Type_t::f32, ov::Shape({1, 4, 10, 10})); + const auto weights = std::make_shared(ov::element::Type_t::f32, ov::Shape({2, 10, 2, 3, 3}), 1); + const auto op1 = std::make_shared(param, weights, ov::Strides(0, 0), ov::CoordinateDiff(0, 0), + ov::CoordinateDiff(0, 0), ov::Strides(0, 0)); - const auto param2 = std::make_shared(Type_t::f32, Shape({1, 6, 20, 20})); - const auto weights2 = std::make_shared(Type_t::f32, Shape({2, 10, 3, 3, 3}), 1); - const auto op2 = std::make_shared(param2, weights2, Strides(0, 0), CoordinateDiff(0, 0), - CoordinateDiff(0, 0), Strides(0, 0)); + const auto param2 = std::make_shared(ov::element::Type_t::f32, ov::Shape({1, 6, 20, 
20})); + const auto weights2 = std::make_shared(ov::element::Type_t::f32, ov::Shape({2, 10, 3, 3, 3}), 1); + const auto op2 = std::make_shared(param2, weights2, ov::Strides(0, 0), ov::CoordinateDiff(0, 0), + ov::CoordinateDiff(0, 0), ov::Strides(0, 0)); ASSERT_TRUE(matcher.match(op1, op2, op_info)); } -// Check that two group convolutions with different input shapes but same kernel size are match each other +// Check that two group convolutions with different input ov::Shapes but same kernel size are match each other TEST_F(ConvolutionMatcherTest, GroupConvsDifferentKernelSize) { - const auto param = std::make_shared(Type_t::f32, Shape({1, 4, 10, 10})); - const auto weights = std::make_shared(Type_t::f32, Shape({2, 10, 2, 3, 5}), 1); - const auto op1 = std::make_shared(param, weights, Strides(0, 0), CoordinateDiff(0, 0), - CoordinateDiff(0, 0), Strides(0, 0)); + const auto param = std::make_shared(ov::element::Type_t::f32, ov::Shape({1, 4, 10, 10})); + const auto weights = std::make_shared(ov::element::Type_t::f32, ov::Shape({2, 10, 2, 3, 5}), 1); + const auto op1 = std::make_shared(param, weights, ov::Strides(0, 0), ov::CoordinateDiff(0, 0), + ov::CoordinateDiff(0, 0), ov::Strides(0, 0)); - const auto param2 = std::make_shared(Type_t::f32, Shape({1, 6, 20, 20})); - const auto weights2 = std::make_shared(Type_t::f32, Shape({2, 10, 3, 3, 3}), 1); - const auto op2 = std::make_shared(param2, weights2, Strides(0, 0), CoordinateDiff(0, 0), - CoordinateDiff(0, 0), Strides(0, 0)); + const auto param2 = std::make_shared(ov::element::Type_t::f32, ov::Shape({1, 6, 20, 20})); + const auto weights2 = std::make_shared(ov::element::Type_t::f32, ov::Shape({2, 10, 3, 3, 3}), 1); + const auto op2 = std::make_shared(param2, weights2, ov::Strides(0, 0), ov::CoordinateDiff(0, 0), + ov::CoordinateDiff(0, 0), ov::Strides(0, 0)); ASSERT_FALSE(matcher.match(op1, op2, op_info)); } \ No newline at end of file diff --git a/src/tests/functional/plugin/conformance/subgraphs_dumper/tests/matchers/generic_single_op.cpp b/src/tests/functional/plugin/conformance/subgraphs_dumper/tests/matchers/generic_single_op.cpp index 7907cc1b41e..c40a030309d 100644 --- a/src/tests/functional/plugin/conformance/subgraphs_dumper/tests/matchers/generic_single_op.cpp +++ b/src/tests/functional/plugin/conformance/subgraphs_dumper/tests/matchers/generic_single_op.cpp @@ -7,10 +7,6 @@ #include "ngraph/ops.hpp" #include "functional_test_utils/include/functional_test_utils/layer_test_utils/op_info.hpp" -using namespace ngraph::op; -using namespace ngraph; -using ngraph::element::Type_t; - class SingleOpMatcherTest : public ::testing::Test { protected: void SetUp() override { @@ -25,68 +21,68 @@ protected: // Check that different values of constant nodes on port 0 (default value) are ignored in match() TEST_F(SingleOpMatcherTest, AllPortsAreConsts_IgnoreConstPortVals) { - const auto const1 = std::make_shared(Type_t::f32, Shape({5, 5}), 1); - const auto shape_pattern = std::make_shared(Type_t::i64, Shape({2}), std::vector{1, 25}); - const auto op1 = std::make_shared(const1, shape_pattern, false); + const auto const1 = std::make_shared(ov::element::Type_t::f32, ov::Shape({5, 5}), 1); + const auto shape_pattern = std::make_shared(ov::element::Type_t::i64, ov::Shape({2}), std::vector{1, 25}); + const auto op1 = std::make_shared(const1, shape_pattern, false); - const auto const2 = std::make_shared(Type_t::f32, Shape({5, 5}), 2); - const auto op2 = std::make_shared(const2, shape_pattern, false); + const auto const2 = 
std::make_shared(ov::element::Type_t::f32, ov::Shape({5, 5}), 2); + const auto op2 = std::make_shared(const2, shape_pattern, false); ASSERT_TRUE(matcher.match(op1, op2, op_info)); } // Check match of equal nodes TEST_F(SingleOpMatcherTest, AllPortsAreParams_NodesEqual) { - const auto param1 = std::make_shared(element::Type_t::f32, Shape({10, 10})); - const auto param2 = std::make_shared(element::Type_t::f32, Shape({10, 20})); - const auto op1 = std::make_shared<::Concat>(OutputVector({param1, param2}), 1); - const auto op2 = std::make_shared<::Concat>(OutputVector({param1, param2}), 1); + const auto param1 = std::make_shared(ov::element::Type_t::f32, ov::Shape({10, 10})); + const auto param2 = std::make_shared(ov::element::Type_t::f32, ov::Shape({10, 20})); + const auto op1 = std::make_shared(ov::OutputVector({param1, param2}), 1); + const auto op2 = std::make_shared(ov::OutputVector({param1, param2}), 1); ASSERT_TRUE(matcher.match(op1, op2, op_info)); } // Check nodes doesn't match - different input ranks TEST_F(SingleOpMatcherTest, AllPortsAreParams_RanksNotEqual) { - const auto param1 = std::make_shared(element::Type_t::f32, Shape({10, 10})); - const auto param2 = std::make_shared(element::Type_t::f32, Shape({10, 20})); - const auto op1 = std::make_shared<::Concat>(OutputVector({param1, param2}), 1); + const auto param1 = std::make_shared(ov::element::Type_t::f32, ov::Shape({10, 10})); + const auto param2 = std::make_shared(ov::element::Type_t::f32, ov::Shape({10, 20})); + const auto op1 = std::make_shared(ov::OutputVector({param1, param2}), 1); - const auto param3 = std::make_shared(element::Type_t::f32, Shape({10, 40, 10})); - const auto param4 = std::make_shared(element::Type_t::f32, Shape({10, 40, 10})); - const auto op2 = std::make_shared<::Concat>(OutputVector({param3, param4}), 1); + const auto param3 = std::make_shared(ov::element::Type_t::f32, ov::Shape({10, 40, 10})); + const auto param4 = std::make_shared(ov::element::Type_t::f32, ov::Shape({10, 40, 10})); + const auto op2 = std::make_shared(ov::OutputVector({param3, param4}), 1); ASSERT_FALSE(matcher.match(op1, op2, op_info)); } // Check nodes doesn't match - different input element types TEST_F(SingleOpMatcherTest, AllPortsAreParams_TypesNotEqual) { - const auto param1 = std::make_shared(element::Type_t::f32, Shape({10, 10})); - const auto param2 = std::make_shared(element::Type_t::f32, Shape({10, 20})); - const auto op1 = std::make_shared<::Concat>(OutputVector({param1, param2}), 1); + const auto param1 = std::make_shared(ov::element::Type_t::f32, ov::Shape({10, 10})); + const auto param2 = std::make_shared(ov::element::Type_t::f32, ov::Shape({10, 20})); + const auto op1 = std::make_shared(ov::OutputVector({param1, param2}), 1); - const auto param3 = std::make_shared(element::Type_t::f16, Shape({10, 10})); - const auto param4 = std::make_shared(element::Type_t::f16, Shape({10, 20})); - const auto op2 = std::make_shared<::Concat>(OutputVector({param3, param4}), 1); + const auto param3 = std::make_shared(ov::element::Type_t::f16, ov::Shape({10, 10})); + const auto param4 = std::make_shared(ov::element::Type_t::f16, ov::Shape({10, 20})); + const auto op2 = std::make_shared(ov::OutputVector({param3, param4}), 1); ASSERT_FALSE(matcher.match(op1, op2, op_info)); } // Check nodes doesn't match - different input element types TEST_F(SingleOpMatcherTest, AllPortsAreParams_AttrsNotEqual) { - const auto param1 = std::make_shared(element::Type_t::f32, Shape({10, 10, 10})); - const auto param2 = std::make_shared(element::Type_t::f32, 
Shape({10, 10, 10})); - const auto op1 = std::make_shared<::Concat>(OutputVector({param1, param2}), 1); + const auto param1 = std::make_shared(ov::element::Type_t::f32, ov::Shape({10, 10, 10})); + const auto param2 = std::make_shared(ov::element::Type_t::f32, ov::Shape({10, 10, 10})); + const auto op1 = std::make_shared(ov::OutputVector({param1, param2}), 1); - const auto param3 = std::make_shared(element::Type_t::f32, Shape({10, 10, 10})); - const auto param4 = std::make_shared(element::Type_t::f32, Shape({10, 10, 10})); - const auto op2 = std::make_shared<::Concat>(OutputVector({param3, param4}), 2); + const auto param3 = std::make_shared(ov::element::Type_t::f32, ov::Shape({10, 10, 10})); + const auto param4 = std::make_shared(ov::element::Type_t::f32, ov::Shape({10, 10, 10})); + const auto op2 = std::make_shared(ov::OutputVector({param3, param4}), 2); ASSERT_FALSE(matcher.match(op1, op2, op_info)); } // Check nodes Add OPs match with different constants on ports TEST_F(SingleOpMatcherTest, ChecAddOpConfiguration) { - const auto const1 = std::make_shared(Type_t::f32, Shape({5, 5}), 1); - const auto const2 = std::make_shared(Type_t::f32, Shape({5, 5}), 2); - const auto op1 = std::make_shared(const1, const2); + const auto const1 = std::make_shared(ov::element::Type_t::f32, ov::Shape({5, 5}), 1); + const auto const2 = std::make_shared(ov::element::Type_t::f32, ov::Shape({5, 5}), 2); + const auto op1 = std::make_shared(const1, const2); - const auto const3 = std::make_shared(Type_t::f32, Shape({5, 5}), 3); - const auto const4 = std::make_shared(Type_t::f32, Shape({5, 5}), 4); - const auto op2 = std::make_shared(const1, const2); + const auto const3 = std::make_shared(ov::element::Type_t::f32, ov::Shape({5, 5}), 3); + const auto const4 = std::make_shared(ov::element::Type_t::f32, ov::Shape({5, 5}), 4); + const auto op2 = std::make_shared(const1, const2); ASSERT_TRUE(matcher.match(op1, op2, op_info)); } \ No newline at end of file diff --git a/src/tests/functional/plugin/conformance/subgraphs_dumper/tests/matchers/matchers_config.cpp b/src/tests/functional/plugin/conformance/subgraphs_dumper/tests/matchers/matchers_config.cpp index 322a99fbec5..029c746e205 100644 --- a/src/tests/functional/plugin/conformance/subgraphs_dumper/tests/matchers/matchers_config.cpp +++ b/src/tests/functional/plugin/conformance/subgraphs_dumper/tests/matchers/matchers_config.cpp @@ -6,15 +6,15 @@ #include "matchers/base_matcher.hpp" #include "ngraph/ops.hpp" -using namespace ngraph::op; +using namespace ov::op; using namespace ngraph; -using ngraph::element::Type_t; +using ov::element::Type_t; class MatcherConfigTest : public ::testing::Test { protected: void SetUp() override { - const auto const1 = std::make_shared(Type_t::f32, Shape({5, 5}), 1); - const auto const2 = std::make_shared(Type_t::f32, Shape({5, 5}), 2); + const auto const1 = std::make_shared(Type_t::f32, Shape({5, 5}), 1); + const auto const2 = std::make_shared(Type_t::f32, Shape({5, 5}), 2); node = std::make_shared(const1, const2); } From b7ead46943049d6b44f235eaca2a5f509fbcdb71 Mon Sep 17 00:00:00 2001 From: Irina Efode Date: Tue, 22 Feb 2022 13:02:05 +0300 Subject: [PATCH 057/310] [IE TESTS] Functional tests Review. Part 2 (#10476) * [IE TESTS] Functional tests Review. 
Part 2 * tmp * revert set_blob changes --- .../{hetero => behavior/plugin}/synthetic.cpp | 2 +- .../set_io_blob_precision.cpp} | 3 +- .../infer_request}/dynamic_batch.cpp | 2 +- .../plugin}/auto_batching_tests.cpp | 2 +- .../plugin/hetero_synthetic.cpp} | 2 +- .../infer_request}/dynamic_batch.cpp | 2 +- .../plugin}/auto_batching_tests.cpp | 2 +- .../set_batch}/detect_network_batch_test.cpp | 18 ++++++- .../set_batch}/detect_network_batch_test.hpp | 0 .../plugin}/stress_tests.cpp | 2 +- .../blob_tests/detect_network_batch_test.cpp | 21 -------- .../infer_request}/dynamic_batch.hpp | 0 .../infer_request/set_io_blob_precision.hpp} | 0 .../plugin}/auto_batching_tests.hpp | 0 .../plugin/hetero_synthetic.hpp} | 0 .../plugin}/stress_tests.hpp | 0 .../infer_request}/dynamic_batch.cpp | 2 +- .../infer_request/set_io_blob_precision.cpp} | 52 +++++++++---------- .../plugin/hetero_synthetic.cpp} | 2 +- .../plugin}/stress_tests.cpp | 2 +- 20 files changed, 54 insertions(+), 60 deletions(-) rename docs/template_plugin/tests/functional/shared_tests_instances/{hetero => behavior/plugin}/synthetic.cpp (99%) rename src/tests/functional/plugin/cpu/{shared_tests_instances/blob_tests/set_blob.cpp => blob/set_io_blob_precision.cpp} (91%) rename src/tests/functional/plugin/cpu/shared_tests_instances/{blob_tests => behavior/infer_request}/dynamic_batch.cpp (95%) rename src/tests/functional/plugin/cpu/shared_tests_instances/{auto_batching => behavior/plugin}/auto_batching_tests.cpp (95%) rename src/tests/functional/plugin/cpu/shared_tests_instances/{hetero/synthetic.cpp => behavior/plugin/hetero_synthetic.cpp} (96%) rename src/tests/functional/plugin/gpu/shared_tests_instances/{blob_tests => behavior/infer_request}/dynamic_batch.cpp (95%) rename src/tests/functional/plugin/gpu/shared_tests_instances/{auto_batching => behavior/plugin}/auto_batching_tests.cpp (95%) rename src/tests/functional/plugin/{shared/src/blob_tests => myriad/set_batch}/detect_network_batch_test.cpp (87%) rename src/tests/functional/plugin/{shared/include/blob_tests => myriad/set_batch}/detect_network_batch_test.hpp (100%) rename src/tests/functional/plugin/myriad/shared_tests_instances/{stress_tests => behavior/plugin}/stress_tests.cpp (92%) delete mode 100644 src/tests/functional/plugin/myriad/shared_tests_instances/blob_tests/detect_network_batch_test.cpp rename src/tests/functional/plugin/shared/include/{blob_tests => behavior/infer_request}/dynamic_batch.hpp (100%) rename src/tests/functional/plugin/shared/include/{blob_tests/set_blob.hpp => behavior/infer_request/set_io_blob_precision.hpp} (100%) rename src/tests/functional/plugin/shared/include/{auto_batching => behavior/plugin}/auto_batching_tests.hpp (100%) rename src/tests/functional/plugin/shared/include/{hetero/synthetic.hpp => behavior/plugin/hetero_synthetic.hpp} (100%) rename src/tests/functional/plugin/shared/include/{stress_tests => behavior/plugin}/stress_tests.hpp (100%) rename src/tests/functional/plugin/shared/src/{blob_tests => behavior/infer_request}/dynamic_batch.cpp (99%) rename src/tests/functional/plugin/shared/src/{blob_tests/set_blob.cpp => behavior/infer_request/set_io_blob_precision.cpp} (80%) rename src/tests/functional/plugin/shared/src/{hetero/synthetic.cpp => behavior/plugin/hetero_synthetic.cpp} (99%) rename src/tests/functional/plugin/shared/src/{stress_tests => behavior/plugin}/stress_tests.cpp (97%) diff --git a/docs/template_plugin/tests/functional/shared_tests_instances/hetero/synthetic.cpp 
b/docs/template_plugin/tests/functional/shared_tests_instances/behavior/plugin/synthetic.cpp similarity index 99% rename from docs/template_plugin/tests/functional/shared_tests_instances/hetero/synthetic.cpp rename to docs/template_plugin/tests/functional/shared_tests_instances/behavior/plugin/synthetic.cpp index dfadd8f4a84..e91afe05441 100644 --- a/docs/template_plugin/tests/functional/shared_tests_instances/hetero/synthetic.cpp +++ b/docs/template_plugin/tests/functional/shared_tests_instances/behavior/plugin/synthetic.cpp @@ -4,7 +4,7 @@ #include -#include "hetero/synthetic.hpp" +#include "behavior/plugin/hetero_synthetic.hpp" #include "ngraph_functions/builders.hpp" #include "ngraph_functions/subgraph_builders.hpp" diff --git a/src/tests/functional/plugin/cpu/shared_tests_instances/blob_tests/set_blob.cpp b/src/tests/functional/plugin/cpu/blob/set_io_blob_precision.cpp similarity index 91% rename from src/tests/functional/plugin/cpu/shared_tests_instances/blob_tests/set_blob.cpp rename to src/tests/functional/plugin/cpu/blob/set_io_blob_precision.cpp index d7e43594ab2..8fa31a57f34 100644 --- a/src/tests/functional/plugin/cpu/shared_tests_instances/blob_tests/set_blob.cpp +++ b/src/tests/functional/plugin/cpu/blob/set_io_blob_precision.cpp @@ -2,8 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "blob_tests/set_blob.hpp" -#include "common_test_utils/test_constants.hpp" +#include "behavior/infer_request/set_io_blob_precision.hpp" using namespace BehaviorTestsDefinitions; using namespace InferenceEngine; diff --git a/src/tests/functional/plugin/cpu/shared_tests_instances/blob_tests/dynamic_batch.cpp b/src/tests/functional/plugin/cpu/shared_tests_instances/behavior/infer_request/dynamic_batch.cpp similarity index 95% rename from src/tests/functional/plugin/cpu/shared_tests_instances/blob_tests/dynamic_batch.cpp rename to src/tests/functional/plugin/cpu/shared_tests_instances/behavior/infer_request/dynamic_batch.cpp index d51c6da9044..0b8582578b8 100644 --- a/src/tests/functional/plugin/cpu/shared_tests_instances/blob_tests/dynamic_batch.cpp +++ b/src/tests/functional/plugin/cpu/shared_tests_instances/behavior/infer_request/dynamic_batch.cpp @@ -2,7 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 // -#include +#include #include "common_test_utils/test_constants.hpp" namespace ConfigurationTestsDefinitions { diff --git a/src/tests/functional/plugin/cpu/shared_tests_instances/auto_batching/auto_batching_tests.cpp b/src/tests/functional/plugin/cpu/shared_tests_instances/behavior/plugin/auto_batching_tests.cpp similarity index 95% rename from src/tests/functional/plugin/cpu/shared_tests_instances/auto_batching/auto_batching_tests.cpp rename to src/tests/functional/plugin/cpu/shared_tests_instances/behavior/plugin/auto_batching_tests.cpp index 37ff55e3c73..b1f1432a14e 100644 --- a/src/tests/functional/plugin/cpu/shared_tests_instances/auto_batching/auto_batching_tests.cpp +++ b/src/tests/functional/plugin/cpu/shared_tests_instances/behavior/plugin/auto_batching_tests.cpp @@ -1,7 +1,7 @@ // Copyright (C) 2018-2022 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // -#include +#include const std::vector get_vs_set{ true, false }; const std::vector num_streams{ 1, 2 }; diff --git a/src/tests/functional/plugin/cpu/shared_tests_instances/hetero/synthetic.cpp b/src/tests/functional/plugin/cpu/shared_tests_instances/behavior/plugin/hetero_synthetic.cpp similarity index 96% rename from src/tests/functional/plugin/cpu/shared_tests_instances/hetero/synthetic.cpp rename to 
src/tests/functional/plugin/cpu/shared_tests_instances/behavior/plugin/hetero_synthetic.cpp index e3bbe500f9a..402b986265e 100644 --- a/src/tests/functional/plugin/cpu/shared_tests_instances/hetero/synthetic.cpp +++ b/src/tests/functional/plugin/cpu/shared_tests_instances/behavior/plugin/hetero_synthetic.cpp @@ -4,7 +4,7 @@ #include -#include "hetero/synthetic.hpp" +#include "behavior/plugin/hetero_synthetic.hpp" #include "ngraph_functions/builders.hpp" #include "ngraph_functions/subgraph_builders.hpp" diff --git a/src/tests/functional/plugin/gpu/shared_tests_instances/blob_tests/dynamic_batch.cpp b/src/tests/functional/plugin/gpu/shared_tests_instances/behavior/infer_request/dynamic_batch.cpp similarity index 95% rename from src/tests/functional/plugin/gpu/shared_tests_instances/blob_tests/dynamic_batch.cpp rename to src/tests/functional/plugin/gpu/shared_tests_instances/behavior/infer_request/dynamic_batch.cpp index 83c06de48b3..409511ea9fa 100644 --- a/src/tests/functional/plugin/gpu/shared_tests_instances/blob_tests/dynamic_batch.cpp +++ b/src/tests/functional/plugin/gpu/shared_tests_instances/behavior/infer_request/dynamic_batch.cpp @@ -2,7 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 // -#include +#include #include "common_test_utils/test_constants.hpp" namespace ConfigurationTestsDefinitions { diff --git a/src/tests/functional/plugin/gpu/shared_tests_instances/auto_batching/auto_batching_tests.cpp b/src/tests/functional/plugin/gpu/shared_tests_instances/behavior/plugin/auto_batching_tests.cpp similarity index 95% rename from src/tests/functional/plugin/gpu/shared_tests_instances/auto_batching/auto_batching_tests.cpp rename to src/tests/functional/plugin/gpu/shared_tests_instances/behavior/plugin/auto_batching_tests.cpp index bb1460fcdb7..babdb0416ed 100644 --- a/src/tests/functional/plugin/gpu/shared_tests_instances/auto_batching/auto_batching_tests.cpp +++ b/src/tests/functional/plugin/gpu/shared_tests_instances/behavior/plugin/auto_batching_tests.cpp @@ -1,7 +1,7 @@ // Copyright (C) 2018-2022 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // -#include "auto_batching/auto_batching_tests.hpp" +#include "behavior/plugin/auto_batching_tests.hpp" #include "behavior/plugin/configuration_tests.hpp" diff --git a/src/tests/functional/plugin/shared/src/blob_tests/detect_network_batch_test.cpp b/src/tests/functional/plugin/myriad/set_batch/detect_network_batch_test.cpp similarity index 87% rename from src/tests/functional/plugin/shared/src/blob_tests/detect_network_batch_test.cpp rename to src/tests/functional/plugin/myriad/set_batch/detect_network_batch_test.cpp index 0ba6f0f166a..3ccee366dad 100644 --- a/src/tests/functional/plugin/shared/src/blob_tests/detect_network_batch_test.cpp +++ b/src/tests/functional/plugin/myriad/set_batch/detect_network_batch_test.cpp @@ -2,7 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "blob_tests/detect_network_batch_test.hpp" +#include "detect_network_batch_test.hpp" #include "ngraph_functions/subgraph_builders.hpp" #include "ngraph_functions/builders.hpp" @@ -78,3 +78,19 @@ TEST_P(DetectNetworkBatch, InferWithMultipleInputs_SameDims) { }; } // namespace LayerTestsDefinitions + +using namespace LayerTestsDefinitions; + +const std::vector batchSizes = { + 2, + 4, + 8, +}; + +namespace { +INSTANTIATE_TEST_SUITE_P(smoke_BehaviorTests, DetectNetworkBatch, + ::testing::Combine( + ::testing::Values(CommonTestUtils::DEVICE_MYRIAD), + ::testing::ValuesIn(batchSizes)), + DetectNetworkBatch::getTestCaseName); +} // namespace diff --git 
a/src/tests/functional/plugin/shared/include/blob_tests/detect_network_batch_test.hpp b/src/tests/functional/plugin/myriad/set_batch/detect_network_batch_test.hpp similarity index 100% rename from src/tests/functional/plugin/shared/include/blob_tests/detect_network_batch_test.hpp rename to src/tests/functional/plugin/myriad/set_batch/detect_network_batch_test.hpp diff --git a/src/tests/functional/plugin/myriad/shared_tests_instances/stress_tests/stress_tests.cpp b/src/tests/functional/plugin/myriad/shared_tests_instances/behavior/plugin/stress_tests.cpp similarity index 92% rename from src/tests/functional/plugin/myriad/shared_tests_instances/stress_tests/stress_tests.cpp rename to src/tests/functional/plugin/myriad/shared_tests_instances/behavior/plugin/stress_tests.cpp index 2dde4184585..c45f6a2f9e7 100644 --- a/src/tests/functional/plugin/myriad/shared_tests_instances/stress_tests/stress_tests.cpp +++ b/src/tests/functional/plugin/myriad/shared_tests_instances/behavior/plugin/stress_tests.cpp @@ -2,7 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "stress_tests/stress_tests.hpp" +#include "behavior/plugin/stress_tests.hpp" using namespace LayerTestsDefinitions; diff --git a/src/tests/functional/plugin/myriad/shared_tests_instances/blob_tests/detect_network_batch_test.cpp b/src/tests/functional/plugin/myriad/shared_tests_instances/blob_tests/detect_network_batch_test.cpp deleted file mode 100644 index c66d9ef3657..00000000000 --- a/src/tests/functional/plugin/myriad/shared_tests_instances/blob_tests/detect_network_batch_test.cpp +++ /dev/null @@ -1,21 +0,0 @@ -// Copyright (C) 2018-2022 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "blob_tests/detect_network_batch_test.hpp" - -using namespace LayerTestsDefinitions; - -const std::vector batchSizes = { - 2, - 4, - 8, -}; - -namespace { - INSTANTIATE_TEST_SUITE_P(smoke_BehaviorTests, DetectNetworkBatch, - ::testing::Combine( - ::testing::Values(CommonTestUtils::DEVICE_MYRIAD), - ::testing::ValuesIn(batchSizes)), - DetectNetworkBatch::getTestCaseName); -} // namespace diff --git a/src/tests/functional/plugin/shared/include/blob_tests/dynamic_batch.hpp b/src/tests/functional/plugin/shared/include/behavior/infer_request/dynamic_batch.hpp similarity index 100% rename from src/tests/functional/plugin/shared/include/blob_tests/dynamic_batch.hpp rename to src/tests/functional/plugin/shared/include/behavior/infer_request/dynamic_batch.hpp diff --git a/src/tests/functional/plugin/shared/include/blob_tests/set_blob.hpp b/src/tests/functional/plugin/shared/include/behavior/infer_request/set_io_blob_precision.hpp similarity index 100% rename from src/tests/functional/plugin/shared/include/blob_tests/set_blob.hpp rename to src/tests/functional/plugin/shared/include/behavior/infer_request/set_io_blob_precision.hpp diff --git a/src/tests/functional/plugin/shared/include/auto_batching/auto_batching_tests.hpp b/src/tests/functional/plugin/shared/include/behavior/plugin/auto_batching_tests.hpp similarity index 100% rename from src/tests/functional/plugin/shared/include/auto_batching/auto_batching_tests.hpp rename to src/tests/functional/plugin/shared/include/behavior/plugin/auto_batching_tests.hpp diff --git a/src/tests/functional/plugin/shared/include/hetero/synthetic.hpp b/src/tests/functional/plugin/shared/include/behavior/plugin/hetero_synthetic.hpp similarity index 100% rename from src/tests/functional/plugin/shared/include/hetero/synthetic.hpp rename to 
src/tests/functional/plugin/shared/include/behavior/plugin/hetero_synthetic.hpp diff --git a/src/tests/functional/plugin/shared/include/stress_tests/stress_tests.hpp b/src/tests/functional/plugin/shared/include/behavior/plugin/stress_tests.hpp similarity index 100% rename from src/tests/functional/plugin/shared/include/stress_tests/stress_tests.hpp rename to src/tests/functional/plugin/shared/include/behavior/plugin/stress_tests.hpp diff --git a/src/tests/functional/plugin/shared/src/blob_tests/dynamic_batch.cpp b/src/tests/functional/plugin/shared/src/behavior/infer_request/dynamic_batch.cpp similarity index 99% rename from src/tests/functional/plugin/shared/src/blob_tests/dynamic_batch.cpp rename to src/tests/functional/plugin/shared/src/behavior/infer_request/dynamic_batch.cpp index e553e2bb703..67ec9088099 100644 --- a/src/tests/functional/plugin/shared/src/blob_tests/dynamic_batch.cpp +++ b/src/tests/functional/plugin/shared/src/behavior/infer_request/dynamic_batch.cpp @@ -12,7 +12,7 @@ #include "ie_transformations.hpp" #include "common_test_utils/common_utils.hpp" #include "functional_test_utils/skip_tests_config.hpp" -#include "blob_tests/dynamic_batch.hpp" +#include "behavior/infer_request/dynamic_batch.hpp" #include "ngraph_functions/subgraph_builders.hpp" diff --git a/src/tests/functional/plugin/shared/src/blob_tests/set_blob.cpp b/src/tests/functional/plugin/shared/src/behavior/infer_request/set_io_blob_precision.cpp similarity index 80% rename from src/tests/functional/plugin/shared/src/blob_tests/set_blob.cpp rename to src/tests/functional/plugin/shared/src/behavior/infer_request/set_io_blob_precision.cpp index 5fe7a17c4d7..717c5979336 100644 --- a/src/tests/functional/plugin/shared/src/blob_tests/set_blob.cpp +++ b/src/tests/functional/plugin/shared/src/behavior/infer_request/set_io_blob_precision.cpp @@ -2,8 +2,8 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "blob_tests/set_blob.hpp" -#include +#include "behavior/infer_request/set_io_blob_precision.hpp" +#include "ngraph_functions/builders.hpp" using namespace InferenceEngine; @@ -11,17 +11,17 @@ namespace BehaviorTestsDefinitions { std::ostream& operator<<(std::ostream & os, setType type) { switch (type) { - case setType::INPUT: - os << "INPUT"; - break; - case setType::OUTPUT: - os << "OUTPUT"; - break; - case setType::BOTH: - os << "BOTH"; - break; - default: - IE_THROW() << "Not supported type for SetBlob"; + case setType::INPUT: + os << "INPUT"; + break; + case setType::OUTPUT: + os << "OUTPUT"; + break; + case setType::BOTH: + os << "BOTH"; + break; + default: + IE_THROW() << "Not supported type for SetBlob"; } return os; } @@ -43,19 +43,19 @@ std::string SetBlobTest::getTestCaseName(testing::TestParamInfo o inline void fillBlob(Blob::Ptr &blob) { switch (blob->getTensorDesc().getPrecision()) { #define CASE(X) case X: CommonTestUtils::fill_data_random(blob); break; - CASE(Precision::U8) - CASE(Precision::I8) - CASE(Precision::U16) - CASE(Precision::I16) - CASE(Precision::U32) - CASE(Precision::I32) - CASE(Precision::U64) - CASE(Precision::I64) - CASE(Precision::BF16) - CASE(Precision::FP16) - CASE(Precision::FP32) - CASE(Precision::FP64) - CASE(Precision::BOOL) + CASE(Precision::U8) + CASE(Precision::I8) + CASE(Precision::U16) + CASE(Precision::I16) + CASE(Precision::U32) + CASE(Precision::I32) + CASE(Precision::U64) + CASE(Precision::I64) + CASE(Precision::BF16) + CASE(Precision::FP16) + CASE(Precision::FP32) + CASE(Precision::FP64) + CASE(Precision::BOOL) #undef CASE default: IE_THROW() << "Can't fill blob 
with precision: " << blob->getTensorDesc().getPrecision(); diff --git a/src/tests/functional/plugin/shared/src/hetero/synthetic.cpp b/src/tests/functional/plugin/shared/src/behavior/plugin/hetero_synthetic.cpp similarity index 99% rename from src/tests/functional/plugin/shared/src/hetero/synthetic.cpp rename to src/tests/functional/plugin/shared/src/behavior/plugin/hetero_synthetic.cpp index 6165aef7cf0..040e22bf53d 100644 --- a/src/tests/functional/plugin/shared/src/hetero/synthetic.cpp +++ b/src/tests/functional/plugin/shared/src/behavior/plugin/hetero_synthetic.cpp @@ -2,7 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "hetero/synthetic.hpp" +#include "behavior/plugin/hetero_synthetic.hpp" #include #include #include "ngraph_functions/builders.hpp" diff --git a/src/tests/functional/plugin/shared/src/stress_tests/stress_tests.cpp b/src/tests/functional/plugin/shared/src/behavior/plugin/stress_tests.cpp similarity index 97% rename from src/tests/functional/plugin/shared/src/stress_tests/stress_tests.cpp rename to src/tests/functional/plugin/shared/src/behavior/plugin/stress_tests.cpp index 8119c9b55c6..11b8317e7f9 100644 --- a/src/tests/functional/plugin/shared/src/stress_tests/stress_tests.cpp +++ b/src/tests/functional/plugin/shared/src/behavior/plugin/stress_tests.cpp @@ -2,7 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "stress_tests/stress_tests.hpp" +#include "behavior/plugin/stress_tests.hpp" #include "ngraph_functions/subgraph_builders.hpp" namespace LayerTestsDefinitions { From a3887f332847113d79e020cd095170e1f27cc165 Mon Sep 17 00:00:00 2001 From: Alexey Varyzgin Date: Tue, 22 Feb 2022 02:05:19 -0800 Subject: [PATCH 058/310] [CPU] Transpose node optimized with Reorder (#10551) --- src/plugins/intel_cpu/src/nodes/transpose.cpp | 81 ++++++++++++++++++- src/plugins/intel_cpu/src/nodes/transpose.h | 2 + 2 files changed, 82 insertions(+), 1 deletion(-) diff --git a/src/plugins/intel_cpu/src/nodes/transpose.cpp b/src/plugins/intel_cpu/src/nodes/transpose.cpp index 996a49daf4d..2c3756105f6 100644 --- a/src/plugins/intel_cpu/src/nodes/transpose.cpp +++ b/src/plugins/intel_cpu/src/nodes/transpose.cpp @@ -14,6 +14,32 @@ using namespace mkldnn; using namespace ov::intel_cpu; using namespace InferenceEngine; +namespace { +struct TransposeAsReorderKey { + mkldnn::memory::desc src; + mkldnn::memory::desc dest; + size_t hash() const; + bool operator==(const TransposeAsReorderKey& rhs) const; +}; + +size_t TransposeAsReorderKey::hash() const { + using namespace dnnl::impl; + using namespace dnnl::impl::primitive_hashing; + + size_t seed = 0; + seed = hash_combine(seed, get_md_hash(src.data)); + seed = hash_combine(seed, get_md_hash(dest.data)); + + return seed; +} + +bool TransposeAsReorderKey::operator==(const TransposeAsReorderKey& rhs) const { + bool retVal = true; + retVal = src == rhs.src && dest == rhs.dest; + return retVal; +} +} // namespace + bool MKLDNNTransposeNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { if (!one_of(op->get_type_info(), @@ -122,6 +148,53 @@ void MKLDNNTransposeNode::prepareParams() { params.src_block_dims = srcDesc->getBlockDims(); auto dstDesc = getChildEdgeAt(0)->getMemory().GetDescWithType(); params.dst_block_dims = dstDesc->getBlockDims(); + + if (performAsReorder) { + mkldnn::primitive_attr attr; + const auto engine = getEngine(); + auto& dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); + auto& srcMemPtr = getParentEdgeAt(INPUT_DATA_IDX)->getMemoryPtr(); + MKLDNNMemoryPtr src_blocked = 
std::make_shared(engine); + MKLDNNMemoryPtr dst_blocked = std::make_shared(engine); + + dst_blocked->Create( + MKLDNNExtensionUtils::makeDescriptor(dstMemPtr->GetDescWithType()->getDnnlDesc()), + dstMemPtr->GetData(), false); + + const auto newDims = dst_blocked->getStaticDims(); + auto newDesc = mkldnn::memory::desc(MKLDNNExtensionUtils::convertToDnnlDims(newDims), + dst_blocked->GetDataType(), + memory::format_tag::acdb); + src_blocked->Create(MKLDNNExtensionUtils::makeDescriptor(newDesc), srcMemPtr->GetData(), false); + + impl_desc_type impl_type = getSelectedPrimitiveDescriptor()->getImplementationType(); + TransposeAsReorderKey key = {src_blocked->GetPrimitive().get_desc(), dst_blocked->GetPrimitive().get_desc()}; + auto builder = [&engine, &impl_type](const TransposeAsReorderKey& key) -> std::shared_ptr { + mkldnn::primitive_attr attr; + reorder::primitive_desc pd = mkldnn::reorder::primitive_desc(engine, key.src, engine, key.dest, attr, true); + + if (!pd) + return nullptr; + auto info = pd.impl_info_str(); + impl_type = parse_impl_name(info); + return std::make_shared(pd); + }; + + auto cache = getRuntimeCache(); + auto result = cache->getOrCreate(key, builder); + + if (!result.first) { + IE_THROW() << "Reorder primitive descriptor was not found for Transpose node " << getName() << "."; + } + + prim = result.first; + + supportedPrimitiveDescriptors[0].setImplementationType(impl_type); + primArgs = {{DNNL_ARG_SRC, getParentEdgesAtPort(INPUT_DATA_IDX)[0]->getMemoryPtr()->GetPrimitive()}, + {DNNL_ARG_DST, getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPrimitive()}}; + return; + } + if (!isInputOrderConst) { auto orderPtr = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); auto orderLen = getParentEdgeAt(0)->getMemoryPtr()->GetSize(); @@ -154,6 +227,10 @@ void MKLDNNTransposeNode::createPrimitive() { IE_THROW() << "Preferable primitive descriptor was not set."; if (getParentEdgeAt(INPUT_DATA_IDX)->getMemory().getDesc().hasLayoutType(LayoutType::ncsp) && + getChildEdgeAt(0)->getMemory().getDesc().hasLayoutType(LayoutType::ncsp) && + order == std::vector{0, 3, 1, 2}) { + performAsReorder = true; + } else if (getParentEdgeAt(INPUT_DATA_IDX)->getMemory().getDesc().hasLayoutType(LayoutType::ncsp) && std::find(optimizedOrders.begin(), optimizedOrders.end(), order) != optimizedOrders.end()) { isOptimized = true; execPtr = std::make_shared(); @@ -276,7 +353,9 @@ void MKLDNNTransposeNode::optimizedExecute(const int MB, const MKLDNNMemoryPtr& } void MKLDNNTransposeNode::execute(mkldnn::stream strm) { - if (execPtr) { + if (prim) { + (*prim).execute(strm, primArgs); + } else if (execPtr) { auto &dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); auto &srcMemPtr = getParentEdgeAt(INPUT_DATA_IDX)->getMemoryPtr(); diff --git a/src/plugins/intel_cpu/src/nodes/transpose.h b/src/plugins/intel_cpu/src/nodes/transpose.h index 14e733d02f6..b9397315f17 100644 --- a/src/plugins/intel_cpu/src/nodes/transpose.h +++ b/src/plugins/intel_cpu/src/nodes/transpose.h @@ -92,6 +92,8 @@ private: static constexpr size_t INPUT_DATA_IDX = 0lu; static constexpr size_t INPUT_ORDER_IDX = 1lu; + + bool performAsReorder = false; }; } // namespace intel_cpu From 6500ec775d2a0f38c85fbcd972a2049e130b5383 Mon Sep 17 00:00:00 2001 From: Ivan Novoselov Date: Tue, 22 Feb 2022 13:30:15 +0300 Subject: [PATCH 059/310] [Snippets] Check for cyclic dependencies during ternary merge. 
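The guard this patch adds stops the tokenizer from completing a ternary merge when the merge itself would close a loop: if a path leaves the set of nodes being fused and later feeds back into it, contracting that set into a single Subgraph node turns the path into a cycle. Below is a minimal, self-contained sketch of that condition on a plain DAG; the graph type, node indices and function names are illustrative only and are not the ngraph/OpenVINO classes used by the pass, which instead calls cyclicDependencyIsIntoduced() with precomputed topological bounds, as seen in the collapse_subgraph.cpp hunk further down.

// Sketch: contracting a node set `fused` of a DAG into one node creates a cycle
// exactly when an edge leaves the set and the graph can re-enter it from outside.
#include <cstddef>
#include <unordered_set>
#include <vector>

using Graph = std::vector<std::vector<std::size_t>>;  // adjacency list of a DAG

// Can any node of `fused` be reached from `start` through nodes outside `fused`?
static bool reentersFusedSet(const Graph& g, std::size_t start,
                             const std::unordered_set<std::size_t>& fused) {
    std::vector<std::size_t> stack{start};
    std::unordered_set<std::size_t> visited;
    while (!stack.empty()) {
        const std::size_t v = stack.back();
        stack.pop_back();
        if (fused.count(v))
            return true;               // path left the set and came back: cycle
        if (!visited.insert(v).second)
            continue;
        for (const std::size_t next : g[v])
            stack.push_back(next);
    }
    return false;
}

// True if merging `fused` into a single node would introduce a cycle.
bool mergeWouldCreateCycle(const Graph& g, const std::unordered_set<std::size_t>& fused) {
    for (const std::size_t u : fused)
        for (const std::size_t x : g[u])
            if (!fused.count(x) && reentersFusedSet(g, x, fused))
                return true;
    return false;
}

For example, with edges 0->1, 0->2 and 2->1, merging {0, 1} would create a cycle through node 2; that is the situation the pass now aborts with "Attempt to perform recurrent merge for cyclic-dependent subgraphs."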
(#10374) --- src/common/snippets/src/op/subgraph.cpp | 31 ++++++++++++------- .../snippets/src/pass/collapse_subgraph.cpp | 7 +++-- 2 files changed, 25 insertions(+), 13 deletions(-) diff --git a/src/common/snippets/src/op/subgraph.cpp b/src/common/snippets/src/op/subgraph.cpp index 7dff3913085..e73dc6b4bce 100644 --- a/src/common/snippets/src/op/subgraph.cpp +++ b/src/common/snippets/src/op/subgraph.cpp @@ -133,13 +133,7 @@ Shape snippets::op::Subgraph::canonicalize(const BlockedShapeVector& outputShape NODE_VALIDATION_CHECK(this, outputShapes.size() == m_body->get_results().size(), "number of results for snippet doesn't match passed to generate method: ", outputShapes.size(), " vs ", m_body->get_results().size(), "."); - // todo: does it allowed to have outputs with different layouts? I assume no, remove if invalid - const AxisVector outOrder = get<1>(outputShapes[0]); - for (size_t i = 1; i < outputShapes.size(); i++) { - const AxisVector order_i = get<1>(outputShapes[i]); - NODE_VALIDATION_CHECK(this, outOrder.size() == order_i.size() && equal(outOrder.begin(), outOrder.end(), order_i.begin()), - "Snippets output shapes must have the same layout"); - } + auto getMaxRankBlockedShape = [](const BlockedShapeVector& blockedShapes) -> const BlockedShape& { return *std::max_element(blockedShapes.begin(), blockedShapes.end(), [&](const BlockedShape& lhs, const BlockedShape& rhs) { @@ -187,18 +181,33 @@ Shape snippets::op::Subgraph::canonicalize(const BlockedShapeVector& outputShape } m_body->validate_nodes_and_infer_types(); + auto skipStartEndOnes = [](const Shape& shape) { + auto begin = shape.begin(); + auto end = shape.end(); + while (*begin == 1 && begin != end) + begin++; + while (begin != end && *(end-1) == 1) + end--; + Shape trimmedShape(end - begin, 1); + std::copy(begin, end, trimmedShape.begin()); + return trimmedShape; + }; // Check that output shapes are broadcastable => can be scheduled const auto& body_results = m_body->get_results(); PartialShape outPShape = body_results[0]->get_shape(); for (size_t i = 0; i < body_results.size(); i++) { auto shape_i = body_results[i]->get_shape(); - PartialShape pShape_i(shape_i); + auto outputShape_i = std::get<0>(outputShapes[i]); // Check that the produced output shape corresponds to the passed shape - bool compatibleWithPassedShape = PartialShape::broadcast_merge_into(pShape_i, std::get<0>(outputShapes[i]), + // Some produced shapes may have been changed to be broadcastable (e.g. 
blocked + planar outputs), + // so we need to remove leading and trailing "1" before the comparison + PartialShape pShape_i(skipStartEndOnes(shape_i)); + bool compatibleWithPassedShape = PartialShape::broadcast_merge_into(pShape_i, skipStartEndOnes(outputShape_i), ::ngraph::op::AutoBroadcastType::NUMPY); - NODE_VALIDATION_CHECK(this, compatibleWithPassedShape, "Inferred and passed results shapes are difference for snippet : ", - shape_i, " vs ", std::get<0>(outputShapes[i]), "."); + NODE_VALIDATION_CHECK(this, ov::shape_size(shape_i) == ov::shape_size(outputShape_i) && + compatibleWithPassedShape, "Inferred and passed results shapes are incompatible for snippet ", + get_friendly_name(), " : ", shape_i, " vs ", outputShape_i, "."); // Check that output shapes are broadcastable to each other => can be scheduled bool compatibleWithOtherOutputs = PartialShape::broadcast_merge_into(outPShape, shape_i, ::ngraph::op::AutoBroadcastType::NUMPY); diff --git a/src/common/snippets/src/pass/collapse_subgraph.cpp b/src/common/snippets/src/pass/collapse_subgraph.cpp index 1529b58a81d..0bd462f1ee5 100644 --- a/src/common/snippets/src/pass/collapse_subgraph.cpp +++ b/src/common/snippets/src/pass/collapse_subgraph.cpp @@ -372,9 +372,12 @@ TokenizeSnippets::TokenizeSnippets() { auto internal = input_body_parameters[i]; auto internal_consumers = internal->outputs(); - if (auto to_replace_with = ov::as_type_ptr(subgraph->get_input_node_shared_ptr(i))) { - for (auto output : internal_consumers) { + // todo: In principle, we can still attach the node to the subgraph if cyclic dependency is introduced during ternary merge. + // Need to support. + if (cyclicDependencyIsIntoduced(to_replace_with, currentTopoBounds)) + return abort_with_strategy("Attempt to perform recurrent merge for cyclic-dependent subgraphs. Aborting."); + for (const auto& output : internal_consumers) { for (auto consumer : output.get_target_inputs()) { auto other_body = clones[subgraph->get_input_node_shared_ptr(i)]; auto other_body_result = other_body->get_results()[consumer.get_source_output().get_index()]; From efd3c119faa94ff124967d4b81926c05e90342c0 Mon Sep 17 00:00:00 2001 From: Andrey Zaytsev Date: Tue, 22 Feb 2022 13:33:44 +0300 Subject: [PATCH 060/310] Update Yocto documentation (#10547) (#10591) * installing-openvino-yocto: fix documentation links Point to the new Yocto docs website. Signed-off-by: Anuj Mittal * Update installing-openvino-yocto.md * installing-openvino-yocto: add step to checkout specific branch Request users to checkout specific branch of meta-intel where this version of OpenVINO is available. Signed-off-by: Anuj Mittal Co-authored-by: Yuan Xu Co-authored-by: Anuj Mittal Co-authored-by: Yuan Xu --- .../installing-openvino-yocto.md | 38 +++++++++---------- 1 file changed, 18 insertions(+), 20 deletions(-) diff --git a/docs/install_guides/installing-openvino-yocto.md b/docs/install_guides/installing-openvino-yocto.md index c5340095d90..e37fe59c5ae 100644 --- a/docs/install_guides/installing-openvino-yocto.md +++ b/docs/install_guides/installing-openvino-yocto.md @@ -1,30 +1,28 @@ -# Create a Yocto* Image with OpenVINO™ toolkit {#openvino_docs_install_guides_installing_openvino_yocto} -This document provides instructions for creating a Yocto* image with OpenVINO™ toolkit. - -Instructions were validated and tested for [Yocto OpenVINO 2020.4 release](http://git.yoctoproject.org/cgit/cgit.cgi/meta-intel). 
+# Create a Yocto Image with Intel® Distribution of OpenVINO™ toolkit {#openvino_docs_install_guides_installing_openvino_yocto} +This document provides instructions for creating a Yocto image with Intel® Distribution of OpenVINO™ toolkit. ## System Requirements -Use the [Yocto Project* official documentation](https://www.yoctoproject.org/docs/latest/mega-manual/mega-manual.html#brief-compatible-distro) to set up and configure your host machine to be compatible with BitBake*. +Use the [Yocto Project official documentation](https://docs.yoctoproject.org/brief-yoctoprojectqs/index.html#compatible-linux-distribution) to set up and configure your host machine to be compatible with BitBake. -## Setup +## Step 1: Set Up Environment -### Set up Git repositories +### Set Up Git Repositories The following Git repositories are required to build a Yocto image: -- [Poky](https://www.yoctoproject.org/docs/latest/mega-manual/mega-manual.html#poky) -- [Meta-intel](http://git.yoctoproject.org/cgit/cgit.cgi/meta-intel/tree/README) +- [Poky](https://git.yoctoproject.org/poky) +- [Meta-intel](https://git.yoctoproject.org/meta-intel/tree/README) - [Meta-openembedded](http://cgit.openembedded.org/meta-openembedded/tree/README) - Meta-clang Clone these Git repositories to your host machine: ```sh -git clone https://git.yoctoproject.org/git/poky -git clone https://git.yoctoproject.org/git/meta-intel -git clone https://git.openembedded.org/meta-openembedded -git clone https://github.com/kraj/meta-clang.git +git clone https://git.yoctoproject.org/git/poky --branch honister +git clone https://git.yoctoproject.org/git/meta-intel --branch honister +git clone https://git.openembedded.org/meta-openembedded --branch honister +git clone https://github.com/kraj/meta-clang.git --branch honister ``` -### Set up BitBake* Layers +### Set up BitBake Layers ```sh source poky/oe-init-build-env @@ -36,7 +34,7 @@ bitbake-layers add-layer ../meta-clang ### Set up BitBake Configurations -Include extra configuration in conf/local.conf in your build directory as required. +Include extra configuration in `conf/local.conf` in your build directory as required. ```sh # Build with SSE4.2, AVX2 etc. extensions @@ -67,22 +65,22 @@ CORE_IMAGE_EXTRA_INSTALL_append = " openvino-inference-engine-vpu-firmware" CORE_IMAGE_EXTRA_INSTALL_append = " openvino-model-optimizer" ``` -## Build a Yocto Image with OpenVINO Packages +## Step 2: Build a Yocto Image with OpenVINO Packages -Run BitBake to build the minimal image with OpenVINO packages: +Run BitBake to build your image with OpenVINO packages. To build the minimal image, for example, run: ```sh bitbake core-image-minimal ``` -## Verify the Created Yocto Image with OpenVINO Packages +## Step 3: Verify the Yocto Image with OpenVINO Packages Verify that OpenVINO packages were built successfully. -Run 'oe-pkgdata-util list-pkgs | grep openvino' command. 
+Run the following command: ```sh oe-pkgdata-util list-pkgs | grep openvino ``` -Verify that it returns the list of packages below: +If the image was built successfully, it will return the list of packages as below: ```sh openvino-inference-engine openvino-inference-engine-dbg From 3d223ebc2ac4029fd6c3cd5cf304eeaf2fae7843 Mon Sep 17 00:00:00 2001 From: Pavel Esir Date: Tue, 22 Feb 2022 13:51:10 +0300 Subject: [PATCH 061/310] [MO] update error message when reverse infer was not successful (#10576) * update error message when reverse infer was not successful * corrected message when there are several undefined Parameters --- .../mo/openvino/tools/mo/middle/passes/infer.py | 17 +++++++++++++++++ .../front/common/partial_infer/eltwise_test.py | 2 +- 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/tools/mo/openvino/tools/mo/middle/passes/infer.py b/tools/mo/openvino/tools/mo/middle/passes/infer.py index e1116f34d17..97c263c8ecd 100644 --- a/tools/mo/openvino/tools/mo/middle/passes/infer.py +++ b/tools/mo/openvino/tools/mo/middle/passes/infer.py @@ -353,3 +353,20 @@ def reverse_infer(graph: Graph, nodes: list): log.debug('Inputs:') log_debug_dict(node.in_nodes(), 'inputs') + parameters_with_no_shape = [] + for node in graph.get_op_nodes(op='Parameter'): + if not node.has_valid('shape'): + parameters_with_no_shape.append(node) + + if len(parameters_with_no_shape) == 0: + return + + parameters_names = '' + for idx, node in enumerate(parameters_with_no_shape): + parameters_names += "'{}'".format(node.soft_get('name', node.id)) + if idx < len(parameters_with_no_shape) - 1: + parameters_names += ', ' + + if len(parameters_with_no_shape) > 0: + raise Error("Model Optimizer is unable to deduce input shapes for the following Parameter nodes: {}. " + "Please use cli options --input or --input_shape to set model input shape.".format(parameters_names)) diff --git a/tools/mo/unit_tests/mo/front/common/partial_infer/eltwise_test.py b/tools/mo/unit_tests/mo/front/common/partial_infer/eltwise_test.py index 6b459682043..099e5c434f5 100644 --- a/tools/mo/unit_tests/mo/front/common/partial_infer/eltwise_test.py +++ b/tools/mo/unit_tests/mo/front/common/partial_infer/eltwise_test.py @@ -180,7 +180,7 @@ class TestElementwiseReverseInfer(unittest.TestCase): def test_reverse_infer_6(self): # both output and input has the same rank, cannot deduce other inputs rank - with self.assertRaisesRegex(Error, 'Stopped shape/value propagation'): + with self.assertRaisesRegex(Error, "Model Optimizer is unable to deduce input shapes"): self.build_and_test_reverse_inference(inp_shape_1=[dyn, dyn, dyn, dyn], inp_shape_2=None, out_shape=[dyn, dyn, 4, 1], From 3f15afb926d72b9e357f8cbbeb6b0d2d022704ec Mon Sep 17 00:00:00 2001 From: Sofya Balandina Date: Tue, 22 Feb 2022 13:55:51 +0300 Subject: [PATCH 062/310] [IE TEST] Continue run after crash (#10037) --- .../conformance_infra/src/main.cpp | 19 ++ .../src/read_ir_test/read_ir.cpp | 220 ++++++++++-------- .../include/behavior/plugin/life_time.hpp | 1 + .../shared/src/behavior/plugin/life_time.cpp | 45 ++-- .../op_impl_check/op_impl_check.cpp | 34 +-- .../base/layer_test_utils.hpp | 1 + .../shared_test_classes/base/ov_subgraph.hpp | 6 - .../src/base/layer_test_utils.cpp | 65 +++--- .../src/base/ov_subgraph.cpp | 89 +++---- .../common_test_utils/crash_handler.cpp | 55 +++++ .../common_test_utils/crash_handler.hpp | 24 ++ 11 files changed, 342 insertions(+), 217 deletions(-) create mode 100644 src/tests/ie_test_utils/common_test_utils/crash_handler.cpp create mode 100644 
src/tests/ie_test_utils/common_test_utils/crash_handler.hpp diff --git a/src/tests/functional/plugin/conformance/test_runner/conformance_infra/src/main.cpp b/src/tests/functional/plugin/conformance/test_runner/conformance_infra/src/main.cpp index 00d5b4a35ff..0aaa25761ef 100644 --- a/src/tests/functional/plugin/conformance/test_runner/conformance_infra/src/main.cpp +++ b/src/tests/functional/plugin/conformance/test_runner/conformance_infra/src/main.cpp @@ -59,5 +59,24 @@ int main(int argc, char* argv[]) { ::testing::InitGoogleTest(&argc, argv); ::testing::AddGlobalTestEnvironment(new LayerTestsUtils::TestEnvironment); + + auto exernalSignalHandler = [](int errCode) { + std::cerr << "Unexpected application crash with code: " << errCode << std::endl; + + // set default handler for crash + signal(SIGINT, SIG_DFL); + signal(SIGTERM, SIG_DFL); + + if (errCode == SIGINT || errCode == SIGTERM) { + auto& s = LayerTestsUtils::Summary::getInstance(); + s.saveReport(); + exit(1); + } + }; + + // killed by extarnal + signal(SIGINT, exernalSignalHandler); + signal(SIGTERM , exernalSignalHandler); + return RUN_ALL_TESTS(); } diff --git a/src/tests/functional/plugin/conformance/test_runner/conformance_infra/src/read_ir_test/read_ir.cpp b/src/tests/functional/plugin/conformance/test_runner/conformance_infra/src/read_ir_test/read_ir.cpp index 812de4b83ce..44a36a53f0b 100644 --- a/src/tests/functional/plugin/conformance/test_runner/conformance_infra/src/read_ir_test/read_ir.cpp +++ b/src/tests/functional/plugin/conformance/test_runner/conformance_infra/src/read_ir_test/read_ir.cpp @@ -13,11 +13,14 @@ #include "common_test_utils/file_utils.hpp" #include "common_test_utils/data_utils.hpp" #include "common_test_utils/common_utils.hpp" +#include "common_test_utils/crash_handler.hpp" #include "functional_test_utils/layer_test_utils/op_info.hpp" #include "functional_test_utils/skip_tests_config.hpp" #include "read_ir_test/read_ir.hpp" +#include + namespace ov { namespace test { namespace subgraph { @@ -48,118 +51,143 @@ std::string ReadIRTest::getTestCaseName(const testing::TestParamInfoset_friendly_name("refFunction"); - } - auto crashHandler = [](int errCode) { + // in case of crash jump will be made and work will be continued + auto crashHandler = std::unique_ptr(new CommonTestUtils::CrashHandler()); + + // place to jump in case of a crash +#ifdef _WIN32 + if (setjmp(CommonTestUtils::env) == 0) { +#else + if (sigsetjmp(CommonTestUtils::env, 1) == 0) { +#endif + if (functionRefs == nullptr) { + functionRefs = ngraph::clone_function(*function); + functionRefs->set_friendly_name("refFunction"); + } + auto &s = LayerTestsUtils::Summary::getInstance(); - s.saveReport(); - std::cout << "Unexpected application crash!" 
<< std::endl; - std::abort(); - }; - signal(SIGSEGV, crashHandler); + s.setDeviceName(targetDevice); - auto &s = LayerTestsUtils::Summary::getInstance(); - s.setDeviceName(targetDevice); - - if (FuncTestUtils::SkipTestsConfig::currentTestIsDisabled()) { - s.updateOPsStats(functionRefs, LayerTestsUtils::PassRate::Statuses::SKIPPED); - GTEST_SKIP() << "Disabled test due to configuration" << std::endl; + if (FuncTestUtils::SkipTestsConfig::currentTestIsDisabled()) { + s.updateOPsStats(functionRefs, LayerTestsUtils::PassRate::Statuses::SKIPPED); + GTEST_SKIP() << "Disabled test due to configuration" << std::endl; + } else { + s.updateOPsStats(functionRefs, LayerTestsUtils::PassRate::Statuses::CRASHED); + } + try { + SubgraphBaseTest::query_model(); + s.updateOPsStats(functionRefs, LayerTestsUtils::PassRate::Statuses::PASSED); + } catch (...) { + s.updateOPsStats(functionRefs, LayerTestsUtils::PassRate::Statuses::FAILED); + } } else { - s.updateOPsStats(functionRefs, LayerTestsUtils::PassRate::Statuses::CRASHED); - } - try { - SubgraphBaseTest::query_model(); - s.updateOPsStats(functionRefs, LayerTestsUtils::PassRate::Statuses::PASSED); - } catch (...) { - s.updateOPsStats(functionRefs, LayerTestsUtils::PassRate::Statuses::FAILED); + IE_THROW() << "Crash happens"; } } void ReadIRTest::SetUp() { - std::tie(pathToModel, targetDevice, configuration) = this->GetParam(); - function = core->read_model(pathToModel); - const auto metaFile = CommonTestUtils::replaceExt(pathToModel, "meta"); - if (CommonTestUtils::fileExists(metaFile)) { - pugi::xml_document doc; - doc.load_file(metaFile.c_str()); - auto models = doc.child("meta_info").child("models"); - sourceModel = models.child("initial_model").attribute("name").as_string(); - for (const auto &model : models.children("model")) { - ocuranceInModels.push_back({model.attribute("name").as_string(), model.attribute("count").as_uint()}); - } - auto portsInfo = doc.child("meta_info").child("ports_info"); - auto getPortInfo = [&](size_t id) { - LayerTestsUtils::PortInfo info; - for (const auto &p : portsInfo.children()) { - if (p.attribute("id").as_uint() == id) { - info.convert_to_const = p.attribute("convert_to_const").as_bool(); - if (std::strcmp(p.attribute("min").as_string(), "undefined") != 0) { - info.min = p.attribute("min").as_double(); - } else { - info.min = -10; - } - if (std::strcmp(p.attribute("max").as_string(), "undefined") != 0) { - info.max = p.attribute("max").as_double(); - } else { - info.max = 10; - } - break; - } - } - return info; - }; + // in case of crash jump will be made and work will be continued + auto crashHandler = std::unique_ptr(new CommonTestUtils::CrashHandler()); - auto params = function->get_parameters(); - for (const auto ¶m : params) { - auto idx = -1; - for (size_t i = 0; i < param->get_output_size(); i++) { - for (const auto &node : param->get_output_target_inputs(i)) { - const auto nodePtr = node.get_node()->shared_from_this(); - for (size_t port = 0; port < nodePtr->get_input_size(); ++port) { - if (nodePtr->get_input_node_ptr(port)->shared_from_this() == param->shared_from_this()) { - idx = port; - break; + // place to jump in case of a crash +#ifdef _WIN32 + if (setjmp(CommonTestUtils::env) == 0) { +#else + if (sigsetjmp(CommonTestUtils::env, 1) == 0) { +#endif + std::tie(pathToModel, targetDevice, configuration) = this->GetParam(); + function = core->read_model(pathToModel); + const auto metaFile = CommonTestUtils::replaceExt(pathToModel, "meta"); + if (CommonTestUtils::fileExists(metaFile)) { + 
pugi::xml_document doc; + doc.load_file(metaFile.c_str()); + auto models = doc.child("meta_info").child("models"); + sourceModel = models.child("initial_model").attribute("name").as_string(); + for (const auto &model : models.children("model")) { + ocuranceInModels.push_back({model.attribute("name").as_string(), model.attribute("count").as_uint()}); + } + auto portsInfo = doc.child("meta_info").child("ports_info"); + auto getPortInfo = [&](size_t id) { + LayerTestsUtils::PortInfo info; + for (const auto &p : portsInfo.children()) { + if (p.attribute("id").as_uint() == id) { + info.convert_to_const = p.attribute("convert_to_const").as_bool(); + if (std::strcmp(p.attribute("min").as_string(), "undefined") != 0) { + info.min = p.attribute("min").as_double(); + } else { + info.min = -10; + } + if (std::strcmp(p.attribute("max").as_string(), "undefined") != 0) { + info.max = p.attribute("max").as_double(); + } else { + info.max = 10; + } + break; + } + } + return info; + }; + + auto params = function->get_parameters(); + for (const auto ¶m : params) { + auto idx = -1; + for (size_t i = 0; i < param->get_output_size(); i++) { + for (const auto &node : param->get_output_target_inputs(i)) { + const auto nodePtr = node.get_node()->shared_from_this(); + for (size_t port = 0; port < nodePtr->get_input_size(); ++port) { + if (nodePtr->get_input_node_ptr(port)->shared_from_this() == param->shared_from_this()) { + idx = port; + break; + } } } } - } - EXPECT_GE(idx, 0); + EXPECT_GE(idx, 0); - auto info = getPortInfo(idx); - if (info.convert_to_const) { - const auto constant = ngraph::builder::makeConstant(param->get_element_type(), - param->get_shape(), - std::vector{}, - true, - info.max, - info.min, - 1); - ov::replace_node(param, constant); - function->remove_parameter(param); - } - } - } - std::vector inputShapes; - for (const auto& param : function -> get_parameters()) { - if (param->get_partial_shape().is_static()) { - inputShapes.push_back(InputShape{{}, {param->get_shape()}}); - } else { - ov::Shape midShape; - for (const auto s : param->get_partial_shape()) { - int dimValue = s.get_length(); - if (s.is_dynamic()) { - CommonTestUtils::fill_data_random(&dimValue, 1, s.get_max_length() - s.get_min_length(), s.get_min_length(), 1); + auto info = getPortInfo(idx); + if (info.convert_to_const) { + const auto constant = ngraph::builder::makeConstant(param->get_element_type(), + param->get_shape(), + std::vector{}, + true, + info.max, + info.min, + 1); + ov::replace_node(param, constant); + function->remove_parameter(param); } - midShape.push_back(dimValue); } - inputShapes.push_back(InputShape{param->get_partial_shape(), { param->get_partial_shape().get_min_shape(), - param->get_partial_shape().get_max_shape(), - midShape }}); } + std::vector staticShapes; + for (const auto param : function->get_parameters()) { + if (param->get_partial_shape().is_static()) { + staticShapes.push_back(param->get_shape()); + } else { + staticShapes.push_back(param->get_partial_shape().get_max_shape()); + } + } + std::vector inputShapes; + for (const auto& param : function -> get_parameters()) { + if (param->get_partial_shape().is_static()) { + inputShapes.push_back(InputShape{{}, {param->get_shape()}}); + } else { + ov::Shape midShape; + for (const auto s : param->get_partial_shape()) { + int dimValue = s.get_length(); + if (s.is_dynamic()) { + CommonTestUtils::fill_data_random(&dimValue, 1, s.get_max_length() - s.get_min_length(), s.get_min_length(), 1); + } + midShape.push_back(dimValue); + } + 
inputShapes.push_back(InputShape{param->get_partial_shape(), { param->get_partial_shape().get_min_shape(), + param->get_partial_shape().get_max_shape(), + midShape }}); + } + } + init_input_shapes(inputShapes); + } else { + IE_THROW() << "Crash happens"; } - init_input_shapes(inputShapes); } } // namespace subgraph diff --git a/src/tests/functional/plugin/shared/include/behavior/plugin/life_time.hpp b/src/tests/functional/plugin/shared/include/behavior/plugin/life_time.hpp index 39cb28f6cbd..ca6c2ff4bdf 100644 --- a/src/tests/functional/plugin/shared/include/behavior/plugin/life_time.hpp +++ b/src/tests/functional/plugin/shared/include/behavior/plugin/life_time.hpp @@ -16,6 +16,7 @@ #include #include "gtest/gtest.h" #include "common_test_utils/test_common.hpp" +#include "common_test_utils/crash_handler.hpp" #include "functional_test_utils/skip_tests_config.hpp" #include "functional_test_utils/precision_utils.hpp" #include diff --git a/src/tests/functional/plugin/shared/src/behavior/plugin/life_time.cpp b/src/tests/functional/plugin/shared/src/behavior/plugin/life_time.cpp index df57618e916..149620d7b46 100644 --- a/src/tests/functional/plugin/shared/src/behavior/plugin/life_time.cpp +++ b/src/tests/functional/plugin/shared/src/behavior/plugin/life_time.cpp @@ -8,17 +8,9 @@ #include #include "behavior/plugin/life_time.hpp" -#ifndef _WIN32 - #include - #include -#endif +#include namespace BehaviorTestsDefinitions { - -#ifndef _WIN32 - static jmp_buf env; -#endif - std::string HoldersTest::getTestCaseName(testing::TestParamInfo obj) { std::string targetDevice; std::vector order; @@ -38,19 +30,6 @@ namespace BehaviorTestsDefinitions { SKIP_IF_CURRENT_TEST_IS_DISABLED(); std::tie(targetDevice, order) = this->GetParam(); function = ngraph::builder::subgraph::makeConvPoolRelu(); - -#ifndef _WIN32 - // configure handling of crash - auto crashHandler = [](int errCode) { - std::cerr << "Unexpected application crash with code: " << errCode << std::endl; - siglongjmp(env, 1); - }; - struct sigaction act; - act.sa_handler = crashHandler; - sigemptyset(&act.sa_mask); - act.sa_flags = 0; - sigaction(SIGSEGV, &act, 0); -#endif } void release_order_test(std::vector order, const std::string &deviceName, @@ -90,29 +69,35 @@ namespace BehaviorTestsDefinitions { } TEST_P(HoldersTest, Orders) { + // in case of crash jump will be made and work will be continued + auto crashHandler = std::unique_ptr(new CommonTestUtils::CrashHandler()); + // Test failed if crash happens #ifdef _WIN32 - EXPECT_NO_THROW(release_order_test(order, targetDevice, function)); + if (setjmp(CommonTestUtils::env) == 0) { #else - if (sigsetjmp(env, 1) == 0) { - release_order_test(order, targetDevice, function); + if (sigsetjmp(CommonTestUtils::env, 1) == 0) { +#endif + EXPECT_NO_THROW(release_order_test(order, targetDevice, function)); } else { IE_THROW() << "Crash happens"; } -#endif } TEST_P(HoldersTestImportNetwork, Orders) { + // in case of crash jump will be made and work will be continued + auto crashHandler = std::unique_ptr(new CommonTestUtils::CrashHandler()); + // Test failed if crash happens #ifdef _WIN32 - EXPECT_NO_THROW(release_order_test(order, targetDevice, function)); + if (setjmp(CommonTestUtils::env) == 0) { #else - if (sigsetjmp(env, 1) == 0) { - release_order_test(order, targetDevice, function); + if (sigsetjmp(CommonTestUtils::env, 1) == 0) { +#endif + EXPECT_NO_THROW(release_order_test(order, targetDevice, function)); } else { IE_THROW() << "Crash happens"; } -#endif } std::string 
HoldersTestOnImportedNetwork::getTestCaseName(testing::TestParamInfo obj) { diff --git a/src/tests/functional/plugin/shared/src/single_layer_tests/op_impl_check/op_impl_check.cpp b/src/tests/functional/plugin/shared/src/single_layer_tests/op_impl_check/op_impl_check.cpp index cef5aee662a..3a3b8536598 100644 --- a/src/tests/functional/plugin/shared/src/single_layer_tests/op_impl_check/op_impl_check.cpp +++ b/src/tests/functional/plugin/shared/src/single_layer_tests/op_impl_check/op_impl_check.cpp @@ -7,6 +7,7 @@ #endif #include "single_layer_tests/op_impl_check/op_impl_check.hpp" +#include "common_test_utils/crash_handler.hpp" namespace ov { namespace test { @@ -16,21 +17,26 @@ void OpImplCheckTest::run() { if (function == nullptr) { GTEST_FAIL() << "Target function is empty!"; } - auto crashHandler = [](int errCode) { - auto& s = LayerTestsUtils::Summary::getInstance(); - s.saveReport(); - std::cerr << "Unexpected application crash with code: " << errCode << std::endl; - std::abort(); - }; - signal(SIGSEGV, crashHandler); - summary.setDeviceName(targetDevice); - try { - auto executableNetwork = core->compile_model(function, targetDevice, configuration); - summary.updateOPsImplStatus(function, true); - } catch (...) { - summary.updateOPsImplStatus(function, false); - GTEST_FAIL() << "Error in the LoadNetwork!"; + // in case of crash jump will be made and work will be continued + auto crashHandler = std::unique_ptr(new CommonTestUtils::CrashHandler()); + + // place to jump in case of a crash +#ifdef _WIN32 + if (setjmp(CommonTestUtils::env) == 0) { +#else + if (sigsetjmp(CommonTestUtils::env, 1) == 0) { +#endif + summary.setDeviceName(targetDevice); + try { + auto executableNetwork = core->compile_model(function, targetDevice, configuration); + summary.updateOPsImplStatus(function, true); + } catch (...) 
{ + summary.updateOPsImplStatus(function, false); + GTEST_FAIL() << "Error in the LoadNetwork!"; + } + } else { + IE_THROW() << "Crash happens"; } } diff --git a/src/tests/functional/shared_test_classes/include/shared_test_classes/base/layer_test_utils.hpp b/src/tests/functional/shared_test_classes/include/shared_test_classes/base/layer_test_utils.hpp index 4762844086d..3ead1299065 100644 --- a/src/tests/functional/shared_test_classes/include/shared_test_classes/base/layer_test_utils.hpp +++ b/src/tests/functional/shared_test_classes/include/shared_test_classes/base/layer_test_utils.hpp @@ -21,6 +21,7 @@ #include "common_test_utils/ngraph_test_utils.hpp" #include "common_test_utils/common_utils.hpp" #include "common_test_utils/test_common.hpp" +#include "common_test_utils/crash_handler.hpp" #include "functional_test_utils/skip_tests_config.hpp" #include "functional_test_utils/plugin_cache.hpp" diff --git a/src/tests/functional/shared_test_classes/include/shared_test_classes/base/ov_subgraph.hpp b/src/tests/functional/shared_test_classes/include/shared_test_classes/base/ov_subgraph.hpp index 4739fa1045f..b5b6713307c 100644 --- a/src/tests/functional/shared_test_classes/include/shared_test_classes/base/ov_subgraph.hpp +++ b/src/tests/functional/shared_test_classes/include/shared_test_classes/base/ov_subgraph.hpp @@ -26,12 +26,6 @@ public: virtual void serialize(); virtual void query_model(); - void TearDown() override { - if (!configuration.empty()) { - ov::test::utils::PluginCache::get().core().reset(); - } - } - protected: virtual void compare(const std::vector &expected, const std::vector &actual); diff --git a/src/tests/functional/shared_test_classes/src/base/layer_test_utils.cpp b/src/tests/functional/shared_test_classes/src/base/layer_test_utils.cpp index cefc0234fa7..5d815d69c98 100644 --- a/src/tests/functional/shared_test_classes/src/base/layer_test_utils.cpp +++ b/src/tests/functional/shared_test_classes/src/base/layer_test_utils.cpp @@ -30,40 +30,45 @@ void LayerTestsCommon::Run() { functionRefs = ngraph::clone_function(*function); functionRefs->set_friendly_name("refFunction"); } - auto crashHandler = [](int errCode) { + + // in case of crash jump will be made and work will be continued + auto crashHandler = std::unique_ptr(new CommonTestUtils::CrashHandler()); + + // place to jump in case of a crash +#ifdef _WIN32 + if (setjmp(CommonTestUtils::env) == 0) { +#else + if (sigsetjmp(CommonTestUtils::env, 1) == 0) { +#endif auto &s = Summary::getInstance(); - s.saveReport(); - std::cout << "Unexpected application crash!" 
<< std::endl; - std::abort(); - }; - signal(SIGSEGV, crashHandler); + s.setDeviceName(targetDevice); - auto &s = Summary::getInstance(); - s.setDeviceName(targetDevice); + if (FuncTestUtils::SkipTestsConfig::currentTestIsDisabled()) { + s.updateOPsStats(functionRefs, PassRate::Statuses::SKIPPED); + GTEST_SKIP() << "Disabled test due to configuration" << std::endl; + } else { + s.updateOPsStats(functionRefs, PassRate::Statuses::CRASHED); + } - if (FuncTestUtils::SkipTestsConfig::currentTestIsDisabled()) { - s.updateOPsStats(functionRefs, PassRate::Statuses::SKIPPED); - GTEST_SKIP() << "Disabled test due to configuration" << std::endl; + try { + LoadNetwork(); + GenerateInputs(); + Infer(); + Validate(); + s.updateOPsStats(functionRefs, PassRate::Statuses::PASSED); + } + catch (const std::runtime_error &re) { + s.updateOPsStats(functionRefs, PassRate::Statuses::FAILED); + GTEST_FATAL_FAILURE_(re.what()); + } catch (const std::exception &ex) { + s.updateOPsStats(functionRefs, PassRate::Statuses::FAILED); + GTEST_FATAL_FAILURE_(ex.what()); + } catch (...) { + s.updateOPsStats(functionRefs, PassRate::Statuses::FAILED); + GTEST_FATAL_FAILURE_("Unknown failure occurred."); + } } else { - s.updateOPsStats(functionRefs, PassRate::Statuses::CRASHED); - } - - try { - LoadNetwork(); - GenerateInputs(); - Infer(); - Validate(); - s.updateOPsStats(functionRefs, PassRate::Statuses::PASSED); - } - catch (const std::runtime_error &re) { - s.updateOPsStats(functionRefs, PassRate::Statuses::FAILED); - GTEST_FATAL_FAILURE_(re.what()); - } catch (const std::exception &ex) { - s.updateOPsStats(functionRefs, PassRate::Statuses::FAILED); - GTEST_FATAL_FAILURE_(ex.what()); - } catch (...) { - s.updateOPsStats(functionRefs, PassRate::Statuses::FAILED); - GTEST_FATAL_FAILURE_("Unknown failure occurred."); + IE_THROW() << "Crash happens"; } } diff --git a/src/tests/functional/shared_test_classes/src/base/ov_subgraph.cpp b/src/tests/functional/shared_test_classes/src/base/ov_subgraph.cpp index 6db97b61d6b..3d5de5ec2d1 100644 --- a/src/tests/functional/shared_test_classes/src/base/ov_subgraph.cpp +++ b/src/tests/functional/shared_test_classes/src/base/ov_subgraph.cpp @@ -18,6 +18,7 @@ #include "ngraph_functions/utils/ngraph_helpers.hpp" #include "common_test_utils/file_utils.hpp" +#include "common_test_utils/crash_handler.hpp" #include "functional_test_utils/ov_tensor_utils.hpp" #include "functional_test_utils/skip_tests_config.hpp" @@ -25,6 +26,8 @@ #include "shared_test_classes/base/utils/generate_inputs.hpp" #include "shared_test_classes/base/utils/compare_results.hpp" +#include + namespace ov { namespace test { @@ -34,52 +37,56 @@ std::ostream& operator <<(std::ostream& os, const InputShape& inputShape) { } void SubgraphBaseTest::run() { - auto crashHandler = [](int errCode) { - auto& s = LayerTestsUtils::Summary::getInstance(); - s.saveReport(); - std::cerr << "Unexpected application crash with code: " << errCode << std::endl; - std::abort(); - }; - signal(SIGSEGV, crashHandler); + // in case of crash jump will be made and work will be continued + auto crashHandler = std::unique_ptr(new CommonTestUtils::CrashHandler()); - LayerTestsUtils::PassRate::Statuses status = FuncTestUtils::SkipTestsConfig::currentTestIsDisabled() - ? 
LayerTestsUtils::PassRate::Statuses::SKIPPED - : LayerTestsUtils::PassRate::Statuses::CRASHED; - summary.setDeviceName(targetDevice); - summary.updateOPsStats(function, status); - SKIP_IF_CURRENT_TEST_IS_DISABLED(); + // place to jump in case of a crash +#ifdef _WIN32 + if (setjmp(CommonTestUtils::env) == 0) { +#else + if (sigsetjmp(CommonTestUtils::env, 1) == 0) { +#endif + LayerTestsUtils::PassRate::Statuses status = FuncTestUtils::SkipTestsConfig::currentTestIsDisabled() + ? LayerTestsUtils::PassRate::Statuses::SKIPPED + : LayerTestsUtils::PassRate::Statuses::CRASHED; + summary.setDeviceName(targetDevice); + summary.updateOPsStats(function, status); + SKIP_IF_CURRENT_TEST_IS_DISABLED(); - ASSERT_FALSE(targetStaticShapes.empty()) << "Target Static Shape is empty!!!"; - std::string errorMessage; - try { - compile_model(); - for (const auto& targetStaticShapeVec : targetStaticShapes) { - try { - if (!inputDynamicShapes.empty()) { - // resize ngraph function according new target shape - // Note: output shapes of some nodes depend on the input data - // so for some tests we need to override this function and replace parameter with constant node to get correct output shapes - init_ref_function(functionRefs, targetStaticShapeVec); + ASSERT_FALSE(targetStaticShapes.empty()) << "Target Static Shape is empty!!!"; + std::string errorMessage; + try { + compile_model(); + for (const auto& targetStaticShapeVec : targetStaticShapes) { + try { + if (!inputDynamicShapes.empty()) { + // resize ngraph function according new target shape + // Note: output shapes of some nodes depend on the input data + // so for some tests we need to override this function and replace parameter with constant node to get correct output shapes + init_ref_function(functionRefs, targetStaticShapeVec); + } + generate_inputs(targetStaticShapeVec); + } catch (const std::exception& ex) { + throw std::runtime_error("Incorrect target static shape: " + + CommonTestUtils::vec2str(targetStaticShapeVec) + " " + ex.what()); } - generate_inputs(targetStaticShapeVec); - } catch (const std::exception& ex) { - throw std::runtime_error("Incorrect target static shape: " + - CommonTestUtils::vec2str(targetStaticShapeVec) + " " + ex.what()); + infer(); + validate(); } - infer(); - validate(); + status = LayerTestsUtils::PassRate::Statuses::PASSED; + } catch (const std::exception& ex) { + status = LayerTestsUtils::PassRate::Statuses::FAILED; + errorMessage = ex.what(); + } catch (...) { + status = LayerTestsUtils::PassRate::Statuses::FAILED; + errorMessage = "Unknown failure occurred."; } - status = LayerTestsUtils::PassRate::Statuses::PASSED; - } catch (const std::exception& ex) { - status = LayerTestsUtils::PassRate::Statuses::FAILED; - errorMessage = ex.what(); - } catch (...) 
{ - status = LayerTestsUtils::PassRate::Statuses::FAILED; - errorMessage = "Unknown failure occurred."; - } - summary.updateOPsStats(function, status); - if (status != LayerTestsUtils::PassRate::Statuses::PASSED) { - GTEST_FATAL_FAILURE_(errorMessage.c_str()); + summary.updateOPsStats(function, status); + if (status != LayerTestsUtils::PassRate::Statuses::PASSED) { + GTEST_FATAL_FAILURE_(errorMessage.c_str()); + } + } else { + IE_THROW() << "Crash happens"; } } diff --git a/src/tests/ie_test_utils/common_test_utils/crash_handler.cpp b/src/tests/ie_test_utils/common_test_utils/crash_handler.cpp new file mode 100644 index 00000000000..ed3a1e446c4 --- /dev/null +++ b/src/tests/ie_test_utils/common_test_utils/crash_handler.cpp @@ -0,0 +1,55 @@ +// Copyright (C) 2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "crash_handler.hpp" + +namespace CommonTestUtils { + +// enviroment to restore in case of crash +jmp_buf env; + +CrashHandler::CrashHandler() { + auto crashHandler = [](int errCode) { + std::cerr << "Unexpected application crash with code: " << errCode << std::endl; + + // reset custom signal handler to avoid infinit loop + // if for some reasons sigsetjmp will not be available + signal(SIGABRT, SIG_DFL); + signal(SIGSEGV, SIG_DFL); + signal(SIGILL, SIG_DFL); +#ifndef _WIN32 + signal(SIGBUS, SIG_DFL); + signal(SIGFPE, SIG_DFL); +#endif + + // goto sigsetjmp +#ifdef _WIN32 + longjmp(env, 1); +#else + siglongjmp(env, 1); +#endif + }; + + // setup custom handler for signals + signal(SIGABRT, crashHandler); + signal(SIGSEGV, crashHandler); + signal(SIGILL, crashHandler); +#ifndef _WIN32 + signal(SIGFPE, crashHandler); + signal(SIGBUS, crashHandler); +#endif +} + +CrashHandler::~CrashHandler() { + // reset custom signal handler to avoid infinit loop + signal(SIGABRT, SIG_DFL); + signal(SIGSEGV, SIG_DFL); + signal(SIGILL, SIG_DFL); +#ifndef _WIN32 + signal(SIGFPE, SIG_DFL); + signal(SIGBUS, SIG_DFL); +#endif +} + +} // namespace CommonTestUtils \ No newline at end of file diff --git a/src/tests/ie_test_utils/common_test_utils/crash_handler.hpp b/src/tests/ie_test_utils/common_test_utils/crash_handler.hpp new file mode 100644 index 00000000000..8e624671951 --- /dev/null +++ b/src/tests/ie_test_utils/common_test_utils/crash_handler.hpp @@ -0,0 +1,24 @@ +// Copyright (C) 2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +#include "common_utils.hpp" + +#include +#include + +namespace CommonTestUtils { + +extern jmp_buf env; + +class CrashHandler { +public: + CrashHandler(); + ~CrashHandler(); +}; + +} // namespace CommonTestUtils \ No newline at end of file From 991c9db1c18f4bacdfa31c230af1eb61f2990f3c Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Tue, 22 Feb 2022 14:32:57 +0300 Subject: [PATCH 063/310] Config api docs (#10563) * Renamed hetero md * Renamed some guides * Updated OpenVINO_Runtime_User_Guide.md * Updated plugin's page * More updates * Fixed links * Updated link names * Fixed links * Fixed docs build * Self-review * Fixed issues in doc snippets --- docs/HOWTO/Custom_Layers_Guide.md | 4 +- .../Deep_Learning_Model_Optimizer_DevGuide.md | 2 +- .../convert_model/Convert_Model_From_Caffe.md | 4 +- .../convert_model/Convert_Model_From_Kaldi.md | 4 +- .../convert_model/Convert_Model_From_MxNet.md | 4 +- .../Convert_Model_From_Paddle.md | 4 +- .../Convert_Model_From_PyTorch.md | 2 +- .../Convert_Model_From_TensorFlow.md | 2 +- docs/OV_Runtime_UG/Bfloat16Inference.md | 2 +- docs/OV_Runtime_UG/DynamicBatching.md | 
2 +- docs/OV_Runtime_UG/Model_caching_overview.md | 2 +- docs/OV_Runtime_UG/PropertiesAPI.md | 243 ------------------ docs/OV_Runtime_UG/Samples_Overview.md | 2 +- docs/OV_Runtime_UG/ShapeInference.md | 2 +- .../AUTO.md => auto_device_selection.md} | 6 +- .../HETERO.md => hetero_execution.md} | 2 +- .../MULTI.md => multi_device.md} | 16 +- ...untime_User_Guide.md => openvino_intro.md} | 27 +- docs/OV_Runtime_UG/openvino_temporary.md | 20 ++ docs/OV_Runtime_UG/protecting_model_guide.md | 2 +- docs/OV_Runtime_UG/supported_plugins/CPU.md | 2 +- .../supported_plugins/Device_Plugins.md | 27 +- docs/OV_Runtime_UG/supported_plugins/GNA.md | 2 +- docs/OV_Runtime_UG/supported_plugins/GPU.md | 4 +- .../supported_plugins/GPU_RemoteBlob_API.md | 2 +- docs/OV_Runtime_UG/supported_plugins/HDDL.md | 2 +- .../OV_Runtime_UG/supported_plugins/MYRIAD.md | 2 +- .../supported_plugins/Supported_Devices.md | 12 +- docs/OV_Runtime_UG/supported_plugins/VPU.md | 8 +- .../supported_plugins/config_properties.md | 226 ++++++++++++++++ docs/documentation.md | 2 +- .../install_guides/installing-openvino-pip.md | 2 +- .../dldt_deployment_optimization_guide.md | 12 +- ...eployment_optimization_guide_additional.md | 2 +- docs/snippets/ov_hetero.cpp | 9 +- docs/snippets/ov_hetero.py | 10 +- docs/snippets/ov_properties_api.cpp | 70 +++-- docs/snippets/ov_properties_api.py | 67 +++++ samples/cpp/benchmark_app/README.md | 2 +- samples/cpp/hello_query_device/README.md | 4 +- samples/python/hello_query_device/README.md | 2 +- .../include/openvino/runtime/core.hpp | 10 +- .../openvino/runtime/intel_gna/properties.hpp | 4 + .../openvino/runtime/intel_gpu/ocl/ocl.hpp | 4 + .../openvino/runtime/intel_gpu/properties.hpp | 4 + .../runtime/intel_myriad/hddl_properties.hpp | 9 +- .../intel_myriad/myriad_properties.hpp | 7 +- .../include/openvino/runtime/properties.hpp | 17 ++ .../tools/pot/api/samples/speech/README.md | 2 +- 49 files changed, 502 insertions(+), 376 deletions(-) delete mode 100644 docs/OV_Runtime_UG/PropertiesAPI.md rename docs/OV_Runtime_UG/{supported_plugins/AUTO.md => auto_device_selection.md} (98%) rename docs/OV_Runtime_UG/{supported_plugins/HETERO.md => hetero_execution.md} (99%) rename docs/OV_Runtime_UG/{supported_plugins/MULTI.md => multi_device.md} (91%) rename docs/OV_Runtime_UG/{OpenVINO_Runtime_User_Guide.md => openvino_intro.md} (51%) create mode 100644 docs/OV_Runtime_UG/openvino_temporary.md create mode 100644 docs/OV_Runtime_UG/supported_plugins/config_properties.md create mode 100644 docs/snippets/ov_properties_api.py diff --git a/docs/HOWTO/Custom_Layers_Guide.md b/docs/HOWTO/Custom_Layers_Guide.md index b3dd18f3b14..2315acb0637 100644 --- a/docs/HOWTO/Custom_Layers_Guide.md +++ b/docs/HOWTO/Custom_Layers_Guide.md @@ -32,14 +32,14 @@ There are three steps to support inference of a model with custom operation(s): the Model Optimizer can generate the IR with the operation. 2. Create an operation set and implement a custom nGraph operation in it as described in the [Custom nGraph Operation](../OV_Runtime_UG/Extensibility_DG/AddingNGraphOps.md). -3. Implement a customer operation in one of the [OpenVINO™ Runtime](../OV_Runtime_UG/OpenVINO_Runtime_User_Guide.md) +3. Implement a customer operation in one of the [OpenVINO™ Runtime](../OV_Runtime_UG/openvino_intro.md) plugins to support inference of this operation using a particular target hardware (CPU, GPU or VPU). 
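For illustration of step 2 above, a minimal custom operation skeleton might look like the following sketch. It assumes the OpenVINO 2.0 operation base class (`ov::op::Op`); the operation and opset names (`MyCustomOp`, `custom_opset`) are placeholders and are not part of this change:

```cpp
#include <openvino/op/op.hpp>

// Hypothetical identity-like operation, shown only to illustrate the required overrides.
class MyCustomOp : public ov::op::Op {
public:
    OPENVINO_OP("MyCustomOp", "custom_opset");

    MyCustomOp() = default;
    explicit MyCustomOp(const ov::Output<ov::Node>& arg) : Op({arg}) {
        constructor_validate_and_infer_types();
    }

    void validate_and_infer_types() override {
        // The output keeps the element type and shape of the single input.
        set_output_type(0, get_input_element_type(0), get_input_partial_shape(0));
    }

    std::shared_ptr<ov::Node> clone_with_new_inputs(const ov::OutputVector& new_args) const override {
        return std::make_shared<MyCustomOp>(new_args.at(0));
    }

    bool visit_attributes(ov::AttributeVisitor& visitor) override {
        return true;  // this sketch has no attributes to serialize
    }
};
```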
To see the operations that are supported by each device plugin for the Inference Engine, refer to the [Supported Devices](../OV_Runtime_UG/supported_plugins/Supported_Devices.md). > **NOTE**: If a device doesn't support a particular operation, an alternative to creating a new operation is to target -> an additional device using the HETERO plugin. The [Heterogeneous Plugin](../OV_Runtime_UG/supported_plugins/HETERO.md) may be +> an additional device using the HETERO device. The [Heterogeneous execution](../OV_Runtime_UG/hetero_execution.md) may be > used to run an inference model on multiple devices allowing the unsupported operations on one device to "fallback" to > run on another device (e.g., CPU) that does support those operations. diff --git a/docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md b/docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md index 731eb073ea7..67160bc86bb 100644 --- a/docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md +++ b/docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md @@ -22,7 +22,7 @@ Model Optimizer is a cross-platform command-line tool that facilitates the transition between the training and deployment environment, performs static model analysis, and adjusts deep learning models for optimal execution on end-point target devices. -Model Optimizer process assumes you have a network model trained using supported deep learning frameworks: Caffe*, TensorFlow*, Kaldi*, MXNet* or converted to the ONNX* format. Model Optimizer produces an Intermediate Representation (IR) of the network, which can be inferred with the [OpenVINO™ Runtime](../OV_Runtime_UG/OpenVINO_Runtime_User_Guide.md). +Model Optimizer process assumes you have a network model trained using supported deep learning frameworks: Caffe*, TensorFlow*, Kaldi*, MXNet* or converted to the ONNX* format. Model Optimizer produces an Intermediate Representation (IR) of the network, which can be inferred with the [OpenVINO™ Runtime](../OV_Runtime_UG/openvino_intro.md). > **NOTE**: Model Optimizer does not infer models. Model Optimizer is an offline tool that runs before the inference takes place. diff --git a/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_Caffe.md b/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_Caffe.md index d8f2f906c40..187e73a4574 100644 --- a/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_Caffe.md +++ b/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_Caffe.md @@ -10,8 +10,8 @@ A summary of the steps for optimizing and deploying a model that was trained wit 1. [Configure the Model Optimizer](../../Deep_Learning_Model_Optimizer_DevGuide.md) for Caffe\*. 2. [Convert a Caffe\* Model](#Convert_From_Caffe) to produce an optimized [Intermediate Representation (IR)](../../IR_and_opsets.md) of the model based on the trained network topology, weights, and biases values -3. Test the model in the Intermediate Representation format using the [OpenVINO™ Runtime](../../../OV_Runtime_UG/OpenVINO_Runtime_User_Guide.md) in the target environment via provided Inference Engine [sample applications](../../../OV_Runtime_UG/Samples_Overview.md) -4. [Integrate](../../../OV_Runtime_UG/Samples_Overview.md) the [OpenVINO™ Runtime](../../../OV_Runtime_UG/OpenVINO_Runtime_User_Guide.md) in your application to deploy the model in the target environment +3. 
Test the model in the Intermediate Representation format using the [OpenVINO™ Runtime](../../../OV_Runtime_UG/openvino_intro.md) in the target environment via provided Inference Engine [sample applications](../../../OV_Runtime_UG/Samples_Overview.md) +4. [Integrate](../../../OV_Runtime_UG/Samples_Overview.md) the [OpenVINO™ Runtime](../../../OV_Runtime_UG/openvino_intro.md) in your application to deploy the model in the target environment ## Supported Topologies diff --git a/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_Kaldi.md b/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_Kaldi.md index 511823f047e..5d65d897ef5 100644 --- a/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_Kaldi.md +++ b/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_Kaldi.md @@ -16,8 +16,8 @@ A summary of the steps for optimizing and deploying a model that was trained wit 1. [Configure the Model Optimizer](../../Deep_Learning_Model_Optimizer_DevGuide.md) for Kaldi\*. 2. [Convert a Kaldi\* Model](#Convert_From_Kaldi) to produce an optimized [Intermediate Representation (IR)](../../IR_and_opsets.md) of the model based on the trained network topology, weights, and biases values. -3. Test the model in the Intermediate Representation format using the [OpenVINO™ Runtime](../../../OV_Runtime_UG/OpenVINO_Runtime_User_Guide.md) in the target environment via provided Inference Engine [sample applications](../../../OV_Runtime_UG/Samples_Overview.md). -4. [Integrate](../../../OV_Runtime_UG/Samples_Overview.md) the [OpenVINO™ Runtime](../../../OV_Runtime_UG/OpenVINO_Runtime_User_Guide.md) in your application to deploy the model in the target environment. +3. Test the model in the Intermediate Representation format using the [OpenVINO™ Runtime](../../../OV_Runtime_UG/openvino_intro.md) in the target environment via provided Inference Engine [sample applications](../../../OV_Runtime_UG/Samples_Overview.md). +4. [Integrate](../../../OV_Runtime_UG/Samples_Overview.md) the [OpenVINO™ Runtime](../../../OV_Runtime_UG/openvino_intro.md) in your application to deploy the model in the target environment. > **NOTE**: The Model Optimizer supports the [nnet1](http://kaldi-asr.org/doc/dnn1.html) and [nnet2](http://kaldi-asr.org/doc/dnn2.html) formats of Kaldi models. Support of the [nnet3](http://kaldi-asr.org/doc/dnn3.html) format is limited. diff --git a/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_MxNet.md b/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_MxNet.md index 8841fccbba1..62bba643129 100644 --- a/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_MxNet.md +++ b/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_MxNet.md @@ -17,8 +17,8 @@ A summary of the steps for optimizing and deploying a model that was trained wit 1. [Configure the Model Optimizer](../../Deep_Learning_Model_Optimizer_DevGuide.md) for MXNet* (MXNet was used to train your model) 2. [Convert a MXNet model](#ConvertMxNet) to produce an optimized [Intermediate Representation (IR)](../../IR_and_opsets.md) of the model based on the trained network topology, weights, and biases values -3. Test the model in the Intermediate Representation format using the [OpenVINO™ Runtime](../../../OV_Runtime_UG/OpenVINO_Runtime_User_Guide.md) in the target environment via provided Inference Engine [sample applications](../../../OV_Runtime_UG/Samples_Overview.md) -4. 
[Integrate](../../../OV_Runtime_UG/Samples_Overview.md) the [OpenVINO™ Runtime](../../../OV_Runtime_UG/OpenVINO_Runtime_User_Guide.md) in your application to deploy the model in the target environment +3. Test the model in the Intermediate Representation format using the [OpenVINO™ Runtime](../../../OV_Runtime_UG/openvino_intro.md) in the target environment via provided Inference Engine [sample applications](../../../OV_Runtime_UG/Samples_Overview.md) +4. [Integrate](../../../OV_Runtime_UG/Samples_Overview.md) the [OpenVINO™ Runtime](../../../OV_Runtime_UG/openvino_intro.md) in your application to deploy the model in the target environment ## Supported Topologies diff --git a/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_Paddle.md b/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_Paddle.md index 19c487b21ec..c7ae7277c7f 100644 --- a/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_Paddle.md +++ b/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_Paddle.md @@ -4,8 +4,8 @@ A summary of the steps for optimizing and deploying a model trained with Paddle\ 1. [Configure the Model Optimizer](../../Deep_Learning_Model_Optimizer_DevGuide.md) for Paddle\*. 2. [Convert a Paddle\* Model](#Convert_From_Paddle) to produce an optimized [Intermediate Representation (IR)](../../IR_and_opsets.md) of the model based on the trained network topology, weights, and biases. -3. Test the model in the Intermediate Representation format using the [OpenVINO™ Runtime](../../../OV_Runtime_UG/OpenVINO_Runtime_User_Guide.md) in the target environment via provided Inference Engine [sample applications](../../../OV_Runtime_UG/Samples_Overview.md). -4. [Integrate](../../../OV_Runtime_UG/Samples_Overview.md) the [OpenVINO™ Runtime](../../../OV_Runtime_UG/OpenVINO_Runtime_User_Guide.md) in your application to deploy the model in the target environment. +3. Test the model in the Intermediate Representation format using the [OpenVINO™ Runtime](../../../OV_Runtime_UG/openvino_intro.md) in the target environment via provided Inference Engine [sample applications](../../../OV_Runtime_UG/Samples_Overview.md). +4. [Integrate](../../../OV_Runtime_UG/Samples_Overview.md) the [OpenVINO™ Runtime](../../../OV_Runtime_UG/openvino_intro.md) in your application to deploy the model in the target environment. ## Supported Topologies diff --git a/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_PyTorch.md b/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_PyTorch.md index 7ffe0fa11ad..63cef40c49d 100644 --- a/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_PyTorch.md +++ b/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_PyTorch.md @@ -48,7 +48,7 @@ PyTorch* framework is supported through export to ONNX\* format. A summary of th 1. [Configure the Model Optimizer](../../Deep_Learning_Model_Optimizer_DevGuide.md) for ONNX\*. 2. [Export PyTorch model to ONNX\*](#export-to-onnx). 3. [Convert an ONNX\* model](Convert_Model_From_ONNX.md) to produce an optimized [Intermediate Representation (IR)](../../IR_and_opsets.md) of the model based on the trained network topology, weights, and biases values. -4. Test the model in the Intermediate Representation format using the [OpenVINO™ Runtime](../../../OV_Runtime_UG/OpenVINO_Runtime_User_Guide.md) in the target environment via provided [sample applications](../../../OV_Runtime_UG/Samples_Overview.md). +4. 
Test the model in the Intermediate Representation format using the [OpenVINO™ Runtime](../../../OV_Runtime_UG/openvino_intro.md) in the target environment via provided [sample applications](../../../OV_Runtime_UG/Samples_Overview.md). 5. [Integrate](../../../OV_Runtime_UG/Samples_Overview.md) the Inference Engine in your application to deploy the model in the target environment. ## Export PyTorch\* Model to ONNX\* Format diff --git a/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_TensorFlow.md b/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_TensorFlow.md index 1ba6aa2d4d4..80fd4faa207 100644 --- a/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_TensorFlow.md +++ b/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_TensorFlow.md @@ -31,7 +31,7 @@ A summary of the steps for optimizing and deploying a model that was trained wit 1. [Configure the Model Optimizer](../../Deep_Learning_Model_Optimizer_DevGuide.md) for TensorFlow\* (TensorFlow was used to train your model). 2. [Freeze the TensorFlow model](#freeze-the-tensorflow-model) if your model is not already frozen or skip this step and use the [instruction](#loading-nonfrozen-models) to a convert a non-frozen model. 3. [Convert a TensorFlow\* model](#Convert_From_TF) to produce an optimized [Intermediate Representation (IR)](../../IR_and_opsets.md) of the model based on the trained network topology, weights, and biases values. -4. Test the model in the Intermediate Representation format using the [OpenVINO™ Runtime](../../../OV_Runtime_UG/OpenVINO_Runtime_User_Guide.md) in the target environment via provided [sample applications](../../../OV_Runtime_UG/Samples_Overview.md). +4. Test the model in the Intermediate Representation format using the [OpenVINO™ Runtime](../../../OV_Runtime_UG/openvino_intro.md) in the target environment via provided [sample applications](../../../OV_Runtime_UG/Samples_Overview.md). 5. [Integrate](../../../OV_Runtime_UG/Samples_Overview.md) the Inference Engine in your application to deploy the model in the target environment. ## Supported Topologies diff --git a/docs/OV_Runtime_UG/Bfloat16Inference.md b/docs/OV_Runtime_UG/Bfloat16Inference.md index 7339e24a042..5091901e986 100644 --- a/docs/OV_Runtime_UG/Bfloat16Inference.md +++ b/docs/OV_Runtime_UG/Bfloat16Inference.md @@ -26,7 +26,7 @@ There are two ways to check if CPU device can support bfloat16 computations for 1. Query the instruction set using one of these system commands: * `lscpu | grep avx512_bf16` * `cat /proc/cpuinfo | grep avx512_bf16` -2. Use the [Properties API](PropertiesAPI.md) with `METRIC_KEY(OPTIMIZATION_CAPABILITIES)`, which should return `BF16` in the list of CPU optimization options: +2. 
Use the [Configure devices](supported_plugins/config_properties.md) with `METRIC_KEY(OPTIMIZATION_CAPABILITIES)`, which should return `BF16` in the list of CPU optimization options: @snippet snippets/Bfloat16Inference0.cpp part0 diff --git a/docs/OV_Runtime_UG/DynamicBatching.md b/docs/OV_Runtime_UG/DynamicBatching.md index d962aa2a664..5773af94128 100644 --- a/docs/OV_Runtime_UG/DynamicBatching.md +++ b/docs/OV_Runtime_UG/DynamicBatching.md @@ -1,4 +1,4 @@ -# Using Dynamic Batching {#openvino_docs_IE_DG_DynamicBatching} +# Working with dynamic shapes {#openvino_docs_IE_DG_DynamicBatching} ## Using Dynamic Batching (C++) diff --git a/docs/OV_Runtime_UG/Model_caching_overview.md b/docs/OV_Runtime_UG/Model_caching_overview.md index bf85c4ef680..07d8e871c18 100644 --- a/docs/OV_Runtime_UG/Model_caching_overview.md +++ b/docs/OV_Runtime_UG/Model_caching_overview.md @@ -8,7 +8,7 @@
C++
@endsphinxdirective -As described in the [OpenVINO™ Runtime User Guide](OpenVINO_Runtime_User_Guide.md), a common application flow consists of the following steps: +As described in the [OpenVINO™ Runtime User Guide](openvino_intro.md), a common application flow consists of the following steps: 1. **Create an Inference Engine Core object**: First step to manage available devices and read network objects diff --git a/docs/OV_Runtime_UG/PropertiesAPI.md b/docs/OV_Runtime_UG/PropertiesAPI.md deleted file mode 100644 index 435e0e3cd88..00000000000 --- a/docs/OV_Runtime_UG/PropertiesAPI.md +++ /dev/null @@ -1,243 +0,0 @@ -# Introduction to OpenVINO™ Device Properties API {#openvino_docs_IE_DG_InferenceEngine_QueryAPI} - -## OpenVINO™ Properties API (C++) - -@sphinxdirective -.. raw:: html - -
C++
-@endsphinxdirective - -The OpenVINO™ toolkit supports inferencing with several types of devices (processors or accelerators). -This section provides a high-level description of the process of querying of different device properties and configuration values at runtime. Refer to the [Hello Query Device С++ Sample](../../samples/cpp/hello_query_device/README.md) sources and the [Multi-Device Plugin documentation](supported_plugins/MULTI.md) for examples of using the OpenVINO™ Properties API in user applications. - -### Using the OpenVINO™ Properties API in Your Code - -The `ov::Core` class provides the following API to query device information, set or get different device configuration properties: - -* `ov::Core::get_available_devices` - Provides a list of available devices. If there are more than one instance of a specific device, the devices are enumerated with `.suffix` where `suffix` is a unique string identifier. The device name can be passed to all methods of the `ov::Core` class that work with devices, for example `ov::Core::compile_model`. -* `ov::Core::get_property` - Gets the current value of a specific property. -* `ov::Core::set_property` - Sets a new value for the property. - -The `ov::CompiledModel` class is also extended to support the Properties API: - -* `ov::CompiledModel::get_property` -* `ov::CompiledModel::set)property` - -### Properties API in the Core Class - -#### get_available_devices - -@snippet snippets/ov_properties_api.cpp part0 - -The function returns a list of available devices, for example: - -``` -MYRIAD.1.2-ma2480 -MYRIAD.1.4-ma2480 -CPU -GPU.0 -GPU.1 -``` - -Each device name can then be passed to: - -* `ov::Core::compile_model` to load the network to a specific device. -* `ov::Core::get_property` to get common or device specific properties. -* All other methods of the `ov::Core` class that accept `deviceName`. - -#### ov::Core methods - -`ov::Core` methods like: - -* `ov::Core::compile_model` -* `ov::Core::import_model` -* `ov::Core::query_model` -* `ov::Core::query_model` -* `ov::Core::create_context` - -accept variadic list of properties as last arguments. Each property in such parameters lists should be used as function call to pass property value with specified property type - -@snippet snippets/ov_properties_api.cpp part3 - -#### get_property() - -For documentation about common configuration keys, refer to `openvino/runtime/properties.hpp`. Device specific configuration keys can be found in corresponding plugin folders. - -* The code below demonstrates how to query `HETERO` device priority of devices which will be used to infer the model: - -@snippet snippets/ov_properties_api.cpp part1 - -* To extract device properties such as available device, device name, supported configuration keys, and others, use the `ov::Core::get_property` method: - -@snippet snippets/ov_properties_api.cpp part2 - -A returned value appears as follows: `Intel(R) Core(TM) i7-8700 CPU @ 3.20GHz`. - -> **NOTE**: All properties have a type, which is specified during property declaration. The list of common device-agnostic properties can be found in `openvino/runtime/properties.hpp`. Device specific properties (for example, for HDDL or MYRIAD devices) can be found in corresponding plugin folders. 
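As a minimal sketch of the typed-property pattern described above (the device, properties, and model path below are illustrative assumptions, not part of this change):

```cpp
#include <openvino/runtime/core.hpp>

int main() {
    ov::Core core;

    // Reading a typed property returns the declared type directly (std::string here).
    auto device_name = core.get_property("CPU", ov::device::full_name);

    // Writing a typed property for a device.
    core.set_property("CPU", ov::enable_profiling(true));

    // Properties can also be passed to compile_model as trailing arguments.
    auto model = core.read_model("model.xml");  // placeholder path
    auto compiled = core.compile_model(
        model, "CPU", ov::hint::performance_mode(ov::hint::PerformanceMode::THROUGHPUT));
    return 0;
}
```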
- -### Properties API in the CompiledModel Class - -#### get_property() - -The method is used to get configuration values the compiled model has been created with or compiled model specific property such as `ov::optimal_number_of_infer_requests`: - -@snippet snippets/ov_properties_api.cpp part4 - -Or the current temperature of the `MYRIAD` device: - -@snippet snippets/ov_properties_api.cpp part5 - -Or the number of threads that would be used for inference in `CPU` device: - -@snippet snippets/ov_properties_api.cpp part6 - -#### set_property() - -The only device that supports this method is [Multi-Device](supported_plugins/MULTI.md). - -## OpenVINO™ Properties API (Python) - -@sphinxdirective -.. raw:: html - -
Python
-@endsphinxdirective - -This section provides a high-level description of the process of querying of different device properties and configuration values. Refer to the [Hello Properties Device Python Sample](../../samples/python/hello_query_device/README.md) sources and the [Multi-Device Plugin documentation](supported_plugins/MULTI.md) for examples of using the OpenVINO™ Properties API in user applications. - -### Using the OpenVINO™ Properties API in Your Code - -The OpenVINO™ [Core](api/ie_python_api/_autosummary/openvino.inference_engine.IECore.html#openvino-inference-engine-iecore) class provides the following API to query device information, set or get different device configuration properties: - -* [ie_api.IECore.available_devices](api/ie_python_api/_autosummary/openvino.inference_engine.IECore.html#openvino.inference_engine.IECore.available_devices) - Provides a list of available devices. If there are more than one instance of a specific device, the devices are enumerated with .suffix where suffix is a unique string identifier. The device name can be passed to all methods of the IECore class that work with devices, for example [ie_api.IECore.load_network](api/ie_python_api/_autosummary/openvino.inference_engine.IECore.html#openvino.inference_engine.IECore.load_network). -* [ie_api.ieCore.get_property](api/ie_python_api/_autosummary/openvino.inference_engine.IECore.html#openvino.inference_engine.IECore.get_property) - Provides information about specific device. -* [ie_api.IECore.get_config](api/ie_python_api/_autosummary/openvino.inference_engine.IECore.html#openvino.inference_engine.IECore.get_config) - Gets the current value of a specific configuration key. -* [ie_api.IECore.set_config](api/ie_python_api/_autosummary/openvino.inference_engine.IECore.html#openvino.inference_engine.IECore.set_config) - Sets a new value for the configuration key. - -The [ie_api.CompiledModel](api/ie_python_api/_autosummary/openvino.inference_engine.CompiledModel.html) class is also extended to support the Properties API: -* [ie_api.CompiledModel.get_property](api/ie_python_api/_autosummary/openvino.inference_engine.CompiledModel.html#openvino.inference_engine.CompiledModel.get_property) -* [ie_api.CompiledModel.get_config](latest/api/ie_python_api/_autosummary/openvino.inference_engine.CompiledModel.html#openvino.inference_engine.CompiledModel.get_config) -* There is no method to call for set_config, but the equivalent action is described below. - -### Properties API in the IECore Class - -#### Get Available Devices - -```python -from openvino.inference_engine import IECore - -ie = IECore() -print(ie.available_devices) -``` - -This code prints a list of available devices, for example: - -``` -MYRIAD.1.2-ma2480 -MYRIAD.1.4-ma2480 -FPGA.0 -FPGA.1 -CPU -GPU.0 -GPU.1 -``` - -Each device name can then be passed to: - -* `IECore.load_network` to load the network to a specific device. -* `IECore.get_property` to get common or device specific properties. -* All other methods of the `IECore` class that accept a device name. 
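For comparison, the same device enumeration through the C++ 2.0 API might look like this sketch (illustrative only):

```cpp
#include <openvino/runtime/core.hpp>
#include <iostream>

int main() {
    ov::Core core;
    // Print every available device; each name can then be used with compile_model or get_property.
    for (const auto& device : core.get_available_devices()) {
        std::cout << device << std::endl;
    }
    return 0;
}
```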
- -#### Get Metric - -To extract device properties such as available device, device name, supported configuration keys, and others, use the [IECore.get_property](api/ie_python_api/_autosummary/openvino.inference_engine.IECore.html#openvino.inference_engine.IECore.get_property) method: - -```python -from openvino.inference_engine import IECore - -ie = IECore() -ie.get_property(device_name="CPU", property_name="FULL_DEVICE_NAME") -``` - -A returned value appears as follows: `Intel(R) Core(TM) i7-8700 CPU @ 3.20GHz`. - -To list all supported properties for a device: - -```python -from openvino.inference_engine import IECore - -ie = IECore() -ie.get_property(device_name="GPU", property_name="SUPPORTED_METRICS") -``` - -#### Get Configuration - -The code below uses the [IECore.get_config](api/ie_python_api/_autosummary/openvino.inference_engine.IECore.html#openvino.inference_engine.IECore.get_config) method and demonstrates how to understand whether the HETERO device dumps .dot files with split graphs during the split stage: - -```python -from openvino.inference_engine import IECore - -ie = IECore() -ie.get_config(device_name="HETERO", config_name="HETERO_DUMP_GRAPH_DOT") -``` - -To list all supported configuration keys for a device: - -```python -from openvino.inference_engine import IECore - -ie = IECore() -ie.get_property(device_name=device, property_name="SUPPORTED_CONFIG_KEYS") -``` - -For documentation about common configuration keys, refer to `ie_plugin_config.hpp`. Device specific configuration keys can be found in corresponding plugin folders. - - -### Properties API in the CompiledModel Class - -#### Get Metric - -To get the name of the loaded network: - -```python -from openvino.inference_engine import IECore - -ie = IECore() -net = ie.read_network(model=path_to_xml_file) -exec_net = ie.load_network(network=net, device_name=device) -exec_net.get_property("NETWORK_NAME") -``` - -Use `exec_net.get_property("SUPPORTED_METRICS")` to list all supported properties for an CompiledModel instance. - - -#### Get Configuration - -The [IECore.get_config](api/ie_python_api/_autosummary/openvino.inference_engine.IECore.html#openvino.inference_engine.IECore.get_config) method is used to get information about configuration values the compiled model has been created with: - -```python -from openvino.inference_engine import IECore - -ie = IECore() -net = ie.read_network(model=path_to_xml_file) -exec_net = ie.load_network(network=net, device_name="CPU") -exec_net.get_config("CPU_THREADS_NUM") -``` - -Or the current temperature of MYRIAD device: - -```python -from openvino.inference_engine import IECore - -ie = IECore() -net = ie.read_network(model=path_to_xml_file) -exec_net = ie.load_network(network=net, device_name="MYRIAD") -exec_net.get_config("DEVICE_THERMAL") -``` - -Use `exec_net.get_property("SUPPORTED_CONFIG_KEYS")` to list all supported configuration keys. - -#### Set Configuration - -The only device that supports this method in the CompiledModel class is the [Multi-Device](supported_plugins/MULTI.md), where you can change the priorities of the devices for the Multi plugin in real time: `exec_net.set_config({{"MULTI_DEVICE_PRIORITIES", "GPU,CPU"}})`. See the Multi-Device documentation for more details. 
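A sketch of the same runtime priority update through the typed C++ API (the device names and model path are examples only, not part of this change):

```cpp
#include <openvino/runtime/core.hpp>

int main() {
    ov::Core core;
    auto model = core.read_model("model.xml");  // placeholder path
    auto compiled = core.compile_model(model, "MULTI",
                                       ov::device::priorities("GPU", "CPU"));

    // MULTI allows changing the device priorities of an already compiled model on the fly.
    compiled.set_property({ov::device::priorities("CPU", "GPU")});
    return 0;
}
```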
\ No newline at end of file diff --git a/docs/OV_Runtime_UG/Samples_Overview.md b/docs/OV_Runtime_UG/Samples_Overview.md index 2e43e342a26..c7d6dd66f82 100644 --- a/docs/OV_Runtime_UG/Samples_Overview.md +++ b/docs/OV_Runtime_UG/Samples_Overview.md @@ -270,4 +270,4 @@ sample, read the sample documentation by clicking the sample name in the samples list above. ## See Also -* [OpenVINO™ Runtime User Guide](OpenVINO_Runtime_User_Guide.md) +* [OpenVINO™ Runtime User Guide](openvino_intro.md) diff --git a/docs/OV_Runtime_UG/ShapeInference.md b/docs/OV_Runtime_UG/ShapeInference.md index 43a48ce5e89..4f836e23710 100644 --- a/docs/OV_Runtime_UG/ShapeInference.md +++ b/docs/OV_Runtime_UG/ShapeInference.md @@ -1,4 +1,4 @@ -# Using the Reshape Inference Feature {#openvino_docs_IE_DG_ShapeInference} +# Changing input shapes {#openvino_docs_IE_DG_ShapeInference} ## Introduction (C++) diff --git a/docs/OV_Runtime_UG/supported_plugins/AUTO.md b/docs/OV_Runtime_UG/auto_device_selection.md similarity index 98% rename from docs/OV_Runtime_UG/supported_plugins/AUTO.md rename to docs/OV_Runtime_UG/auto_device_selection.md index 4d692abb3ab..285cdf453b8 100644 --- a/docs/OV_Runtime_UG/supported_plugins/AUTO.md +++ b/docs/OV_Runtime_UG/auto_device_selection.md @@ -1,4 +1,4 @@ -# Auto-Device Plugin {#openvino_docs_IE_DG_supported_plugins_AUTO} +# Automatic device selection {#openvino_docs_IE_DG_supported_plugins_AUTO} ## Auto-Device Plugin Execution (C++) @@ -50,7 +50,7 @@ The Auto-device plugin supports query device optimization capabilities in metric ### Enumerating Devices and Selection Logic The Inference Engine now features a dedicated API to enumerate devices and their capabilities. -See [Hello Query Device C++ Sample](../../../samples/cpp/hello_query_device/README.md). +See [Hello Query Device C++ Sample](../../samples/cpp/hello_query_device/README.md). This is the example output from the sample (truncated to device names only): ```sh @@ -208,7 +208,7 @@ The Auto-device plugin supports query device optimization capabilities in metric ### Enumerating Devices and Selection Logic -The Inference Engine now features a dedicated API to enumerate devices and their capabilities. See the [Hello Query Device Python Sample](../../../inference_engine/ie_bridges/python/sample_hello_query_device_README.html) for code. +The Inference Engine now features a dedicated API to enumerate devices and their capabilities. See the [Hello Query Device Python Sample](../../samples/python/hello_query_device/README.md) for code. 
This is the example output from the sample (truncated to device names only): diff --git a/docs/OV_Runtime_UG/supported_plugins/HETERO.md b/docs/OV_Runtime_UG/hetero_execution.md similarity index 99% rename from docs/OV_Runtime_UG/supported_plugins/HETERO.md rename to docs/OV_Runtime_UG/hetero_execution.md index 04823782c55..0d90971a37d 100644 --- a/docs/OV_Runtime_UG/supported_plugins/HETERO.md +++ b/docs/OV_Runtime_UG/hetero_execution.md @@ -154,4 +154,4 @@ where: You can point more than two devices: `-d HETERO:MYRIAD,GPU,CPU` ### See Also -[Supported Devices](Supported_Devices.md) +[Supported Devices](supported_plugins/Supported_Devices.md) diff --git a/docs/OV_Runtime_UG/supported_plugins/MULTI.md b/docs/OV_Runtime_UG/multi_device.md similarity index 91% rename from docs/OV_Runtime_UG/supported_plugins/MULTI.md rename to docs/OV_Runtime_UG/multi_device.md index 64659111a3c..e058b763886 100644 --- a/docs/OV_Runtime_UG/supported_plugins/MULTI.md +++ b/docs/OV_Runtime_UG/multi_device.md @@ -1,4 +1,4 @@ -# Multi-Device Plugin {#openvino_docs_IE_DG_supported_plugins_MULTI} +# Running on multiple device simultaneously {#openvino_docs_OV_UG_Running_on_multiple_devices} ## Introducing the Multi-Device Plugin (C++) @@ -44,7 +44,7 @@ Notice that the priorities of the devices can be changed in real time for the ex Finally, there is a way to specify number of requests that the Multi-Device will internally keep for each device. Suppose your original app was running 4 cameras with 4 inference requests. You would probably want to share these 4 requests between 2 devices used in MULTI. The easiest way is to specify a number of requests for each device using parentheses: "MULTI:CPU(2),GPU(2)" and use the same 4 requests in your app. However, such an explicit configuration is not performance-portable and hence not recommended. Instead, the better way is to configure the individual devices and query the resulting number of requests to be used at the application level (see [Configuring the Individual Devices and Creating the Multi-Device On Top](#configuring-the-individual-devices-and-creating-the-multi-device-on-top)). ### Enumerating Available Devices -The Inference Engine features a dedicated API to enumerate devices and their capabilities. See the [Hello Query Device C++ Sample](../../../samples/cpp/hello_query_device/README.md). This is example output from the sample (truncated to device names only): +The Inference Engine features a dedicated API to enumerate devices and their capabilities. See the [Hello Query Device C++ Sample](../../samples/cpp/hello_query_device/README.md). This is example output from the sample (truncated to device names only): ```sh ./hello_query_device @@ -86,13 +86,13 @@ Note that while the performance of accelerators combines really well with Multi- See the [Using the Multi-Device with OpenVINO samples and benchmarking the performance](#using-the-multi-device-with-openvino-samples-and-benchmarking-the-performance) section below. ### Querying the Optimal Number of Inference Requests -You can use the [Properties API](../PropertiesAPI.md) API to query the optimal number of requests. Similarly, when using the Multi-Device you don't need to sum over included devices yourself, you can query property directly: +You can use the [configure devices](supported_plugins/config_properties.md) to query the optimal number of requests. 
Similarly, when using the Multi-Device you don't need to sum over included devices yourself, you can query property directly: @snippet snippets/MULTI5.cpp part5 ### Using the Multi-Device with OpenVINO Samples and Benchmarking the Performance -Every OpenVINO sample that supports the `-d` (which stands for "device") command-line option transparently accepts Multi-Device. The [Benchmark Application](../../../samples/cpp/benchmark_app/README.md) is the best reference for the optimal usage of Multi-Device. As discussed earlier, you do not need to set up the number of requests, CPU streams or threads because the application provides optimal performance out of the box. Below is an example command to evaluate HDDL+GPU performance with that: +Every OpenVINO sample that supports the `-d` (which stands for "device") command-line option transparently accepts Multi-Device. The [Benchmark Application](../../samples/cpp/benchmark_app/README.md) is the best reference for the optimal usage of Multi-Device. As discussed earlier, you do not need to set up the number of requests, CPU streams or threads because the application provides optimal performance out of the box. Below is an example command to evaluate HDDL+GPU performance with that: ```sh ./benchmark_app –d MULTI:HDDL,GPU –m -i -niter 1000 @@ -110,7 +110,7 @@ The Multi-Device plugin supports FP16 IR files. The CPU plugin automatically upc @endsphinxdirective ### See Also -[Supported Devices](Supported_Devices.md) +[Supported Devices](supported_plugins/Supported_Devices.md) ## Introducing the Multi-Device Plugin (Python) @@ -182,7 +182,7 @@ You can set the configuration directly as a string, or use the metric key `MULTI ### Enumerating Available Devices -The Inference Engine features a dedicated API to enumerate devices and their capabilities. See the [Hello Query Device Python Sample](../../../samples/python/hello_query_device/README.md). This is example output from the sample (truncated to device names only): +The Inference Engine features a dedicated API to enumerate devices and their capabilities. See the [Hello Query Device Python Sample](../../samples/python/hello_query_device/README.md). This is example output from the sample (truncated to device names only): ```sh ./hello_query_device @@ -268,7 +268,7 @@ Note that while the performance of accelerators works well with Multi-Device, th ### Using the Multi-Device with OpenVINO Samples and Benchmarking the Performance -Every OpenVINO sample that supports the `-d` (which stands for "device") command-line option transparently accepts Multi-Device. The [Benchmark application](../../../tools/benchmark_tool/README.md) is the best reference for the optimal usage of Multi-Device. As discussed earlier, you do not need to set up the number of requests, CPU streams or threads because the application provides optimal performance out of the box. Below is an example command to evaluate CPU+GPU performance with the Benchmark application: +Every OpenVINO sample that supports the `-d` (which stands for "device") command-line option transparently accepts Multi-Device. The [Benchmark application](../../tools/benchmark_tool/README.md) is the best reference for the optimal usage of Multi-Device. As discussed earlier, you do not need to set up the number of requests, CPU streams or threads because the application provides optimal performance out of the box. 
Below is an example command to evaluate CPU+GPU performance with the Benchmark application: ```sh ./benchmark_app.py –d MULTI:CPU,GPU –m @@ -289,4 +289,4 @@ The Multi-Device plugin supports FP16 IR files. The CPU plugin automatically upc @endsphinxdirective ### See Also -[Supported Devices](Supported_Devices.md) \ No newline at end of file +[Supported Devices](supported_plugins/Supported_Devices.md) \ No newline at end of file diff --git a/docs/OV_Runtime_UG/OpenVINO_Runtime_User_Guide.md b/docs/OV_Runtime_UG/openvino_intro.md similarity index 51% rename from docs/OV_Runtime_UG/OpenVINO_Runtime_User_Guide.md rename to docs/OV_Runtime_UG/openvino_intro.md index deaea820d7b..e6ce0f9c6c3 100644 --- a/docs/OV_Runtime_UG/OpenVINO_Runtime_User_Guide.md +++ b/docs/OV_Runtime_UG/openvino_intro.md @@ -9,39 +9,30 @@ :hidden: openvino_docs_IE_DG_Integrate_with_customer_application_new_API + openvino_docs_OV_Runtime_UG_Model_Representation - openvino_docs_OV_Runtime_UG_Preprocessing_Overview - openvino_docs_IE_DG_ShapeInference - openvino_docs_IE_DG_Device_Plugins - - + openvino_docs_OV_UG_Working_with_devices + openvino_docs_OV_Runtime_UG_Preprocessing_Overview openvino_docs_IE_DG_DynamicBatching - openvino_docs_IE_DG_supported_plugins_AUTO - - openvino_docs_IE_DG_supported_plugins_MULTI + openvino_docs_OV_UG_Running_on_multiple_devices openvino_docs_OV_UG_Hetero_execution openvino_docs_IE_DG_network_state_intro openvino_2_0_transition_guide - - openvino_docs_deployment_optimization_guide_dldt_optimization_guide - openvino_docs_IE_DG_Model_caching_overview - openvino_docs_IE_DG_Extensibility_DG_Intro - openvino_docs_IE_DG_Int8Inference - openvino_docs_IE_DG_Bfloat16Inference - ngraph_transformation + openvino_docs_OV_Should_be_in_performance openvino_docs_OV_Runtime_API_Changes @endsphinxdirective ## Introduction -Inference Engine is a set of C++ libraries with C and Python bindings providing a common API to deliver inference solutions on the platform of your choice. Use the Inference Engine API to read the Intermediate Representation (IR), ONNX and execute the model on devices. +OpenVINO Runtime is a set of C++ libraries with C and Python bindings providing a common API to deliver inference solutions on the platform of your choice. Use the OpenVINO Runtime API to read the Intermediate Representation (IR), ONNX, PDPD file formats and execute the model on devices. + +OpenVINO runtime uses a plugin architecture. Inference plugin is a software component that contains complete implementation for inference on a certain Intel® hardware device: CPU, GPU, VPU, GNA, etc. Each plugin implements the unified API and provides additional hardware-specific APIs to configure device or interoperability API between OpenVINO Runtime and underlaying plugin backend. -Inference Engine uses a plugin architecture. Inference Engine plugin is a software component that contains complete implementation for inference on a certain Intel® hardware device: CPU, GPU, VPU, etc. Each plugin implements the unified API and provides additional hardware-specific APIs. - The scheme below illustrates the typical workflow for deploying a trained deep learning model: + ![](img/BASIC_FLOW_IE_C.svg) diff --git a/docs/OV_Runtime_UG/openvino_temporary.md b/docs/OV_Runtime_UG/openvino_temporary.md new file mode 100644 index 00000000000..df5aae0f7d4 --- /dev/null +++ b/docs/OV_Runtime_UG/openvino_temporary.md @@ -0,0 +1,20 @@ +# Should be moved to performance / extensibility {#openvino_docs_OV_Should_be_in_performance} + +@sphinxdirective + +.. 
_deep learning inference engine: + +.. toctree:: + :maxdepth: 1 + :hidden: + + openvino_docs_deployment_optimization_guide_dldt_optimization_guide + openvino_docs_IE_DG_Model_caching_overview + openvino_docs_IE_DG_Int8Inference + openvino_docs_IE_DG_Bfloat16Inference + ngraph_transformation + openvino_docs_IE_DG_Extensibility_DG_Intro + +@endsphinxdirective + +## TEMP: should be moved to performance / extensibility guides diff --git a/docs/OV_Runtime_UG/protecting_model_guide.md b/docs/OV_Runtime_UG/protecting_model_guide.md index e710eeeb668..c5d891e70cf 100644 --- a/docs/OV_Runtime_UG/protecting_model_guide.md +++ b/docs/OV_Runtime_UG/protecting_model_guide.md @@ -54,7 +54,7 @@ should be called with `weights` passed as an empty `Blob`. - Intel® Distribution of OpenVINO™ toolkit home page: [https://software.intel.com/en-us/openvino-toolkit](https://software.intel.com/en-us/openvino-toolkit) - OpenVINO™ toolkit online documentation: [https://docs.openvino.ai](https://docs.openvino.ai) - Model Optimizer Developer Guide: [Model Optimizer Developer Guide](../MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md) -- [OpenVINO™ runTime User Guide](OpenVINO_Runtime_User_Guide.md) +- [OpenVINO™ runTime User Guide](openvino_intro.md) - For more information on Sample Applications, see the [Inference Engine Samples Overview](Samples_Overview.md) - For information on a set of pre-trained models, see the [Overview of OpenVINO™ Toolkit Pre-Trained Models](@ref omz_models_group_intel) - For IoT Libraries and Code Samples see the [Intel® IoT Developer Kit](https://github.com/intel-iot-devkit). diff --git a/docs/OV_Runtime_UG/supported_plugins/CPU.md b/docs/OV_Runtime_UG/supported_plugins/CPU.md index 08eafe3ac0d..692e490afaf 100644 --- a/docs/OV_Runtime_UG/supported_plugins/CPU.md +++ b/docs/OV_Runtime_UG/supported_plugins/CPU.md @@ -1,4 +1,4 @@ -# CPU Plugin {#openvino_docs_IE_DG_supported_plugins_CPU} +# CPU device {#openvino_docs_OV_UG_supported_plugins_CPU} ## Introducing the CPU Plugin diff --git a/docs/OV_Runtime_UG/supported_plugins/Device_Plugins.md b/docs/OV_Runtime_UG/supported_plugins/Device_Plugins.md index ada0e6ce977..7ad6fbed4b8 100644 --- a/docs/OV_Runtime_UG/supported_plugins/Device_Plugins.md +++ b/docs/OV_Runtime_UG/supported_plugins/Device_Plugins.md @@ -1,4 +1,4 @@ -# Device Plugin Support {#openvino_docs_IE_DG_Device_Plugins} +# Working with devices {#openvino_docs_OV_UG_Working_with_devices} @sphinxdirective @@ -6,27 +6,30 @@ :maxdepth: 1 :hidden: - openvino_docs_IE_DG_InferenceEngine_QueryAPI - openvino_docs_IE_DG_supported_plugins_CPU - openvino_docs_IE_DG_supported_plugins_GPU + openvino_docs_OV_UG_query_api + openvino_docs_OV_UG_supported_plugins_CPU + openvino_docs_OV_UG_supported_plugins_GPU openvino_docs_IE_DG_supported_plugins_VPU - openvino_docs_IE_DG_supported_plugins_GNA + openvino_docs_OV_UG_supported_plugins_GNA @endsphinxdirective -Inference Engine uses a plugin architecture. Inference Engine plugin is a software component that contains complete implementation for inference on a certain Intel® hardware device: CPU, GPU, VPU, GNA, etc. Each plugin implements the unified API and provides additional hardware-specific APIs. 
- -The Inference Engine provides capabilities to infer deep learning models on the following device types with corresponding plugins: +The OpenVINO Runtime provides capabilities to infer deep learning models on the following device types with corresponding plugins: | Plugin | Device types | |------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------| -|[GPU plugin](GPU.md) |Intel® Processor Graphics, including Intel® HD Graphics and Intel® Iris® Graphics | |[CPU plugin](CPU.md) |Intel® Xeon® with Intel® Advanced Vector Extensions 2 (Intel® AVX2), Intel® Advanced Vector Extensions 512 (Intel® AVX-512), and AVX512_BF16, Intel® Core™ Processors with Intel® AVX2, Intel® Atom® Processors with Intel® Streaming SIMD Extensions (Intel® SSE) | +|[GPU plugin](GPU.md) |Intel® Processor Graphics, including Intel® HD Graphics and Intel® Iris® Graphics | |[VPU plugins](VPU.md) (available in the Intel® Distribution of OpenVINO™ toolkit) |Intel® Neural Compute Stick 2 powered by the Intel® Movidius™ Myriad™ X, Intel® Vision Accelerator Design with Intel® Movidius™ VPUs | |[GNA plugin](GNA.md) (available in the Intel® Distribution of OpenVINO™ toolkit) |Intel® Speech Enabling Developer Kit, Amazon Alexa* Premium Far-Field Developer Kit, Intel® Pentium® Silver J5005 Processor, Intel® Pentium® Silver N5000 Processor, Intel® Celeron® J4005 Processor, Intel® Celeron® J4105 Processor, Intel® Celeron® Processor N4100, Intel® Celeron® Processor N4000, Intel® Core™ i3-8121U Processor, Intel® Core™ i7-1065G7 Processor, Intel® Core™ i7-1060G7 Processor, Intel® Core™ i5-1035G4 Processor, Intel® Core™ i5-1035G7 Processor, Intel® Core™ i5-1035G1 Processor, Intel® Core™ i5-1030G7 Processor, Intel® Core™ i5-1030G4 Processor, Intel® Core™ i3-1005G1 Processor, Intel® Core™ i3-1000G1 Processor, Intel® Core™ i3-1000G4 Processor| -|[Multi-Device plugin](MULTI.md) |Multi-Device plugin enables simultaneous inference of the same network on several Intel® devices in parallel | -|[Auto-Device plugin](AUTO.md) |Auto-Device plugin enables selecting Intel® device for inference automatically | -|[Heterogeneous plugin](HETERO.md) |Heterogeneous plugin enables automatic inference splitting between several Intel® devices (for example if a device doesn't [support certain layers](#supported-layers)). | + +OpenVINO runtime also has several execution capabilities which work on top of other devices: + +| Capability | Description | +|------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------| +|[Multi-Device execution](../multi_device.md) |Multi-Device enables simultaneous inference of the same model on several devices in parallel | +|[Auto-Device selection](../auto_device_selection.md) |Auto-Device selection enables selecting Intel® device for inference automatically | +|[Heterogeneous execution](../hetero_execution.md) |Heterogeneous execution enables automatic inference splitting between several devices (for example if a device doesn't [support certain operation](#supported-layers)). 
| Devices similar to the ones we have used for benchmarking can be accessed using [Intel® DevCloud for the Edge](https://devcloud.intel.com/edge/), a remote development environment with access to Intel® hardware and the latest versions of the Intel® Distribution of the OpenVINO™ Toolkit. [Learn more](https://devcloud.intel.com/edge/get_started/devcloud/) or [Register here](https://inteliot.force.com/DevcloudForEdge/s/). diff --git a/docs/OV_Runtime_UG/supported_plugins/GNA.md b/docs/OV_Runtime_UG/supported_plugins/GNA.md index 4b177d98c31..497dec14ac0 100644 --- a/docs/OV_Runtime_UG/supported_plugins/GNA.md +++ b/docs/OV_Runtime_UG/supported_plugins/GNA.md @@ -1,4 +1,4 @@ -# GNA Plugin {#openvino_docs_IE_DG_supported_plugins_GNA} +# GNA device {#openvino_docs_OV_UG_supported_plugins_GNA} ## Introducing the GNA Plugin The Intel® Gaussian & Neural Accelerator is a low-power neural coprocessor for continuous inference at the edge. diff --git a/docs/OV_Runtime_UG/supported_plugins/GPU.md b/docs/OV_Runtime_UG/supported_plugins/GPU.md index 8e55e4f647d..3bba80568f7 100644 --- a/docs/OV_Runtime_UG/supported_plugins/GPU.md +++ b/docs/OV_Runtime_UG/supported_plugins/GPU.md @@ -1,4 +1,4 @@ -# GPU Plugin {#openvino_docs_IE_DG_supported_plugins_GPU} +# GPU device {#openvino_docs_OV_UG_supported_plugins_GPU} @sphinxdirective @@ -6,7 +6,7 @@ :maxdepth: 1 :hidden: - openvino_docs_IE_DG_supported_plugins_GPU_RemoteBlob_API + openvino_docs_OV_UG_supported_plugins_GPU_RemoteBlob_API @endsphinxdirective diff --git a/docs/OV_Runtime_UG/supported_plugins/GPU_RemoteBlob_API.md b/docs/OV_Runtime_UG/supported_plugins/GPU_RemoteBlob_API.md index a5232b580d9..ffb287f47d9 100644 --- a/docs/OV_Runtime_UG/supported_plugins/GPU_RemoteBlob_API.md +++ b/docs/OV_Runtime_UG/supported_plugins/GPU_RemoteBlob_API.md @@ -1,4 +1,4 @@ -Remote Blob API of GPU Plugin {#openvino_docs_IE_DG_supported_plugins_GPU_RemoteBlob_API} +Remote Blob API of GPU Plugin {#openvino_docs_OV_UG_supported_plugins_GPU_RemoteBlob_API} ================================ The GPU plugin implementation of the `RemoteContext` and `RemoteBlob` interfaces supports GPU diff --git a/docs/OV_Runtime_UG/supported_plugins/HDDL.md b/docs/OV_Runtime_UG/supported_plugins/HDDL.md index 6d7784aa9ad..94fd2417a78 100644 --- a/docs/OV_Runtime_UG/supported_plugins/HDDL.md +++ b/docs/OV_Runtime_UG/supported_plugins/HDDL.md @@ -1,4 +1,4 @@ -# HDDL Plugin {#openvino_docs_IE_DG_supported_plugins_HDDL} +# HDDL device {#openvino_docs_OV_UG_supported_plugins_HDDL} ## Introducing the HDDL Plugin diff --git a/docs/OV_Runtime_UG/supported_plugins/MYRIAD.md b/docs/OV_Runtime_UG/supported_plugins/MYRIAD.md index 53addae9c51..8ccfe223e14 100644 --- a/docs/OV_Runtime_UG/supported_plugins/MYRIAD.md +++ b/docs/OV_Runtime_UG/supported_plugins/MYRIAD.md @@ -1,4 +1,4 @@ -# MYRIAD Plugin {#openvino_docs_IE_DG_supported_plugins_MYRIAD} +# MYRIAD device {#openvino_docs_OV_UG_supported_plugins_MYRIAD} ## Introducing MYRIAD Plugin diff --git a/docs/OV_Runtime_UG/supported_plugins/Supported_Devices.md b/docs/OV_Runtime_UG/supported_plugins/Supported_Devices.md index ad13af79d3f..51ed5358068 100644 --- a/docs/OV_Runtime_UG/supported_plugins/Supported_Devices.md +++ b/docs/OV_Runtime_UG/supported_plugins/Supported_Devices.md @@ -13,9 +13,9 @@ The Inference Engine provides unique capabilities to infer deep learning models |[CPU plugin](CPU.md) |Intel® Xeon® with Intel® Advanced Vector Extensions 2 (Intel® AVX2), Intel® Advanced Vector Extensions 512 (Intel® AVX-512), and AVX512_BF16, Intel® Core™ 
Processors with Intel® AVX2, Intel® Atom® Processors with Intel® Streaming SIMD Extensions (Intel® SSE) | |[VPU plugins](VPU.md) (available in the Intel® Distribution of OpenVINO™ toolkit) |Intel® Neural Compute Stick 2 powered by the Intel® Movidius™ Myriad™ X, Intel® Vision Accelerator Design with Intel® Movidius™ VPUs | |[GNA plugin](GNA.md) (available in the Intel® Distribution of OpenVINO™ toolkit) |Intel® Speech Enabling Developer Kit, Amazon Alexa* Premium Far-Field Developer Kit, Intel® Pentium® Silver J5005 Processor, Intel® Pentium® Silver N5000 Processor, Intel® Celeron® J4005 Processor, Intel® Celeron® J4105 Processor, Intel® Celeron® Processor N4100, Intel® Celeron® Processor N4000, Intel® Core™ i3-8121U Processor, Intel® Core™ i7-1065G7 Processor, Intel® Core™ i7-1060G7 Processor, Intel® Core™ i5-1035G4 Processor, Intel® Core™ i5-1035G7 Processor, Intel® Core™ i5-1035G1 Processor, Intel® Core™ i5-1030G7 Processor, Intel® Core™ i5-1030G4 Processor, Intel® Core™ i3-1005G1 Processor, Intel® Core™ i3-1000G1 Processor, Intel® Core™ i3-1000G4 Processor| -|[Multi-Device plugin](MULTI.md) |Multi-Device plugin enables simultaneous inference of the same network on several Intel® devices in parallel | -|[Auto-Device plugin](AUTO.md) |Auto-Device plugin enables selecting Intel® device for inference automatically | -|[Heterogeneous plugin](HETERO.md) |Heterogeneous plugin enables automatic inference splitting between several Intel® devices (for example if a device doesn't [support certain layers](#supported-layers)). | +|[Multi-Device execution](../multi_device.md) |Multi-Device execution enables simultaneous inference of the same model on several devices in parallel | +|[Auto-Device plugin](../auto_device_selection.md) |Auto-Device plugin enables selecting Intel® device for inference automatically | +|[Heterogeneous plugin](../hetero_execution.md) |Heterogeneous execution enables automatic inference splitting between several devices (for example if a device doesn't [support certain operation](#supported-layers)). | Devices similar to the ones we have used for benchmarking can be accessed using [Intel® DevCloud for the Edge](https://devcloud.intel.com/edge/), a remote development environment with access to Intel® hardware and the latest versions of the Intel® Distribution of the OpenVINO™ Toolkit. [Learn more](https://devcloud.intel.com/edge/get_started/devcloud/) or [Register here](https://inteliot.force.com/DevcloudForEdge/s/). @@ -73,7 +73,7 @@ For example, the CHW value at index (c,h,w) is physically located at index (c\*H |VPU plugins |Not supported |Supported |Not supported | |GNA plugin |Supported |Supported |Not supported |
\* - currently, only a limited set of topologies might benefit from enabling the I8 model on GPU
-For [Multi-Device](MULTI.md) and [Heterogeneous](HETERO.md) execution +For [Multi-Device](../multi_device.md) and [Heterogeneous](../hetero_execution.md) executions the supported models formats depends on the actual underlying devices. _Generally, FP16 is preferable as it is most ubiquitous and performant_. ### Supported Input Precision @@ -86,7 +86,7 @@ the supported models formats depends on the actual underlying devices. _Generall |GNA plugin |Supported |Not supported |Supported |Not supported |Supported |Supported |
\* - Supported via `SetBlob` only, `GetBlob` returns FP32
-For [Multi-Device](MULTI.md) and [Heterogeneous](HETERO.md) execution +For [Multi-Device](../multi_device.md) and [Heterogeneous](../hetero_execution.md) executions the supported input precision depends on the actual underlying devices. _Generally, U8 is preferable as it is most ubiquitous_. ### Supported Output Precision @@ -97,7 +97,7 @@ the supported input precision depends on the actual underlying devices. _Genera |GPU plugin |Supported |Supported | |VPU plugins |Supported |Supported | |GNA plugin |Supported |Not supported | -For [Multi-Device](MULTI.md) and [Heterogeneous](HETERO.md) execution +For [Multi-Device](../multi_device.md) and [Heterogeneous](../hetero_execution.md) executions the supported output precision depends on the actual underlying devices. _Generally, FP32 is preferable as it is most ubiquitous_. ### Supported Input Layout diff --git a/docs/OV_Runtime_UG/supported_plugins/VPU.md b/docs/OV_Runtime_UG/supported_plugins/VPU.md index 62a390fdbb5..11f3aaa9799 100644 --- a/docs/OV_Runtime_UG/supported_plugins/VPU.md +++ b/docs/OV_Runtime_UG/supported_plugins/VPU.md @@ -1,4 +1,4 @@ -# VPU Plugins {#openvino_docs_IE_DG_supported_plugins_VPU} +# VPU devices {#openvino_docs_IE_DG_supported_plugins_VPU} @sphinxdirective @@ -6,8 +6,8 @@ :maxdepth: 1 :hidden: - openvino_docs_IE_DG_supported_plugins_MYRIAD - openvino_docs_IE_DG_supported_plugins_HDDL + openvino_docs_OV_UG_supported_plugins_MYRIAD + openvino_docs_OV_UG_supported_plugins_HDDL @endsphinxdirective @@ -138,7 +138,7 @@ In a perfect pipeline this time should be near zero, which means that the data w **Get the following message when running inference with the VPU plugin: "[VPU] Cannot convert layer due to unsupported layer type "** -This means that your topology has a layer that is unsupported by your target VPU plugin. To resolve this issue, you can implement the custom layer for the target device using the [Inference Engine Extensibility mechanism](../Extensibility_DG/Intro.md). Or, to quickly get a working prototype, you can use the heterogeneous scenario with the default fallback policy (see the [HETERO Plugin](HETERO.md) section). Use the HETERO plugin with a fallback device that supports this layer, for example, CPU: `HETERO:MYRIAD,CPU`. +This means that your topology has a layer that is unsupported by your target VPU plugin. To resolve this issue, you can implement the custom layer for the target device using the [Inference Engine Extensibility mechanism](../Extensibility_DG/Intro.md). Or, to quickly get a working prototype, you can use the heterogeneous scenario with the default fallback policy (see the [Heterogeneous execution](../hetero_execution.md) section). Use the HETERO mode with a fallback device that supports this layer, for example, CPU: `HETERO:MYRIAD,CPU`. For a list of VPU-supported layers, see the Supported Layers section of the [Supported Devices](Supported_Devices.md) page. ## Known Layers Limitations diff --git a/docs/OV_Runtime_UG/supported_plugins/config_properties.md b/docs/OV_Runtime_UG/supported_plugins/config_properties.md new file mode 100644 index 00000000000..5b0fef66c20 --- /dev/null +++ b/docs/OV_Runtime_UG/supported_plugins/config_properties.md @@ -0,0 +1,226 @@ +# Query device properties, configuration {#openvino_docs_OV_UG_query_api} + +## Query device properties and devices configuration + +The OpenVINO™ toolkit supports inferencing with several types of devices (processors or accelerators). 
+This section provides a high-level description of the process of querying of different device properties and configuration values at runtime. + +OpenVINO runtime has two types of properties: +- Read only properties which provides information about the devices (such device name, termal, execution capabilities, etc) and information about ov::CompiledModel to understand what configuration values were used to compile the model with. +- Mutable properties which are primarily used to configure ov::Core::compile_model process and affect final inference on the specific set of devices. Such properties can be set globally per device via ov::Core::set_property or locally for particular model in ov::Core::compile_model and ov::Core::query_model calls. + +OpenVINO property is represented as a named constexpr variable with a given string name and type (see ). Example: +``` +static constexpr Property, PropertyMutability::RO> available_devices{"AVAILABLE_DEVICES"}; +``` +represents a read-only property with C++ name `ov::available_devices`, string name `AVAILABLE_DEVICES` and type `std::vector`. + +Refer to the [Hello Query Device С++ Sample](../../../samples/cpp/hello_query_device/README.md) sources and the [Multi-Device execution](../multi_device.md) documentation for examples of using setting and getting properties in user applications. + +### Get a set of available devices + +Based on read-only property `ov::available_devices`, OpenVINO Core collects information about currently available devices enabled by OpenVINO plugins and returns information using the `ov::Core::get_available_devices` method: + +@sphinxdirective + +.. tab:: C++ + + .. doxygensnippet:: docs/snippets/ov_properties_api.cpp + :language: cpp + :fragment: [get_available_devices] + +.. tab:: Python + + .. doxygensnippet:: docs/snippets/ov_properties_api.py + :language: python + :fragment: [get_available_devices] + +@endsphinxdirective + +The function returns a list of available devices, for example: + +``` +MYRIAD.1.2-ma2480 +MYRIAD.1.4-ma2480 +CPU +GPU.0 +GPU.1 +``` + +If there are more than one instance of a specific device, the devices are enumerated with `.suffix` where `suffix` is a unique string identifier. Each device name can then be passed to: + +* `ov::Core::compile_model` to load the model to a specific device with specific configuration properties. +* `ov::Core::get_property` to get common or device specific properties. +* All other methods of the `ov::Core` class that accept `deviceName`. + +### Working with properties in Your Code + +The `ov::Core` class provides the following method to query device information, set or get different device configuration properties: + +* `ov::Core::get_property` - Gets the current value of a specific property. +* `ov::Core::set_property` - Sets a new value for the property globally for specified `device_name`. + +The `ov::CompiledModel` class is also extended to support the properties: + +* `ov::CompiledModel::get_property` +* `ov::CompiledModel::set_property` + +For documentation about OpenVINO common device-independent properties, refer to `openvino/runtime/properties.hpp`. Device specific configuration keys can be found in corresponding device folders (for example, `openvino/runtime/intel_gpu/properties.hpp`). 
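For illustration, the sketch below pulls these pieces together through the Python API. It is a minimal example, not part of the generated snippets: it assumes the CPU plugin is available, uses the string names of the properties (the same names carried by the C++ `constexpr` property objects), and uses `model.xml` as a hypothetical IR path.

```python
from openvino.runtime import Core

core = Core()
print(core.available_devices)            # e.g. ['CPU', 'GPU']

# Read-only property: human-readable name of the CPU device.
device_name = core.get_property("CPU", "FULL_DEVICE_NAME")

# Mutable property: set a performance hint globally for the CPU device.
core.set_property("CPU", {"PERFORMANCE_HINT": "THROUGHPUT"})

model = core.read_model("model.xml")     # hypothetical IR path
compiled_model = core.compile_model(model, "CPU")

# Compiled-model level read-only property.
nireq = compiled_model.get_property("OPTIMAL_NUMBER_OF_INFER_REQUESTS")
```

The same calls are available in C++ via `ov::Core::get_property`/`ov::Core::set_property` and `ov::CompiledModel::get_property`, using the typed property objects instead of string names.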
+ +### Working with properties via Core + +#### Getting device properties + +The code below demonstrates how to query `HETERO` device priority of devices which will be used to infer the model: + +@snippet snippets/ov_properties_api.cpp hetero_priorities + +> **NOTE**: All properties have a type, which is specified during property declaration. Based on this, actual type under `auto` is automatically deduced by C++ compiler. + +To extract device properties such as available devices (`ov::available_devices`), device name (`ov::device::full_name`), supported properties (`ov::supported_properties`), and others, use the `ov::Core::get_property` method: + +@sphinxdirective + +.. tab:: C++ + + .. doxygensnippet:: docs/snippets/ov_properties_api.cpp + :language: cpp + :fragment: [cpu_device_name] + +.. tab:: Python + + .. doxygensnippet:: docs/snippets/ov_properties_api.py + :language: python + :fragment: [cpu_device_name] + +@endsphinxdirective + +A returned value appears as follows: `Intel(R) Core(TM) i7-8700 CPU @ 3.20GHz`. + +> **NOTE**: In order to understand a list of supported properties on `ov::Core` or `ov::CompiledModel` levels, use `ov::supported_properties` which contains a vector of supported property names. Properties which can be changed, has `ov::PropertyName::is_mutable` returning the `true` value. Most of the properites which are changable on ov::Core level, cannot be changed once the model is compiled, so it becomes immutable read-only property. + +#### Configure a work with a model + +`ov::Core` methods like: + +* `ov::Core::compile_model` +* `ov::Core::import_model` +* `ov::Core::query_model` + +accept variadic list of properties as last arguments. Each property in such parameters lists should be used as function call to pass property value with specified property type. + +@sphinxdirective + +.. tab:: C++ + + .. doxygensnippet:: docs/snippets/ov_properties_api.cpp + :language: cpp + :fragment: [compile_model_with_property] + +.. tab:: Python + + .. doxygensnippet:: docs/snippets/ov_properties_api.py + :language: python + :fragment: [compile_model_with_property] + +@endsphinxdirective + +The example below specifies hints that a model should be compiled to be inferenced with multiple inference requests in parallel to achive best throughput while inference should be performed without accuracy loss with FP32 precision. + +#### Setting properties globally + +`ov::Core::set_property` with a given device name should be used to set global configuration properties which are the same accross multiple `ov::Core::compile_model`, `ov::Core::query_model`, etc. calls, while setting property on the specific `ov::Core::compile_model` call applies properties only for current call: + +@sphinxdirective + +.. tab:: C++ + + .. doxygensnippet:: docs/snippets/ov_properties_api.cpp + :language: cpp + :fragment: [core_set_property_then_compile] + +.. tab:: Python + + .. doxygensnippet:: docs/snippets/ov_properties_api.py + :language: python + :fragment: [core_set_property_then_compile] + +@endsphinxdirective + +### Properties on CompiledModel level + +#### Getting property + +The `ov::CompiledModel::get_property` method is used to get property values the compiled model has been created with or a compiled model level property such as `ov::optimal_number_of_infer_requests`: + +@sphinxdirective + +.. tab:: C++ + + .. doxygensnippet:: docs/snippets/ov_properties_api.cpp + :language: cpp + :fragment: [optimal_number_of_infer_requests] + +.. tab:: Python + + .. 
doxygensnippet:: docs/snippets/ov_properties_api.py + :language: python + :fragment: [optimal_number_of_infer_requests] + +@endsphinxdirective + +Or the current temperature of the `MYRIAD` device: + +@sphinxdirective + +.. tab:: C++ + + .. doxygensnippet:: docs/snippets/ov_properties_api.cpp + :language: cpp + :fragment: [device_thermal] + +.. tab:: Python + + .. doxygensnippet:: docs/snippets/ov_properties_api.py + :language: python + :fragment: [device_thermal] + +@endsphinxdirective + +Or the number of threads that would be used for inference on `CPU` device: + +@sphinxdirective + +.. tab:: C++ + + .. doxygensnippet:: docs/snippets/ov_properties_api.cpp + :language: cpp + :fragment: [inference_num_threads] + +.. tab:: Python + + .. doxygensnippet:: docs/snippets/ov_properties_api.py + :language: python + :fragment: [inference_num_threads] + +@endsphinxdirective + +#### Setting properties for compiled model + +The only mode that supports this method is [Multi-Device execution](../multi_device.md): + +@sphinxdirective + +.. tab:: C++ + + .. doxygensnippet:: docs/snippets/ov_properties_api.cpp + :language: cpp + :fragment: [multi_device] + +.. tab:: Python + + .. doxygensnippet:: docs/snippets/ov_properties_api.py + :language: python + :fragment: [multi_device] + +@endsphinxdirective diff --git a/docs/documentation.md b/docs/documentation.md index 731c7d7aa13..ce888b46304 100644 --- a/docs/documentation.md +++ b/docs/documentation.md @@ -93,7 +93,7 @@ This section provides reference documents that guide you through developing your With the [Model Downloader](@ref omz_tools_downloader) and [Model Optimizer](MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md) guides, you will learn to download pre-trained models and convert them for use with the OpenVINO™ toolkit. You can provide your own model or choose a public or Intel model from a broad selection provided in the [Open Model Zoo](model_zoo.md). ## Deploying Inference -The [OpenVINO™ Runtime User Guide](OV_Runtime_UG/OpenVINO_Runtime_User_Guide.md) explains the process of creating your own application that runs inference with the OpenVINO™ toolkit. The [API Reference](./api_references.html) defines the Inference Engine API for Python, C++, and C and the nGraph API for Python and C++. The Inference Engine API is what you'll use to create an OpenVINO™ application, while the nGraph API is available for using enhanced operations sets and other features. After writing your application, you can use the [Deployment Manager](install_guides/deployment-manager-tool.md) for deploying to target devices. +The [OpenVINO™ Runtime User Guide](OV_Runtime_UG/openvino_intro.md) explains the process of creating your own application that runs inference with the OpenVINO™ toolkit. The [API Reference](./api_references.html) defines the Inference Engine API for Python, C++, and C and the nGraph API for Python and C++. The Inference Engine API is what you'll use to create an OpenVINO™ application, while the nGraph API is available for using enhanced operations sets and other features. After writing your application, you can use the [Deployment Manager](install_guides/deployment-manager-tool.md) for deploying to target devices. 
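As a minimal sketch of such an application (assuming an IR file `model.xml`, inference on the CPU device, and a dummy input whose shape is chosen only for illustration):

```python
import numpy as np
from openvino.runtime import Core

core = Core()
model = core.read_model("model.xml")               # hypothetical IR path
compiled_model = core.compile_model(model, "CPU")

# Dummy input; the real shape and dtype come from your model's inputs.
input_data = np.zeros((1, 3, 224, 224), dtype=np.float32)

infer_request = compiled_model.create_infer_request()
infer_request.infer({0: input_data})
output = infer_request.get_output_tensor().data
```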
## Tuning for Performance The toolkit provides a [Performance Optimization Guide](optimization_guide/dldt_optimization_guide.md) and utilities for squeezing the best performance out of your application, including [Accuracy Checker](@ref omz_tools_accuracy_checker), [Post-Training Optimization Tool](@ref pot_README), and other tools for measuring accuracy, benchmarking performance, and tuning your application. diff --git a/docs/install_guides/installing-openvino-pip.md b/docs/install_guides/installing-openvino-pip.md index 1db00af2ee2..79606ae18a5 100644 --- a/docs/install_guides/installing-openvino-pip.md +++ b/docs/install_guides/installing-openvino-pip.md @@ -37,5 +37,5 @@ For system requirements and more detailed steps, see -Heterogeneous execution (constituted by the dedicated Inference Engine [“Hetero” plugin](../OV_Runtime_UG/supported_plugins/HETERO.md)) enables to schedule a network inference to the multiple devices. +Heterogeneous execution (constituted by the dedicated Inference Engine [“Hetero” device](../OV_Runtime_UG/hetero_execution.md)) enables to schedule a network inference to the multiple devices. ### Typical Heterogeneous Scenarios of Concern @@ -231,7 +231,7 @@ Similarly, if there are too much subgraphs, the synchronization and data transfe The general affinity “rule of thumb” is to keep computationally-intensive kernels on the accelerator, and "glue" or helper kernels on the CPU. Notice that this includes the granularity considerations. For example, running some custom activation (that comes after every accelerator-equipped convolution) on the CPU might result in performance degradation due to too much data type and/or layout conversions, even though the activation itself can be extremely fast. In this case, it might make sense to consider implementing the kernel for the accelerator (see Optimizing Custom Kernels). The conversions typically manifest themselves as outstanding (comparing to CPU-only execution) 'Reorder' entries (see Internal Inference Performance Counters). -For general details on the heterogeneous plugin, refer to the [corresponding section in the Inference Engine Developer Guide](../OV_Runtime_UG/supported_plugins/HETERO.md). +For general details on the heterogeneous mode, refer to the [Heterogeneous execution guide](../OV_Runtime_UG/hetero_execution.md). ### Trying the Heterogeneous Plugin with Inference Engine Samples @@ -270,7 +270,7 @@ The following tips are provided to give general guidance on optimizing execution ### Analyzing Heterogeneous Execution -There is a dedicated configuration option that enables dumping the visualization of the subgraphs created by the heterogeneous plugin, please see code example in the [HETERO plugin documentation](../OV_Runtime_UG/supported_plugins/HETERO.md) +There is a dedicated configuration option that enables dumping the visualization of the subgraphs created by the heterogeneous mode, please see code example in the [Heterogeneous execution guide](../OV_Runtime_UG/hetero_execution.md) After enabling the configuration key, the heterogeneous plugin generates two files: @@ -279,10 +279,10 @@ After enabling the configuration key, the heterogeneous plugin generates two fil You can use GraphViz\* utility or `.dot` converters (for example, to `.png` or `.pdf`), like xdot\*, available on Linux\* OS with `sudo apt-get install xdot`. -You can also use performance data (in the [Benchmark App](../../samples/cpp/benchmark_app/README.md), it is an option `-pc`) to get performance data on each subgraph. 
Again, refer to the [HETERO plugin documentation](../OV_Runtime_UG/supported_plugins/HETERO.md) and to Internal Inference Performance Counters for a general counters information. +You can also use performance data (in the [Benchmark App](../../samples/cpp/benchmark_app/README.md), it is an option `-pc`) to get performance data on each subgraph. Again, refer to the [Heterogeneous execution guide](../OV_Runtime_UG/hetero_execution.md) and to Internal Inference Performance Counters for a general counters information. ## Multi-Device Execution -OpenVINO™ toolkit supports automatic multi-device execution, please see [MULTI-Device plugin description](../OV_Runtime_UG/supported_plugins/MULTI.md). +OpenVINO™ toolkit supports automatic multi-device execution, please see [Multi-Device execution](../OV_Runtime_UG/multi_device.md) description. In the next chapter you can find the device-specific tips, while this section covers few recommendations for the multi-device execution: - MULTI usually performs best when the fastest device is specified first in the list of the devices. diff --git a/docs/optimization_guide/dldt_deployment_optimization_guide_additional.md b/docs/optimization_guide/dldt_deployment_optimization_guide_additional.md index 9f685e57468..fd70c080d61 100644 --- a/docs/optimization_guide/dldt_deployment_optimization_guide_additional.md +++ b/docs/optimization_guide/dldt_deployment_optimization_guide_additional.md @@ -26,7 +26,7 @@ usually gives the same best latency as a single request on the single socket, bu $ benchmark_app -m -nstreams 2 ``` Number of NUMA nodes on the machine can be queried via 'lscpu'. -Please see more on the NUMA support in the [Optimization Guide](../OV_Runtime_UG/supported_plugins/MULTI.md). +Please see more on the NUMA support in the [Optimization Guide](../OV_Runtime_UG/multi_device.md). ## Threading diff --git a/docs/snippets/ov_hetero.cpp b/docs/snippets/ov_hetero.cpp index 52874aea2bc..c21a70be639 100644 --- a/docs/snippets/ov_hetero.cpp +++ b/docs/snippets/ov_hetero.cpp @@ -43,9 +43,12 @@ auto compiled_model = core.compile_model(model, device); { //! [configure_fallback_devices] auto compiled_model = core.compile_model(model, "HETERO", - ov::device::priorities("GPU", "CPU"), // GPU with fallback to CPU - ov::device::properties("CPU", ov::enable_profiling(true)), // profiling is enabled only for CPU - ov::device::properties("GPU", ov::hint::inference_precision(ov::element::f16)) // FP16 inference precision only for GPU + // GPU with fallback to CPU + ov::device::priorities("GPU", "CPU"), + // profiling is enabled only for CPU + ov::device::properties("CPU", ov::enable_profiling(true)), + // FP16 inference precision only for GPU + ov::device::properties("GPU", ov::hint::inference_precision(ov::element::f16)) ); //! [configure_fallback_devices] } diff --git a/docs/snippets/ov_hetero.py b/docs/snippets/ov_hetero.py index 4f42847efe0..52874aea2bc 100644 --- a/docs/snippets/ov_hetero.py +++ b/docs/snippets/ov_hetero.py @@ -34,12 +34,10 @@ auto compiled_model = core.compile_model(model, device); //! 
[compile_model] { auto compiled_model = core.compile_model(model, "HETERO:GPU,CPU"); -} -{ - auto compiled_model = core.compile_model(model, "HETERO", ov::device::priorities("GPU", "CPU")); -} -{ - auto compiled_model = core.compile_model(model, "HETERO", ov::device::priorities("GPU,CPU")); + // or with ov::device::priorities with multiple args + compiled_model = core.compile_model(model, "HETERO", ov::device::priorities("GPU", "CPU")); + // or with ov::device::priorities with a single argument + compiled_model = core.compile_model(model, "HETERO", ov::device::priorities("GPU,CPU")); } //! [compile_model] { diff --git a/docs/snippets/ov_properties_api.cpp b/docs/snippets/ov_properties_api.cpp index 2e3d761cc2b..1d971f52ced 100644 --- a/docs/snippets/ov_properties_api.cpp +++ b/docs/snippets/ov_properties_api.cpp @@ -1,47 +1,67 @@ #include int main() { -//! [part0] +//! [get_available_devices] ov::Core core; -auto available_devices = core.get_available_devices(); -//! [part0] +std::vector available_devices = core.get_available_devices(); +//! [get_available_devices] -//! [part1] +//! [hetero_priorities] auto device_priorites = core.get_property("HETERO", ov::device::priorities); -//! [part1] +//! [hetero_priorities] -//! [part2] -auto cpu_device_name = core.get_property("GPU", ov::device::full_name); -//! [part2] +//! [cpu_device_name] +auto cpu_device_name = core.get_property("CPU", ov::device::full_name); +//! [cpu_device_name] -//! [part3] auto model = core.read_model("sample.xml"); { - auto compiled_model = core.compile_model(model, "CPU", - ov::hint::performance_mode(ov::hint::PerformanceMode::THROUGHPUT), - ov::hint::inference_precision(ov::element::f32)); +//! [compile_model_with_property] +auto compiled_model = core.compile_model(model, "CPU", + ov::hint::performance_mode(ov::hint::PerformanceMode::THROUGHPUT), + ov::hint::inference_precision(ov::element::f32)); +//! [compile_model_with_property] } -//! [part3] -//! [part4] { - auto compiled_model = core.compile_model(model, "CPU"); - auto nireq = compiled_model.get_property(ov::optimal_number_of_infer_requests); +//! [optimal_number_of_infer_requests] +auto compiled_model = core.compile_model(model, "CPU"); +auto nireq = compiled_model.get_property(ov::optimal_number_of_infer_requests); +//! [optimal_number_of_infer_requests] +} +{ +//! [core_set_property_then_compile] +// set letency hint is a default for CPU +core.set_property("CPU", ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY)); +// compiled with latency configuration hint +auto compiled_model_latency = core.compile_model(model, "CPU"); +// compiled with overriden ov::hint::performance_mode value +auto compiled_model_thrp = core.compile_model(model, "CPU", + ov::hint::performance_mode(ov::hint::PerformanceMode::THROUGHPUT)); +//! [core_set_property_then_compile] } -//! [part4] -//! [part5] { - auto compiled_model = core.compile_model(model, "MYRIAD"); - auto temperature = compiled_model.get_property(ov::device::thermal); +//! [device_thermal] +auto compiled_model = core.compile_model(model, "MYRIAD"); +float temperature = compiled_model.get_property(ov::device::thermal); +//! [device_thermal] } -//! [part5] -//! [part6] { - auto compiled_model = core.compile_model(model, "CPU"); - auto nthreads = compiled_model.get_property(ov::inference_num_threads); +//! [inference_num_threads] +auto compiled_model = core.compile_model(model, "CPU"); +auto nthreads = compiled_model.get_property(ov::inference_num_threads); +//! [inference_num_threads] +} + +{ +//! 
[multi_device] +auto compiled_model = core.compile_model(model, "MULTI", + ov::device::priorities("CPU", "GPU")); +// change the order of priorities +compiled_model.set_property(ov::device::priorities("GPU", "CPU")); +//! [multi_device] } -//! [part6] return 0; } diff --git a/docs/snippets/ov_properties_api.py b/docs/snippets/ov_properties_api.py new file mode 100644 index 00000000000..1d971f52ced --- /dev/null +++ b/docs/snippets/ov_properties_api.py @@ -0,0 +1,67 @@ +#include + +int main() { +//! [get_available_devices] +ov::Core core; +std::vector available_devices = core.get_available_devices(); +//! [get_available_devices] + +//! [hetero_priorities] +auto device_priorites = core.get_property("HETERO", ov::device::priorities); +//! [hetero_priorities] + +//! [cpu_device_name] +auto cpu_device_name = core.get_property("CPU", ov::device::full_name); +//! [cpu_device_name] + +auto model = core.read_model("sample.xml"); +{ +//! [compile_model_with_property] +auto compiled_model = core.compile_model(model, "CPU", + ov::hint::performance_mode(ov::hint::PerformanceMode::THROUGHPUT), + ov::hint::inference_precision(ov::element::f32)); +//! [compile_model_with_property] +} + +{ +//! [optimal_number_of_infer_requests] +auto compiled_model = core.compile_model(model, "CPU"); +auto nireq = compiled_model.get_property(ov::optimal_number_of_infer_requests); +//! [optimal_number_of_infer_requests] +} +{ +//! [core_set_property_then_compile] +// set letency hint is a default for CPU +core.set_property("CPU", ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY)); +// compiled with latency configuration hint +auto compiled_model_latency = core.compile_model(model, "CPU"); +// compiled with overriden ov::hint::performance_mode value +auto compiled_model_thrp = core.compile_model(model, "CPU", + ov::hint::performance_mode(ov::hint::PerformanceMode::THROUGHPUT)); +//! [core_set_property_then_compile] +} + +{ +//! [device_thermal] +auto compiled_model = core.compile_model(model, "MYRIAD"); +float temperature = compiled_model.get_property(ov::device::thermal); +//! [device_thermal] +} + +{ +//! [inference_num_threads] +auto compiled_model = core.compile_model(model, "CPU"); +auto nthreads = compiled_model.get_property(ov::inference_num_threads); +//! [inference_num_threads] +} + +{ +//! [multi_device] +auto compiled_model = core.compile_model(model, "MULTI", + ov::device::priorities("CPU", "GPU")); +// change the order of priorities +compiled_model.set_property(ov::device::priorities("GPU", "CPU")); +//! [multi_device] +} +return 0; +} diff --git a/samples/cpp/benchmark_app/README.md b/samples/cpp/benchmark_app/README.md index 08f7491fbb8..3470480b968 100644 --- a/samples/cpp/benchmark_app/README.md +++ b/samples/cpp/benchmark_app/README.md @@ -56,7 +56,7 @@ Note that the benchmark_app usually produces optimal performance for any device But it is still may be sub-optimal for some cases, especially for very small networks. More details can read in [Performance Optimization Guide](../../../docs/optimization_guide/dldt_optimization_guide.md). -As explained in the [Performance Optimization Guide](../../../docs/optimization_guide/dldt_optimization_guide.md) section, for all devices, including new [MULTI device](../../../docs/OV_Runtime_UG/supported_plugins/MULTI.md) it is preferable to use the FP16 IR for the model. 
+As explained in the [Performance Optimization Guide](../../../docs/optimization_guide/dldt_optimization_guide.md) section, for all devices, including new [MULTI device](../../../docs/OV_Runtime_UG/multi_device.md) it is preferable to use the FP16 IR for the model. Also if latency of the CPU inference on the multi-socket machines is of concern, please refer to the same [Performance Optimization Guide](../../../docs/optimization_guide/dldt_optimization_guide.md). diff --git a/samples/cpp/hello_query_device/README.md b/samples/cpp/hello_query_device/README.md index a3cd7ae034f..d783f234627 100644 --- a/samples/cpp/hello_query_device/README.md +++ b/samples/cpp/hello_query_device/README.md @@ -1,6 +1,6 @@ # Hello Query Device C++ Sample {#openvino_inference_engine_samples_hello_query_device_README} -This sample demonstrates how to execute an query OpenVINO™ Runtime devices, prints their metrics and default configuration values, using [Properties API](../../../docs/OV_Runtime_UG/PropertiesAPI.md). +This sample demonstrates how to execute an query OpenVINO™ Runtime devices, prints their metrics and default configuration values, using [Properties API](../../../docs/OV_Runtime_UG/supported_plugins/config_properties.md). The following C++ API is used in the application: @@ -13,7 +13,7 @@ Basic OpenVINO™ Runtime API is covered by [Hello Classification C++ sample](.. | Options | Values | | :--- |:--- | Supported devices | [All](../../../docs/OV_Runtime_UG/supported_plugins/Supported_Devices.md) | -| Other language realization | [Python](../../../samples/python/hello_query_device/README.md) | +| Other language realization | [Python](../../python/hello_query_device/README.md) | ## How It Works diff --git a/samples/python/hello_query_device/README.md b/samples/python/hello_query_device/README.md index 67087932a69..aa934529df0 100644 --- a/samples/python/hello_query_device/README.md +++ b/samples/python/hello_query_device/README.md @@ -1,6 +1,6 @@ # Hello Query Device Python* Sample {#openvino_inference_engine_ie_bridges_python_sample_hello_query_device_README} -This sample demonstrates how to show OpenVINO™ Runtime devices and prints their metrics and default configuration values using [Query Device API feature](../../../docs/OV_Runtime_UG/PropertiesAPI.md). +This sample demonstrates how to show OpenVINO™ Runtime devices and prints their metrics and default configuration values using [Query Device API feature](../../../docs/OV_Runtime_UG/supported_plugins/config_properties.md). The following Python API is used in the application: diff --git a/src/inference/include/openvino/runtime/core.hpp b/src/inference/include/openvino/runtime/core.hpp index d22e071eb43..e98928f32a3 100644 --- a/src/inference/include/openvino/runtime/core.hpp +++ b/src/inference/include/openvino/runtime/core.hpp @@ -644,23 +644,23 @@ public: * @brief Creates a new remote shared context object on the specified accelerator device * using specified plugin-specific low-level device API parameters (device handle, pointer, context, etc.). * @param device_name Name of a device to create a new shared context on. - * @param properties Map of device-specific shared context properties. + * @param remote_properties Map of device-specific shared context remote properties. * @return Reference to a created remote context. 
*/ - RemoteContext create_context(const std::string& device_name, const AnyMap& properties); + RemoteContext create_context(const std::string& device_name, const AnyMap& remote_properties); /** * @brief Creates a new shared context object on specified accelerator device * using specified plugin-specific low level device API properties (device handle, pointer, etc.) * @tparam Properties Should be the pack of `std::pair` types * @param device_name Name of a device to create new shared context on. - * @param properties Pack of device-specific shared context properties. + * @param remote_properties Pack of device-specific shared context remote properties. * @return A shared pointer to a created remote context. */ template util::EnableIfAllStringAny create_context(const std::string& device_name, - Properties&&... properties) { - return create_context(device_name, AnyMap{std::forward(properties)...}); + Properties&&... remote_properties) { + return create_context(device_name, AnyMap{std::forward(remote_properties)...}); } /** diff --git a/src/inference/include/openvino/runtime/intel_gna/properties.hpp b/src/inference/include/openvino/runtime/intel_gna/properties.hpp index 3e67440064a..fc7d5d3599f 100644 --- a/src/inference/include/openvino/runtime/intel_gna/properties.hpp +++ b/src/inference/include/openvino/runtime/intel_gna/properties.hpp @@ -13,6 +13,10 @@ #include "openvino/runtime/properties.hpp" namespace ov { + +/** + * @brief Namespace with Intel GNA specific properties + */ namespace intel_gna { /** diff --git a/src/inference/include/openvino/runtime/intel_gpu/ocl/ocl.hpp b/src/inference/include/openvino/runtime/intel_gpu/ocl/ocl.hpp index 6faff0955f8..3923fb2d81c 100644 --- a/src/inference/include/openvino/runtime/intel_gpu/ocl/ocl.hpp +++ b/src/inference/include/openvino/runtime/intel_gpu/ocl/ocl.hpp @@ -21,6 +21,10 @@ namespace ov { namespace intel_gpu { + +/** + * @brief Namespace with Intel GPU OpenCL specific remote objects + */ namespace ocl { /** diff --git a/src/inference/include/openvino/runtime/intel_gpu/properties.hpp b/src/inference/include/openvino/runtime/intel_gpu/properties.hpp index 3604f9f3eb5..a52081e1ad6 100644 --- a/src/inference/include/openvino/runtime/intel_gpu/properties.hpp +++ b/src/inference/include/openvino/runtime/intel_gpu/properties.hpp @@ -13,6 +13,10 @@ #include "openvino/runtime/properties.hpp" namespace ov { + +/** + * @brief Namespace with Intel GPU specific properties + */ namespace intel_gpu { /** diff --git a/src/inference/include/openvino/runtime/intel_myriad/hddl_properties.hpp b/src/inference/include/openvino/runtime/intel_myriad/hddl_properties.hpp index 73964c8a120..f24d71a73d3 100644 --- a/src/inference/include/openvino/runtime/intel_myriad/hddl_properties.hpp +++ b/src/inference/include/openvino/runtime/intel_myriad/hddl_properties.hpp @@ -8,8 +8,14 @@ namespace ov { namespace intel_myriad { + +/** + * @brief Namespace with Intel HDDL specific properties + */ namespace hddl { + // RO properties + /** * @brief Property to get a int of the device number */ @@ -169,6 +175,7 @@ static constexpr Property use_sgad{"HDDL_USE_SGAD" * Each device has their own group id. Device in one group shares same group id. 
*/ static constexpr Property group_device{"HDDL_GROUP_DEVICE"}; + } // namespace hddl } // namespace intel_myriad -}; // namespace ov +} // namespace ov diff --git a/src/inference/include/openvino/runtime/intel_myriad/myriad_properties.hpp b/src/inference/include/openvino/runtime/intel_myriad/myriad_properties.hpp index 0d9adc71956..db4b4403256 100644 --- a/src/inference/include/openvino/runtime/intel_myriad/myriad_properties.hpp +++ b/src/inference/include/openvino/runtime/intel_myriad/myriad_properties.hpp @@ -8,7 +8,12 @@ #include namespace ov { + +/** + * @brief Namespace with Intel MYRIAD specific properties + */ namespace intel_myriad { + /** * @brief Turn on HW stages usage (applicable for MyriadX devices only). */ @@ -126,4 +131,4 @@ inline std::istream& operator>>(std::istream& is, DDRType& ddrType) { static constexpr Property ddr_type{"MYRIAD_DDR_TYPE"}; } // namespace intel_myriad -}; // namespace ov +} // namespace ov diff --git a/src/inference/include/openvino/runtime/properties.hpp b/src/inference/include/openvino/runtime/properties.hpp index a4e67f2595a..b3935c0f96e 100644 --- a/src/inference/include/openvino/runtime/properties.hpp +++ b/src/inference/include/openvino/runtime/properties.hpp @@ -220,6 +220,9 @@ static constexpr Property model_name{"NETWO static constexpr Property optimal_number_of_infer_requests{ "OPTIMAL_NUMBER_OF_INFER_REQUESTS"}; +/** + * @brief Namespace with hint properties + */ namespace hint { /** @@ -345,6 +348,9 @@ static constexpr Property allow_auto_batching{"ALL */ static constexpr Property enable_profiling{"PERF_COUNT"}; +/** + * @brief Namespace with log level property and its possible values + */ namespace log { /** @@ -468,6 +474,9 @@ static constexpr Property max_batch_size{"MAX_ static constexpr Property, PropertyMutability::RO> range_for_async_infer_requests{"RANGE_FOR_ASYNC_INFER_REQUESTS"}; +/** + * @brief Namespace with device properties + */ namespace device { /** @@ -616,6 +625,10 @@ static constexpr Property thermal{"DEVICE_THERMAL * @brief Read-only property to get a std::vector of capabilities options per device. 
*/ static constexpr Property, PropertyMutability::RO> capabilities{"OPTIMIZATION_CAPABILITIES"}; + +/** + * @brief Namespace with possible values for ov::device::capabilities property + */ namespace capability { constexpr static const auto FP32 = "FP32"; //!< Device supports fp32 inference constexpr static const auto BF16 = "BF16"; //!< Device supports bf16 inference @@ -626,8 +639,12 @@ constexpr static const auto BIN = "BIN"; //!< Device suppor constexpr static const auto WINOGRAD = "WINOGRAD"; //!< Device supports winograd optimization constexpr static const auto EXPORT_IMPORT = "EXPORT_IMPORT"; //!< Device supports compiled model export and import } // namespace capability + } // namespace device +/** + * @brief Namespace for streams in streams executor + */ namespace streams { /** * @brief Class to represent number of streams in streams executor diff --git a/tools/pot/openvino/tools/pot/api/samples/speech/README.md b/tools/pot/openvino/tools/pot/api/samples/speech/README.md index d25bc863b98..d149cedcf41 100644 --- a/tools/pot/openvino/tools/pot/api/samples/speech/README.md +++ b/tools/pot/openvino/tools/pot/api/samples/speech/README.md @@ -1,6 +1,6 @@ # API usage sample for speech task on GNA {#pot_sample_speech_README} -This sample demonstrates the use of the [Post-training Optimization Tool API](@ref pot_compression_api_README) for the task of quantizing a speech model for [GNA](@ref openvino_docs_IE_DG_supported_plugins_GNA) device. +This sample demonstrates the use of the [Post-training Optimization Tool API](@ref pot_compression_api_README) for the task of quantizing a speech model for [GNA](@ref openvino_docs_OV_UG_supported_plugins_GNA) device. Quantization for GNA is different from CPU quantization due to device specific: GNA supports quantized inputs in INT16 and INT32 (for activations) precision and quantized weights in INT8 and INT16 precision. This sample contains pre-selected quantization options based on the DefaultQuantization algorithm and created for models from [Kaldi](http://kaldi-asr.org/doc/) framework, and its data format. 
From a3004e7d8098d413e97ba847c360741b31f6e019 Mon Sep 17 00:00:00 2001 From: Alexey Lebedev Date: Tue, 22 Feb 2022 14:48:55 +0300 Subject: [PATCH 064/310] [PYTHON API] reshape helper (#10402) * Add reshape helper * add dimension(range) * Add partial_shape helper * Fix code style * fix comments * Split reshape on several overloads * Fix code style * correct exception * remove range support * fix code style * Add exception * Dimension from str, PartialShape from str, reshape(str) support * Apply review comments * Add default init for shape * Add PS syntax examples * Remove pshape parsing from benchmark_app * Update src/bindings/python/src/pyopenvino/graph/model.cpp Co-authored-by: Sergey Lyalin * Update src/bindings/python/src/pyopenvino/graph/model.cpp Co-authored-by: Sergey Lyalin * Apply suggestions from code review Co-authored-by: Sergey Lyalin Co-authored-by: Sergey Lyalin --- .../python/src/pyopenvino/core/common.cpp | 104 ++++++++++++++++ .../python/src/pyopenvino/core/common.hpp | 6 + .../python/src/pyopenvino/graph/dimension.cpp | 5 + .../python/src/pyopenvino/graph/model.cpp | 111 ++++++++++++++---- .../src/pyopenvino/graph/partial_shape.cpp | 17 +-- .../python/src/pyopenvino/graph/shape.cpp | 1 + .../test_inference_engine/test_function.py | 73 ++++++++++++ .../python/tests/test_ngraph/test_core.py | 62 ++++++++++ .../openvino/tools/benchmark/utils/utils.py | 49 +------- 9 files changed, 354 insertions(+), 74 deletions(-) diff --git a/src/bindings/python/src/pyopenvino/core/common.cpp b/src/bindings/python/src/pyopenvino/core/common.cpp index 33855935143..3ff7a32d465 100644 --- a/src/bindings/python/src/pyopenvino/core/common.cpp +++ b/src/bindings/python/src/pyopenvino/core/common.cpp @@ -6,6 +6,8 @@ #include +#include "openvino/util/common_util.hpp" + #define C_CONTIGUOUS py::detail::npy_api::constants::NPY_ARRAY_C_CONTIGUOUS_ namespace Common { @@ -88,6 +90,108 @@ ov::Tensor tensor_from_numpy(py::array& array, bool shared_memory) { return tensor; } +ov::PartialShape partial_shape_from_list(const py::list& shape) { + using value_type = ov::Dimension::value_type; + ov::PartialShape pshape; + for (py::handle dim : shape) { + if (py::isinstance(dim)) { + pshape.insert(pshape.end(), ov::Dimension(dim.cast())); + } else if (py::isinstance(dim)) { + pshape.insert(pshape.end(), Common::dimension_from_str(dim.cast())); + } else if (py::isinstance(dim)) { + pshape.insert(pshape.end(), dim.cast()); + } else if (py::isinstance(dim) || py::isinstance(dim)) { + py::list bounded_dim = dim.cast(); + if (bounded_dim.size() != 2) { + throw py::type_error("Two elements are expected in tuple(lower, upper) for dynamic dimension, but " + + std::to_string(bounded_dim.size()) + " elements were given."); + } + if (!(py::isinstance(bounded_dim[0]) && py::isinstance(bounded_dim[1]))) { + throw py::type_error("Incorrect pair of types (" + std::string(bounded_dim[0].get_type().str()) + ", " + + std::string(bounded_dim[1].get_type().str()) + + ") for dynamic dimension, ints are expected."); + } + pshape.insert(pshape.end(), + ov::Dimension(bounded_dim[0].cast(), bounded_dim[1].cast())); + } else { + throw py::type_error("Incorrect type " + std::string(dim.get_type().str()) + + " for dimension. 
Expected types are: " + "int, str, openvino.runtime.Dimension, list/tuple with lower and upper values for " + "dynamic dimension."); + } + } + return pshape; +} + +bool check_all_digits(const std::string& value) { + auto val = ov::util::trim(value); + for (const auto& c : val) { + if (!std::isdigit(c) || c == '-') { + return false; + } + } + return true; +} + +template +T stringToType(const std::string& valStr) { + T ret{0}; + std::istringstream ss(valStr); + if (!ss.eof()) { + ss >> ret; + } + return ret; +} + +ov::Dimension dimension_from_str(const std::string& value) { + using value_type = ov::Dimension::value_type; + auto val = ov::util::trim(value); + if (val == "?" || val == "-1") { + return {-1}; + } + if (val.find("..") == std::string::npos) { + OPENVINO_ASSERT(Common::check_all_digits(val), "Cannot parse dimension: \"", val, "\""); + return {Common::stringToType(val)}; + } + + std::string min_value_str = val.substr(0, val.find("..")); + OPENVINO_ASSERT(Common::check_all_digits(min_value_str), "Cannot parse min bound: \"", min_value_str, "\""); + + value_type min_value; + if (min_value_str.empty()) { + min_value = 0; + } else { + min_value = Common::stringToType(min_value_str); + } + + std::string max_value_str = val.substr(val.find("..") + 2); + value_type max_value; + if (max_value_str.empty()) { + max_value = -1; + } else { + max_value = Common::stringToType(max_value_str); + } + + OPENVINO_ASSERT(Common::check_all_digits(max_value_str), "Cannot parse max bound: \"", max_value_str, "\""); + + return {min_value, max_value}; +} + +ov::PartialShape partial_shape_from_str(const std::string& value) { + auto val = ov::util::trim(value); + if (val == "...") { + return ov::PartialShape::dynamic(); + } + ov::PartialShape res; + std::stringstream ss(val); + std::string field; + while (getline(ss, field, ',')) { + OPENVINO_ASSERT(!field.empty(), "Cannot get vector of dimensions! 
\"", val, "\" is incorrect"); + res.insert(res.end(), Common::dimension_from_str(field)); + } + return res; +} + py::array as_contiguous(py::array& array, ov::element::Type type) { switch (type) { // floating diff --git a/src/bindings/python/src/pyopenvino/core/common.hpp b/src/bindings/python/src/pyopenvino/core/common.hpp index 12108096116..8a3199643b2 100644 --- a/src/bindings/python/src/pyopenvino/core/common.hpp +++ b/src/bindings/python/src/pyopenvino/core/common.hpp @@ -33,6 +33,12 @@ ov::Tensor tensor_from_pointer(py::array& array, const ov::Shape& shape); ov::Tensor tensor_from_numpy(py::array& array, bool shared_memory); +ov::PartialShape partial_shape_from_list(const py::list& shape); + +ov::PartialShape partial_shape_from_str(const std::string& value); + +ov::Dimension dimension_from_str(const std::string& value); + py::array as_contiguous(py::array& array, ov::element::Type type); const ov::Tensor& cast_to_tensor(const py::handle& tensor); diff --git a/src/bindings/python/src/pyopenvino/graph/dimension.cpp b/src/bindings/python/src/pyopenvino/graph/dimension.cpp index bec1643aee6..4475abdb4a5 100644 --- a/src/bindings/python/src/pyopenvino/graph/dimension.cpp +++ b/src/bindings/python/src/pyopenvino/graph/dimension.cpp @@ -11,6 +11,7 @@ #include #include +#include "pyopenvino/core/common.hpp" #include "pyopenvino/graph/dimension.hpp" namespace py = pybind11; @@ -41,6 +42,10 @@ void regclass_graph_Dimension(py::module m) { :type max_dimension: int )"); + dim.def(py::init([](const std::string& value) { + return Common::dimension_from_str(value); + })); + dim.def_static("dynamic", &ov::Dimension::dynamic); dim.def_property_readonly("is_dynamic", diff --git a/src/bindings/python/src/pyopenvino/graph/model.cpp b/src/bindings/python/src/pyopenvino/graph/model.cpp index 3d4e09b39bb..d4a17bd0686 100644 --- a/src/bindings/python/src/pyopenvino/graph/model.cpp +++ b/src/bindings/python/src/pyopenvino/graph/model.cpp @@ -264,49 +264,114 @@ void regclass_graph_Model(py::module m) { [](ov::Model& self, const ov::PartialShape& partial_shape) { self.reshape(partial_shape); }, - py::arg("partial_shapes"), + py::arg("partial_shape"), R"( - :param partial_shapes: Index of Output. - :type partial_shapes: PartialShape + :param partial_shape: New shape. + :type partial_shape: PartialShape :return : void )"); function.def( "reshape", - [](ov::Model& self, const std::map& partial_shapes) { - self.reshape(partial_shapes); + [](ov::Model& self, const py::list& partial_shape) { + self.reshape(Common::partial_shape_from_list(partial_shape)); }, - py::arg("partial_shapes"), + py::arg("partial_shape"), R"( - - :param partial_shapes: Index of Output. - :type partial_shapes: Dict[int, PartialShape] - :return: void + :param partial_shape: New shape. + :type partial_shape: list + :return : void )"); function.def( "reshape", - [](ov::Model& self, const std::map& partial_shapes) { - self.reshape(partial_shapes); + [](ov::Model& self, const py::tuple& partial_shape) { + self.reshape(Common::partial_shape_from_list(partial_shape.cast())); }, - py::arg("partial_shapes"), + py::arg("partial_shape"), R"( - :param partial_shapes: Index of Output. - :type partial_shapes: Dict[string, PartialShape] - :return: void + :param partial_shape: New shape. 
+ :type partial_shape: tuple + :return : void )"); function.def( "reshape", - [](ov::Model& self, const std::map, ov::PartialShape>& partial_shapes) { - self.reshape(partial_shapes); + [](ov::Model& self, const std::string& partial_shape) { + self.reshape(Common::partial_shape_from_str(partial_shape)); + }, + py::arg("partial_shape"), + R"( + :param partial_shape: New shape. + :type partial_shape: str + :return : void + )"); + + function.def( + "reshape", + [](ov::Model& self, const py::dict& partial_shapes) { + std::map, ov::PartialShape> new_shapes; + for (const auto& item : partial_shapes) { + std::pair, ov::PartialShape> new_shape; + // check keys + if (py::isinstance(item.first)) { + new_shape.first = self.input(item.first.cast()); + } else if (py::isinstance(item.first)) { + new_shape.first = self.input(item.first.cast()); + } else if (py::isinstance>(item.first)) { + new_shape.first = item.first.cast>(); + } else { + throw py::type_error("Incorrect key type " + std::string(item.first.get_type().str()) + + " to reshape a model, expected keys as openvino.runtime.Output, int or str."); + } + // check values + if (py::isinstance(item.second)) { + new_shape.second = item.second.cast(); + } else if (py::isinstance(item.second) || py::isinstance(item.second)) { + new_shape.second = Common::partial_shape_from_list(item.second.cast()); + } else if (py::isinstance(item.second)) { + new_shape.second = Common::partial_shape_from_str(item.second.cast()); + } else { + throw py::type_error( + "Incorrect value type " + std::string(item.second.get_type().str()) + + " to reshape a model, expected values as openvino.runtime.PartialShape, str, list or tuple."); + } + new_shapes.insert(new_shape); + } + self.reshape(new_shapes); }, py::arg("partial_shapes"), - R"( - :param partial_shapes: Index of Output. - :type partial_shapes: Dict[Output, PartialShape] - :return: void - )"); + R"( Reshape model inputs. + + The allowed types of keys in the `partial_shapes` dictionary are: + + (1) `int`, input index + (2) `str`, input tensor name + (3) `openvino.runtime.Output` + + The allowed types of values in the `partial_shapes` are: + + (1) `openvino.runtime.PartialShape` + (2) `list` consisting of dimensions + (3) `tuple` consisting of dimensions + (4) `str`, string representation of `openvino.runtime.PartialShape` + + When list or tuple are used to describe dimensions, each dimension can be written in form: + + (1) non-negative `int` which means static value for the dimension + (2) `[min, max]`, dynamic dimension where `min` specifies lower bound and `max` specifies upper bound; the range includes both `min` and `max`; using `-1` for `min` or `max` means no known bound + (3) `(min, max)`, the same as above + (4) `-1` is a dynamic dimension without known bounds + (4) `openvino.runtime.Dimension` + (5) `str` using next syntax: + '?' - to define fully dinamic dimension + '1' - to define dimension which length is 1 + '1..10' - to define bounded dimension + '..10' or '1..' to define dimension with only lower or only upper limit + + :param partial_shapes: New shapes. 
+ :type partial_shapes: Dict[keys, values] + )"); function.def("get_output_size", &ov::Model::get_output_size, diff --git a/src/bindings/python/src/pyopenvino/graph/partial_shape.cpp b/src/bindings/python/src/pyopenvino/graph/partial_shape.cpp index 1f6a1c2499c..dfc5595f7fe 100644 --- a/src/bindings/python/src/pyopenvino/graph/partial_shape.cpp +++ b/src/bindings/python/src/pyopenvino/graph/partial_shape.cpp @@ -13,6 +13,7 @@ #include "openvino/core/dimension.hpp" // ov::Dimension #include "openvino/core/shape.hpp" // ov::Shape +#include "pyopenvino/core/common.hpp" #include "pyopenvino/graph/partial_shape.hpp" namespace py = pybind11; @@ -23,15 +24,17 @@ void regclass_graph_PartialShape(py::module m) { py::class_> shape(m, "PartialShape"); shape.doc() = "openvino.runtime.PartialShape wraps ov::PartialShape"; - shape.def(py::init([](const std::vector& dimensions) { - return ov::PartialShape(std::vector(dimensions.begin(), dimensions.end())); - })); - shape.def(py::init&>()); - shape.def(py::init&>()); - shape.def(py::init&>()); - shape.def(py::init&>()); shape.def(py::init()); shape.def(py::init()); + shape.def(py::init([](py::list& shape) { + return Common::partial_shape_from_list(shape); + })); + shape.def(py::init([](py::tuple& shape) { + return Common::partial_shape_from_list(shape.cast()); + })); + shape.def(py::init([](const std::string& shape) { + return Common::partial_shape_from_str(shape); + })); shape.def_static("dynamic", &ov::PartialShape::dynamic, py::arg("rank") = ov::Dimension()); diff --git a/src/bindings/python/src/pyopenvino/graph/shape.cpp b/src/bindings/python/src/pyopenvino/graph/shape.cpp index 975aad6f84c..1f306ac2649 100644 --- a/src/bindings/python/src/pyopenvino/graph/shape.cpp +++ b/src/bindings/python/src/pyopenvino/graph/shape.cpp @@ -19,6 +19,7 @@ namespace py = pybind11; void regclass_graph_Shape(py::module m) { py::class_> shape(m, "Shape"); shape.doc() = "openvino.runtime.Shape wraps ov::Shape"; + shape.def(py::init<>()); shape.def(py::init&>(), py::arg("axis_lengths")); shape.def(py::init&>(), py::arg("axis_lengths")); shape.def(py::init(), py::arg("axis_lengths")); diff --git a/src/bindings/python/tests/test_inference_engine/test_function.py b/src/bindings/python/tests/test_inference_engine/test_function.py index adaa2da2302..9863474ad6b 100644 --- a/src/bindings/python/tests/test_inference_engine/test_function.py +++ b/src/bindings/python/tests/test_inference_engine/test_function.py @@ -361,3 +361,76 @@ def test_reshape(device): core = Core() compiled = core.compile_model(model, device) assert compiled.input().partial_shape == ref_shape + + +def test_reshape_with_python_types(device): + model = create_test_model() + + def check_shape(new_shape): + for input in model.inputs: + assert input.partial_shape == new_shape + + shape1 = [1, 4] + new_shapes = {input: shape1 for input in model.inputs} + model.reshape(new_shapes) + check_shape(PartialShape(shape1)) + + shape2 = [1, 6] + new_shapes = {input.any_name: shape2 for input in model.inputs} + model.reshape(new_shapes) + check_shape(PartialShape(shape2)) + + shape3 = [1, 8] + new_shapes = {i: shape3 for i, input in enumerate(model.inputs)} + model.reshape(new_shapes) + check_shape(PartialShape(shape3)) + + shape4 = [1, -1] + new_shapes = {input: shape4 for input in model.inputs} + model.reshape(new_shapes) + check_shape(PartialShape([Dimension(1), Dimension(-1)])) + + shape5 = [1, (1, 10)] + new_shapes = {input: shape5 for input in model.inputs} + model.reshape(new_shapes) + 
check_shape(PartialShape([Dimension(1), Dimension(1, 10)])) + + shape6 = [Dimension(3), Dimension(3, 10)] + new_shapes = {input: shape6 for input in model.inputs} + model.reshape(new_shapes) + check_shape(PartialShape(shape6)) + + shape7 = "1..10, ?" + new_shapes = {input: shape7 for input in model.inputs} + model.reshape(new_shapes) + check_shape(PartialShape(shape7)) + + # reshape mixed keys + shape8 = [(1, 20), -1] + new_shapes = {"data1": shape8, 1: shape8} + model.reshape(new_shapes) + check_shape(PartialShape([Dimension(1, 20), Dimension(-1)])) + + # reshape with one input + param = ops.parameter([1, 3, 28, 28]) + model = Model(ops.relu(param), [param]) + + shape9 = [-1, 3, (28, 56), (28, 56)] + model.reshape(shape9) + check_shape(PartialShape([Dimension(-1), Dimension(3), Dimension(28, 56), Dimension(28, 56)])) + + shape10 = "?,3,..224,..224" + model.reshape(shape10) + check_shape(PartialShape([Dimension(-1), Dimension(3), Dimension(-1, 224), Dimension(-1, 224)])) + + # check exceptions + shape10 = [1, 1, 1, 1] + with pytest.raises(TypeError) as e: + model.reshape({model.input().node: shape10}) + assert "Incorrect key type to reshape a model, " \ + "expected keys as openvino.runtime.Output, int or str." in str(e.value) + + with pytest.raises(TypeError) as e: + model.reshape({0: range(1, 9)}) + assert "Incorrect value type to reshape a model, " \ + "expected values as openvino.runtime.PartialShape, str, list or tuple." in str(e.value) diff --git a/src/bindings/python/tests/test_ngraph/test_core.py b/src/bindings/python/tests/test_ngraph/test_core.py index a8919e8900e..f505afe756b 100644 --- a/src/bindings/python/tests/test_ngraph/test_core.py +++ b/src/bindings/python/tests/test_ngraph/test_core.py @@ -2,6 +2,7 @@ # SPDX-License-Identifier: Apache-2.0 import numpy as np +import pytest import openvino.runtime.opset8 as ov from openvino.runtime import Dimension, Model, PartialShape, Shape @@ -76,6 +77,33 @@ def test_dimension_comparisons(): assert not d2.compatible(d1) assert not d2.same_scheme(d1) + d = Dimension("?") + assert d == Dimension() + + d = Dimension("1") + assert d == Dimension(1) + + d = Dimension("..10") + assert d == Dimension(-1, 10) + + d = Dimension("10..") + assert d == Dimension(10, -1) + + d = Dimension("5..10") + assert d == Dimension(5, 10) + + with pytest.raises(RuntimeError) as e: + d = Dimension("C") + assert 'Cannot parse dimension: "C"' in str(e.value) + + with pytest.raises(RuntimeError) as e: + d = Dimension("?..5") + assert 'Cannot parse min bound: "?"' in str(e.value) + + with pytest.raises(RuntimeError) as e: + d = Dimension("5..?") + assert 'Cannot parse max bound: "?"' in str(e.value) + def test_partial_shape(): ps = PartialShape([1, 2, 3, 4]) @@ -140,6 +168,40 @@ def test_partial_shape(): assert list(ps.get_max_shape())[0] > 1000000000 assert repr(ps) == "" + shape_list = [(1, 10), [2, 5], 4, Dimension(2), "..10"] + ref_ps = PartialShape([Dimension(1, 10), Dimension(2, 5), Dimension(4), Dimension(2), Dimension(-1, 10)]) + assert PartialShape(shape_list) == ref_ps + assert PartialShape(tuple(shape_list)) == ref_ps + + with pytest.raises(TypeError) as e: + PartialShape([(1, 2, 3)]) + assert "Two elements are expected in tuple(lower, upper) " \ + "for dynamic dimension, but 3 elements were given." in str(e.value) + + with pytest.raises(TypeError) as e: + PartialShape([("?", "?")]) + assert "Incorrect pair of types (, ) " \ + "for dynamic dimension, ints are expected." 
in str(e.value) + + with pytest.raises(TypeError) as e: + PartialShape([range(10)]) + assert "Incorrect type for dimension. Expected types are: " \ + "int, str, openvino.runtime.Dimension, list/tuple with lower " \ + "and upper values for dynamic dimension." in str(e.value) + + ps = PartialShape("...") + assert ps == PartialShape.dynamic() + + ps = PartialShape("?, 3, ..224, 28..224") + assert ps == PartialShape([Dimension(-1), Dimension(3), Dimension(-1, 224), Dimension(28, 224)]) + + with pytest.raises(RuntimeError) as e: + ps = PartialShape("?,,3") + assert 'Cannot get vector of dimensions! "?,,3" is incorrect' in str(e.value) + + shape = Shape() + assert len(shape) == 0 + def test_partial_shape_compatible(): ps1 = PartialShape.dynamic() diff --git a/tools/benchmark_tool/openvino/tools/benchmark/utils/utils.py b/tools/benchmark_tool/openvino/tools/benchmark/utils/utils.py index ce39d5d49f4..d59fe2c11e1 100644 --- a/tools/benchmark_tool/openvino/tools/benchmark/utils/utils.py +++ b/tools/benchmark_tool/openvino/tools/benchmark/utils/utils.py @@ -375,9 +375,9 @@ def get_data_shapes_map(data_shape_string, input_names): input_name = match[:match.find('[')] shapes = re.findall(r'\[(.*?)\]', match[len(input_name):]) if input_name: - return_value[input_name] = list(parse_partial_shape(shape_str) for shape_str in shapes) + return_value[input_name] = list(PartialShape(shape_str) for shape_str in shapes) else: - data_shapes = list(parse_partial_shape(shape_str) for shape_str in shapes) + data_shapes = list(PartialShape(shape_str) for shape_str in shapes) num_inputs, num_shapes = len(input_names), len(data_shapes) if num_shapes != 1 and num_shapes % num_inputs != 0: raise Exception(f"Number of provided data_shapes is not a multiple of the number of model inputs!") @@ -505,52 +505,13 @@ class AppInputInfo: return self.partial_shape.is_dynamic -def parse_partial_shape(shape_str): - dims = [] - for dim in shape_str.split(','): - if '.. 
' in dim: - range = list(int(d) for d in dim.split('..')) - assert len(range) == 2 - dims.append(Dimension(range)) - elif dim == '?': - dims.append(Dimension()) - else: - dims.append(Dimension(int(dim))) - return PartialShape(dims) - - -def parse_batch_size(batch_size_str): - if batch_size_str: - error_message = f"Can't parse batch size '{batch_size_str}'" - dims = batch_size_str.split("..") - if len(dims) > 2: - raise Exception(error_message) - elif len(dims) == 2: - range = [] - for d in dims: - if d.isnumeric(): - range.append(int(d)) - else: - raise Exception(error_message) - return Dimension(*range) - else: - if dims[0].lstrip("-").isnumeric(): - return Dimension(int(dims[0])) - elif dims[0] == "?": - return Dimension() - else: - raise Exception(error_message) - else: - return Dimension(0) - - def get_inputs_info(shape_string, data_shape_string, layout_string, batch_size, scale_string, mean_string, inputs): input_names = get_input_output_names(inputs) input_node_names = get_node_names(inputs) shape_map = parse_input_parameters(shape_string, input_names) data_shape_map = get_data_shapes_map(data_shape_string, input_names) layout_map = parse_input_parameters(layout_string, input_names) - batch_size = parse_batch_size(batch_size) + batch_size = Dimension(batch_size) reshape = False batch_found = False input_info = [] @@ -565,10 +526,10 @@ def get_inputs_info(shape_string, data_shape_string, layout_string, batch_size, # Shape info.original_shape = inputs[i].partial_shape if info.name in shape_map: - info.partial_shape = parse_partial_shape(shape_map[info.name]) + info.partial_shape = PartialShape(shape_map[info.name]) reshape = True elif info.node_name in shape_map: - info.partial_shape = parse_partial_shape(shape_map[info.node_name]) + info.partial_shape = PartialShape(shape_map[info.node_name]) reshape = True else: info.partial_shape = inputs[i].partial_shape From c80a872f73e1dad695c007c9dfc8013191690009 Mon Sep 17 00:00:00 2001 From: Anton Romanov Date: Tue, 22 Feb 2022 14:49:35 +0300 Subject: [PATCH 065/310] Fix Coverity in samples (#10583) * Fix coverity samples * Fixed coverity issue in speech sample --- .../c/common/opencv_c_wrapper/bmp_reader.c | 10 ++++---- samples/cpp/speech_sample/main.cpp | 25 +++++++++++-------- 2 files changed, 19 insertions(+), 16 deletions(-) diff --git a/samples/c/common/opencv_c_wrapper/bmp_reader.c b/samples/c/common/opencv_c_wrapper/bmp_reader.c index 848415d204a..40fd5fc4bd4 100644 --- a/samples/c/common/opencv_c_wrapper/bmp_reader.c +++ b/samples/c/common/opencv_c_wrapper/bmp_reader.c @@ -4,11 +4,11 @@ #include #include -#define CLEANUP_AND_RETURN(x) \ - if (x && !image && !image->data) \ - free(image->data); \ - if (input != NULL) \ - fclose(input); \ +#define CLEANUP_AND_RETURN(x) \ + if (0 != x && NULL != image && NULL != image->data) \ + free(image->data); \ + if (input != NULL) \ + fclose(input); \ return x; int readBmpImage(const char* fileName, BitMap* image) { diff --git a/samples/cpp/speech_sample/main.cpp b/samples/cpp/speech_sample/main.cpp index fd484be240e..93c13d53bc6 100644 --- a/samples/cpp/speech_sample/main.cpp +++ b/samples/cpp/speech_sample/main.cpp @@ -178,19 +178,22 @@ int main(int argc, char* argv[]) { } if (FLAGS_q.compare("user") == 0) { if (!FLAGS_rg.empty()) { - slog::warn << "Custom scale factor will be used for imported gna model: " << FLAGS_rg << slog::endl; - } - auto scale_factors_per_input = parse_scale_factors(model->inputs(), FLAGS_sf); - if (numInputFiles != scale_factors_per_input.size()) { - std::string 
errMessage( - "Incorrect command line for multiple inputs: " + std::to_string(scale_factors_per_input.size()) + - " scale factors provided for " + std::to_string(numInputFiles) + " input files."); + std::string errMessage("Custom scale factor can not be set for imported gna model: " + FLAGS_rg); throw std::logic_error(errMessage); + } else { + auto scale_factors_per_input = parse_scale_factors(model->inputs(), FLAGS_sf); + if (numInputFiles != scale_factors_per_input.size()) { + std::string errMessage("Incorrect command line for multiple inputs: " + + std::to_string(scale_factors_per_input.size()) + + " scale factors provided for " + std::to_string(numInputFiles) + + " input files."); + throw std::logic_error(errMessage); + } + for (auto&& sf : scale_factors_per_input) { + slog::info << "For input " << sf.first << " using scale factor of " << sf.second << slog::endl; + } + gnaPluginConfig[ov::intel_gna::scale_factors_per_input.name()] = scale_factors_per_input; } - for (auto&& sf : scale_factors_per_input) { - slog::info << "For input " << sf.first << " using scale factor of " << sf.second << slog::endl; - } - gnaPluginConfig[ov::intel_gna::scale_factors_per_input.name()] = scale_factors_per_input; } else { // "static" quantization with calculated scale factor if (!FLAGS_rg.empty()) { From 6dc8b8b047c2bdcbc38222cca07cf36f4c5dd7e5 Mon Sep 17 00:00:00 2001 From: Tatiana Savina Date: Tue, 22 Feb 2022 14:50:37 +0300 Subject: [PATCH 066/310] add note (#10566) --- docs/install_guides/deployment-manager-tool.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/install_guides/deployment-manager-tool.md b/docs/install_guides/deployment-manager-tool.md index 6963b7f35c6..e64b8eb9aed 100644 --- a/docs/install_guides/deployment-manager-tool.md +++ b/docs/install_guides/deployment-manager-tool.md @@ -18,6 +18,8 @@ The Deployment Manager is a Python* command-line tool that creates a deployment > **IMPORTANT**: The operating system on the target system must be the same as the development system on which you are creating the package. For example, if the target system is Ubuntu 18.04, the deployment package must be created from the OpenVINO™ toolkit installed on Ubuntu 18.04. +> **TIP**: If your application requires additional dependencies, including the Microsoft Visual C++ Redistributable, use the ['--user_data' option](https://docs.openvino.ai/latest/openvino_docs_install_guides_deployment_manager_tool.html#run-standard-cli-mode) to add them to the deployment archive. Install these dependencies on the target host before running inference. + ## Create Deployment Package Using Deployment Manager There are two ways to create a deployment package that includes inference-related components of the OpenVINO™ toolkit: you can run the Deployment Manager tool in either interactive or standard CLI mode. 
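A minimal usage sketch for the Python-typed reshape/PartialShape binding changes shown a few patches above, distilled from the new test cases they add (the model construction, shape values and variable names below are illustrative only and are not part of any patch in this series):

    import openvino.runtime.opset8 as ops
    from openvino.runtime import Dimension, Model, PartialShape

    # Single-input model, built the same way as in the new tests.
    param = ops.parameter([1, 3, 28, 28])
    model = Model(ops.relu(param), [param])

    # Dimensions may be plain ints, (lower, upper) tuples or Dimension objects.
    model.reshape([-1, 3, (28, 56), (28, 56)])
    assert model.input().partial_shape == PartialShape(
        [Dimension(-1), Dimension(3), Dimension(28, 56), Dimension(28, 56)])

    # A whole shape can also be passed as one string with "?" and "min..max" dimensions.
    model.reshape("?,3,..224,..224")
    assert model.input().partial_shape == PartialShape(
        [Dimension(-1), Dimension(3), Dimension(-1, 224), Dimension(-1, 224)])

With these bindings, ints, (lower, upper) tuples, "min..max" strings and Dimension objects are converted to PartialShape through the new partial_shape_from_list/partial_shape_from_str helpers, so callers no longer need to build PartialShape objects by hand before reshaping.
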
From 51ef9383853b6e7416eb84f09b2a27f310c051fa Mon Sep 17 00:00:00 2001 From: Tingqian Li Date: Tue, 22 Feb 2022 20:23:20 +0800 Subject: [PATCH 067/310] [CPU] fix crash in resnet binary model (#9761) --- src/plugins/intel_cpu/src/nodes/input.cpp | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/plugins/intel_cpu/src/nodes/input.cpp b/src/plugins/intel_cpu/src/nodes/input.cpp index 1425e70f77d..bcbe95ddf91 100644 --- a/src/plugins/intel_cpu/src/nodes/input.cpp +++ b/src/plugins/intel_cpu/src/nodes/input.cpp @@ -254,7 +254,17 @@ void MKLDNNInputNode::cloneBlobIfRequired() { auto cloneBlob = [&, this] () { MKLDNNMemory memory{ getEngine() }; - memory.Create(memDesc, constOp->get_data_ptr()); + + // CVS-74980 + // MKLDNN/oneDNN always allocate 1byte for element type with bitWidth < 8 (u4,u1...) + // but ngraph Constant uses actual bitWidth for data storage allocation + // in that case we make a copy to avoid overflow + if (constOp->get_byte_size() >= memDesc.getCurrentMemSize()) { + memory.Create(memDesc, constOp->get_data_ptr()); + } else { + memory.Create(memDesc); + memcpy(memory.GetPtr(), constOp->get_data_ptr(), constOp->get_byte_size()); + } MKLDNNMemoryPtr ptr = MKLDNNMemoryPtr(new MKLDNNMemory(getEngine())); ptr->Create(memDesc); From 850f93f21b67693b5f0e116c2984ff3a9d258de5 Mon Sep 17 00:00:00 2001 From: Maksim Kutakov Date: Tue, 22 Feb 2022 15:42:26 +0300 Subject: [PATCH 068/310] [CPU] INT8 tests for convolution sum fusing (#10359) * int8 tests * Sum second term port selection fix * Fix after rebase --- src/plugins/intel_cpu/src/graph_optimizer.cpp | 3 +- .../subgraph_tests/src/conv_sum_broadcast.cpp | 135 +++++++++++++++--- 2 files changed, 119 insertions(+), 19 deletions(-) diff --git a/src/plugins/intel_cpu/src/graph_optimizer.cpp b/src/plugins/intel_cpu/src/graph_optimizer.cpp index 827692487b1..ef87b76b9f3 100644 --- a/src/plugins/intel_cpu/src/graph_optimizer.cpp +++ b/src/plugins/intel_cpu/src/graph_optimizer.cpp @@ -1238,7 +1238,8 @@ void MKLDNNGraphOptimizer::FuseConvolutionSumAndConvolutionSumActivation(MKLDNNG // Merged with DW_conv. Shape may change mergedConv->inputShapes.push_back(mergedConv->fusedWith[0]->getOutputShapeAtPort(0)); } else { - mergedConv->inputShapes.push_back(sum->getInputShapeAtPort(1)); + size_t secondTermPort = sum->getFusingPort() == 0 ? 
1 : 0; + mergedConv->inputShapes.push_back(sum->getInputShapeAtPort(secondTermPort)); } size_t childIdx = 0lu; diff --git a/src/tests/functional/plugin/cpu/subgraph_tests/src/conv_sum_broadcast.cpp b/src/tests/functional/plugin/cpu/subgraph_tests/src/conv_sum_broadcast.cpp index 33c54170556..41a6ffc29d4 100644 --- a/src/tests/functional/plugin/cpu/subgraph_tests/src/conv_sum_broadcast.cpp +++ b/src/tests/functional/plugin/cpu/subgraph_tests/src/conv_sum_broadcast.cpp @@ -2,7 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "test_utils/cpu_test_utils.hpp" +#include #include "test_utils/fusing_test_utils.hpp" #include "test_utils/convolution_params.hpp" #include "shared_test_classes/base/ov_subgraph.hpp" @@ -60,6 +60,28 @@ public: return result.str(); } + virtual ngraph::ParameterVector makeParams() { + return ngraph::builder::makeDynamicParams(ngraph::element::f32, inputDynamicShapes); + } + + virtual std::shared_ptr makeConv(const ngraph::ParameterVector& inputParams) { + auto conv = ngraph::builder::makeConvolution(inputParams[0], ngraph::element::f32, _kernel, _stride, _padBegin, + _padEnd, _dilation, ngraph::op::PadType::EXPLICIT, _convOutChannels); + + return conv; + } + + virtual std::shared_ptr addSum(std::shared_ptr lastNode, const ngraph::ParameterVector& inputParams) { + auto sum = std::make_shared(lastNode, inputParams[1]); + + fusedOps.insert(fusedOps.begin(), "Add"); // as we always fuse the sum first + return sum; + } + + virtual ov::element::Type getNetType() const { + return ov::element::Type_t::f32; + } + void SetUp() override { InputShape convShape; InputShape secondShape; @@ -75,38 +97,41 @@ public: init_input_shapes({convShape, secondShape}); - const InferenceEngine::SizeVector kernel = {3, 3}; - const InferenceEngine::SizeVector stride = {1, 1}; - const InferenceEngine::SizeVector dilation = {1, 1}; - const std::vector padBegin = {0, 0}; - const std::vector padEnd = {0, 0}; - const size_t convOutChannels = 64; + auto inputParams = makeParams(); - auto netType = ngraph::element::f32; - auto inputParams = ngraph::builder::makeDynamicParams(netType, inputDynamicShapes); + auto conv = makeConv(inputParams); - auto conv = ngraph::builder::makeConvolution(inputParams[0], ngraph::element::f32, kernel, stride, padBegin, - padEnd, dilation, ngraph::op::PadType::EXPLICIT, convOutChannels); if (bias) { - auto biasNode = ngraph::builder::makeConstant(ngraph::element::Type_t::f32, ngraph::Shape({1, convOutChannels, 1, 1}), {}, true); + auto biasNode = ngraph::builder::makeConstant(ngraph::element::Type_t::f32, ngraph::Shape({1, _convOutChannels, 1, 1}), {}, true); conv = std::make_shared(conv, biasNode); } - auto sum = std::make_shared(conv, inputParams[1]); + auto sum = addSum(conv, inputParams); - fusedOps.insert(fusedOps.begin(), "Add"); // as we always fuse the sum first - - auto runtimeType = netType; + auto runtimeType = getNetType(); if (configuration.count(PluginConfigParams::KEY_ENFORCE_BF16) && PluginConfigParams::YES == configuration[PluginConfigParams::KEY_ENFORCE_BF16].as()) { runtimeType = ngraph::element::Type_t::bf16; } + if (inputParams.front()->get_element_type() == ngraph::element::i8 || inputParams.front()->get_element_type() == ngraph::element::u8) { + runtimeType = ngraph::element::i8; + } + selectedType = makeSelectedTypeStr(getPrimitiveType(), runtimeType); - function = makeNgraphFunction(netType, inputParams, sum, "ConvolutionSumBroadcast"); + function = makeNgraphFunction(getNetType(), inputParams, sum, "ConvolutionSumBroadcast"); + targetDevice 
= CommonTestUtils::DEVICE_CPU; } + +protected: + const InferenceEngine::SizeVector _kernel = {3, 3}; + const InferenceEngine::SizeVector _stride = {1, 1}; + const InferenceEngine::SizeVector _dilation = {1, 1}; + const std::vector _padBegin = {0, 0}; + const std::vector _padEnd = {0, 0}; + const size_t _convOutChannels = 64; }; TEST_P(ConcatConvSumInPlaceTest, CompareWithRefs) { @@ -117,6 +142,72 @@ TEST_P(ConcatConvSumInPlaceTest, CompareWithRefs) { CheckPluginRelatedResults(compiledModel, "Convolution"); } +class ConcatConvSumInPlaceTestInt8 : public ConcatConvSumInPlaceTest { +public: + ngraph::ParameterVector makeParams() override { + ngraph::ParameterVector outs(2); + outs[0] = std::make_shared(ngraph::element::u8, inputDynamicShapes[0]); + outs[1] = std::make_shared(ngraph::element::f32, inputDynamicShapes[1]); + return outs; + } + + std::shared_ptr makeConv(const ngraph::ParameterVector& inputParams) override { + using namespace ngraph; + auto inputParamsFP32 = builder::makeDynamicParams(element::f32, { inputParams.front()->get_partial_shape() }); + + auto convolutionNodeRelaxed = std::make_shared>( + *as_type_ptr(builder::makeConvolution(inputParamsFP32.front(), element::f32, _kernel, _stride, _padBegin, + _padEnd, _dilation, ngraph::op::PadType::EXPLICIT, _convOutChannels)), + element::f32); + + auto inpShape = inputParams.front()->get_partial_shape(); + Shape filterShape = {_convOutChannels, static_cast(inpShape[1].get_length())}; + filterShape.insert(filterShape.end(), _kernel.begin(), _kernel.end()); + auto filterWeightsNode = builder::makeConstant(element::i8, filterShape, {}, true); + + auto conv = convolutionNodeRelaxed->copy_with_new_inputs({inputParams.front(), filterWeightsNode}); + + return conv; + } + + std::shared_ptr addSum(std::shared_ptr lastNode, const ngraph::ParameterVector& inputParams) override { + std::vector additionalFusedOps; + + lastNode = ngraph::builder::makeActivation(lastNode, ngraph::element::f32, ngraph::helpers::Relu); + //additionalFusedOps.push_back("Relu"); + + auto fqShape = ngraph::Shape(lastNode->get_output_partial_shape(0).size(), 1); + lastNode = ngraph::builder::makeFakeQuantize(lastNode, ngraph::element::f32, 256, fqShape); + additionalFusedOps.push_back("FakeQuantize"); + + auto secondTerm = ngraph::builder::makeFakeQuantize(inputParams[1], ngraph::element::f32, 256, fqShape); + + auto sum = std::make_shared(lastNode, secondTerm); + additionalFusedOps.push_back("Add"); + + fusedOps.insert(fusedOps.begin(), additionalFusedOps.begin(), additionalFusedOps.end()); + return sum; + } + + void SetUp() override { + abs_threshold = 1.001f; + using ngraph::pass::ConvertPrecision; + ConcatConvSumInPlaceTest::SetUp(); + functionRefs = ov::clone_model(*function); + ngraph::pass::ConvertPrecision().run_on_function(functionRefs); + ngraph::pass::ConvertPrecision().run_on_function(functionRefs); + functionRefs->validate_nodes_and_infer_types(); + } +}; + +TEST_P(ConcatConvSumInPlaceTestInt8, CompareWithRefs) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + + run(); + + CheckPluginRelatedResults(compiledModel, "Convolution"); +} + namespace { const auto fusingMulAddFQMullAdd = fusingSpecificParams{ std::make_shared(std::vector{ {[](postNodeConfig& cfg) { @@ -198,7 +289,6 @@ const std::vector fusingParamsSet{ fusingReluScaleShift, fusingMulAddFQMullAdd, fusingSigmoidFQFQ, -// fusingClampFQ // TODO: we need investigation, this particular pattern does not work even in static case fusingDivSubFQ }; @@ -250,5 +340,14 @@ 
INSTANTIATE_TEST_SUITE_P(smoke_Conv_Sum_Broadcast_BF16, ConcatConvSumInPlaceTest ::testing::Values(cpuBF16PluginConfig)), ConcatConvSumInPlaceTest::getTestCaseName); +INSTANTIATE_TEST_SUITE_P(smoke_Conv_Sum_Broadcast_INT8, ConcatConvSumInPlaceTestInt8, + ::testing::Combine( + ::testing::Values(convInpShape), + ::testing::Values(secondInp), + ::testing::Values(true, false), + ::testing::ValuesIn(fusingParamsSet), + ::testing::Values(cpuEmptyPluginConfig)), + ConcatConvSumInPlaceTest::getTestCaseName); + } // namespace } // namespace SubgraphTestsDefinitions From 487bb679957cfc15d73edc6d87a8e5e3bb1d4ce5 Mon Sep 17 00:00:00 2001 From: "Min, Byungil" Date: Tue, 22 Feb 2022 22:23:45 +0900 Subject: [PATCH 069/310] Resolve onednn fc issue to enable bert-base (#10177) + Enabled bert-base-ber model + Resolve failure of onednn fc Signed-off-by: Min, Byungil --- src/plugins/intel_gpu/src/graph/layout_optimizer.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp index 646ee81a433..1d5422cf137 100644 --- a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp +++ b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp @@ -1501,6 +1501,12 @@ impl_types layout_optimizer::get_preferred_impl_type(program_node& node, format impl_candidate = impl_types::ocl; break; } + + if (fo.node->as().get_primitive()->mode == eltwise_mode::sum && + program_helpers::needs_onednn_sum_post_op(fo.node->as(), in_layout)) { + impl_candidate = impl_types::ocl; + break; + } // Gemm checkings // TODO: investigate why currently onednn gemm has some "sum" post-op restrictions // which don't correlate with fc checkings in the code above From 435584bb9155d3a2757761c9725ea6f0f1266a18 Mon Sep 17 00:00:00 2001 From: Maxim Vafin Date: Tue, 22 Feb 2022 16:46:48 +0300 Subject: [PATCH 070/310] Support dynamic Broadcast and new pattern for TI condition (#9735) * Support dynamic Broadcast and new pattern for TI condition * Apply review feedback * Fix broadcast if statement --- .../mo/middle/TensorIteratorCondition.py | 251 ++++++++++-------- tools/mo/openvino/tools/mo/ops/broadcast.py | 11 +- .../mo/middle/TensorIteratorCondition_test.py | 110 +++++--- tools/mo/unit_tests/mo/ops/broadcast_test.py | 34 ++- 4 files changed, 243 insertions(+), 163 deletions(-) diff --git a/tools/mo/openvino/tools/mo/middle/TensorIteratorCondition.py b/tools/mo/openvino/tools/mo/middle/TensorIteratorCondition.py index df98f2ca05f..cf739e5c360 100644 --- a/tools/mo/openvino/tools/mo/middle/TensorIteratorCondition.py +++ b/tools/mo/openvino/tools/mo/middle/TensorIteratorCondition.py @@ -5,6 +5,7 @@ import logging as log import numpy as np +from openvino.tools.mo.middle.pattern_match import apply_pattern from openvino.tools.mo.middle.TensorIterator_utils import delete_selects_from from openvino.tools.mo.ops.TensorIterator_ops import TensorIteratorCondition, TensorIteratorBackEdge from openvino.tools.mo.ops.identity import Identity @@ -66,134 +67,140 @@ Shape -> StridedSlice -> Enter -| LogicalAnd --> LoopCond (data) return [TensorIteratorMerge] @staticmethod - def pattern(): + def pattern(variation): log.debug('+++++++++++++++ ConditionMatching ++++++++++++++++') - return dict( - nodes=[ - ('Enter_1_less', dict(kind='op', op='Enter')), - ('Strided_slice', dict(kind='op', op='StridedSlice')), - ('Strided_slice_data', dict(kind='data')), - ('Enter_1_less_data', dict(kind='data')), + nodes = [ + ('Enter_1_less', dict(kind='op', op='Enter')), + 
('Strided_slice', dict(kind='op', op='StridedSlice')), + ('Strided_slice_data', dict(kind='data')), + ('Enter_1_less_data', dict(kind='data')), - ('Less_1', dict(kind='op', op='Less')), - ('Merge_1', dict(kind='op', op='Merge')), - ('Merge_1_data', dict(kind='data')), - ('Less_1_data', dict(kind='data')), + ('Less_1', dict(kind='op', op='Less')), + ('Merge_1', dict(kind='op', op='Merge')), + ('Merge_1_data', dict(kind='data')), + ('Less_1_data', dict(kind='data')), - ('Less_2', dict(kind='op', op='Less')), - ('Merge_2', dict(kind='op', op='Merge')), - ('Merge_2_data', dict(kind='data')), - ('Less_2_data', dict(kind='data')), + ('Less_2', dict(kind='op', op='Less')), + ('Merge_2', dict(kind='op', op='Merge')), + ('Merge_2_data', dict(kind='data')), + ('Less_2_data', dict(kind='data')), + ('and', dict(kind='op', op='LogicalAnd')), + ('and_data', dict(kind='data')), + ('loop_cond', dict(kind='op', op='LoopCond')), + ('loop_cond_data', dict(kind='data')), + + ('init_1', dict(kind='op', op='Const')), + ('init_1_data', dict(kind='data')), + ('Enter_1', dict(kind='op', op='Enter')), + ('Enter_1_data', dict(kind='data')), + + ('init_2', dict(kind='op', op='Const')), + ('init_2_data', dict(kind='data')), + ('Enter_2', dict(kind='op', op='Enter')), + ('Enter_2_data', dict(kind='data')), + + ('Switch_1', dict(kind='op', op='Switch')), + ('Switch_1_data', dict(kind='data')), + ('Identity_1', dict(kind='op', op='Identity')), + ('Identity_1_data', dict(kind='data')), + ('add_1', dict(kind='op', op='Add')), + ('add_1_y', dict(kind='op', op='Const')), + ('add_1_y_data', dict(kind='data')), + ('add_1_data', dict(kind='data')), + ('NextIteration_1', dict(kind='op', op='NextIteration')), + + ('Switch_2', dict(kind='op', op='Switch')), + ('Switch_2_data', dict(kind='data')), + ('Identity_2', dict(kind='op', op='Identity')), + ('Identity_2_data', dict(kind='data')), + ('add_2', dict(kind='op', op='Add')), + ('add_2_y', dict(kind='op', op='Const')), + ('add_2_y_data', dict(kind='data')), + ('add_2_data', dict(kind='data')), + ('NextIteration_2', dict(kind='op', op='NextIteration')), + + ] + edges = [ + ('Strided_slice', 'Strided_slice_data'), + ('Strided_slice_data', 'Enter_1_less'), + ('Enter_1_less', 'Enter_1_less_data'), + ('Enter_1_less_data', 'Less_1'), + ('Less_1', 'Less_1_data'), + ('Less_1_data', 'and'), + + ('and', 'and_data'), + ('and_data', 'loop_cond'), + ('loop_cond', 'loop_cond_data'), + ('loop_cond_data', 'Switch_1'), + ('loop_cond_data', 'Switch_2'), + + ('init_1', 'init_1_data'), + ('init_1_data', 'Enter_1'), + ('Enter_1', 'Enter_1_data'), + ('Enter_1_data', 'Merge_1'), + ('Merge_1', 'Merge_1_data'), + ('Merge_1_data', 'Less_1'), + + ('Merge_1_data', 'Switch_1'), + ('Switch_1', 'Switch_1_data'), + ('Switch_1_data', 'Identity_1'), + ('Identity_1', 'Identity_1_data'), + ('Identity_1_data', 'add_1'), + ('add_1_y', 'add_1_y_data'), + ('add_1_y_data', 'add_1'), + ('add_1', 'add_1_data'), + ('add_1_data', 'NextIteration_1'), + + ('Merge_2_data', 'Switch_2'), + ('Switch_2', 'Switch_2_data'), + ('Switch_2_data', 'Identity_2'), + ('Identity_2', 'Identity_2_data'), + ('Identity_2_data', 'add_2'), + ('add_2_y', 'add_2_y_data'), + ('add_2_y_data', 'add_2'), + ('add_2', 'add_2_data'), + ('add_2_data', 'NextIteration_2'), + + ('init_2', 'init_2_data'), + ('init_2_data', 'Enter_2'), + ('Enter_2', 'Enter_2_data'), + ('Enter_2_data', 'Merge_2'), + + ('Merge_2', 'Merge_2_data'), + ('Merge_2_data', 'Less_2'), + ('Less_2', 'Less_2_data'), + ('Less_2_data', 'and'), + ] + if variation == 1: + nodes.extend([ 
('Enter_2_less', dict(kind='op', op='Enter')), ('Enter_2_less_data', dict(kind='data')), - ('minimum_data', dict(kind='data')), - - ('and', dict(kind='op', op='LogicalAnd')), - ('and_data', dict(kind='data')), - ('loop_cond', dict(kind='op', op='LoopCond')), - ('loop_cond_data', dict(kind='data')), - - ('init_1', dict(kind='op', op='Const')), - ('init_1_data', dict(kind='data')), - ('Enter_1', dict(kind='op', op='Enter')), - ('Enter_1_data', dict(kind='data')), - - ('init_2', dict(kind='op', op='Const')), - ('init_2_data', dict(kind='data')), - ('Enter_2', dict(kind='op', op='Enter')), - ('Enter_2_data', dict(kind='data')), - - ('Switch_1', dict(kind='op', op='Switch')), - ('Switch_1_data', dict(kind='data')), - ('Identity_1', dict(kind='op', op='Identity')), - ('Identity_1_data', dict(kind='data')), - ('add_1', dict(kind='op', op='Add')), - ('add_1_y', dict(kind='op', op='Const')), - ('add_1_y_data', dict(kind='data')), - ('add_1_data', dict(kind='data')), - ('NextIteration_1', dict(kind='op', op='NextIteration')), - - ('Switch_2', dict(kind='op', op='Switch')), - ('Switch_2_data', dict(kind='data')), - ('Identity_2', dict(kind='op', op='Identity')), - ('Identity_2_data', dict(kind='data')), - ('add_2', dict(kind='op', op='Add')), - ('add_2_y', dict(kind='op', op='Const')), - ('add_2_y_data', dict(kind='data')), - ('add_2_data', dict(kind='data')), - ('NextIteration_2', dict(kind='op', op='NextIteration')), - - ], - edges=[ - ('Strided_slice', 'Strided_slice_data'), - ('Strided_slice_data', 'Enter_1_less'), - ('Enter_1_less', 'Enter_1_less_data'), - ('Enter_1_less_data', 'Less_1'), - ('Less_1', 'Less_1_data'), - ('Less_1_data', 'and'), - - ('and', 'and_data'), - ('and_data', 'loop_cond'), - ('loop_cond', 'loop_cond_data'), - ('loop_cond_data', 'Switch_1'), - ('loop_cond_data', 'Switch_2'), - - ('init_1', 'init_1_data'), - ('init_1_data', 'Enter_1'), - ('Enter_1', 'Enter_1_data'), - ('Enter_1_data', 'Merge_1'), - ('Merge_1', 'Merge_1_data'), - ('Merge_1_data', 'Less_1'), - - ('Merge_1_data', 'Switch_1'), - ('Switch_1', 'Switch_1_data'), - ('Switch_1_data', 'Identity_1'), - ('Identity_1', 'Identity_1_data'), - ('Identity_1_data', 'add_1'), - ('add_1_y', 'add_1_y_data'), - ('add_1_y_data', 'add_1'), - ('add_1', 'add_1_data'), - ('add_1_data', 'NextIteration_1'), - - ('Merge_2_data', 'Switch_2'), - ('Switch_2', 'Switch_2_data'), - ('Switch_2_data', 'Identity_2'), - ('Identity_2', 'Identity_2_data'), - ('Identity_2_data', 'add_2'), - ('add_2_y', 'add_2_y_data'), - ('add_2_y_data', 'add_2'), - ('add_2', 'add_2_data'), - ('add_2_data', 'NextIteration_2'), - + ('minimum_data', dict(kind='data')) + ]) + edges.extend([ ('minimum_data', 'Enter_2_less'), ('Enter_2_less', 'Enter_2_less_data'), ('Enter_2_less_data', 'Less_2'), - - ('init_2', 'init_2_data'), - ('init_2_data', 'Enter_2'), - ('Enter_2', 'Enter_2_data'), - ('Enter_2_data', 'Merge_2'), - - ('Merge_2', 'Merge_2_data'), - ('Merge_2_data', 'Less_2'), - ('Less_2', 'Less_2_data'), - ('Less_2_data', 'and'), - ], - ) + ]) + elif variation == 2: + edges.append(('Enter_1_less_data', 'Less_2')) + else: + raise Exception('Wrong pattern variation') + return dict(nodes=nodes, edges=edges) @staticmethod def looking_for_iteration_counter(graph: Graph, match: dict): types = ['TensorIteratorInput', 'TensorIteratorOutput'] - candidates = mo_array([match['Identity_1_data'], match['Identity_2_data']]) - results = mo_array([False for i in range(len(candidates))]) - for i, candidat in enumerate(candidates): - for node in candidat.out_nodes(): + candidates = 
[match['Identity_1_data'], match['Identity_2_data']] + results = [] + for candidate in candidates: + for node in candidate.out_nodes(): if node['op'] in types: - results[i] = True - assert not np.all(results) - assert sum(results) == 1 - return candidates[results == True][0] + results.append(candidate) + break + assert len(results) == 1 + return results[0] @staticmethod def check_dynamic_seq_len(graph: Graph, match: dict): @@ -201,11 +208,17 @@ Shape -> StridedSlice -> Enter -| LogicalAnd --> LoopCond (data) Cycle is dynamic if at least one of the boundaries isn't constant OR this boundaries is different from tensor shape. """ - dynamic_seq_len = match['Enter_1_less_data'].value is None or match['Enter_2_less_data'].value is None or \ - not np.array_equal(match['Enter_1_less_data'].value, match['Enter_2_less_data'].value) + dynamic_seq_len = match['Enter_1_less_data'].value is None + if 'Enter_2_less_data' in match: + dynamic_seq_len = dynamic_seq_len or match['Enter_2_less_data'].value is None or \ + not np.array_equal(match['Enter_1_less_data'].value, match['Enter_2_less_data'].value) return dynamic_seq_len + def find_and_replace_pattern(self, graph: Graph): + apply_pattern(graph, **self.pattern(1), action=self.replace_pattern) # pylint: disable=no-member + apply_pattern(graph, **self.pattern(2), action=self.replace_pattern) # pylint: disable=no-member + def replace_pattern(self, graph: Graph, match: dict): log.debug('================== ConditionFind ===============') # init_1 @@ -235,7 +248,11 @@ Shape -> StridedSlice -> Enter -| LogicalAnd --> LoopCond (data) condition_attrs = dict(time=dict(init=init_2, step=step_2), iter=dict(init=init_1, step=step_1), name=match['loop_cond'].name + '/TensorIteratorCondition_') condition = TensorIteratorCondition(graph, attrs=condition_attrs) - condition_data = condition.create_node_with_data(inputs=[match['Strided_slice_data'], match['minimum_data']], + if 'minimum_data' in match: + condition_inp = [match['Strided_slice_data'], match['minimum_data']] + else: + condition_inp = [match['Strided_slice_data']] + condition_data = condition.create_node_with_data(inputs=condition_inp, data_nodes=[loop_condition, iterator_data]) safe_nodes = ['loop_cond_data', 'Identity_1_data', 'Identity_2_data', 'Strided_slice', 'Strided_slice_data', diff --git a/tools/mo/openvino/tools/mo/ops/broadcast.py b/tools/mo/openvino/tools/mo/ops/broadcast.py index 746fe4df0fa..76426146f18 100644 --- a/tools/mo/openvino/tools/mo/ops/broadcast.py +++ b/tools/mo/openvino/tools/mo/ops/broadcast.py @@ -1,7 +1,7 @@ # Copyright (C) 2018-2022 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -from openvino.tools.mo.front.common.partial_infer.utils import is_fully_defined +from openvino.tools.mo.front.common.partial_infer.utils import is_fully_defined, shape_array, undefined_shape_of_rank from openvino.tools.mo.graph.graph import Node, Graph from openvino.tools.mo.graph.perm_inputs import PermuteInputs from openvino.tools.mo.ops.op import Op @@ -46,9 +46,16 @@ class Broadcast(Op): input_shape = node.in_port(0).data.get_shape() input_value = node.in_port(0).data.get_value() + target_shape_shape = node.in_port(1).data.get_shape() target_shape = node.in_port(1).data.get_value() - assert target_shape is not None, 'Output shape is not defined for node "{}"'.format(node_name) assert node.has_and_set('mode'), 'Broadcasting mode is not defined for node "{}"'.format(node_name) + # Dynamic target shape is possible to infer only if shape of target shape is static and 1D + if target_shape is 
None and len(target_shape_shape) == 1 and (len(input_shape) <= 1 or node.mode == 'explicit'): + assert is_fully_defined(target_shape_shape) + new_shape = undefined_shape_of_rank(target_shape_shape.item(0)) + node.out_port(0).data.set_shape(new_shape) + return + assert target_shape is not None, 'Output shape is not defined for node "{}"'.format(node_name) PermuteInputs().set_input_permutation(node.in_node(1), node, 'output:0', 'shape') diff --git a/tools/mo/unit_tests/mo/middle/TensorIteratorCondition_test.py b/tools/mo/unit_tests/mo/middle/TensorIteratorCondition_test.py index 8a574abfdcd..a3313ff83ca 100644 --- a/tools/mo/unit_tests/mo/middle/TensorIteratorCondition_test.py +++ b/tools/mo/unit_tests/mo/middle/TensorIteratorCondition_test.py @@ -7,55 +7,85 @@ import numpy as np from openvino.tools.mo.middle.TensorIteratorCondition import LoopConditionMatcher from openvino.tools.mo.utils.ir_engine.compare_graphs import compare_graphs -from unit_tests.utils.graph import build_graph_with_attrs +from unit_tests.utils.graph import build_graph_with_attrs, regular_op_with_empty_data, connect, build_graph class TensorIteratorConditionTests(unittest.TestCase): - def test_not_dynamic(self): + def test_not_dynamic_1(self): pattern_matcher = LoopConditionMatcher() - pattern = pattern_matcher.pattern() + pattern = pattern_matcher.pattern(1) graph = build_graph_with_attrs(nodes_with_attrs=pattern['nodes'], edges_with_attrs=pattern['edges'], - new_nodes_with_attrs=[('maximum', {'kind': 'op', 'op': 'Maximum'}), - ('maximum_data', {'kind': 'data'}), - ('TensorIteratorInput', {'kind': 'op', 'op': 'TensorIteratorInput'})], - new_edges_with_attrs=[('maximum', 'maximum_data'), - ('Identity_1_data', 'TensorIteratorInput')], - update_nodes_attributes=[('init_1_data', {'value': np.array([0])}), - ('init_2_data', {'value': np.array([0])}), - ('add_1_y_data', {'value': np.array(1)}), - ('add_2_y_data', {'value': np.array(1)}), - ('loop_cond_data', {'value': None}), - ('Identity_2_data', {'value': None}, ), - ('Enter_1_less_data', {'value': None},), - ('Enter_2_less_data', {'value': None},), - ]) + new_nodes_with_attrs=[ + ('TensorIteratorInput', {'kind': 'op', 'op': 'TensorIteratorInput'})], + new_edges_with_attrs=[ + ('Identity_1_data', 'TensorIteratorInput')], + update_nodes_attributes=[ + ('init_1_data', {'value': np.array([0])}), + ('init_2_data', {'value': np.array([0])}), + ('add_1_y_data', {'value': np.array(1)}), + ('add_2_y_data', {'value': np.array(1)}), + ('loop_cond_data', {'value': None}), + ('Identity_2_data', {'value': None},), + ('Enter_1_less_data', {'value': None},), + ]) pattern_matcher.find_and_replace_pattern(graph) - graph_ref = build_graph_with_attrs( - nodes_with_attrs=[('TensorIteratorCondition', {'kind': 'op', 'op': 'TensorIteratorCondition'}), - ('loop_cond_data', {'kind': 'data'}), - ('identity_data', {'kind': 'data'}), - ('StridedSlice', {'kind': 'op', 'op':'StridedSlice'}), - ('StridedSlice_data', {'kind': 'data'}), - ('Maximum', {'kind': 'op', 'op': 'Maximum'}), - ('Maximum_data', {'kind': 'data'}), - ('minimum_data', {'kind': 'data'}), - ('TensorIteratorInput', {'kind': 'op', 'op': 'TensorIteratorInput'}) - ], - edges_with_attrs=[('Maximum', 'Maximum_data'), - ('StridedSlice', 'StridedSlice_data'), - ('StridedSlice_data', 'TensorIteratorCondition', {'in':0}), - ('minimum_data', 'TensorIteratorCondition', {'in':1}), - ('TensorIteratorCondition', 'loop_cond_data'), - ('TensorIteratorCondition', 'identity_data'), - ('identity_data', 'TensorIteratorInput'), - ], - update_edge_attrs=None, 
- new_nodes_with_attrs=[], - new_edges_with_attrs=[], - ) + nodes_attributes = { + **regular_op_with_empty_data('StridedSlice', {'op': 'StridedSlice', 'type': None}), + 'TensorIteratorCondition': {'kind': 'op', 'op': 'TensorIteratorCondition'}, + 'loop_cond_data': {'kind': 'data'}, + 'identity_data': {'kind': 'data'}, + 'minimum_data': {'kind': 'data'}, + 'TensorIteratorInput': {'kind': 'op', 'op': 'TensorIteratorInput'} + } + edges = [ + *connect('StridedSlice', '0:TensorIteratorCondition'), + ('minimum_data', 'TensorIteratorCondition', {'in':1}), + ('TensorIteratorCondition', 'loop_cond_data'), + ('TensorIteratorCondition', 'identity_data'), + ('identity_data', 'TensorIteratorInput') + ] + graph_ref = build_graph(nodes_attributes, edges) (flag, resp) = compare_graphs(graph, graph_ref, 'loop_cond_data', check_op_attrs=True) self.assertTrue(flag, resp) + def test_not_dynamic_2(self): + pattern_matcher = LoopConditionMatcher() + pattern = pattern_matcher.pattern(2) + graph = build_graph_with_attrs(nodes_with_attrs=pattern['nodes'], edges_with_attrs=pattern['edges'], + new_nodes_with_attrs=[ + ('TensorIteratorInput', {'kind': 'op', 'op': 'TensorIteratorInput'}), + ('some_op', {'kind': 'op', 'op': 'Add'})], + new_edges_with_attrs=[ + ('Identity_1_data', 'TensorIteratorInput'), + ('loop_cond_data', 'some_op'), + ], + update_nodes_attributes=[ + ('init_1_data', {'value': np.array([0])}), + ('init_2_data', {'value': np.array([0])}), + ('add_1_y_data', {'value': np.array(1)}), + ('add_2_y_data', {'value': np.array(1)}), + ('loop_cond_data', {'value': None}), + ('Identity_2_data', {'value': None},), + ('Enter_1_less_data', {'value': None},), + ]) + + pattern_matcher.find_and_replace_pattern(graph) + nodes_attributes = { + **regular_op_with_empty_data('loop_cond', {'op': 'TensorIteratorCondition', 'type': None}), + **regular_op_with_empty_data('StridedSlice', {'op': 'StridedSlice', 'type': None}), + 'some_op': {'kind': 'op', 'op': 'Add'}, + 'identity_data': {'kind': 'data'}, + 'TensorIteratorInput': {'kind': 'op', 'op': 'TensorIteratorInput'} + } + edges = [ + *connect('StridedSlice', 'loop_cond'), + *connect('loop_cond', 'some_op'), + ('loop_cond', 'identity_data'), + ('identity_data', 'TensorIteratorInput') + ] + graph_ref = build_graph(nodes_attributes, edges) + (flag, resp) = compare_graphs(graph, graph_ref, 'some_op', check_op_attrs=True) + self.assertTrue(flag, resp) diff --git a/tools/mo/unit_tests/mo/ops/broadcast_test.py b/tools/mo/unit_tests/mo/ops/broadcast_test.py index 55e8b99c72c..405411ffafb 100644 --- a/tools/mo/unit_tests/mo/ops/broadcast_test.py +++ b/tools/mo/unit_tests/mo/ops/broadcast_test.py @@ -6,7 +6,7 @@ import unittest import numpy as np from generator import generator, generate -from openvino.tools.mo.front.common.partial_infer.utils import int64_array +from openvino.tools.mo.front.common.partial_infer.utils import int64_array, undefined_shape_of_rank from openvino.tools.mo.graph.graph import Node from openvino.tools.mo.ops.broadcast import Broadcast from unit_tests.utils.graph import build_graph, valued_const_with_data, regular_op_with_empty_data, \ @@ -34,10 +34,10 @@ class BroadcastTest(unittest.TestCase): ([[3, 1]], [2, 1, 2], [-2, -1], 'explicit', [[[3, 1]], [[3, 1]]]), # ref_shape (2, 1, 2) ([[[9, 5, 7]], [[9, 5, 7]]], [2, 2, 1, 3], [1, 2, 3], 'explicit', # in_shape (2, 1, 3) - [[[[9, 5, 7]], [[9, 5, 7]]], [[[9, 5, 7]], [[9, 5, 7]]]]), # ref_out_shape (2, 2, 1, 3) + [[[[9, 5, 7]], [[9, 5, 7]]], [[[9, 5, 7]], [[9, 5, 7]]]]), # ref_out_shape (2, 2, 1, 3) - ([[[9, 5, 
7]], [[3, 4, 8]]], [2, 1, 3, 3], [0, 1, 2], 'explicit', # in_shape (2, 1, 3) - [[[[9, 9, 9], [5, 5, 5], [7, 7, 7]]], [[[3, 3, 3], [4, 4, 4], [8, 8, 8]]]]), # ref_out_shape (2, 1, 3, 3) + ([[[9, 5, 7]], [[3, 4, 8]]], [2, 1, 3, 3], [0, 1, 2], 'explicit', # in_shape (2, 1, 3) + [[[[9, 9, 9], [5, 5, 5], [7, 7, 7]]], [[[3, 3, 3], [4, 4, 4], [8, 8, 8]]]]), # ref_out_shape (2, 1, 3, 3) # negative tests ([1], [2, 2], [0], 'explicit', None, True), @@ -76,3 +76,29 @@ class BroadcastTest(unittest.TestCase): self.assertTrue(np.array_equal(broadcast_node.out_node().value, np.array(ref_out))) else: self.assertTrue(np.array_equal(broadcast_node.out_node().shape, np.array(target_shape))) + + @generate(*[ + ([1], [3], 'numpy', undefined_shape_of_rank(3)), + ([1], [3], 'explicit', undefined_shape_of_rank(3)), + ([1, 2], [3], 'numpy', None, True), + ]) + def test_broadcast_dynamic(self, data, target_shape_shape, mode='numpy', ref_out_shape=None, test_raising=False): + nodes = { + **shaped_data('data', int64_array(data)), + **shaped_data('target_shape', int64_array(target_shape_shape)), + **regular_op_with_empty_data('broadcast', {'op': 'Broadcast', 'mode': mode}), + } + + edges = [('data', 'broadcast'), + ('target_shape', 'broadcast'), + ('broadcast', 'broadcast_d')] + + graph = build_graph(nodes, edges) + + broadcast_node = Node(graph, 'broadcast') + if test_raising: + self.assertRaises(AssertionError, Broadcast.infer, broadcast_node) + return + + Broadcast.infer(broadcast_node) + self.assertTrue(np.array_equal(broadcast_node.out_node().shape, ref_out_shape)) From 5bb8f77c3feaaff7faed69e7df4311439d48c33c Mon Sep 17 00:00:00 2001 From: Anastasia Kuporosova Date: Tue, 22 Feb 2022 16:51:41 +0300 Subject: [PATCH 071/310] [Python API] Remove get/set_config methods from the PyOV (#10587) --- .../src/pyopenvino/core/compiled_model.cpp | 18 ---------- .../python/src/pyopenvino/core/core.cpp | 33 ------------------- 2 files changed, 51 deletions(-) diff --git a/src/bindings/python/src/pyopenvino/core/compiled_model.cpp b/src/bindings/python/src/pyopenvino/core/compiled_model.cpp index 3203000b0e2..884bcf9b0bf 100644 --- a/src/bindings/python/src/pyopenvino/core/compiled_model.cpp +++ b/src/bindings/python/src/pyopenvino/core/compiled_model.cpp @@ -149,15 +149,6 @@ void regclass_CompiledModel(py::module m) { :rtype: None )"); - // todo: remove after Accuracy Checker migration to set/get_property API - cls.def( - "get_config", - [](ov::CompiledModel& self, const std::string& name) -> py::object { - PyErr_WarnEx(PyExc_DeprecationWarning, "get_config() is deprecated, use get_property() instead.", 1); - return Common::from_ov_any(self.get_property(name)).as(); - }, - py::arg("name")); - cls.def( "get_property", [](ov::CompiledModel& self, const std::string& name) -> py::object { @@ -172,15 +163,6 @@ void regclass_CompiledModel(py::module m) { :rtype: Any )"); - // todo: remove after Accuracy Checker migration to set/get_property API - cls.def( - "get_metric", - [](ov::CompiledModel& self, const std::string& name) -> py::object { - PyErr_WarnEx(PyExc_DeprecationWarning, "get_metric() is deprecated, use get_property() instead.", 1); - return Common::from_ov_any(self.get_property(name)).as(); - }, - py::arg("name")); - cls.def("get_runtime_model", &ov::CompiledModel::get_runtime_model, R"( diff --git a/src/bindings/python/src/pyopenvino/core/core.cpp b/src/bindings/python/src/pyopenvino/core/core.cpp index 722967f91cc..20b32618bdf 100644 --- a/src/bindings/python/src/pyopenvino/core/core.cpp +++ 
b/src/bindings/python/src/pyopenvino/core/core.cpp @@ -32,19 +32,6 @@ void regclass_Core(py::module m) { cls.def(py::init(), py::arg("xml_config_file") = ""); - // todo: remove after Accuracy Checker migration to set/get_property API - cls.def( - "set_config", - [](ov::Core& self, const std::map& config, const std::string& device_name) { - PyErr_WarnEx(PyExc_DeprecationWarning, "set_config() is deprecated, use set_property() instead.", 1); - self.set_property(device_name, {config.begin(), config.end()}); - }, - py::arg("device_name") = "", - py::arg("properties"), - R"( - Sets properties for the device. - )"); - cls.def( "set_property", [](ov::Core& self, const std::map& properties) { @@ -369,16 +356,6 @@ void regclass_Core(py::module m) { new_compiled = core.import_model(user_stream, "CPU") )"); - // todo: remove after Accuracy Checker migration to set/get_property API - cls.def( - "get_config", - [](ov::Core& self, const std::string& device_name, const std::string& name) -> py::object { - PyErr_WarnEx(PyExc_DeprecationWarning, "get_config() is deprecated, use get_property() instead.", 1); - return Common::from_ov_any(self.get_property(device_name, name)).as(); - }, - py::arg("device_name"), - py::arg("name")); - cls.def( "get_property", [](ov::Core& self, const std::string& device_name, const std::string& name) -> py::object { @@ -397,16 +374,6 @@ void regclass_Core(py::module m) { :rtype: object )"); - // todo: remove after Accuracy Checker migration to set/get_property API - cls.def( - "get_metric", - [](ov::Core& self, const std::string device_name, const std::string name) -> py::object { - PyErr_WarnEx(PyExc_DeprecationWarning, "get_metric() is deprecated, use get_property() instead.", 1); - return Common::from_ov_any(self.get_property(device_name, name)).as(); - }, - py::arg("device_name"), - py::arg("name")); - cls.def("register_plugin", &ov::Core::register_plugin, py::arg("plugin_name"), From aced89a6551c1fc9ede0debcb97978923bd8f3ee Mon Sep 17 00:00:00 2001 From: Indira Salyahova Date: Tue, 22 Feb 2022 16:53:53 +0300 Subject: [PATCH 072/310] fix: don't pass parametr inplace_statistic for weights (#10593) --- tools/pot/openvino/tools/pot/algorithms/quantization/utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/pot/openvino/tools/pot/algorithms/quantization/utils.py b/tools/pot/openvino/tools/pot/algorithms/quantization/utils.py index 365dc814347..94bf92e6009 100644 --- a/tools/pot/openvino/tools/pot/algorithms/quantization/utils.py +++ b/tools/pot/openvino/tools/pot/algorithms/quantization/utils.py @@ -293,7 +293,8 @@ def get_tensor_statistics(range_estimator_config, for_weights, **kwargs): stat_mod_name = get_stat_name_by_config(range_estimator_config, stats_name) if fn_type in ['quantile', 'abs_quantile']: q_value = range_estimator_config[stats_name]['outlier_prob'] - ts_args['inplace_statistics'] = False + if not for_weights: + ts_args['inplace_statistics'] = False if stats_name == 'max': q_value = 1 - q_value ts_args.update({'q': q_value}) From bc0a84a1c1df9e69660d98fa4e8295aa6b905dcc Mon Sep 17 00:00:00 2001 From: Roman Kazantsev Date: Tue, 22 Feb 2022 16:54:20 +0300 Subject: [PATCH 073/310] [MO] Print information about new API 2.0 (#10567) * [MO] Print information about new API 2.0 Signed-off-by: Roman Kazantsev * Apply feedback Signed-off-by: Roman Kazantsev * Apply feedback Signed-off-by: Roman Kazantsev --- tools/mo/openvino/tools/mo/main.py | 6 +++++- .../openvino/tools/mo/utils/get_ov_update_message.py | 10 ++++++++++ 2 files changed, 15 
insertions(+), 1 deletion(-) diff --git a/tools/mo/openvino/tools/mo/main.py b/tools/mo/openvino/tools/mo/main.py index b2d79017c60..1f20f8d0c47 100644 --- a/tools/mo/openvino/tools/mo/main.py +++ b/tools/mo/openvino/tools/mo/main.py @@ -34,7 +34,7 @@ from openvino.tools.mo.utils.cli_parser import check_available_transforms, get_c get_placeholder_shapes, get_tf_cli_options, get_tuple_values, parse_transform, parse_tuple_pairs from openvino.tools.mo.utils.error import Error, FrameworkError from openvino.tools.mo.utils.find_ie_version import find_ie_version -from openvino.tools.mo.utils.get_ov_update_message import get_ov_update_message +from openvino.tools.mo.utils.get_ov_update_message import get_ov_update_message, get_ov_api20_message from openvino.tools.mo.utils.guess_framework import deduce_legacy_frontend_by_namespace from openvino.tools.mo.utils.logger import init_logger, progress_printer from openvino.tools.mo.utils.model_analysis import AnalysisResults @@ -523,11 +523,15 @@ def main(cli_parser: argparse.ArgumentParser, fem: FrontEndManager, framework: s argv.feManager = fem ov_update_message = None + ov_api20_message = None if not hasattr(argv, 'silent') or not argv.silent: ov_update_message = get_ov_update_message() + ov_api20_message = get_ov_api20_message() ret_code = driver(argv) if ov_update_message: print(ov_update_message) + if ov_api20_message and ret_code == 0: + print(ov_api20_message) telemetry.send_event('mo', 'conversion_result', 'success') telemetry.end_session('mo') telemetry.force_shutdown(1.0) diff --git a/tools/mo/openvino/tools/mo/utils/get_ov_update_message.py b/tools/mo/openvino/tools/mo/utils/get_ov_update_message.py index 92311409c73..388082ebac7 100644 --- a/tools/mo/openvino/tools/mo/utils/get_ov_update_message.py +++ b/tools/mo/openvino/tools/mo/utils/get_ov_update_message.py @@ -14,3 +14,13 @@ def get_ov_update_message(): link = 'https://software.intel.com/content/www/us/en/develop/tools/openvino-toolkit/download.html?cid=other&source=prod&campid=ww_2022_bu_IOTG_OpenVINO-2022-1&content=upg_all&medium=organic' return msg_fmt.format(link) if current_date >= expected_update_date else None + + +def get_ov_api20_message(): + link = "https://docs.openvino.ai" + message = '[ INFO ] The model was converted to IR v11, the latest model format that corresponds to the source DL framework ' \ + 'input/output format. 
While IR v11 is backwards compatible with OpenVINO Inference Engine API v1.0, ' \ + 'please use API v2.0 (as of 2022.1) to take advantage of the latest improvements in IR v11.\n' \ + 'Find more information about API v2.0 and IR v11 at {}'.format(link) + + return message From 71a0a6d261052da1bd93dc72355c48d5b28f3d7a Mon Sep 17 00:00:00 2001 From: Mikhail Ryzhov Date: Tue, 22 Feb 2022 16:54:56 +0300 Subject: [PATCH 074/310] [GNA] Klocwork fixes --- src/plugins/intel_gna/backend/am_intel_dnn.cpp | 3 +++ src/plugins/intel_gna/gna_graph_compiler.cpp | 3 +++ src/plugins/intel_gna/gna_plugin.cpp | 3 +++ 3 files changed, 9 insertions(+) diff --git a/src/plugins/intel_gna/backend/am_intel_dnn.cpp b/src/plugins/intel_gna/backend/am_intel_dnn.cpp index db1379916dc..b75cc3f265f 100644 --- a/src/plugins/intel_gna/backend/am_intel_dnn.cpp +++ b/src/plugins/intel_gna/backend/am_intel_dnn.cpp @@ -1518,6 +1518,9 @@ void GNAPluginNS::backend::AMIntelDNN::InitGNAStruct(Gna2Model *gnaModel, const if (i == 0) { THROW_GNA_EXCEPTION << "Pooling component with no preceeding component"; } else if (gnaOperation->Type == Gna2OperationTypeConvolution) { + if (gnaOperation->Operands == nullptr || gnaOperation->NumberOfOperands <= PwlOpIdx) { + THROW_GNA_EXCEPTION << "Number and details of operands are wrong"; + } auto pwlOperand = gnaOperation->Operands[PwlOpIdx]; if (pwlOperand != nullptr && pwlOperand->Shape.Dimensions[0] != 0 && gnaOperation->Operands[InOpIdx]->Shape.NumberOfDimensions == 2) { // kDnnConvolutional1dOp diff --git a/src/plugins/intel_gna/gna_graph_compiler.cpp b/src/plugins/intel_gna/gna_graph_compiler.cpp index 8cb40bb83e1..1e62ac6f15c 100644 --- a/src/plugins/intel_gna/gna_graph_compiler.cpp +++ b/src/plugins/intel_gna/gna_graph_compiler.cpp @@ -2392,6 +2392,9 @@ GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer, auto prevLayer = CNNNetPrevLayerSkipCertain(layer, idx, [](CNNLayerPtr l) { return LayerInfo(l).isNonFunctional(); }); + if (!prevLayer) { + THROW_GNA_EXCEPTION << "Input layer was not found"; + } gnalog() << "Connecting input " << layer->name << " to " << prevLayer->name << " ...\n"; diff --git a/src/plugins/intel_gna/gna_plugin.cpp b/src/plugins/intel_gna/gna_plugin.cpp index 7d095ec4793..8c3b9fb3485 100644 --- a/src/plugins/intel_gna/gna_plugin.cpp +++ b/src/plugins/intel_gna/gna_plugin.cpp @@ -196,6 +196,9 @@ void GNAPlugin::ExportScores(void *ptr_dst, uint32_t num_vector_stride, Precision precision_in, Precision precision_out) { + if (ptr_src == nullptr || ptr_dst == nullptr) { + THROW_GNA_EXCEPTION << "Received null pointer arguments"; + } if (precision_out != Precision::I32 && precision_out != Precision::FP32) { THROW_GNA_EXCEPTION << "Unsupported target precision for infer : " << precision_out.name(); } From e59739ce88ea9f7aac38d78a8943f00f7798ae42 Mon Sep 17 00:00:00 2001 From: Nikolay Shchegolev Date: Tue, 22 Feb 2022 16:57:26 +0300 Subject: [PATCH 075/310] [CPU] RNN node enforce bf16 mode does not work. 
(#9859) --- src/plugins/intel_cpu/src/nodes/rnn.cpp | 14 ++++++-------- .../shared_tests_instances/skip_tests_config.cpp | 2 -- .../plugin/cpu/single_layer_tests/gru_cell.cpp | 5 ++--- .../plugin/cpu/single_layer_tests/gru_sequence.cpp | 12 ++++-------- .../plugin/cpu/single_layer_tests/lstm_cell.cpp | 5 ++--- .../cpu/single_layer_tests/lstm_sequence.cpp | 8 ++------ .../plugin/cpu/single_layer_tests/rnn_cell.cpp | 5 ++--- .../plugin/cpu/single_layer_tests/rnn_sequence.cpp | 8 ++------ 8 files changed, 20 insertions(+), 39 deletions(-) diff --git a/src/plugins/intel_cpu/src/nodes/rnn.cpp b/src/plugins/intel_cpu/src/nodes/rnn.cpp index 5792057dd0e..08fc91c0ae1 100644 --- a/src/plugins/intel_cpu/src/nodes/rnn.cpp +++ b/src/plugins/intel_cpu/src/nodes/rnn.cpp @@ -810,16 +810,14 @@ void MKLDNNRNN::prepareParams() { bool wFormatWasChanged = false; // WA To avoid different weights layer and iter formats in FP32 case. - if (dataPrecision == Precision::FP32) { - if (SL != 1 || B < optimalBatchSize) { - if (wFormat != mkldnn::memory::format_tag::ldigo) { - wFormat = mkldnn::memory::format_tag::ldigo; - wFormatWasChanged = true; - } - } else if (wFormat != mkldnn::memory::format_tag::any) { - wFormat = mkldnn::memory::format_tag::any; + if (SL != 1 || B < optimalBatchSize) { + if (wFormat != mkldnn::memory::format_tag::ldigo) { + wFormat = mkldnn::memory::format_tag::ldigo; wFormatWasChanged = true; } + } else if (wFormat != mkldnn::memory::format_tag::any) { + wFormat = mkldnn::memory::format_tag::any; + wFormatWasChanged = true; } if (wFormatWasChanged) { auto weightsDims = MKLDNNExtensionUtils::convertToDnnlDims(VectorDims{ L, D, DC, G, SC }); diff --git a/src/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp b/src/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp index f0834a5fb55..ff4f7c36838 100644 --- a/src/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp +++ b/src/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp @@ -66,8 +66,6 @@ std::vector disabledTestPatterns() { // TODO: 57562 No dynamic output shape support R"(.*NonZeroLayerTest.*)", - // TODO: 74961. Enforce precision via inType and outType does not work properly. - R"(.*(RNN|GRU|LSTM).*ENFORCE_BF16=YES.*)", // Not expected behavior R"(.*Behavior.*InferRequestIOBBlobSetLayoutTest.*layout=(95|OIHW).*)", R"(.*Behavior.*InferRequestIOBBlobSetLayoutTest.*layout=(95|OIHW).*)", diff --git a/src/tests/functional/plugin/cpu/single_layer_tests/gru_cell.cpp b/src/tests/functional/plugin/cpu/single_layer_tests/gru_cell.cpp index 1c244c8d2c6..03378b40de9 100644 --- a/src/tests/functional/plugin/cpu/single_layer_tests/gru_cell.cpp +++ b/src/tests/functional/plugin/cpu/single_layer_tests/gru_cell.cpp @@ -88,11 +88,10 @@ protected: configuration.insert(additionalConfig.begin(), additionalConfig.end()); if (additionalConfig[InferenceEngine::PluginConfigParams::KEY_ENFORCE_BF16] == InferenceEngine::PluginConfigParams::YES) { - inType = outType = ElementType::bf16; + selectedType = makeSelectedTypeStr(selectedType, ElementType::bf16); } else { - inType = outType = netPrecision; + selectedType = makeSelectedTypeStr(selectedType, netPrecision); } - selectedType = makeSelectedTypeStr(selectedType, outType); auto params = ngraph::builder::makeDynamicParams(netPrecision, inputDynamicShapes); std::vector WRB = {{3 * hiddenSize, inputSize}, {3 * hiddenSize, hiddenSize}, {(linearBeforeReset ? 
4 : 3) * hiddenSize}}; diff --git a/src/tests/functional/plugin/cpu/single_layer_tests/gru_sequence.cpp b/src/tests/functional/plugin/cpu/single_layer_tests/gru_sequence.cpp index c2c035697ee..34291d4306d 100644 --- a/src/tests/functional/plugin/cpu/single_layer_tests/gru_sequence.cpp +++ b/src/tests/functional/plugin/cpu/single_layer_tests/gru_sequence.cpp @@ -105,15 +105,11 @@ protected: const size_t inputSize = targetStaticShapes.front()[0][2]; const size_t numDirections = direction == ov::op::RecurrentSequenceDirection::BIDIRECTIONAL ? 2 : 1; - // 3rd input type must be an integer, thus it cannot be forced to BF16. if (additionalConfig[InferenceEngine::PluginConfigParams::KEY_ENFORCE_BF16] == InferenceEngine::PluginConfigParams::YES) { - if (inputDynamicShapes.size() > 2) - throw std::runtime_error("Invalid test case. Cannot enforce integer input to BF16."); - inType = outType = ElementType::bf16; + selectedType = makeSelectedTypeStr(selectedType, ElementType::bf16); } else { - outType = netPrecision; + selectedType = makeSelectedTypeStr(selectedType, netPrecision); } - selectedType = makeSelectedTypeStr(selectedType, outType); auto params = ngraph::builder::makeDynamicParams(netPrecision, inputDynamicShapes); const size_t batchSize = inputDynamicShapes[0][0].is_static() ? inputDynamicShapes[0][0].get_length() : @@ -295,7 +291,7 @@ const std::vector> dynamicShapes = { { {1, 2, 10}, {1, 4, 10}, {1, 8, 10} } }, // Target shapes { {1, 1, 10}, // Dynamic shape 1 { {1, 1, 10}, {1, 1, 10}, {1, 1, 10} } }, // Target shapes - { {-1}, // Dynamic shape 2 + { {-1}, // Dynamic shape 2 { {1}, {1}, {1} } } }, // Target shapes { { {-1, -1, -1}, // #5. Dynamic shape 0 { {1, 2, 10}, {1, 4, 10}, {1, 8, 10} } }, // Target shapes @@ -304,7 +300,7 @@ const std::vector> dynamicShapes = { { {-1}, // Dynamic shape 2 { {1}, {1}, {1} } } }, // Target shapes { { {2, {1, 5}, 10}, // #6. Dynamic shape 0 - { {10, 2, 10}, {2, 3, 10}, {2, 4, 10} } }, // Target shapes + { {2, 2, 10}, {2, 3, 10}, {2, 4, 10} } }, // Target shapes { {2, 1, 1}, // Dynamic shape 1 { {2, 1, 1}, {2, 1, 1}, {2, 1, 1} } } }, // Target shapes { { {5, -1, 10}, // #7. 
Dynamic shape 0 diff --git a/src/tests/functional/plugin/cpu/single_layer_tests/lstm_cell.cpp b/src/tests/functional/plugin/cpu/single_layer_tests/lstm_cell.cpp index e4e85a776d7..3a0d6bbd79d 100644 --- a/src/tests/functional/plugin/cpu/single_layer_tests/lstm_cell.cpp +++ b/src/tests/functional/plugin/cpu/single_layer_tests/lstm_cell.cpp @@ -87,11 +87,10 @@ protected: const size_t inputSize = targetStaticShapes.front()[0][1]; if (additionalConfig[InferenceEngine::PluginConfigParams::KEY_ENFORCE_BF16] == InferenceEngine::PluginConfigParams::YES) { - inType = outType = ElementType::bf16; + selectedType = makeSelectedTypeStr(selectedType, ElementType::bf16); } else { - inType = outType = netPrecision; + selectedType = makeSelectedTypeStr(selectedType, netPrecision); } - selectedType = makeSelectedTypeStr(selectedType, outType); auto params = ngraph::builder::makeDynamicParams(netPrecision, inputDynamicShapes); auto paramsOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)); diff --git a/src/tests/functional/plugin/cpu/single_layer_tests/lstm_sequence.cpp b/src/tests/functional/plugin/cpu/single_layer_tests/lstm_sequence.cpp index 2de06f59a05..cc3c3db39c6 100644 --- a/src/tests/functional/plugin/cpu/single_layer_tests/lstm_sequence.cpp +++ b/src/tests/functional/plugin/cpu/single_layer_tests/lstm_sequence.cpp @@ -105,15 +105,11 @@ protected: configuration.insert(additionalConfig.begin(), additionalConfig.end()); - // 4th input type must be integer, thus it cannot be forced to BF16. if (additionalConfig[InferenceEngine::PluginConfigParams::KEY_ENFORCE_BF16] == InferenceEngine::PluginConfigParams::YES) { - if (inputDynamicShapes.size() > 3) - throw std::runtime_error("Invalid test case. Cannot enforce integer input to BF16."); - inType = outType = ElementType::bf16; + selectedType = makeSelectedTypeStr(selectedType, ElementType::bf16); } else { - outType = netPrecision; + selectedType = makeSelectedTypeStr(selectedType, netPrecision); } - selectedType = makeSelectedTypeStr(selectedType, outType); auto params = ngraph::builder::makeDynamicParams(netPrecision, inputDynamicShapes); const size_t batchSize = inputDynamicShapes[0][0].is_static() ? 
inputDynamicShapes[0][0].get_length() : diff --git a/src/tests/functional/plugin/cpu/single_layer_tests/rnn_cell.cpp b/src/tests/functional/plugin/cpu/single_layer_tests/rnn_cell.cpp index 0ec1d3f9789..3d2097f7b23 100644 --- a/src/tests/functional/plugin/cpu/single_layer_tests/rnn_cell.cpp +++ b/src/tests/functional/plugin/cpu/single_layer_tests/rnn_cell.cpp @@ -83,11 +83,10 @@ protected: configuration.insert(additionalConfig.begin(), additionalConfig.end()); if (additionalConfig[InferenceEngine::PluginConfigParams::KEY_ENFORCE_BF16] == InferenceEngine::PluginConfigParams::YES) { - inType = outType = ElementType::bf16; + selectedType = makeSelectedTypeStr(selectedType, ElementType::bf16); } else { - inType = outType = netPrecision; + selectedType = makeSelectedTypeStr(selectedType, netPrecision); } - selectedType = makeSelectedTypeStr(selectedType, outType); auto params = ngraph::builder::makeDynamicParams(netPrecision, inputDynamicShapes); auto paramsOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)); diff --git a/src/tests/functional/plugin/cpu/single_layer_tests/rnn_sequence.cpp b/src/tests/functional/plugin/cpu/single_layer_tests/rnn_sequence.cpp index 31963f9fa05..baeb99589c3 100644 --- a/src/tests/functional/plugin/cpu/single_layer_tests/rnn_sequence.cpp +++ b/src/tests/functional/plugin/cpu/single_layer_tests/rnn_sequence.cpp @@ -94,15 +94,11 @@ protected: configuration.insert(additionalConfig.begin(), additionalConfig.end()); - // 3rd input type must be integer, thus it cannot be forced to BF16. if (additionalConfig[InferenceEngine::PluginConfigParams::KEY_ENFORCE_BF16] == InferenceEngine::PluginConfigParams::YES) { - if (inputDynamicShapes.size() > 2) - throw std::runtime_error("Invalid test case. Cannot enforce integer input to BF16."); - inType = outType = ElementType::bf16; + selectedType = makeSelectedTypeStr(selectedType, ElementType::bf16); } else { - outType = netPrecision; + selectedType = makeSelectedTypeStr(selectedType, netPrecision); } - selectedType = makeSelectedTypeStr(selectedType, outType); auto params = ngraph::builder::makeDynamicParams(netPrecision, inputDynamicShapes); const size_t batchSize = inputDynamicShapes[0][0].is_static() ? 
inputDynamicShapes[0][0].get_length() : From dab1a34aa2b2153d93f131fb0bdbcd88d0647e05 Mon Sep 17 00:00:00 2001 From: Maxim Shevtsov Date: Tue, 22 Feb 2022 17:19:23 +0300 Subject: [PATCH 076/310] =?UTF-8?q?checking=20the=20network=20batch-abilit?= =?UTF-8?q?y=20(internal=20helper=20func=20on=20top=20of=20bat=E2=80=A6=20?= =?UTF-8?q?(#10446)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * checking the network batchability (internal helper func on top of batch tracking) before doing hetero * more general logic with respect to batch-ability of the network * a dynamism check that I've owed from the PR-10560 * using the DO-detached mechanism for early hetero exit, also fixed this flag in the Batching plugin (although minor, as the DO is removed by HETERO) * adding the dimension tracking logic depending on whether implicitly/expicitly the auto-batching is enabled * changed the DetectionOutput affinity markup to go over results, also accomodate Convert, so only 2 subgraphs are made by the HETERO --- src/inference/src/check_network_batchable.cpp | 79 +++++++++++++++++++ src/inference/src/check_network_batchable.hpp | 23 ++++++ src/inference/src/ie_core.cpp | 44 ++++------- src/plugins/auto_batch/auto_batch.cpp | 4 +- 4 files changed, 120 insertions(+), 30 deletions(-) create mode 100644 src/inference/src/check_network_batchable.cpp create mode 100644 src/inference/src/check_network_batchable.hpp diff --git a/src/inference/src/check_network_batchable.cpp b/src/inference/src/check_network_batchable.cpp new file mode 100644 index 00000000000..8ce148dd152 --- /dev/null +++ b/src/inference/src/check_network_batchable.cpp @@ -0,0 +1,79 @@ +#include "check_network_batchable.hpp" + +#include "dimension_tracker.hpp" +#include "ie_ngraph_utils.hpp" +#include "ngraph/opsets/opset.hpp" +#include "openvino/op/detection_output.hpp" +#include "openvino/op/ops.hpp" +#include "openvino/pass/manager.hpp" +#include "transformations/common_optimizations/dimension_tracking.hpp" +#include "transformations/init_node_info.hpp" + +namespace InferenceEngine { +namespace details { + +NetworkBatchAbility isNetworkBatchable(const CNNNetwork& orig_network, + const std::string& deviceNameWithoutBatch, + bool strictly_track_dims) { + CNNNetwork clonedNetwork(cloneNetwork(orig_network)); + auto function = clonedNetwork.getFunction(); + // find the batch dim + ov::pass::Manager m; + m.register_pass(); + m.register_pass(true, strictly_track_dims); + m.run_passes(function); + bool any_batched_inputs = false; + // do not reshape/re-batch originally batched networks and when there are no inputs with the N* layouts + // input(s) should have the batch dim as the first dim or none (current limitation of the auto-batching impl) + const auto& params = function->get_parameters(); + for (size_t input_id = 0; input_id < params.size(); input_id++) { + const auto& input = params[input_id]; + const auto& shape = input->get_partial_shape(); + // currently no plugin support batched execution for dynamic networks + if (shape.is_dynamic()) + return NetworkBatchAbility::NO; + // check the batch dim: either 0th (and the original batch size of 1) or none + if (shape.size() && ov::DimensionTracker::get_label(shape[0])) { + const auto& static_shape = input->get_shape(); + if (static_shape[0] != 1) + return NetworkBatchAbility::NO; + else + any_batched_inputs = true; + } else { + // if the 0-th dim is not for the batch, then we support only the case when NONE dimension is batch + for (size_t s = 1; s < 
shape.size(); s++) + if (ov::DimensionTracker::get_label(shape[s])) + return NetworkBatchAbility::NO; + } + } + if (!any_batched_inputs) + return NetworkBatchAbility::NO; + + for (auto&& node : orig_network.getFunction()->get_ops()) + node->get_rt_info()["affinity"] = "BATCH"; // default affinity (ignored if HETERO is not triggered) + // have to execute the DetectionOutput separately (without batching) + // as this layer does mix-in the values from the different inputs (batch id) + bool bDetectionOutput = false; + for (auto& result_node : orig_network.getFunction()->get_results()) { + auto do_node = result_node->input_value(0).get_node_shared_ptr(); + std::shared_ptr convert_node; + if (ov::is_type(do_node)) { // cases with do->convert->result + convert_node = do_node; + do_node = convert_node->get_input_node_shared_ptr(0); + } + // the code below doesn't need to separate the versions (opsets) of the DetectionOutput + // so base class check is enough + auto detectionOutputBase = std::dynamic_pointer_cast(do_node); + if (detectionOutputBase) { + result_node->get_rt_info()["affinity"] = deviceNameWithoutBatch; + do_node->get_rt_info()["affinity"] = deviceNameWithoutBatch; + if (convert_node) + convert_node->get_rt_info()["affinity"] = deviceNameWithoutBatch; + bDetectionOutput = true; + } + } + return bDetectionOutput ? NetworkBatchAbility::WITH_HETERO : NetworkBatchAbility::AS_IS; +} + +} // namespace details +} // namespace InferenceEngine \ No newline at end of file diff --git a/src/inference/src/check_network_batchable.hpp b/src/inference/src/check_network_batchable.hpp new file mode 100644 index 00000000000..ed0efdff4d3 --- /dev/null +++ b/src/inference/src/check_network_batchable.hpp @@ -0,0 +1,23 @@ +// Copyright (C) 2018-2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// +#pragma once + +#include + +#include "cnn_network_ngraph_impl.hpp" + +namespace InferenceEngine { +namespace details { +/** + * @brief Checks if the input network is batch-able (e.g. 
no dynamic inputs, inputs has the batch dimension, etc) + * @param function A ngraph function to check for automatic-batching applicability + * @return An enum value indicating whether the network can be safely batched (with HETERO or as is) or not + */ +enum NetworkBatchAbility : uint32_t { NO = 0, AS_IS, WITH_HETERO }; +NetworkBatchAbility isNetworkBatchable(const CNNNetwork& network, + const std::string& deviceNoBatch, + bool strictly_track_dims); + +} // namespace details +} // namespace InferenceEngine diff --git a/src/inference/src/ie_core.cpp b/src/inference/src/ie_core.cpp index fac529fc855..b62a7b176d5 100644 --- a/src/inference/src/ie_core.cpp +++ b/src/inference/src/ie_core.cpp @@ -13,6 +13,7 @@ #include #include "any_copy.hpp" +#include "check_network_batchable.hpp" #include "cnn_network_ngraph_impl.hpp" #include "compilation_context.hpp" #include "cpp/ie_cnn_network.h" @@ -557,6 +558,8 @@ public: std::string& deviceName, std::map& config) { std::string deviceNameWithBatchSize, deviceNameWithoutBatch; + // fully strict dims tracking by default (Auto-Batching is enabled implicitly) + bool strictly_check_dims = true; if (deviceName.find("BATCH") != std::string::npos) { // explicitly enabled Auto-Batching auto pos = deviceName.find_first_of(":"); @@ -564,6 +567,9 @@ public: return; // BATCH device is already configured via the config deviceNameWithBatchSize = deviceName.substr(pos + 1); deviceNameWithoutBatch = DeviceIDParser::getBatchDevice(deviceNameWithBatchSize); + // when user sets the BATCH device explicitly, we may check the dims less strictly + // as the result is being checked by the user + strictly_check_dims = false; } else { // check whether the Auto-Batching is disabled explicitly const auto& batch_mode = config.find(ov::hint::allow_auto_batching.name()); @@ -594,38 +600,18 @@ public: if (bExclReqsEnabled || (!bTputInPlg && !bTputInLoadCfg)) return; } - auto function = network.getFunction(); - // have to execute the DetectionOutput separately (without batching) - // as this layer mix-in the values from the different inputs (batch id) - bool bDetectionOutput = false; - const std::string detectionOutputOpName = ngraph::op::DetectionOutput::get_type_info_static().name; - const std::string resultOpName = ngraph::op::Result::get_type_info_static().name; - for (auto&& node : function->get_ops()) { - auto isDetectionOutputParent = [&detectionOutputOpName](decltype(node)& nd) { - for (size_t n = 0; n < nd->get_input_size(); n++) { - // the code below doesn't need to separate the versions (opsets) of the DetectionOutput - // so type_info name check is enough - // (if in a future there will be a new ver that doesn't mix the batch, this will be new op) - if (detectionOutputOpName == nd->get_input_node_ptr(n)->get_type_info().name) - return true; - } - return false; - }; - - if ((detectionOutputOpName == node->get_type_info().name) || - ((resultOpName == node->get_type_info().name) && isDetectionOutputParent(node))) { - node->get_rt_info()["affinity"] = deviceNameWithoutBatch; - bDetectionOutput = true; - } else { - node->get_rt_info()["affinity"] = "BATCH"; - } - } auto batchConfig = deviceNameWithBatchSize.empty() ? 
deviceNameWithoutBatch : deviceNameWithBatchSize; - if (bDetectionOutput) { + auto res = InferenceEngine::details::isNetworkBatchable(network, deviceNameWithoutBatch, strictly_check_dims); + switch (res) { + case InferenceEngine::details::NetworkBatchAbility::NO: + return; + case InferenceEngine::details::NetworkBatchAbility::AS_IS: + deviceName = "BATCH:" + batchConfig; + break; + case InferenceEngine::details::NetworkBatchAbility::WITH_HETERO: deviceName = "HETERO:BATCH," + deviceNameWithoutBatch; config[CONFIG_KEY(AUTO_BATCH_DEVICE_CONFIG)] = batchConfig; - } else { - deviceName = "BATCH:" + batchConfig; + break; } } diff --git a/src/plugins/auto_batch/auto_batch.cpp b/src/plugins/auto_batch/auto_batch.cpp index 53c3ef7fca4..0c7d15514d8 100644 --- a/src/plugins/auto_batch/auto_batch.cpp +++ b/src/plugins/auto_batch/auto_batch.cpp @@ -841,7 +841,7 @@ InferenceEngine::IExecutableNetworkInternal::Ptr AutoBatchInferencePlugin::LoadN // find the batch dim ov::pass::Manager m; m.register_pass(); - m.register_pass(true, check_dims); + m.register_pass(false, check_dims); m.run_passes(function); // do not reshape/re-batch originally batched networks and when there are no inputs with the N* layouts // input(s) should have the batch dim as the first dim (current limitation of the auto-batching impl) @@ -871,6 +871,8 @@ InferenceEngine::IExecutableNetworkInternal::Ptr AutoBatchInferencePlugin::LoadN for (size_t output_id = 0; output_id < results.size(); output_id++) { const auto& output = results[output_id]; const auto& shape = output->get_output_partial_shape(0); + if (shape.is_dynamic()) + IE_THROW(NotImplemented) << "Auto-batching does not support dynamic networks!"; // check the batch dim: either 0th (and the original batch size of 1) or none if (shape.size() && ov::DimensionTracker::get_label(shape[0])) { if (shape[0] != 1) From e2df6d149b472acecafe59e5a6fdfa6e73f2e8a0 Mon Sep 17 00:00:00 2001 From: Indira Salyahova Date: Tue, 22 Feb 2022 17:46:08 +0300 Subject: [PATCH 077/310] [POT] Update face detection sample (#10471) * support cascade model for sw api * update mtcnnengine * delete empty line --- .../face_detection/face_detection_sample.py | 97 ++++++++++++++++--- 1 file changed, 81 insertions(+), 16 deletions(-) diff --git a/tools/pot/openvino/tools/pot/api/samples/face_detection/face_detection_sample.py b/tools/pot/openvino/tools/pot/api/samples/face_detection/face_detection_sample.py index c22adefbd9c..d9e6cc90aec 100644 --- a/tools/pot/openvino/tools/pot/api/samples/face_detection/face_detection_sample.py +++ b/tools/pot/openvino/tools/pot/api/samples/face_detection/face_detection_sample.py @@ -6,12 +6,18 @@ from argparse import ArgumentParser from functools import partial from time import time +import copy import cv2 import numpy as np +from openvino.runtime import PartialShape # pylint: disable=E0611,E0401 from openvino.tools.pot import Metric, DataLoader, IEEngine, \ load_model, compress_model_weights, create_pipeline from openvino.tools.pot.graph.model_utils import add_outputs +from openvino.tools.pot.samplers.batch_sampler import BatchSampler +from openvino.tools.pot.engines.utils import process_accumulated_stats, \ + restore_original_node_names, align_stat_names_with_results, \ + add_tensor_names, collect_model_outputs from openvino.tools.pot.utils.logger import init_logger, get_logger from openvino.tools.pot.api.samples.face_detection import utils @@ -28,7 +34,7 @@ class WiderFaceLoader(DataLoader): def __init__(self, config): super().__init__(config) self._min_height_ann = 60 - 
self._img_ids, self._annotations = self._read_image_ids_annotations(config.annotation_file) + self._img_ids, self._annotations = self._read_image_ids_annotations(self.config.annotation_file) def __getitem__(self, index): """ @@ -98,14 +104,68 @@ class MTCNNEngine(IEEngine): self._model = self._set_model(model) self._output_layers = {} stage_names = ['pnet', 'rnet', 'onet'] - for stage, model_dict in enumerate(model.models): + for stage, _ in enumerate(model.models): self._output_layers[stage_names[stage]] = { - 'probabilities': model_dict['name'] + '_' + self.config['outputs']['probabilities'][stage], - 'regions': model_dict['name'] + '_' + self.config['outputs']['regions'][stage], + 'probabilities': self.config['outputs']['probabilities'][stage], + 'regions': self.config['outputs']['regions'][stage], } def _add_outputs(self, nodes_name): - add_outputs(self._model, nodes_name) + return add_outputs(self._model, nodes_name) + + def predict(self, stats_layout=None, sampler=None, stat_aliases=None, + metric_per_sample=False, print_progress=False): + stat_names_aliases = None + if sampler is None: + sampler = BatchSampler(self.data_loader) + if stats_layout: + model_with_stat_op, nodes_names_map, output_to_node_names = self._statistic_graph_builder. \ + insert_statistic(copy.deepcopy(self._nx_model), + stats_layout, stat_aliases) + self.set_model(model_with_stat_op) + + nodes_name = [] + for names_map in nodes_names_map.values(): + nodes_name.extend(list(names_map.keys())) + + outputs = self._add_outputs(nodes_names_map) + for model_name, outputs_data in outputs.items(): + add_tensor_names(outputs_data, nodes_names_map[model_name].keys()) + + model_output_names = [] + for model in self._model: + model_output_names.extend(collect_model_outputs(model['model'])) + + align_stat_names_with_results(model_output_names, + nodes_name, + output_to_node_names, + stats_layout, + stat_aliases) + + # Creating statistics layout with IE-like names + stats_layout, stat_names_aliases = self._convert_stats_names(stats_layout) + + self._predict(stats_layout=stats_layout, + sampler=sampler, + print_progress=print_progress, + need_metrics_per_sample=metric_per_sample) + + accumulated_stats = \ + process_accumulated_stats(stat_names_aliases=stat_names_aliases, + accumulated_stats=self._accumulated_layer_stats) + + if stats_layout: + restore_original_node_names(output_to_node_names, accumulated_stats, stats_layout, stat_aliases) + + metrics = None + if self._metric: + metrics = self._metric.avg_value + if metric_per_sample: + metrics = (sorted(self._per_sample_metrics, key=lambda i: i['sample_id']), metrics) + + self._reset() + + return metrics, accumulated_stats def _predict(self, stats_layout, sampler, print_progress=False, need_metrics_per_sample=False): @@ -142,12 +202,12 @@ class MTCNNEngine(IEEngine): progress_log_fn('Inference finished') def _infer(self, data, ie_network, stats_collect_callback=None): + ie_network.reshape(PartialShape(data.shape)) filled_input = self._fill_input(ie_network, data) - input_shapes = {layer_name: data.shape for layer_name, data in filled_input.items()} - ie_network.reshape(input_shapes) - exec_model = self._ie.load_network(network=ie_network, - device_name=self.config.device) - result = exec_model.infer(filled_input) + compiled_model = self._ie.compile_model(model=ie_network, + device_name=self.config.device) + infer_request = compiled_model.create_infer_request() + result = infer_request.infer(filled_input) # Collect statistics if stats_collect_callback: 
stats_collect_callback(self._transform_for_callback(result)) @@ -173,9 +233,12 @@ class MTCNNEngine(IEEngine): total_boxes = np.zeros((0, 9), np.float) for idx, outputs in enumerate(output): scales = input_meta['scales'][idx] + mapping = outputs[[i for i, _ in outputs.items() + if i.any_name == self._output_layers['pnet']['probabilities']][0]][0, 1] + + regions = outputs[[i for i, _ in outputs.items() + if i.any_name == self._output_layers['pnet']['regions']][0]][0] - mapping = outputs[self._output_layers['pnet']['probabilities']][0, 1] - regions = outputs[self._output_layers['pnet']['regions']][0] boxes = utils.generate_bounding_box(mapping, regions, scales, 0.6) if len(boxes) != 0: pick = utils.nms(boxes, 0.5) @@ -207,8 +270,9 @@ class MTCNNEngine(IEEngine): return np.transpose(img, [0, 3, 2, 1]) def postprocess(output): - score = output[self._output_layers['rnet']['probabilities']][:, 1] - regions = output[self._output_layers['rnet']['regions']] + score = output[[i for i, _ in output.items() + if i.any_name == self._output_layers['rnet']['probabilities']][0]][:, 1] + regions = output[[i for i, _ in output.items() if i.any_name == self._output_layers['rnet']['regions']][0]] return utils.calibrate_bboxes(prev_stage_output, score, regions, nms_type='union') ie_network = self._model[1]['model'] @@ -223,8 +287,9 @@ class MTCNNEngine(IEEngine): return np.transpose(img, [0, 3, 2, 1]) def postprocess(output): - score = output[self._output_layers['onet']['probabilities']][:, 1] - regions = output[self._output_layers['onet']['regions']] + score = output[[i for i, _ in output.items() + if i.any_name == self._output_layers['onet']['probabilities']][0]][:, 1] + regions = output[[i for i, _ in output.items() if i.any_name == self._output_layers['onet']['regions']][0]] bboxes = utils.calibrate_bboxes(prev_stage_output, score, regions) pick = utils.nms(bboxes, 0.7, 'min') bboxes_to_remove = np.setdiff1d(np.arange(len(bboxes)), pick) From b12c3389eeffc863cce2a49149db4cf55bcdc756 Mon Sep 17 00:00:00 2001 From: Ivan Novoselov Date: Tue, 22 Feb 2022 18:18:49 +0300 Subject: [PATCH 078/310] [Sinppets] Add virt destructors to Emitter and TargetMachine (#10588) --- src/common/snippets/include/snippets/emitter.hpp | 1 + src/common/snippets/include/snippets/generator.hpp | 1 + src/plugins/intel_cpu/src/emitters/cpu_generator.hpp | 1 - 3 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/common/snippets/include/snippets/emitter.hpp b/src/common/snippets/include/snippets/emitter.hpp index 469c8794cb9..2ba0f85c5de 100644 --- a/src/common/snippets/include/snippets/emitter.hpp +++ b/src/common/snippets/include/snippets/emitter.hpp @@ -48,6 +48,7 @@ public: */ virtual void emit_data() const { } + virtual ~Emitter() = default; }; } // namespace snippets diff --git a/src/common/snippets/include/snippets/generator.hpp b/src/common/snippets/include/snippets/generator.hpp index 0deb9d9eade..e1a1fdf720a 100644 --- a/src/common/snippets/include/snippets/generator.hpp +++ b/src/common/snippets/include/snippets/generator.hpp @@ -60,6 +60,7 @@ public: bool has(const ngraph::DiscreteTypeInfo type) const { return jitters.find(type) != jitters.end(); } + virtual ~TargetMachine() = default; protected: std::map(std::shared_ptr)>> jitters; diff --git a/src/plugins/intel_cpu/src/emitters/cpu_generator.hpp b/src/plugins/intel_cpu/src/emitters/cpu_generator.hpp index 4e0ad438b5f..7301fcb177b 100644 --- a/src/plugins/intel_cpu/src/emitters/cpu_generator.hpp +++ b/src/plugins/intel_cpu/src/emitters/cpu_generator.hpp @@ -28,7 
+28,6 @@ private: class CPUGenerator : public ngraph::snippets::Generator { public: CPUGenerator(dnnl::impl::cpu::x64::cpu_isa_t isa); - ~CPUGenerator() = default; }; } // namespace intel_cpu From 14d31d59af5e9e4f5153e2e5fe2c3f7922cb74ee Mon Sep 17 00:00:00 2001 From: hyunback kim Date: Wed, 23 Feb 2022 00:25:26 +0900 Subject: [PATCH 079/310] [GPU] Enable deconv with oneDNN (#10580) * [GPU] Enable deconv with oneDNN remove post-op data_type into oneDNN. Signed-off-by: hyunback * Update to use data_type in conv sum post-op. Signed-off-by: hyunback --- .../intel_gpu/src/graph/layout_optimizer.cpp | 10 --------- .../intel_gpu/src/graph/program_node.cpp | 21 ++++++++++++++----- 2 files changed, 16 insertions(+), 15 deletions(-) diff --git a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp index 1d5422cf137..12278270388 100644 --- a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp +++ b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp @@ -765,16 +765,6 @@ static bool is_node_for_onednn(deconvolution_node const& node) { auto spatial_dims_num = input_layout.get_spatial_rank(); - // oneDNN doesn't support sum post ops for deconvolutions - for (auto& fused_op : node.get_fused_primitives()) { - if (fused_op.node->is_type() && fused_op.deps.size() == 1) { - auto eltw_in_layout = node.get_dependency(fused_op.dep_start_idx).get_output_layout(); - if (program_helpers::needs_onednn_sum_post_op(fused_op.node->as(), eltw_in_layout)) { - return false; - } - } - } - return onednn_valid_dt && onednn_valid_params && spatial_dims_num <= 3; } diff --git a/src/plugins/intel_gpu/src/graph/program_node.cpp b/src/plugins/intel_gpu/src/graph/program_node.cpp index 5d1dc6a6aba..a73f0833d8a 100644 --- a/src/plugins/intel_gpu/src/graph/program_node.cpp +++ b/src/plugins/intel_gpu/src/graph/program_node.cpp @@ -388,7 +388,11 @@ dnnl::post_ops program_node::try_optimize_post_ops(dnnl::post_ops& p_ops, const float scale; dnnl::memory::data_type data_type; cur_p_ops.get_params_sum(idx, scale, data_type); - new_p_ops.append_sum(scale, data_type); + if (is_type()) { + new_p_ops.append_sum(scale, data_type); + } else { + new_p_ops.append_sum(scale); + } break; } @@ -665,7 +669,6 @@ dnnl::post_ops program_node::try_optimize_post_ops(dnnl::post_ops& p_ops, const } else if (sum_and_eltw) { dnnl::algorithm alg; float sum_scale, eltw_scale, alpha, beta; - dnnl::memory::data_type data_type; dnnl::algorithm next_alg; float next_scale, next_alpha, next_beta; @@ -686,14 +689,18 @@ dnnl::post_ops program_node::try_optimize_post_ops(dnnl::post_ops& p_ops, const // Try to optimize eltwise (any) + sum + eltwise_linear (with beta = 0) chain of operations if (can_optimize_eltw_and_sum) { + dnnl::memory::data_type data_type; p_ops.get_params_sum(cur_idx, sum_scale, data_type); p_ops.get_params_eltwise(prev_idx, eltw_scale, alg, alpha, beta); dnnl::post_ops eltw_p_op_prev, sum_p_op; eltw_p_op_prev.append_eltwise(eltw_scale * next_alpha * next_scale, alg, alpha, beta); - sum_p_op.append_sum(sum_scale * next_alpha, data_type); - + if (is_type()) { + sum_p_op.append_sum(sum_scale * next_alpha, data_type); + } else { + sum_p_op.append_sum(sum_scale * next_alpha); + } add_post_op(prev_type, eltw_p_op_prev, optimized_p_ops, 0); add_post_op(cur_type, sum_p_op, optimized_p_ops, 0); @@ -823,7 +830,11 @@ void program_node::init_onednn_primitive_attributes() { if (e_node.get_primitive()->mode == eltwise_mode::sum) { if (program_helpers::needs_onednn_sum_post_op(e_node, in)) { - 
post_ops.append_sum(1.0f, onednn::convert_data_type(in.data_type)); + if (is_type()) { + post_ops.append_sum(1.0f, onednn::convert_data_type(in.data_type)); + } else { + post_ops.append_sum(1.0f); + } update_onednn_post_op_list(onednn_post_op_type::sum, dep_idx); } else { dnnl::memory::desc in_desc = onednn::layout_to_memory_desc(in); From 37923a9183817d3ae8d4b4aa5340b7934538ea82 Mon Sep 17 00:00:00 2001 From: Liubov Talamanova Date: Tue, 22 Feb 2022 18:38:08 +0300 Subject: [PATCH 080/310] [POT] Remove DataFreeEngine (#10600) --- .../pot/configs/data_free_mode_template.json | 34 -- tools/pot/openvino/tools/pot/app/argparser.py | 34 +- tools/pot/openvino/tools/pot/app/run.py | 4 +- .../pot/openvino/tools/pot/configs/config.py | 17 +- .../tools/pot/data_loaders/creator.py | 12 +- .../tools/pot/data_loaders/image_loader.py | 3 - .../pot/data_loaders/synthetic_background.npy | Bin 24128 -> 0 bytes .../data_loaders/synthetic_image_loader.py | 327 ------------------ .../pot/openvino/tools/pot/engines/creator.py | 3 - .../tools/pot/engines/data_free_engine.py | 19 - tools/pot/tests/test_cmd_params.py | 2 +- tools/pot/tests/test_data_generation.py | 51 --- tools/pot/tests/test_sanity.py | 18 - 13 files changed, 10 insertions(+), 514 deletions(-) delete mode 100644 tools/pot/configs/data_free_mode_template.json delete mode 100644 tools/pot/openvino/tools/pot/data_loaders/synthetic_background.npy delete mode 100644 tools/pot/openvino/tools/pot/data_loaders/synthetic_image_loader.py delete mode 100644 tools/pot/openvino/tools/pot/engines/data_free_engine.py delete mode 100644 tools/pot/tests/test_data_generation.py diff --git a/tools/pot/configs/data_free_mode_template.json b/tools/pot/configs/data_free_mode_template.json deleted file mode 100644 index 8c621fb7b6b..00000000000 --- a/tools/pot/configs/data_free_mode_template.json +++ /dev/null @@ -1,34 +0,0 @@ -{ - "model": { - "model_name": "model_name", // Model name - "model": "", // Path to model (.xml format) - "weights": "" // Path to weights (.bin format) - }, - "engine": { - - "type": "data_free", // Engine type​ - "generate_data": "True", // (Optional) If True, generate synthetic data and store to `data_source`​ - // Otherwise, the dataset from `--data-source` will be used'​ - "layout": "NCHW", // (Optional) Layout of input data. 
Supported: ["NCHW", "NHWC", "CHW", "CWH"]​ - "shape": "[None, None, None, None]", // (Optional) if model has dynamic shapes, input shapes must be provided​ - "data_type": "image", // (Optional) You can specify the type of data to be generated.​ - // Currently only `image` is supported.​ - // It is planned to add 'text` and 'audio' cases​ - "data_source": "PATH_TO_SOURCE" // (Optional) You can specify path to directory​ - // where synthetic dataset is located or will be generated and saved​ - }, - "compression": { - "algorithms": [ - { - "name": "DefaultQuantization", // Optimization algorithm name - "params": { - "preset": "performance", // Preset [performance, mixed, accuracy] which control the quantization - // mode (symmetric, mixed (weights symmetric and activations asymmetric) - // and fully asymmetric respectively) - "stat_subset_size": 300 // Size of subset to calculate activations statistics that can be used - // for quantization parameters calculation - } - } - ] - } -} diff --git a/tools/pot/openvino/tools/pot/app/argparser.py b/tools/pot/openvino/tools/pot/app/argparser.py index bd919246e88..d5f8fdf8f77 100644 --- a/tools/pot/openvino/tools/pot/app/argparser.py +++ b/tools/pot/openvino/tools/pot/app/argparser.py @@ -50,7 +50,7 @@ def get_common_argument_parser(): parser.add_argument( '--engine', - choices=['accuracy_checker', 'data_free', 'simplified'], + choices=['accuracy_checker', 'simplified'], type=str, help='Engine type. Default: `accuracy_checker`') @@ -114,35 +114,7 @@ def get_common_argument_parser(): parser.add_argument( '--data-source', - help='Valid for DataFree and Simplified modes. For Simplified mode path to dataset dir is required. ' - 'For DataFree mode specify path to directory ' - 'where syntetic dataset is located or will be generated and saved. ' - 'For DataFree mode default: `./pot_dataset`') - - data_free_opt = parser.add_argument_group('DataFree mode options') - - data_free_opt.add_argument( - '--shape', - type=str, - help='Required for models with dynamic shapes. ' - 'Input shape that should be fed to an input node of the model. ' - 'Shape is defined as a comma-separated list of integer numbers enclosed in ' - 'parentheses or square brackets, for example [1,3,227,227] or (1,227,227,3), where ' - 'the order of dimensions depends on the framework input layout of the model.') - - data_free_opt.add_argument( - '--data-type', - type=str, - default='image', - choices=['image'], - help='Type of data for generation. Dafault: `image`') - - data_free_opt.add_argument( - '--generate-data', - action='store_true', - default=False, - help='If specified, generate synthetic data and store to `data-source`. ' - 'Otherwise, the dataset from `--data-source` will be used') + help='Valid only for Simplified modes. 
Path to dataset dir is required.') return parser @@ -160,7 +132,7 @@ def check_dependencies(args): (args.engine == 'accuracy_checker' or args.engine is None)): raise ValueError( '--quantize option requires AC config to be specified ' - 'or --engine should be `data_free` or `simplified`.') + 'or --engine should be `simplified`.') if args.quantize == 'accuracy_aware' and args.max_drop is None: raise ValueError('For AccuracyAwareQuantization --max-drop should be specified') if args.config is None and args.engine == 'simplified' and args.data_source is None: diff --git a/tools/pot/openvino/tools/pot/app/run.py b/tools/pot/openvino/tools/pot/app/run.py index f7677237d5d..3c3f2694c22 100644 --- a/tools/pot/openvino/tools/pot/app/run.py +++ b/tools/pot/openvino/tools/pot/app/run.py @@ -39,15 +39,13 @@ def app(argv): if args.engine: config.engine['type'] = args.engine if args.engine else 'accuracy_checker' if 'data_source' not in config.engine: - if args.data_source is None and config.engine.type == 'data_free': - args.data_source = 'pot_dataset' config.engine['data_source'] = args.data_source config.configure_params(args.ac_config) config.update_from_args(args) if config.engine.type != 'accuracy_checker' and args.evaluate: - raise Exception('Can not make evaluation in simplified or data_free mode') + raise Exception('Can not make evaluation in simplified mode') log_dir = _create_log_path(config) init_logger(level=args.log_level, diff --git a/tools/pot/openvino/tools/pot/configs/config.py b/tools/pot/openvino/tools/pot/configs/config.py index f97af55e656..72724cfbd6b 100644 --- a/tools/pot/openvino/tools/pot/configs/config.py +++ b/tools/pot/openvino/tools/pot/configs/config.py @@ -63,19 +63,6 @@ class Config(Dict): self.model['output_dir'] = args.output_dir self.model['direct_dump'] = args.direct_dump self.engine['evaluate'] = args.evaluate - if self.engine.type == 'data_free': - if 'data_type' not in self.engine: - self.engine['data_type'] = args.data_type - if 'generate_data' not in self.engine: - self.engine['generate_data'] = args.generate_data - if 'shape' not in self.engine: - self.engine['shape'] = args.shape - if self.engine['generate_data']: - subset_size = 0 - for algo in self.compression['algorithms']: - subset_size = max(subset_size, algo.get('stat_subset_size', 300)) - self.engine['subset_size'] = subset_size - self.model['keep_uncompressed_weights'] = args.keep_uncompressed_weights if 'optimizer' in self: self.optimizer.params['keep_uncompressed_weights'] = args.keep_uncompressed_weights @@ -309,9 +296,9 @@ class Config(Dict): if 'type' not in engine or engine.type == 'accuracy_checker': self._configure_ac_params() self.engine.type = 'accuracy_checker' - elif engine.type == 'simplified' or engine.type == 'data_free': + elif engine.type == 'simplified': if engine.data_source is None: - raise KeyError(f'Missed data dir for {engine.type} engine') + raise KeyError('Missed data dir for simplified engine') self.engine.device = engine.device if engine.device else 'CPU' engine.data_source = Path(engine.data_source) else: diff --git a/tools/pot/openvino/tools/pot/data_loaders/creator.py b/tools/pot/openvino/tools/pot/data_loaders/creator.py index bdce5a3ccb0..54cdf5935da 100644 --- a/tools/pot/openvino/tools/pot/data_loaders/creator.py +++ b/tools/pot/openvino/tools/pot/data_loaders/creator.py @@ -2,7 +2,6 @@ # SPDX-License-Identifier: Apache-2.0 from openvino.tools.pot.data_loaders.image_loader import ImageLoader -from openvino.tools.pot.data_loaders.synthetic_image_loader import 
SyntheticImageLoader from openvino.tools.pot.graph.model_utils import get_nodes_by_type @@ -25,14 +24,9 @@ def create_data_loader(config, model): data_loader = None for in_node in inputs: if tuple(in_node.shape) != (1, 3): - if config.type == 'simplified': - data_loader = ImageLoader(config) - data_loader.shape = in_node.shape - data_loader.get_layout(in_node) - elif config.type == 'data_free': - if not config.shape: - config.shape = in_node.shape - data_loader = SyntheticImageLoader(config) + data_loader = ImageLoader(config) + data_loader.shape = in_node.shape + data_loader.get_layout(in_node) return data_loader if data_loader is None: diff --git a/tools/pot/openvino/tools/pot/data_loaders/image_loader.py b/tools/pot/openvino/tools/pot/data_loaders/image_loader.py index 54e90950403..2a0f16fe118 100644 --- a/tools/pot/openvino/tools/pot/data_loaders/image_loader.py +++ b/tools/pot/openvino/tools/pot/data_loaders/image_loader.py @@ -64,9 +64,6 @@ class ImageLoader(DataLoader): layout_from_ir = get_layout_values(input_node.graph.meta_data.get('layout', None)) if layout_from_ir is not None: layout_from_ir = layout_from_ir[next(iter(layout_from_ir))].get('source_layout', None) - # SyntheticImageLoader uses only H,W,C dimensions - if self._shape is not None and 'N' in layout_from_ir and len(self._shape) == 3: - layout_from_ir = layout_from_ir[1:] self._layout = Layout(layout_from_ir) return diff --git a/tools/pot/openvino/tools/pot/data_loaders/synthetic_background.npy b/tools/pot/openvino/tools/pot/data_loaders/synthetic_background.npy deleted file mode 100644 index 28da19dee252afceda0d0cdbc8e6b2780a967b9d..0000000000000000000000000000000000000000 GIT binary patch
zx%3t~$opr?t#o0+wCCfC#MLYiDC#~e;z|RhzeCG4O<17+$?cmw_Nl$ib;HTIH2B)) z>_OPhfwxjOzO?pp;f}KCc_}=XQ_Ag2wUmmvS zz}BUg+>#^NAjGr7L6S@X+f^UGFWPf$LL9hR?>G#}t(~Fuq_U-G=jAj_>9p@?j#}syppY`ACJr5|^O2J{0H)5IU4{ z58uN%w^!JIGA2LaZ#278lQKik0&xgtg`b?RXQDvY8Flh z&9XtSU2t#6W2|e9l?Q!2>9C-+IYx8`3sRWRV`n%lINa}M!1f?Q<_}T6Tg&O-;@+!1 zp+*M{%EW`zZ8Z3)w(g8Se(uvxCTO8d4wU`wb(1h8z}Z%NqZDrn`0P(1-xH@o(aC#W zCb3j#$mCtyt;~Yy<3Gb{@!Wb|DQ6pPq`}>@RmwcZ4DiXRviRl9gso9^yvuh`AX>v{ zyZjM4T-UhQ#687;vO$vVSL}=WZ4Y*PJJ`W@p-m5Isiq*Z-6|}VK!U`*eJ^R_6sUj8 zJ=^n^1`Wq{ch_{#Kygp7llKe@l2othobn{VcjvGip(=Y|79QpwaHN1))`IlElMJ9; zhqWSN7VvCeS4)tg9lR}j=cYJf3#QhZdHYkz;Hl*Oj)-$QYnkVrurCbo-muoH{}=(z zzcJLzUqOSu_vSsz#2C;}sQ1@5n*y{wX|+q#nLtxma$wPg4U%WOxjr`!y09y(NAJO*IUvceVf|w3!LATMOU59ufsmyw>riu zhYDg>r7cfyMVuGB*E=thjkq>sG5#?D);Aj*;MJsnb?Peh2hW)BJH>$LI!J_qNW~~! zM=E^#v}CRkajuMUpxNyt5(xND$v#Q^CgrxeQ3!{>d;vhYgm&i(V`}obLrDTRO%_kRj}CA?8E{pLP@5Iu`~Ic0ASN zBeS9T;*vqN{T#5^eJ^xJI|B~;TD{~IWq@7V)MrtwtGJTm(eHd&Q2A?deVGCslH~30 z*#Pp_oXLQlsrjo)x7)+ev)ylPxny_{eoATTIvsIs=3H?K1t@(teEYXB!DavL z6#=X1@HDn@k)BI}LsC@P4`UQa6}@xd)F>HFUt^115GTRJc${(J9|kOYIC^MmfdLhF zv`2jWsqk<^uduTM3kIKP>-rQjLG*{lH#ZwHkk^EKdy4&Y$}#)MMK^pd{k;eLfeZ~5 zoi9gEvEb>#>E-A3nb67EbGma43-0g<4fMQcLxxe(rI)EJkbAJ~Ns$`%nbZ0w=k~F| zmE`I+Hq3$VuvGWQ7B&nS?ffWn3Gwg4Y}39u4rsPH#y|MQhE~VTG2aLrAgTD=s!`>@ z$90nGAucQkUeDkaH{?LbP_nVG7aQ)*Z6^xFvw?iYevyoQb>%%}=L?Pa-t`Q)k#<~= zpH@%ecjSW9gj7)Lc@A(356(KDWx?u%uJGfJSO7{I-`D^59@ehgwr>{$HgD=6Dp(=z z_RFx*iDJSbB{hX~Z6<^?(d{L^&_VxM$XWVL><50&TMyT8VDncu1Q(3a)b_ten_jvzq5yRT@j0ETzp=qE9Wl0V}qf&-S4g8W)QO^d!{jf z0dK_b-7%#wL1#|_?do$H9GO4H4r!x6MZLFG_#hn~?or-zG=dE+3Q;ZxifE8J2`_X6 z*`T<$vd&W+=hVVJdl7y*?};1U~F6wFT_7p8z-%+uL_$QSDLH;N|N zvSD{pzq=ln0$J(gh7Ia;u*fieduNORi3SM|#k|O%bn)K#8hqZh_f9oy++u=fj!UO@ z0~!9Ux4NL{!+@X>zax@t7FbI~7=7MNfw}m^OTDsq58hUN`;C2c(RF2I;a>{GbUyh| z_>BqtLLu78VmQ~>f3@@W(7;*wQFF^D7Vs#P^8PMlL%LWvH9Ct7>q)|nH&f`KrE&Vc z3Erpoawc6awG^1lSjc!}#sEu!-t5OBELgMRb;eJ`RROK)g{1;4Fn->wqHn>08yWgl zM%7f9l8OlzN8Uw}i0$dy#)hR6jae{EhJ-iUKRMyNEOfs$^{Eo;t2BnLa*+yLiETCQ zbOvY#@TxGDGr^9kJ?5 z{JFqwpNe~n^E$=gT1i?m9i#*%2F_ufx0xKcRCEG)=&mxU>C4E=S7g;E4>N&ht!qsd z;_b0Krueuf4cfAPTCLxY_X-&c>BbQ$c6M;J}|vOxXA9)9dObOc01@z9ok^ZGE3_E7;N@@saZ? 
zm6r^dT4YzyC#f*`$MDF$|LT3b3C(qRS^{{7>0O@{?7Wh#VhDGCJCyhvWBjrIPtdvi73!zg--!b(4UZ^8eR z2sx-52r=~{rk8=u%lE&g0`h^Hc{|z&N(DVB8(wm)NayD>fkS;=BKe ze=i4Gx?~J)-J^rdY}l5`6vTh>#cKCIu|a!@e%=wB*YE0Te+i4R;qqd(jxqN0VE0&Y zD`OhC-ffutTgQfaQ85XPEDjj$Z@eq~o(MX3pYK@moeH@{t*gCpJ{qL#o^;0huNf2i zG}xU18bzDeKh|eM3|PcxT*iJ?Q((NWo(-#oA5z$DY%uG#Uz&nnKXOXc(h&P#QRUbn ziE$>l(K23E7cxM;x=V4JJL0OD3RVt2UuE+);Z8iSM3sy7*>_kV>EB^jQjWZP&fvoL zL>gRn?jsIgWr3&Dcn2qx4dV$dLJI3CAT#50ZmR|p27f8rRn)RzbHonvy}dLzlvw?9 zx{e7WdlFibjahI#;i-6{Bn9YB*GYHdXpnX0_p?82Cgl3$EH*D?z}Z7TzF)UvK~BR_ zx!;%Zy_LrtI>@3!#^%4D)v-@r7x}X@19^egdl^FQ6a{#kHTYf;5%(YdwGlE|?7_w_E16FA41Iyr)qOxQ~(N@_7AL8w<% z;oU|WY@D^t6-N9!uPL$m`Vis@krxUzGIZ#0y3%g4nFZAYL5#8-0>mw5jfd!wVJ_8l zuI308HoqPI7lAx#>m=1i9_N&-%3Yl@3dsmmElPCpR#m<7G-< zb{7r!WL5cfo7up>pX}g_^LCB4OJPDY1(JV|qm(}}p?>MHezo1GKS^r53ph*xu?IT4 zohb}p?r-wuU&)4o)v>fTKI}WCa(9Q3mx?#=a_sUr(0>1zm;NIfWKb7JH=kz1*N*S6 z+^!^F=_f(_muf|VJrgqPj7`0U+2H*rYVzF@GDs%dhYuUlpb|bmd9alZF=0KX zv5&}*e%1Ts@IeMx|9!OS7xJf9l`iWh4^csKmtJHt&fS3J>PIEB@toy|>V2rc{2Y}2 zOfRRwzAK5pU!B3Zy=}6{$_~%v!qR-{I^@^8_?(SvQI~t}GVN!9?|+H^eiaECXjc?0 zhTu8qpJNn7E~7%#b%A8QAJ`Wj$A6`+V}aBC<2`kOWGGu;>1Pb%`94+8PI%7(O6C6K zm!~K&-M!tjyPg4t2T~>Me<5#B<1We$&|sO5RcEUJ2R8lLaitt}Je`se+ngji(4UDX zE*)S(P=aEs@Fo@repzEI-$sXLpX96m#*q;eovg~ab@bHP8 z!Mr5{MmN81{J}?sH~UqX4@6n8rL@mt26ef8K6{>CFr$Ku!}9yTN?FjhG4D&m9;}zt z+V6Q|G}!T0d~u|Q1sjtx(}rKLq5bnZiS2YMG;c3cyIRkL{ZXdU=2&MQ`E`4k*QxNk z{L13CY3%!qj_vGbCOo}fbIz}e1lm(y&Mm2-!o;IRhup_>n9eV$^)#UXr%^fRodyFg zx@`)|?O?!_h~zpR)I->FWdaedc&-;lN^OXAXd!g)2G=lvT4U<-!I}dq_seRfoH*DQ zJ6MUxQ=)d|o@vB+x#F6;8h4Nl&yHEN$vV+tbFo&Y58^dKdr~BO3u+>LciQ~djJyxm-B6!pQnMRkWG2s3MR;m zTYZ~Y&xEy3j`g>-XmF=Eaerqc>Sk5agbgB0u+@BiELVsOdsTkOoDx7hV5i^xEfzoj zZfEtcBNVvEZ_P|Yew(UO=65-X3F0FM$CVw)U@5OWqaK6!+|KgSjd&IqsC8~CTS)=+ zpLs6})-mA@Nh*I^4I831bu1oa4W6o~=#yZ7TbXI%RuO{m4Pog{Dg%7vb06 zEZG_q5yylCsqTj3LR2WGDhcPK?kf;(f5Wtz3GceZ+_H8v!1nUIbr6{aGI!6XUghOL zP}rpe!y)8-NxGB7Z8*1H6Q2&7v%sv1@XsQc0PvCVWp;-N7FGNi;#D)D<#;g!TT(_3Y9ICZX&LbJf<>a>_3m@r_F zcQRb|CmqzoY<14@lHln&H;YrK!|UW+A=Cx3pnt{A`>U={;ks`6b48r@0m7QC&k9(m z_htn0l#pQE%aVQf)EKaO2X`wGaf+C{V#T644f0$pe>OBQ;M9dQdL+)H{W3>#7Oqhs zNKucv8hL4-uJga9eg;g8HkLVmq=V^#Y2tRg{||3I>fBz#0zd7zE%S&&f99-CP$*@> z@-*JId1KVg{J!|t$B@CtOjYYE>h>39{|f2MA#O2$Tov&jKU+~5q+i2?)goixh=>zk ztX7px`G9k(W8K`sQ5J;zXZBY_aln1k3x8i*DmeYSB-(@b$MD$Z2e1R47C6O{O{pSz~{u!^)2QSBlLv zFv#FhVsp@cm^mb?iaOw%tuF>NMcClX__d^-pADmfA%yrL8VIiPkgCwZbB({hs5`-e z{oYgK$FN^T)xMjP?4v>ad%m+IWd@`eM_YeSpu*DwrE&tl5pP_TjnVo}2RGrbD~{!o zp`baLp?RJO&BZ?x4~lTWY}xk4nFc!4Q$(y!GuWWz|1kDNGzYBSDW!y@6CoqP_c0f7 zWmPk^Z?^*8!>#n3MFs&plD!vVwxKS{(*G*Gi2OSEmgcQ4GK?CTI)(|cpnlik0u`Ls z6t(ar_fXGyyrz)%D9(xF*Vi2BLSG^@YUsq2AnI%Z8p@IT7?5$|3ZrQY;&zW${U-;I z4|6+dXF3^>{<-C;ixlG9c_GPfYiaN*NBTjE3gRYd@jWjPrx^Y`CUy2J8MFnjIC!G2 z6g9Lf#L&yKa`%%S5RVw@g(1 zCc^xKRhn9L6i|J$Y|E!;I{1vpIc?tA zXif)koJvUwWkBAW2H~rt=sT=VZ_`2lsOpFE&BMrZ$2H`9&KEJDV4cn7217Q~jejqW zM|`KA)o1E7We+~-&+j@a;XS>TTE7)>z{VG`v(DRykhfueTU`$g)>vv>Ejq)3TJ=%E z<$q9*J=~*8IztE5t-IdJzh%MD=j>8v4GO$4kxqSFhCDKakNf%s2S&2xN1k1vz|6Ok zx9yS{uynu9#ZlBPACH}t{iIC>s~)XJ&IJaz_xFgVRMX+6Yxu!SVGb~|Gi9+r&jJ34 zc59x<;llZ$ZNF;wkilEty>omy4ZIg=9gS8rxZ#*_V-occ&nzqLlow3I%^#NK0vi|? 
zZQPC?p@7|8ztl?XkH=!C%5Gm_LkB@XbP4j2D<8)r3@@15FUk_jd5B=3 ze0yv?&c&}=UPKY7Y%rgEtla&Q1D4&5Cb-`iqGXmeTb+{29F4GhsmTG)$d4yw@aOSAAJ+;zON7s+q6*TfZ0HZm@twhXZq8cHsOoahWZ-iaIB7KTWMUM}+9a&4K?Cu&!^AX9$9*V?WMc?-)pdv0b^>ODoX7 zNUM7gI*89bY}_K(L4v_OrRYquV06o#4!^xDkd08$5o)7?{Iyw1=PvA1yoHkHIB&g+ zy5lDUNnpyBu?a>z!k-z=MY&1x^(aJa!xFoI-qVru?vZ(;g-ioE#ewvmn6Mx10YFjT+gUU_nP)Ce(JM1TiT(&5U#zR@V=M@n$-G(hdwi*Y-7;#hK+Wxu=T1ED$MZF z-$eaUI2DY_w3nFfumwL&{tKGezammMbbLpgmD^T2V4`6UT5p*D3=!Wf?o9gqu9F6` zKU@4>yr)3oR}E681`Vz)_w#woro*Mb5`1mr$RDjXuHCnR0p;I4S^qX7&N?3SbSLV` zH#Yo92;5Bvk7v(xtzXiC*$_Dr>q3FX$Nqnws3BjAbgbR~70>w;!*50y0S8wnU|RHWzv1M6sD)E*%6-yo%hk=#bK+63}{q1q%=S_o~D@ zz?k>fB9|ugLH395SZ>V#kA=Ez!FQ~g+n2mYb%7BFor|c7M zRJh}QxOXS^%`KUgMA|45K5i{Keg6{$ZY52$)#1nnW%g`Y7%%3_BFBbtG8rOTYJiwgWN>Wj^D zSfF^0-||2Gv9~o}>U}E}tW_TP?mL3~`T}P$SdtBG$=8Ke)6tiSY#@)8(t%H3Mq@SV zA?Kscd8%I^!{q|+MLk8tUxMRDKXo$U&l67`i_fSJWL+K3M7|fn@vKirJu7RVwD#$1 z#6uZl_Y_b!SZ_7@X}dTFhHjE-E};K+ne$oyStJ(>e!k?Js&#;s*Nq!pqS24w5hKj3 z!9MUS|50)V8yZ5AtZ$o>q5gx$r9BylcT4?V|3-b~t5^H@J?!rzoAiWc6xhJ0?E0oY zi35R6fA0K395v#`H7nM|dDFkHQ5$)y?P0fpJk+}^ri;DhfdP$;ZE|ccCK&Gfr(lRU zGlN&)M0P730xTBZ{->j?52z|BPp5#3hNu2E#FY=#L=Qej|6z+~Hdo*z1uSEV?hPM6 ze}FW6t;d%RhNNL{6Jsi5jyAQNN8D^J(I@j2efFe{*IG%51aP&9EYVy|1+x{DHM6l; zN7>}!nOh|AjW&BIwx14ux!a_=pVGl>*P-c09|=%bv?`^m5QBuI^i=aG+!j+N}oW56^wOF`3}2npHwdcr#B zM~#XZl2QM9>b=zbb14Pltxr9rY-a#Zw!_(j*yn9Ceh2g-J}l?c5h%?tTfI-To(Lvi&SsH~Xz=XU(qGr?kzW=wS87X< zAgG1tJ&gTC@w4i=n~_Z5>)1*-g#WJJyK=>4)ZL;C)U?i_&++}l`|FchWaupk9NJ|_ z2b0y0b5?J}^NBcYN-!lsiQ>9Vn=ntH8r_tZUr&dI&nfq*(+TxN0d{uttuhW%eihsggH>x=guAv->d z0G&TCrT2=^pm+Y+(50gkkR^4F+W?C@ZoDnOq`S1tNi`V$7^l)ohHP^3fE{^Noy6XW3%@XeY@wVFnVnM&yYI{d|bOsY?IjVGjm&(wGjaZugq6) zKVpCV*O{Eop~2brjSotf5ujbzM_cC_8FAmlZmDAQk4^fwkj|kVS+~nd_ALWMQj<;| z3B(-1v%$8Tc#iy2dK)%8rNZa!cVc-6NPFCQSsAY+mo=3=!&Iw#o-z#d+TSf*co%I5o7-CenxjnNhdy zCSvYLWkz$E;w>8RkTiK;Es$XGqq9twG7TL_w2_sJvm$xC#{6(LwY-s=ICK8y9gap!Zm=G1zJ~@p(tPH7OXfT>k0-UN(tv$9<7w$qTTlpI z);xH_4!XX@CQ*9`(EmMqSP+vMZYpzOvZi)$XXU>J3kDfxJeL1E-pU2ufw+JR862qX z39aQvUbbt@`Q2AT2Z&m0rf?*W3sbA6o>N_s*JnF68G1Q@y~^)=_s6IUTog{Ybc_Y` zK3~_V2e$BFs-`Cu@m)l^`CWx`RH(cfw0CL)`dEJ=&Q_3UFq2H%Al=J=*TT+`V^$pa z2Wn&5o48P0>J;ND#D%K2+U{{%P&XAfF1aJhg(HhCDq?rJkiJr|)Wn|x^V?K+T|&Kx ztKrhr<%9Q-)cDsVfDX&9T={WK3iC^^_um_uN1rzB;=`?&$1q8u=1bhA0^4+%%45`V zBY^+JGvq%Xm$4&-P!}*uZ+cy_75$vkyCtSk2l){{^@%4Nd8~zemOkRNf|}`}Z~I7) zuKe82awqnU=RA|G>1;Tx7_MOMNdk-SwYG@@6fkNke|Pd1=5AC9x*|5AuhVp5UHw7S zwOQi=GN}Iry2*H|BCl)ayB(2){ls}o+pGnZ3o`{1FU@sD2#qxVhu@dn*SR zOl=-~P;C!U9orf9sE1WNvAjMMhk1=QGlwz6nbqOu2|0;l<#ikF7cAjt0% z4}X$9Se)%t?|4XtqWv4=@1PG=8EI(!68X`@($a^XJ2;@Y{*L?jQZ^Wg-iX^EfjH7b z@8v2vHt5<#1vsMau`I9HVmbP&4@TGXm?4jD{BeJT(T4o`qDcAC1k9n`&0%jo&xXAl zhZ~b6I8b%>vY3pb13Xq3Gw9_wfOhK})_p-okW~{anz)O84nK>)j>Uc+)!ueHo(24} zPG@TopNKj-srLP%gUa~JWv9Y0A4!#tH@QRuqfncPg)};}bsZh`M1H)*R6|wj9|wL5 z>hIcd0ddbYx6gA!9M}?=bb*+Fdj7S_!E5U=hvEFKDrpVkN0~kDSw9#cM|iX|2lY@F zZuHoy?G7-$E5qg%_Sdy1B|_VeV9vt)rbcTK;!$16>uS0xL z5f#4PUYY?3584;6U>>ATEGe;a3j-L}FUKB7{p*=>wZNn8Y*@Uan#TBoKJ#l`;-C{7 z)U2yRZ(hbcr1;u1p2&9%w)1vLqyI~>HQ%5Ui~OA%_IyB&1UIBYcXqx(e4Tnpc@Fs+ zDch4_`-==58jq)KBpnib&YZ8TWP^Lz$+%Fw*CV_#ckHa$;MkkFW%e}chfTzt+F?w% zAscQp-Oqyi_Z6Nis$t(*mUq_3j|t39p&yTLVg5=?;`(0fSAHj(4l7_jK%K{!f2AS~ z+?S}jC7aNou$kyTqD}xksrMxxHj^Q@dx%dIb@;ZL&&*fow-=Y}l6PB6hv*>FF!ld( zeOBt?;jsj8aUSfg#`E_qxx4Pm2AoS0&zhnW%t6e8w}WS;E#$;|GMmLav7Jqwb;O-n{WC`jz|(i~4z(%d}N@-LoD2 zJiLE#b#tkLvIalFES$BX-9{$LJ=UA)pL{5c28RV2m@O1Z$7u;-7j zFBzt{+(;b5b8h)c(KvAy^SyJ8=hN`MrH+~!j-g-Sbxxo|X@&|eoagCTKM@y1%tRTZ zzjSK6Q$jwK3Vk*C<1;I1kfT(i@Lqrc{7T9#N0Fay78z*&y8`{3ME)$Q4hIgGowUkB 
z9pja+g{7@B13WkH5tKuIM0gsZNV>{^YXX}(2Ua>lDqFJq%u6m@?_=F*UWcDM_`{P? z&g#`MOz+#ymMb(0Oh@8sC3 zAs;hP!w4(t59T!Ai6&89U z53>BqR=RPA4&Bk`R)1x2K=M)FWzHf6$|97m$Dps)e^tb*)XV|i?WFweQKdqUUY&&L za^%Tt410H$FhIYme~YgQ=2AiiHQp*9Ke2kVTmJec(7$n;<&DF8 zAV(*@-?o(k5VS?)f`8or<5(yKOJn1?&P{K>UA%zq|sb*5lW zct!3rM_Zi31r}?M5piz0ocQ(qXaWAcg=IdSNxWCOnQuP* zm+Kn~da$Vk=h6bF=QAJXgSXT_`V@|N38r)pzwBGoF(#+qh9h3w{m!~b?J(94?GtY| z=BZ}J9@WQU{)S+`{zG{`2ZUC?C}byaA>i#`VM;jW5Dvs}V#d)QTtmFNN|z1a%WVUs zEdKK~?_B*j$Al|)Hu5{RVD8@J-}Br0Bv3kf&b#|E71r+J8y32aenF+H!RHbpI5!6E z%Pd9RWRF+aZtS!Dx8ofIP#;ivF5bD%iUrgn86gJt;h2=VlcL{nzaXJ^!yfNcyAWyr`KM<{Q9T3iZ8avfeT9euc^xSp#HnC`*SV&%?V7U+C2|AVAwrD zcfXJPlHWSr+m8j_JT*-bmpKs6ThTlmLLg$tklNtmr6AU^9!*BjD8{kEQIXTwBay)tcbcsm`!`sdHa zq2H?LG5+4@1lFO$(1lN{FeiH7yZIN^Z^^RT?J|fL9AYfhej*-nexvTx*203js{|=S z&j{d@YZW@DM1mC{I~ekm0`h+TOS~f~Q1EJ?#``E8+Px^mT&%nF7uo*XFfY`yp}3Gc zNrPhghaS`03=peLTfZTX00R2VGeSc%>SSZL?{{BV+%I&u|94(koyWikrAq#U0Ito30yD_I)oLZ-= zPKDiTWCqf9aWIV;3kG^k+cwShn9Z`swM_T^q0TaAEXDxJ=Ig2kfJiTqJRhIni3G z-0}TCJzX)FZ-f5ivX^|(i0juyKb&k%qCrZ&|BM{!1-t_Sic<}ki(8kuA)(m;g0vEB z&WLm2a@^GVg^ideOa7KONFzX)&?$ery<~Vg;3>SM8~2AU&uy7ZLY$FVd}3S#`+z%- z-)iJLRuLbbo7aT(3V_JE#y8wK|KKPJ@mO0!EWe^vlHGHW&mV-d<%r zqp*btqJ?2*V^2vS=;1bV{Tvy1;*OTvA}`61{`>Y6;?hm-%G7V(T#(xuVm!{K!W<{d z{7F0NuG{%%e-?9KsZVf$$T~Xgul-p28s~zfc3H<1=B@7hUATAxv=FS_5tfJ z6PNPkm~iAx&FvGIpLqSiJ5Lt}$7vsrxAJ`Q1GnfBrpkaQk z!a#K|^1w|3H&4b-Qoz8epsVsa6LH`9w3c@a`2EP#b=Hpxfopa5-)^J>C0M}E41E;S z_AO!2C2R=$SUmLtbHiqt|Fomb(Qiu#@H8`{Lvr%b+o|0w2#dDUTR@yPBl7!)w=NTY zI83F{W7u$dpnc{tg9|~{Pt?A-5<$gb_5$@a87gmHe&ta^gFu(IX9nwOkic`9SA8=R zz8p~_{J=gdxn$K3DcnO@Gdb_#hJKXhA@kCs$Q$?E9>|z|VF$J|rR5Ko<9lSN#Uxwxg(XZ`(wVy@vSjf~zR2&jCns4lL=T zRIpc;KctQG*RM9!qeB__Vo6WN(*x)$r|%U?R_DOw13oIxFn5$0m$s*O85Q37{bA^C zW`Ife0jMlwf%su>VkGwKogv#K*F8ZWYSpKUwl)r6sIa6r;2909);!uLtx_Yrsx8?|bky3l_eJml|~hI36T^_XD;&NbT&9to#W z_i5$-$+DEj99EKwmsJGrPt7Ua4#iw@ug%giS3WYlDEb#EkNj1%PB~cUwmrDz8Tp8P zB*FJLIS01#&_U!kiLc}f?rV6-%Kn*Vfr&f+sFw`~N`{4nmqlRyB=7j)H5VC}Ydp3&iyszzn9LzV=aKi^EY5`|fpM zY={C2$-0qmiRioD3KuBjr2xsHrNb3-%w6 z%-60YcidUkg*n)B3AW$s(AO3_cIa0D=3K@4h=JBD=sL1d*&q9~&dZ-wbtR}51wVgO zJKzAsZwJrpSmFpw<*X+xD@Uk(N=M-9W#f$>?!3*0%J8}?uL6;;ScYlSKA|2(uKm|H%Y=Ui zgF{NxaKElWSek%2Lfvio8&*eP9@t+zt>-%#uG8x--%X-It0B=dxEFE4$nN?3^5|E@ zUOQwY?EtUG=Rz{V9bm<%e{iuF6#^m^cFj3szNBAy?;-rTzRTYm8Ra-YxK-0R%TPz$ zJ6SdURO>M0SJMe^l89j2Fekmk75&&(v{pe))C1f#s)y%oK!^AEzrnpEV3gey8Vy4| zgzLl27r;DP-lg^$8?9a@n+)|8*dy@;WJn9?ES8P)PaWHBTjW4=VaAfRY18|LpTbF%+(fNwtVsQo1pMCEw0>CZ8@ z-)m-i0`;nx@P(^f)HCc=niC7U(ckQ4ZxvC%`(-!wI(6I@yfZcnyDA|rTP!$I`4PQ$iApOKlzw>KoeIZ%@~E;z<}3^A zJ;g6Y0QD}HhzG-(R49{jc*sV)rk&MpAB%YQX`kFxD`nh&4RQLlxdi)aPJTra`qrg) z?N7I-p-yWsIgxn40n*3a_XYK19{a|xB1O#6xGFty)kWPx!HXD0Z{@(1{5NAJS8*RV zIp_8CQwLCcwX{tId1qX5X3`ol3Q`!uAcx} zF-O-q>_+{eaM0ejf({bOGIQTmZK3sOM||G{dr0p~)$bgjz*@JhsXgss4GPrk&h`dGdwVyqPyB5D`)3JcRyA z&m*<{7Dbrn;jk+*c&V^4O*qdN>*V|p{Jo9!U7XE`bBv+)?F`W8`yyB&U;O}c-H(=EYQsI<{MnYqGKw8Xg^^JYP@iwz^;%@xx=$m4&XDI36ik_+0E??)eAzU)vvUJ3fdy z;*T zUredafDFf0pXdGdodR!)*Crf|!#r-Zkp39z@~O)Y9lM1%`r3(c!;5Rs|Ep=~s!C=- z-r&ftg0CEK{NZ+PY!mu4!NLy`rViQb;OSg;XmUgj%hkTsC?t7QZDLZvmMS z@xR>RFZVI$`128+?)jhpw99X9T#61^Z?zWXQ70^)a{srB!GJG^lVy!JQ9-q}X@fHI zz4Q&cg1ZRF7uOs=aqJx#tQ}WOoZW{x71&09j(Fl$%i`&@Mf4?-Z<}bLE^iT-K#alj zcUIOPY()LJxmD~zDCTwC|87|svV#esC9Q8R4v}GsIJa^b?~nFpQIC(fr>nPaNFXhr zh&lYpBzeTcPeEZzYeb|cZ5Ipon&trq0XkeUZT?I zFo>O$Nh3}khT>+G#+T8UPdXhdN5q_!*!JM?--x^9_j*OB>vJJipxQb_nggfF6<*mX znCm<Qvs)(rPkwU)d#kHI|1#f(4J_swWfOAnqH#e9OM!({7F59a#J4CXeV z9~7WieCkOY=8X1l{95ceKgp+@;pjsC#6>l$s`o0~I8XDFbqp5rB@Sd03|8!G={9?Wgbo$P3R 
zXWi1~BR-bM-3hnsxN`u+-W;Q2cd+nTqi-`6`_+={+rq1E;7g5#JwKl@~2 zdjJQHwQBavBhOX0sy_1xa{-f8Zijc-;OBXAuG0YP{%!*G$~l}nQvJSVR=CG;-N4W* zY=8mxEj9;mP!HL;X3fzRh%?(aYG085#sW*V9fJd>GGIONNd6ql;eRo}c<_h5mc_m_HmRZqAZn0H6Bn z{xVh6bN|RH6uzQLk-Fzr5!M>Qqj;HLe<&Y*^J4M4VW|fr_ige5&trq4)WK?Fr1G=2wT!#>%3; zruxXp;Q<%O-d5FW94D9-oxdI;b_C4E2;cv-AAuy^jr3NmR|auKheQVE=&pIRMrIFDW0wn@Cc&jCB`sOvd>ItV;-&<^UxJdEZvEeZRLeOO;)67rSZ zMJFZ^r?M>_z`Oa={r+mx$w9c=>wjsI3Lz`|7&^0fuCEi%&il1 z1kxza&$8c4(DscKtVUgOI`VaeCEhEawHA*Ikgw0Em5E89-u307l!nME%$3{P?cRX= zz2#%;s-hGX>^uknP#Org2BOKO>?e`!PV0&VLRc!+iBwr0b zZo<6_{ZP#TBO3aAQ{`rFS98H=+pAr>Q1=lk^C)B@ei*#El%Lv(cqmPk_e}`yq4VzA znP-FZV`1O=*~h3m#jY^=iu-i@^Nd#MC=wLEPO1z>zT)&1oRw}N&poiN+!1rfIiW#u zPrT7*zy9v-uqXOTRx35vU|qdBq(uo+Vt}94$o|?gCajeFA>~iT&$VxjIr$3Sd&Mg8 zanxHL-n;+V;05-_c-1S}|M7O7wcJ5nBuJO9S_gHI#NfdPLz#$MrXr84 zWgs6hi8YM|tmu z?kTH4?3umjFNvR9b!!~+y4f2ZQO_tMkP}W|p5V=8+xAe* zd(2hUm^p>oLGtdB4W3xX8Ft_9A9;*@J8NCv58OAKD)*{;xtas-gM+1SnRDTfmqUR) z{+wffULIr!qb}(%Cc-y{zMz4R;hz6ha2<|R_H8&SMJXf{(m)C+31!{M4sq;b9OpZLM56e5}FH7d!BLPg;X5kf;grDzBVkuusTeOG?~9?!Uc_cd>U_wP5F%Et9=E|cyZ z1wUb?;d4`Oib1W?ISW3(2c#I2{N3P*1Bw31;Y)B}QpNlkKDA!_|I!)4y2!EW?bQ@E zyMUgA1+z1Xy&R!yd-QtXx!e-}w)N0BauPqduY^hf-x_yMC|9xRj$ezL9QMf%g5IL$ zztA7yxg~55^i~g!y6vtBSGv+P6FCI^LCIxPd;EXsAGr7YYfm_fZ1qKaSRCwY@$LG5 z@y#br=KyaO`04fOA4h;oq=Rqw-2;yp({s*HGz@-@YsoG%$e}IPH$K9%5Bq!5!0uJZ zKle2TB~1K1e~$9$pYVgP;iT?yqbLR)R8`Ux-EU2I=GXe>YuHi8+l(qNe zIFVt8sDXkHbcNi8$|`(^ogC(m4n6QZ-j{DH0Vm8ps=Q!HfCKGcTi%`7=tM>SD%+f} z|1dcPr?s5i$WvAQU@N|tSfiR9vy|BMk>}OcrG?0qgbV3Z?E$}{y(#w8Cwq#k8Nam$ z{OPu^g+rgf?+Fz(1W0Xhq?>!>7awkQBSqDcSw^XDlwZ3oe_Gm|Jm0ieY<+1@HAhxk z@f0BcV)-<90YCL6P6ZW*u_oj5n49?{PQcbpz@B_@XAF<;@t}|}mvd8d^+VdNi{)RtpC_GlC z62EWFoIX}BKa-Ysh=o7Yb*4|&2N(ZgGfK5be)!*m8{^)C$G!Sg zuKybN!)O7C5 zEfw&^Q{Vn=#(S45p(q-s=R&8I#e^G>gIu2bHI!F?MVq)EoL~10{6K}RQfma0&PVPu zY@XvxQkT26xo5NJWX+vB`@!3?@0Ubt;5_-t=daZZ9bWu#-s6u+=JaqIPuEvrODey$ zF2Mr)hyjBHpB_Q~MWMsq4CuZdqp>Y!%dE+|=%MrZ>l|thljh@XWK*Ti8Wn31HaXj$ z%UJUYxLHN`Tu~+POm~LZM3pOjFXfm>VlNnaq9APg!HN2Z|8?&!Mej@5o_o^J5$7xC z9F%W$B}3^AYVu_FE>x4U!up#4!BYf8Uf$jzQCFqM<8I^7Y{z5c& z=){;N@@ErkB{nkbNR;ixTcZuULi~Pu0mwC&N12(L!@qigKS@jkzMUe;JAR^Ua<$4I ze+$2K@9Z^)V*en|s+HuMnG8Ly$zs;6eYWHlDBxxK$DaDFW@j6TW4;w`4=-b&|Hr%~ z&l-F~-R6SvwQBG;`!8sSvIVcxn;g|Pg#EpDO|V`u{5)+6tFrL@UpH=gF&Dhp2~WA` zMho~do_w97uWm!qDqo8lYV4`LE+~7Y9Ol}BEmd@y4S(LR>Wg+9;EA9n?qoMQmeL%N zY7Ks{Q`w^fb2&!AY*kqy_Gf0KQ$xQc`IU@bsOJVhsnTb8Ki`2CZIx`*XfUPpveXte zUkmCvLGLDXt*K_RdU=^5{FFs*mqb^(Q1F}Q&ib3--zeo>I%g33(1uX{bmZ=TrMs)& zbakS;A3rzj9I~bE>TedOaK6c`+m72S4m~bErFT5o4&KAMcaLzrBh3VrgzbmUGpi{6 zpG7_J9VC}358(U?j~*%wtw-;Xp_(Eu__{i;&FlOYzz=@N`KZtX>|N1Y5@J(aNNx2s zlZ)l(!C3kH@Ip%_DSWpUiXLZ@NV%H*=??hItPbnvpy%Q71GR%v@Xh2+v`c(1!yYGW z_<5OeIoqivZr&A6R^+RAqT!9_=R3Z*gN<^{r!hSjIsY1 z4CHbTqu(Jb!_gW1dQIB>{P8uIHyL54MrP1=W|<!d( zJhZutMV)#31mA(DKH0ld^*D6HCvAcJH-IPCFXOg2X@#8TFiqHCt_m*^FABYf{ZlYS zZs>+RW!GIk)mjUj?Pe;_2%pr+!((Eq0XDRoYg{%9__30+*4B1)kHhg}se7DB&*GS{)az@z-LCKR&p99e2UK8$kcNJrY<&&Pdes4K-gWik~$L78g@aVN{^Nhs?G+l1? 
zJHf}0B4ww!h2Xz8{~y!5#uWbZ<=NaP;vCU$a$nQ06FEqikSTlMqCM+Y`#(-XP9R3M z!2o`oT?U_hpQHaxvbD!g9eL`u6*EKSZ=Fc5CF#fZ>-MB#ny@373;NPLi8#CM@DH%u zq+{SS)?|!+J~D|u2)7Qejx)&Pe2y3lIbchCD~ILuMxg&UZsXmBzhBALP>ytPpoKyk z--n@BMMQDT%m?qu2j$beodag1qi_GlBixc+txuilgFYA~6PO<44xM4D_)k^~`b<<3 zpZ$mT_7%U9Rk=hg$fu!QzV(dHv`^)dF}CL9Jky&3Il?(YKEZpqC}jTT%-CZXVT3?YWBRTT?Sg zkXOQaX;aS=!tF|~jGxz^mq54CPRw`2e6l;`deQZU3;j_Xw0=tPv&9bD@H|3pNM`G4 z4cwP#ot48OI3H`4)<}-QXQdN+ZF~T@Iq}+suI>f!LvIkw2pa*P=)5X8{=Ey0{(k4j zfY0OmwcqJy;E(0Cb9wjk4DeRwRN**uEwv`qnf$ZR>%C4YJR4$Bj^2&v?2im;`{&z+ zDNSptd^4-CWjFfnN~88}M}DQRH-udde*5j?gEOz-EBV%bA~c{7I6=Vh^$yR!zA62~ z#n=zx+qqogDy>NBNU8s$CAM@-t1wIh{Wp=`OIBTl&nA9IcUheglcuc0(k(8aM@r+D z+B113Z3vHFdM(9?{8~26)opU3)2h>J)WoewV>)E}f2uY#KO=wfV=L%JA%70ca<`}O zkgF00)3FaS-DLd0M=Jc2NCGX;LFG~fTK@%3@05!TM!vG3aDPOuzZI#O`|fajY)iiU zY4en^KUP1h`TRB){;wMb+{ufOqsx=>NPzD|r6?sg_NP7ZRh##9K!2%ynRdTD54v10 zmu~oa8w%Xis$-~+eyFbpzBm@6-=?wN=i(pu#CC?N>Z(ESZTG&+4IU>h`Tl~t#SDsd zNv-M^vW1^GZ^a0DL=vVZ)q{|q*BTB9m8pP#`c~B5v*1PB-M8(k;I*gV2VX3$!7s1V zAM8o~AM`HmSpSDl;VUrw&}WSEH0jQs`4#FMN-GsLs`ke|5mUnAUg1Km(dV-~cd&^i zc|q3n9{ff9@86%p`@>m~<6(FU`y|)?bK<~xUpt2b^nEy_pmCwJ&5BKJc{WmM@td(ZwbW*Kz%XbQXCk z4vL2YhbG?-<{jGaM(b<+NC|szPt)cI0R!Ynmp$4k%nw|mx<=7rhXYA>TE|_>2JQ;w zRaOmQ(JL3@^-qHxsn7gz!&A)jgFYo4N}6Q@EHB^%u}I6z)GP-1`(CXc%C{wk!1h}IdgvjN7Q(SlY$!P- z|Bep6%W7YTxf0-?&S)7ZynPEDyF__o0rvGR*Sd3$XCQ}r@XG@+_}^qqRCl~EaUnOu z^Q|S|jm&QZj9UI=(KGf^?M6`+tsCR{_6@$mUvvEgv~)RS;3ySy82rAv+Rbnox^^r9V|sk*=$LyJ~<{Nqj{Yh_$_{eixC z)tO5Y^TBWCXps*1#pFrtqP_~?z!1GxQYMgYC3JShQsd4reHM-C%4tn9?CF5!h{B3f=p8t^x4_~s^p=5h zN|#F=sO`%0vtiJ0GP<`u?8UrYm8v^8893Aa_1*r67~~A}7I3p#z&9#h?DyMfOOY zpRdk+KHwLsxl*(RIV7RF;*?rE^i=&MhZ=eKl#ic_^#)FqRaxThzuT0~kEl|pJLhks}PCEza?^?0iy3!VupvRfe!hX^EO}dq3%Aq}8 zUejMM+aSNT(Q03?GZmU|OO61(l*^jGpmMnlWlg^G+I;`7Zhlu#9(lAVXKz2Zbqw-p z5s59l;7rqfZkssB4=$D~vDa_5hYmCBIfH#LyWq}$cWBM&3z z%${h0Prp4yEX@Kro!bIZi-Mdu^yH0v@V12<>TxsXE3CC4Ef0s&JZ#K^gn_Z#F%Hey zmG7GzWK03?5Ao0XyWhs@OvXLtP)Bu7Vs#txHiFMDRa?1`s{4&eOU-}!x=td9-w;<4*WqE9e?4o*a_YQX;g=bk3l2jI(qy^B15;{1K> zy;|sp1)Wt`B;TKiez?5`?rBljOJ{~7T>RmeU0B~I34fJ|d@1i$+_%2pO8+I9!RNoV z#=V6fe2ofoy=)bS?pF!C?0Jn`jI4yAZ7B5fqmkv;CD1pMaD{&h_WTX5ZGze0Pa+x> zL|=kGf0>pdvFegF-FT24TYa5LPtPtrpuPdTy^^2ru0ktH-R%>i4gVQ6i+8Np#il@S z)h%nG3vV;(t$%5RePrHc^|_1SKeQR*>%n_2X_sUY3SPD=C+;~{uqzcl^?Ff1Yy;nR zt?OxC2YR>Af`0<@NqMN{!=>%$n``^AlQY63rjw`Mm2vFJ8a2%;HlSC6=XB1^<;eNe zhnF|Cm-9Z@$u+2(|Yu*!3g?3|9rLlPJ0%8uqwJ@@&Y|?9KJ8(`tV^j zKh_E3LT|-)R#?kc2r3A;Zi8FQg$=<(t@_N;fFWT@K$<_&vVa{p#e++k6$f8wH7Wf6dXHk=5TYWyDiiN1@Iw=V92e@=YpY}DTzu$7nP#ND6PWTo71qBswJ z$Fx(1MXifc#(|RsI54Uf7{aVV-h8SN)caDB? 
zpg`@qwF} 4: - raise ValueError(f'Input shape should have 3 or 4 dimensions, but provided {self._shape}') - if self._shape[self._layout.get_index_by_name('C')] != 3: - raise ValueError('SyntheticImageLoader can generate images with only channels == 3') - - def _download_colorization_model(self): - proto_name = 'colorization_deploy_v2.prototxt' - model_name = 'colorization_release_v2.caffemodel' - npy_name = 'pts_in_hull.npy' - - if not os.path.exists(proto_name): - url = 'https://raw.githubusercontent.com/richzhang/colorization/caffe/colorization/models/' - proto = requests.get(url + proto_name) - open(proto_name, 'wb').write(proto.content) - if not os.path.exists(model_name): - url = 'http://eecs.berkeley.edu/~rich.zhang/projects/2016_colorization/files/demo_v2/' - model = requests.get(url + model_name) - open(model_name, 'wb').write(model.content) - if not os.path.exists(npy_name): - url = 'https://github.com/richzhang/colorization/raw/caffe/colorization/resources/' - pts_in_hull = requests.get(url + npy_name) - open(npy_name, 'wb').write(pts_in_hull.content) - - def _initialize_params(self, height, width): - default_img_size = 362 * 362 - points_coeff = max(1, int(np.round(height * width / default_img_size))) - self._num_of_points = 100000 * points_coeff - - if self.subset_size < len(self._weights): - self._instances = 1 - self._categories = 1 - self._weights = self._weights[:self.subset_size, :] - else: - self._instances = np.ceil(0.25 * self.subset_size / self._weights.shape[0]).astype(int) - self._categories = np.ceil(self.subset_size / (self._instances * self._weights.shape[0])).astype(int) - - def generate_dataset(self): - height = self._shape[self._layout.get_index_by_name('H')] - width = self._shape[self._layout.get_index_by_name('W')] - self._initialize_params(height, width) - - # to avoid multiprocessing error: can't pickle openvino.pyopenvino.Layout objects - self._layout = str(self._layout) - - with Pool(processes=self._cpu_count) as pool: - params = pool.map(self._generate_category, [1e-5] * self._categories) - - instances_weights = np.repeat(self._weights, self._instances, axis=0) - weight_per_img = np.tile(instances_weights, (self._categories, 1)) - repeated_params = np.repeat(params, self._weights.shape[0] * self._instances, axis=0) - repeated_params = repeated_params[:self.subset_size] - weight_per_img = weight_per_img[:self.subset_size] - assert weight_per_img.shape[0] == len(repeated_params) == self.subset_size - - splits = min(self._cpu_count, self.subset_size) - params_per_proc = np.array_split(repeated_params, splits) - weights_per_proc = np.array_split(weight_per_img, splits) - - generation_params = [] - offset = 0 - for param, w in zip(params_per_proc, weights_per_proc): - indices = list(range(offset, offset + len(param))) - offset += len(param) - generation_params.append((param, w, height, width, indices)) - - with Pool(processes=self._cpu_count) as pool: - pool.starmap(self._generate_image_batch, generation_params) - - self._layout = Layout(self._layout) - - def _generate_image_batch(self, params, weights, height, width, indices): - pts_in_hull = np.load('pts_in_hull.npy').transpose().reshape(2, 313, 1, 1).astype(np.float32) - net = cv.dnn.readNetFromCaffe('colorization_deploy_v2.prototxt', 'colorization_release_v2.caffemodel') - net.getLayer(net.getLayerId('class8_ab')).blobs = [pts_in_hull] - net.getLayer(net.getLayerId('conv8_313_rh')).blobs = [np.full([1, 313], 2.606, np.float32)] - - for i, param, weight in zip(indices, params, weights): - image = 
self._generator(param, 'gray', self._iterations, height, width, weight) - color_image = self._colorize(image, net) - aug_image = self._augment(color_image) - cv.imwrite(os.path.join(self.data_source, "{:06d}.png".format(i)), aug_image) - - @staticmethod - def _generator(params, draw_type, iterations, height=512, width=512, weight=None): - generators = IFSFunction(prev_x=0.0, prev_y=0.0) - for param in params: - generators.set_param(param[:6], param[6], weight) - generators.calculate(iterations) - img = generators.draw(draw_type, height, width) - return img - - def _generate_category(self, eps, height=512, width=512): - pixels = -1 - while pixels < self._threshold: - param_size = np.random.randint(2, 8) - params = np.zeros((param_size, 7), dtype=np.float32) - - sum_proba = eps - for i in range(param_size): - a, b, c, d, e, f = np.random.uniform(-1.0, 1.0, 6) - prob = abs(a * d - b * c) - sum_proba += prob - params[i] = a, b, c, d, e, f, prob - params[:, 6] /= sum_proba - - fracral_img = self._generator(params, 'point', self._num_of_points, height, width) - pixels = np.count_nonzero(fracral_img) / (height * width) - return params - - @staticmethod - def _rgb2lab(frame): - y_coeffs = np.array([0.212671, 0.715160, 0.072169], dtype=np.float32) - frame = np.where(frame > 0.04045, np.power((frame + 0.055) / 1.055, 2.4), frame / 12.92) - y = frame @ y_coeffs.T - L = np.where(y > 0.008856, 116 * np.cbrt(y) - 16, 903.3 * y) - return L - - def _colorize(self, frame, net): - H_orig, W_orig = frame.shape[:2] # original image size - if len(frame.shape) == 2 or frame.shape[-1] == 1: - frame = np.tile(frame.reshape(H_orig, W_orig, 1), (1, 1, 3)) - - frame = frame.astype(np.float32) / 255 - img_l = self._rgb2lab(frame) # get L from Lab image - img_rs = cv.resize(img_l, (224, 224)) # resize image to network input size - img_l_rs = img_rs - 50 # subtract 50 for mean-centering - - net.setInput(cv.dnn.blobFromImage(img_l_rs)) - ab_dec = net.forward()[0, :, :, :].transpose((1, 2, 0)) - - ab_dec_us = cv.resize(ab_dec, (W_orig, H_orig)) - img_lab_out = np.concatenate((img_l[..., np.newaxis], ab_dec_us), axis=2) # concatenate with original image L - img_bgr_out = np.clip(cv.cvtColor(img_lab_out, cv.COLOR_Lab2BGR), 0, 1) - frame_normed = 255 * (img_bgr_out - img_bgr_out.min()) / (img_bgr_out.max() - img_bgr_out.min()) - frame_normed = np.array(frame_normed, dtype=np.uint8) - return cv.resize(frame_normed, (H_orig, W_orig)) - - def _augment(self, image): - if np.random.random(1) >= 0.5: - image = cv.flip(image, 1) - - if np.random.random(1) >= 0.5: - image = cv.flip(image, 0) - - height, width = image.shape[:2] - angle = np.random.uniform(-30, 30) - rotate_matrix = cv.getRotationMatrix2D(center=(width / 2, height / 2), angle=angle, scale=1) - image = cv.warpAffine(src=image, M=rotate_matrix, dsize=(width, height)) - - image = self._fill_background(image) - - k_size = np.random.choice(list(range(1, 16, 2))) - image = cv.GaussianBlur(image, (k_size, k_size), 0) - return image - - @staticmethod - def _fill_background(image): - synthetic_background = Path(__file__).parent / 'synthetic_background.npy' - imagenet_means = np.load(synthetic_background) - class_id = np.random.randint(0, imagenet_means.shape[0]) - rows, cols = np.where(~np.any(image, axis=-1)) # background color = [0, 0, 0] - image[rows, cols] = imagenet_means[class_id] - return image diff --git a/tools/pot/openvino/tools/pot/engines/creator.py b/tools/pot/openvino/tools/pot/engines/creator.py index 895da98e7aa..563e983c2dd 100644 --- 
a/tools/pot/openvino/tools/pot/engines/creator.py +++ b/tools/pot/openvino/tools/pot/engines/creator.py @@ -2,7 +2,6 @@ # SPDX-License-Identifier: Apache-2.0 from openvino.tools.pot.engines.ac_engine import ACEngine -from openvino.tools.pot.engines.data_free_engine import DataFreeEngine from openvino.tools.pot.engines.simplified_engine import SimplifiedEngine @@ -17,6 +16,4 @@ def create_engine(config, **kwargs): return ACEngine(config) if config.type == 'simplified': return SimplifiedEngine(config, **kwargs) - if config.type == 'data_free': - return DataFreeEngine(config, **kwargs) raise RuntimeError('Unsupported engine type') diff --git a/tools/pot/openvino/tools/pot/engines/data_free_engine.py b/tools/pot/openvino/tools/pot/engines/data_free_engine.py deleted file mode 100644 index 35789b36cb8..00000000000 --- a/tools/pot/openvino/tools/pot/engines/data_free_engine.py +++ /dev/null @@ -1,19 +0,0 @@ -# Copyright (C) 2021-2022 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -from openvino.tools.pot.data_loaders.synthetic_image_loader import SyntheticImageLoader -from openvino.tools.pot.engines.simplified_engine import SimplifiedEngine - -class DataFreeEngine(SimplifiedEngine): - def __init__(self, config, data_loader=None, metric=None): - super().__init__(config) - if not data_loader: - self._data_loader = self.get_data_loader(config) - else: - self._data_loader = data_loader - - def get_data_loader(self, config): - if config.data_type == 'image': - return SyntheticImageLoader(config) - - raise NotImplementedError("Currently data-free optimization is available for Computer Vision models only") diff --git a/tools/pot/tests/test_cmd_params.py b/tools/pot/tests/test_cmd_params.py index a699573163c..e7f437ca78e 100644 --- a/tools/pot/tests/test_cmd_params.py +++ b/tools/pot/tests/test_cmd_params.py @@ -21,7 +21,7 @@ test_params = [('', 'Either --config or --quantize option should be specified', ('-e -m path_model', 'Either --config or --quantize option should be specified', ValueError), ('--quantize default -w path_weights -m path_model', '--quantize option requires AC config to be specified ' - 'or --engine should be `data_free` or `simplified`.', ValueError), + 'or --engine should be `simplified`.', ValueError), ('--quantize accuracy_aware -m path_model --ac-config path_config', '--quantize option requires model and weights to be specified.', ValueError), ('-c path_config -m path_model', 'Either --config or --model option should be specified', ValueError), diff --git a/tools/pot/tests/test_data_generation.py b/tools/pot/tests/test_data_generation.py deleted file mode 100644 index 75c7a19ba9f..00000000000 --- a/tools/pot/tests/test_data_generation.py +++ /dev/null @@ -1,51 +0,0 @@ -# Copyright (C) 2022 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import os -from addict import Dict - -import pytest - -from openvino.tools.pot.data_loaders.creator import create_data_loader -from openvino.tools.pot.graph import load_model -from openvino.tools.pot.graph.model_utils import get_nodes_by_type - - -TEST_MODELS = [ - ('mobilenet-v2-pytorch', 'pytorch', None, None), - ('mobilenet-v2-pytorch', 'pytorch', None, (3, 640, 720)), - ('mobilenet-v2-pytorch', 'pytorch', 'HWC', (224, 224, 3)), - ('mobilenet-v2-pytorch', 'pytorch', 'NHWC', (1, 224, 224, 3)), - ('mobilenet-v2-pytorch', 'pytorch', 'CHW', (3, 224, 224)), - ('mobilenet-v2-pytorch', 'pytorch', 'NCHW', (1, 3, 224, 224)), -] - -@pytest.mark.parametrize( - 'model_name, model_framework, layout, input_shape', TEST_MODELS, - 
ids=['{}_{}_{}_{}'.format(m[0], m[1], m[2], m[3]) for m in TEST_MODELS]) -def test_generate_image(tmp_path, models, model_name, model_framework, layout, input_shape): - path_image_data = os.path.join(tmp_path, 'pot_dataset') - stat_subset_size = 5 - engine_config = Dict({'device': 'CPU', - 'type': 'data_free', - 'data_source': path_image_data, - 'subset_size': stat_subset_size, - 'layout': layout, - 'shape': input_shape, - 'generate_data': 'True'}) - model = models.get(model_name, model_framework, tmp_path) - model = load_model(model.model_params) - data_loader = create_data_loader(engine_config, model) - - num_images_from_data_loader = len(list(data_loader)) - num_images_in_dir = len(os.listdir(path_image_data)) - assert num_images_from_data_loader == num_images_in_dir == stat_subset_size - - image = data_loader[0] - if input_shape is None: - in_node = get_nodes_by_type(model, ['Parameter'], recursively=False)[0] - input_shape = tuple(in_node.shape[1:]) - elif len(input_shape) == 4: - input_shape = input_shape[1:] - - assert image.shape == input_shape diff --git a/tools/pot/tests/test_sanity.py b/tools/pot/tests/test_sanity.py index f29334c1e0b..c8bc9a4eba3 100644 --- a/tools/pot/tests/test_sanity.py +++ b/tools/pot/tests/test_sanity.py @@ -219,24 +219,6 @@ def test_simplified_mode(tmp_path, models): assert metrics == pytest.approx(expected_accuracy, abs=0.006) -DATAFREE_TEST_MODELS = [ - ('mobilenet-v2-pytorch', 'pytorch', 'DefaultQuantization', 'performance', - {'accuracy@top1': 0.679, 'accuracy@top5': 0.888}) -] - - -def test_datafree_mode(tmp_path, models): - engine_config = Dict({'type': 'data_free', - 'data_source': os.path.join(tmp_path, 'pot_dataset'), - 'generate_data': 'True', - 'subset_size': 30, - 'device': 'CPU'}) - - _, _, _, _, expected_accuracy = DATAFREE_TEST_MODELS[0] - metrics = launch_simplified_mode(tmp_path, models, engine_config) - assert metrics == pytest.approx(expected_accuracy, abs=0.06) - - def test_frame_extractor_tool(): # hack due to strange python imports (same as in sample test) pot_dir = Path(__file__).parent.parent From 5b3b48aa17b90656823942173d1a6fa5b4724561 Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Tue, 22 Feb 2022 20:11:42 +0300 Subject: [PATCH 081/310] samples overview & model protection: docs (#10596) * Renamed hetero md * Renamed some guides * Updated OpenVINO_Runtime_User_Guide.md * Updated plugin's page * More updates * Fixed links * Updated link names * Fixed links * Fixed docs build * Self-review * Fixed issues in doc snippets * Updated Samples_Overview.md * Updated model protection guide * Renamed ngraph_function creation samples --- docs/OV_Runtime_UG/API_Changes.md | 1 - docs/OV_Runtime_UG/Samples_Overview.md | 51 ++++++++---------- docs/OV_Runtime_UG/ShapeInference.md | 2 +- docs/OV_Runtime_UG/protecting_model_guide.md | 20 +++---- docs/snippets/protecting_model_guide.cpp | 19 ++++--- .../CMakeLists.txt | 6 +-- .../README.md | 16 +++--- .../lenet.bin | Bin .../lenet.labels | 0 .../main.cpp | 7 +-- .../model_creation_sample.hpp} | 0 .../README.md | 14 ++--- .../data.py | 0 .../lenet.bin | Bin .../model_creation_sample.py} | 0 src/core/include/openvino/op/parameter.hpp | 6 +-- src/frontends/ir/src/ir_deserializer.hpp | 2 +- 17 files changed, 65 insertions(+), 79 deletions(-) rename samples/cpp/{ngraph_function_creation_sample => model_creation_sample}/CMakeLists.txt (52%) rename samples/cpp/{ngraph_function_creation_sample => model_creation_sample}/README.md (78%) rename samples/cpp/{ngraph_function_creation_sample => 
model_creation_sample}/lenet.bin (100%) rename samples/cpp/{ngraph_function_creation_sample => model_creation_sample}/lenet.labels (100%) rename samples/cpp/{ngraph_function_creation_sample => model_creation_sample}/main.cpp (98%) rename samples/cpp/{ngraph_function_creation_sample/ngraph_function_creation_sample.hpp => model_creation_sample/model_creation_sample.hpp} (100%) rename samples/python/{ngraph_function_creation_sample => model_creation_sample}/README.md (91%) rename samples/python/{ngraph_function_creation_sample => model_creation_sample}/data.py (100%) rename samples/python/{ngraph_function_creation_sample => model_creation_sample}/lenet.bin (100%) rename samples/python/{ngraph_function_creation_sample/ngraph_function_creation_sample.py => model_creation_sample/model_creation_sample.py} (100%) diff --git a/docs/OV_Runtime_UG/API_Changes.md b/docs/OV_Runtime_UG/API_Changes.md index 0490f9228f2..3f1c0bf42e2 100644 --- a/docs/OV_Runtime_UG/API_Changes.md +++ b/docs/OV_Runtime_UG/API_Changes.md @@ -7,4 +7,3 @@ The sections below contain detailed list of changes made to the OpenVINO™ Runt ### New API * The OpenVINO™ 2.0 API was introduced. - diff --git a/docs/OV_Runtime_UG/Samples_Overview.md b/docs/OV_Runtime_UG/Samples_Overview.md index c7d6dd66f82..ddb7a476aca 100644 --- a/docs/OV_Runtime_UG/Samples_Overview.md +++ b/docs/OV_Runtime_UG/Samples_Overview.md @@ -1,4 +1,4 @@ -# Inference Engine Samples {#openvino_docs_IE_DG_Samples_Overview} +# OpenVINO Samples {#openvino_docs_IE_DG_Samples_Overview} @sphinxdirective @@ -19,8 +19,8 @@ openvino_inference_engine_ie_bridges_c_samples_hello_nv12_input_classification_README openvino_inference_engine_samples_hello_query_device_README openvino_inference_engine_ie_bridges_python_sample_hello_query_device_README - openvino_inference_engine_samples_ngraph_function_creation_sample_README - openvino_inference_engine_ie_bridges_python_sample_ngraph_function_creation_sample_README + openvino_inference_engine_samples_model_creation_sample_README + openvino_inference_engine_ie_bridges_python_sample_model_creation_sample_README openvino_inference_engine_samples_speech_sample_README openvino_inference_engine_ie_bridges_python_sample_speech_sample_README openvino_inference_engine_samples_benchmark_app_README @@ -28,14 +28,14 @@ @endsphinxdirective -The Inference Engine sample applications are simple console applications that show how to utilize specific Inference Engine capabilities within an application, assist developers in executing specific tasks such as loading a model, running inference, querying specific device capabilities and etc. +The OpenVINO sample applications are simple console applications that show how to utilize specific OpenVINO API capabilities within an application, assist developers in executing specific tasks such as loading a model, running inference, querying specific device capabilities and etc. After installation of Intel® Distribution of OpenVINO™ toolkit, С, C++ and Python* sample applications are available in the following directories, respectively: * `/samples/c` * `/samples/cpp` * `/samples/python` -Inference Engine sample applications include the following: +OpenVINO sample applications include the following: - **Speech Sample** - Acoustic model inference based on Kaldi neural networks and speech feature vectors. 
- [Automatic Speech Recognition C++ Sample](../../samples/cpp/speech_sample/README.md) @@ -50,7 +50,7 @@ Inference Engine sample applications include the following: - **Hello NV12 Input Classification Sample** – Input of any size and layout can be provided to an infer request. The sample transforms the input to the NV12 color format and pre-process it automatically during inference. The sample supports only images as inputs. - [Hello NV12 Input Classification C++ Sample](../../samples/cpp/hello_nv12_input_classification/README.md) - [Hello NV12 Input Classification C Sample](../../samples/c/hello_nv12_input_classification/README.md) -- **Hello Query Device Sample** – Query of available Inference Engine devices and their metrics, configuration values. +- **Hello Query Device Sample** – Query of available OpenVINO devices and their metrics, configuration values. - [Hello Query Device C++ Sample](../../samples/cpp/hello_query_device/README.md) - [Hello Query Device Python* Sample](../../samples/python/hello_query_device/README.md) - **Hello Reshape SSD Sample** – Inference of SSD networks resized by ShapeInfer API according to an input size. @@ -59,10 +59,10 @@ Inference Engine sample applications include the following: - **Image Classification Sample Async** – Inference of image classification networks like AlexNet and GoogLeNet using Asynchronous Inference Request API (the sample supports only images as inputs). - [Image Classification Async C++ Sample](../../samples/cpp/classification_sample_async/README.md) - [Image Classification Async Python* Sample](../../samples/python/classification_sample_async/README.md) -- **nGraph Function Creation Sample** – Construction of the LeNet network using the nGraph function creation sample. - - [nGraph Function Creation C++ Sample](../../samples/cpp/ngraph_function_creation_sample/README.md) - - [nGraph Function Creation Python Sample](../../samples/python/ngraph_function_creation_sample/README.md) - +- **OpenVINO Model Creation Sample** – Construction of the LeNet model using the OpenVINO model creation sample. + - [OpenVINO Model Creation C++ Sample](../../samples/cpp/model_creation_sample/README.md) + - [OpenVINO Model Creation Python Sample](../../samples/python/model_creation_sample/README.md) + > **NOTE**: All C++ samples support input paths containing only ASCII characters, except the Hello Classification Sample, that supports Unicode. ## Media Files Available for Samples @@ -79,8 +79,8 @@ To run the sample, you can use [public](@ref omz_models_group_public) or [Intel' The officially supported Linux* build environment is the following: -* Ubuntu* 18.04 LTS 64-bit or CentOS* 7 64-bit -* GCC* 7.5.0 (for Ubuntu* 18.04) or GCC* 4.8.5 (for CentOS* 7.6) +* Ubuntu* 18.04 LTS 64-bit or Ubuntu* 20.04 LTS 64-bit +* GCC* 7.5.0 (for Ubuntu* 18.04) or GCC* 9.3.0 (for Ubuntu* 20.04) * CMake* version 3.10 or higher > **NOTE**: For building samples from the open-source version of OpenVINO™ toolkit, see the [build instructions on GitHub](https://github.com/openvinotoolkit/openvino/wiki/BuildingCode). 
@@ -102,7 +102,7 @@ You can also build the sample applications manually: ```sh mkdir build ``` -> **NOTE**: If you ran the Image Classification verification script during the installation, the C++ samples build directory was already created in your home directory: `~/inference_engine_samples_build/` +> **NOTE**: If you ran the Image Classification verification script during the installation, the C++ samples build directory was already created in your home directory: `~/inference_engine_cpp_samples_build/` 2. Go to the created directory: ```sh @@ -130,22 +130,17 @@ for the debug configuration — in `/intel64/Debug/`. The recommended Windows* build environment is the following: * Microsoft Windows* 10 -* Microsoft Visual Studio* 2017, or 2019 +* Microsoft Visual Studio* 2019 * CMake* version 3.10 or higher -> **NOTE**: If you want to use Microsoft Visual Studio 2019, you are required to install CMake 3.14. +> **NOTE**: If you want to use Microsoft Visual Studio 2019, you are required to install CMake 3.14 or higher. To build the C or C++ sample applications on Windows, go to the `\samples\c` or `\samples\cpp` directory, respectively, and run the `build_samples_msvc.bat` batch file: ```sh build_samples_msvc.bat ``` -By default, the script automatically detects the highest Microsoft Visual Studio version installed on the machine and uses it to create and build -a solution for a sample code. Optionally, you can also specify the preferred Microsoft Visual Studio version to be used by the script. Supported -versions are `VS2017` and `VS2019`. For example, to build the C++ samples using the Microsoft Visual Studio 2017, use the following command: -```sh -\samples\cpp\build_samples_msvc.bat VS2017 -``` +By default, the script automatically detects the highest Microsoft Visual Studio version installed on the machine and uses it to create and build a solution for a sample code Once the build is completed, you can find sample binaries in the following folders: * C samples: `C:\Users\\Documents\Intel\OpenVINO\inference_engine_c_samples_build\intel64\Release` @@ -159,7 +154,7 @@ directory. The officially supported macOS* build environment is the following: -* macOS* 10.15 64-bit +* macOS* 10.15 64-bit or higher * Clang* compiler from Xcode* 10.1 or higher * CMake* version 3.13 or higher @@ -180,7 +175,7 @@ You can also build the sample applications manually: > **NOTE**: Before proceeding, make sure you have OpenVINO™ environment set correctly. This can be done manually by ```sh -cd /bin +cd / source setupvars.sh ``` @@ -188,7 +183,7 @@ source setupvars.sh ```sh mkdir build ``` -> **NOTE**: If you ran the Image Classification verification script during the installation, the C++ samples build directory was already created in your home directory: `~/inference_engine_samples_build/` +> **NOTE**: If you ran the Image Classification verification script during the installation, the C++ samples build directory was already created in your home directory: `~/inference_engine_cpp_samples_build/` 2. Go to the created directory: ```sh @@ -217,7 +212,7 @@ for the debug configuration — in `/intel64/Debug/`. ### Get Ready for Running the Sample Applications on Linux* Before running compiled binary files, make sure your application can find the -Inference Engine and OpenCV libraries. +OpenVINO Runtime libraries. Run the `setupvars` script to set all necessary environment variables: ```sh source /setupvars.sh @@ -246,7 +241,7 @@ list above. 
### Get Ready for Running the Sample Applications on Windows* Before running compiled binary files, make sure your application can find the -Inference Engine and OpenCV libraries. +OpenVINO Runtime libraries. Use the `setupvars` script, which sets all necessary environment variables: ```sh \setupvars.bat @@ -255,13 +250,13 @@ Use the `setupvars` script, which sets all necessary environment variables: To debug or run the samples on Windows in Microsoft Visual Studio, make sure you have properly configured **Debugging** environment settings for the **Debug** and **Release** configurations. Set correct paths to the OpenCV libraries, and -debug and release versions of the Inference Engine libraries. +debug and release versions of the OpenVINO Runtime libraries. For example, for the **Debug** configuration, go to the project's **Configuration Properties** to the **Debugging** category and set the `PATH` variable in the **Environment** field to the following: ```sh -PATH=\runtime\bin;\opencv\bin;%PATH% +PATH=\runtime\bin;%PATH% ``` where `` is the directory in which the OpenVINO toolkit is installed. diff --git a/docs/OV_Runtime_UG/ShapeInference.md b/docs/OV_Runtime_UG/ShapeInference.md index 4f836e23710..1c50659b262 100644 --- a/docs/OV_Runtime_UG/ShapeInference.md +++ b/docs/OV_Runtime_UG/ShapeInference.md @@ -166,7 +166,7 @@ To feed input data of a shape that is different from the model input shape, resh Once the input shape of IENetwork is set, call the `IECore.load_network` method to get an ExecutableNetwork object for inference with updated shapes. -There are other approaches to reshape the model during the stage of IR generation or [nGraph function](https://docs.openvino.ai/latest/openvino_docs_nGraph_DG_PythonAPI.html#create_an_ngraph_function_from_a_graph) creation. +There are other approaches to reshape the model during the stage of IR generation or [OpenVINO model](https://docs.openvino.ai/latest/openvino_docs_nGraph_DG_PythonAPI.html#create_an_ngraph_function_from_a_graph) creation. Practically, some models are not ready to be reshaped. In this case, a new input shape cannot be set with the Model Optimizer or the `IENetwork.reshape` method. diff --git a/docs/OV_Runtime_UG/protecting_model_guide.md b/docs/OV_Runtime_UG/protecting_model_guide.md index c5d891e70cf..222bdb90ffc 100644 --- a/docs/OV_Runtime_UG/protecting_model_guide.md +++ b/docs/OV_Runtime_UG/protecting_model_guide.md @@ -16,22 +16,22 @@ This guide demonstrates how to use OpenVINO securely with protected models. After a model is optimized by the OpenVINO Model Optimizer, it's deployed to target devices in the Intermediate Representation (IR) format. An optimized -model is stored on an edge device and executed by the Inference Engine. -(ONNX and nGraph models can also be read natively by the Inference Engine.) +model is stored on an edge device and executed by the OpenVINO Runtime. +(ONNX, PDPD models can also be read natively by the OpenVINO Runtime.) To protect deep-learning models, you can encrypt an optimized model before deploying it to the edge device. The edge device should keep the stored model protected at all times and have the model decrypted **in runtime only** for use -by the Inference Engine. +by the OpenVINO Runtime. ![deploy_encrypted_model](img/deploy_encrypted_model.png) ## Loading Encrypted Models -The OpenVINO Inference Engine requires model decryption before loading. Allocate +The OpenVINO Runtime requires model decryption before loading. 
Allocate a temporary memory block for model decryption and use the -`InferenceEngine::Core::ReadNetwork` method to load the model from a memory buffer. -For more information, see the `InferenceEngine::Core` Class Reference Documentation. +`ov::Core::read_model` method to load the model from a memory buffer. +For more information, see the `ov::Core` Class Reference Documentation. @snippet snippets/protecting_model_guide.cpp part0 @@ -40,12 +40,12 @@ Hardware-based protection such as Intel® Software Guard Extensions bind them to a device. For more information, go to [Intel® Software Guard Extensions](https://software.intel.com/en-us/sgx). -Use `InferenceEngine::Core::ReadNetwork()` to set model representations and +Use `ov::Core::read_model` to set model representations and weights respectively. Currently there is no way to read external weights from memory for ONNX models. -The `ReadNetwork(const std::string& model, const Blob::CPtr& weights)` function -should be called with `weights` passed as an empty `Blob`. +The `ov::Core::read_model(const std::string& model, const Tensor& weights)` method +should be called with `weights` passed as an empty `ov::Tensor`. @snippet snippets/protecting_model_guide.cpp part1 @@ -55,6 +55,6 @@ should be called with `weights` passed as an empty `Blob`. - OpenVINO™ toolkit online documentation: [https://docs.openvino.ai](https://docs.openvino.ai) - Model Optimizer Developer Guide: [Model Optimizer Developer Guide](../MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md) - [OpenVINO™ runTime User Guide](openvino_intro.md) -- For more information on Sample Applications, see the [Inference Engine Samples Overview](Samples_Overview.md) +- For more information on Sample Applications, see the [OpenVINO Samples Overview](Samples_Overview.md) - For information on a set of pre-trained models, see the [Overview of OpenVINO™ Toolkit Pre-Trained Models](@ref omz_models_group_intel) - For IoT Libraries and Code Samples see the [Intel® IoT Developer Kit](https://github.com/intel-iot-devkit). diff --git a/docs/snippets/protecting_model_guide.cpp b/docs/snippets/protecting_model_guide.cpp index bcf6856023f..0f148e76167 100644 --- a/docs/snippets/protecting_model_guide.cpp +++ b/docs/snippets/protecting_model_guide.cpp @@ -1,7 +1,8 @@ -#include #include #include +#include "openvino/runtime/core.hpp" + void decrypt_file(std::ifstream & stream, const std::string & pass, std::vector & result) { @@ -9,24 +10,22 @@ void decrypt_file(std::ifstream & stream, int main() { //! [part0] -std::vector model; -std::vector weights; +std::vector model_data, weights_data; std::string password; // taken from an user std::ifstream model_file("model.xml"), weights_file("model.bin"); // Read model files and decrypt them into temporary memory block -decrypt_file(model_file, password, model); -decrypt_file(weights_file, password, weights); +decrypt_file(model_file, password, model_data); +decrypt_file(weights_file, password, weights_data); //! [part0] //! [part1] -InferenceEngine::Core core; +ov::Core core; // Load model from temporary memory block -std::string strModel(model.begin(), model.end()); -InferenceEngine::CNNNetwork network = core.ReadNetwork(strModel, - InferenceEngine::make_shared_blob({InferenceEngine::Precision::U8, - {weights.size()}, InferenceEngine::C}, weights.data())); +std::string str_model(model_data.begin(), model_data.end()); +auto model = core.read_model(str_model, + ov::Tensor(ov::element::u8, {weights_data.size()}, weights_data.data())); //! 
[part1] return 0; diff --git a/samples/cpp/ngraph_function_creation_sample/CMakeLists.txt b/samples/cpp/model_creation_sample/CMakeLists.txt similarity index 52% rename from samples/cpp/ngraph_function_creation_sample/CMakeLists.txt rename to samples/cpp/model_creation_sample/CMakeLists.txt index c96ea7fae49..5c9d64db6aa 100644 --- a/samples/cpp/ngraph_function_creation_sample/CMakeLists.txt +++ b/samples/cpp/model_creation_sample/CMakeLists.txt @@ -2,9 +2,9 @@ # SPDX-License-Identifier: Apache-2.0 # -set(TARGET_NAME "ngraph_function_creation_sample") +set(TARGET_NAME "model_creation_sample") -ie_add_sample(NAME ngraph_function_creation_sample +ie_add_sample(NAME model_creation_sample SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/main.cpp" - HEADERS "${CMAKE_CURRENT_SOURCE_DIR}/ngraph_function_creation_sample.hpp" + HEADERS "${CMAKE_CURRENT_SOURCE_DIR}/model_creation_sample.hpp" DEPENDENCIES format_reader ie_samples_utils) diff --git a/samples/cpp/ngraph_function_creation_sample/README.md b/samples/cpp/model_creation_sample/README.md similarity index 78% rename from samples/cpp/ngraph_function_creation_sample/README.md rename to samples/cpp/model_creation_sample/README.md index 390d522a13b..e0edc2c3625 100644 --- a/samples/cpp/ngraph_function_creation_sample/README.md +++ b/samples/cpp/model_creation_sample/README.md @@ -1,8 +1,8 @@ -# nGraph Function Creation C++ Sample {#openvino_inference_engine_samples_ngraph_function_creation_sample_README} +# Model Creation C++ Sample {#openvino_inference_engine_samples_model_creation_sample_README} This sample demonstrates how to execute an synchronous inference using [model](../../../docs/OV_Runtime_UG/model_representation.md) built on the fly which uses weights from LeNet classification model, which is known to work well on digit classification tasks. -You do not need an XML file to create a model. The API of ngraph::Function allows creating a model on the fly from the source code. +You do not need an XML file to create a model. The API of ov::Model allows creating a model on the fly from the source code. The following C++ API is used in the application: @@ -13,7 +13,7 @@ The following C++ API is used in the application: | Tensor Operations | `ov::Tensor::get_byte_size`, `ov::Tensor:data` | Get tensor byte size and its data | | Model Operations | `ov::set_batch` | Operate with model batch size | | Infer Request Operations | `ov::InferRequest::get_input_tensor` | Get a input tensor | -| nGraph Functions | `ov::opset8::Parameter`, `ov::Node::output`, `ov::opset8::Constant`, `ov::opset8::Convolution`, `ov::opset8::Add`, `ov::opset1::MaxPool`, `ov::opset8::Reshape`, `ov::opset8::MatMul`, `ov::opset8::Relu`, `ov::opset8::Softmax`, `ov::descriptor::Tensor::set_names`, `ov::opset8::Result`, `ov::Model`, `ov::ParameterVector::vector` | Used to construct an nGraph function | +| Model creation objects | `ov::opset8::Parameter`, `ov::Node::output`, `ov::opset8::Constant`, `ov::opset8::Convolution`, `ov::opset8::Add`, `ov::opset1::MaxPool`, `ov::opset8::Reshape`, `ov::opset8::MatMul`, `ov::opset8::Relu`, `ov::opset8::Softmax`, `ov::descriptor::Tensor::set_names`, `ov::opset8::Result`, `ov::Model`, `ov::ParameterVector::vector` | Used to construct an OpenVINO model | Basic OpenVINO™ Runtime API is covered by [Hello Classification C++ sample](../hello_classification/README.md). @@ -23,7 +23,7 @@ Basic OpenVINO™ Runtime API is covered by [Hello Classification C++ sample](.. 
| Model Format | model weights file (\*.bin) | | Validated images | single-channel `MNIST ubyte` images | | Supported devices | [All](../../../docs/OV_Runtime_UG/supported_plugins/Supported_Devices.md) | -| Other language realization | [Python](../../../samples/python/ngraph_function_creation_sample/README.md) | +| Other language realization | [Python](../../../samples/python/model_creation_sample/README.md) | ## How It Works @@ -42,7 +42,7 @@ To build the sample, please use instructions available at [Build the Sample Appl ## Running ``` -ngraph_function_creation_sample +model_creation_sample ``` > **NOTES**: @@ -56,7 +56,7 @@ ngraph_function_creation_sample You can do inference of an image using a pre-trained model on a GPU using the following command: ``` -ngraph_function_creation_sample lenet.bin GPU +model_creation_sample lenet.bin GPU ``` ## Sample Output @@ -176,10 +176,6 @@ classid probability label -*Starting with the OpenVINO™ toolkit 2020.2 release, all of the features previously available through nGraph have been merged into the OpenVINO™ toolkit. As a result, all the features previously available through ONNX RT Execution Provider for nGraph have been merged with ONNX RT Execution Provider for OpenVINO™ toolkit.* - -*Therefore, ONNX RT Execution Provider for nGraph will be deprecated starting June 1, 2020 and will be completely removed on December 1, 2020. Users are recommended to migrate to the ONNX RT Execution Provider for OpenVINO™ toolkit as the unified solution for all AI inferencing on Intel® hardware.* - ## See Also - [Integrate the OpenVINO™ Runtime with Your Application](../../../docs/OV_Runtime_UG/Integrate_with_customer_application_new_API.md) diff --git a/samples/cpp/ngraph_function_creation_sample/lenet.bin b/samples/cpp/model_creation_sample/lenet.bin similarity index 100% rename from samples/cpp/ngraph_function_creation_sample/lenet.bin rename to samples/cpp/model_creation_sample/lenet.bin diff --git a/samples/cpp/ngraph_function_creation_sample/lenet.labels b/samples/cpp/model_creation_sample/lenet.labels similarity index 100% rename from samples/cpp/ngraph_function_creation_sample/lenet.labels rename to samples/cpp/model_creation_sample/lenet.labels diff --git a/samples/cpp/ngraph_function_creation_sample/main.cpp b/samples/cpp/model_creation_sample/main.cpp similarity index 98% rename from samples/cpp/ngraph_function_creation_sample/main.cpp rename to samples/cpp/model_creation_sample/main.cpp index cb552cdf76c..1ae6db0554c 100644 --- a/samples/cpp/ngraph_function_creation_sample/main.cpp +++ b/samples/cpp/model_creation_sample/main.cpp @@ -21,7 +21,7 @@ #include "samples/classification_results.h" #include "samples/slog.hpp" -#include "ngraph_function_creation_sample.hpp" +#include "model_creation_sample.hpp" // clang-format on constexpr auto N_TOP_RESULTS = 1; @@ -214,10 +214,7 @@ std::shared_ptr create_model(const std::string& path_to_weights) { } /** - * @brief The entry point for inference engine automatic ov::Model - * creation sample - * @file ngraph_function_creation_sample/main.cpp - * @example ngraph_function_creation_sample/main.cpp + * @brief The entry point for OpenVINO ov::Model creation sample */ int main(int argc, char* argv[]) { try { diff --git a/samples/cpp/ngraph_function_creation_sample/ngraph_function_creation_sample.hpp b/samples/cpp/model_creation_sample/model_creation_sample.hpp similarity index 100% rename from samples/cpp/ngraph_function_creation_sample/ngraph_function_creation_sample.hpp rename to 
samples/cpp/model_creation_sample/model_creation_sample.hpp diff --git a/samples/python/ngraph_function_creation_sample/README.md b/samples/python/model_creation_sample/README.md similarity index 91% rename from samples/python/ngraph_function_creation_sample/README.md rename to samples/python/model_creation_sample/README.md index 22979415726..d50cbf20a8e 100644 --- a/samples/python/ngraph_function_creation_sample/README.md +++ b/samples/python/model_creation_sample/README.md @@ -1,13 +1,13 @@ -# nGraph Function Creation Python* Sample {#openvino_inference_engine_ie_bridges_python_sample_ngraph_function_creation_sample_README} +# Model Creation Python* Sample {#openvino_inference_engine_ie_bridges_python_sample_model_creation_sample_README} -This sample demonstrates how to run inference using a [model](../../../docs/OV_Runtime_UG/model_representation.md) built on the fly that uses weights from the LeNet classification model, which is known to work well on digit classification tasks. You do not need an XML file, the model is created from the source code on the fly. +This sample demonstrates how to run inference using a [model](../../../docs/OV_Runtime_UG/model_representation.md) built on the fly that uses weights from the LeNet classification model, which is known to work well on digit classification tasks. You do not need an XML file, the model is created from the source code on the fly. -The following Python API is used in the application: +The following OpenVINO Python API is used in the application: | Feature | API | Description | | :--------------- | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :------------------------------------------------------ | | Model Operations | [openvino.runtime.Model], [openvino.runtime.set_batch], [openvino.runtime.Model.input] | Managing of model | -| nGraph Functions | [openvino.runtime.op.Parameter], [openvino.runtime.op.Constant], [openvino.runtime.opset8.convolution], [openvino.runtime.opset8.add], [openvino.runtime.opset1.max_pool], [openvino.runtime.opset8.reshape], [openvino.runtime.opset8.matmul], [openvino.runtime.opset8.relu], [openvino.runtime.opset8.softmax] | Description of a model topology using nGraph Python API | +| Opset operations | [openvino.runtime.op.Parameter], [openvino.runtime.op.Constant], [openvino.runtime.opset8.convolution], [openvino.runtime.opset8.add], [openvino.runtime.opset1.max_pool], [openvino.runtime.opset8.reshape], [openvino.runtime.opset8.matmul], [openvino.runtime.opset8.relu], [openvino.runtime.opset8.softmax] | Description of a model topology using OpenVINO Python API | Basic OpenVINO™ Runtime API is covered by [Hello Classification Python* Sample](../hello_classification/README.md). 
@@ -16,7 +16,7 @@ Basic OpenVINO™ Runtime API is covered by [Hello Classification Python* Sample | Validated Models | LeNet | | Model Format | Model weights file (\*.bin) | | Supported devices | [All](../../../docs/OV_Runtime_UG/supported_plugins/Supported_Devices.md) | -| Other language realization | [C++](../../../samples/cpp/ngraph_function_creation_sample/README.md) | +| Other language realization | [C++](../../../samples/cpp/model_creation_sample/README.md) | ## How It Works @@ -35,7 +35,7 @@ each sample step at [Integration Steps](../../../docs/OV_Runtime_UG/Integrate_wi To run the sample, you need to specify model weights and device. ``` -python ngraph_function_creation_sample.py +python model_creation_sample.py ``` > **NOTE**: @@ -49,7 +49,7 @@ python ngraph_function_creation_sample.py For example: ``` -python ngraph_function_creation_sample.py lenet.bin GPU +python model_creation_sample.py lenet.bin GPU ``` ## Sample Output diff --git a/samples/python/ngraph_function_creation_sample/data.py b/samples/python/model_creation_sample/data.py similarity index 100% rename from samples/python/ngraph_function_creation_sample/data.py rename to samples/python/model_creation_sample/data.py diff --git a/samples/python/ngraph_function_creation_sample/lenet.bin b/samples/python/model_creation_sample/lenet.bin similarity index 100% rename from samples/python/ngraph_function_creation_sample/lenet.bin rename to samples/python/model_creation_sample/lenet.bin diff --git a/samples/python/ngraph_function_creation_sample/ngraph_function_creation_sample.py b/samples/python/model_creation_sample/model_creation_sample.py similarity index 100% rename from samples/python/ngraph_function_creation_sample/ngraph_function_creation_sample.py rename to samples/python/model_creation_sample/model_creation_sample.py diff --git a/src/core/include/openvino/op/parameter.hpp b/src/core/include/openvino/op/parameter.hpp index 0a5bf1e180b..b6ede9bcd8d 100644 --- a/src/core/include/openvino/op/parameter.hpp +++ b/src/core/include/openvino/op/parameter.hpp @@ -10,11 +10,11 @@ namespace ov { namespace op { namespace v0 { -/// \brief A function parameter. +/// \brief A model parameter. /// /// Parameters are nodes that represent the arguments that will be passed to -/// user-defined functions. Function creation requires a sequence of parameters. -/// Basic graph operations do not need parameters attached to a function. +/// user-defined models. Model creation requires a sequence of parameters. +/// Basic graph operations do not need parameters attached to a model. 
class OPENVINO_API Parameter : public op::Op { public: OPENVINO_OP("Parameter", "opset1"); diff --git a/src/frontends/ir/src/ir_deserializer.hpp b/src/frontends/ir/src/ir_deserializer.hpp index 01cfaf6c070..76c4213cc5e 100644 --- a/src/frontends/ir/src/ir_deserializer.hpp +++ b/src/frontends/ir/src/ir_deserializer.hpp @@ -187,7 +187,7 @@ private: std::unordered_map>& m_variables; /// - /// store information about parameters/results order during function creation + /// store information about parameters/results order during a model creation /// it will be used during Inputs/Outputs Description creation in SubGraph processing /// IoMap io_map; From 3f4e384d5d69d2e8a6ed84cdb5e3ba7ae3bf4c4e Mon Sep 17 00:00:00 2001 From: Ilya Churaev Date: Tue, 22 Feb 2022 23:05:23 +0300 Subject: [PATCH 082/310] Disable reshape for new API (#10064) * Disable reshape for new API * Update cnn_network_ngraph_impl.cpp Co-authored-by: Ilya Lavrenov --- src/inference/src/cnn_network_ngraph_impl.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/inference/src/cnn_network_ngraph_impl.cpp b/src/inference/src/cnn_network_ngraph_impl.cpp index cbea1f4b443..777d80bc0d8 100644 --- a/src/inference/src/cnn_network_ngraph_impl.cpp +++ b/src/inference/src/cnn_network_ngraph_impl.cpp @@ -441,7 +441,7 @@ void CNNNetworkNGraphImpl::reshape(const std::map specialized_ngraph_function = nullptr; - if (outputs_are_static) { + if (outputs_are_static || _new_api) { specialized_ngraph_function = _ngraph_function; } else { specialized_ngraph_function = ngraph::clone_function(*_ngraph_function); From 817550fa0a115b9468cf62630a19dbd514385e4a Mon Sep 17 00:00:00 2001 From: Vladimir Dudnik Date: Tue, 22 Feb 2022 23:37:55 +0300 Subject: [PATCH 083/310] [OMZ] update OMZ submodule, docs updated (#10594) * update OMZ submodule, docs updated * rebase to master --- thirdparty/open_model_zoo | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/open_model_zoo b/thirdparty/open_model_zoo index d4f504bf6f9..2792b159890 160000 --- a/thirdparty/open_model_zoo +++ b/thirdparty/open_model_zoo @@ -1 +1 @@ -Subproject commit d4f504bf6f96647c0a5727bf69cbfa2721a8717d +Subproject commit 2792b159890a6c7bf0b4059f32f96439c776848d From 75cca1e9e97fc9ab657fabd09f6938077d35b3e5 Mon Sep 17 00:00:00 2001 From: Fedor Zharinov Date: Wed, 23 Feb 2022 01:30:08 +0300 Subject: [PATCH 084/310] [benchamrk_app] error if -b is set but there's no batch info (#10592) * Added code showing error message if -b is provided, but got no batch info for inputs * stylefix / batch>1 case --- samples/cpp/benchmark_app/utils.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/samples/cpp/benchmark_app/utils.cpp b/samples/cpp/benchmark_app/utils.cpp index b9384303859..f943d637afa 100644 --- a/samples/cpp/benchmark_app/utils.cpp +++ b/samples/cpp/benchmark_app/utils.cpp @@ -451,6 +451,7 @@ std::vector get_inputs_info(const std::string& shape_ for (size_t i = 0; i < min_size; ++i) { benchmark_app::InputsInfo info_map; + bool is_there_at_least_one_batch_dim = false; for (auto& item : input_info) { benchmark_app::InputInfo info; auto name = item.get_any_name(); @@ -602,6 +603,7 @@ std::vector get_inputs_info(const std::string& shape_ } info.dataShape[batch_index] = batch_size; reshape_required = true; + is_there_at_least_one_batch_dim = true; } } else { slog::warn << "Input '" << item.get_any_name() @@ -612,6 +614,12 @@ std::vector get_inputs_info(const std::string& shape_ info_map[name] = info; } + if (batch_size > 1 && 
!is_there_at_least_one_batch_dim) { + throw std::runtime_error("-b option is provided in command line, but there's no inputs with batch(B) " + "dimension in input layout, so batch cannot be set. " + "You may specify layout explicitly using -layout option."); + } + // Update scale and mean std::map> scale_map = parse_scale_or_mean(scale_string, info_map); std::map> mean_map = parse_scale_or_mean(mean_string, info_map); From 7ff8ada805f9526ac76d8d6d80f8896efc1b0985 Mon Sep 17 00:00:00 2001 From: Ilya Churaev Date: Wed, 23 Feb 2022 06:29:03 +0300 Subject: [PATCH 085/310] Fixed API for transformations (#10584) * Fixed API for transformations * Fixed code style * Fixed build * Fixed typo --- docs/IE_PLUGIN_DG/Intro.md | 2 +- docs/IE_PLUGIN_DG/Plugin.md | 18 ++-- docs/IE_PLUGIN_DG/PluginTesting.md | 10 +- docs/OV_Runtime_UG/model_representation.md | 2 +- docs/OV_Runtime_UG/openvino_temporary.md | 2 +- ...ransformation.md => ov_transformations.md} | 78 ++++----------- docs/template_plugin/src/template_plugin.cpp | 4 +- .../template_function_transformation.hpp | 24 ----- ....cpp => template_model_transformation.cpp} | 15 ++- .../template_model_transformation.hpp | 24 +++++ .../template_pattern_transformation.cpp | 96 +++++++++---------- .../template_pattern_transformation.hpp | 14 +-- .../include/openvino/cc/ngraph/itt.hpp | 34 +------ .../include/openvino/cc/pass/itt.hpp | 46 +++++++++ src/core/include/ngraph/rt_info.hpp | 17 +--- src/core/include/openvino/core/rt_info.hpp | 28 ++++++ src/core/src/rt_info.cpp | 25 +++-- 17 files changed, 209 insertions(+), 230 deletions(-) rename docs/OV_Runtime_UG/{nGraphTransformation.md => ov_transformations.md} (82%) delete mode 100644 docs/template_plugin/src/transformations/template_function_transformation.hpp rename docs/template_plugin/src/transformations/{template_function_transformation.cpp => template_model_transformation.cpp} (72%) create mode 100644 docs/template_plugin/src/transformations/template_model_transformation.hpp create mode 100644 src/common/conditional_compilation/include/openvino/cc/pass/itt.hpp create mode 100644 src/core/include/openvino/core/rt_info.hpp diff --git a/docs/IE_PLUGIN_DG/Intro.md b/docs/IE_PLUGIN_DG/Intro.md index 17228b12115..99cbffbec9c 100644 --- a/docs/IE_PLUGIN_DG/Intro.md +++ b/docs/IE_PLUGIN_DG/Intro.md @@ -56,7 +56,7 @@ Detailed guides * Plugin and its components [testing](@ref openvino_docs_ie_plugin_dg_plugin_testing) * [Quantized networks](@ref openvino_docs_ie_plugin_dg_quantized_networks) * [Low precision transformations](@ref openvino_docs_IE_DG_lpt) guide -* [Writing nGraph transformations](@ref ngraph_transformation) guide +* [Writing OpenVINO™ transformations](@ref openvino_docs_transformations) guide API References ----------------------- diff --git a/docs/IE_PLUGIN_DG/Plugin.md b/docs/IE_PLUGIN_DG/Plugin.md index f92a03021a8..410bed856c9 100644 --- a/docs/IE_PLUGIN_DG/Plugin.md +++ b/docs/IE_PLUGIN_DG/Plugin.md @@ -30,7 +30,7 @@ Based on that, declaration of a plugin class can look as follows: The provided plugin class also has several fields: -* `_backend` - a backend engine that is used to perform actual computations for network inference. For `Template` plugin `ngraph::runtime::Backend` is used which performs computations using ngraph reference implementations. +* `_backend` - a backend engine that is used to perform actual computations for network inference. For `Template` plugin `ngraph::runtime::Backend` is used which performs computations using OpenVINO™ reference implementations. 
* `_waitExecutor` - a task executor that waits for a response from a device about device tasks completion. * `_cfg` of type `Configuration`: @@ -67,7 +67,7 @@ which holds a backend-dependent compiled graph in an internal representation: Before a creation of an `ExecutableNetwork` instance via a constructor, a plugin may check if a provided InferenceEngine::ICNNNetwork object is supported by a device. In the example above, the plugin checks precision information. -The very important part before creation of `ExecutableNetwork` instance is to call `TransformNetwork` method which applies ngraph transformation passes. +The very important part before creation of `ExecutableNetwork` instance is to call `TransformNetwork` method which applies OpenVINO™ transformation passes. Actual graph compilation is done in the `ExecutableNetwork` constructor. Refer to the [ExecutableNetwork Implementation Guide](@ref openvino_docs_ie_plugin_dg_executable_network) for details. @@ -77,27 +77,27 @@ Actual graph compilation is done in the `ExecutableNetwork` constructor. Refer t ### `TransformNetwork()` -The function accepts a const shared pointer to `ngraph::Function` object and performs the following steps: +The function accepts a const shared pointer to `ov::Model` object and performs the following steps: 1. Deep copies a const object to a local object, which can later be modified. -2. Applies common and plugin-specific transformations on a copied graph to make the graph more friendly to hardware operations. For details how to write custom plugin-specific transformation, please, refer to [Writing ngraph transformations](@ref ngraph_transformation) guide. See detailed topics about network representation: +2. Applies common and plugin-specific transformations on a copied graph to make the graph more friendly to hardware operations. For details how to write custom plugin-specific transformation, please, refer to [Writing OpenVINO™ transformations](@ref openvino_docs_transformations) guide. See detailed topics about network representation: * [Intermediate Representation and Operation Sets](../_docs_MO_DG_IR_and_opsets.html) * [Quantized networks](@ref openvino_docs_ie_plugin_dg_quantized_networks). @snippet template_plugin/src/template_plugin.cpp plugin:transform_network -> **NOTE**: After all these transformations, a `ngraph::Function` object contains operations which can be perfectly mapped to backend kernels. E.g. if backend has kernel computing `A + B` operations at once, the `TransformNetwork` function should contain a pass which fuses operations `A` and `B` into a single custom operation `A + B` which fits backend kernels set. +> **NOTE**: After all these transformations, a `ov::Model` object contains operations which can be perfectly mapped to backend kernels. E.g. if backend has kernel computing `A + B` operations at once, the `TransformNetwork` function should contain a pass which fuses operations `A` and `B` into a single custom operation `A + B` which fits backend kernels set. ### `QueryNetwork()` Use the method with the `HETERO` mode, which allows to distribute network execution between different -devices based on the `ngraph::Node::get_rt_info()` map, which can contain the `"affinity"` key. +devices based on the `ov::Node::get_rt_info()` map, which can contain the `"affinity"` key. The `QueryNetwork` method analyzes operations of provided `network` and returns a list of supported -operations via the InferenceEngine::QueryNetworkResult structure. 
The `QueryNetwork` firstly applies `TransformNetwork` passes to input `ngraph::Function` argument. After this, the transformed network in ideal case contains only operations are 1:1 mapped to kernels in computational backend. In this case, it's very easy to analyze which operations is supposed (`_backend` has a kernel for such operation or extensions for the operation is provided) and not supported (kernel is missed in `_backend`): +operations via the InferenceEngine::QueryNetworkResult structure. The `QueryNetwork` firstly applies `TransformNetwork` passes to input `ov::Model` argument. After this, the transformed network in ideal case contains only operations are 1:1 mapped to kernels in computational backend. In this case, it's very easy to analyze which operations is supposed (`_backend` has a kernel for such operation or extensions for the operation is provided) and not supported (kernel is missed in `_backend`): -1. Store original names of all operations in input `ngraph::Function` +1. Store original names of all operations in input `ov::Model` 2. Apply `TransformNetwork` passes. Note, the names of operations in a transformed network can be different and we need to restore the mapping in the steps below. -3. Construct `supported` and `unsupported` maps which contains names of original operations. Note, that since the inference is performed using ngraph reference backend, the decision whether the operation is supported or not depends on whether the latest OpenVINO opset contains such operation. +3. Construct `supported` and `unsupported` maps which contains names of original operations. Note, that since the inference is performed using OpenVINO™ reference backend, the decision whether the operation is supported or not depends on whether the latest OpenVINO opset contains such operation. 4. `QueryNetworkResult.supportedLayersMap` contains only operations which are fully supported by `_backend`. @snippet template_plugin/src/template_plugin.cpp plugin:query_network diff --git a/docs/IE_PLUGIN_DG/PluginTesting.md b/docs/IE_PLUGIN_DG/PluginTesting.md index a5d75cd21b8..9ed3fa8911f 100644 --- a/docs/IE_PLUGIN_DG/PluginTesting.md +++ b/docs/IE_PLUGIN_DG/PluginTesting.md @@ -26,7 +26,7 @@ Engine concepts: plugin creation, multiple executable networks support, multiple @snippet single_layer_tests/convolution.cpp test_convolution:instantiate 3. **Sub-graph tests** (`subgraph_tests` sub-folder). This group of tests is designed to tests small patterns or combination of layers. E.g. when a particular topology is being enabled in a plugin e.g. TF ResNet-50, there is no need to add the whole topology to test tests. In opposite way, a particular repetitive subgraph or pattern can be extracted from `ResNet-50` and added to the tests. The instantiation of the sub-graph tests is done in the same way as for single layer tests. -> **Note**, such sub-graphs or patterns for sub-graph tests should be added to `IE::ngraphFunctions` library first (this library is a pre-defined set of small `ngraph::Function`) and re-used in sub-graph tests after. +> **Note**, such sub-graphs or patterns for sub-graph tests should be added to `IE::ngraphFunctions` library first (this library is a pre-defined set of small `ov::Model`) and re-used in sub-graph tests after. 4. **HETERO tests** (`subgraph_tests` sub-folder) contains tests for `HETERO` scenario (manual or automatic affinities settings, tests for `QueryNetwork`). 
@@ -41,18 +41,18 @@ To use these tests for your own plugin development, link the `IE::funcSharedTest To build test binaries together with other build artifacts, use the `make all` command. For details, see [Build Plugin Using CMake*](@ref openvino_docs_ie_plugin_dg_plugin_build). -### Tests for plugin-specific ngraph transformations +### Tests for plugin-specific OpenVINO™ transformations -Please, refer to [Transformation testing](@ref ngraph_transformation) guide. +Please, refer to [Transformation testing](@ref openvino_docs_transformations) guide. ### How to Extend Inference Engine Plugin Tests Inference Engine Plugin tests are open for contribution. Add common test case definitions applicable for all plugins to the `IE::funcSharedTests` target within the DLDT repository. Then, any other plugin supporting corresponding functionality can instantiate the new test. -All Inference Engine per-layer tests check test layers functionality. They are developed using nGraph functions +All Inference Engine per-layer tests check test layers functionality. They are developed using ov::Models as input graphs used by tests. In this case, to test a new layer with layer tests, extend -the `IE::ngraphFunctions` library, which is also included in the Inference Engine Developer package, with a new nGraph function +the `IE::ngraphFunctions` library, which is also included in the Inference Engine Developer package, with a new OpenVINO™ Model including the corresponding operation. > **NOTE**: When implementing a new subgraph test, add new single-layer tests for each operation of the subgraph if such test does not exist. diff --git a/docs/OV_Runtime_UG/model_representation.md b/docs/OV_Runtime_UG/model_representation.md index 91202aeb8c8..eedf4291104 100644 --- a/docs/OV_Runtime_UG/model_representation.md +++ b/docs/OV_Runtime_UG/model_representation.md @@ -83,7 +83,7 @@ The following code creates a model with several outputs: @snippet example_ngraph_utils.cpp ov:serialize ### How can I develop my own transformation pass? - See the [Transformations Developer Guide](./nGraphTransformation.md). + See the [Transformations Developer Guide](./ov_transformations.md). ## See Also diff --git a/docs/OV_Runtime_UG/openvino_temporary.md b/docs/OV_Runtime_UG/openvino_temporary.md index df5aae0f7d4..ed102170cac 100644 --- a/docs/OV_Runtime_UG/openvino_temporary.md +++ b/docs/OV_Runtime_UG/openvino_temporary.md @@ -12,7 +12,7 @@ openvino_docs_IE_DG_Model_caching_overview openvino_docs_IE_DG_Int8Inference openvino_docs_IE_DG_Bfloat16Inference - ngraph_transformation + openvino_docs_transformations openvino_docs_IE_DG_Extensibility_DG_Intro @endsphinxdirective diff --git a/docs/OV_Runtime_UG/nGraphTransformation.md b/docs/OV_Runtime_UG/ov_transformations.md similarity index 82% rename from docs/OV_Runtime_UG/nGraphTransformation.md rename to docs/OV_Runtime_UG/ov_transformations.md index 39fb3606e5a..14a65c34d4c 100644 --- a/docs/OV_Runtime_UG/nGraphTransformation.md +++ b/docs/OV_Runtime_UG/ov_transformations.md @@ -1,84 +1,42 @@ -# Overview of Transformations API {#ngraph_transformation} +# Overview of Transformations API {#openvino_docs_transformations} -This guide contains all necessary information that you need to start implementing nGraph transformations. 
- -## Prerequisites -Before creating a transformation, do the following: - -* Make sure that there is no transformation with the same functionality in the [Transformation Library](group__ie__transformation__api.html) -* Learn how the [Transformation Library](group__ie__transformation__api.html) is structured and how transformations are organized -* Understand where to put your transformation code - -### Transformation Library Structure -OpenVINO transformations are located in the `src/common/transformations` directory. - -Transformations root directory contains two folders: -* `ngraph_ops` - Contains internal opset operations that are common for plugins. -* `transformations` - Includes all transformations, utils, runtime info attributes, and pass managers. - -All internal operations and transformations located inside the [Transformation Library](group__ie__transformation__api.html) can be used inside plugins. -All legacy operations and transformations were moved to a legacy library and are not recommended to be used. - -### Transformation Flow Layers -Transformation flow in the transformation library has several layers: - -1. Pass managers - Execute any type of transformations and provide additional debug capabilities. -2. Transformations - Perform a particular transformation algorithm on `ngraph::Function`. -3. Low-level functions - Take a set of nodes and perform some transformation action. -They are not mandatory and all transformation code can be located inside the transformation. -But if some transformation parts can potentially be reused in other transformations, we suggest keeping them as separate functions. - -### Location for Your Transformation Code -To decide where to store your transformation code, please follow these rules: - -1. If it is a plugin-specific transformation and cannot be reused by other plugins, keep source code inside plugin. -2. If this transformation relates to opset operation conversion or optimization, keep sources inside the transformation library. - -After you decide where to store your transformation code, you can start developing your own nGraph transformation. - -## ngraph::Function and graph representation - -nGraph function is a very simple thing: it stores shared pointers to `ngraph::op::Parameter`, `ngraph::op::Result` and `ngraph::op::Sink` operations that are inputs, outputs and sinks of the graph. -Sinks of the graph have no consumers and not included into results vector. All other operations hold each other via shared pointers: child operation holds its parent (hard link). If operation has no consumers and it's not Result or Sink operation -(shared pointer counter is zero) then it will be destructed and won't be accessible anymore. Each operation in `ngraph::Function` has a `std::shared_ptr` type. - -For examples of how to build an nGraph function, see the [Build nGraph Function](./model_representation.md) page. +This guide contains all necessary information that you need to start implementing OpenVINO™ transformations. 
## Transformations types -nGraph has three main transformation types: +OpenVINO™ Runtime has three main transformation types: -* `ngraph::pass::FunctionPass` - straightforward way to work with `ngraph::Function` directly -* `ngraph::pass::MatcherPass` - pattern-based transformation approach -* `ngraph::pass::GraphRewrite` - container for matcher passes needed for efficient execution +* `ov::pass::ModelPass` - straightforward way to work with `ov::Model` directly +* `ov::pass::MatcherPass` - pattern-based transformation approach +* `ov::pass::GraphRewrite` - container for matcher passes needed for efficient execution ![transformations_structure] -### ngraph::pass::FunctionPass +### ov::pass::ModelPass -`ngraph::pass::FunctionPass` is used for transformations that take entire `ngraph::Function` as an input and process it. +`ov::pass::ModelPass` is used for transformations that take entire `ov::Model` as an input and process it. Template for FunctionPass transformation class -@snippet src/transformations/template_function_transformation.hpp function_pass:template_transformation_hpp +@snippet src/transformations/template_model_transformation.hpp model_pass:template_transformation_hpp -@snippet src/transformations/template_function_transformation.cpp function_pass:template_transformation_cpp +@snippet src/transformations/template_model_transformation.cpp model_pass:template_transformation_cpp -Using `ngraph::FunctionPass`, you need to override the `run_on_function` method where you will write the transformation code. +Using `ov::pass::ModelPass`, you need to override the `run_on_model` method where you will write the transformation code. Return value is `true` if the original function has changed during transformation (new operation was added, or operations replacement was made, or node attributes were changed); otherwise, it is `false`. -For transformation API, please follow the [working with ngraph::Function](#working_with_ngraph_function) section. -Also `ngraph::FunctionPass` based transformations can be executed via `pass::Manager`. See the examples in the [Using pass manager](#using_pass_manager) section. +For transformation API, please follow the [working with ov::Model](#working_with_ov_model) section. +Also `ov::pass::ModelPass` based transformations can be executed via `ov::pass::Manager`. See the examples in the [Using pass manager](#using_pass_manager) section. -### ngraph::pass::MatcherPass +### ov::pass::MatcherPass -`ngraph::pass::MatcherPass` is used for pattern-based transformations. +`ov::pass::MatcherPass` is used for pattern-based transformations. Template for MatcherPass transformation class @snippet src/transformations/template_pattern_transformation.hpp graph_rewrite:template_transformation_hpp @snippet src/transformations/template_pattern_transformation.cpp graph_rewrite:template_transformation_cpp -To use `ngraph::pass::MatcherPass`, you need to complete these steps: +To use `ov::pass::MatcherPass`, you need to complete these steps: 1. Create a pattern 2. Implement a callback 3. Register the pattern and Matcher @@ -87,7 +45,7 @@ To use `ngraph::pass::MatcherPass`, you need to complete these steps: So let's go through each of these steps. ### Create a pattern -Pattern is a single root `ngraph::Function`. But the only difference is that you do not need to create a function object, you just need to create and connect opset or special pattern operations. +Pattern is a single root `ov::Model`. 
But the only difference is that you do not need to create a function object, you just need to create and connect opset or special pattern operations. Then you need to take the last created operation and put it as a root of the pattern. This root node will be used as a root node in pattern matching. > **NOTE**: Any nodes in a pattern that have no consumers and are not registered as root will not be used in pattern matching. @@ -186,7 +144,7 @@ This example shows how to use predicate to construct a pattern. Also it shows ho > **NOTE**: Be careful with manual matching because Matcher object holds matched nodes. To clear a match, use the m->clear_state() method. -## Working with ngraph::Function +## Working with ngraph::Function In this chapter we will review nGraph API that allows us to manipulate with `ngraph::Function`. diff --git a/docs/template_plugin/src/template_plugin.cpp b/docs/template_plugin/src/template_plugin.cpp index acd4a809664..76c3dbc2182 100644 --- a/docs/template_plugin/src/template_plugin.cpp +++ b/docs/template_plugin/src/template_plugin.cpp @@ -82,8 +82,8 @@ std::shared_ptr TransformNetwork(const std::shared_ptr(); - passManager.register_pass(); + passManager.register_pass(); + passManager.register_pass(); // Register any other transformations // .. diff --git a/docs/template_plugin/src/transformations/template_function_transformation.hpp b/docs/template_plugin/src/transformations/template_function_transformation.hpp deleted file mode 100644 index 68641470730..00000000000 --- a/docs/template_plugin/src/transformations/template_function_transformation.hpp +++ /dev/null @@ -1,24 +0,0 @@ -// Copyright (C) 2018-2022 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include - -namespace ngraph { -namespace pass { - -class MyFunctionTransformation; - -} // namespace pass -} // namespace ngraph - -// ! [function_pass:template_transformation_hpp] -// template_function_transformation.hpp -class ngraph::pass::MyFunctionTransformation : public ngraph::pass::FunctionPass { -public: - NGRAPH_RTTI_DECLARATION; - bool run_on_model(const std::shared_ptr& f) override; -}; -// ! [function_pass:template_transformation_hpp] diff --git a/docs/template_plugin/src/transformations/template_function_transformation.cpp b/docs/template_plugin/src/transformations/template_model_transformation.cpp similarity index 72% rename from docs/template_plugin/src/transformations/template_function_transformation.cpp rename to docs/template_plugin/src/transformations/template_model_transformation.cpp index 669b5c811e2..4319da6fcbe 100644 --- a/docs/template_plugin/src/transformations/template_function_transformation.cpp +++ b/docs/template_plugin/src/transformations/template_model_transformation.cpp @@ -2,18 +2,15 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "template_function_transformation.hpp" +#include "template_model_transformation.hpp" -#include +#include -using namespace ngraph; - -// ! [function_pass:template_transformation_cpp] +// ! 
[model_pass:template_transformation_cpp] // template_function_transformation.cpp -NGRAPH_RTTI_DEFINITION(ngraph::pass::MyFunctionTransformation, "MyFunctionTransformation", 0); -bool pass::MyFunctionTransformation::run_on_model(const std::shared_ptr& f) { - RUN_ON_FUNCTION_SCOPE(MyFunctionTransformation); +bool ov::pass::MyFunctionTransformation::run_on_model(const std::shared_ptr& f) { + RUN_ON_MODEL_SCOPE(MyFunctionTransformation); // Example transformation code NodeVector nodes; @@ -40,4 +37,4 @@ bool pass::MyFunctionTransformation::run_on_model(const std::shared_ptr + +namespace ov { +namespace pass { + +class MyFunctionTransformation; + +} // namespace pass +} // namespace ov + +// ! [model_pass:template_transformation_hpp] +// template_model_transformation.hpp +class ov::pass::MyFunctionTransformation : public ov::pass::ModelPass { +public: + OPENVINO_RTTI("MyFunctionTransformation", "0"); + bool run_on_model(const std::shared_ptr& f) override; +}; +// ! [model_pass:template_transformation_hpp] diff --git a/docs/template_plugin/src/transformations/template_pattern_transformation.cpp b/docs/template_plugin/src/transformations/template_pattern_transformation.cpp index 7169d5946fc..21da6aa2d64 100644 --- a/docs/template_plugin/src/transformations/template_pattern_transformation.cpp +++ b/docs/template_plugin/src/transformations/template_pattern_transformation.cpp @@ -4,154 +4,148 @@ #include "transformations/template_pattern_transformation.hpp" -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include -#include "transformations/template_function_transformation.hpp" - -using namespace ngraph; +#include "transformations/template_model_transformation.hpp" // ! [graph_rewrite:template_transformation_cpp] // template_pattern_transformation.cpp -NGRAPH_RTTI_DEFINITION(ngraph::pass::DecomposeDivideMatcher, "DecomposeDivideMatcher", 0); - -ngraph::pass::DecomposeDivideMatcher::DecomposeDivideMatcher() { +ov::pass::DecomposeDivideMatcher::DecomposeDivideMatcher() { MATCHER_SCOPE(DecomposeDivideMatcher); // Pattern example auto input0 = pattern::any_input(); auto input1 = pattern::any_input(); - auto div = std::make_shared(input0, input1); + auto div = std::make_shared(input0, input1); - ngraph::matcher_pass_callback callback = [](pattern::Matcher& m) { - auto div = std::dynamic_pointer_cast(m.get_match_root()); + ov::matcher_pass_callback callback = [](pattern::Matcher& m) { + auto div = std::dynamic_pointer_cast(m.get_match_root()); // We can not apply this transformation in case with integer input data type if (!div || div->input(0).get_element_type().is_integral()) { return false; } // Decompose Divide into Multiply with Power operations - auto pow = std::make_shared( + auto pow = std::make_shared( div->input_value(1), opset3::Constant::create(div->get_input_element_type(1), Shape{1}, {-1})); - auto mul = std::make_shared(div->input_value(0), pow); + auto mul = std::make_shared(div->input_value(0), pow); // Save original name to last operation in replacement sub-graph mul->set_friendly_name(div->get_friendly_name()); // Copy runtime info attributes to newly created operation - ngraph::copy_runtime_info(div, {pow, mul}); + ov::copy_runtime_info(div, {pow, mul}); // Replace Divide operation with Multiply - ngraph::replace_node(div, mul); + ov::replace_node(div, mul); // Return true as the root node was changed return true; }; // Register pattern with Divide operation as a pattern root node - auto m = std::make_shared(div, "ConvertDivide"); + auto m = 
std::make_shared(div, "ConvertDivide"); // Register Matcher register_matcher(m, callback); } // ! [graph_rewrite:template_transformation_cpp] // ! [matcher_pass:relu_fusion] -NGRAPH_RTTI_DEFINITION(ngraph::pass::ReluReluFusionMatcher, "ReluReluFusionMatcher", 0); - -ngraph::pass::ReluReluFusionMatcher::ReluReluFusionMatcher() { +ov::pass::ReluReluFusionMatcher::ReluReluFusionMatcher() { MATCHER_SCOPE(ReluReluFusionMatcher); - auto m_relu1 = ngraph::pattern::wrap_type(pattern::consumers_count(1)); - auto m_relu2 = ngraph::pattern::wrap_type({m_relu1}); + auto m_relu1 = ov::pass::pattern::wrap_type(pattern::consumers_count(1)); + auto m_relu2 = ov::pass::pattern::wrap_type({m_relu1}); - ngraph::matcher_pass_callback callback = [=](pattern::Matcher& m) { + ov::matcher_pass_callback callback = [=](pattern::Matcher& m) { // Map that helps to connect labels with matched outputs auto& node_to_output = m.get_pattern_value_map(); // Create new Relu operation and add register it for additional execution auto new_relu = - register_new_node(node_to_output.at(m_relu1).get_node_shared_ptr()->input_value(0)); + register_new_node(node_to_output.at(m_relu1).get_node_shared_ptr()->input_value(0)); // Copy runtime info attributes to newly created operation - ngraph::copy_runtime_info(m.get_matched_nodes(), new_relu); + ov::copy_runtime_info(m.get_matched_nodes(), new_relu); // Save last Relu name to new Relu operation new_relu->set_friendly_name(m.get_match_root()->get_friendly_name()); // Replace Relu->Relu with Relu - ngraph::replace_node(m.get_match_root(), new_relu); + ov::replace_node(m.get_match_root(), new_relu); // Return true as the root node was changed return true; }; // Register pattern with Relu operation as a pattern root node - auto m = std::make_shared(m_relu2, "ReluReluFusion"); + auto m = std::make_shared(m_relu2, "ReluReluFusion"); // Register Matcher register_matcher(m, callback); } // ! [matcher_pass:relu_fusion] -void run_matcher_on_node(std::shared_ptr node) { +void run_matcher_on_node(std::shared_ptr node) { // ! [matcher_pass:run_on_node] - if (ngraph::pass::DecomposeDivideMatcher().apply(node)) { + if (ov::pass::DecomposeDivideMatcher().apply(node)) { // successful execution (root node was replaced) } // ! [matcher_pass:run_on_node] } -void run_matcher_with_manager(std::shared_ptr f) { +void run_matcher_with_manager(std::shared_ptr f) { // ! [matcher_pass:manager] // Two matchers will run independently (two independent graph traversals) // pass::Manager automatically creates GraphRewrite container for each MatcherPass - pass::Manager manager; - manager.register_pass(); - manager.register_pass(); + ov::pass::Manager manager; + manager.register_pass(); + manager.register_pass(); manager.run_passes(f); // ! [matcher_pass:manager] } -void run_matcher_with_manager2(std::shared_ptr f) { +void run_matcher_with_manager2(std::shared_ptr f) { // ! [matcher_pass:manager2] // Register anchor GraphRewrite pass inside manager that will execute two matchers simultaneously - pass::Manager manager; - auto anchor = manager.register_pass(); - anchor->add_matcher(); - anchor->add_matcher(); + ov::pass::Manager manager; + auto anchor = manager.register_pass(); + anchor->add_matcher(); + anchor->add_matcher(); manager.run_passes(f); // ! [matcher_pass:manager2] } -void run_matcher_with_manager3(std::shared_ptr f) { +void run_matcher_with_manager3(std::shared_ptr f) { // ! 
[matcher_pass:manager3] - pass::Manager manager; - manager.register_pass(); + ov::pass::Manager manager; + manager.register_pass(); // Two matchers will run independently (two independent graph traversals) // pass::Manager automatically creates GraphRewrite container for each MatcherPass - manager.register_pass(); - manager.register_pass(); + manager.register_pass(); + manager.register_pass(); manager.run_passes(f); // ! [matcher_pass:manager3] } -void run_matcher_with_gr(std::shared_ptr f) { +void run_matcher_with_gr(std::shared_ptr f) { // ! [matcher_pass:graph_rewrite] // Two matcher passes will run simultaneously in a single graph traversal - ngraph::pass::GraphRewrite pass; - pass.add_matcher(); - pass.add_matcher(); + ov::pass::GraphRewrite pass; + pass.add_matcher(); + pass.add_matcher(); pass.run_on_model(f); // ! [matcher_pass:graph_rewrite] } // ! [manual_constant_folding] template -Output eltwise_fold(const Output& input0, const Output& input1) { +ov::Output eltwise_fold(const ov::Output& input0, const ov::Output& input1) { auto eltwise = std::make_shared(input0, input1); - OutputVector output(eltwise->get_output_size()); + ov::OutputVector output(eltwise->get_output_size()); // If constant folding wasn't successful return eltwise output if (!eltwise->constant_fold(output, {input0, input1})) { return eltwise->output(0); diff --git a/docs/template_plugin/src/transformations/template_pattern_transformation.hpp b/docs/template_plugin/src/transformations/template_pattern_transformation.hpp index 9329ed92ead..7f62a4bd916 100644 --- a/docs/template_plugin/src/transformations/template_pattern_transformation.hpp +++ b/docs/template_plugin/src/transformations/template_pattern_transformation.hpp @@ -4,16 +4,16 @@ #pragma once -#include +#include -namespace ngraph { +namespace ov { namespace pass { class DecomposeDivideMatcher; class ReluReluFusionMatcher; } // namespace pass -} // namespace ngraph +} // namespace ov // ! [graph_rewrite:template_transformation_hpp] // transformations/template_pattern_transformation.hpp @@ -21,15 +21,15 @@ class ReluReluFusionMatcher; * @ingroup ie_transformation_common_api * @brief Add transformation description. */ -class ngraph::pass::DecomposeDivideMatcher : public ngraph::pass::MatcherPass { +class ov::pass::DecomposeDivideMatcher : public ov::pass::MatcherPass { public: - NGRAPH_RTTI_DECLARATION; + OPENVINO_RTTI("DecomposeDivideMatcher", "0"); DecomposeDivideMatcher(); }; // ! 
[graph_rewrite:template_transformation_hpp] -class ngraph::pass::ReluReluFusionMatcher : public ngraph::pass::MatcherPass { +class ov::pass::ReluReluFusionMatcher : public ov::pass::MatcherPass { public: - NGRAPH_RTTI_DECLARATION; + OPENVINO_RTTI("ReluReluFusionMatcher", "0"); ReluReluFusionMatcher(); }; diff --git a/src/common/conditional_compilation/include/openvino/cc/ngraph/itt.hpp b/src/common/conditional_compilation/include/openvino/cc/ngraph/itt.hpp index 710020e6069..9c60ca4d200 100644 --- a/src/common/conditional_compilation/include/openvino/cc/ngraph/itt.hpp +++ b/src/common/conditional_compilation/include/openvino/cc/ngraph/itt.hpp @@ -4,36 +4,4 @@ #pragma once -#include -#include - -OV_CC_DOMAINS(ngraph_pass); - -/* - * RUN_ON_FUNCTION_SCOPE macro allows to disable the run_on_function pass - * MATCHER_SCOPE macro allows to disable the MatcherPass if matcher isn't applied - */ -#if defined(SELECTIVE_BUILD_ANALYZER) - -#define RUN_ON_FUNCTION_SCOPE(region) OV_SCOPE(ngraph_pass, OV_PP_CAT(region, _run_on_function)) -#define MATCHER_SCOPE(region) const std::string matcher_name(OV_PP_TOSTRING(region)) - -#elif defined(SELECTIVE_BUILD) - -#define MATCHER_SCOPE_(scope, region) \ - if (OV_CC_SCOPE_IS_ENABLED(OV_PP_CAT3(scope, _, region)) == 0) \ - throw ngraph::ngraph_error(std::string(OV_PP_TOSTRING(OV_PP_CAT3(scope, _, region))) + \ - " is disabled!") - -#define MATCHER_SCOPE(region) \ - const std::string matcher_name(OV_PP_TOSTRING(region)); \ - if (OV_CC_SCOPE_IS_ENABLED(OV_PP_CAT3(ngraph_pass, _, region)) == 0) \ - return -#define RUN_ON_FUNCTION_SCOPE(region) \ - MATCHER_SCOPE_(ngraph_pass, OV_PP_CAT(region, _run_on_function)) - -#else - -#define MATCHER_SCOPE(region) const std::string matcher_name(OV_PP_TOSTRING(region)) -#define RUN_ON_FUNCTION_SCOPE(region) -#endif +#include diff --git a/src/common/conditional_compilation/include/openvino/cc/pass/itt.hpp b/src/common/conditional_compilation/include/openvino/cc/pass/itt.hpp new file mode 100644 index 00000000000..dd3c23f9f5b --- /dev/null +++ b/src/common/conditional_compilation/include/openvino/cc/pass/itt.hpp @@ -0,0 +1,46 @@ +// Copyright (C) 2018-2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +OV_CC_DOMAINS(ov_pass); + +/* + * RUN_ON_MODEL_SCOPE macro allows to disable the run_on_function pass + * RUN_ON_FUNCTION_SCOPE macro allows to disable the run_on_function pass + * MATCHER_SCOPE macro allows to disable the MatcherPass if matcher isn't applied + */ +#if defined(SELECTIVE_BUILD_ANALYZER) + +#define RUN_ON_FUNCTION_SCOPE(region) OV_SCOPE(ov_pass, OV_PP_CAT(region, _run_on_function)) +#define MATCHER_SCOPE(region) const std::string matcher_name(OV_PP_TOSTRING(region)) +#define RUN_ON_MODEL_SCOPE(region) OV_SCOPE(ov_pass, OV_PP_CAT(region, _run_on_model)) + +#elif defined(SELECTIVE_BUILD) + +#define MATCHER_SCOPE_(scope, region) \ + if (OV_CC_SCOPE_IS_ENABLED(OV_PP_CAT3(scope, _, region)) == 0) \ + throw ngraph::ngraph_error(std::string(OV_PP_TOSTRING(OV_PP_CAT3(scope, _, region))) + \ + " is disabled!") + +#define MATCHER_SCOPE(region) \ + const std::string matcher_name(OV_PP_TOSTRING(region)); \ + if (OV_CC_SCOPE_IS_ENABLED(OV_PP_CAT3(ov_pass, _, region)) == 0) \ + return +#define RUN_ON_FUNCTION_SCOPE(region) \ + MATCHER_SCOPE_(ov_pass, OV_PP_CAT(region, _run_on_function)) + +#define RUN_ON_MODEL_SCOPE(region) \ + MATCHER_SCOPE_(ov_pass, OV_PP_CAT(region, _run_on_model)) + +#else + +#define MATCHER_SCOPE(region) const std::string 
matcher_name(OV_PP_TOSTRING(region)) +#define RUN_ON_FUNCTION_SCOPE(region) +#define RUN_ON_MODEL_SCOPE(region) +#endif + diff --git a/src/core/include/ngraph/rt_info.hpp b/src/core/include/ngraph/rt_info.hpp index 79b3b0d6466..f72389ab56a 100644 --- a/src/core/include/ngraph/rt_info.hpp +++ b/src/core/include/ngraph/rt_info.hpp @@ -9,22 +9,11 @@ #include "ngraph/ngraph_visibility.hpp" #include "ngraph/node.hpp" #include "ngraph/type.hpp" +#include "openvino/core/rt_info.hpp" namespace ngraph { -NGRAPH_API -void copy_runtime_info(std::shared_ptr from, std::shared_ptr to); - -NGRAPH_API -void copy_runtime_info(std::shared_ptr from, ngraph::NodeVector to); - -NGRAPH_API -void copy_runtime_info(const ngraph::NodeVector& from, std::shared_ptr to); - -NGRAPH_API -void copy_runtime_info(const ngraph::NodeVector& from, ngraph::NodeVector to); - -NGRAPH_API -void copy_output_runtime_info(const ngraph::OutputVector& from, ngraph::OutputVector to); +using ov::copy_output_runtime_info; +using ov::copy_runtime_info; } // namespace ngraph using ngraph::copy_runtime_info; diff --git a/src/core/include/openvino/core/rt_info.hpp b/src/core/include/openvino/core/rt_info.hpp new file mode 100644 index 00000000000..d930d20cc7d --- /dev/null +++ b/src/core/include/openvino/core/rt_info.hpp @@ -0,0 +1,28 @@ +// Copyright (C) 2018-2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +#include "openvino/core/core_visibility.hpp" +#include "openvino/core/node.hpp" +#include "openvino/core/type.hpp" + +namespace ov { +OPENVINO_API +void copy_runtime_info(const std::shared_ptr& from, const std::shared_ptr& to); + +OPENVINO_API +void copy_runtime_info(const std::shared_ptr& from, ov::NodeVector to); + +OPENVINO_API +void copy_runtime_info(const ov::NodeVector& from, const std::shared_ptr& to); + +OPENVINO_API +void copy_runtime_info(const ov::NodeVector& from, ov::NodeVector to); + +OPENVINO_API +void copy_output_runtime_info(const ov::OutputVector& from, ov::OutputVector to); +} // namespace ov diff --git a/src/core/src/rt_info.cpp b/src/core/src/rt_info.cpp index b735340877a..4baa061fbb2 100644 --- a/src/core/src/rt_info.cpp +++ b/src/core/src/rt_info.cpp @@ -2,14 +2,13 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "ngraph/rt_info.hpp" +#include "openvino/core/rt_info.hpp" -#include "ngraph/node.hpp" #include "ngraph/variant.hpp" namespace { -std::unordered_map> get_copyable_attrs(const ngraph::OutputVector& outputs) { +std::unordered_map> get_copyable_attrs(const ov::OutputVector& outputs) { std::unordered_map> attrs; for (const auto& output : outputs) { for (const auto& item : output.get_rt_info()) { @@ -25,7 +24,7 @@ std::unordered_map> get_copyable_attrs(const n return attrs; } -std::unordered_map> get_copyable_attrs(const ngraph::NodeVector& nodes) { +std::unordered_map> get_copyable_attrs(const ov::NodeVector& nodes) { std::unordered_map> attrs; for (const auto& node : nodes) { for (const auto& item : node->get_rt_info()) { @@ -42,10 +41,10 @@ std::unordered_map> get_copyable_attrs(const n } template -ngraph::Node::RTMap mergeRuntimeInfo(const T& items) { +ov::Node::RTMap mergeRuntimeInfo(const T& items) { std::unordered_map> attrs = get_copyable_attrs(items); - ngraph::Node::RTMap merged_attrs; + ov::Node::RTMap merged_attrs; for (auto& item : attrs) { auto attr = *item.second.begin(); if (item.second.size() == 1) { @@ -63,7 +62,7 @@ ngraph::Node::RTMap mergeRuntimeInfo(const T& items) { return merged_attrs; } -ov::Any get_opset(const ngraph::Node::RTMap& 
rt_info) { +ov::Any get_opset(const ov::Node::RTMap& rt_info) { auto it = rt_info.find("opset"); if (it != rt_info.end()) { return it->second; @@ -71,7 +70,7 @@ ov::Any get_opset(const ngraph::Node::RTMap& rt_info) { return nullptr; } -void assign_runtime_info(const ngraph::Node::RTMap& from, ngraph::Node::RTMap& to) { +void assign_runtime_info(const ov::Node::RTMap& from, ov::Node::RTMap& to) { auto opset = get_opset(to); for (auto& item : from) { to[item.first] = item.second; @@ -83,7 +82,7 @@ void assign_runtime_info(const ngraph::Node::RTMap& from, ngraph::Node::RTMap& t } // namespace -void ngraph::copy_runtime_info(std::shared_ptr from, std::shared_ptr to) { +void ov::copy_runtime_info(const std::shared_ptr& from, const std::shared_ptr& to) { auto& attrs = to->get_rt_info(); auto opset = get_opset(attrs); @@ -102,18 +101,18 @@ void ngraph::copy_runtime_info(std::shared_ptr from, std::shared_p } } -void ngraph::copy_runtime_info(std::shared_ptr from, ngraph::NodeVector to) { +void ov::copy_runtime_info(const std::shared_ptr& from, ov::NodeVector to) { for (auto& op : to) { copy_runtime_info(from, op); } } -void ngraph::copy_runtime_info(const ngraph::NodeVector& from, std::shared_ptr to) { +void ov::copy_runtime_info(const ov::NodeVector& from, const std::shared_ptr& to) { auto& rtInfoTo = to->get_rt_info(); assign_runtime_info(mergeRuntimeInfo(from), rtInfoTo); } -void ngraph::copy_runtime_info(const ngraph::NodeVector& from, ngraph::NodeVector to) { +void ov::copy_runtime_info(const ov::NodeVector& from, ov::NodeVector to) { auto mergedInfo = mergeRuntimeInfo(from); for (auto& node : to) { auto& rtInfoTo = node->get_rt_info(); @@ -121,7 +120,7 @@ void ngraph::copy_runtime_info(const ngraph::NodeVector& from, ngraph::NodeVecto } } -void ngraph::copy_output_runtime_info(const ngraph::OutputVector& from, ngraph::OutputVector to) { +void ov::copy_output_runtime_info(const ov::OutputVector& from, ov::OutputVector to) { auto mergedInfo = mergeRuntimeInfo(from); for (auto& node : to) { auto& rtInfoTo = node.get_rt_info(); From c1919a0f1d0721ec9efc464933baafaaea84826f Mon Sep 17 00:00:00 2001 From: Karol Blaszczak Date: Wed, 23 Feb 2022 10:53:37 +0100 Subject: [PATCH 086/310] update documents for Paddle inclusion (#10613) Introduce PaddlePaddle articles and include PP references in other articles --- docs/HOWTO/Custom_Layers_Guide.md | 43 ++++++++++--------- .../Supported_Frameworks_Layers.md | 32 +++++++------- .../Convert_Model_From_Paddle.md | 20 ++++----- .../convert_model/Converting_Model.md | 1 + .../Customize_Model_Optimizer.md | 15 ++++--- docs/OV_Runtime_UG/network_state_intro.md | 8 ++-- docs/OV_Runtime_UG/openvino_intro.md | 6 +-- docs/_static/images/ov_chart.png | 4 +- 8 files changed, 65 insertions(+), 64 deletions(-) diff --git a/docs/HOWTO/Custom_Layers_Guide.md b/docs/HOWTO/Custom_Layers_Guide.md index 2315acb0637..d7c63b66c30 100644 --- a/docs/HOWTO/Custom_Layers_Guide.md +++ b/docs/HOWTO/Custom_Layers_Guide.md @@ -1,19 +1,19 @@ # Custom Operations Guide {#openvino_docs_HOWTO_Custom_Layers_Guide} -The Intel® Distribution of OpenVINO™ toolkit supports neural network models trained with multiple frameworks including -TensorFlow*, Caffe*, MXNet*, Kaldi* and ONNX* file format. The list of supported operations (layers) is different for +The Intel® Distribution of OpenVINO™ toolkit supports neural network models trained with multiple frameworks, including +TensorFlow, Caffe, MXNet, Kaldi, PaddlePaddle, and ONNX. 
The list of supported operations (layers) is different for each of the supported frameworks. To see the operations supported by your framework, refer to [Supported Framework Layers](../MO_DG/prepare_model/Supported_Frameworks_Layers.md). Custom operations, that is those not included in the list, are not recognized by Model Optimizer out-of-the-box. Therefore, creating Intermediate Representation (IR) for a model using them requires additional steps. This guide illustrates the workflow for running inference on topologies featuring custom operations, allowing you to plug in your own implementation for existing or completely new operations. -> **NOTE**: *Layer* is a legacy term for *operation* which came from Caffe\* framework. Currently it is not used. +> **NOTE**: *Layer* is a legacy term for *operation* which came from Caffe framework. Currently it is not used. > Refer to the [Deep Learning Network Intermediate Representation and Operation Sets in OpenVINO™](../MO_DG/IR_and_opsets.md) > for more information on the topic. ## Terms Used in This Guide -- *Intermediate Representation (IR)* — OpenVINO's Neural Network format used by Inference Engine. It abstracts different frameworks and describs model topology, operations parameters, and weights. +- *Intermediate Representation (IR)* — OpenVINO's Neural Network format used by Inference Engine. It abstracts different frameworks and describes model topology, operations parameters, and weights. - *Operation* — an abstract concept of a math function selected for a specific purpose. Operations supported by OpenVINO™ are listed in the supported operation set provided in the [Available Operations Sets](../ops/opset.md). @@ -28,8 +28,8 @@ Custom operations, that is those not included in the list, are not recognized by ## Custom Operation Support Overview There are three steps to support inference of a model with custom operation(s): -1. Add support for a custom operation in the [Model Optimizer](../MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md) so -the Model Optimizer can generate the IR with the operation. +1. Add support for a custom operation in [Model Optimizer](../MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md) so +that it can generate the IR with the operation. 2. Create an operation set and implement a custom nGraph operation in it as described in the [Custom nGraph Operation](../OV_Runtime_UG/Extensibility_DG/AddingNGraphOps.md). 3. Implement a customer operation in one of the [OpenVINO™ Runtime](../OV_Runtime_UG/openvino_intro.md) @@ -59,7 +59,7 @@ operation. Refer to the "Operation Extractor" section of > **NOTE**: In some cases you may need to implement some transformation to support the operation. This topic is covered in the "Graph Transformation Extensions" section of [Model Optimizer Extensibility](../MO_DG/prepare_model/customize_model_optimizer/Customize_Model_Optimizer.md). -## Custom Operations Extensions for the Inference Engine +## Custom Operation Extensions for the Inference Engine Inference Engine provides an extension mechanism to support new operations. This mechanism is described in [Inference Engine Extensibility Mechanism](../OV_Runtime_UG/Extensibility_DG/Intro.md). @@ -80,8 +80,8 @@ operation and correctly infer output tensor shape and type. 
## Enabling Magnetic Resonance Image Reconstruction Model This chapter provides step-by-step instructions on how to enable the magnetic resonance image reconstruction model implemented in the [repository](https://github.com/rmsouza01/Hybrid-CS-Model-MRI/) using a custom operation on CPU. The example is prepared for a model generated from the repository with hash `2ede2f96161ce70dcdc922371fe6b6b254aafcc8`. -### Download and Convert the Model to a Frozen TensorFlow\* Model Format -The original pre-trained model is provided in the hdf5 format which is not supported by OpenVINO directly and needs to be converted to TensorFlow\* frozen model format first. +### Download and Convert the Model to a Frozen TensorFlow Model Format +The original pre-trained model is provided in the hdf5 format which is not supported by OpenVINO directly and needs to be converted to TensorFlow frozen model format first. 1. Download repository `https://github.com/rmsouza01/Hybrid-CS-Model-MRI`:
```bash @@ -117,11 +117,11 @@ Keras==2.2.4) which should be executed from the root of the cloned repository:/wnet_20.pb -b 1 ``` -> **NOTE**: This conversion guide is applicable for the 2021.3 release of OpenVINO and that starting from 2021.4 -> the OpenVINO supports this model out of the box. +> **NOTE**: This conversion guide is applicable for the 2021.3 release of OpenVINO and starting from 2021.4 +> OpenVINO has supported this model out of the box. Model Optimizer produces the following error: ```bash @@ -172,12 +172,12 @@ additional parameter `--log_level DEBUG`. It is worth to mention the following l This is a part of the log of the partial inference phase of the model conversion. See the "Partial Inference" section on the [Model Optimizer Extensibility](../MO_DG/prepare_model/customize_model_optimizer/Customize_Model_Optimizer.md) for more information about this phase. Model Optimizer inferred output shape for the unknown operation of type "Complex" -using a "fallback" to TensorFlow\*. However, it is not enough to generate the IR because Model Optimizer doesn't know +using a "fallback" to TensorFlow. However, it is not enough to generate the IR because Model Optimizer doesn't know which attributes of the operation should be saved to IR. So it is necessary to implement Model Optimizer extensions to support these operations. Before going into the extension development it is necessary to understand what these unsupported operations do according -to the TensorFlow\* framework specification. +to the TensorFlow framework specification. * "Complex" - returns a tensor of complex type constructed from two real input tensors specifying real and imaginary part of a complex number. @@ -342,8 +342,9 @@ python3 mri_reconstruction_demo.py \ ## Converting Models: -- [Convert Your Caffe* Model](../MO_DG/prepare_model/convert_model/Convert_Model_From_Caffe.md) -- [Convert Your TensorFlow* Model](../MO_DG/prepare_model/convert_model/Convert_Model_From_TensorFlow.md) -- [Convert Your MXNet* Model](../MO_DG/prepare_model/convert_model/Convert_Model_From_MxNet.md) -- [Convert Your Kaldi* Model](../MO_DG/prepare_model/convert_model/Convert_Model_From_Kaldi.md) -- [Convert Your ONNX* Model](../MO_DG/prepare_model/convert_model/Convert_Model_From_ONNX.md) +- [Convert Your Caffe Model](../MO_DG/prepare_model/convert_model/Convert_Model_From_Caffe.md) +- [Convert Your TensorFlow Model](../MO_DG/prepare_model/convert_model/Convert_Model_From_TensorFlow.md) +- [Convert Your MXNet Model](../MO_DG/prepare_model/convert_model/Convert_Model_From_MxNet.md) +- [Convert Your Kaldi Model](../MO_DG/prepare_model/convert_model/Convert_Model_From_Kaldi.md) +- [Convert Your ONNX Model](../MO_DG/prepare_model/convert_model/Convert_Model_From_ONNX.md) +- [Convert Your PaddlePaddle Model](../MO_DG/prepare_model/convert_model/Convert_Model_From_Paddle.md) diff --git a/docs/MO_DG/prepare_model/Supported_Frameworks_Layers.md b/docs/MO_DG/prepare_model/Supported_Frameworks_Layers.md index b43910dd12c..272062f828f 100644 --- a/docs/MO_DG/prepare_model/Supported_Frameworks_Layers.md +++ b/docs/MO_DG/prepare_model/Supported_Frameworks_Layers.md @@ -1,9 +1,9 @@ # Supported Framework Layers {#openvino_docs_MO_DG_prepare_model_Supported_Frameworks_Layers} -## Caffe\* Supported Layers +## Caffe Supported Layers -| Layer Name in Caffe\* | Limitations | +| Layer Name in Caffe | Limitations | |:---------- | :----------| | Axpy | | | BN | | @@ -47,10 +47,10 @@ | Tile | | -## MXNet\* Supported Symbols +## MXNet Supported Symbols -| Symbol 
Name in MXNet\*| Limitations| +| Symbol Name in MXNet| Limitations| | :----------| :----------| | _Plus | | | _contrib_arange_like | | @@ -119,7 +119,7 @@ | Concat | | | Convolution | | | Crop | "center_crop" = 1 is not supported | -| Custom | [Custom Layers in the Model Optimizer](customize_model_optimizer/Customize_Model_Optimizer.md) | +| Custom | [Custom Layers in Model Optimizer](customize_model_optimizer/Customize_Model_Optimizer.md) | | Deconvolution | | | DeformableConvolution | | | DeformablePSROIPooling | | @@ -149,12 +149,12 @@ | zeros_like | | -## TensorFlow\* Supported Operations +## TensorFlow Supported Operations -Some TensorFlow\* operations do not match to any Inference Engine layer, but are still supported by the Model Optimizer and can be used on constant propagation path. These layers are labeled 'Constant propagation' in the table. +Some TensorFlow operations do not match to any Inference Engine layer, but are still supported by the Model Optimizer and can be used on constant propagation path. These layers are labeled 'Constant propagation' in the table. -| Operation Name in TensorFlow\* | Limitations| +| Operation Name in TensorFlow | Limitations| | :----------| :----------| | Abs | | | Acosh | | @@ -348,10 +348,10 @@ Some TensorFlow\* operations do not match to any Inference Engine layer, but are | ZerosLike | | -## TensorFlow 2 Keras\* Supported Operations +## TensorFlow 2 Keras Supported Operations -| Operation Name in TensorFlow 2 Keras\* | Limitations| +| Operation Name in TensorFlow 2 Keras | Limitations| | :----------| :----------| | ActivityRegularization | | | Add | | @@ -431,10 +431,10 @@ Some TensorFlow\* operations do not match to any Inference Engine layer, but are | ZeroPadding2D | | | ZeroPadding3D | | -## Kaldi\* Supported Layers +## Kaldi Supported Layers -| Symbol Name in Kaldi\*| Limitations| +| Symbol Name in Kaldi| Limitations| | :----------| :----------| | addshift | | | affinecomponent | | @@ -478,10 +478,10 @@ Some TensorFlow\* operations do not match to any Inference Engine layer, but are | timeheightconvolutioncomponent | | -## ONNX\* Supported Operators +## ONNX Supported Operators -| Symbol Name in ONNX\*| Limitations| +| Symbol Name in ONNX| Limitations| | :----------| :----------| | Abs | | | Acos | | @@ -621,11 +621,11 @@ Some TensorFlow\* operations do not match to any Inference Engine layer, but are | Xor | | -## PaddlePaddle\* Supported Operators +## PaddlePaddle Supported Operators paddlepaddle>=2.1 -| Operator Name in PaddlePaddle\*| Limitations| +| Operator Name in PaddlePaddle| Limitations| | :----------| :----------| | adpative_pool2d | 'NHWC' data_layout is not supported | | arg_max | 'int32' output data_type is not supported | diff --git a/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_Paddle.md b/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_Paddle.md index c7ae7277c7f..ddf5a3313c7 100644 --- a/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_Paddle.md +++ b/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_Paddle.md @@ -1,9 +1,9 @@ -# Converting a Paddle* Model {#openvino_docs_MO_DG_prepare_model_convert_model_Convert_Model_From_Paddle} +# Converting a PaddlePaddle Model {#openvino_docs_MO_DG_prepare_model_convert_model_Convert_Model_From_Paddle} -A summary of the steps for optimizing and deploying a model trained with Paddle\*: +A summary of the steps for optimizing and deploying a model trained with PaddlePaddle: -1. 
[Configure the Model Optimizer](../../Deep_Learning_Model_Optimizer_DevGuide.md) for Paddle\*. -2. [Convert a Paddle\* Model](#Convert_From_Paddle) to produce an optimized [Intermediate Representation (IR)](../../IR_and_opsets.md) of the model based on the trained network topology, weights, and biases. +1. [Configure Model Optimizer](../../Deep_Learning_Model_Optimizer_DevGuide.md) for PaddlePaddle. +2. [Convert a PaddlePaddle Model](#Convert_From_Paddle) to produce an optimized [Intermediate Representation (IR)](../../IR_and_opsets.md) of the model based on the trained network topology, weights, and biases. 3. Test the model in the Intermediate Representation format using the [OpenVINO™ Runtime](../../../OV_Runtime_UG/openvino_intro.md) in the target environment via provided Inference Engine [sample applications](../../../OV_Runtime_UG/Samples_Overview.md). 4. [Integrate](../../../OV_Runtime_UG/Samples_Overview.md) the [OpenVINO™ Runtime](../../../OV_Runtime_UG/openvino_intro.md) in your application to deploy the model in the target environment. @@ -29,11 +29,11 @@ A summary of the steps for optimizing and deploying a model trained with Paddle\ > **NOTE:** The verified models are exported from the repository of branch release/2.1. -## Convert a Paddle* Model +## Convert a PaddlePaddle Model -To convert a Paddle\* model: +To convert a PaddlePaddle model: -1. Activate environment with installed OpenVINO if needed +1. Activate environment with installed OpenVINO™ if needed 2. Use the `mo` script to simply convert a model, specifying the framework, the path to the input model `.pdmodel` file and the path to an output directory with write permissions: ```sh mo --input_model .pdmodel --output_dir --framework=paddle @@ -44,13 +44,13 @@ Parameters to convert your model: * [Framework-agnostic parameters](Converting_Model.md): These parameters are used to convert a model trained with any supported framework. > **NOTE:** `--scale`, `--scale_values`, `--mean_values` are not supported in the current version of mo_paddle. -### Example of Converting a Paddle* Model -Below is the example command to convert yolo v3 Paddle\* network to OpenVINO IR network with Model Optimizer. +### Example of Converting a PaddlePaddle Model +Below is the example command to convert yolo v3 PaddlePaddle network to OpenVINO IR network with Model Optimizer. ```sh mo --model_name yolov3_darknet53_270e_coco --output_dir --framework=paddle --data_type=FP32 --reverse_input_channels --input_shape=[1,3,608,608],[1,2],[1,2] --input=image,im_shape,scale_factor --output=save_infer_model/scale_0.tmp_1,save_infer_model/scale_1.tmp_1 --input_model=yolov3.pdmodel ``` -## Supported Paddle\* Layers +## Supported PaddlePaddle Layers Refer to [Supported Framework Layers](../Supported_Frameworks_Layers.md) for the list of supported standard layers. 
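
As a quick sanity check of the converted model (an editor's illustrative sketch, not part of this patch; the IR name below is derived from the example command above), the produced IR can be run with the benchmark tool shipped with OpenVINO:

```sh
# Assumes the IR produced by the mo example above is in the current directory
benchmark_app -m yolov3_darknet53_270e_coco.xml -d CPU
```
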
## Frequently Asked Questions (FAQ) diff --git a/docs/MO_DG/prepare_model/convert_model/Converting_Model.md b/docs/MO_DG/prepare_model/convert_model/Converting_Model.md index b0fdd565f19..468688f3f4e 100644 --- a/docs/MO_DG/prepare_model/convert_model/Converting_Model.md +++ b/docs/MO_DG/prepare_model/convert_model/Converting_Model.md @@ -37,6 +37,7 @@ Framework-specific parameters for: * [TensorFlow](Convert_Model_From_TensorFlow.md) * [MXNet](Convert_Model_From_MxNet.md) * [ONNX](Convert_Model_From_ONNX.md) +* [PaddlePaddle](Convert_Model_From_Paddle.md) * [Kaldi](Convert_Model_From_Kaldi.md) diff --git a/docs/MO_DG/prepare_model/customize_model_optimizer/Customize_Model_Optimizer.md b/docs/MO_DG/prepare_model/customize_model_optimizer/Customize_Model_Optimizer.md index d3252704549..dd98faefda7 100644 --- a/docs/MO_DG/prepare_model/customize_model_optimizer/Customize_Model_Optimizer.md +++ b/docs/MO_DG/prepare_model/customize_model_optimizer/Customize_Model_Optimizer.md @@ -257,11 +257,13 @@ More information on how to develop middle transformations and dedicated API desc [Middle Phase Transformations](#middle-phase-transformations). ### NHWC to NCHW Layout Change -There are several middle transformations responsible for changing model layout from NHWC to NCHW. These transformations -are triggered by default for TensorFlow\* models only because it is the only framework with Convolution operations in -NHWC layout. This layout change is disabled if the model does not have operations that OpenVINO&trade needs to execute in -NCHW layout, for example, Convolutions in NHWC layout. It is still possible to force Model Optimizer to do layout change -using `--disable_nhwc_to_nchw` command-line parameter. + +There are several middle transformations responsible for changing model layout from NHWC to NCHW. These transformations are triggered by default for TensorFlow models as TensorFlow supports Convolution operations in the NHWC layout. + +This layout change is disabled automatically if the model does not have operations that OpenVINO&trade needs to execute in the NCHW layout, for example, Convolutions in NHWC layout. + +It is still possible to force Model Optimizer to do layout change, using `--disable_nhwc_to_nchw` command-line parameter, although it is not advised. + The layout change is a complex problem and detailed explanation of it is out of this document scope. A very brief explanation of this process is provided below: @@ -741,8 +743,7 @@ sub-graph of the original graph isomorphic to the specified pattern. 2. [Specific Operation Front Phase Transformations](#specific-operation-front-phase-transformations) triggered for the node with a specific `op` attribute value. 3. [Generic Front Phase Transformations](#generic-front-phase-transformations). -4. Manually enabled transformation defined with a JSON configuration file (for TensorFlow\*, ONNX\* and MXNet\* models -only) specified using the `--transformations_config` command line parameter: +4. Manually enabled transformation defined with a JSON configuration file (for TensorFlow, ONNX, MXNet, and PaddlePaddle models) specified using the `--transformations_config` command line parameter: 1. [Node Name Pattern Front Phase Transformations](#node-name-pattern-front-phase-transformation). 2. [Front Phase Transformations Using Start and End Points](#start-end-points-front-phase-transformations). 3. [Generic Front Phase Transformations Enabled with Transformations Configuration File](#generic-transformations-config-front-phase-transformations). 
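
To illustrate item 4 of the list above (an editor's sketch, not part of this patch; the model and configuration file names are placeholders), the JSON configuration file with manually enabled transformations is passed to Model Optimizer as follows:

```sh
# Hypothetical invocation; custom_front_transformations.json is a placeholder name
mo --input_model model.pb --transformations_config custom_front_transformations.json
```
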
diff --git a/docs/OV_Runtime_UG/network_state_intro.md b/docs/OV_Runtime_UG/network_state_intro.md index 2a04dd05dc9..5d39b56d32d 100644 --- a/docs/OV_Runtime_UG/network_state_intro.md +++ b/docs/OV_Runtime_UG/network_state_intro.md @@ -15,7 +15,7 @@ The section additionally provides small examples of stateful network and code to between data portions should be addressed. For that, networks save some data between inferences - state. When one dependent sequence is over, state should be reset to initial value and new sequence can be started. - Several frameworks have special API for states in networks. For example, Keras have special option for RNNs `stateful` that turns on saving state + Several frameworks have special API for states in networks. For example, Keras has special option for RNNs `stateful` that turns on saving state between inferences. Kaldi contains special specifier `Offset` to define time offset in a network. OpenVINO also contains special API to simplify work with networks with states. State is automatically saved between inferences, @@ -196,9 +196,7 @@ sink from `ngraph::Function` after deleting the node from graph with the `delete Let's take an IR from the previous section example. The example below demonstrates inference of two independent sequences of data. State should be reset between these sequences. -One infer request and one thread -will be used in this example. Using several threads is possible if you have several independent sequences. Then each sequence can be processed in its own infer -request. Inference of one sequence in several infer requests is not recommended. In one infer request state will be saved automatically between inferences, but +One infer request and one thread will be used in this example. Using several threads is possible if you have several independent sequences. Then each sequence can be processed in its own infer request. Inference of one sequence in several infer requests is not recommended. In one infer request state will be saved automatically between inferences, but if the first step is done in one infer request and the second in another, state should be set in new infer request manually (using `IVariableState::SetState` method). @snippet openvino/docs/snippets/InferenceEngine_network_with_state_infer.cpp part1 @@ -213,7 +211,7 @@ Decsriptions can be found in [Samples Overview](./Samples_Overview.md) If the original framework does not have a special API for working with states, after importing the model, OpenVINO representation will not contain Assign/ReadValue layers. For example, if the original ONNX model contains RNN operations, IR will contain TensorIterator operations and the values will be obtained only after execution of the whole TensorIterator primitive. Intermediate values from each iteration will not be available. To enable you to work with these intermediate values of each iteration and receive them with a low latency after each infer request, special LowLatency and LowLatency2 transformations were introduced. -### How to get TensorIterator/Loop operaions from different frameworks via ModelOptimizer. +### How to get TensorIterator/Loop operations from different frameworks via ModelOptimizer. **ONNX and frameworks supported via ONNX format:** *LSTM, RNN, GRU* original layers are converted to the TensorIterator operation. TensorIterator body contains LSTM/RNN/GRU Cell. Peepholes, InputForget modifications are not supported, sequence_lengths optional input is supported. 
*ONNX Loop* layer is converted to the OpenVINO Loop operation. diff --git a/docs/OV_Runtime_UG/openvino_intro.md b/docs/OV_Runtime_UG/openvino_intro.md index e6ce0f9c6c3..fd8e88f8c15 100644 --- a/docs/OV_Runtime_UG/openvino_intro.md +++ b/docs/OV_Runtime_UG/openvino_intro.md @@ -26,10 +26,10 @@ @endsphinxdirective ## Introduction -OpenVINO Runtime is a set of C++ libraries with C and Python bindings providing a common API to deliver inference solutions on the platform of your choice. Use the OpenVINO Runtime API to read the Intermediate Representation (IR), ONNX, PDPD file formats and execute the model on devices. - -OpenVINO runtime uses a plugin architecture. Inference plugin is a software component that contains complete implementation for inference on a certain Intel® hardware device: CPU, GPU, VPU, GNA, etc. Each plugin implements the unified API and provides additional hardware-specific APIs to configure device or interoperability API between OpenVINO Runtime and underlaying plugin backend. +OpenVINO Runtime is a set of C++ libraries with C and Python bindings providing a common API to deliver inference solutions on the platform of your choice. Use the OpenVINO Runtime API to read an Intermediate Representation (IR), ONNX, or PaddlePaddle model and execute it on preferred devices. +OpenVINO Runtime uses a plugin architecture. Its plugins are software components that contain complete implementation for inference on a particular Intel® hardware device: CPU, GPU, VPU, etc. Each plugin implements the unified API and provides additional hardware-specific APIs, for configuring devices, or API interoperability between OpenVINO Runtime and underlying plugin backend. + The scheme below illustrates the typical workflow for deploying a trained deep learning model: diff --git a/docs/_static/images/ov_chart.png b/docs/_static/images/ov_chart.png index f91923443c3..fa25daf3601 100644 --- a/docs/_static/images/ov_chart.png +++ b/docs/_static/images/ov_chart.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f9a0d138f7f6d2546f0e48d9240d6a90aec18dc6d8092e1082b2fc3125f1ce3d -size 108434 +oid sha256:83f0013e02ea792b553b5bd0a5630fb456a6fefc8dd701cd4430fc83d75cbff7 +size 78205 From 9dec8db96490aea4954ddbd628f58b038af68759 Mon Sep 17 00:00:00 2001 From: Anton Pankratov Date: Wed, 23 Feb 2022 13:03:37 +0300 Subject: [PATCH 087/310] Common OV configuration tests (#10286) * Used new config for streams and threads * Fixed review coments in ba * format fix * fixed hello_query_device * Added STL string io * fixed tests * Fixed test * Fixed build * fixed format * Fixed build * try fix win * other any io specialization * Fixed after merge * renamed streams * build fixed * fixed build * fixed format * fix for old mac build * Fixed type of exception * test fix * Added ov configuration test * Added common OV properties tests * fix mklnn * fixed foramat * merge conflicts * Remoed compile_model tests * removed duplicated test --- .../behavior/ov_plugin/properties_tests.cpp | 120 ++++++++++++++++++ .../include/openvino/runtime/properties.hpp | 10 +- .../behavior/ov_plugin/properties_tests.hpp | 52 ++++++++ .../behavior/ov_plugin/properties_tests.cpp | 101 +++++++++++++++ 4 files changed, 278 insertions(+), 5 deletions(-) create mode 100644 docs/template_plugin/tests/functional/shared_tests_instances/behavior/ov_plugin/properties_tests.cpp create mode 100644 src/tests/functional/plugin/shared/include/behavior/ov_plugin/properties_tests.hpp create mode 100644 
src/tests/functional/plugin/shared/src/behavior/ov_plugin/properties_tests.cpp diff --git a/docs/template_plugin/tests/functional/shared_tests_instances/behavior/ov_plugin/properties_tests.cpp b/docs/template_plugin/tests/functional/shared_tests_instances/behavior/ov_plugin/properties_tests.cpp new file mode 100644 index 00000000000..d6a87759e01 --- /dev/null +++ b/docs/template_plugin/tests/functional/shared_tests_instances/behavior/ov_plugin/properties_tests.cpp @@ -0,0 +1,120 @@ +// Copyright (C) 2018-2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "behavior/ov_plugin/properties_tests.hpp" +#include "openvino/runtime/properties.hpp" + +using namespace ov::test::behavior; + +namespace { + +const std::vector inproperties = { + {ov::device::id("UNSUPPORTED_DEVICE_ID_STRING")}, +}; + +const std::vector hetero_inproperties = { + {ov::device::id("UNSUPPORTED_DEVICE_ID_STRING")}, +}; + +const std::vector multi_inproperties = { + {ov::device::id("UNSUPPORTED_DEVICE_ID_STRING")}, +}; + + +const std::vector auto_inproperties = { + {ov::device::id("UNSUPPORTED_DEVICE_ID_STRING")}, +}; + + +const std::vector auto_batch_inproperties = { + {ov::device::id("UNSUPPORTED_DEVICE_ID_STRING")}, +}; + +INSTANTIATE_TEST_SUITE_P(DISABLED_smoke_BehaviorTests, OVPropertiesIncorrectTests, + ::testing::Combine( + ::testing::Values(CommonTestUtils::DEVICE_TEMPLATE), + ::testing::ValuesIn(inproperties)), + OVPropertiesIncorrectTests::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(DISABLED_smoke_Hetero_BehaviorTests, OVPropertiesIncorrectTests, + ::testing::Combine( + ::testing::Values(CommonTestUtils::DEVICE_HETERO), + ::testing::ValuesIn(hetero_inproperties)), + OVPropertiesIncorrectTests::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(DISABLED_smoke_Multi_BehaviorTests, OVPropertiesIncorrectTests, + ::testing::Combine( + ::testing::Values(CommonTestUtils::DEVICE_MULTI), + ::testing::ValuesIn(multi_inproperties)), + OVPropertiesIncorrectTests::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(DISABLED_smoke_Auto_BehaviorTests, OVPropertiesIncorrectTests, + ::testing::Combine( + ::testing::Values(CommonTestUtils::DEVICE_AUTO), + ::testing::ValuesIn(auto_inproperties)), + OVPropertiesIncorrectTests::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(DISABLED_smoke_AutoBatch_BehaviorTests, OVPropertiesIncorrectTests, + ::testing::Combine( + ::testing::Values(CommonTestUtils::DEVICE_BATCH), + ::testing::ValuesIn(auto_batch_inproperties)), + OVPropertiesIncorrectTests::getTestCaseName); + +const std::vector default_properties = { + {ov::enable_profiling(true)}, + {ov::device::id(0)}, +}; + +INSTANTIATE_TEST_SUITE_P(smoke_BehaviorTests, OVPropertiesDefaultTests, + ::testing::Combine( + ::testing::Values(CommonTestUtils::DEVICE_TEMPLATE), + ::testing::ValuesIn(default_properties)), + OVPropertiesDefaultTests::getTestCaseName); + +const std::vector properties = { + {ov::enable_profiling(true)}, + {ov::device::id(0)}, +}; + +const std::vector hetero_properties = { + {ov::device::priorities(CommonTestUtils::DEVICE_TEMPLATE), ov::enable_profiling(true)}, + {ov::device::priorities(CommonTestUtils::DEVICE_TEMPLATE), ov::device::id(0)}, +}; + + +const std::vector multi_properties = { + {ov::device::priorities(CommonTestUtils::DEVICE_TEMPLATE), ov::enable_profiling(true)}, + {ov::device::priorities(CommonTestUtils::DEVICE_TEMPLATE), ov::device::id(0)}, +}; + +const std::vector auto_batch_properties = { + {{CONFIG_KEY(AUTO_BATCH_DEVICE_CONFIG) , CommonTestUtils::DEVICE_TEMPLATE}}, + 
{{CONFIG_KEY(AUTO_BATCH_DEVICE_CONFIG) , CommonTestUtils::DEVICE_TEMPLATE}, + {CONFIG_KEY(AUTO_BATCH_TIMEOUT) , "1"}}, +}; + +INSTANTIATE_TEST_SUITE_P(smoke_BehaviorTests, OVPropertiesTests, + ::testing::Combine( + ::testing::Values(CommonTestUtils::DEVICE_TEMPLATE), + ::testing::ValuesIn(properties)), + OVPropertiesTests::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(DISABLED_smoke_Hetero_BehaviorTests, OVPropertiesTests, + ::testing::Combine( + ::testing::Values(CommonTestUtils::DEVICE_HETERO), + ::testing::ValuesIn(hetero_properties)), + OVPropertiesTests::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(DISABLED_smoke_Multi_BehaviorTests, OVPropertiesTests, + ::testing::Combine( + ::testing::Values(CommonTestUtils::DEVICE_MULTI), + ::testing::ValuesIn(multi_properties)), + OVPropertiesTests::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(DISABLED_smoke_AutoBatch_BehaviorTests, OVPropertiesTests, + ::testing::Combine( + ::testing::Values(CommonTestUtils::DEVICE_BATCH), + ::testing::ValuesIn(auto_batch_properties)), + OVPropertiesTests::getTestCaseName); +} // namespace diff --git a/src/inference/include/openvino/runtime/properties.hpp b/src/inference/include/openvino/runtime/properties.hpp index b3935c0f96e..1c8632db316 100644 --- a/src/inference/include/openvino/runtime/properties.hpp +++ b/src/inference/include/openvino/runtime/properties.hpp @@ -138,7 +138,7 @@ class Property : public util::BaseProperty { template struct Forward { template ::value && + typename std::enable_if::type, std::string>::value && std::is_convertible::value, bool>::type = true> explicit operator U() { @@ -146,15 +146,15 @@ class Property : public util::BaseProperty { } template ::value && + typename std::enable_if::type, std::string>::value && !std::is_convertible::value, bool>::type = true> explicit operator U() { - return Any{value}.as(); + return Any{value}.as(); } template ::value && + typename std::enable_if::type, std::string>::value && std::is_convertible::value, bool>::type = true> explicit operator U() { @@ -162,7 +162,7 @@ class Property : public util::BaseProperty { } template ::value && + typename std::enable_if::type, std::string>::value && !std::is_convertible::value, bool>::type = true> explicit operator U() { diff --git a/src/tests/functional/plugin/shared/include/behavior/ov_plugin/properties_tests.hpp b/src/tests/functional/plugin/shared/include/behavior/ov_plugin/properties_tests.hpp new file mode 100644 index 00000000000..3f90e4b68ca --- /dev/null +++ b/src/tests/functional/plugin/shared/include/behavior/ov_plugin/properties_tests.hpp @@ -0,0 +1,52 @@ +// Copyright (C) 2018-2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "base/ov_behavior_test_utils.hpp" +#include "openvino/runtime/properties.hpp" +#include "openvino/util/common_util.hpp" + +#include "common_test_utils/test_assertions.hpp" +#include "common_test_utils/file_utils.hpp" +#include "common_test_utils/unicode_utils.hpp" + +namespace ov { +namespace test { +namespace behavior { + +class OVPropertiesBase : public CommonTestUtils::TestsCommon { +public: + std::shared_ptr core = utils::PluginCache::get().core(); + std::shared_ptr model; + std::string device_name; + AnyMap properties; +}; + +class OVEmptyPropertiesTests : public testing::WithParamInterface, + public OVPropertiesBase { +public: + static std::string getTestCaseName(testing::TestParamInfo obj); + + void SetUp() override; +}; + +using PropertiesParams = std::tuple; + +class OVPropertiesTests : public testing::WithParamInterface, 
+ public OVPropertiesBase { +public: + static std::string getTestCaseName(testing::TestParamInfo obj); + + void SetUp() override; + + void TearDown() override; +}; + +using OVPropertiesIncorrectTests = OVPropertiesTests; +using OVPropertiesDefaultTests = OVPropertiesTests; + +} // namespace behavior +} // namespace test +} // namespace ov \ No newline at end of file diff --git a/src/tests/functional/plugin/shared/src/behavior/ov_plugin/properties_tests.cpp b/src/tests/functional/plugin/shared/src/behavior/ov_plugin/properties_tests.cpp new file mode 100644 index 00000000000..a7d4083c99a --- /dev/null +++ b/src/tests/functional/plugin/shared/src/behavior/ov_plugin/properties_tests.cpp @@ -0,0 +1,101 @@ +// Copyright (C) 2018-2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "behavior/ov_plugin/properties_tests.hpp" +#include "openvino/runtime/properties.hpp" +#include + +namespace ov { +namespace test { +namespace behavior { + +std::string OVEmptyPropertiesTests::getTestCaseName(testing::TestParamInfo obj) { + return "device_name=" + obj.param; +} + +void OVEmptyPropertiesTests::SetUp() { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + device_name = this->GetParam(); + model = ngraph::builder::subgraph::makeConvPoolRelu(); +} + +std::string OVPropertiesTests::getTestCaseName(testing::TestParamInfo obj) { + std::string device_name; + AnyMap properties; + std::tie(device_name, properties) = obj.param; + std::ostringstream result; + result << "device_name=" << device_name << "_"; + if (!properties.empty()) { + result << "properties=" << util::join(util::split(util::to_string(properties), ' '), "_"); + } + return result.str(); +} + +void OVPropertiesTests::SetUp() { + SKIP_IF_CURRENT_TEST_IS_DISABLED(); + std::tie(device_name, properties) = this->GetParam(); + model = ngraph::builder::subgraph::makeConvPoolRelu(); +} + +void OVPropertiesTests::TearDown() { + if (!properties.empty()) { + utils::PluginCache::get().reset(); + } +} + +TEST_P(OVEmptyPropertiesTests, SetEmptyProperties) { + OV_ASSERT_NO_THROW(core->get_property(device_name, ov::supported_properties)); + OV_ASSERT_NO_THROW(core->set_property(device_name, AnyMap{})); +} + +// Setting correct properties doesn't throw +TEST_P(OVPropertiesTests, SetCorrectProperties) { + OV_ASSERT_NO_THROW(core->set_property(device_name, properties)); +} + +TEST_P(OVPropertiesTests, canSetPropertyAndCheckGetProperty) { + core->set_property(device_name, properties); + for (const auto& property_item : properties) { + Any property; + OV_ASSERT_NO_THROW(property = core->get_property(device_name, property_item.first)); + ASSERT_FALSE(property.empty()); + std::cout << property_item.first << ":" << property.as() << std::endl; + } +} + +TEST_P(OVPropertiesIncorrectTests, SetPropertiesWithIncorrectKey) { + ASSERT_THROW(core->set_property(device_name, properties), ov::Exception); +} + +TEST_P(OVPropertiesIncorrectTests, CanNotCompileModelWithIncorrectProperties) { + ASSERT_THROW(core->compile_model(model, device_name, properties), ov::Exception); +} + +TEST_P(OVPropertiesDefaultTests, CanSetDefaultValueBackToPlugin) { + std::vector supported_properties; + OV_ASSERT_NO_THROW(supported_properties = core->get_property(device_name, ov::supported_properties)); + for (auto& supported_property : supported_properties) { + Any property; + OV_ASSERT_NO_THROW(property = core->get_property(device_name, supported_property)); + if (supported_property.is_mutable()) { + OV_ASSERT_NO_THROW(core->set_property(device_name, {{ supported_property, property}})); + } 
+ } +} + +TEST_P(OVPropertiesDefaultTests, CheckDefaultValues) { + std::vector supported_properties; + OV_ASSERT_NO_THROW(supported_properties = core->get_property(device_name, ov::supported_properties)); + for (auto&& default_property : properties) { + auto supported = util::contains(supported_properties, default_property.first); + ASSERT_TRUE(supported) << "default_property=" << default_property.first; + Any property; + OV_ASSERT_NO_THROW(property = core->get_property(device_name, default_property.first)); + ASSERT_EQ(default_property.second, property); + } +} + +} // namespace behavior +} // namespace test +} // namespace ov From e544f5e66fbb7493c09806c21bb0b53f9f9449d9 Mon Sep 17 00:00:00 2001 From: Evgenya Stepyreva Date: Wed, 23 Feb 2022 18:29:12 +0300 Subject: [PATCH 088/310] Enable einsum shape inferenxe test (#10603) --- src/core/tests/type_prop/einsum.cpp | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/src/core/tests/type_prop/einsum.cpp b/src/core/tests/type_prop/einsum.cpp index 6fdbf854279..ac55f742862 100644 --- a/src/core/tests/type_prop/einsum.cpp +++ b/src/core/tests/type_prop/einsum.cpp @@ -120,7 +120,7 @@ TEST(type_prop, einsum_dynamicshape_dotproduct) { auto I2 = make_shared(element::f32, input2_shape); auto O = make_shared(OutputVector{I1, I2}, equation); ASSERT_EQ(O->get_element_type(), element::f32); - ASSERT_TRUE(O->get_output_partial_shape(0).same_scheme(out_shape)); + ASSERT_EQ(O->get_output_partial_shape(0), out_shape); } TEST(type_prop, einsum_dynamicshape_diagextraction) { @@ -130,13 +130,10 @@ TEST(type_prop, einsum_dynamicshape_diagextraction) { auto I1 = make_shared(element::i32, input1_shape); auto O = make_shared(OutputVector{I1}, equation); ASSERT_EQ(O->get_element_type(), element::i32); - ASSERT_TRUE(O->get_output_partial_shape(0).same_scheme(out_shape)); + ASSERT_EQ(O->get_output_partial_shape(0), out_shape); } -TEST(type_prop, DISABLED_einsum_dynamicshape_ellipsis1) { - // TODO: fix bug #53518 - PartialShape::broadcast_merge_into or Dimension::broadcast_merge - // to support broadcasting between Dimension(3, 5) and Dimension(1, 3) - // for which the result must be Dimension(3, 5) +TEST(type_prop, einsum_dynamicshape_ellipsis1) { std::string equation = "a...b,b...->a..."; const auto input1_shape = PartialShape{11, 1, Dimension(3, 5), 3}; const auto input2_shape = PartialShape{3, 11, 7, Dimension(1, 3)}; @@ -145,7 +142,7 @@ TEST(type_prop, DISABLED_einsum_dynamicshape_ellipsis1) { auto I2 = make_shared(element::f32, input2_shape); auto O = make_shared(OutputVector{I1, I2}, equation); ASSERT_EQ(O->get_element_type(), element::f32); - ASSERT_TRUE(O->get_output_partial_shape(0).same_scheme(out_shape)); + ASSERT_EQ(O->get_output_partial_shape(0), out_shape); } TEST(type_prop, einsum_implicitmode_mixedcaseletters) { @@ -156,7 +153,7 @@ TEST(type_prop, einsum_implicitmode_mixedcaseletters) { const auto out_shape = PartialShape{1, Dimension(4, 5), Dimension(2, 3)}; auto O = make_shared(OutputVector{I1}, equation); ASSERT_EQ(O->get_element_type(), element::f32); - ASSERT_TRUE(O->get_output_partial_shape(0).same_scheme(out_shape)); + ASSERT_EQ(O->get_output_partial_shape(0), out_shape); } TEST(type_prop, einsum_implicitmode_mixedcaseletters2) { @@ -169,7 +166,7 @@ TEST(type_prop, einsum_implicitmode_mixedcaseletters2) { auto I2 = make_shared(element::f32, input2_shape); auto O = make_shared(OutputVector{I1, I2}, equation); ASSERT_EQ(O->get_element_type(), element::f32); - 
ASSERT_TRUE(O->get_output_partial_shape(0).same_scheme(out_shape)); + ASSERT_EQ(O->get_output_partial_shape(0), out_shape); } TEST(type_prop, einsum_implicitmode_repeatedlabels) { @@ -182,7 +179,7 @@ TEST(type_prop, einsum_implicitmode_repeatedlabels) { auto I2 = make_shared(element::f32, input2_shape); auto O = make_shared(OutputVector{I1, I2}, equation); ASSERT_EQ(O->get_element_type(), element::f32); - ASSERT_TRUE(O->get_output_partial_shape(0).same_scheme(out_shape)); + ASSERT_EQ(O->get_output_partial_shape(0), out_shape); } TEST(type_prop, einsum_implicitmode_innerprod) { @@ -195,7 +192,7 @@ TEST(type_prop, einsum_implicitmode_innerprod) { auto I2 = make_shared(element::f32, input2_shape); auto O = make_shared(OutputVector{I1, I2}, equation); ASSERT_EQ(O->get_element_type(), element::f32); - ASSERT_TRUE(O->get_output_partial_shape(0).same_scheme(out_shape)); + ASSERT_EQ(O->get_output_partial_shape(0), out_shape); } TEST(type_prop, einsum_dynamicrank_multimatmul) { @@ -223,7 +220,7 @@ TEST(type_prop, einsum_dynamicrank_multimatmul2) { auto I3 = make_shared(element::i32, input3_shape); auto O = make_shared(OutputVector{I1, I2, I3}, equation); ASSERT_EQ(O->get_element_type(), element::i32); - ASSERT_TRUE(O->get_output_partial_shape(0).same_scheme(out_shape)); + ASSERT_EQ(O->get_output_partial_shape(0), out_shape); } TEST(type_prop, einsum_incorrectequation_subscriptnumber) { From 23b74840c1ba61dfe44eb8ac2c7c27048da6b71c Mon Sep 17 00:00:00 2001 From: Vladimir Dudnik Date: Thu, 24 Feb 2022 10:49:38 +0300 Subject: [PATCH 089/310] renamed streams property (#10620) --- thirdparty/open_model_zoo | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/open_model_zoo b/thirdparty/open_model_zoo index 2792b159890..bc92ceb1d3f 160000 --- a/thirdparty/open_model_zoo +++ b/thirdparty/open_model_zoo @@ -1 +1 @@ -Subproject commit 2792b159890a6c7bf0b4059f32f96439c776848d +Subproject commit bc92ceb1d3f685f4c98eccd9320aff25401feee2 From 506303cc790f4d4a51be51d8c55a30e23f9fc498 Mon Sep 17 00:00:00 2001 From: Ivan Novoselov Date: Thu, 24 Feb 2022 11:54:15 +0300 Subject: [PATCH 090/310] [Snippets][CPU] Fix empty shapes handling in canonicalization (#10632) --- src/common/snippets/src/op/subgraph.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/common/snippets/src/op/subgraph.cpp b/src/common/snippets/src/op/subgraph.cpp index e73dc6b4bce..c5d8ce47d27 100644 --- a/src/common/snippets/src/op/subgraph.cpp +++ b/src/common/snippets/src/op/subgraph.cpp @@ -184,7 +184,7 @@ Shape snippets::op::Subgraph::canonicalize(const BlockedShapeVector& outputShape auto skipStartEndOnes = [](const Shape& shape) { auto begin = shape.begin(); auto end = shape.end(); - while (*begin == 1 && begin != end) + while (begin != end && *begin == 1) begin++; while (begin != end && *(end-1) == 1) end--; From 4c01d6c50c6d314373dffd2a8ddbc294011b2508 Mon Sep 17 00:00:00 2001 From: Alina Kladieva Date: Thu, 24 Feb 2022 12:03:36 +0300 Subject: [PATCH 091/310] Skip canRun3SyncRequestsConsistentlyFromThreads sporadic on Myriad (#10598) --- .../plugin/myriad/shared_tests_instances/skip_tests_config.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/tests/functional/plugin/myriad/shared_tests_instances/skip_tests_config.cpp b/src/tests/functional/plugin/myriad/shared_tests_instances/skip_tests_config.cpp index e892cecbc8b..af3fe8cbb26 100644 --- a/src/tests/functional/plugin/myriad/shared_tests_instances/skip_tests_config.cpp +++ 
b/src/tests/functional/plugin/myriad/shared_tests_instances/skip_tests_config.cpp @@ -82,5 +82,8 @@ std::vector disabledTestPatterns() { ".*InferRequestCheckTensorPrecision.*type=u8.*", ".*InferRequestCheckTensorPrecision.*type=u16.*", ".*InferRequestCheckTensorPrecision.*type=u64.*", + + // TODO: Issue 76209 + R"(.*MultithreadingTests.*canRun3SyncRequestsConsistentlyFromThreads.*MYRIAD.*)", }; } From 3de428c7139fef69e37b406c3490c26b67b48026 Mon Sep 17 00:00:00 2001 From: Evgenya Stepyreva Date: Thu, 24 Feb 2022 14:37:03 +0300 Subject: [PATCH 092/310] Auto-batch ConvertLike enabled (#10631) --- .../common_optimizations/dimension_tracking.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/common/transformations/src/transformations/common_optimizations/dimension_tracking.cpp b/src/common/transformations/src/transformations/common_optimizations/dimension_tracking.cpp index 197f7e38ecf..547ea7e5b42 100644 --- a/src/common/transformations/src/transformations/common_optimizations/dimension_tracking.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/dimension_tracking.cpp @@ -143,7 +143,8 @@ P2Btype ov::batch_util::find_batch(const std::shared_ptr& f) { for (const auto& output : curr_node->outputs()) { // we do not need to walk through shape-of sub-graphs for (const auto& t_input : output.get_target_inputs()) { - if (ov::is_type(t_input.get_node()) || + if (ov::is_type(t_input.get_node()) || + ov::is_type(t_input.get_node()) || ov::is_type(t_input.get_node())) continue; nodes.push_back(t_input.get_node()); @@ -206,7 +207,7 @@ bool ov::batch_util::check_batch_tracks_through_all_the_nodes(const std::shared_ all_outputs_has_batch &= name_stays; // && others_are_static; } if (any_input_has_batch && !all_outputs_has_batch && !ov::is_type(node) && - !ov::is_type(node)) { + !ov::is_type(node) && !ov::is_type(node)) { failed_to_propagate_batch = true; node->validate_and_infer_types(); } From 85707198b32b58ec83e15020615af4bef29a4e91 Mon Sep 17 00:00:00 2001 From: Ilya Churaev Date: Thu, 24 Feb 2022 15:22:08 +0300 Subject: [PATCH 093/310] Revert "Disable reshape for new API (#10064)" (#10634) This reverts commit 3f4e384d5d69d2e8a6ed84cdb5e3ba7ae3bf4c4e. 
--- src/inference/src/cnn_network_ngraph_impl.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/inference/src/cnn_network_ngraph_impl.cpp b/src/inference/src/cnn_network_ngraph_impl.cpp index 777d80bc0d8..cbea1f4b443 100644 --- a/src/inference/src/cnn_network_ngraph_impl.cpp +++ b/src/inference/src/cnn_network_ngraph_impl.cpp @@ -441,7 +441,7 @@ void CNNNetworkNGraphImpl::reshape(const std::map specialized_ngraph_function = nullptr; - if (outputs_are_static || _new_api) { + if (outputs_are_static) { specialized_ngraph_function = _ngraph_function; } else { specialized_ngraph_function = ngraph::clone_function(*_ngraph_function); From a2f99630451422a1d7f5fc5bd54118543fb19c3e Mon Sep 17 00:00:00 2001 From: Maksim Derbasov Date: Thu, 24 Feb 2022 15:33:30 +0300 Subject: [PATCH 094/310] Fix warnings from builders.hpp (#10568) --- .../include/ngraph_functions/builders.hpp | 22 +++++++------------ .../ngraph_functions/utils/data_utils.hpp | 12 ++++++++-- 2 files changed, 18 insertions(+), 16 deletions(-) diff --git a/src/tests/ngraph_helpers/ngraph_functions/include/ngraph_functions/builders.hpp b/src/tests/ngraph_helpers/ngraph_functions/include/ngraph_functions/builders.hpp index 1b41cfa9dcf..24a8e5ebe90 100644 --- a/src/tests/ngraph_helpers/ngraph_functions/include/ngraph_functions/builders.hpp +++ b/src/tests/ngraph_helpers/ngraph_functions/include/ngraph_functions/builders.hpp @@ -41,22 +41,16 @@ std::shared_ptr makeConstant(const element::Type &type, const std::vector< case TYPE: \ weightsNode = std::make_shared( \ type, shape, \ - random ? NGraphFunctions::Utils::generateVector(ngraph::shape_size(shape), upTo, startFrom, seed) : \ - NGraphFunctions::Utils::castVector::value_type >(data)); \ + random ? NGraphFunctions::Utils::generateVector( \ + ngraph::shape_size(shape), \ + ngraph::helpers::nGraphTypesTrait::value_type(upTo), \ + ngraph::helpers::nGraphTypesTrait::value_type(startFrom), \ + seed) \ + : NGraphFunctions::Utils::castVector::value_type >(data)); \ break; switch (type) { - case ngraph::element::Type_t::bf16: - weightsNode = std::make_shared( - type, shape, - random ? NGraphFunctions::Utils::generateBF16Vector(ngraph::shape_size(shape), upTo, startFrom) : - NGraphFunctions::Utils::castVector(data)); - break; - case ngraph::element::Type_t::f16: - weightsNode = std::make_shared( - type, shape, - random ? 
NGraphFunctions::Utils::generateF16Vector(ngraph::shape_size(shape), upTo, startFrom) : - NGraphFunctions::Utils::castVector(data)); - break; + makeNode(ngraph::element::Type_t::bf16); + makeNode(ngraph::element::Type_t::f16); makeNode(ngraph::element::Type_t::f32); makeNode(ngraph::element::Type_t::f64); makeNode(ngraph::element::Type_t::i8); diff --git a/src/tests/ngraph_helpers/ngraph_functions/include/ngraph_functions/utils/data_utils.hpp b/src/tests/ngraph_helpers/ngraph_functions/include/ngraph_functions/utils/data_utils.hpp index ee7d6ed7184..3c6390ec965 100644 --- a/src/tests/ngraph_helpers/ngraph_functions/include/ngraph_functions/utils/data_utils.hpp +++ b/src/tests/ngraph_helpers/ngraph_functions/include/ngraph_functions/utils/data_utils.hpp @@ -54,8 +54,12 @@ generateVector(size_t vec_len, } } +template<> std::vector inline -generateF16Vector(size_t vec_len, ngraph::float16 upTo = 10, ngraph::float16 startFrom = 1, int32_t seed = 1) { +generateVector(size_t vec_len, + ngraph::float16 upTo, + ngraph::float16 startFrom, + int32_t seed) { std::vector res(vec_len); std::mt19937 gen(seed); // chose values between this range to avoid type overrun (e.g. in case of I8 precision) @@ -69,8 +73,12 @@ generateF16Vector(size_t vec_len, ngraph::float16 upTo = 10, ngraph::float16 sta return res; } +template<> std::vector inline -generateBF16Vector(size_t vec_len, ngraph::bfloat16 upTo = 10, ngraph::bfloat16 startFrom = 1, int32_t seed = 1) { +generateVector(size_t vec_len, + ngraph::bfloat16 upTo, + ngraph::bfloat16 startFrom, + int32_t seed) { std::vector res(vec_len); std::mt19937 gen(seed); From 1c18733adeb7d53771fca6dc703417ed52faa5db Mon Sep 17 00:00:00 2001 From: Ilya Churaev Date: Thu, 24 Feb 2022 15:50:31 +0300 Subject: [PATCH 095/310] Changed location of extensibility guide (#10433) * Changed location of extensibility guide * Removed hardware kernels legacy documentation * Changed all extension guild to new API * Removed Custom_Layers_Guide * Fixed build * Fixed some moments * Update docs/Extensibility_UG/Intro.md * Fixed build * Added more examples * Fixed typo * Fixed comments * Extend library topic * Fixed typo --- docs/Extensibility_UG/Intro.md | 114 +++ docs/Extensibility_UG/add_openvino_ops.md | 62 ++ docs/HOWTO/Custom_Layers_Guide.md | 350 --------- .../prepare_model/Model_Optimizer_FAQ.md | 18 +- .../Convert_Model_From_TensorFlow.md | 2 +- .../convert_model/Converting_Model.md | 2 +- .../Customize_Model_Optimizer.md | 2 +- .../Extensibility_DG/AddingNGraphOps.md | 82 --- .../Extensibility_DG/Building.md | 19 - .../Extensibility_DG/CPU_Kernel.md | 71 -- .../Extensibility_DG/Custom_ONNX_Ops.md | 78 -- .../Extensibility_DG/Extension.md | 29 - .../Extensibility_DG/GPU_Kernel.md | 233 ------ docs/OV_Runtime_UG/Extensibility_DG/Intro.md | 60 -- .../Extensibility_DG/VPU_Kernel.md | 682 ------------------ docs/OV_Runtime_UG/ShapeInference.md | 4 +- docs/OV_Runtime_UG/model_representation.md | 6 +- docs/OV_Runtime_UG/openvino_temporary.md | 1 - docs/OV_Runtime_UG/supported_plugins/CPU.md | 2 +- .../supported_plugins/Supported_Devices.md | 2 +- docs/OV_Runtime_UG/supported_plugins/VPU.md | 2 +- docs/documentation.md | 4 +- docs/how_tos/how-to-links.md | 2 +- docs/snippets/CMakeLists.txt | 1 + docs/snippets/ov_extensions.cpp | 26 + docs/snippets/ov_extensions.py | 15 + docs/template_extension/new/CMakeLists.txt | 8 +- docs/template_extension/new/identity.hpp | 11 + docs/template_extension/new/ov_extension.cpp | 8 +- .../openvino/frontend/extension/op.hpp | 4 +- 30 files changed, 268 
insertions(+), 1632 deletions(-) create mode 100644 docs/Extensibility_UG/Intro.md create mode 100644 docs/Extensibility_UG/add_openvino_ops.md delete mode 100644 docs/HOWTO/Custom_Layers_Guide.md delete mode 100644 docs/OV_Runtime_UG/Extensibility_DG/AddingNGraphOps.md delete mode 100644 docs/OV_Runtime_UG/Extensibility_DG/Building.md delete mode 100644 docs/OV_Runtime_UG/Extensibility_DG/CPU_Kernel.md delete mode 100644 docs/OV_Runtime_UG/Extensibility_DG/Custom_ONNX_Ops.md delete mode 100644 docs/OV_Runtime_UG/Extensibility_DG/Extension.md delete mode 100644 docs/OV_Runtime_UG/Extensibility_DG/GPU_Kernel.md delete mode 100644 docs/OV_Runtime_UG/Extensibility_DG/Intro.md delete mode 100644 docs/OV_Runtime_UG/Extensibility_DG/VPU_Kernel.md create mode 100644 docs/snippets/ov_extensions.cpp create mode 100644 docs/snippets/ov_extensions.py diff --git a/docs/Extensibility_UG/Intro.md b/docs/Extensibility_UG/Intro.md new file mode 100644 index 00000000000..a738f3dfd77 --- /dev/null +++ b/docs/Extensibility_UG/Intro.md @@ -0,0 +1,114 @@ +# OpenVINO Extensibility Mechanism {#openvino_docs_Extensibility_UG_Intro} + +@sphinxdirective + +.. toctree:: + :maxdepth: 1 + :hidden: + + openvino_docs_Extensibility_UG_add_openvino_ops + +@endsphinxdirective + +The Intel® Distribution of OpenVINO™ toolkit supports neural network models trained with multiple frameworks including +TensorFlow, Caffe, MXNet, Kaldi, PaddlePaddle, and ONNX. The list of supported operations (layers) is different for +each of the supported frameworks. To see the operations supported by your framework, refer to +[Supported Framework Operations](../MO_DG/prepare_model/Supported_Frameworks_Layers.md). + +Custom operations, that is those not included in the list, are not recognized by OpenVINO™ out-of-the-box. Therefore, creating Intermediate Representation (IR) for a model using them requires additional steps. This guide illustrates the workflow for running inference on topologies featuring custom operations, allowing you to plug in your own implementation for existing or completely new operations. + +If your model contains operations not normally supported by OpenVINO™, the OpenVINO™ Extensibility API lets you add support for those custom operations and use one implementation for Model Optimizer and OpenVINO™ Runtime. + +There are two steps to support inference of a model with custom operation(s): +1. Add support for a [custom operation in the Model Optimizer](../MO_DG/prepare_model/customize_model_optimizer/Customize_Model_Optimizer.md) so +the Model Optimizer can generate the IR with the operation. +2. Create a custom operation in it as described in the [Custom Operation](add_openvino_ops.md). + +## OpenVINO™ Extensions + +An OpenVINO™ provides extensions for: + + * [Custom OpenVINO™ Operation](add_openvino_ops.md): + - Enables the creation of unsupported operations + - Enables the use of `ov::Core::read_model` to read models with unsupported operations + - Provides a shape inference mechanism for custom operations + - Provides an evaluate method which allow to support the operation on CPU or perform constant folding + +> **NOTE**: This documentation is written based on the [Template extension](https://github.com/openvinotoolkit/openvino/tree/master/docs/template_extension/new), which demonstrates extension development details. You can review the complete code, which is fully compilable and up-to-date, to see how it works. 
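As a quick illustration of the capabilities listed above, the following is a minimal sketch of an application that registers the Identity operation from the Template extension and then reads a model containing it. The model file name and the include path are placeholders, and the registration call used here is covered in detail in the next section.

```cpp
#include <openvino/openvino.hpp>

#include "identity.hpp"  // header of the custom operation from the Template extension (assumed include path)

int main() {
    ov::Core core;

    // Register the custom operation type so that read_model can resolve it.
    core.add_extension<TemplateExtension::Identity>();

    // "model_with_identity.xml" is a placeholder for an IR that uses the custom operation.
    auto model = core.read_model("model_with_identity.xml");
    auto compiled_model = core.compile_model(model, "CPU");
    return 0;
}
```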
+ +## Load extensions to OpenVINO™ Runtime + +To load the extensions to the `ov::Core` object, use the `ov::Core::add_extension` method, this method allows to load library with extensions or extensions from the code. + +### Load extensions to core + +Extensions can be loaded from code with `ov::Core::add_extension` method: + +@sphinxdirective + +.. tab:: C++ + + .. doxygensnippet:: docs/snippets/ov_extensions.cpp + :language: cpp + :fragment: add_extension + +.. tab:: Python + + .. doxygensnippet:: docs/snippets/ov_extensions.py + :language: python + :fragment: add_extension + +@endsphinxdirective + +### Create library with extensions + +You need to create extension library in following cases: + - Load extensions to Model Optimizer + - Load extensions to Python application + +If you want to create an extension library, for example in order to load these extensions to the Model Optimizer, you need to do next steps: +Create an entry point for extension library. OpenVINO™ provides an `OPENVINO_CREATE_EXTENSIONS()` macro, which allows to define an entry point to a library with OpenVINO™ Extensions. +This macro should have a vector of all OpenVINO™ Extensions as an argument. + +Based on that, the declaration of an extension class can look as follows: + +@snippet template_extension/new/ov_extension.cpp ov_extension:entry_point + +To configure the build of your extension library, use the following CMake script: + +@snippet template_extension/new/CMakeLists.txt cmake:extension + +This CMake script finds the OpenVINO™ using the `find_package` CMake command. + +To build the extension library, run the commands below: + +```sh +$ cd docs/template_extension/new +$ mkdir build +$ cd build +$ cmake -DOpenVINO_DIR= ../ +$ cmake --build . +``` + +After the build you can use path to your extension library to load your extensions to OpenVINO™ Runtime: + +@sphinxdirective + +.. tab:: C++ + + .. doxygensnippet:: docs/snippets/ov_extensions.cpp + :language: cpp + :fragment: add_extension_lib + +.. tab:: Python + + .. doxygensnippet:: docs/snippets/ov_extensions.py + :language: python + :fragment: add_extension_lib + +@endsphinxdirective + +## See Also + +* [Using Inference Engine Samples](../OV_Runtime_UG/Samples_Overview.md) +* [Hello Shape Infer SSD sample](../../samples/cpp/hello_reshape_ssd/README.md) diff --git a/docs/Extensibility_UG/add_openvino_ops.md b/docs/Extensibility_UG/add_openvino_ops.md new file mode 100644 index 00000000000..7c5ed06f1fd --- /dev/null +++ b/docs/Extensibility_UG/add_openvino_ops.md @@ -0,0 +1,62 @@ +# Custom OpenVINO™ Operations {#openvino_docs_Extensibility_UG_add_openvino_ops} + +OpenVINO™ Extension API allows you to register custom operations to support models with operations which OpenVINO™ does not support out-of-the-box. + +## Operation Class + +To add your custom operation, create a new class that extends `ov::Op`, which is in turn derived from `ov::Node`, the base class for all graph operations in OpenVINO™. To add `ov::Op` please include next file: + +@snippet template_extension/new/identity.hpp op:common_include + +Follow the steps below to add a custom operation: + +1. Add the `OPENVINO_OP` macro which defines a `NodeTypeInfo` object that identifies the type of the operation to the graph users and helps with dynamic type resolution. The type info of an operation currently consists of a string operation identifier and a string for operation version. + +2. 
Implement default constructor and constructors that optionally take the operation inputs and attributes as parameters. + +3. Override the shape inference method `validate_and_infer_types`. This method is called multiple times during graph manipulations to determine the shapes and element types of the operations outputs. To access the input shapes and input element types, use the `get_input_partial_shape()` and `get_input_element_type()` methods of `ov::Node`. Set the inferred shape and element type of the output using `set_output_type`. + +4. Override the `clone_with_new_inputs` method, which enables graph manipulation routines to create copies of this operation and connect it to different nodes during optimization. + +5. Override the `visit_attributes` method, which enables serialization and deserialization of operation attributes. An `AttributeVisitor` is passed to the method, and the implementation is expected to walk over all the attributes in the op using the type-aware `on_attribute` helper. Helpers are already implemented for standard C++ types like `int64_t`, `float`, `bool`, `vector`, and for existing OpenVINO defined types. + +6. Override `evaluate`, which is an optional method that enables fallback of some devices to this implementation and the application of constant folding if there is a custom operation on the constant branch. If your operation contains `evaluate` method you also need to override the `has_evaluate` method, this method allow to get information about availability of `evaluate` method for the operation. + +7. Add the `OPENVINO_FRAMEWORK_MAP` macro if you want to map custom operation to framework operation with the same name. It is an optional macro which can be used for one to one mapping. In order to use this macro please include frontend specific headers: + @snippet template_extension/new/identity.hpp op:frontend_include + +Based on that, declaration of an operation class can look as follows: + +@snippet template_extension/new/identity.hpp op:header + +### Operation Constructors + +OpenVINO™ operation contains two constructors: +* Default constructor, which enables you to create an operation without attributes +* Constructor that creates and validates an operation with specified inputs and attributes + +@snippet template_extension/new/identity.cpp op:ctor + +### `validate_and_infer_types()` + +`ov::Node::validate_and_infer_types` method validates operation attributes and calculates output shapes using attributes of the operation. + +@snippet template_extension/new/identity.cpp op:validate + +### `clone_with_new_inputs()` + +`ov::Node::clone_with_new_inputs` method creates a copy of the operation with new inputs. + +@snippet template_extension/new/identity.cpp op:copy + +### `visit_attributes()` + +`ov::Node::visit_attributes` method enables you to visit all operation attributes. + +@snippet template_extension/new/identity.cpp op:visit_attributes + +### `evaluate()` and `has_evaluate()` + +`ov::Node::evaluate` method enables you to apply constant folding to an operation. 
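For orientation, a sketch of what such a pair of overrides can look like for a simple pass-through operation is shown here. It assumes the `ov::Tensor`-based `evaluate` overload and a hypothetical `MyIdentity` class; the actual Template extension implementation is referenced by the snippet that follows.

```cpp
#include <cstring>

bool MyIdentity::evaluate(ov::TensorVector& outputs, const ov::TensorVector& inputs) const {
    const auto& in = inputs[0];
    auto& out = outputs[0];
    // A pass-through operation simply copies the input tensor into the output tensor.
    out.set_shape(in.get_shape());
    std::memcpy(out.data(), in.data(), in.get_byte_size());
    return true;
}

bool MyIdentity::has_evaluate() const {
    // Report that a reference implementation is available for constant folding and device fallback.
    return true;
}
```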
+ +@snippet template_extension/new/identity.cpp op:evaluate diff --git a/docs/HOWTO/Custom_Layers_Guide.md b/docs/HOWTO/Custom_Layers_Guide.md deleted file mode 100644 index d7c63b66c30..00000000000 --- a/docs/HOWTO/Custom_Layers_Guide.md +++ /dev/null @@ -1,350 +0,0 @@ -# Custom Operations Guide {#openvino_docs_HOWTO_Custom_Layers_Guide} - -The Intel® Distribution of OpenVINO™ toolkit supports neural network models trained with multiple frameworks, including -TensorFlow, Caffe, MXNet, Kaldi, PaddlePaddle, and ONNX. The list of supported operations (layers) is different for -each of the supported frameworks. To see the operations supported by your framework, refer to -[Supported Framework Layers](../MO_DG/prepare_model/Supported_Frameworks_Layers.md). - -Custom operations, that is those not included in the list, are not recognized by Model Optimizer out-of-the-box. Therefore, creating Intermediate Representation (IR) for a model using them requires additional steps. This guide illustrates the workflow for running inference on topologies featuring custom operations, allowing you to plug in your own implementation for existing or completely new operations. - -> **NOTE**: *Layer* is a legacy term for *operation* which came from Caffe framework. Currently it is not used. -> Refer to the [Deep Learning Network Intermediate Representation and Operation Sets in OpenVINO™](../MO_DG/IR_and_opsets.md) -> for more information on the topic. - -## Terms Used in This Guide - -- *Intermediate Representation (IR)* — OpenVINO's Neural Network format used by Inference Engine. It abstracts different frameworks and describes model topology, operations parameters, and weights. - -- *Operation* — an abstract concept of a math function selected for a specific purpose. Operations supported by - OpenVINO™ are listed in the supported operation set provided in the [Available Operations Sets](../ops/opset.md). - Examples of the operations are: [ReLU](../ops/activation/ReLU_1.md), [Convolution](../ops/convolution/Convolution_1.md), - [Add](../ops/arithmetic/Add_1.md), etc. - -- *Kernel* — The implementation of an operation function in the OpenVINO™ plugin, in this case, the math programmed (in - C++ and OpenCL) to perform the operation for a target hardware (CPU or GPU). - -- *Inference Engine Extension* — Device-specific module implementing custom operations (a set of kernels). - -## Custom Operation Support Overview - -There are three steps to support inference of a model with custom operation(s): -1. Add support for a custom operation in [Model Optimizer](../MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md) so -that it can generate the IR with the operation. -2. Create an operation set and implement a custom nGraph operation in it as described in the -[Custom nGraph Operation](../OV_Runtime_UG/Extensibility_DG/AddingNGraphOps.md). -3. Implement a customer operation in one of the [OpenVINO™ Runtime](../OV_Runtime_UG/openvino_intro.md) -plugins to support inference of this operation using a particular target hardware (CPU, GPU or VPU). - -To see the operations that are supported by each device plugin for the Inference Engine, refer to the -[Supported Devices](../OV_Runtime_UG/supported_plugins/Supported_Devices.md). - -> **NOTE**: If a device doesn't support a particular operation, an alternative to creating a new operation is to target -> an additional device using the HETERO device. 
The [Heterogeneous execution](../OV_Runtime_UG/hetero_execution.md) may be -> used to run an inference model on multiple devices allowing the unsupported operations on one device to "fallback" to -> run on another device (e.g., CPU) that does support those operations. - -### Custom Operation Support for the Model Optimizer - -Model Optimizer model conversion pipeline is described in detail in "Model Conversion Pipeline" section of [Model Optimizer Extensibility](../MO_DG/prepare_model/customize_model_optimizer/Customize_Model_Optimizer.md). It is best to read that article first for a better understanding of the following material. - -Model Optimizer provides an extensions mechanism to support new operations and implement custom model transformations to generate optimized IR. This mechanism is described in the "Model Optimizer Extensions" section of -[Model Optimizer Extensibility](../MO_DG/prepare_model/customize_model_optimizer/Customize_Model_Optimizer.md). - -Two types of Model Optimizer extensions should be implemented to support custom operations, at a minimum: -1. Operation class for a new operation. This class stores information about the operation, its attributes, shape inference function, attributes to be saved to an IR and some others internally used attributes. Refer to the "Model Optimizer Operation" section of [Model Optimizer Extensibility](../MO_DG/prepare_model/customize_model_optimizer/Customize_Model_Optimizer.md) for detailed instructions on how to implement it. -2. Operation attributes extractor. The extractor is responsible for parsing framework-specific representation of the -operation and uses corresponding operation class to update graph node attributes with necessary attributes of the -operation. Refer to the "Operation Extractor" section of -[Model Optimizer Extensibility](../MO_DG/prepare_model/customize_model_optimizer/Customize_Model_Optimizer.md) for detailed instructions on how to implement it. - -> **NOTE**: In some cases you may need to implement some transformation to support the operation. This topic is covered in the "Graph Transformation Extensions" section of [Model Optimizer Extensibility](../MO_DG/prepare_model/customize_model_optimizer/Customize_Model_Optimizer.md). - -## Custom Operation Extensions for the Inference Engine - -Inference Engine provides an extension mechanism to support new operations. This mechanism is described in [Inference Engine Extensibility Mechanism](../OV_Runtime_UG/Extensibility_DG/Intro.md). - -Each device plugin includes a library of optimized implementations to execute known operations which must be extended to execute a custom operation. The custom operation extension is implemented according to the target device: - -- Custom Operation CPU Extension - - A compiled shared library (`.so` or `.dll`) needed by the CPU Plugin for executing the custom operation - on a CPU. Refer to the [How to Implement Custom CPU Operations](../OV_Runtime_UG/Extensibility_DG/CPU_Kernel.md) for more - details. -- Custom Operation GPU Extension - - OpenCL source code (.cl) for the custom operation kernel that will be compiled to execute on the GPU along with an operation description file (.xml) needed by the GPU Plugin for the custom operation kernel. Refer to the [How to Implement Custom GPU Operations](../OV_Runtime_UG/Extensibility_DG/GPU_Kernel.md) for more details. 
-- Custom Operation VPU Extension - - OpenCL source code (.cl) for the custom operation kernel that will be compiled to execute on the VPU along with an operation description file (.xml) needed by the VPU Plugin for the custom operation kernel. Refer to [How to Implement Custom Operations for VPU](../OV_Runtime_UG/Extensibility_DG/VPU_Kernel.md) for more details. - -Also, it is necessary to implement nGraph custom operation according to [Custom nGraph Operation](../OV_Runtime_UG/Extensibility_DG/AddingNGraphOps.md) so the Inference Engine can read an IR with this -operation and correctly infer output tensor shape and type. - -## Enabling Magnetic Resonance Image Reconstruction Model -This chapter provides step-by-step instructions on how to enable the magnetic resonance image reconstruction model implemented in the [repository](https://github.com/rmsouza01/Hybrid-CS-Model-MRI/) using a custom operation on CPU. The example is prepared for a model generated from the repository with hash `2ede2f96161ce70dcdc922371fe6b6b254aafcc8`. - -### Download and Convert the Model to a Frozen TensorFlow Model Format -The original pre-trained model is provided in the hdf5 format which is not supported by OpenVINO directly and needs to be converted to TensorFlow frozen model format first. - -1. Download repository `https://github.com/rmsouza01/Hybrid-CS-Model-MRI`:
-```bash - git clone https://github.com/rmsouza01/Hybrid-CS-Model-MRI - git checkout 2ede2f96161ce70dcdc922371fe6b6b254aafcc8 -``` - -2. Convert pre-trained `.hdf5` to a frozen `.pb` graph using the following script (tested with TensorFlow==1.15.0 and -Keras==2.2.4) which should be executed from the root of the cloned repository:
-```py - import keras as K - import numpy as np - import Modules.frequency_spatial_network as fsnet - import tensorflow as tf - - under_rate = '20' - - stats = np.load("Data/stats_fs_unet_norm_" + under_rate + ".npy") - var_sampling_mask = np.load("Data/sampling_mask_" + under_rate + "perc.npy") - - model = fsnet.wnet(stats[0], stats[1], stats[2], stats[3], kshape = (5,5), kshape2=(3,3)) - model_name = "Models/wnet_" + under_rate + ".hdf5" - model.load_weights(model_name) - - inp = np.random.standard_normal([1, 256, 256, 2]).astype(np.float32) - np.save('inp', inp) - - sess = K.backend.get_session() - sess.as_default() - graph_def = sess.graph.as_graph_def() - graph_def = tf.graph_util.convert_variables_to_constants(sess, graph_def, ['conv2d_44/BiasAdd']) - with tf.gfile.FastGFile('wnet_20.pb', 'wb') as f: - f.write(graph_def.SerializeToString()) -``` - -As a result the TensorFlow frozen model file "wnet_20.pb" is generated. - -### Convert the Frozen TensorFlow Model to Intermediate Representation - -Firstly, open the model in TensorBoard or other TensorFlow model visualization tool. The model supports dynamic -batch dimension because the value for the batch dimension is not hardcoded in the model. Model Optimizer need to set all -dynamic dimensions to some specific value to create the IR, therefore specify the command line parameter `-b 1` to set -the batch dimension equal to 1. The actual batch size dimension can be changed at runtime using the Inference Engine API -described in the [Using Shape Inference](../OV_Runtime_UG/ShapeInference.md). Also refer to the General Conversion Parameters section in [Converting a Model to Intermediate Representation (IR)](../MO_DG/prepare_model/convert_model/Converting_Model.md) and [Convert Your TensorFlow* Model](../MO_DG/prepare_model/convert_model/Convert_Model_From_TensorFlow.md) -for more details and command line parameters used for the model conversion. - -```sh -mo --input_model /wnet_20.pb -b 1 -``` - -> **NOTE**: This conversion guide is applicable for the 2021.3 release of OpenVINO and starting from 2021.4 -> OpenVINO has supported this model out of the box. - -Model Optimizer produces the following error: -```bash -[ ERROR ] List of operations that cannot be converted to Inference Engine IR: -[ ERROR ] Complex (1) -[ ERROR ] lambda_2/Complex -[ ERROR ] IFFT2D (1) -[ ERROR ] lambda_2/IFFT2D -[ ERROR ] ComplexAbs (1) -[ ERROR ] lambda_2/Abs -[ ERROR ] Part of the nodes was not converted to IR. Stopped. -``` - -The error means that the Model Optimizer doesn't know how to handle 3 types of TensorFlow\* operations: "Complex", -"IFFT2D" and "ComplexAbs". In order to see more details about the conversion process run the model conversion with -additional parameter `--log_level DEBUG`. 
It is worth to mention the following lines from the detailed output: - -```bash -[ INFO ] Called "tf_native_tf_node_infer" for node "lambda_2/Complex" -[ ] [ DEBUG ] [ tf:228 ] Added placeholder with name 'lambda_2/lambda_3/strided_slice_port_0_ie_placeholder' -[ ] [ DEBUG ] [ tf:228 ] Added placeholder with name 'lambda_2/lambda_4/strided_slice_port_0_ie_placeholder' -[ ] [ DEBUG ] [ tf:241 ] update_input_in_pbs: replace input 'lambda_2/lambda_3/strided_slice' with input 'lambda_2/lambda_3/strided_slice_port_0_ie_placeholder' -[ ] [ DEBUG ] [ tf:249 ] Replacing input '0' of the node 'lambda_2/Complex' with placeholder 'lambda_2/lambda_3/strided_slice_port_0_ie_placeholder' -[ ] [ DEBUG ] [ tf:241 ] update_input_in_pbs: replace input 'lambda_2/lambda_4/strided_slice' with input 'lambda_2/lambda_4/strided_slice_port_0_ie_placeholder' -[ ] [ DEBUG ] [ tf:249 ] Replacing input '1' of the node 'lambda_2/Complex' with placeholder 'lambda_2/lambda_4/strided_slice_port_0_ie_placeholder' -[ ] [ DEBUG ] [ tf:148 ] Inferred shape of the output tensor with index '0' of the node 'lambda_2/Complex': '[ 1 256 256]' -[ ] [ DEBUG ] [ infer:145 ] Outputs: -[ ] [ DEBUG ] [ infer:32 ] output[0]: shape = [ 1 256 256], value = -[ ] [ DEBUG ] [ infer:129 ] -------------------- -[ ] [ DEBUG ] [ infer:130 ] Partial infer for lambda_2/IFFT2D -[ ] [ DEBUG ] [ infer:131 ] Op: IFFT2D -[ ] [ DEBUG ] [ infer:132 ] Inputs: -[ ] [ DEBUG ] [ infer:32 ] input[0]: shape = [ 1 256 256], value = -``` - -This is a part of the log of the partial inference phase of the model conversion. See the "Partial Inference" section on -the [Model Optimizer Extensibility](../MO_DG/prepare_model/customize_model_optimizer/Customize_Model_Optimizer.md) for -more information about this phase. Model Optimizer inferred output shape for the unknown operation of type "Complex" -using a "fallback" to TensorFlow. However, it is not enough to generate the IR because Model Optimizer doesn't know -which attributes of the operation should be saved to IR. So it is necessary to implement Model Optimizer extensions to -support these operations. - -Before going into the extension development it is necessary to understand what these unsupported operations do according -to the TensorFlow framework specification. - -* "Complex" - returns a tensor of complex type constructed from two real input tensors specifying real and imaginary -part of a complex number. -* "IFFT2D" - returns a tensor with inverse 2-dimensional discrete Fourier transform over the inner-most 2 dimensions of - an input. -* "ComplexAbs" - returns a tensor with absolute values of input tensor with complex numbers. - -The part of the model with all three unsupported operations is depicted below: - -![Unsupported sub-graph](img/unsupported_subgraph.png) - -This model uses complex numbers during the inference but Inference Engine does not support tensors of this data type. So -it is necessary to find a way how to avoid using tensors of such a type in the model. Fortunately, the complex tensor -appear as a result of "Complex" operation, is used as input in the "IFFT2D" operation then is passed to "ComplexAbs" -which produces real value tensor as output. So there are just 3 operations consuming/producing complex tensors in the -model. - -Let's design an OpenVINO operation "FFT" which get a single real number tensor describing the complex number and -produces a single real number tensor describing output complex tensor. 
This way the fact that the model uses complex -numbers is hidden inside the "FFT" operation implementation. The operation gets a tensor of shape `[N, H, W, 2]` and -produces the output tensor with the same shape, where the innermost dimension contains pairs of real numbers describing -the complex number (its real and imaginary part). As we will see further this operation will allow us to support the -model. The implementation of the Model Optimizer operation should be saved to `mo_extensions/ops/FFT.py` file: - -@snippet FFT.py fft:operation - -The attribute `inverse` is a flag specifying type of the FFT to apply: forward or inverse. - -See the "Model Optimizer Operation" section of [Model Optimizer Extensibility](../MO_DG/prepare_model/customize_model_optimizer/Customize_Model_Optimizer.md) for detailed instructions on how to implement the operation. - -Now it is necessary to implement extractor for the "IFFT2D" operation according to the -"Operation Extractor" section of [Model Optimizer Extensibility](../MO_DG/prepare_model/customize_model_optimizer/Customize_Model_Optimizer.md). The -following snippet provides two extractors: one for "IFFT2D", another one for "FFT2D", however only on of them is used in this example. The implementation should be saved to the file `mo_extensions/front/tf/FFT_ext.py`. - -@snippet FFT_ext.py fft_ext:extractor - -> **NOTE**: The graph is in inconsistent state after extracting node attributes because according to original operation -> "IFFT2D" semantic it should have an input consuming a tensor of complex numbers, but the extractor instantiated an -> operation "FFT" which expects a real tensor with specific layout. But the inconsistency will be resolved during -> applying front phase transformations discussed below. - -The output shape of the operation "AddV2" from the picture above is `[N, H, W, 2]`. Where the innermost dimension -contains pairs of real numbers describing the complex number (its real and imaginary part). The following "StridedSlice" -operations split the input tensor into 2 parts to get a tensor of real and a tensor of imaginary parts which are then -consumed with the "Complex" operation to produce a tensor of complex numbers. These "StridedSlice" and "Complex" -operations can be removed so the "FFT" operation will get a real value tensor encoding complex numbers. To achieve this -we implement the front phase transformation which searches for a pattern of two "StridedSlice" operations with specific -attributes producing data to "Complex" operation and removes it from the graph. Refer to the -"Pattern-Defined Front Phase Transformations" section of [Model Optimizer Extensibility](../MO_DG/prepare_model/customize_model_optimizer/Customize_Model_Optimizer.md) for more -information on how this type of transformation works. The code snippet should be saved to the file -`mo_extensions/front/tf/Complex.py`. - -@snippet Complex.py complex:transformation - -> **NOTE**: The graph is in inconsistent state because the "ComplexAbs" operation consumes complex value tensor but -> "FFT" produces real value tensor. - -Now lets implement a transformation which replace a "ComplexAbs" operation with a sub-graph of primitive operations -which calculate the result using the following formulae: \f$module(z) = \sqrt{real(z) \cdot real(z) + imag(z) \cdot imag(z)}\f$. -Original "IFFT2D" operation produces tensor of complex values, but the "FFT" operation produces a real value tensor with -the same format and shape as the input for the operation. 
So the input shape for the "ComplexAbs" will be `[N, H, W, 2]` -with the innermost dimension containing tuple with real and imaginary part of a complex number. In order to calculate -absolute values for the complex tensor we do the following: -1. Raise all elements in the power of 2. -2. Calculate a reduced sum over the innermost dimension. -3. Calculate a square root. - -The implementation should be saved to the file `mo_extensions/front/tf/ComplexAbs.py` and provided below: - -@snippet ComplexAbs.py complex_abs:transformation - -Now it is possible to convert the model using the following command line: -```sh -mo --input_model /wnet_20.pb -b 1 --extensions mo_extensions/ -``` - -The sub-graph corresponding to the originally non-supported one is depicted in the image below: - -![Converted sub-graph](img/converted_subgraph.png) - -> **NOTE**: Model Optimizer performed conversion of the model from NHWC to NCHW layout that is why the dimension with -> the value 2 moved to another position. - -### Inference Engine Extension Implementation -Now it is necessary to implement the extension for the CPU plugin with operation "FFT" introduced previously. The code -below is based on the template extension described in [Inference Engine Extensibility Mechanism](../OV_Runtime_UG/Extensibility_DG/Intro.md). - -#### CMake Build File -The first step is to create a CMake configuration file which builds the extension. The content of the "CMakeLists.txt" -file is the following: - -@snippet template_extension/old/CMakeLists.txt cmake:extension - -The CPU FFT kernel implementation uses OpenCV to perform the FFT that is why the extension library is linked with -`opencv_core` which comes with the OpenVINO. - -#### Custom nGraph Operation "FFT" Implementation -The next step is to create the nGraph operation FFT. The header file "fft_op.hpp" has the following content: - -@snippet template_extension/old/fft_op.hpp fft_op:header - -The operation has just one boolean attribute `inverse`. Implementation of the necessary nGraph operation functions are -in the `fft_op.cpp` file with the following content: - -@snippet template_extension/old/fft_op.cpp fft_op:implementation - -Refer to the [Custom nGraph Operation](../OV_Runtime_UG/Extensibility_DG/AddingNGraphOps.md) for more details. - -#### CPU FFT Kernel Implementation -The operation implementation for CPU plugin uses OpenCV to perform the FFT. The header file "fft_kernel.hpp" has the -following content: - -@snippet template_extension/old/fft_kernel.hpp fft_kernel:header - -The "fft_kernel.cpp" with the implementation of the CPU has the following content: - -@snippet template_extension/old/fft_kernel.cpp fft_kernel:implementation - -Refer to the [How to Implement Custom CPU Operations](../OV_Runtime_UG/Extensibility_DG/CPU_Kernel.md) for more details. - -#### Extension Library Implementation -The last step is to create an extension library "extension.cpp" and "extension.hpp" which will include the FFT -operation for the CPU plugin. The code of the library is described in the [Extension Library](../OV_Runtime_UG/Extensibility_DG/Extension.md). - -### Building and Running the Custom Extension -To build the extension, run the following:
-```bash -mkdir build && cd build -source /opt/intel/openvino_2022/setupvars.sh -cmake .. -DCMAKE_BUILD_TYPE=Release -make --jobs=$(nproc) -``` - -The result of this command is a compiled shared library (`.so` or `.dll`). It should be loaded in the -application using `Core` class instance method `AddExtension` like this -`core.AddExtension(std::make_shared(compiled_library_file_name), "CPU");`. - -To test that the extension is implemented correctly we can run the "mri_reconstruction_demo" with the following content: - -@snippet mri_reconstruction_demo.py mri_demo:demo - -The script can be executed using the following command line: -```bash -python3 mri_reconstruction_demo.py \ - -m /wnet_20.xml \ - -i .npy \ - -p /Data/sampling_mask_20perc.npy \ - -l /libtemplate_extension.so \ - -d CPU -``` - -## Additional Resources - -- Intel® Distribution of OpenVINO™ toolkit home page: [https://software.intel.com/en-us/openvino-toolkit](https://software.intel.com/en-us/openvino-toolkit) -- OpenVINO™ toolkit online documentation: [https://docs.openvino.ai](https://docs.openvino.ai) -- [Model Optimizer Developer Guide](../MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md) -- [Model Optimizer Extensibility](../MO_DG/prepare_model/customize_model_optimizer/Customize_Model_Optimizer.md) -- [Inference Engine Extensibility Mechanism](../OV_Runtime_UG/Extensibility_DG/Intro.md) -- [OpenVINO™ Toolkit Samples Overview](../OV_Runtime_UG/Samples_Overview.md) -- [Overview of OpenVINO™ Toolkit Pre-Trained Models](@ref omz_models_group_intel) -- For IoT Libraries and Code Samples see the [Intel® IoT Developer Kit](https://github.com/intel-iot-devkit). - -## Converting Models: - -- [Convert Your Caffe Model](../MO_DG/prepare_model/convert_model/Convert_Model_From_Caffe.md) -- [Convert Your TensorFlow Model](../MO_DG/prepare_model/convert_model/Convert_Model_From_TensorFlow.md) -- [Convert Your MXNet Model](../MO_DG/prepare_model/convert_model/Convert_Model_From_MxNet.md) -- [Convert Your Kaldi Model](../MO_DG/prepare_model/convert_model/Convert_Model_From_Kaldi.md) -- [Convert Your ONNX Model](../MO_DG/prepare_model/convert_model/Convert_Model_From_ONNX.md) -- [Convert Your PaddlePaddle Model](../MO_DG/prepare_model/convert_model/Convert_Model_From_Paddle.md) diff --git a/docs/MO_DG/prepare_model/Model_Optimizer_FAQ.md b/docs/MO_DG/prepare_model/Model_Optimizer_FAQ.md index 91106d0ee86..dd131a5c972 100644 --- a/docs/MO_DG/prepare_model/Model_Optimizer_FAQ.md +++ b/docs/MO_DG/prepare_model/Model_Optimizer_FAQ.md @@ -214,7 +214,7 @@ One of the layers in the specified topology might not have inputs or values. Ple #### 24. What does the message "Part of the nodes was not translated to IE. Stopped" mean? -Some of the layers are not supported by the Inference Engine and cannot be translated to an Intermediate Representation. You can extend the Model Optimizer by allowing generation of new types of layers and implement these layers in the dedicated Inference Engine plugins. For more information, refer to the [Custom Layers Guide](../../HOWTO/Custom_Layers_Guide.md) and [Inference Engine Extensibility Mechanism](../../OV_Runtime_UG/Extensibility_DG/Intro.md) +Some of the layers are not supported by the Inference Engine and cannot be translated to an Intermediate Representation. You can extend the Model Optimizer by allowing generation of new types of layers and implement these layers in the dedicated Inference Engine plugins. 
For more information, refer to the [OpenVINO™ Extensibility Mechanism](../../Extensibility_UG/Intro.md) #### 25. What does the message "While creating an edge from .. to .. : node name is undefined in the graph. Check correctness of the input model" mean? @@ -268,7 +268,7 @@ Model Optimizer tried to write an event file in the specified directory but fail #### 37. What does the message "There is no registered 'infer' function for node with op = .. . Please implement this function in the extensions" mean? -Most likely, you tried to extend Model Optimizer with a new primitive, but did not specify an infer function. For more information on extensions, see [Custom Layers Guide](../../HOWTO/Custom_Layers_Guide.md). +Most likely, you tried to extend Model Optimizer with a new primitive, but did not specify an infer function. For more information on extensions, see [OpenVINO™ Extensibility Mechanism](../../Extensibility_UG/Intro.md). #### 38. What does the message "Stopped shape/value propagation at node" mean? @@ -300,7 +300,7 @@ Most likely, there is a problem with the specified file for model. The file exis #### 45. What does the message "Found custom layer. Model Optimizer does not support this layer. Please, register it in CustomLayersMapping.xml or implement extension" mean? -This means that the layer `{layer_name}` is not supported in the Model Optimizer. You can find a list of all unsupported layers in the corresponding section. You should implement the extensions for this layer ([Custom Layers Guide](../../HOWTO/Custom_Layers_Guide.md)). +This means that the layer `{layer_name}` is not supported in the Model Optimizer. You can find a list of all unsupported layers in the corresponding section. You should implement the extensions for this layer ([OpenVINO™ Extensibility Mechanism](../../Extensibility_UG/Intro.md)). #### 46. What does the message "Custom replacement configuration file does not exist" mean? @@ -308,7 +308,7 @@ Path to the custom replacement configuration file was provided with the `--trans #### 47. What does the message "Extractors collection have case insensitive duplicates" mean? -When extending Model Optimizer with new primitives keep in mind that their names are case insensitive. Most likely, another operation with the same name is already defined. For more information, see [Custom Layers Guide](../../HOWTO/Custom_Layers_Guide.md). +When extending Model Optimizer with new primitives keep in mind that their names are case insensitive. Most likely, another operation with the same name is already defined. For more information, see [OpenVINO™ Extensibility Mechanism](../../Extensibility_UG/Intro.md). #### 48. What does the message "Input model name is not in an expected format, cannot extract iteration number" mean? @@ -340,7 +340,7 @@ Please, make sure that inputs are defined and have correct shapes. You can use ` #### 55. What does the message "Attempt to register of custom name for the second time as class. Note that custom names are case-insensitive" mean? -When extending Model Optimizer with new primitives keep in mind that their names are case insensitive. Most likely, another operation with the same name is already defined. For more information, see [Custom Layers Guide](../../HOWTO/Custom_Layers_Guide.md). +When extending Model Optimizer with new primitives keep in mind that their names are case insensitive. Most likely, another operation with the same name is already defined. 
For more information, see [OpenVINO™ Extensibility Mechanism](../../Extensibility_UG/Intro.md). #### 56. What does the message "Both --input_shape and --batch were provided. Please, provide only one of them" mean? @@ -492,7 +492,7 @@ For more information, refer to [Converting a MXNet* Model](convert_model/Convert Model Optimizer tried to load the model that contains some unsupported operations. If you want to convert model that contains unsupported operations you need to prepare extension for all such operations. -For more information, refer to [Custom Layers Guide](../../HOWTO/Custom_Layers_Guide.md). +For more information, refer to [OpenVINO™ Extensibility Mechanism](../../Extensibility_UG/Intro.md). #### 87. What does the message "Can not register Op ... Please, call function 'register_caffe_python_extractor' with parameter 'name'" mean? @@ -538,7 +538,7 @@ Note that the first call register_caffe_python_extractor(ProposalPythonExa The second call prevents Model Optimizer from using this extension as if it is an extension for a layer with type `Proposal`. Otherwise, this layer can be chosen as an implementation of extension that can lead to potential issues. -For more information, refer to the [Custom Layers Guide](../../HOWTO/Custom_Layers_Guide.md). +For more information, refer to the [OpenVINO™ Extensibility Mechanism](../../Extensibility_UG/Intro.md). #### 88. What does the message "Model Optimizer is unable to calculate output shape of Memory node .." mean? @@ -573,7 +573,7 @@ This message means that if you have model with custom layers and its json file h lower than 1.0.0, Model Optimizer does not support such topologies. If you want to convert it you have to rebuild MXNet with unsupported layers or generate new json with MXNet version 1.0.0 and higher. Also you need to implement Inference Engine extension for used custom layers. -For more information, refer to the [Custom Layers Guide](../../HOWTO/Custom_Layers_Guide.md). +For more information, refer to the [OpenVINO™ Extensibility Mechanism](../../Extensibility_UG/Intro.md). #### 97. What does the message "Graph contains a cycle. Can not proceed .." mean? @@ -586,7 +586,7 @@ For Tensorflow: For all frameworks: 1. [Replace cycle containing Sub-graph in Model Optimizer](customize_model_optimizer/Subgraph_Replacement_Model_Optimizer.md) -2. [Custom Layers Guide](../../HOWTO/Custom_Layers_Guide.md) +2. [OpenVINO™ Extensibility Mechanism](../../Extensibility_UG/Intro.md) or * Edit network in original framework to exclude cycle. diff --git a/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_TensorFlow.md b/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_TensorFlow.md index 80fd4faa207..2628131a5e4 100644 --- a/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_TensorFlow.md +++ b/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_TensorFlow.md @@ -354,7 +354,7 @@ TensorFlow*-specific parameters: mo --input_model inception_v1.pb -b 1 --tensorboard_logdir /tmp/log_dir --output_dir ``` -* Launching the Model Optimizer for a model with custom TensorFlow operations (refer to the [TensorFlow* documentation](https://www.tensorflow.org/extend/adding_an_op)) implemented in C++ and compiled into the shared library `my_custom_op.so`. Model Optimizer falls back to TensorFlow to infer output shape of operations implemented in the library if a custom TensorFlow operation library is provided. If it is not provided, a custom operation with an inference function is needed. 
For more information about custom operations, refer to the [Custom Layers Guide](../../../HOWTO/Custom_Layers_Guide.md). +* Launching the Model Optimizer for a model with custom TensorFlow operations (refer to the [TensorFlow* documentation](https://www.tensorflow.org/extend/adding_an_op)) implemented in C++ and compiled into the shared library `my_custom_op.so`. Model Optimizer falls back to TensorFlow to infer output shape of operations implemented in the library if a custom TensorFlow operation library is provided. If it is not provided, a custom operation with an inference function is needed. For more information about custom operations, refer to the [OpenVINO™ Extensibility Mechanism](../../../Extensibility_UG/Intro.md). ```sh mo --input_model custom_model.pb --tensorflow_custom_layer_libraries ./my_custom_op.so --output_dir ``` diff --git a/docs/MO_DG/prepare_model/convert_model/Converting_Model.md b/docs/MO_DG/prepare_model/convert_model/Converting_Model.md index 468688f3f4e..c5c3bb1695a 100644 --- a/docs/MO_DG/prepare_model/convert_model/Converting_Model.md +++ b/docs/MO_DG/prepare_model/convert_model/Converting_Model.md @@ -269,7 +269,7 @@ mo --input_model bvlc_alexnet.caffemodel --reverse_input_channels --mean_values ``` Launch the Model Optimizer for the Caffe bvlc_alexnet model with extensions listed in specified directories, specified mean_images binaryproto - file. For more information about extensions, please refer to the [Custom Layers Guide](../../../HOWTO/Custom_Layers_Guide.md). + file. For more information about extensions, please refer to the [OpenVINO™ Extensibility Mechanism](../../../Extensibility_UG/Intro.md). ```sh mo --input_model bvlc_alexnet.caffemodel --extensions /home/,/some/other/path/ --mean_file /path/to/binaryproto --output_dir ``` diff --git a/docs/MO_DG/prepare_model/customize_model_optimizer/Customize_Model_Optimizer.md b/docs/MO_DG/prepare_model/customize_model_optimizer/Customize_Model_Optimizer.md index dd98faefda7..77ed9d02cb7 100644 --- a/docs/MO_DG/prepare_model/customize_model_optimizer/Customize_Model_Optimizer.md +++ b/docs/MO_DG/prepare_model/customize_model_optimizer/Customize_Model_Optimizer.md @@ -1261,5 +1261,5 @@ Refer to the `extensions/back/GatherNormalizer.py` for the example of a such typ * [Deep Learning Network Intermediate Representation and Operation Sets in OpenVINO™](../../IR_and_opsets.md) * [Converting a Model to Intermediate Representation (IR)](../convert_model/Converting_Model.md) * [OpenVINO Model Representation](../../../OV_Runtime_UG/model_representation.md) -* [Inference Engine Extensibility Mechanism](../../../OV_Runtime_UG/Extensibility_DG/Intro.md) +* [OpenVINO™ Extensibility Mechanism](../../../Extensibility_UG/Intro.md) * [Extending the Model Optimizer with Caffe* Python Layers](Extending_Model_Optimizer_with_Caffe_Python_Layers.md) diff --git a/docs/OV_Runtime_UG/Extensibility_DG/AddingNGraphOps.md b/docs/OV_Runtime_UG/Extensibility_DG/AddingNGraphOps.md deleted file mode 100644 index 79ab802a5a1..00000000000 --- a/docs/OV_Runtime_UG/Extensibility_DG/AddingNGraphOps.md +++ /dev/null @@ -1,82 +0,0 @@ -# Custom nGraph Operations {#openvino_docs_IE_DG_Extensibility_DG_AddingNGraphOps} - -Inference Engine Extension API allows you to register operation sets (opsets) with custom nGraph operations to support models with operations which OpenVINO™ does not support out-of-the-box. 
- -Besides creating custom nGraph operations, to [support custom operations](../../HOWTO/Custom_Layers_Guide.md) in your model you must also create a Model Optimizer extension for the custom operations and an Inference Engine device plugin extension for the device you will use for inference. - -## Operation Class - -To add your custom nGraph operation, create a new class that extends `ngraph::Op`, which is in turn derived from `ngraph::Node`, the base class for all graph operations in nGraph. Follow the steps below to add a custom nGraph operation: - -1. Add the `NGRAPH_RTTI_DECLARATION` and `NGRAPH_RTTI_DEFINITION` macros which define a `NodeTypeInfo` object that identifies the type of the operation to the graph users and helps with dynamic type resolution. The type info of an nGraph operation currently consists of a string identifier and a version number, but this may change in the future. - -2. Implement constructors that optionally take the operation inputs and attributes as parameters. - -3. Override the shape inference method `validate_and_infer_types`. This method is called multiple times during graph manipulations to determine the shapes and element types of the operations outputs. To access the input shapes and input element types, use the `get_input_partial_shape()` and `get_input_element_type()` methods of `ngraph::Node`. Set the inferred shape and element type of the output using `set_output_type`. - -4. Override the `clone_with_new_inputs` method, which enables graph manipulation routines to create copies of this operation and connect it to different nodes during optimization. - -5. Override the `visit_attributes` method, which enables serialization and deserialization of operation attributes. An `AttributeVisitor` is passed to the method, and the implementation is expected to walk over all the attributes in the op using the type-aware `on_attribute` helper. Helpers are already implemented for standard C++ types like `int64_t`, `float`, `bool`, `vector`, and for existing nGraph defined types. - -6. Override `evaluate`, which is an optional method that enables the application of constant folding if there is a custom operation on the constant branch. If your operation contains `evaluate` method you also need to override the `has_evaluate` method, this method allow to get information about availability of `evaluate` method for the operation. - -Based on that, declaration of an operation class can look as follows: - -@snippet template_extension/old/op.hpp op:header - -### Class Fields - -The provided implementation has several fields: - - * `add` of type `int64_t` is an attribute of a custom operation - * `type_info` of type `ngraph::NodeTypeInfo` defines type and version of an operation - -### Operation Constructors - -nGraph operation contains two constructors: -* Default constructor, which enables you to create an operation without attributes -* Constructor that creates and validates an operation with specified inputs and attributes - -@snippet template_extension/old/op.cpp op:ctor - -### `validate_and_infer_types()` - -`ngraph::Node::validate_and_infer_types` method validates operation attributes and calculates output shapes using attributes of the operation. - -@snippet template_extension/old/op.cpp op:validate - -### `clone_with_new_inputs()` - -`ngraph::Node::clone_with_new_inputs` method creates a copy of the nGraph operation with new inputs. 
- -@snippet template_extension/old/op.cpp op:copy - -### `visit_attributes()` - -`ngraph::Node::visit_attributes` method enables you to visit all operation attributes. - -@snippet template_extension/old/op.cpp op:visit_attributes - -### `evaluate()` and `has_evaluate()` - -`ngraph::Node::evaluate` method enables you to apply constant folding to an operation. - -@snippet template_extension/old/op.cpp op:evaluate - -## Register Custom Operations in Extension Class - -To add custom operations to the [Extension](Extension.md) class, create an operation set with custom operations and implement the `InferenceEngine::IExtension::getOpSets` method: - -@snippet template_extension/old/extension.cpp extension:getOpSets - -This method returns a map of opsets that exist in the [extension library](Extension.md). -nGraph provides an opset mechanism to group operations into clusters. Different opsets distinguish between different versions of one operation. - -When specifying opset names, follow the rules below: -* Use unique opset names. -* Do not use the following built-in opset names: `extension`, `experimental`, `opset1`, `opset2`, `opset3`, ... , `opsetN`. -* [Make sure that the Model Optimizer](../../HOWTO/Custom_Layers_Guide.md) and your extension use the same opset names. -* IR v10 operations have the mandatory `version` attribute specifying the opset. -Operations from the default opset cannot be redefined. - -Use a custom opset to create a new operation or extend functionality of an existing operation from another opset. diff --git a/docs/OV_Runtime_UG/Extensibility_DG/Building.md b/docs/OV_Runtime_UG/Extensibility_DG/Building.md deleted file mode 100644 index b4cf5a1f84d..00000000000 --- a/docs/OV_Runtime_UG/Extensibility_DG/Building.md +++ /dev/null @@ -1,19 +0,0 @@ -# Build Extension Library Using CMake* {#openvino_docs_IE_DG_Extensibility_DG_Building} - -Inference Engine build infrastructure provides the Inference Engine Package for application development. - -To configure the build of your extension library, use the following CMake script: - -@snippet template_extension/old/CMakeLists.txt cmake:extension - -This CMake script finds the Inference Engine and nGraph using the `find_package` CMake command. - -To build the extension library, run the commands below: - -```sh -$ cd template_extension/old -$ mkdir build -$ cd build -$ cmake -DOpenVINO_DIR=[OpenVINO_DIR] ../ -$ cmake --build . -``` diff --git a/docs/OV_Runtime_UG/Extensibility_DG/CPU_Kernel.md b/docs/OV_Runtime_UG/Extensibility_DG/CPU_Kernel.md deleted file mode 100644 index 223a1401600..00000000000 --- a/docs/OV_Runtime_UG/Extensibility_DG/CPU_Kernel.md +++ /dev/null @@ -1,71 +0,0 @@ -# CPU Kernel Custom Operations {#openvino_docs_IE_DG_Extensibility_DG_CPU_Kernel} - -To enable operations not supported by OpenVINO™ out of the box, you need a custom extension for Model Optimizer, a custom nGraph operation set, and a custom kernel for the device you will target. This page describes custom kernel support for the CPU device. - -The primary means of the performance of the CPU codepath in the Inference Engine is the Intel® Math Kernel Library for Deep Neural Networks (Intel® MKL-DNN), and new CPU kernels extend the Inference Engine plugin for the Intel MKL-DNN. Implementing the InferenceEngine::ILayerExecImpl API call defines a general CPU-side extension. There are no Intel MKL-DNN specifics in the way you need to implement a kernel. 
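
Before the detailed walkthrough below, here is a minimal, illustrative sketch of the shape such an implementation takes. The class name `MyOpImpl` and the stub method bodies are placeholders for illustration only, not part of the template extension; the complete, working version is shown in the snippets that follow.

```cpp
#include <ie_iextension.h>
#include <ie_blob.h>

#include <vector>

// Hypothetical skeleton of a CPU kernel implementation; method bodies are stubs.
class MyOpImpl : public InferenceEngine::ILayerExecImpl {
public:
    InferenceEngine::StatusCode getSupportedConfigurations(
            std::vector<InferenceEngine::LayerConfig>& conf,
            InferenceEngine::ResponseDesc* resp) noexcept override {
        InferenceEngine::LayerConfig layerConfig;
        layerConfig.dynBatchSupport = false;
        // A real implementation fills layerConfig.inConfs / layerConfig.outConfs with
        // InferenceEngine::DataConfig entries describing the supported tensor layouts.
        conf.push_back(layerConfig);
        return InferenceEngine::StatusCode::OK;
    }

    InferenceEngine::StatusCode init(InferenceEngine::LayerConfig& config,
                                     InferenceEngine::ResponseDesc* resp) noexcept override {
        // Validate the configuration selected by the plugin at runtime.
        return InferenceEngine::StatusCode::OK;
    }

    InferenceEngine::StatusCode execute(std::vector<InferenceEngine::Blob::Ptr>& inputs,
                                        std::vector<InferenceEngine::Blob::Ptr>& outputs,
                                        InferenceEngine::ResponseDesc* resp) noexcept override {
        // The actual computation over the input/output blobs goes here.
        return InferenceEngine::StatusCode::OK;
    }
};
```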
- -## Implementation Class - -All custom kernels for the CPU plugin should be inherited from the InferenceEngine::ILayerExecImpl interface. -Based on that, declaration of a kernel implementation class can look as follows: - -@snippet template_extension/old/cpu_kernel.hpp cpu_implementation:header - -### Class Fields - -The provided implementation has several fields: - - * `add` of the type `int64_t` is an attribute of a custom operation. - * `inShape` of the type `ngraph::Shape` is an input shape. - * `outShape` of the type `ngraph::Shape` is an output shape. - * `error` of the type `std::string` is a field to handle errors from a constructor. - -### Constructor of Implementation - -An implementation constructor checks parameters of an nGraph operation, stores required attributes, and stores an error message in case of an error. - -@snippet template_extension/old/cpu_kernel.cpp cpu_implementation:ctor - -### `getSupportedConfigurations` - -The InferenceEngine::ILayerExecImpl::getSupportedConfigurations method returns all supported configuration formats (input/output tensor layouts) for your implementation. To specify formats of data, use InferenceEngine::TensorDesc. - -@snippet template_extension/old/cpu_kernel.cpp cpu_implementation:getSupportedConfigurations - -### `init` - -The InferenceEngine::ILayerExecImpl::init method gets a runtime-selected configuration from a vector that is populated from the `getSupportedConfigurations` method and checks the parameters: - -@snippet template_extension/old/cpu_kernel.cpp cpu_implementation:init - -### `execute` - -The InferenceEngine::ILayerExecImpl::execute method accepts and processes the actual tensors as input/output blobs: - -@snippet template_extension/old/cpu_kernel.cpp cpu_implementation:execute - -## Register Implementation in `Extension` Class - -To register custom kernel implementation in the [Extension](Extension.md) class, implement the following methods: - -* getImplTypes -* getImplementation - -### getImplTypes - -InferenceEngine::IExtension::getImplTypes returns a vector of implementation types for an operation. - -@snippet template_extension/old/extension.cpp extension:getImplTypes - -### getImplementation - -InferenceEngine::IExtension::getImplementation returns the kernel implementation with a specified type for an operation. - -@snippet template_extension/old/extension.cpp extension:getImplementation - - -## Load Extension with Executable Kernels to Plugin - -Use the `AddExtension` method of the general plugin interface to load your primitives: - -@snippet snippets/CPU_Kernel.cpp part0 diff --git a/docs/OV_Runtime_UG/Extensibility_DG/Custom_ONNX_Ops.md b/docs/OV_Runtime_UG/Extensibility_DG/Custom_ONNX_Ops.md deleted file mode 100644 index 772bfb9da90..00000000000 --- a/docs/OV_Runtime_UG/Extensibility_DG/Custom_ONNX_Ops.md +++ /dev/null @@ -1,78 +0,0 @@ -# Custom ONNX* Operators {#openvino_docs_IE_DG_Extensibility_DG_Custom_ONNX_Ops} - -The ONNX\* importer provides a mechanism to register custom ONNX operators based on predefined or custom nGraph operations. -The function responsible for registering a new operator is called `ngraph::onnx_import::register_operator` and defined in the `onnx_import/onnx_utils.hpp` file. - -## Register Custom ONNX Operator Based on Predefined nGraph Operations - -The steps below explain how to register a custom ONNX operator, for example, CustomRelu, in a domain called `com.example`. 
-CustomRelu is defined as follows: -``` -x >= 0 => f(x) = x * alpha -x < 0 => f(x) = x * beta -``` -where `alpha` and `beta` are float constants. - -1. Include headers: - -@snippet onnx_custom_op/onnx_custom_op.cpp onnx_custom_op:headers - -2. Register the CustomRelu operator in the ONNX importer: - -@snippet onnx_custom_op/onnx_custom_op.cpp onnx_custom_op:register_operator - -The `register_operator` function takes four arguments: op_type, opset version, domain, and a function object. -The function object is a user-defined function that takes `ngraph::onnx_import::Node` as an input and based on that, returns a graph with nGraph operations. -The `ngraph::onnx_import::Node` class represents a node in an ONNX model. It provides functions to fetch input node(s) using `get_ng_inputs`, attribute value using `get_attribute_value`, and many more. See the `onnx_import/core/node.hpp` file for the full class declaration. - -New operator registration must happen before an ONNX model is read. For example, if an model uses the `CustomRelu` operator, call `register_operator("CustomRelu", ...)` before InferenceEngine::Core::ReadNetwork. -Reregistering ONNX operators within the same process is supported. If you register an existing operator, you get a warning. - -The example below demonstrates an exemplary model that requires a previously created `CustomRelu` operator: -``` -@include onnx_custom_op/custom_relu_model.prototxt -``` - -This model is in text format, so before it can be passed to Inference Engine, it has to be converted to binary using: -```py -from google.protobuf import text_format -import onnx - -with open("custom_relu_model.prototxt") as in_file: - proto = onnx.ModelProto() - text_format.Parse(in_file.read(), proto, allow_field_number=True) - s = onnx._serialize(proto) - onnx._save_bytes(s, "custom_relu_model.onnx") -``` - - -To create a graph with nGraph operations, visit [Custom nGraph Operations](AddingNGraphOps.md). -For a complete list of predefined nGraph operators, visit [Available Operations Sets](../../ops/opset.md). - -If you do not need an operator anymore, unregister it by calling `unregister_operator`. The function takes three arguments: `op_type`, `version`, and `domain`. - -@snippet onnx_custom_op/onnx_custom_op.cpp onnx_custom_op:unregister_operator - -## Register Custom ONNX Operator Based on Custom nGraph Operations - -The same principles apply when registering a custom ONNX operator based on custom nGraph operations. -This example shows how to register a custom ONNX operator based on `Operation` presented in [this tutorial](AddingNGraphOps.md), which is used in [TemplateExtension](Extension.md): - -@snippet template_extension/old/extension.cpp extension:ctor - -Here, the `register_operator` function is called in the constructor of Extension. The constructor makes sure that the function is called before InferenceEngine::Core::ReadNetwork, because InferenceEngine::Core::AddExtension must be called before a model with a custom operator is read. - -The example below demonstrates how to unregister an operator from the destructor of Extension: -@snippet template_extension/old/extension.cpp extension:dtor - -> **REQUIRED**: It is mandatory to unregister a custom ONNX operator if it is defined in a dynamic shared library. - -## Requirements for Building with CMake - -A program that uses the `register_operator` functionality requires `openvino::core` and `openvino::frontend::onnx` libraries in addition to the OpenVINO Inference Runtime. 
-The `openvino::frontend::onnx` is a component of the `OpenVINO` package , so `find_package(OpenVINO REQUIRED COMPONENTS ONNX)` can find both. -Those libraries need to be passed to the `target_link_libraries` command in the CMakeLists.txt file. - -See CMakeLists.txt below for reference: - -@snippet onnx_custom_op/CMakeLists.txt cmake:onnx_custom_op diff --git a/docs/OV_Runtime_UG/Extensibility_DG/Extension.md b/docs/OV_Runtime_UG/Extensibility_DG/Extension.md deleted file mode 100644 index 1925ce9e25e..00000000000 --- a/docs/OV_Runtime_UG/Extensibility_DG/Extension.md +++ /dev/null @@ -1,29 +0,0 @@ -# Extension Library {#openvino_docs_IE_DG_Extensibility_DG_Extension} - -Inference Engine provides an InferenceEngine::IExtension interface, which defines the interface for Inference Engine Extension libraries. -Inherit all extension libraries from this interface. The example below contains an implementation of two operations: `Template` -used as an example in this document and `FFT` used as a more complex example from the [Custom Operations Guide](../../HOWTO/Custom_Layers_Guide.md). - -> **NOTE**: `FFT` operation is implemented using the OpenCV library functions `cv::dft` and `cv::idft`. - -Based on that, the declaration of an extension class can look as follows: - -@snippet template_extension/old/extension.hpp extension:header - -The extension library should use `IE_DEFINE_EXTENSION_CREATE_FUNCTION` macro to export a function, which creates an `Extension` class: - -@snippet template_extension/old/extension.cpp extension:CreateExtension - -Also, an `Extension` object should implement the following methods: - -* InferenceEngine::IExtension::Release deletes an extension object. - -* InferenceEngine::IExtension::GetVersion returns information about the version of the library. - -@snippet template_extension/old/extension.cpp extension:GetVersion - -Implement the InferenceEngine::IExtension::getOpSets method if the extension contains custom layers. -Read [Custom nGraph Operation](AddingNGraphOps.md) for more information. - -To integrate execution kernels to the extension library, read [How to Implement Custom CPU Operations](CPU_Kernel.md). -To register a custom ONNX\* operator to the extension library, read [Custom ONNX Operators](Custom_ONNX_Ops.md). diff --git a/docs/OV_Runtime_UG/Extensibility_DG/GPU_Kernel.md b/docs/OV_Runtime_UG/Extensibility_DG/GPU_Kernel.md deleted file mode 100644 index 3305c00085b..00000000000 --- a/docs/OV_Runtime_UG/Extensibility_DG/GPU_Kernel.md +++ /dev/null @@ -1,233 +0,0 @@ -# How to Implement Custom GPU Operations {#openvino_docs_IE_DG_Extensibility_DG_GPU_Kernel} - -To enable operations not supported by OpenVINO™ out of the box, you need a custom extension for Model Optimizer, a custom nGraph operation set, and a custom kernel for the device you will target. This page describes custom kernel support for the GPU device. - -The GPU codepath abstracts many details about OpenCL\*. You need to provide the kernel code in OpenCL C and an XML configuration file that connects the kernel and its parameters to the parameters of the operation. 
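
As an orientation, the sketch below shows one way an application might point the GPU plugin at such a configuration file before loading a network. The file names are placeholders; the two supported ways of supplying the configuration file are described next.

```cpp
#include <inference_engine.hpp>

int main() {
    InferenceEngine::Core core;
    // Register the custom kernel configuration file (name is a placeholder)
    // with the GPU plugin before reading the network that uses the custom operation.
    core.SetConfig({{InferenceEngine::PluginConfigParams::KEY_CONFIG_FILE,
                     "custom_layer_example.xml"}},
                   "GPU");
    auto network = core.ReadNetwork("model_with_custom_op.xml");
    auto executableNetwork = core.LoadNetwork(network, "GPU");
    return 0;
}
```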
- -There are two options for using the custom operation configuration file: - -* Include a section with your kernels into the global automatically-loaded `cldnn_global_custom_kernels/cldnn_global_custom_kernels.xml` file, which is hosted in the `/runtime/bin` folder -* Call the `InferenceEngine::Core::SetConfig()` method from your application with the `InferenceEngine::PluginConfigParams::KEY_CONFIG_FILE` key and the configuration file name as a value before loading the network that uses custom operations to the plugin: - -@snippet snippets/GPU_Kernel.cpp part0 - -All Inference Engine samples, except the trivial `hello_classification`, and most Open Model Zoo demos -feature a dedicated command-line option `-c` to load custom kernels. For example, to load custom operations for the classification sample, run the command below: -```sh -$ ./classification_sample -m /bvlc_alexnet_fp16.xml -i ./validation_set/daily/227x227/apron.bmp -d GPU - -c /custom_layer_example.xml -``` - -## Configuration File Format - -The configuration file is expected to follow the `.xml` file structure -with a node of the type `CustomLayer` for every custom operation you provide. - -The definitions described in the sections below use the following notations: - -Notation | Description ----|--- -(0/1) | Can have zero or one instance of this node or attribute -(1) | Must have only one instance of this node or attribute -(0+) | Can have any number of instances of this node or attribute -(1+) | Can have one or more instances of this node or attribute - -### CustomLayer Node and Sub-Node Structure - -`CustomLayer` node contains the entire configuration for a single custom operation. - -| Attribute Name |\# | Description | -|-----|-----|-----| -| `name` | (1) | The name of the operation type to be used. This name should be identical to the type used in the IR.| -| `type` | (1) | Must be `SimpleGPU`. | -| `version` | (1) | Must be `1`. | - -**Sub-nodes**: `Kernel` (1), `Buffers` (1), `CompilerOptions` (0+), -`WorkSizes` (0/1) - -### Kernel Node and Sub-Node Structure - -`Kernel` node contains all kernel source code configuration. No kernel -node structure exists. - -**Sub-nodes**: `Source` (1+), `Define` (0+) - -### Source Node and Sub-Node Structure - -`Source` node points to a single OpenCL source file. - -| Attribute Name | \# |Description| -|-----|-----|-----| -| `filename` | (1) | Name of the file containing OpenCL source code. Note that the path is relative to your executable. Multiple source nodes will have their sources concatenated in order. | - -**Sub-nodes**: None - -### Define Node and Sub-Node Structure - -`Define` node configures a single `#‍define` instruction to be added to -the sources during compilation (JIT). - -| Attribute Name | \# | Description | -|------|-------|------| -| `name` | (1) | The name of the defined JIT. For static constants, this can include the value as well, which is taken as a string. | -| `param` | (0/1) | This parameter value is used as the value of this JIT definition. | -| `type` | (0/1) | The parameter type. Accepted values: `int`, `float`, and `int[]`, `float[]` for arrays. | -| `default` | (0/1) | The default value to be used if the specified parameters are missing from the operation in the IR. | - -**Sub-nodes:** None - -The resulting JIT has the following form: -`#‍define [name] [type] [value/default]`. - -### Buffers Node and Sub-Node Structure - -`Buffers` node configures all input/output buffers for the OpenCL entry -function. No buffers node structure exists. 
- -**Sub-nodes:** `Data` (0+), `Tensor` (1+) - -### Data Node and Sub-Node Structure - -`Data` node configures a single input with static data, for example, -weights or biases. - -| Attribute Name | \# | Description | -|----|-----|------| -| `name` | (1) | Name of a blob attached to an operation in the IR | -| `arg-index` | (1) | 0-based index in the entry function arguments to be bound to | - -**Sub-nodes**: None - -### Tensor Node and Sub-Node Structure - -`Tensor` node configures a single input or output tensor. - -| Attribute Name | \# | Description | -|------|-------|-------| -| `arg-index` | (1) | 0-based index in the entry function arguments to be bound to. | -| `type` | (1) | `input` or `output` | -| `port-index` | (1) | 0-based index in the operation input/output ports in the IR | -| `format` | (0/1) | Data layout declaration for the tensor. Accepted values: `BFYX`, `BYXF`, `YXFB`, `FYXB`, and same values in all lowercase. Default value: `BFYX` | - -### CompilerOptions Node and Sub-Node Structure - -`CompilerOptions` node configures the compilation flags for the OpenCL -sources. - -| Attribute Name | \# | Description | -|--------|-----|------| -| `options` | (1) | Options string to be passed to the OpenCL compiler | - -**Sub-nodes**: None - -### WorkSizes Node and Sub-Node Structure - -`WorkSizes` node configures the global/local work sizes to be used when -queuing an OpenCL program for execution. - -| Attribute Name | \# | Description | -|-----|------|-----| -| `global`
`local` | (0/1)
(0/1) | An array of up to three integers or formulas for defining OpenCL work-sizes to be used during execution.
The formulas can use the values of the B,F,Y,X dimensions and contain the operators: +,-,/,\*,%. All operators are evaluated in integer arithmetic.
Default value: `global=”B*F*Y*X” local=””` | -| `dim` | (0/1) | A tensor to take the work-size from. Accepted values: `input N`, `output`, where `N` is an index of input tensor starting with 0. Default value: `output` | - -**Sub-nodes**: None - -## Example Configuration File - -The following code sample provides an example configuration file in XML -format. For information on the configuration file structure, see -[Configuration File Format](#config-file-format). -```xml - - - - - - - - - - - - -``` - -## Built-In Definitions for Custom Layers - -The following table includes definitions that are attached before -user sources, where `` is the actual input and output, for -example, `INPUT0` or `OUTPUT0`. - -For an example, see [Example Kernel](#example-kernel). - -| Name | Value | -|---|---| -| `NUM_INPUTS` | Number of the input tensors bound to this kernel | -| `GLOBAL_WORKSIZE` | An array of global work sizes used to execute this kernel | -| `GLOBAL_WORKSIZE_SIZE` | The size of the `GLOBAL_WORKSIZE` array | -| `LOCAL_WORKSIZE` | An array of local work sizes used to execute this kernel | -| `LOCAL_WORKSIZE_SIZE` | The size of the `LOCAL_WORKSIZE` array | -| `_DIMS`| An array of the tensor dimension sizes. Always ordered as `BFYX` | -| `_DIMS_SIZE`| The size of the `_DIMS` array.| -| `_TYPE`| The datatype of the tensor: `float`, `half`, or `char`| -| `_FORMAT_` | The format of the tensor, BFYX, BYXF, YXFB , FYXB, or ANY. The format is concatenated to the defined name. You can use the tensor format to define codepaths in your code with `#‍ifdef/#‍endif`. | -| `_LOWER_PADDING` | An array of padding elements used for the tensor dimensions before they start. Always ordered as BFYX.| -| `_ LOWER_PADDING_SIZE` | The size of the `_LOWER_PADDING` array | -| `_UPPER_PADDING` | An array of padding elements used for the tensor dimensions after they end. Always ordered as BFYX. | -| `_UPPER_PADDING_SIZE` | The size of the `_UPPER_PADDING` array | -| `_PITCHES` | The number of elements between adjacent elements in each dimension. Always ordered as BFYX.| -| `_PITCHES_SIZE`| The size of the `_PITCHES` array | -| `_OFFSET`| The number of elements from the start of the tensor to the first valid element, bypassing the lower padding. | -All `` values are automatically defined for every tensor -bound to this operation, such as `INPUT0`, `INPUT1`, and `OUTPUT0`, as shown -in the following example: - -```sh -#define INPUT0_DIMS_SIZE 4 -#define INPUT0_DIMS (int []){ 1,96,55,55, } -``` - -## Example Kernel - -```c -#pragma OPENCL EXTENSION cl_khr_fp16 : enable -__kernel void example_relu_kernel( - const __global INPUT0_TYPE* input0, - __global OUTPUT0_TYPE* output) -{ - const uint idx = get_global_id(0); - const uint idy = get_global_id(1); - const uint idbf = get_global_id(2);//batches*features, as OpenCL supports 3D nd-ranges only - const uint feature = idbf%OUTPUT0_DIMS[1]; - const uint batch = idbf/OUTPUT0_DIMS[1]; - //notice that pitches are in elements, not in bytes! - const uint in_id = batch*INPUT0_PITCHES[0] + feature*INPUT0_PITCHES[1] + idy*INPUT0_PITCHES[2] + idx*INPUT0_PITCHES[3] + INPUT0_OFFSET; - const uint out_id = batch*OUTPUT0_PITCHES[0] + feature*OUTPUT0_PITCHES[1] + idy*OUTPUT0_PITCHES[2] + idx*OUTPUT0_PITCHES[3] + OUTPUT0_OFFSET; - - INPUT0_TYPE value = input0[in_id]; - //neg_slope (which is non-zero for leaky ReLU) is put automatically as #define, refer to the config xml - output[out_id] = value < 0 ? 
value * neg_slope : value; -} -``` - -> **NOTE**: As described in the previous section, all items like -> `INPUT0_TYPE` are actually defined as OpenCL (pre-)compiler inputs by -> the Inference Engine for efficiency reasons. See [Debugging -> Tips](#debugging-tips) for information on debugging the results. - -> **NOTE**: Several GPU-targeted kernels are also added to the binaries upon compilation of samples -> so that the sample application can easy load them. -> Refer to the `cldnn_global_custom_kernels` folder in the GPU plugin installation directory. - -## Debugging Tips - -* **Using `printf` in the OpenCL™ Kernels**. -To debug the specific values, you can use `printf` in your kernels. -However, be careful not to output excessively, which -could generate too much data. The `printf` output is typical, so -your output can be truncated to fit the buffer. Also, because of -buffering, you actually get an entire buffer of output when the -execution ends.
- -For more information, refer to the [printf -Function](https://www.khronos.org/registry/OpenCL/sdk/1.2/docs/man/xhtml/printfFunction.html). diff --git a/docs/OV_Runtime_UG/Extensibility_DG/Intro.md b/docs/OV_Runtime_UG/Extensibility_DG/Intro.md deleted file mode 100644 index ca3217a26ce..00000000000 --- a/docs/OV_Runtime_UG/Extensibility_DG/Intro.md +++ /dev/null @@ -1,60 +0,0 @@ -# Inference Engine Extensibility Mechanism {#openvino_docs_IE_DG_Extensibility_DG_Intro} - -@sphinxdirective - -.. toctree:: - :maxdepth: 1 - :hidden: - - openvino_docs_IE_DG_Extensibility_DG_AddingNGraphOps - openvino_docs_IE_DG_Extensibility_DG_Custom_ONNX_Ops - CPU Kernels Extensibility - GPU Kernels Extensibility - VPU Kernels Extensibility - openvino_docs_IE_DG_Extensibility_DG_Extension - openvino_docs_IE_DG_Extensibility_DG_Building - -@endsphinxdirective - -If your model contains operations not normally supported by OpenVINO, the Inference Engine Extensibility API lets you add support for those custom operations in a library containing custom nGraph operation sets, corresponding extensions to the Model Optimizer, and a device plugin extension. See the overview in the [Custom Operations Guide](../../HOWTO/Custom_Layers_Guide.md) to learn how these work together. - -To load the Extensibility library to the `InferenceEngine::Core` object, use the `InferenceEngine::Core::AddExtension` method. - -## Inference Engine Extension Library - -An Inference Engine Extension dynamic library contains the following components: - - * [Extension Library](Extension.md): - - Contains custom operation sets - - Provides CPU implementations for custom operations - * [Custom nGraph Operation](AddingNGraphOps.md): - - Enables the use of `InferenceEngine::Core::ReadNetwork` to read Intermediate Representation (IR) with unsupported - operations - - Enables the creation of `ngraph::Function` with unsupported operations - - Provides a shape inference mechanism for custom operations - -> **NOTE**: This documentation is written based on the [Template extension](https://github.com/openvinotoolkit/openvino/tree/master/docs/template_extension), which demonstrates extension development details. You can review the complete code, which is fully compilable and up-to-date, to see how it works. - -## Execution Kernels - -The Inference Engine workflow involves the creation of custom kernels and either custom or existing operations. - -An _operation_ is a network building block implemented in the training framework, for example, `Convolution` in Caffe*. -A _kernel_ is defined as the corresponding implementation in the Inference Engine. - -Refer to the [Model Optimizer Extensibility](../../MO_DG/prepare_model/customize_model_optimizer/Customize_Model_Optimizer.md) -for details on how a mapping between framework operations and Inference Engine kernels is registered. - -In short, you can plug your own kernel implementations into the Inference Engine and map them to the operations in the original framework. 
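
As a minimal sketch of that workflow (library and model file names are placeholders), an application loads the extension library before reading the IR that contains the custom operation:

```cpp
#include <inference_engine.hpp>
#include <ie_extension.h>

#include <memory>

int main() {
    InferenceEngine::Core core;
    // Load the extension library that provides the custom operation set
    // and its CPU kernel implementations (library name is a placeholder).
    core.AddExtension(std::make_shared<InferenceEngine::Extension>("libtemplate_extension.so"));
    // The IR containing the custom operation can now be read and compiled.
    auto network = core.ReadNetwork("model_with_custom_op.xml");
    auto executableNetwork = core.LoadNetwork(network, "CPU");
    return 0;
}
```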
- -The following pages describe how to integrate custom _kernels_ into the Inference Engine: - - * [Introduction to development of custom CPU kernels](CPU_Kernel.md) - * [Introduction to development of custom GPU kernels](GPU_Kernel.md) - * [Introduction to development of custom VPU kernels](VPU_Kernel.md) - -## See Also - -* [Build an extension library using CMake*](Building.md) -* [Using Inference Engine Samples](../Samples_Overview.md) -* [Hello Shape Infer SSD sample](../../../samples/cpp/hello_reshape_ssd/README.md) diff --git a/docs/OV_Runtime_UG/Extensibility_DG/VPU_Kernel.md b/docs/OV_Runtime_UG/Extensibility_DG/VPU_Kernel.md deleted file mode 100644 index 86b2ad092bc..00000000000 --- a/docs/OV_Runtime_UG/Extensibility_DG/VPU_Kernel.md +++ /dev/null @@ -1,682 +0,0 @@ -# How to Implement Custom Layers for VPU (Intel® Neural Compute Stick 2) {#openvino_docs_IE_DG_Extensibility_DG_VPU_Kernel} - -To enable operations not supported by OpenVINO™ out of the box, you need a custom extension for Model Optimizer, a custom nGraph operation set, and a custom kernel for the device you will target. This page describes custom kernel support for one the VPU, the Intel® Neural Compute Stick 2 device, which uses the MYRIAD device plugin. - -> **NOTES:** -> * OpenCL\* custom layer support is available in the preview mode. -> * This section assumes you are familiar with developing kernels using OpenCL. - -To customize your topology with an OpenCL layer, carry out the tasks described on this page: - -1. Write and compile your OpenCL code with the standalone offline OpenCL compiler (`clc`). -2. Write a configuration file to bind the OpenCL kernel to the topology file (`.xml`) of the model IR. -3. Pass the configuration file to the Inference Engine with the model IR. - -## Compile OpenCL code for VPU (Intel® Neural Compute Stick 2) - -> **NOTE**: OpenCL compiler, targeting Intel® Neural Compute Stick 2 for the SHAVE* processor only, is redistributed with OpenVINO. -OpenCL support is provided by ComputeAorta* and is distributed under a license agreement between Intel® and Codeplay* Software Ltd. - -The OpenCL toolchain for the Intel® Neural Compute Stick 2 supports offline compilation only, so first compile OpenCL C code using the standalone `clc` compiler. You can find the compiler binary at `/tools/cl_compiler`. - -> **NOTE**: By design, custom OpenCL layers support any OpenCL kernels written assuming OpenCL version 1.2. It also supports half float extension and is optimized for this type, because it is a native type for Intel® Movidius™ VPUs. - -1. Prior to running a compilation, make sure that the following variables are set: - * `SHAVE_MA2X8XLIBS_DIR=/tools/cl_compiler/lib/` - * `SHAVE_LDSCRIPT_DIR=/tools/cl_compiler/ldscripts/` - * `SHAVE_MYRIAD_LD_DIR=/tools/cl_compiler/bin/` - * `SHAVE_MOVIASM_DIR=/tools/cl_compiler/bin/` -2. Run the compilation with the command below. You should use `--strip-binary-header` to make an OpenCL runtime-agnostic binary runnable with the Inference Engine. - ```bash - cd /tools/cl_compiler/bin - ./clc --strip-binary-header custom_layer.cl -o custom_layer.bin - ``` - -## Write a Configuration File - -To tie the topology IR for a layer you customize, prepare a configuration file, so that the Inference Engine can find parameters for your kernel and the execution work grid is described. 
-For example, consider the following OpenCL kernel signature: -```cpp -__kernel void reorg_nhwc(__global const half *src, __global half *out, int w, int h, int c, int stride); -``` -A configuration file for this kernel might be the following: -```xml - - - - - - - - - - - - - - -``` -Each custom layer is described with the `CustomLayer` node. It has the following nodes and attributes: - - Root node `CustomLayer` contains the following attributes: - - `name` – (Required) The name of the Inference Engine layer to bind the kernel with. - - `type` and `version` – (Required) Reserved for future use. Set them to `MVCL` and `1` respectively. - - `max-shaves` – (Optional) The maximum number of SHAVE cores that should be dedicated for the layer. It is useful for debugging concurrency issues or for resource saving that memory bound kernel does not scale well with the number of cores, so more resources can be left for the rest of a topology. - - Sub-node `Kernel` must contain the following attributes: - - `entry` – The name of your kernel function as you defined it in a source file. In the example above, it is `reorg_nhwc`. - - Node `Source` must contain the following attributes: - - `filename` – The path to a compiled binary relative to the XML configuration file. - - Sub-node `Parameters` – Describes parameters bindings. For more information, see the description below. - - Sub-node `WorkSizes` – Describes local and global work group sizes and the source for dimension deduction as a pair `direction,port`. In the example above, the work group is described relatively to the dimension of the input tensor that comes through port 0 in the IR. `global` and `local` work group configurations support any simple math expressions with +,-,\*,/, and () from `B`(batch), `Y`(height), `X`(width) and `F`(channels). - - Sub-node `Where` – Allows to customize bindings with the `key="value"` attribute. For example, to substitute only 3x3 convolutions, write `` in the binding xml. - - Parameter description supports `Tensor` of one of tensor types such as `input`, `output`, `input_buffer`, `output_buffer` or `data`, `Scalar`, or `Data` nodes and has the following format: - - Each `Tensor` node of `input` or `output` type must contain the following attributes: - - `arg-name` – The name of a kernel parameter in the kernel signature. - - `type` – Node type: `input` or `output` as specified in the IR. - - `port-index` – A number of input/output ports as specified in the IR. - - `format` – The channel order in the tensor. Optional conversion layers are generated if the custom layer format is not compatible with formats of neighboring layers. `BFXY`, `BYXF`, and `ANY` formats are supported currently. - - Each `Tensor` node of `input_buffer` or `output_buffer` type must contain the following attributes: - - `arg-name` – The name of a kernel parameter in the kernel signature. - - `type` – Node type: `input_buffer` or `output_buffer`. Use the appropriate type to bind multiple kernels that correspond to different stages of the same layer. - - `port-index` – The unique identifier to bind by. - - `dim` – The dim source with the same `direction,port` format used for `WorkSizes` bindings. - - `size` – Amount of bytes needed. Current expression syntax supports only expression over dimensions of over selected input/output tensor or constants and might be expended in the future. 
- - Here is an example of multi-stage MVN layer binding: - ```xml - - - - - - - - - - - - - - - - - - - - - - - - - - ``` - - Each `Tensor` node that has the type `data` must contain the following attributes: - - `source` – A name of the blob as it is in the IR. Typical example is `weights` for convolution. - - `format` – Specifies the channel order in the tensor. Optional conversion layers are generated if the custom layer format is not. - ```xml - - - - - - - - - - - - - ``` - - Each `Scalar` node must contain the following attributes: - - `arg-name` – The name of a kernel parameter in the kernel signature. - - `type` – `int` or `float` value. It is used for correct argument extraction from IR parameters. - - `source` – Contains the name of the parameter in the IR file or input/output (`I`/`O`, `In`/`On`, where `n` is a port number) - followed by dimension `B`(batch), `Y`(height), `X`(width), or `F`(channels). - - - Each `Data` node must contain the following attributes: - - `arg-name` – The name of a kernel parameter in the kernel signature. - - `type` – Node type. Currently, `local_data` is the only supported value, which defines buffer allocated in fast local on-chip memory. It is limited to 100KB for all `__local` and - `__private` arrays defined inside the kernel as well as all `__local` parameters passed to the kernel. Note that a manual-DMA extension requires double buffering. - If the custom layer is detected to run out of local memory, the inference fails. - - `dim` – The dim source with the same `direction,port` format used for `WorkSizes` bindings. - - `size` – Amount of bytes needed. The current expression syntax supports only expression over dimensions of over selected input/output tensor or constants and may be extended in the future. - The example binding below illustrates a kernel with two local buffers passed to the kernel. - ```xml - - - - - - - - - - - - - - -``` - -## Pass Configuration File to Inference Runtime - -> **NOTE**: If both native and custom layer implementations are present, the custom kernel has a priority over the native one. - -Before loading the network that features the custom layers, provide a separate configuration file and load it using the InferenceEngine::Core::SetConfig() method with the PluginConfigParams::KEY_CONFIG_FILE key and the configuration file name as a value: -```cpp -InferenceEngine::Core core; -// Load custom layers -core.SetConfig({ { InferenceEngine::PluginConfigParams::KEY_CONFIG_FILE, "" } }, "MYRIAD"); -``` -Optionally, set a path to a custom layers description with a pair of `VPU_CUSTOM_LAYERS` and `/path/to/your/customLayers.xml` -as a network configuration: -```cpp -InferenceEngine::Core core; -std::map networkConfig; -config["VPU_CUSTOM_LAYERS"] = "/path/to/your/customLayers.xml"; -// Load custom layers in network config -auto exeNetwork = core.LoadNetwork(cnnNetwork, "MYRIAD", networkConfig); -``` - -## Optimizing Kernels with OpenCL for VPU (Intel® Neural Compute Stick 2) - -This section provides optimization guidelines on writing custom layers with OpenCL for VPU devices. Knowledge about general OpenCL -programming model and OpenCL kernel language is assumed and not a subject of this section. The OpenCL model mapping to VPU is described in the table below. 
- -| OpenCL Model | VPU Mapping| -|-----|----| -| Device code | Executed on SHAVE cores | -| Private memory | Mapped to CMX internal memory, limited to 100KB per work group, valid only while the work group is executed | -| Local memory | Mapped to CMX internal memory, limited to 100KB per work group, valid only while the work group is executed | -| Global memory | Mapped to DDR, used to pass execution preserved parameters for inputs, outputs, and blobs | -| Work group | Executed on a single SHAVE core iterating over multiple work items | - -Note that by the OpenCL specification, the work group execution order is not specified. This means that it is your -responsibility to ensure that race conditions among work groups are not introduced. Custom layer runtime spits evenly -work grid among available compute resources and executes them in an arbitrary order. This static scheduling approach works best if the load is evenly spread out across work groups, which is a typical case for Deep Learning kernels. The following guidelines are recommended to use for work group partitioning: - -1. Split work evenly across work groups. -2. Adjust work group granularity to maintain equal workload for all compute codes. -3. Set the maximum number of cores using the `max-shaves` attribute for the `CustomLayer` node. This keeps more resources for the rest of topology. It is also useful if the kernel scalability reached its limits, which may happen while optimizing memory bound kernels or kernels with poor parallelization. -4. Try an alternate data layout (`BFXY`/`BYXF`) for the kernel if it improves work group partitioning or data access patterns. -Consider not just specific layer boost, but full topology performance because data conversion layers would be automatically inserted -as appropriate. - -Offline OpenCL compiler (`clc`) features automatic vectorization over `get_global_id(0)` usage, if uniform access is detected. -For example, the kernel below could be automatically vectorized: -```cpp -__kernel void cvtf32f16(__global float* restrict inImage, __global half* restrict outImage, - float scale, float bais) -{ - int idx = get_global_id(0) + get_global_id(1) * get_global_size(0) + get_global_id(2) * get_global_size(0) * get_global_size(1); - outImage[idx] = convert_half(inImage[idx]*scale+bais); -} -``` -However, this work-group based vectorizer (WGV) conflicts with the default LLVM vectorizer based on superword level parallelism -(SLP) for the current compiler version. Manual vectorization is recommended to provide the best performance for non-uniform code -patterns. WGV works if and only if vector types are not used in the code. - -Here is a short list of optimization tips: - -1. Help auto-vectorizer ensure non-aliasing pointers for kernel parameters by putting `restrict` where possible. - - This can give a performance boost, especially for kernels with unrolling, like `ocl_grn` from the example below. - - Place `restrict` markers for kernels with manually vectorized codes. In the `ocl_grn` kernel below, the unrolled version without `restrict` is up to 20% slower than the most optimal one, which combines unrolling and `restrict`. -2. Put `#‍pragma unroll N` to your loop header. The compiler does not trigger unrolling by default, so it is your responsibility to -annotate the code with pragmas as appropriate. 
The `ocl_grn` version with `#pragma unroll 4` is up to 50% faster, most of which comes from unrolling the first loop, because LLVM, in general, is better at scheduling 3-stage loops (load-compute-store), while the first loop
-`variance += (float)(src_data[c*H*W + y*W + x] * src_data[c*H*W + y*W + x]);` is only 2-stage (load-compute). Pay
-attention to unrolling such cases first. The unrolling factor is loop-dependent. Choose the smallest number that
-still improves performance, as an optimum between the kernel size and execution speed. For this specific kernel, changing the unroll factor from `4` to `6` results in the same performance, so an unrolling factor of 4 is the optimum. For Intel® Neural Compute Stick 2, unrolling is combined with the automatic software pipelining for load, store, and compute stages:
-```cpp
-__kernel void ocl_grn(__global const half* restrict src_data, __global half* restrict dst_data, int C, float bias)
-{
-    int x = get_global_id(0);
-    int W = get_global_size(0);
-    int y = get_global_id(1);
-    int H = get_global_size(1);
-
-    float variance = bias + 1e-9f;
-
-    #pragma unroll 4
-    for (int c = 0; c < C; c++)
-        variance += (float)(src_data[c*H*W + y*W + x] * src_data[c*H*W + y*W + x]);
-
-    variance = 1.f / native_sqrt(variance);
-
-    #pragma unroll 4
-    for (int c = 0; c < C; c++)
-        dst_data[c*H*W + y*W + x] = (half)((float)src_data[c*H*W + y*W + x] * variance);
-}
-```
-To check the efficiency of WGV, you can compare the performance of the kernel above with the kernel below, which is manually vectorized over width:
-```cpp
-__kernel void ocl_grn_line(__global const half* restrict src_data, __global half* restrict dst_data, int C, int W, float bias)
-{
-    int y = get_global_id(1);
-    int H = get_global_size(1);
-
-    for (int x = 0; x < W/8; x++)
-    {
-        float8 variance = (float8)(bias+1e-9f);
-
-        #pragma unroll 4
-        for (int c = 0; c < C; c++)
-        {
-            __global const half8* restrict src_line = ((__global const half8 * restrict)(src_data + c*H*W + y*W));
-            half8 sh = src_line[x];
-            variance += convert_float8(sh*sh);
-        }
-
-        variance = 1.f/native_sqrt(variance);
-
-        #pragma unroll 4
-        for (int c = 0; c < C; c++)
-        {
-            __global const half8* restrict src_line = ((__global const half8 * restrict)(src_data + c*H*W + y*W));
-            __global half8* restrict dst_line = ((__global half8 * restrict)(dst_data + c*H*W + y*W));
-
-            dst_line[x] = convert_half8(convert_float8(src_line[x])*variance);
-        }
-    }
-    for (int x = W/8*8; x < W; x++)
-    {
-        float variance = bias+1e-9f;
-        #pragma unroll 4
-        for (int c = 0; c < C; c++)
-            variance += (float)(src_data[c*H*W + y*W + x]*src_data[c*H*W + y*W + x]);
-
-        variance = 1.f/native_sqrt(variance);
-
-        #pragma unroll 4
-        for (int c = 0; c < C; c++)
-            dst_data[c*H*W + y*W + x] = (float)src_data[c*H*W + y*W + x]*variance;
-    }
-}
-```
-Both versions perform the same, but the second one has more complex code.
-
-3. If it is easy to predict the work group size, you can also use the `reqd_work_group_size` kernel attribute to ask the compiler
-to unroll the code up to the local size of the work group. Note that if the kernel is actually executed with a
-different work group configuration, the result is undefined.
-
-4. Prefer to use `half` compute if it keeps reasonable accuracy. 16-bit float is a native type for Intel® Neural Compute Stick 2, and most of the `half_*` functions are mapped to a single hardware instruction.
-Use the standard `native_*` functions for the rest of the types.
-
-5. 
Prefer to use the `convert_half` function over `vstore_half` if conversion to 32-bit float is required. `convert_half` is mapped to a single hardware instruction. For the `cvtf32f16` kernel above, the line `outImage[idx] = convert_half(inImage[idx]*scale+bias);` is about eight times faster than an equivalent version written with `vstore_half`.
-
-6. Mind early exits. An early exit can be extremely costly for the current version of the `clc` compiler due to conflicts with the
-auto-vectorizer. The generic advice is to set the local size along the `x` dimension equal to the input and/or output width.
-If it is impossible to define a work grid that exactly matches the inputs and/or outputs, so that checks such as
-`if (get_global_id(0) >= width) return` cannot be eliminated, use a line-wise kernel variant with manual vectorization.
-The kernel example below demonstrates the impact of early exits on kernel performance.
-   ```cpp
-   // Initial version
-   __kernel void reorg(const __global half* restrict src, __global half* restrict out, int stride)
-   {
-       int w = get_global_id(0);
-       int W = get_global_size(0);
-
-       int h = get_global_id(1);
-       int H = get_global_size(1);
-
-       int c = get_global_id(2);
-       int C = get_global_size(2);
-
-       int C2 = C/(stride*stride);
-       int offset = c / C2;
-       int c2 = c - C2 * offset;
-
-       int H2 = H*stride;
-       int W2 = W*stride;
-
-       int h2 = h*stride + offset / stride;
-       int w2 = w*stride + offset - stride * (offset / stride);
-
-       out[W*H*c + W*h + w] = src[W2*H2*c2 + W2*h2 + w2];
-   }
-   ```
-This `reorg` kernel is auto-vectorizable, but the input for the YOLO v2 topology is `NCHW=<1,64,26,26>`, which is not a multiple of the vector width (`8` for the `half` data type). As a result, the Inference Engine does not select the auto-vectorized kernel.
-To compare the performance of the auto-vectorized and scalar versions of the kernel, change the input size to `NCHW=<1,64,26,32>`. This enables the auto-vectorized version to be selected by the Inference Engine and can give you about 30% uplift.
-Since the auto-vectorized version is faster, it makes sense to enable it for the YOLO v2 topology input size by setting the local size to a multiple of the vector width, for example, 32, and adjusting the global sizes accordingly. As a result, the execution work grid exceeds the actual input dimensions, so out-of-bound checks should be inserted. See the updated kernel version below:
-   ```cpp
-   // Version with out-of-bound checks added
-   __kernel void reorg(const __global half* restrict src, __global half* restrict out, int W, int stride)
-   {
-       int w = get_global_id(0);
-       w = min(w, W-1);
-
-       int h = get_global_id(1);
-       int H = get_global_size(1);
-
-       int c = get_global_id(2);
-       int C = get_global_size(2);
-
-       int C2 = C/(stride*stride);
-       int offset = c / C2;
-       int c2 = c - C2 * offset;
-
-       int H2 = H*stride;
-       int W2 = W*stride;
-
-       int h2 = h*stride + offset / stride;
-       int w2 = w*stride + offset - stride * (offset / stride);
-
-       out[W*H*c + W*h + w] = src[W2*H2*c2 + W2*h2 + w2];
-   }
-   ```
-This code performs the same as the initial (scalar) kernel above due to branching overhead. If you replace the min/max expression `w = min(w, W-1);` with `if (w >= W) return;`, the runtime increases by up to 2x compared to the code without branching (the initial version).
-If branching is inevitable for your element-based kernel, it is recommended to change the scheme to line-based. See the kernel variant below:
-```cpp
-// Line-wise version
-__kernel void reorg(const __global half* restrict src, __global half* restrict out, int H, int W, int stride)
-{
-    int h = min((int)get_global_id(0), H-1);
-
-    int c = get_global_id(1);
-    int C = get_global_size(1);
-    int C2 = C/(stride*stride);
-    int offset = c / C2;
-    int c2 = c - C2 * offset;
-
-    int H2 = H*stride;
-    int W2 = W*stride;
-
-    for (int w = 0; w < W; ++w)
-    {
-        int h2 = h*stride + offset / stride;
-        int w2 = w*stride + offset - stride * (offset / stride);
-
-        out[W*H*c + W*h + w] = src[W2*H2*c2 + W2*h2 + w2];
-    }
-}
-```
-This decreases the execution time by up to 40% compared to the best-performing vectorized kernel without early exits (the initial version).
-7. Reuse computations among work items by using line-based kernels or by sharing values through `__local` memory.
-8. Improve data access locality. Most custom kernels are memory-bound, while convolution and fully connected layers are hardware-implemented. The code below demonstrates a further optimized version of the `reorg` kernel unrolled by `stride`:
-   ```cpp
-   // Unrolled line-wise version
-   __kernel void reorg_unrolled_by_stride(const __global half* restrict src, __global half* restrict dst,
-                                          int H, int W, int stride)
-   {
-       int h = min((int)get_global_id(0), H-1);
-
-       int c2 = get_global_id(1);
-       int C2 = get_global_size(1);
-       int C = C2*stride*stride;
-
-       int H2 = H*stride;
-       int W2 = W*stride;
-
-       for (int stride_y = 0; stride_y < stride; stride_y++)
-           for (int stride_x = 0; stride_x < stride; stride_x++)
-               for (int w2 = 0, w = 0; w < W; w2 += stride, w++)
-                   dst[W*H*C2*(stride_y*stride+stride_x) + W*H*c2 + W*h + w] = src[W2*H2*c2 + W2*h*stride + W2*stride_y + w2 + stride_x];
-   }
-   ```
-The `src` data in this case is loaded only once. As a result, the cycle count drops by up to 45% compared to the line-wise version.
-
-9. Copy data from `__global` to `__local` or `__private` memory if the data is accessed more than once. Access to
-`__global` memory is orders of magnitude slower than access to `__local`/`__private` memory due to the statically scheduled pipeline, which
-stalls completely on memory access without any prefetch. The same recommendation applies to scalar load/store
-from/to a `__global` pointer, since work-group copying can be done in a vector fashion.
-
-10. Use the manual DMA extension. Local (on-chip) memory throughput is up to 24x higher than DDR throughput. Starting from OpenVINO™ 2020.1, VPU OpenCL features a manual-DMA kernel extension to copy the sub-tensor used by a work group into local memory and perform the computation without DDR involved. Here is a simple GRN kernel implementation that runs over DDR.
Local size is in the form (width of the input tensor, 1, 1) to define a large enough work group to get the code automatically vectorized and unrolled, while the global size is (width of the input tensor, height of the input tensor, 1):
-   ```cpp
-   __kernel void grn_NCHW(
-       __global const half* restrict src_data,
-       __global half* restrict dst_data,
-       int C,
-       float bias)
-   {
-       float variance = bias + 1e-9f;
-
-       #pragma unroll 4
-       for (int c = 0; c < C; c++)
-       {
-           float val = (float) src_data[c*get_global_size(1)*get_global_size(0) + get_global_id(1)*get_global_size(0) + get_global_id(0)];
-           variance += val*val;
-       }
-
-       half hvariance = (half)(native_rsqrt((half)(variance/16.f))*0.25f);
-
-       #pragma unroll 4
-       for (int c = 0; c < C; c++)
-       {
-           dst_data[c*get_global_size(1)*get_global_size(0) + get_global_id(1)*get_global_size(0) + get_global_id(0)]
-           = src_data[c*get_global_size(1)*get_global_size(0) + get_global_id(1)*get_global_size(0) + get_global_id(0)] * hvariance;
-       }
-   }
-   ```
-
-This kernel can be rewritten to introduce the special data binding `__dma_preload` and `__dma_postwrite` intrinsics. This means that instead of one kernel, a group of three kernels should be implemented: `kernelName`, `__dma_preload_kernelName`, and `__dma_postwrite_kernelName`. `__dma_preload_kernelName` for a particular work group `n` is guaranteed to be executed before the `n`-th work group itself, while `__dma_postwrite_kernelName` is guaranteed to be executed after the corresponding work group. You can define either of these functions; they are intended to copy data between `__global` and `__local` memory. The syntax requires an exact function signature match. The example below illustrates how to prepare your kernel for manual-DMA.
-
-   ```cpp
-   __kernel void __dma_preload_grn_NCHW(
-       __global const half* restrict src,
-       __global half* restrict dst,
-       __local half* restrict local_src,
-       __local half* restrict local_dst,
-       int C,
-       float bias)
-   {
-       // TODO: copy the required piece of the src tensor into local_src
-   }
-
-   __kernel void __dma_postwrite_grn_NCHW(
-       __global const half* restrict src,
-       __global half* restrict dst,
-       __local const half* restrict local_src,
-       __local half* restrict local_dst,
-       int C,
-       float bias)
-   {
-       // TODO: copy back the computed piece of local_dst into dst
-   }
-
-   __kernel void grn_NCHW(
-       __global const half* restrict src_data,
-       __global half* restrict dst_data,
-       __local half* restrict src,
-       __local half* restrict dst,
-       int C,
-       float bias)
-   {
-       // same as the example above
-   }
-   ```
-The GRN kernel operates on channel-major tensors to compute the average over the full channel range and then normalizes the input elements to produce the output.
-As a part of the manual DMA extension, a group of work group copy functions is introduced in addition to `async_work_group_copy`, which is also mapped to a DMA call.
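-
-For a single contiguous line, the standard `async_work_group_copy` built-in alone may already be enough. The sketch below is purely illustrative: the kernel name, the `MAX_W` bound, and the `get_group_id(1)*W` addressing are assumptions for a packed row-per-work-group layout, not part of the original example.
-```cpp
-#define MAX_W 512   // placeholder upper bound for the line width
-
-__kernel void stage_row_example(__global const half* restrict src,
-                                __global half* restrict dst,
-                                int W)
-{
-    __local half line[MAX_W];
-
-    // Stage one packed row of W half elements from DDR into CMX;
-    // the built-in takes an element count, not a byte count.
-    event_t e = async_work_group_copy(line, src + get_group_id(1) * W, W, 0);
-    wait_group_events(1, &e);
-
-    // ... compute on `line` in fast local memory, then write the results to dst ...
-}
-```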
- -Here is the list of supported functions: -```cpp -// 2D sub-tensor copy -event_t WorkGroupDmaCreateStrideTransaction( - const local T *src, - global T *dst, - size_t src_width, // width of the line of source in bytes - size_t dst_width, // width of the line of destination in bytes - size_t src_stride, // stride between corresponding 2 consecutive lines of source in bytes - size_t dst_stride, // stride between corresponding 2 consecutive lines of destination in bytes - size_t size, // total number of bytes loaded for all lines from source to destination - event_t event) __OVERLOAD; - - -event_t WorkGroupDmaCreateStrideTransaction( - const global T *src, - local T *dst, - size_t src_width, // width of the line of source in bytes - size_t dst_width, // width of the line of destination in bytes - size_t src_stride, // stride between corresponding 2 consecutive lines of source in bytes - size_t dst_stride, // stride between corresponding 2 consecutive lines of destination in bytes - size_t size, // total number of bytes loaded for all lines from source to destination - event_t event) __OVERLOAD; - -// 3D sub-tensor copy -event_t WorkGroupDmaCreate3DTransaction( - const local T *src, - global T *dst, - size_t src_width, // width of the line of source in bytes - size_t dst_width, // width of the line of destination in bytes - size_t src_stride, // stride between corresponding 2 consecutive lines of source in bytes - size_t dst_stride, // stride between corresponding 2 consecutive lines of destination in bytes - size_t num_planes, // number of planes to be copied - size_t src_plane_stride, // stride between corresponding 2 consecutive planes of source in bytes - size_t dst_plane_stride, // stride between corresponding 2 consecutive planes of destination in bytes - size_t size, // size of the loaded plane in bytes, analogues to the size in 2D case - event_t event) __OVERLOAD; - -event_t WorkGroupDmaCreate3DTransaction( - const global T *src, - local T *dst, - size_t src_width, // width of the line of source in bytes - size_t dst_width, // width of the line of destination in bytes - size_t src_stride, // stride between corresponding 2 consecutive lines of source in bytes - size_t dst_stride, // stride between corresponding 2 consecutive lines of destination in bytes - size_t num_planes, // number of planes to be copied - size_t src_plane_stride, // stride between corresponding 2 consecutive planes of source in bytes - size_t dst_plane_stride, // stride between corresponding 2 consecutive planes of destination in bytes - size_t size, // size of the loaded plane in bytes, analogues to the size in 2D case - event_t event) __OVERLOAD; -``` -where `T` can be `uchar`, `char`, `short`, `ushort`, `int`, `uint`, `long`, `ulong`, `half` or `float`. 
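-
-Since only the 3D variant is demonstrated below, here is a minimal sketch of how the 2D variant might stage one work group's rows of a single plane from DDR into CMX. It is illustrative only: the kernel name `my_kernel` and its parameter list are placeholders, and the addressing assumes a packed plane with `get_global_size(0)` half elements per line.
-```cpp
-__kernel void __dma_preload_my_kernel(
-    __global const half* restrict src,
-    __global half* restrict dst,
-    __local half* restrict local_src,
-    __local half* restrict local_dst,
-    int C,
-    float bias)
-{
-    WorkGroupDmaCreateStrideTransaction(
-        src + get_group_id(0)*get_local_size(0)
-            + get_group_id(1)*get_local_size(1)*get_global_size(0), // source tile in DDR
-        local_src,                                                  // destination in CMX
-        get_local_size(0) * sizeof(half),                           // src line width in bytes
-        get_local_size(0) * sizeof(half),                           // dst line width in bytes
-        get_global_size(0) * sizeof(half),                          // src stride between lines in bytes
-        get_local_size(0) * sizeof(half),                           // dst stride between lines in bytes
-        get_local_size(0) * get_local_size(1) * sizeof(half),       // total bytes for all lines
-        0);
-}
-```
-The corresponding `__dma_postwrite` function would issue the same transaction with the local buffer as the source and the global tensor as the destination, using the local-to-global overload.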
- -Modified version of the GRN kernel could be the following: -```cpp -__kernel void __dma_preload_grn_NCHW( - __global const half* restrict src, - __global half* restrict dst, - __local half* restrict local_src, - __local half* restrict local_dst, - int C, - float bias) -{ - WorkGroupDmaCreate3DTransaction( - src + get_group_id(0)*get_local_size(0) - + get_group_id(1)*get_local_size(1)*get_global_size(0), // src - local_src, // dst - get_local_size(0) * sizeof(half), // src width - get_local_size(0) * sizeof(half), // dst width - get_global_size(0) * sizeof(half), // src stride - get_local_size(0) * sizeof(half), // dst stride - C, // num planes - get_global_size(0) * get_global_size(1) * sizeof(half), // src plane stride - get_local_size(0) * get_local_size(1) * sizeof(half), // dst plane stride - get_local_size(0) * get_local_size(1) * sizeof(half), // plane size - 0); -} - -__kernel void __dma_postwrite_grn_NCHW( - __global const half* restrict src, - __global half* restrict dst, - __local const half* restrict local_src, - __local half* restrict local_dst, - int C, - float bias) -{ - WorkGroupDmaCreate3DTransaction( - local_dst, // src - dst + get_group_id(0)*get_local_size(0) - + get_group_id(1)*get_local_size(1)*get_global_size(0), // dst - get_local_size(0) * sizeof(half), // src width - get_local_size(0) * sizeof(half), // dst width - get_local_size(0) * sizeof(half), // src stride - get_global_size(0) * sizeof(half), // dst stride - C, // num planes - get_local_size(0) * get_local_size(1) * sizeof(half), // src plane stride - get_global_size(0) * get_global_size(1) * sizeof(half), // dst plane stride - get_local_size(0) * get_local_size(1) * sizeof(half), // plane size - 0); -} - -__kernel void grn_NCHW( - __global const half* restrict src_data, - __global half* restrict dst_data, - __local half* restrict src, - __local half* restrict dst, - int C, - float bias) -{ - float variance = bias + 1e-9f; - - #pragma unroll 8 - for (int c = 0; c < C; c++) - { - float val = (float) src[c*get_local_size(1)*get_local_size(0) + get_local_id(1)*get_local_size(0) + get_local_id(0)]; - variance += val*val; - } - - half hvariance = (half)(native_rsqrt((half)(variance/16.f))*0.25f); - - #pragma unroll 8 - for (int c = 0; c < C; c++) - { - dst[c*get_local_size(1)*get_local_size(0) + get_local_id(1)*get_local_size(0) + get_local_id(0)] - = src[c*get_local_size(1)*get_local_size(0) + get_local_id(1)*get_local_size(0) + get_local_id(0)] * hvariance; - } -} -``` - -Note the `get_local_size` and `get_local_id` usage inside the kernel. 21x speedup is expected for a kernel on enet-curbs setup because it was completely limited by memory usage. - -An alternative method to using DMA is to use work item copy extension. Those functions are executed inside a kernel and requires work groups equal to single work item. 
- -Here is the list of supported work item functions: -```cpp -item_dma_event_t WorkItemDmaCreateTransaction( - const global T *src, - private T *dst, - size_t size, - item_dma_event_t event) __OVERLOAD; - -item_dma_event_t WorkItemDmaCreateTransaction( - const private T *src, - global T *dst, - size_t size, - item_dma_event_t event) __OVERLOAD; - -item_dma_event_t WorkItemDmaCreateStrideTransaction( - const global T *src, - private T *dst, - size_t src_width, - size_t dst_width, - size_t src_stride, - size_t dst_stride, - size_t size, - item_dma_event_t event) __OVERLOAD; - -item_dma_event_t WorkItemDmaCreateStrideTransaction( - const private T *src, - global T *dst, - size_t src_width, - size_t dst_width, - size_t src_stride, - size_t dst_stride, - size_t size, - item_dma_event_t event) __OVERLOAD; - -item_dma_event_t WorkItemDmaCreate3DTransaction( - const global T *src, - private T *dst, - size_t src_width, - size_t dst_width, - size_t src_stride, - size_t dst_stride, - size_t num_planes, - size_t src_plane_stride, - size_t dst_plane_stride, - size_t size, - item_dma_event_t event) __OVERLOAD; - -item_dma_event_t WorkItemDmaCreate3DTransaction( - const private T *src, - global T *dst, - size_t src_width, - size_t dst_width, - size_t src_stride, - size_t dst_stride, - size_t num_planes, - size_t src_plane_stride, - size_t dst_plane_stride, - size_t size, - item_dma_event_t event) __OVERLOAD; -``` -where `T` can be `uchar`, `char`, `short`, `ushort`, `int`, `uint`, `long`, `ulong`, `half` or `float`. diff --git a/docs/OV_Runtime_UG/ShapeInference.md b/docs/OV_Runtime_UG/ShapeInference.md index 1c50659b262..4cb274ad827 100644 --- a/docs/OV_Runtime_UG/ShapeInference.md +++ b/docs/OV_Runtime_UG/ShapeInference.md @@ -112,7 +112,7 @@ To keep the model valid after the reshape, choose a new input shape that satisfi For details, refer to the Tensorflow Object Detection API models resizing techniques. ### Extensibility -The Inference Engine provides a special mechanism that allows adding support of shape inference for custom operations. This mechanism is described in the [Extensibility documentation](Extensibility_DG/Intro.md) +The Inference Engine provides a special mechanism that allows adding support of shape inference for custom operations. This mechanism is described in the [Extensibility documentation](../Extensibility_UG/Intro.md) ## Introduction (Python) @@ -218,7 +218,7 @@ exec_net = ie.load_network(network=net, device_name="CPU") ``` ### Extensibility -The Inference Engine provides a special mechanism that allows adding support of shape inference for custom operations. This mechanism is described in the [Extensibility documentation](Extensibility_DG/Intro.md) +The Inference Engine provides a special mechanism that allows adding support of shape inference for custom operations. This mechanism is described in the [Extensibility documentation](../Extensibility_UG/Intro.md) ### See Also: diff --git a/docs/OV_Runtime_UG/model_representation.md b/docs/OV_Runtime_UG/model_representation.md index eedf4291104..9f24bc0d9c3 100644 --- a/docs/OV_Runtime_UG/model_representation.md +++ b/docs/OV_Runtime_UG/model_representation.md @@ -14,7 +14,7 @@ For details on how to build a model in OpenVINO™ Runtime, see the [Build a Mod ## Operations -The `ov::Op` class represents any abstract operation in the model representation. Use this class to create [custom operations](../OV_Runtime_UG/Extensibility_DG/AddingNGraphOps.md). +The `ov::Op` class represents any abstract operation in the model representation. 
Use this class to create [custom operations](../Extensibility_UG/add_openvino_ops). ## Operation Sets @@ -39,7 +39,7 @@ Operation set `opsetX` integrates a list of pre-compiled operations that work For a complete list of operation sets supported in OpenVINO™ toolkit, see [Available Operations Sets](../ops/opset.md). -To add suport of custom operations, see the [Add Custom OpenVINO Operations](../OV_Runtime_UG/Extensibility_DG/Intro.md) document. +To add support of custom operations, see the [Add Custom OpenVINO Operations](../Extensibility_UG/Intro.md) document. To build an `ov::Model` instance from `opset8` operations, include the following files: @@ -88,4 +88,4 @@ The following code creates a model with several outputs: ## See Also * [Available Operation Sets](../ops/opset.md) -* [OpenVINO™ Runtime Extensibility Developer Guide](../OV_Runtime_UG/Extensibility_DG/Intro.md) +* [OpenVINO™ Runtime Extensibility Developer Guide](../Extensibility_UG/Intro.md) diff --git a/docs/OV_Runtime_UG/openvino_temporary.md b/docs/OV_Runtime_UG/openvino_temporary.md index ed102170cac..073ca36706a 100644 --- a/docs/OV_Runtime_UG/openvino_temporary.md +++ b/docs/OV_Runtime_UG/openvino_temporary.md @@ -13,7 +13,6 @@ openvino_docs_IE_DG_Int8Inference openvino_docs_IE_DG_Bfloat16Inference openvino_docs_transformations - openvino_docs_IE_DG_Extensibility_DG_Intro @endsphinxdirective diff --git a/docs/OV_Runtime_UG/supported_plugins/CPU.md b/docs/OV_Runtime_UG/supported_plugins/CPU.md index 692e490afaf..950b46c2dec 100644 --- a/docs/OV_Runtime_UG/supported_plugins/CPU.md +++ b/docs/OV_Runtime_UG/supported_plugins/CPU.md @@ -6,7 +6,7 @@ The CPU plugin was developed to achieve high performance of neural networks on C Currently, the CPU plugin uses Intel® Threading Building Blocks (Intel® TBB) in order to parallelize calculations. Please refer to the [Optimization Guide](../../optimization_guide/dldt_optimization_guide.md) for associated performance considerations. -The set of supported layers can be expanded with [the Extensibility mechanism](../Extensibility_DG/Intro.md). +The set of supported layers can be expanded with [the Extensibility mechanism](../../Extensibility_UG/Intro.md). ## Supported Platforms diff --git a/docs/OV_Runtime_UG/supported_plugins/Supported_Devices.md b/docs/OV_Runtime_UG/supported_plugins/Supported_Devices.md index 51ed5358068..54aee5a84be 100644 --- a/docs/OV_Runtime_UG/supported_plugins/Supported_Devices.md +++ b/docs/OV_Runtime_UG/supported_plugins/Supported_Devices.md @@ -271,6 +271,6 @@ The following layers are supported by the plugins and by [Shape Inference featur \*- support is limited to the specific parameters. Refer to "Known Layers Limitation" section for the device [from the list of supported](Supported_Devices.md). -\*\*- support is implemented via [Extensibility mechanism](../Extensibility_DG/Intro.md). +\*\*- support is implemented via [Extensibility mechanism](../../Extensibility_UG/Intro.md). \*\*\*- supports NCDHW layout. 
diff --git a/docs/OV_Runtime_UG/supported_plugins/VPU.md b/docs/OV_Runtime_UG/supported_plugins/VPU.md index 11f3aaa9799..e7a0ce80c33 100644 --- a/docs/OV_Runtime_UG/supported_plugins/VPU.md +++ b/docs/OV_Runtime_UG/supported_plugins/VPU.md @@ -138,7 +138,7 @@ In a perfect pipeline this time should be near zero, which means that the data w **Get the following message when running inference with the VPU plugin: "[VPU] Cannot convert layer due to unsupported layer type "** -This means that your topology has a layer that is unsupported by your target VPU plugin. To resolve this issue, you can implement the custom layer for the target device using the [Inference Engine Extensibility mechanism](../Extensibility_DG/Intro.md). Or, to quickly get a working prototype, you can use the heterogeneous scenario with the default fallback policy (see the [Heterogeneous execution](../hetero_execution.md) section). Use the HETERO mode with a fallback device that supports this layer, for example, CPU: `HETERO:MYRIAD,CPU`. +This means that your topology has a layer that is unsupported by your target VPU plugin. To resolve this issue, you can implement the custom layer for the target device using the [OpenVINO™ Extensibility mechanism](../../Extensibility_UG/Intro.md). Or, to quickly get a working prototype, you can use the heterogeneous scenario with the default fallback policy (see the [Heterogeneous execution](../hetero_execution.md) section). Use the HETERO mode with a fallback device that supports this layer, for example, CPU: `HETERO:MYRIAD,CPU`. For a list of VPU-supported layers, see the Supported Layers section of the [Supported Devices](Supported_Devices.md) page. ## Known Layers Limitations diff --git a/docs/documentation.md b/docs/documentation.md index ce888b46304..c42f93adaf4 100644 --- a/docs/documentation.md +++ b/docs/documentation.md @@ -8,7 +8,6 @@ :hidden: openvino_docs_MO_DG_Deep_Learning_Model_Optimizer_DevGuide - openvino_docs_HOWTO_Custom_Layers_Guide omz_tools_downloader @@ -68,9 +67,10 @@ .. toctree:: :maxdepth: 1 - :caption: Developing Inference Engine Plugins + :caption: OpenVINO Extensibility :hidden: + openvino_docs_Extensibility_UG_Intro Inference Engine Plugin Developer Guide groupie_dev_api Plugin Transformation Pipeline diff --git a/docs/how_tos/how-to-links.md b/docs/how_tos/how-to-links.md index 598c3e6e2c8..e808efa1ef9 100644 --- a/docs/how_tos/how-to-links.md +++ b/docs/how_tos/how-to-links.md @@ -13,7 +13,7 @@ * [Accelerate Vision-based AI with Intel® Distribution of OpenVINO™ Toolkit](https://www.intel.ai/accelerate-vision-based-ai-with-intel-distribution-of-openvino-toolkit/) ## Custom Operations Guide -To learn about what is *custom operation* and how to work with them in the Deep Learning Deployment Toolkit, see the [Custom Operations Guide](../HOWTO/Custom_Layers_Guide.md). +To learn about what is *custom operation* and how to work with them in the Deep Learning Deployment Toolkit, see the [Custom Operations Guide](../Extensibility_UG/Intro.md). 
## Introducing OpenVINO™ and Computer Vision | IoT Developer Show Season 2 | Intel Software diff --git a/docs/snippets/CMakeLists.txt b/docs/snippets/CMakeLists.txt index 2e4ef05c1a7..ce32a90200e 100644 --- a/docs/snippets/CMakeLists.txt +++ b/docs/snippets/CMakeLists.txt @@ -35,6 +35,7 @@ list(REMOVE_ITEM SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/dldt_optimization_guide2.c # create a static library add_library(${TARGET_NAME} STATIC ${SOURCES}) +target_include_directories(${TARGET_NAME} PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/../template_extension/new/") if(CLDNN__IOCL_ICD_INCDIRS) target_include_directories(${TARGET_NAME} SYSTEM PRIVATE ${CLDNN__IOCL_ICD_INCDIRS}) diff --git a/docs/snippets/ov_extensions.cpp b/docs/snippets/ov_extensions.cpp new file mode 100644 index 00000000000..0abab9d3bfa --- /dev/null +++ b/docs/snippets/ov_extensions.cpp @@ -0,0 +1,26 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// +#include +#include +#include + +int main() { +{ +//! [add_extension] +ov::Core core; +// Use operation type to add operation extension +core.add_extension(); +// or you can add operation extension to this method +core.add_extension(ov::OpExtension()); +//! [add_extension] +} +{ +//! [add_extension_lib] +ov::Core core; +// Load extensions library to ov::Core +core.add_extension("openvino_template_extension.so"); +//! [add_extension_lib] +} +return 0; +} diff --git a/docs/snippets/ov_extensions.py b/docs/snippets/ov_extensions.py new file mode 100644 index 00000000000..4f53700c746 --- /dev/null +++ b/docs/snippets/ov_extensions.py @@ -0,0 +1,15 @@ +# Copyright (C) 2018-2022 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +import openvino.runtime as ov + +#! [add_extension] +# Not implemented +#! [add_extension] + +#! [add_extension_lib] +core = ov.Core() +# Load extensions library to ov::Core +core.add_extension("openvino_template_extension.so") +#! [add_extension_lib] diff --git a/docs/template_extension/new/CMakeLists.txt b/docs/template_extension/new/CMakeLists.txt index 32bdeda4ea7..20b81dc1adf 100644 --- a/docs/template_extension/new/CMakeLists.txt +++ b/docs/template_extension/new/CMakeLists.txt @@ -14,7 +14,13 @@ set(SRC identity.cpp ov_extension.cpp) add_library(${TARGET_NAME} MODULE ${SRC}) target_compile_definitions(${TARGET_NAME} PRIVATE IMPLEMENT_OPENVINO_EXTENSION_API) -target_link_libraries(${TARGET_NAME} PRIVATE openvino::core) +target_link_libraries(${TARGET_NAME} PRIVATE openvino::runtime) + +# To map custom operation to framework +if(OpenVINO_Frontend_ONNX_FOUND) + target_link_libraries(${TARGET_NAME} PRIVATE openvino::frontend::onnx) + target_compile_definitions(${TARGET_NAME} PRIVATE OPENVINO_ONNX_FRONTEND_ENABLED) +endif() # [cmake:extension] # Enable code style check diff --git a/docs/template_extension/new/identity.hpp b/docs/template_extension/new/identity.hpp index f31ab239c34..b8c5160014d 100644 --- a/docs/template_extension/new/identity.hpp +++ b/docs/template_extension/new/identity.hpp @@ -4,7 +4,14 @@ #pragma once +//! [op:common_include] #include +//! [op:common_include] +//! [op:frontend_include] +#ifdef OPENVINO_ONNX_FRONTEND_ENABLED +# include +#endif +//! [op:frontend_include] //! 
[op:header] namespace TemplateExtension { @@ -13,6 +20,10 @@ class Identity : public ov::op::Op { public: OPENVINO_OP("Identity"); +#ifdef OPENVINO_ONNX_FRONTEND_ENABLED + OPENVINO_FRAMEWORK_MAP(onnx) +#endif + Identity() = default; Identity(const ov::Output& arg); void validate_and_infer_types() override; diff --git a/docs/template_extension/new/ov_extension.cpp b/docs/template_extension/new/ov_extension.cpp index 79d414a82e4..d2fa1e35361 100644 --- a/docs/template_extension/new/ov_extension.cpp +++ b/docs/template_extension/new/ov_extension.cpp @@ -7,5 +7,11 @@ #include "identity.hpp" +// clang-format off +//! [ov_extension:entry_point] OPENVINO_CREATE_EXTENSIONS( - std::vector({std::make_shared>()})); + std::vector({ + std::make_shared>() + })); +//! [ov_extension:entry_point] +// clang-format on diff --git a/src/frontends/common/include/openvino/frontend/extension/op.hpp b/src/frontends/common/include/openvino/frontend/extension/op.hpp index bfbb8938317..1c6f63eb77b 100644 --- a/src/frontends/common/include/openvino/frontend/extension/op.hpp +++ b/src/frontends/common/include/openvino/frontend/extension/op.hpp @@ -99,7 +99,7 @@ public: const std::string& target_name = p_name != m_attr_names_map.end() ? p_name->second : name; try { adapter.set_as_any(m_context.get_attribute_as_any(target_name)); - } catch (::ov::AssertFailure ex) { + } catch (::ov::AssertFailure& ex) { OPENVINO_ASSERT(false, ex.what(), "\nValue for attribute \"", @@ -259,4 +259,4 @@ using OpExtension = ov::frontend::OpExtensionBase Date: Thu, 24 Feb 2022 22:07:33 +0900 Subject: [PATCH 096/310] [GPU] Fix activation fusing issue(#10636) (#10636) --- .../src/graph/graph_optimizer/prepare_primitive_fusing.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp index 8a0579af82b..e2b3b6044ec 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp @@ -1076,7 +1076,7 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) { (!(user->is_type() && user->get_primitive()->input.size() == 2 && (std::find(supported_modes.begin(), supported_modes.end(), (user->as()).get_primitive()->mode) != supported_modes.end())) && - !(user->is_type() && user->get_primitive()->input.size() == 1))); + !(user->is_type() && user->get_dependency(0).get_users().size() == 1))); }); if (invalid_user_iter != curr_users.end()) { From e906b3581f67dc62b87e334013b8f3e8ba797a2d Mon Sep 17 00:00:00 2001 From: Sergey Shlyapnikov Date: Thu, 24 Feb 2022 16:41:43 +0300 Subject: [PATCH 097/310] [GPU] Replace handle_permute optimization pass with proper Reorder adding instead of Permute primitive (#10569) --- .../include/intel_gpu/graph/program.hpp | 1 - .../graph/graph_optimizer/handle_permute.cpp | 42 ----------------- .../src/graph/include/pass_manager.h | 8 ---- src/plugins/intel_gpu/src/graph/program.cpp | 2 - .../intel_gpu/src/plugin/ops/transpose.cpp | 25 ++++++++++ .../fusions/deconvolution_fusion_test.cpp | 3 +- .../test_cases/handle_permute_gpu_test.cpp | 46 ------------------- 7 files changed, 27 insertions(+), 100 deletions(-) delete mode 100644 src/plugins/intel_gpu/src/graph/graph_optimizer/handle_permute.cpp delete mode 100644 src/plugins/intel_gpu/tests/test_cases/handle_permute_gpu_test.cpp diff --git 
a/src/plugins/intel_gpu/include/intel_gpu/graph/program.hpp b/src/plugins/intel_gpu/include/intel_gpu/graph/program.hpp index c1e1e2ae6db..7c5cba3a587 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/graph/program.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/graph/program.hpp @@ -44,7 +44,6 @@ struct program { friend class prepare_conv_eltw_fusing; // to be removed when possible friend class reorder_inputs; // to be removed when possible friend class remove_redundant_reorders; // to be removed when possible - friend class handle_permute; // to be removed when possible friend class program_wrapper; // this class is intended to extend the interface of program for // the usage within tests_core_internal project only public: diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/handle_permute.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/handle_permute.cpp deleted file mode 100644 index 6359ccb2191..00000000000 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/handle_permute.cpp +++ /dev/null @@ -1,42 +0,0 @@ -// Copyright (C) 2022 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -/////////////////////////////////////////////////////////////////////////////////////////////////// - -#include "pass_manager.h" -#include "program_helpers.h" -#include "permute_inst.h" -#include "program_node.h" -#include "intel_gpu/graph/program.hpp" - -#include -#include -#include - -using namespace cldnn; - -void handle_permute::run(program& p) { - auto itr = p.get_processing_order().begin(); - while (itr != p.get_processing_order().end()) { - auto& node = (*itr++); - if (!node->is_type()) - continue; - - auto& perm_node = node->as(); - auto& prev_node = perm_node.get_dependencies().front(); - if (prev_node->get_output_layout().format == format::byxf && - perm_node.get_permute_order() == std::vector{ 0, 2, 3, 1 }) { - layout reorder_layout = perm_node.get_output_layout(); - reorder_layout.format = format::bfyx; - std::string reorder_name = perm_node.id() + "_converted_to_reorder"; - - auto new_reorder = std::make_shared(reorder_name, prev_node->id(), reorder_layout); - auto& new_reorder_node = p.get_or_create(new_reorder); - - p.replace(perm_node, new_reorder_node); - p.rename(new_reorder_node, reorder_name); - new_reorder_node.recalc_output_layout(); - } - } -} diff --git a/src/plugins/intel_gpu/src/graph/include/pass_manager.h b/src/plugins/intel_gpu/src/graph/include/pass_manager.h index 59c89e79cef..7812dc481c0 100644 --- a/src/plugins/intel_gpu/src/graph/include/pass_manager.h +++ b/src/plugins/intel_gpu/src/graph/include/pass_manager.h @@ -132,14 +132,6 @@ private: void run(program& p) override; }; -class handle_permute : public base_pass { -public: - handle_permute() : base_pass("handle_permute") {} - -private: - void run(program& p) override; -}; - class mark_nodes : public base_pass { public: mark_nodes() : base_pass("analyzed_graph") {} diff --git a/src/plugins/intel_gpu/src/graph/program.cpp b/src/plugins/intel_gpu/src/graph/program.cpp index 73fcf10162e..e9fdcca1890 100644 --- a/src/plugins/intel_gpu/src/graph/program.cpp +++ b/src/plugins/intel_gpu/src/graph/program.cpp @@ -483,8 +483,6 @@ void program::pre_optimize_graph(bool is_internal) { // handle symmetric and asymmetric padding for input apply_opt_pass(); - apply_opt_pass(); - processing_order.calculate_BFS_processing_order(); // this method makes sense only for OOOQ (out of order execution queue) apply_opt_pass(); diff --git a/src/plugins/intel_gpu/src/plugin/ops/transpose.cpp 
b/src/plugins/intel_gpu/src/plugin/ops/transpose.cpp index 85a70a93c52..8def02e4d3a 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/transpose.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/transpose.cpp @@ -9,6 +9,7 @@ #include "ngraph/op/constant.hpp" #include "intel_gpu/primitives/permute.hpp" +#include "intel_gpu/primitives/reorder.hpp" namespace ov { namespace runtime { @@ -28,6 +29,30 @@ static void CreateTransposeOp(Program& p, const std::shared_ptrcast_vector(); } + auto is_convert_color_type = [](const std::shared_ptr &node) { + return ngraph::is_type(node) || + ngraph::is_type(node) || + ngraph::is_type(node) || + ngraph::is_type(node); + }; + + // Handle Transpose operation related to ConvertColor operation: + // In case of ConvertColor operation we have NHWC (byxf) input format which should be converted to + // NCHW (bfyx) by this Permute, so we replace Permute with Reorder (to bfyx) primitve + auto input = op->input(0).get_source_output().get_node_shared_ptr(); + if (is_convert_color_type(input) && ie_order == std::vector{0, 3, 1, 2}) { + auto precision = input->get_element_type(); + p.AddPrimitive(cldnn::reorder(layerName, + inputPrimitives[0], + cldnn::format::bfyx, + DataTypeFromPrecision(precision), + std::vector(), + cldnn::reorder_mean_mode::none, + op->get_friendly_name())); + p.AddPrimitiveToProfiler(op); + return; + } + int rank = std::max(4, static_cast(op->get_input_shape(0).size())); if (ie_order.empty()) { // if order size is less than 4 - fill the rest with just copy diff --git a/src/plugins/intel_gpu/tests/fusions/deconvolution_fusion_test.cpp b/src/plugins/intel_gpu/tests/fusions/deconvolution_fusion_test.cpp index b7b66adfb90..5a2effbbf82 100644 --- a/src/plugins/intel_gpu/tests/fusions/deconvolution_fusion_test.cpp +++ b/src/plugins/intel_gpu/tests/fusions/deconvolution_fusion_test.cpp @@ -700,7 +700,8 @@ INSTANTIATE_TEST_SUITE_P(fusings_gpu, deconv_scale_actv_quant_u8_eltw_scale_actv deconv_test_params{ CASE_DECONV_FP32_3D_1, 2, 9 }, // deconv_test_params{ CASE_DECONV_FP32_3D_2, 2, 9 }, - deconv_test_params{ CASE_DECONV_FP32_3D_3, 2, 9 }, + // Commented out due to sporadic CI failures + // deconv_test_params{ CASE_DECONV_FP32_3D_3, 2, 9 }, deconv_test_params{ CASE_DECONV_FP32_3D_4, 2, 9 }, deconv_test_params{ CASE_DECONV_FP32_3D_5, 2, 9 }, deconv_test_params{ CASE_DECONV_FP32_3D_6, 2, 9 }, diff --git a/src/plugins/intel_gpu/tests/test_cases/handle_permute_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/handle_permute_gpu_test.cpp deleted file mode 100644 index cee17b43627..00000000000 --- a/src/plugins/intel_gpu/tests/test_cases/handle_permute_gpu_test.cpp +++ /dev/null @@ -1,46 +0,0 @@ -// Copyright (C) 2022 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "test_utils.h" - -#include -#include -#include -#include - -using namespace cldnn; -using namespace ::tests; - -TEST(handle_permute, convert_permute_to_reorder) { - auto& engine = get_test_engine(); - - int32_t width = 224; - int32_t height = 448; - int32_t input_height = height + height / 2; - - auto input = engine.allocate_memory({ data_types::f32, format::byxf, { 1, 1, width, input_height } }); - - std::vector input_data = generate_random_1d(width * input_height, 0, 255); - set_values(input, input_data); - - layout output_layout(data_types::f32, cldnn::format::byxf, { 1, 3, width, height }); - - topology topology; - topology.add(input_layout("input", input->get_layout())); - topology.add(convert_color("convert_color", { "input" }, 
cldnn::convert_color::color_format::NV12, cldnn::convert_color::color_format::RGB, - cldnn::convert_color::memory_type::buffer, output_layout)); - topology.add(permute("permute", "convert_color", { 0, 2, 3, 1 })); - topology.add(resample("resample", "permute", { 1, 3, width, height })); - - network network(engine, topology); - network.set_input_data("input", input); - - auto outputs = network.execute(); - std::vector expected_shape = { 1, 3, width, height }; - std::vector output_shape = outputs.at("resample").get_memory()->get_layout().size.sizes(); - - for (size_t i = 0; i < expected_shape.size(); ++i) { - EXPECT_EQ(output_shape[i], expected_shape[i]); - } -} From f2bbd5bbb8278dc670146dcd684a1b7604f87d75 Mon Sep 17 00:00:00 2001 From: Anastasia Kazantaeva Date: Thu, 24 Feb 2022 19:13:21 +0300 Subject: [PATCH 098/310] Add original contribution guide to root (#10644) --- CONTRIBUTING.md | 68 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100644 CONTRIBUTING.md diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 00000000000..45a41c374ee --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,68 @@ +# How to contribute to the OpenVINO repository + +We suppose that you are an enthusiastic coder, want to contribute some code. For that purpose OpenVINO project now has a repository on the GitHub, to simplify everybody's life! All the bug fixes, new functionality, new tutorials etc. should be submitted via the GitHub's mechanism of pull requests. + +If you are not familiar with the mechanism - do not worry, it's very simple. Keep reading. + +## Before you start contributing you should + +- Make sure you agree to contribute your code under [OpenVINO (Apache 2.0)](https://github.com/openvinotoolkit/openvino/blob/master/LICENSE) license. +- If you are submitting a new module, you should go into [openvino_contrib](https://github.com/openvinotoolkit/openvino_contrib) repository by default. +- If you are going to fix a bug, check that it's still exists. This can be done by building the latest [releases/2020/3](https://github.com/openvinotoolkit/openvino/tree/releases/2020/3) branch (LTS release) or the latest master branch, and make sure that the error is still reproducible there. We do not fix bugs that only affect older non-LTS releases like 2020.2 for example (more details about [branching strategy](https://github.com/openvinotoolkit/openvino/wiki/Branches)) +- Make sure that nobody beat you into fixing or reporting the issue by doing a search on the [Github OpenVINO issues](https://github.com/openvinotoolkit/openvino/issues) page, and making sure that there isn't someone working on it. In the latter case you might provide support or suggestion in the issue or in the linked pull request. +- If you have a question about the software, then this is **NOT** the right place. You should open up a question at the [OpenVINO forum](https://community.intel.com/t5/Intel-Distribution-of-OpenVINO/bd-p/distribution-openvino-toolkit). In order to post a decent question from the start, feel free to read the official forum guidelines. + +Before you open up anything on the OpenVINO GitHub page, be sure that you are at the right place with your problem. + +## "Fork & Pull Request model" for code contribution + +### [](https://github.com/openvinotoolkit/openvino/wiki/Contribute#the-instruction-in-brief)The instruction in brief + +- Register at GitHub. 
Create your fork of the OpenVINO repository [https://github.com/openvinotoolkit/openvino](https://github.com/openvinotoolkit/openvino) (see [https://help.github.com/articles/fork-a-repo](https://help.github.com/articles/fork-a-repo) for details).
+- Install Git.
+  - Set your user name and email address in the Git configuration according to your GitHub account (see [https://git-scm.com/book/en/v2/Getting-Started-First-Time-Git-Setup](https://git-scm.com/book/en/v2/Getting-Started-First-Time-Git-Setup) for details).
+- Choose a task for yourself. It could be a bugfix or some new code.
+- Choose a base branch for your work. More details about branches and policies are here: [Branches](https://github.com/openvinotoolkit/openvino/wiki/Branches)
+- Clone your fork to your computer.
+- Create a new branch (with a meaningful name) from the base branch you chose.
+- Modify / add the code following our [Coding Style Guide](https://github.com/openvinotoolkit/openvino/wiki/CodingStyleGuideLines) and [Documentation guidelines](https://github.com/openvinotoolkit/openvino/wiki/CodingStyleGuideLinesDocumentation).
+- If you want to add a new sample, please look at this [Guide for contributing to C++/C/Python IE samples](https://github.com/openvinotoolkit/openvino/wiki/SampleContribute)
+- Run the testsuite locally:
+  - execute each test binary from the artifacts directory, e.g. `/bin/intel64/Release/ieFuncTests`
+- If you contribute to the documentation and want to add a new guide:
+  - Create a new markdown file in an appropriate folder.
+  - **REQUIRED:** The document title must contain a document label in the form: `{#openvino_docs_}`. For example: `Deep Learning Network Intermediate Representation and Operation Sets in OpenVINO™ {#openvino_docs_MO_DG_IR_and_opsets}`.
+  - Add your file to the documentation structure. Open the documentation structure file [`docs/doxygen/ie_docs.xml`](https://github.com/openvinotoolkit/openvino/blob/master/docs/doxygen/ie_docs.xml) and add your file path to the appropriate section.
+- When you are done, make sure that your branch is up to date with the latest state of the branch you want to contribute to (e.g. `git fetch upstream && git merge upstream/master`), push your branch to your GitHub fork, and then create a pull request from your branch to the base branch (see [https://help.github.com/articles/using-pull-requests](https://help.github.com/articles/using-pull-requests) for details).
+
+## Making a good pull request
+
+Following these guidelines will increase the likelihood of your pull request being accepted:
+
+- Before pushing your PR to the repository, make sure that it builds perfectly fine on your local system.
+- Add enough information: a meaningful title, the reason why you made the commit, and a link to the issue page if you opened one for this PR.
+- Scope your PR to one issue. Before submitting, make sure the diff contains no unrelated changes. If you want to cover more than one issue, submit your changes for each as separate pull requests.
+- If you have added new functionality, you should update/create the relevant documentation, as well as add tests for it to the testsuite.
+- Try not to include "oops" commits - ones that just fix an error in the previous commit. If you have those, then before submitting [squash](https://github.com/openvinotoolkit/openvino/wiki/Contribute#https://git-scm.com/book/en/v2/Git-Tools-Rewriting-History#Squashing-Commits) those fixes directly into the commits where they belong.
+- Make sure to choose the right base branch and to follow the [Coding Style Guide](https://github.com/openvinotoolkit/openvino/wiki/CodingStyleGuideLines) for your code, or the [Documentation guidelines](https://github.com/openvinotoolkit/openvino/wiki/CodingStyleGuideLinesDocumentation) if you are changing documentation files.
+- Make sure to add a test for new functionality, or a test that reproduces the fixed bug, together with the related test data. Please do not add extra images or videos if some of the existing media files are suitable.
+
+## Testing and merging pull requests
+
+- Your pull request will be automatically tested by OpenVINO's precommit (the testing status is automatically reported as "green" or "red" circles in the precommit steps on the PR page). If any builders have failed, you should fix the issue. To rerun the automatic builds, just push changes to your branch on GitHub. No need to close the pull request and open a new one!
+- Once all the builders are "green", one of the OpenVINO developers will review your code. The reviewer could ask you to modify your pull request. Please provide a timely response to reviewers (within weeks, not months), otherwise your submission could be postponed or even rejected.
+
+## PR review good practices
+
+- The originator is responsible for driving the review of changes and should ping reviewers periodically.
+- The originator should close comments from the reviewer when they are resolved. The reviewer may re-open a comment if they do not agree with the resolution.
+- The originator should request a re-review from the reviewer when all comments are resolved, by pushing the button in the “Reviewers” section.
+- If your PR is still a work in progress and you want to check CI test results early, use a _Draft_ PR.
+- Do **NOT** rewrite history (push -f) once you have converted a draft PR into a regular one; add new commits instead. Looking at diffs makes review easier.
+- Write a meaningful description for commits resulting from review. _"Addressing review comments"_ is **NOT** a good description! A quick look at good descriptions can tell you much about what is going on in the PR without the need to go through all of the resolved comments.
+
+## Merging PR
+
+As soon as the reviewer is fine with the pull request and Precommit likes your code and shows "green" status, the "Approved" review status is set, which signals OpenVINO maintainers that they can merge your pull request.
+ +© Copyright 2018-2022, OpenVINO team \ No newline at end of file From 806ce968998e2c7ff56fd18b19f522ce7ba41eb9 Mon Sep 17 00:00:00 2001 From: Ilya Churaev Date: Thu, 24 Feb 2022 19:41:47 +0300 Subject: [PATCH 099/310] Remove onnx_custom_op doc (#10638) * Remove onnx_custom_op doc * Remove test * Fixed tests --- cmake/test_model_zoo.cmake | 5 -- docs/CMakeLists.txt | 3 - docs/Doxyfile.config | 1 - docs/onnx_custom_op/CMakeLists.txt | 18 ------ .../onnx_custom_op/custom_relu_model.prototxt | 52 ----------------- docs/onnx_custom_op/onnx_custom_op.cpp | 57 ------------------- docs/onnx_custom_op/onnx_custom_op.hpp | 8 --- .../functional/plugin/cpu/CMakeLists.txt | 5 +- .../plugin/cpu/extension/extension.cpp | 32 ----------- 9 files changed, 1 insertion(+), 180 deletions(-) delete mode 100644 docs/onnx_custom_op/CMakeLists.txt delete mode 100644 docs/onnx_custom_op/custom_relu_model.prototxt delete mode 100644 docs/onnx_custom_op/onnx_custom_op.cpp delete mode 100644 docs/onnx_custom_op/onnx_custom_op.hpp diff --git a/cmake/test_model_zoo.cmake b/cmake/test_model_zoo.cmake index 176e1b55220..18442560350 100644 --- a/cmake/test_model_zoo.cmake +++ b/cmake/test_model_zoo.cmake @@ -86,11 +86,6 @@ ov_model_convert("${OpenVINO_SOURCE_DIR}/${rel_path}" "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/test_model_zoo/onnx_import" ie_onnx_import_out_files) -set(rel_path "docs/onnx_custom_op") -ov_model_convert("${OpenVINO_SOURCE_DIR}/${rel_path}" - "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/test_model_zoo/docs/models" - docs_onnx_out_files) - if(ENABLE_TESTS) if(ENABLE_OV_ONNX_FRONTEND AND ENABLE_REQUIREMENTS_INSTALL) find_package(PythonInterp 3 REQUIRED) diff --git a/docs/CMakeLists.txt b/docs/CMakeLists.txt index 1e0699b931d..75421a3c972 100644 --- a/docs/CMakeLists.txt +++ b/docs/CMakeLists.txt @@ -17,9 +17,6 @@ if(NOT ENABLE_DOCKER) set(OpenVINO_DIR ${CMAKE_BINARY_DIR}) endif() - if(ENABLE_OV_ONNX_FRONTEND) - add_subdirectory(onnx_custom_op) - endif() add_subdirectory(template_extension) set(all_docs_targets diff --git a/docs/Doxyfile.config b/docs/Doxyfile.config index 1538db98e38..5c076a4dfe6 100644 --- a/docs/Doxyfile.config +++ b/docs/Doxyfile.config @@ -1045,7 +1045,6 @@ EXCLUDE_SYMBOLS = InferenceEngine::details \ EXAMPLE_PATH = "@OpenVINO_SOURCE_DIR@" \ "@OpenVINO_SOURCE_DIR@/docs/HOWTO/" \ "@OpenVINO_SOURCE_DIR@/docs/" \ - "@OpenVINO_SOURCE_DIR@/docs/onnx_custom_op/" \ "@OpenVINO_SOURCE_DIR@/docs/template_extension/" \ "@OpenVINO_SOURCE_DIR@/docs/template_extension/old/" \ "@OpenVINO_SOURCE_DIR@/docs/template_extension/new/" \ diff --git a/docs/onnx_custom_op/CMakeLists.txt b/docs/onnx_custom_op/CMakeLists.txt deleted file mode 100644 index 2f2a77c58b7..00000000000 --- a/docs/onnx_custom_op/CMakeLists.txt +++ /dev/null @@ -1,18 +0,0 @@ -# Copyright (C) 2018-2022 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 -# - -# [cmake:onnx_custom_op] -set(CMAKE_CXX_STANDARD 11) - -set(TARGET_NAME "onnx_custom_op") - -find_package(OpenVINO REQUIRED COMPONENTS ONNX) - -add_library(${TARGET_NAME} STATIC onnx_custom_op.cpp onnx_custom_op.hpp) - -target_link_libraries(${TARGET_NAME} PUBLIC openvino::core openvino::frontend::onnx) -# [cmake:onnx_custom_op] - -# Enable code style check -add_clang_format_target(${TARGET_NAME}_clang FOR_TARGETS ${TARGET_NAME}) diff --git a/docs/onnx_custom_op/custom_relu_model.prototxt b/docs/onnx_custom_op/custom_relu_model.prototxt deleted file mode 100644 index 3845cc00ce2..00000000000 --- a/docs/onnx_custom_op/custom_relu_model.prototxt +++ /dev/null @@ -1,52 +0,0 @@ -ir_version: 3 
-producer_name: "nGraph ONNX Importer" -graph { - node { - input: "in" - output: "out" - name: "customrelu" - op_type: "CustomRelu" - domain: "com.example" - attribute { - name: "alpha" - type: FLOAT - f: 2 - } - attribute { - name: "beta" - type: FLOAT - f: 3 - } - } - name: "custom relu graph" - input { - name: "in" - type { - tensor_type { - elem_type: 1 - shape { - dim { - dim_value: 8 - } - } - } - } - } - output { - name: "out" - type { - tensor_type { - elem_type: 1 - shape { - dim { - dim_value: 8 - } - } - } - } - } -} -opset_import { - domain: "com.example" - version: 1 -} diff --git a/docs/onnx_custom_op/onnx_custom_op.cpp b/docs/onnx_custom_op/onnx_custom_op.cpp deleted file mode 100644 index 399a6d2b4a3..00000000000 --- a/docs/onnx_custom_op/onnx_custom_op.cpp +++ /dev/null @@ -1,57 +0,0 @@ -// Copyright (C) 2018-2022 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -//! [onnx_custom_op:headers] -// onnx_import/onnx_utils.hpp provides ngraph::onnx_import::register_operator function, that registers operator in ONNX importer's set. -#include -// ngraph/opsets/opset5.hpp provides the declaration of predefined nGraph operator set -#include -//! [onnx_custom_op:headers] - -void register_custom_relu_operator() { - // CustomRelu is defined as follows: - // x >= 0 => f(x) = x * alpha - // x < 0 => f(x) = x * beta - -//! [onnx_custom_op:register_operator] - ngraph::onnx_import::register_operator( - "CustomRelu", 1, "com.example", [](const ngraph::onnx_import::Node& onnx_node) -> ngraph::OutputVector { - namespace opset = ngraph::opset5; - - ngraph::OutputVector ng_inputs{onnx_node.get_ng_inputs()}; - const ngraph::Output& data = ng_inputs.at(0); - // create constant node with a single element that's equal to zero - std::shared_ptr zero_node = opset::Constant::create(data.get_element_type(), ngraph::Shape{}, {0}); - // create a negative map for 'data' node, 1 for negative values , 0 for positive values or zero - // then convert it from boolean type to `data.get_element_type()` - std::shared_ptr negative_map = std::make_shared( - std::make_shared(data, zero_node), data.get_element_type()); - // create a positive map for 'data' node, 0 for negative values , 1 for positive values or zero - // then convert it from boolean type to `data.get_element_type()` - std::shared_ptr positive_map = std::make_shared( - std::make_shared(data, zero_node), data.get_element_type()); - - // fetch alpha and beta attributes from ONNX node - float alpha = onnx_node.get_attribute_value("alpha", 1); // if 'alpha' attribute is not provided in the model, then the default value is 1 - float beta = onnx_node.get_attribute_value("beta"); - // create constant node with a single element 'alpha' with type f32 - std::shared_ptr alpha_node = opset::Constant::create(ngraph::element::f32, ngraph::Shape{}, {alpha}); - // create constant node with a single element 'beta' with type f32 - std::shared_ptr beta_node = opset::Constant::create(ngraph::element::f32, ngraph::Shape{}, {beta}); - - return { - std::make_shared( - std::make_shared(alpha_node, std::make_shared(data, positive_map)), - std::make_shared(beta_node, std::make_shared(data, negative_map)) - ) - }; - }); -//! [onnx_custom_op:register_operator] -} - -void unregister_custom_relu_operator() { -//! [onnx_custom_op:unregister_operator] - ngraph::onnx_import::unregister_operator("CustomRelu", 1, "com.example"); -//! 
[onnx_custom_op:unregister_operator] -} diff --git a/docs/onnx_custom_op/onnx_custom_op.hpp b/docs/onnx_custom_op/onnx_custom_op.hpp deleted file mode 100644 index 3554226da6b..00000000000 --- a/docs/onnx_custom_op/onnx_custom_op.hpp +++ /dev/null @@ -1,8 +0,0 @@ -// Copyright (C) 2018-2022 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -void register_custom_relu_operator(); -void unregister_custom_relu_operator(); diff --git a/src/tests/functional/plugin/cpu/CMakeLists.txt b/src/tests/functional/plugin/cpu/CMakeLists.txt index 9ba753bea29..6af1b7155cd 100644 --- a/src/tests/functional/plugin/cpu/CMakeLists.txt +++ b/src/tests/functional/plugin/cpu/CMakeLists.txt @@ -12,12 +12,9 @@ set(INCLUDES ${CMAKE_CURRENT_SOURCE_DIR} $ #include #include -#include #include #include -#include - class CustomAbsKernel : public InferenceEngine::ILayerExecImpl { public: @@ -152,23 +149,6 @@ static std::string model_full_path(const char* path) { return FileUtils::makePath(TEST_MODELS, path); } -TEST(Extension, OnnxModelWithCustomAbs) { - std::vector input_values{1, -2, 3, -4, 5, -6, 7, -8, 9, -10}; - std::vector expected{1, 4, 3, 8, 5, 12, 7, 16, 9, 20}; - InferenceEngine::Core ie; - ie.AddExtension(std::make_shared()); - ngraph::onnx_import::register_operator( - CustomAbs::get_type_info_static().name, 1, "custom_domain", [](const ngraph::onnx_import::Node& node) -> ngraph::OutputVector { - ngraph::OutputVector ng_inputs{node.get_ng_inputs()}; - return {std::make_shared(ng_inputs.at(0))}; - }); - - auto network = ie.ReadNetwork(model_full_path("func_tests/models/custom_abs_op.onnx")); - infer_model(ie, network, input_values, expected); - ngraph::onnx_import::unregister_operator(CustomAbs::get_type_info_static().name, 1, "custom_domain"); -} - - TEST(Extension, XmlModelWithCustomAbs) { std::string model = R"V0G0N( @@ -293,15 +273,3 @@ TEST(Extension, OnnxModelWithExtensionFromDSO) { auto network = ie.ReadNetwork(model_full_path("func_tests/models/custom_template_op.onnx")); infer_model(ie, network, input_values, expected); } - - -TEST(Extension, OnnxModelWithCustomReluDocsExample) { - std::vector input_values{0, -1, 2, -3, 4, -5, 6, -7}; - std::vector expected{0, -3, 4, -9, 8, -15, 12, -21}; - - register_custom_relu_operator(); - InferenceEngine::Core ie; - auto network = ie.ReadNetwork(model_full_path("docs/models/custom_relu_model.onnx")); - infer_model(ie, network, input_values, expected); - unregister_custom_relu_operator(); -} From ffd63f9758537c48ec69af3f05f05fdecbc74654 Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Fri, 25 Feb 2022 00:44:48 +0300 Subject: [PATCH 100/310] Replaced IE with OV runtime: docs (#10642) * Updated glossary * Removed references to OpenVX * Moved migration_ov_2_0 to OpenVINO User guide * Replaced IE with OV runtime --- docs/Doxyfile.config | 1 - docs/IE_PLUGIN_DG/AsyncInferRequest.md | 4 +- docs/MO_DG/Known_Issues_Limitations.md | 38 ------------------- .../prepare_model/Additional_Optimizations.md | 4 +- .../Default_Model_Optimizer_Optimizations.md | 2 +- .../Getting_performance_numbers.md | 22 +++++------ .../prepare_model/Model_Optimizer_FAQ.md | 6 +-- .../Supported_Frameworks_Layers.md | 2 +- .../convert_model/Convert_Model_From_Caffe.md | 2 +- .../convert_model/Convert_Model_From_Kaldi.md | 4 +- .../convert_model/Convert_Model_From_MxNet.md | 2 +- .../Convert_Model_From_Paddle.md | 2 +- .../Convert_Model_From_PyTorch.md | 2 +- .../Convert_Model_From_TensorFlow.md | 4 +- .../convert_model/Converting_Model.md | 8 ++-- 
.../convert_model/Cutting_Model.md | 4 +- .../IR_suitable_for_INT8_inference.md | 2 +- .../Convert_Style_Transfer_From_MXNet.md | 2 +- .../Convert_GNMT_From_Tensorflow.md | 4 +- .../Convert_Object_Detection_API_Models.md | 16 ++++---- .../Convert_Slim_Library_Models.md | 4 +- .../Customize_Model_Optimizer.md | 6 +-- docs/OV_Runtime_UG/Model_caching_overview.md | 6 +-- docs/OV_Runtime_UG/auto_device_selection.md | 8 ++-- .../docs/common_inference_pipeline.md | 0 .../docs/graph_construction.md | 0 .../migration_ov_2_0/docs/intro.md | 0 docs/OV_Runtime_UG/multi_device.md | 4 +- docs/OV_Runtime_UG/supported_plugins/GNA.md | 2 +- docs/OV_Runtime_UG/supported_plugins/GPU.md | 2 +- docs/OV_Runtime_UG/supported_plugins/HDDL.md | 2 +- .../OV_Runtime_UG/supported_plugins/MYRIAD.md | 2 +- .../supported_plugins/Supported_Devices.md | 6 +-- docs/OV_Runtime_UG/supported_plugins/VPU.md | 2 +- docs/benchmarks/performance_benchmarks_faq.md | 2 +- .../performance_benchmarks_openvino.md | 2 +- .../benchmarks/performance_benchmarks_ovms.md | 2 +- docs/documentation.md | 4 +- docs/gapi/face_beautification.md | 2 +- docs/gapi/gapi_face_analytics_pipeline.md | 6 +-- docs/get_started.md | 4 +- docs/get_started/get_started_demos.md | 12 +++--- docs/glossary.md | 22 +++++------ .../install_guides/deployment-manager-tool.md | 4 +- .../install_guides/installing-openvino-apt.md | 2 +- .../installing-openvino-conda.md | 2 +- .../installing-openvino-images.md | 2 +- .../installing-openvino-overview.md | 2 +- .../install_guides/installing-openvino-pip.md | 2 +- .../installing-openvino-yocto.md | 10 ++--- .../install_guides/installing-openvino-yum.md | 2 +- docs/install_guides/pypi-openvino-dev.md | 6 +-- .../dldt_optimization_guide.md | 2 +- 53 files changed, 112 insertions(+), 153 deletions(-) rename docs/{ => OV_Runtime_UG}/migration_ov_2_0/docs/common_inference_pipeline.md (100%) rename docs/{ => OV_Runtime_UG}/migration_ov_2_0/docs/graph_construction.md (100%) rename docs/{ => OV_Runtime_UG}/migration_ov_2_0/docs/intro.md (100%) diff --git a/docs/Doxyfile.config b/docs/Doxyfile.config index 5c076a4dfe6..7c3614c5f10 100644 --- a/docs/Doxyfile.config +++ b/docs/Doxyfile.config @@ -936,7 +936,6 @@ EXCLUDE_SYMLINKS = NO EXCLUDE_PATTERNS = */temp/* \ */bin/* \ */tests/* \ - */openvx/* \ */thirdparty/* \ "@DOXYREST_OUT@" \ "@XML_OUTPUT@" \ diff --git a/docs/IE_PLUGIN_DG/AsyncInferRequest.md b/docs/IE_PLUGIN_DG/AsyncInferRequest.md index d9aa2a5d3c0..59c5beadd55 100644 --- a/docs/IE_PLUGIN_DG/AsyncInferRequest.md +++ b/docs/IE_PLUGIN_DG/AsyncInferRequest.md @@ -1,7 +1,7 @@ # Asynchronous Inference Request {#openvino_docs_ie_plugin_dg_async_infer_request} Asynchronous Inference Request runs an inference pipeline asynchronously in one or several task executors depending on a device pipeline structure. -Inference Engine Plugin API provides the base InferenceEngine::AsyncInferRequestThreadSafeDefault class: +OpenVINO Runtime Plugin API provides the base InferenceEngine::AsyncInferRequestThreadSafeDefault class: - The class has the `_pipeline` field of `std::vector >`, which contains pairs of an executor and executed task. - All executors are passed as arguments to a class constructor and they are in the running state and ready to run tasks. 
@@ -10,7 +10,7 @@ Inference Engine Plugin API provides the base InferenceEngine::AsyncInferRequest `AsyncInferRequest` Class ------------------------ -Inference Engine Plugin API provides the base InferenceEngine::AsyncInferRequestThreadSafeDefault class for a custom asynchronous inference request implementation: +OpenVINO Runtime Plugin API provides the base InferenceEngine::AsyncInferRequestThreadSafeDefault class for a custom asynchronous inference request implementation: @snippet src/template_async_infer_request.hpp async_infer_request:header diff --git a/docs/MO_DG/Known_Issues_Limitations.md b/docs/MO_DG/Known_Issues_Limitations.md index ec8897d06c6..610bafc579c 100644 --- a/docs/MO_DG/Known_Issues_Limitations.md +++ b/docs/MO_DG/Known_Issues_Limitations.md @@ -7,41 +7,3 @@ TensorFlow* provides only prebuilt binaries with AVX instructions enabled. When To run the Model Optimizer on this hardware, you should compile TensorFlow binaries from source as described at the [TensorFlow website](https://www.tensorflow.org/install/source). Another option is to run the Model Optimizer to generate an IR on hardware that supports AVX to and then perform inference on hardware without AVX. - - -## Multiple OpenMP Loadings - -If the application uses the Inference Engine with third-party components that depend on Intel OpenMP, multiple loadings of the libiomp library may occur and cause OpenMP runtime initialization conflicts. This may happen, for example, if the application uses Intel® Math Kernel Library (Intel® MKL) through the “Single Dynamic Library” (libmkl_rt.so) mechanism and calls Intel MKL after loading the Inference Engine plugin. -The error log looks as follows: -```sh -OMP: Error #15: Initializing libiomp5.so, but found libiomp5.so already initialized. -OMP: Hint: This means that multiple copies of the OpenMP runtime have been linked into the program. That is dangerous, since it can degrade performance or cause incorrect results. The best thing to do is to ensure that only a single OpenMP runtime is linked into the process, e.g. by avoiding static linking of the OpenMP runtime in any library. As an unsafe, unsupported, undocumented workaround you can set the environment variable KMP_DUPLICATE_LIB_OK=TRUE to allow the program to continue to execute, but that may cause crashes or silently produce incorrect results. For more information, please see http://www.intel.com/software/products/support/. -``` - -Possible workarounds: - -* Preload the OpenMP runtime using the LD_PRELOAD variable: - ```sh - LD_PRELOAD= ``` - This eliminates multiple loadings of libiomp, and makes all the components use this specific version of OpenMP. - -* Alternatively, you can set KMP_DUPLICATE_LIB_OK=TRUE. However, performance degradation or incorrect results may occur in this case. - - -## Old proto compiler breaks protobuf library - -With python protobuf library version 3.5.1 the following incompatibility can happen. -The known case is for Cent OS 7.4 - -The error log looks as follows: - -```sh -File "../lib64/python3.5/site-packages/google/protobuf/descriptor.py", line 829, in _new_ -return _message.default_pool.AddSerializedFile(serialized_pb) -TypeError: expected bytes, str found -``` - -Possible workaround is to upgrade default protobuf compiler (libprotoc 2.5.0) to newer version, for example -libprotoc 2.6.1. 
- -[protobuf_issue]: https://github.com/google/protobuf/issues/4272 diff --git a/docs/MO_DG/prepare_model/Additional_Optimizations.md b/docs/MO_DG/prepare_model/Additional_Optimizations.md index 4a927563b6c..726a2139bfd 100644 --- a/docs/MO_DG/prepare_model/Additional_Optimizations.md +++ b/docs/MO_DG/prepare_model/Additional_Optimizations.md @@ -3,13 +3,13 @@ Model Optimizer performs preprocessing to a model. It is possible to optimize this step and improve first inference time, to do that, follow the tips bellow: - **Image mean/scale parameters**
  Make sure to use the input image mean/scale parameters (`--scale` and `–mean_values`) with the Model Optimizer when you need pre-processing. It allows the tool to bake the pre-processing into the IR to get accelerated by the Inference Engine.
+  Make sure to use the input image mean/scale parameters (`--scale` and `--mean_values`) with the Model Optimizer when you need pre-processing. It allows the tool to bake the pre-processing into the IR to get accelerated by the OpenVINO Runtime.
 - **RGB vs. BGR inputs**<br>
If, for example, your network assumes the RGB inputs, the Model Optimizer can swap the channels in the first convolution using the `--reverse_input_channels` command line option, so you do not need to convert your inputs to RGB every time you get the BGR image, for example, from OpenCV*. - **Larger batch size**
- Notice that the devices like GPU are doing better with larger batch size. While it is possible to set the batch size in the runtime using the Inference Engine [ShapeInference feature](../../OV_Runtime_UG/ShapeInference.md). + Notice that the devices like GPU are doing better with larger batch size. While it is possible to set the batch size in the runtime using the OpenVINO Runtime API [ShapeInference feature](../../OV_Runtime_UG/ShapeInference.md). - **Resulting IR precision**
The resulting IR precision, for instance, `FP16` or `FP32`, directly affects performance. As CPU now supports `FP16` (while internally upscaling to `FP32` anyway) and because this is the best precision for a GPU target, you may want to always convert models to `FP16`. Notice that this is the only precision that Intel® Movidius™ Myriad™ 2 and Intel® Myriad™ X VPUs support. diff --git a/docs/MO_DG/prepare_model/Default_Model_Optimizer_Optimizations.md b/docs/MO_DG/prepare_model/Default_Model_Optimizer_Optimizations.md index 6b4f1826384..b40da184e66 100644 --- a/docs/MO_DG/prepare_model/Default_Model_Optimizer_Optimizations.md +++ b/docs/MO_DG/prepare_model/Default_Model_Optimizer_Optimizations.md @@ -8,4 +8,4 @@ The picture above shows Caffe\* Resnet269\* topology. The left model is the orig If you still see these operations, inspect the Model Optimizer output carefully while searching for warnings, such as on the tool being unable to fuse. For example, non-linear operations (like activations) in between convolutions and linear operations might prevent the fusing. If performance is of concern, try to change (and potentially re-train) the topology. Refer to the [Model Optimizer Guide](Model_Optimization_Techniques.md) for more optimizations. -Notice that the activation (`_relu`) is not touched by the Model Optimizer, and while it can be merged into convolution as well, this is rather a device-specific optimization, covered by Inference Engine during the model loading time. You are encouraged to inspect performance counters from plugins that should indicate that these particular layers are not executed (“Optimized out”). For more information, refer to Internal Inference Performance Counters. +Notice that the activation (`_relu`) is not touched by the Model Optimizer, and while it can be merged into convolution as well, this is rather a device-specific optimization, covered by OpenVINO Runtime during the model loading time. You are encouraged to inspect performance counters from plugins that should indicate that these particular layers are not executed (“Optimized out”). For more information, refer to Internal Inference Performance Counters. diff --git a/docs/MO_DG/prepare_model/Getting_performance_numbers.md b/docs/MO_DG/prepare_model/Getting_performance_numbers.md index 1d11be89064..dc32b87e805 100644 --- a/docs/MO_DG/prepare_model/Getting_performance_numbers.md +++ b/docs/MO_DG/prepare_model/Getting_performance_numbers.md @@ -3,11 +3,11 @@ ## Tip 1. Measure the Proper Set of Operations -When evaluating performance of your model with the Inference Engine, you must measure the proper set of operations. To do so, consider the following tips: +When evaluating performance of your model with the OpenVINO Runtime, you must measure the proper set of operations. To do so, consider the following tips: - Avoid including one-time costs like model loading. -- Track separately the operations that happen outside the Inference Engine, like video decoding. +- Track separately the operations that happen outside the OpenVINO Runtime, like video decoding. > **NOTE**: Some image pre-processing can be baked into the IR and accelerated. For more information, refer to [Model Optimizer Knobs Related to Performance](Additional_Optimizations.md) @@ -18,7 +18,7 @@ You need to build your performance conclusions on reproducible data. Do the perf - If the warm-up run does not help or execution time still varies, you can try running a large number of iterations and then average or find a mean of the results. 
- For time values that range too much, use geomean. -Refer to the [Inference Engine Samples](../../OV_Runtime_UG/Samples_Overview.md) for code examples for the performance measurements. Almost every sample, except interactive demos, has a `-ni` option to specify the number of iterations. +Refer to the [OpenVINO Samples](../../OV_Runtime_UG/Samples_Overview.md) for code examples for the performance measurements. Almost every sample, except interactive demos, has a `-ni` option to specify the number of iterations. ## Getting performance numbers using OpenVINO tool @@ -45,16 +45,16 @@ Instead, it is possible to keep a separate infer request per camera or another s ## Comparing Performance with Native/Framework Code -When comparing the Inference Engine performance with the framework or another reference code, make sure that both versions are as similar as possible: +When comparing the OpenVINO Runtime performance with the framework or another reference code, make sure that both versions are as similar as possible: -- Wrap exactly the inference execution (refer to the [Inference Engine Samples](../../OV_Runtime_UG/Samples_Overview.md) for examples). +- Wrap exactly the inference execution (refer to the [OpenVINO Samples](../../OV_Runtime_UG/Samples_Overview.md) for examples). - Do not include model loading time. -- Ensure the inputs are identical for the Inference Engine and the framework. For example, Caffe\* allows to auto-populate the input with random values. Notice that it might give different performance than on real images. -- Similarly, for correct performance comparison, make sure the access pattern, for example, input layouts, is optimal for Inference Engine (currently, it is NCHW). +- Ensure the inputs are identical for the OpenVINO Runtime and the framework. For example, Caffe\* allows to auto-populate the input with random values. Notice that it might give different performance than on real images. +- Similarly, for correct performance comparison, make sure the access pattern, for example, input layouts, is optimal for OpenVINO Runtime (currently, it is NCHW). - Any user-side pre-processing should be tracked separately. -- Make sure to try the same environment settings that the framework developers recommend, for example, for TensorFlow*. In many cases, things that are more machine friendly, like respecting NUMA (see CPU Checklist), might work well for the Inference Engine as well. -- If applicable, use batching with the Inference Engine. -- If possible, demand the same accuracy. For example, TensorFlow allows `FP16` support, so when comparing to that, make sure to test the Inference Engine with the `FP16` as well. +- Make sure to try the same environment settings that the framework developers recommend, for example, for TensorFlow*. In many cases, things that are more machine friendly, like respecting NUMA (see CPU Checklist), might work well for the OpenVINO Runtime as well. +- If applicable, use batching. +- If possible, demand the same accuracy. For example, TensorFlow allows `FP16` support, so when comparing to that, make sure to test the OpenVINO Runtime with the `FP16` as well. ## Using Tools @@ -64,7 +64,7 @@ Alternatively, you can gather the raw profiling data that samples report, the se ### Internal Inference Performance Counters -Almost every sample (inspect command-line options for a specific sample with `-h`) supports a `-pc` command that outputs internal execution breakdown. 
Refer to the [samples code](../../OV_Runtime_UG/Samples_Overview.md) for the actual Inference Engine API behind that. +Almost every sample (inspect command-line options for a specific sample with `-h`) supports a `-pc` command that outputs internal execution breakdown. Refer to the [OpenVINO Samples](../../OV_Runtime_UG/Samples_Overview.md) for the actual OpenVINO Runtime API behind that. Below is example of CPU plugin output for a network (since the device is CPU, the layers wall clock `realTime` and the `cpu` time are the same): diff --git a/docs/MO_DG/prepare_model/Model_Optimizer_FAQ.md b/docs/MO_DG/prepare_model/Model_Optimizer_FAQ.md index dd131a5c972..58c786e8424 100644 --- a/docs/MO_DG/prepare_model/Model_Optimizer_FAQ.md +++ b/docs/MO_DG/prepare_model/Model_Optimizer_FAQ.md @@ -158,7 +158,7 @@ However, if your model contains more than one input, the Model Optimizer is able #### 9. What does the message "Mean file for topologies with multiple inputs is not supported" mean? -Model Optimizer does not support mean file processing for topologies with more than one input. In this case, you need to perform preprocessing of the inputs for a generated Intermediate Representation in the Inference Engine to perform subtraction for every input of your multi-input model. +Model Optimizer does not support mean file processing for topologies with more than one input. In this case, you need to perform preprocessing of the inputs for a generated Intermediate Representation in the OpenVINO Runtime to perform subtraction for every input of your multi-input model, see [Overview of Preprocessing](../../OV_Runtime_UG/preprocessing_overview.md) for details. #### 10. What does the message "Cannot load or process mean file: value error" mean? @@ -214,7 +214,7 @@ One of the layers in the specified topology might not have inputs or values. Ple #### 24. What does the message "Part of the nodes was not translated to IE. Stopped" mean? -Some of the layers are not supported by the Inference Engine and cannot be translated to an Intermediate Representation. You can extend the Model Optimizer by allowing generation of new types of layers and implement these layers in the dedicated Inference Engine plugins. For more information, refer to the [OpenVINO™ Extensibility Mechanism](../../Extensibility_UG/Intro.md) +Some of the operations are not supported by the OpenVINO Runtime and cannot be translated to an Intermediate Representation. You can extend the Model Optimizer by allowing generation of new types of operations and implement these operations in the dedicated OpenVINO plugins. For more information, refer to the [OpenVINO™ Extensibility Mechanism](../../Extensibility_UG/Intro.md) #### 25. What does the message "While creating an edge from .. to .. : node name is undefined in the graph. Check correctness of the input model" mean? @@ -572,7 +572,7 @@ file is not available or does not exist. Also refer to FAQ [#90](#question-90). This message means that if you have model with custom layers and its json file has been generated with MXNet version lower than 1.0.0, Model Optimizer does not support such topologies. If you want to convert it you have to rebuild MXNet with unsupported layers or generate new json with MXNet version 1.0.0 and higher. Also you need to implement -Inference Engine extension for used custom layers. +OpenVINO extension for used custom layers. For more information, refer to the [OpenVINO™ Extensibility Mechanism](../../Extensibility_UG/Intro.md). #### 97. 
What does the message "Graph contains a cycle. Can not proceed .." mean? diff --git a/docs/MO_DG/prepare_model/Supported_Frameworks_Layers.md b/docs/MO_DG/prepare_model/Supported_Frameworks_Layers.md index 272062f828f..cf336bf6684 100644 --- a/docs/MO_DG/prepare_model/Supported_Frameworks_Layers.md +++ b/docs/MO_DG/prepare_model/Supported_Frameworks_Layers.md @@ -151,7 +151,7 @@ ## TensorFlow Supported Operations -Some TensorFlow operations do not match to any Inference Engine layer, but are still supported by the Model Optimizer and can be used on constant propagation path. These layers are labeled 'Constant propagation' in the table. +Some TensorFlow operations do not match to any OpenVINO operation, but are still supported by the Model Optimizer and can be used on constant propagation path. These layers are labeled 'Constant propagation' in the table. | Operation Name in TensorFlow | Limitations| diff --git a/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_Caffe.md b/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_Caffe.md index 187e73a4574..365ccd2c781 100644 --- a/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_Caffe.md +++ b/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_Caffe.md @@ -10,7 +10,7 @@ A summary of the steps for optimizing and deploying a model that was trained wit 1. [Configure the Model Optimizer](../../Deep_Learning_Model_Optimizer_DevGuide.md) for Caffe\*. 2. [Convert a Caffe\* Model](#Convert_From_Caffe) to produce an optimized [Intermediate Representation (IR)](../../IR_and_opsets.md) of the model based on the trained network topology, weights, and biases values -3. Test the model in the Intermediate Representation format using the [OpenVINO™ Runtime](../../../OV_Runtime_UG/openvino_intro.md) in the target environment via provided Inference Engine [sample applications](../../../OV_Runtime_UG/Samples_Overview.md) +3. Test the model in the Intermediate Representation format using the [OpenVINO™ Runtime](../../../OV_Runtime_UG/openvino_intro.md) in the target environment via provided [OpenVINO samples](../../../OV_Runtime_UG/Samples_Overview.md) 4. [Integrate](../../../OV_Runtime_UG/Samples_Overview.md) the [OpenVINO™ Runtime](../../../OV_Runtime_UG/openvino_intro.md) in your application to deploy the model in the target environment ## Supported Topologies diff --git a/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_Kaldi.md b/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_Kaldi.md index 5d65d897ef5..ed199d565b0 100644 --- a/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_Kaldi.md +++ b/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_Kaldi.md @@ -16,7 +16,7 @@ A summary of the steps for optimizing and deploying a model that was trained wit 1. [Configure the Model Optimizer](../../Deep_Learning_Model_Optimizer_DevGuide.md) for Kaldi\*. 2. [Convert a Kaldi\* Model](#Convert_From_Kaldi) to produce an optimized [Intermediate Representation (IR)](../../IR_and_opsets.md) of the model based on the trained network topology, weights, and biases values. -3. Test the model in the Intermediate Representation format using the [OpenVINO™ Runtime](../../../OV_Runtime_UG/openvino_intro.md) in the target environment via provided Inference Engine [sample applications](../../../OV_Runtime_UG/Samples_Overview.md). +3. 
Test the model in the Intermediate Representation format using the [OpenVINO™ Runtime](../../../OV_Runtime_UG/openvino_intro.md) in the target environment via provided [OpenVINO Samples](../../../OV_Runtime_UG/Samples_Overview.md). 4. [Integrate](../../../OV_Runtime_UG/Samples_Overview.md) the [OpenVINO™ Runtime](../../../OV_Runtime_UG/openvino_intro.md) in your application to deploy the model in the target environment. > **NOTE**: The Model Optimizer supports the [nnet1](http://kaldi-asr.org/doc/dnn1.html) and [nnet2](http://kaldi-asr.org/doc/dnn2.html) formats of Kaldi models. Support of the [nnet3](http://kaldi-asr.org/doc/dnn3.html) format is limited. @@ -100,7 +100,7 @@ The Model Optimizer finds the last layer of the topology and removes this layer > **NOTE**: Model Optimizer can remove SoftMax layer only if the topology has one output. - > **NOTE**: For sample inference of Kaldi models, you can use the Inference Engine Speech Recognition sample application. The sample supports models with one output. If your model has several outputs, specify the desired one with the `--output` option. + > **NOTE**: For sample inference of Kaldi models, you can use the OpenVINO Speech Recognition sample application. The sample supports models with one output. If your model has several outputs, specify the desired one with the `--output` option. If you want to convert a model for inference on Intel® Movidius™ Myriad™, use the `--remove_memory` option. It removes Memory layers from the IR. Instead of it, additional inputs and outputs appear in the IR. diff --git a/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_MxNet.md b/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_MxNet.md index 62bba643129..81f04fca9c5 100644 --- a/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_MxNet.md +++ b/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_MxNet.md @@ -17,7 +17,7 @@ A summary of the steps for optimizing and deploying a model that was trained wit 1. [Configure the Model Optimizer](../../Deep_Learning_Model_Optimizer_DevGuide.md) for MXNet* (MXNet was used to train your model) 2. [Convert a MXNet model](#ConvertMxNet) to produce an optimized [Intermediate Representation (IR)](../../IR_and_opsets.md) of the model based on the trained network topology, weights, and biases values -3. Test the model in the Intermediate Representation format using the [OpenVINO™ Runtime](../../../OV_Runtime_UG/openvino_intro.md) in the target environment via provided Inference Engine [sample applications](../../../OV_Runtime_UG/Samples_Overview.md) +3. Test the model in the Intermediate Representation format using the [OpenVINO™ Runtime](../../../OV_Runtime_UG/openvino_intro.md) in the target environment via provided [OpenVINO Samples](../../../OV_Runtime_UG/Samples_Overview.md) 4. [Integrate](../../../OV_Runtime_UG/Samples_Overview.md) the [OpenVINO™ Runtime](../../../OV_Runtime_UG/openvino_intro.md) in your application to deploy the model in the target environment ## Supported Topologies diff --git a/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_Paddle.md b/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_Paddle.md index ddf5a3313c7..5e9100e4723 100644 --- a/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_Paddle.md +++ b/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_Paddle.md @@ -4,7 +4,7 @@ A summary of the steps for optimizing and deploying a model trained with PaddleP 1. 
[Configure Model Optimizer](../../Deep_Learning_Model_Optimizer_DevGuide.md) for PaddlePaddle. 2. [Convert a PaddlePaddle Model](#Convert_From_Paddle) to produce an optimized [Intermediate Representation (IR)](../../IR_and_opsets.md) of the model based on the trained network topology, weights, and biases. -3. Test the model in the Intermediate Representation format using the [OpenVINO™ Runtime](../../../OV_Runtime_UG/openvino_intro.md) in the target environment via provided Inference Engine [sample applications](../../../OV_Runtime_UG/Samples_Overview.md). +3. Test the model in the Intermediate Representation format using the [OpenVINO™ Runtime](../../../OV_Runtime_UG/openvino_intro.md) in the target environment via provided [OpenVINO Samples](../../../OV_Runtime_UG/Samples_Overview.md). 4. [Integrate](../../../OV_Runtime_UG/Samples_Overview.md) the [OpenVINO™ Runtime](../../../OV_Runtime_UG/openvino_intro.md) in your application to deploy the model in the target environment. ## Supported Topologies diff --git a/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_PyTorch.md b/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_PyTorch.md index 63cef40c49d..35aa0741a88 100644 --- a/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_PyTorch.md +++ b/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_PyTorch.md @@ -49,7 +49,7 @@ PyTorch* framework is supported through export to ONNX\* format. A summary of th 2. [Export PyTorch model to ONNX\*](#export-to-onnx). 3. [Convert an ONNX\* model](Convert_Model_From_ONNX.md) to produce an optimized [Intermediate Representation (IR)](../../IR_and_opsets.md) of the model based on the trained network topology, weights, and biases values. 4. Test the model in the Intermediate Representation format using the [OpenVINO™ Runtime](../../../OV_Runtime_UG/openvino_intro.md) in the target environment via provided [sample applications](../../../OV_Runtime_UG/Samples_Overview.md). -5. [Integrate](../../../OV_Runtime_UG/Samples_Overview.md) the Inference Engine in your application to deploy the model in the target environment. +5. [Integrate OpenVINO Runtime](../../../OV_Runtime_UG/Samples_Overview.md) in your application to deploy the model in the target environment. ## Export PyTorch\* Model to ONNX\* Format diff --git a/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_TensorFlow.md b/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_TensorFlow.md index 2628131a5e4..ba51959bb28 100644 --- a/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_TensorFlow.md +++ b/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_TensorFlow.md @@ -32,13 +32,13 @@ A summary of the steps for optimizing and deploying a model that was trained wit 2. [Freeze the TensorFlow model](#freeze-the-tensorflow-model) if your model is not already frozen or skip this step and use the [instruction](#loading-nonfrozen-models) to a convert a non-frozen model. 3. [Convert a TensorFlow\* model](#Convert_From_TF) to produce an optimized [Intermediate Representation (IR)](../../IR_and_opsets.md) of the model based on the trained network topology, weights, and biases values. 4. Test the model in the Intermediate Representation format using the [OpenVINO™ Runtime](../../../OV_Runtime_UG/openvino_intro.md) in the target environment via provided [sample applications](../../../OV_Runtime_UG/Samples_Overview.md). -5. 
[Integrate](../../../OV_Runtime_UG/Samples_Overview.md) the Inference Engine in your application to deploy the model in the target environment. +5. [Integrate OpenVINO Runtime](../../../OV_Runtime_UG/Samples_Overview.md) in your application to deploy the model in the target environment. ## Supported Topologies **Supported Non-Frozen Topologies with Links to the Associated Slim Model Classification Download Files** -Detailed information on how to convert models from the TensorFlow\*-Slim Image Classification Model Library is available in the [Converting TensorFlow*-Slim Image Classification Model Library Models](tf_specific/Convert_Slim_Library_Models.md) chapter. The table below contains list of supported TensorFlow\*-Slim Image Classification Model Library models and required mean/scale values. The mean values are specified as if the input image is read in BGR channels order layout like Inference Engine classification sample does. +Detailed information on how to convert models from the TensorFlow\*-Slim Image Classification Model Library is available in the [Converting TensorFlow*-Slim Image Classification Model Library Models](tf_specific/Convert_Slim_Library_Models.md) chapter. The table below contains list of supported TensorFlow\*-Slim Image Classification Model Library models and required mean/scale values. The mean values are specified as if the input image is read in BGR channels order layout like OpenVINO classification sample does. | Model Name| Slim Model Checkpoint File| \-\-mean_values | \-\-scale| | ------------- | ------------ | ------------- | -----:| diff --git a/docs/MO_DG/prepare_model/convert_model/Converting_Model.md b/docs/MO_DG/prepare_model/convert_model/Converting_Model.md index c5c3bb1695a..2602d789f68 100644 --- a/docs/MO_DG/prepare_model/convert_model/Converting_Model.md +++ b/docs/MO_DG/prepare_model/convert_model/Converting_Model.md @@ -185,7 +185,7 @@ Framework-agnostic parameters: --static_shape Enables IR generation for fixed input shape (folding `ShapeOf` operations and shape-calculating sub-graphs to `Constant`). Changing model input shape using - the Inference Engine API in runtime may fail for such an IR. + the OpenVINO Runtime API in runtime may fail for such an IR. --disable_weights_compression Disable compression and store weights with original precision. @@ -206,7 +206,7 @@ Usually neural network models are trained with the normalized input data. This m * The input pre-processing operations are a part of a topology. In this case, the application that uses the framework to infer the topology does not pre-process the input. * The input pre-processing operations are not a part of a topology and the pre-processing is performed within the application which feeds the model with an input data. -In the first case, the Model Optimizer generates the IR with required pre-processing layers and Inference Engine samples may be used to infer the model. +In the first case, the Model Optimizer generates the IR with required pre-processing operations and OpenVINO Samples may be used to infer the model. In the second case, information about mean/scale values should be provided to the Model Optimizer to embed it to the generated IR. Model Optimizer provides a number of command line parameters to specify them: `--mean`, `--scale`, `--scale_values`, `--mean_values`. 
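For illustration, the pre-processing that `--mean_values`/`--scale_values` embed into the generated IR is equivalent to a per-channel `(x - mean) / scale` normalization. Below is a minimal NumPy sketch with hypothetical values of 127.5 (typical for a model trained on inputs normalized to `[-1, 1]`); the actual values always have to come from the model's training recipe.

```python
import numpy as np

# Hypothetical per-channel values; real values must match the training pre-processing.
mean_values = np.array([127.5, 127.5, 127.5], dtype=np.float32)
scale_values = np.array([127.5, 127.5, 127.5], dtype=np.float32)

def embedded_preprocessing(image_chw):
    # An IR generated with --mean_values/--scale_values applies (x - mean) / scale per channel.
    return (image_chw - mean_values[:, None, None]) / scale_values[:, None, None]

# Dummy CHW image with values in the [0, 255] range.
image = np.random.randint(0, 256, size=(3, 224, 224)).astype(np.float32)
normalized = embedded_preprocessing(image)
print(normalized.min(), normalized.max())  # roughly within [-1, 1]
```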
@@ -221,11 +221,11 @@ There is no a universal recipe for determining the mean/scale values for a parti There are situations when the input data shape for the model is not fixed, like for the fully-convolutional neural networks. In this case, for example, TensorFlow\* models contain `-1` values in the `shape` attribute of the `Placeholder` operation. Inference Engine does not support input layers with undefined size, so if the input shapes are not defined in the model, the Model Optimizer fails to convert the model. The solution is to provide the input shape(s) using the `--input` or `--input_shape` command line parameter for all input(s) of the model or provide the batch size using the `-b` command line parameter if the model contains just one input with undefined batch size only. In the latter case, the `Placeholder` shape for the TensorFlow\* model looks like this `[-1, 224, 224, 3]`. ## When to Reverse Input Channels -Input data for your application can be of RGB or BRG color input order. For example, Inference Engine samples load input images in the BGR channels order. However, the model may be trained on images loaded with the opposite order (for example, most TensorFlow\* models are trained with images in RGB order). In this case, inference results using the Inference Engine samples may be incorrect. The solution is to provide `--reverse_input_channels` command line parameter. Taking this parameter, the Model Optimizer performs first convolution or other channel dependent operation weights modification so these operations output will be like the image is passed with RGB channels order. +Input data for your application can be of RGB or BRG color input order. For example, OpenVINO Samples load input images in the BGR channels order. However, the model may be trained on images loaded with the opposite order (for example, most TensorFlow\* models are trained with images in RGB order). In this case, inference results using the OpenVINO samples may be incorrect. The solution is to provide `--reverse_input_channels` command line parameter. Taking this parameter, the Model Optimizer performs first convolution or other channel dependent operation weights modification so these operations output will be like the image is passed with RGB channels order. ## When to Specify `--static_shape` Command Line Parameter If the `--static_shape` command line parameter is specified the Model Optimizer evaluates shapes of all operations in the model (shape propagation) for a fixed input(s) shape(s). During the shape propagation the Model Optimizer evaluates operations *Shape* and removes them from the computation graph. With that approach, the initial model which can consume inputs of different shapes may be converted to IR working with the input of one fixed shape only. For example, consider the case when some blob is reshaped from 4D of a shape *[N, C, H, W]* to a shape *[N, C, H \* W]*. During the model conversion the Model Optimize calculates output shape as a constant 1D blob with values *[N, C, H \* W]*. So if the input shape changes to some other value *[N,C,H1,W1]* (it is possible scenario for a fully convolutional model) then the reshape layer becomes invalid. -Resulting Intermediate Representation will not be resizable with the help of Inference Engine. +Resulting Intermediate Representation will not be resizable with the help of OpenVINO Runtime API. 
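To make the effect of `--static_shape` concrete, the following is a minimal sketch (using the `openvino.inference_engine` Python API referenced elsewhere in these documents, with hypothetical IR file names) of the reshape call that works for a regular IR but may fail for an IR generated with `--static_shape`, because the shape-calculating sub-graphs have been folded to constants:

```python
from openvino.inference_engine import IECore

ie = IECore()
# Hypothetical IR file names.
net = ie.read_network(model="model.xml", weights="model.bin")

input_name = next(iter(net.input_info))
print("original input shape:", net.input_info[input_name].input_data.shape)

# Works for an IR converted without --static_shape; for a --static_shape IR this call
# may raise an error because the folded shapes can no longer be recomputed.
net.reshape({input_name: [1, 3, 448, 448]})
print("new input shape:", net.input_info[input_name].input_data.shape)
```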
## Examples of CLI Commands diff --git a/docs/MO_DG/prepare_model/convert_model/Cutting_Model.md b/docs/MO_DG/prepare_model/convert_model/Cutting_Model.md index aac7d73f006..8b22503439e 100644 --- a/docs/MO_DG/prepare_model/convert_model/Cutting_Model.md +++ b/docs/MO_DG/prepare_model/convert_model/Cutting_Model.md @@ -6,10 +6,10 @@ Sometimes some parts of a model must be removed while the Model Optimizer is con The following examples are the situations when model cutting is useful or even required: -* model has pre- or post-processing parts that cannot be translated to existing Inference Engine layers. +* model has pre- or post-processing parts that cannot be translated to existing OpenVINO operations. * model has a training part that is convenient to be kept in the model, but not used during inference. * model is too complex (contains lots of unsupported operations that cannot be easily implemented as custom layers), so the complete model cannot be converted in one shot. -* problem with model conversion in the Model Optimizer or inference in the Inference Engine occurred. To localize the issue, limit the scope for conversion by iteratively searching for problematic places in the model. +* problem with model conversion in the Model Optimizer or inference in the OpenVINO Runtime occurred. To localize the issue, limit the scope for conversion by iteratively searching for problematic places in the model. * single custom layer or a combination of custom layers is isolated for debugging purposes. ## Command-Line Options diff --git a/docs/MO_DG/prepare_model/convert_model/IR_suitable_for_INT8_inference.md b/docs/MO_DG/prepare_model/convert_model/IR_suitable_for_INT8_inference.md index 34df1408cdb..9f7ac357e30 100644 --- a/docs/MO_DG/prepare_model/convert_model/IR_suitable_for_INT8_inference.md +++ b/docs/MO_DG/prepare_model/convert_model/IR_suitable_for_INT8_inference.md @@ -2,7 +2,7 @@ ## Introduction -Inference Engine CPU and GPU plugin can infer models in the low precision. +OpenVINO Runtime CPU and GPU devices can infer models in the low precision. For details, refer to [Low Precision Inference on the CPU](../../../OV_Runtime_UG/Int8Inference.md). Intermediate Representation (IR) should be specifically formed to be suitable for low precision inference. diff --git a/docs/MO_DG/prepare_model/convert_model/mxnet_specific/Convert_Style_Transfer_From_MXNet.md b/docs/MO_DG/prepare_model/convert_model/mxnet_specific/Convert_Style_Transfer_From_MXNet.md index 86a4990ee58..a3dad226b7e 100644 --- a/docs/MO_DG/prepare_model/convert_model/mxnet_specific/Convert_Style_Transfer_From_MXNet.md +++ b/docs/MO_DG/prepare_model/convert_model/mxnet_specific/Convert_Style_Transfer_From_MXNet.md @@ -114,4 +114,4 @@ cp models/13_decoder_auxs.nd nst_model ```sh mo --input_symbol /nst_vgg19-symbol.json --framework mxnet --output_dir --input_shape [1,3,224,224] --nd_prefix_name 13_decoder --pretrained_model /vgg19-0000.params ``` -4. The IR is generated (`.bin`, `.xml` and `.mapping` files) in the specified output directory and ready to be consumed by the Inference Engine. +4. The IR is generated (`.bin`, `.xml` and `.mapping` files) in the specified output directory and ready to be consumed by the OpenVINO Runtime. 
diff --git a/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_GNMT_From_Tensorflow.md b/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_GNMT_From_Tensorflow.md index 236dd4aac90..19fbb757198 100644 --- a/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_GNMT_From_Tensorflow.md +++ b/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_GNMT_From_Tensorflow.md @@ -244,7 +244,7 @@ python3 benchmark_app.py -m -d CPU ``` -2. With Inference Engine Python API: +2. With OpenVINO Runtime Python API: > **NOTE**: Before running the example, insert a path to your GNMT `.xml` and `.bin` files into `MODEL_PATH` and `WEIGHTS_PATH`, and fill `input_data_tensor` and `seq_lengths` tensors according to your input data. @@ -274,4 +274,4 @@ exec_net = ie.load_network(network=net, device_name="CPU") result_ie = exec_net.infer(input_data) ``` -For more information about Python API, refer to [Inference Engine Python API](ie_python_api/api.html). +For more information about Python API, refer to [OpenVINO Runtime Python API](ie_python_api/api.html). diff --git a/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_Object_Detection_API_Models.md b/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_Object_Detection_API_Models.md index 56324c1facb..0489ff08768 100644 --- a/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_Object_Detection_API_Models.md +++ b/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_Object_Detection_API_Models.md @@ -2,8 +2,8 @@ > **NOTES**: > * Starting with the 2022.1 release, the Model Optimizer can convert the TensorFlow\* Object Detection API Faster and Mask RCNNs topologies differently. By default, the Model Optimizer adds operation "Proposal" to the generated IR. This operation needs an additional input to the model with name "image_info" which should be fed with several values describing the pre-processing applied to the input image (refer to the [Proposal](../../../../ops/detection/Proposal_4.md) operation specification for more information). However, this input is redundant for the models trained and inferred with equal size images. Model Optimizer can generate IR for such models and insert operation [DetectionOutput](../../../../ops/detection/DetectionOutput_1.md) instead of `Proposal`. The `DetectionOutput` operation does not require additional model input "image_info" and moreover, for some models the produced inference results are closer to the original TensorFlow\* model. In order to trigger new behaviour the attribute "operation_to_add" in the corresponding JSON transformation configuration file should be set to value "DetectionOutput" instead of default one "Proposal". -> * Starting with the 2021.1 release, the Model Optimizer converts the TensorFlow\* Object Detection API SSDs, Faster and Mask RCNNs topologies keeping shape-calculating sub-graphs by default, so topologies can be re-shaped in the Inference Engine using dedicated reshape API. Refer to [Using Shape Inference](../../../../OV_Runtime_UG/ShapeInference.md) for more information on how to use this feature. It is possible to change the both spatial dimensions of the input image and batch size. -> * To generate IRs for TF 1 SSD topologies, the Model Optimizer creates a number of `PriorBoxClustered` operations instead of a constant node with prior boxes calculated for the particular input image size. This change allows you to reshape the topology in the Inference Engine using dedicated Inference Engine API. 
The reshaping is supported for all SSD topologies except FPNs which contain hardcoded shapes for some operations preventing from changing topology input shape. +> * Starting with the 2021.1 release, the Model Optimizer converts the TensorFlow\* Object Detection API SSDs, Faster and Mask RCNNs topologies keeping shape-calculating sub-graphs by default, so topologies can be re-shaped in the OpenVINO Runtime using dedicated reshape API. Refer to [Using Shape Inference](../../../../OV_Runtime_UG/ShapeInference.md) for more information on how to use this feature. It is possible to change the both spatial dimensions of the input image and batch size. +> * To generate IRs for TF 1 SSD topologies, the Model Optimizer creates a number of `PriorBoxClustered` operations instead of a constant node with prior boxes calculated for the particular input image size. This change allows you to reshape the topology in the OpenVINO Runtime using dedicated API. The reshaping is supported for all SSD topologies except FPNs which contain hardcoded shapes for some operations preventing from changing topology input shape. ## How to Convert a Model @@ -45,7 +45,7 @@ To convert a TensorFlow\* Object Detection API model, go to the `/t * `--tensorflow_object_detection_api_pipeline_config ` --- A special configuration file that describes the topology hyper-parameters and structure of the TensorFlow Object Detection API model. For the models downloaded from the TensorFlow\* Object Detection API zoo, the configuration file is named `pipeline.config`. If you plan to train a model yourself, you can find templates for these files in the [models repository](https://github.com/tensorflow/models/tree/master/research/object_detection/samples/configs). * `--input_shape` (optional) --- A custom input image shape. Refer to [Custom Input Shape](#tf_od_custom_input_shape) for more information how the `--input_shape` parameter is handled for the TensorFlow* Object Detection API models. -> **NOTE**: The color channel order (RGB or BGR) of an input data should match the channel order of the model training dataset. If they are different, perform the `RGB<->BGR` conversion specifying the command-line parameter: `--reverse_input_channels`. Otherwise, inference results may be incorrect. If you convert a TensorFlow\* Object Detection API model to use with the Inference Engine sample applications, you must specify the `--reverse_input_channels` parameter. For more information about the parameter, refer to **When to Reverse Input Channels** section of [Converting a Model to Intermediate Representation (IR)](../Converting_Model.md). +> **NOTE**: The color channel order (RGB or BGR) of an input data should match the channel order of the model training dataset. If they are different, perform the `RGB<->BGR` conversion specifying the command-line parameter: `--reverse_input_channels`. Otherwise, inference results may be incorrect. If you convert a TensorFlow\* Object Detection API model to use with the OpenVINO sample applications, you must specify the `--reverse_input_channels` parameter. For more information about the parameter, refer to **When to Reverse Input Channels** section of [Converting a Model to Intermediate Representation (IR)](../Converting_Model.md). Additionally to the mandatory parameters listed above you can use optional conversion parameters if needed. A full list of parameters is available in the [Converting a TensorFlow* Model](../Convert_Model_From_TensorFlow.md) topic. 
@@ -57,24 +57,24 @@ mo --input_model=/tmp/ssd_inception_v2_coco_2018_01_28/frozen_inference_graph.pb ## OpenVINO&; Toolkit Samples and Open Model Zoo Demos -Inference Engine comes with a number of samples to demonstrate use of OpenVINO API, additionally, +OpenVINO comes with a number of samples to demonstrate use of OpenVINO Runtime API, additionally, Open Model Zoo provides set of demo applications to show implementation of close to real life applications based on deep learning in various tasks, including Image Classifiacton, Visual Object Detection, Text Recognition, Speech Recognition, Natural Language Processing and others. Refer to the links below for more details. -* [Inference Engine Samples](../../../../OV_Runtime_UG/Samples_Overview.md) +* [OpenVINO Samples](../../../../OV_Runtime_UG/Samples_Overview.md) * [Open Model Zoo Demos](@ref omz_demos) ## Important Notes About Feeding Input Images to the Samples There are several important notes about feeding input images to the samples: -1. Inference Engine samples stretch input image to the size of the input operation without preserving aspect ratio. This behavior is usually correct for most topologies (including SSDs), but incorrect for other models like Faster R-CNN, Mask R-CNN and R-FCN. These models usually use keeps aspect ratio resizer. The type of pre-processing is defined in the pipeline configuration file in the section `image_resizer`. If keeping aspect ratio is used, then it is necessary to resize image before passing it to the sample and optionally pad the resized image with 0s (if the attribute "pad_to_max_dimension" in the pipeline.config is equal to "true"). +1. OpenVINO samples stretch input image to the size of the input operation without preserving aspect ratio. This behavior is usually correct for most topologies (including SSDs), but incorrect for other models like Faster R-CNN, Mask R-CNN and R-FCN. These models usually use keeps aspect ratio resizer. The type of pre-processing is defined in the pipeline configuration file in the section `image_resizer`. If keeping aspect ratio is used, then it is necessary to resize image before passing it to the sample and optionally pad the resized image with 0s (if the attribute "pad_to_max_dimension" in the pipeline.config is equal to "true"). -2. TensorFlow\* implementation of image resize may be different from the one implemented in the sample. Even reading input image from compressed format (like `.jpg`) could give different results in the sample and TensorFlow\*. So, if it is necessary to compare accuracy between the TensorFlow\* and the Inference Engine it is recommended to pass pre-resized input image in a non-compressed format (like `.bmp`). +2. TensorFlow\* implementation of image resize may be different from the one implemented in the sample. Even reading input image from compressed format (like `.jpg`) could give different results in the sample and TensorFlow\*. So, if it is necessary to compare accuracy between the TensorFlow\* and the OpenVINO it is recommended to pass pre-resized input image in a non-compressed format (like `.bmp`). -3. If you want to infer the model with the Inference Engine samples, convert the model specifying the `--reverse_input_channels` command line parameter. The samples load images in BGR channels order, while TensorFlow* models were trained with images in RGB order. 
When the `--reverse_input_channels` command line parameter is specified, the Model Optimizer performs first convolution or other channel dependent operation weights modification so the output will be like the image is passed with RGB channels order. +3. If you want to infer the model with the OpenVINO samples, convert the model specifying the `--reverse_input_channels` command line parameter. The samples load images in BGR channels order, while TensorFlow* models were trained with images in RGB order. When the `--reverse_input_channels` command line parameter is specified, the Model Optimizer performs first convolution or other channel dependent operation weights modification so the output will be like the image is passed with RGB channels order. 4. Read carefully messaged printed by the Model Optimizer during a model conversion. They contain important instructions on how to prepare input data before running the inference and how to interpret the output. diff --git a/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_Slim_Library_Models.md b/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_Slim_Library_Models.md index 518fe816893..c4b5f75d0b0 100644 --- a/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_Slim_Library_Models.md +++ b/docs/MO_DG/prepare_model/convert_model/tf_specific/Convert_Slim_Library_Models.md @@ -64,7 +64,7 @@ The `-b` command line parameter is required because the Model Optimizer cannot c Refer to the [Mean and Scale Values for TensorFlow\*-Slim Models](#tf_slim_mean_scale_values) for the information why `--mean_values` and `--scale` command line parameters are used. ## Mean and Scale Values for TensorFlow\*-Slim Models -The TensorFlow\*-Slim Models were trained with normalized input data. There are several different normalization algorithms used in the Slim library. Inference Engine classification sample does not perform image pre-processing except resizing to the input layer size. It is necessary to pass mean and scale values to the Model Optimizer so they are embedded into the generated IR in order to get correct classification results. +The TensorFlow\*-Slim Models were trained with normalized input data. There are several different normalization algorithms used in the Slim library. OpenVINO classification sample does not perform image pre-processing except resizing to the input layer size. It is necessary to pass mean and scale values to the Model Optimizer so they are embedded into the generated IR in order to get correct classification results. The file [preprocessing_factory.py](https://github.com/tensorflow/models/blob/master/research/slim/preprocessing/preprocessing_factory.py) contains a dictionary variable `preprocessing_fn_map` defining mapping between the model type and pre-processing function to be used. The function code should be analyzed to figure out the mean/scale values. @@ -83,7 +83,7 @@ The [inception_preprocessing.py](https://github.com/tensorflow/models/blob/maste Firstly, the `image` is converted to data type `tf.float32` and the values in the tensor are scaled to the `[0, 1]` range using the [tf.image.convert_image_dtype](https://www.tensorflow.org/api_docs/python/tf/image/convert_image_dtype) function. Then the `0.5` is subtracted from the image values and values multiplied by `2.0`. The final image range of values is `[-1, 1]`. -Inference Engine classification sample reads an input image as a three-dimensional array of integer values from the range `[0, 255]`. 
In order to scale them to `[-1, 1]` range, the mean value `127.5` for each image channel should be specified as well as scale factor `127.5`. +OpenVINO classification sample reads an input image as a three-dimensional array of integer values from the range `[0, 255]`. In order to scale them to `[-1, 1]` range, the mean value `127.5` for each image channel should be specified as well as scale factor `127.5`. Similarly, the mean/scale values can be determined for other Slim models. diff --git a/docs/MO_DG/prepare_model/customize_model_optimizer/Customize_Model_Optimizer.md b/docs/MO_DG/prepare_model/customize_model_optimizer/Customize_Model_Optimizer.md index 77ed9d02cb7..2ff2fc05c05 100644 --- a/docs/MO_DG/prepare_model/customize_model_optimizer/Customize_Model_Optimizer.md +++ b/docs/MO_DG/prepare_model/customize_model_optimizer/Customize_Model_Optimizer.md @@ -177,9 +177,9 @@ defined as a mathematical expression using the [ShapeOf](../../../ops/shape/Shap Model Optimizer calculates output shapes for all operations in a model to write them to Intermediate Representation files. -> **NOTE**: This is a legacy requirement because starting from IR version 10 Inference Engine needs to know shapes of +> **NOTE**: This is a legacy requirement because starting from IR version 10 OpenVINO Runtime needs to know shapes of > the [Const](../../../ops/infrastructure/Constant_1.md) and the [Parameter](../../../ops/infrastructure/Parameter_1.md) -> operations only. The nGraph component of the Inference Engine calculates output shapes for all operations in a model +> operations only. The OpenVINO Runtime calculates output shapes for all operations in a model > using shapes of [Parameter](../../../ops/infrastructure/Parameter_1.md) and > [Const](../../../ops/infrastructure/Constant_1.md) operations defined with respective operation attributes. @@ -303,7 +303,7 @@ The last phase of a model conversion is the Intermediate Representation emitting steps: 1. Iterates over all operation nodes in the graph and checks that all nodes have the `type` attribute set. This attribute -defines the operation type and is used in the Inference Engine to instantiate proper operation from the +defines the operation type and is used in the OpenVINO to instantiate proper operation from the [opset](@ref openvino_docs_ops_opset) specified in the `version` attribute of the node. If some node does not have attribute `type` or its values is equal to `None`, the Model Optimizer exits with an error. 2. Performs type inference of graph operations similar to the shape inference. Inferred data types are saved to a port diff --git a/docs/OV_Runtime_UG/Model_caching_overview.md b/docs/OV_Runtime_UG/Model_caching_overview.md index 07d8e871c18..074f471b4de 100644 --- a/docs/OV_Runtime_UG/Model_caching_overview.md +++ b/docs/OV_Runtime_UG/Model_caching_overview.md @@ -10,7 +10,7 @@ As described in the [OpenVINO™ Runtime User Guide](openvino_intro.md), a common application flow consists of the following steps: -1. **Create an Inference Engine Core object**: First step to manage available devices and read network objects +1. **Create a Core object**: First step to manage available devices and read network objects 2. **Read the Intermediate Representation**: Read an Intermediate Representation file into an object of the `InferenceEngine::CNNNetwork` @@ -72,9 +72,9 @@ To check in advance if a particular device supports model caching, your applicat
Python
@endsphinxdirective -As described in Inference Engine Developer Guide, a common application flow consists of the following steps: +As described in OpenVINO User Guide, a common application flow consists of the following steps: -1. **Create an Inference Engine Core Object** +1. **Create a Core Object** 2. **Read the Intermediate Representation** - Read an Intermediate Representation file into an object of the [ie_api.IENetwork](api/ie_python_api/_autosummary/openvino.inference_engine.IENetwork.html) 3. **Prepare inputs and outputs** 4. **Set configuration** - Pass device-specific loading configurations to the device diff --git a/docs/OV_Runtime_UG/auto_device_selection.md b/docs/OV_Runtime_UG/auto_device_selection.md index 285cdf453b8..e127f7e31b8 100644 --- a/docs/OV_Runtime_UG/auto_device_selection.md +++ b/docs/OV_Runtime_UG/auto_device_selection.md @@ -39,7 +39,7 @@ There are two ways to use Auto-device: Both methods allow limiting the list of device candidates for the AUTO plugin. -> **NOTE**: The Inference Engine lets you use "GPU" as an alias for "GPU.0" in function calls. +> **NOTE**: The OpenVINO Runtime lets you use "GPU" as an alias for "GPU.0" in function calls. The Auto-device plugin supports query device optimization capabilities in metric. @@ -49,7 +49,7 @@ The Auto-device plugin supports query device optimization capabilities in metric ### Enumerating Devices and Selection Logic -The Inference Engine now features a dedicated API to enumerate devices and their capabilities. +The OpenVINO Runtime API now features a dedicated methods to enumerate devices and their capabilities. See [Hello Query Device C++ Sample](../../samples/cpp/hello_query_device/README.md). This is the example output from the sample (truncated to device names only): @@ -85,7 +85,7 @@ For example, CPU, dGPU and iGPU can support the following precision and optimiza In cases when loading the network to dGPU or iGPU fails, CPU is the fall-back choice. -According to the Auto-device selection logic from the previous section, tell the Inference Engine +According to the Auto-device selection logic from the previous section, tell the OpenVINO Runtime to use the most suitable device from available devices as follows: @snippet snippets/AUTO2.cpp part2 @@ -208,7 +208,7 @@ The Auto-device plugin supports query device optimization capabilities in metric ### Enumerating Devices and Selection Logic -The Inference Engine now features a dedicated API to enumerate devices and their capabilities. See the [Hello Query Device Python Sample](../../samples/python/hello_query_device/README.md) for code. +The OpenVINO Runtime API now features a dedicated methods to enumerate devices and their capabilities. See the [Hello Query Device Python Sample](../../samples/python/hello_query_device/README.md) for code. 
This is the example output from the sample (truncated to device names only): diff --git a/docs/migration_ov_2_0/docs/common_inference_pipeline.md b/docs/OV_Runtime_UG/migration_ov_2_0/docs/common_inference_pipeline.md similarity index 100% rename from docs/migration_ov_2_0/docs/common_inference_pipeline.md rename to docs/OV_Runtime_UG/migration_ov_2_0/docs/common_inference_pipeline.md diff --git a/docs/migration_ov_2_0/docs/graph_construction.md b/docs/OV_Runtime_UG/migration_ov_2_0/docs/graph_construction.md similarity index 100% rename from docs/migration_ov_2_0/docs/graph_construction.md rename to docs/OV_Runtime_UG/migration_ov_2_0/docs/graph_construction.md diff --git a/docs/migration_ov_2_0/docs/intro.md b/docs/OV_Runtime_UG/migration_ov_2_0/docs/intro.md similarity index 100% rename from docs/migration_ov_2_0/docs/intro.md rename to docs/OV_Runtime_UG/migration_ov_2_0/docs/intro.md diff --git a/docs/OV_Runtime_UG/multi_device.md b/docs/OV_Runtime_UG/multi_device.md index e058b763886..f49a94cc0ca 100644 --- a/docs/OV_Runtime_UG/multi_device.md +++ b/docs/OV_Runtime_UG/multi_device.md @@ -44,7 +44,7 @@ Notice that the priorities of the devices can be changed in real time for the ex Finally, there is a way to specify number of requests that the Multi-Device will internally keep for each device. Suppose your original app was running 4 cameras with 4 inference requests. You would probably want to share these 4 requests between 2 devices used in MULTI. The easiest way is to specify a number of requests for each device using parentheses: "MULTI:CPU(2),GPU(2)" and use the same 4 requests in your app. However, such an explicit configuration is not performance-portable and hence not recommended. Instead, the better way is to configure the individual devices and query the resulting number of requests to be used at the application level (see [Configuring the Individual Devices and Creating the Multi-Device On Top](#configuring-the-individual-devices-and-creating-the-multi-device-on-top)). ### Enumerating Available Devices -The Inference Engine features a dedicated API to enumerate devices and their capabilities. See the [Hello Query Device C++ Sample](../../samples/cpp/hello_query_device/README.md). This is example output from the sample (truncated to device names only): +The OpenVINO Runtime API features a dedicated methods to enumerate devices and their capabilities. See the [Hello Query Device C++ Sample](../../samples/cpp/hello_query_device/README.md). This is example output from the sample (truncated to device names only): ```sh ./hello_query_device @@ -182,7 +182,7 @@ You can set the configuration directly as a string, or use the metric key `MULTI ### Enumerating Available Devices -The Inference Engine features a dedicated API to enumerate devices and their capabilities. See the [Hello Query Device Python Sample](../../samples/python/hello_query_device/README.md). This is example output from the sample (truncated to device names only): +The OpenVINO Runtime API features a dedicated methods to enumerate devices and their capabilities. See the [Hello Query Device Python Sample](../../samples/python/hello_query_device/README.md). 
This is example output from the sample (truncated to device names only): ```sh ./hello_query_device diff --git a/docs/OV_Runtime_UG/supported_plugins/GNA.md b/docs/OV_Runtime_UG/supported_plugins/GNA.md index 497dec14ac0..3426b10549f 100644 --- a/docs/OV_Runtime_UG/supported_plugins/GNA.md +++ b/docs/OV_Runtime_UG/supported_plugins/GNA.md @@ -242,7 +242,7 @@ For example, the Kaldi model optimizer inserts such a permute after convolution ## Operation Precision -Intel® GNA essentially operates in the low-precision mode, which represents a mix of 8-bit (`I8`), 16-bit (`I16`), and 32-bit (`I32`) integer computations. Outputs calculated using a reduced integer precision are different from the scores calculated using the floating point format, for example, `FP32` outputs calculated on CPU using the Inference Engine [CPU Plugin](CPU.md). +Intel® GNA essentially operates in the low-precision mode, which represents a mix of 8-bit (`I8`), 16-bit (`I16`), and 32-bit (`I32`) integer computations. Outputs calculated using a reduced integer precision are different from the scores calculated using the floating point format, for example, `FP32` outputs calculated on CPU using the OpenVINO [CPU device](CPU.md). Unlike other plugins supporting low-precision execution, the GNA plugin can calculate quantization factors at the model loading time, so you can run a model without calibration using the [Post-Training Optimization Tool](@ref pot_README). However, this mode may not provide satisfactory accuracy because the internal quantization algorithm is based on heuristics which may or may not be efficient, depending on the model and dynamic range of input data. diff --git a/docs/OV_Runtime_UG/supported_plugins/GPU.md b/docs/OV_Runtime_UG/supported_plugins/GPU.md index 3bba80568f7..06e110e26f0 100644 --- a/docs/OV_Runtime_UG/supported_plugins/GPU.md +++ b/docs/OV_Runtime_UG/supported_plugins/GPU.md @@ -13,7 +13,7 @@ The GPU plugin uses the Intel® Compute Library for Deep Neural Networks (clDNN) to infer deep neural networks. clDNN is an open source performance library for Deep Learning (DL) applications intended for acceleration of Deep Learning Inference on Intel® Processor Graphics including Intel® HD Graphics, Intel® Iris® Graphics, Intel® Iris® Xe Graphics, and Intel® Iris® Xe MAX graphics. -For an in-depth description of clDNN, see [Inference Engine source files](https://github.com/openvinotoolkit/openvino/tree/master/src/plugins/intel_gpu/) and [Accelerate Deep Learning Inference with Intel® Processor Graphics](https://software.intel.com/en-us/articles/accelerating-deep-learning-inference-with-intel-processor-graphics). +For an in-depth description of clDNN, see [OpenVINO Runtime GPU plugin source files](https://github.com/openvinotoolkit/openvino/tree/master/src/plugins/intel_gpu/) and [Accelerate Deep Learning Inference with Intel® Processor Graphics](https://software.intel.com/en-us/articles/accelerating-deep-learning-inference-with-intel-processor-graphics). ## Device Naming Convention * Devices are enumerated as "GPU.X" where `X={0, 1, 2,...}`. Only Intel® GPU devices are considered. 
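For illustration, the device enumeration and MULTI configuration described in the hunks above can be driven from the `ov::Core` API. The following is a minimal sketch rather than code taken from the patched sources; `model.xml` is a placeholder path and the device strings follow the naming convention documented above:

```cpp
#include <openvino/openvino.hpp>
#include <iostream>

int main() {
    ov::Core core;

    // Enumerate devices visible to the runtime, e.g. "CPU", "GPU.0", "GPU.1".
    for (const auto& device : core.get_available_devices())
        std::cout << device << std::endl;

    // Compile one model for the MULTI virtual device; "model.xml" is a placeholder.
    auto model = core.read_model("model.xml");
    auto compiled = core.compile_model(model, "MULTI:GPU,CPU");

    // Query how many infer requests the compiled model recommends creating.
    std::cout << "Optimal number of infer requests: "
              << compiled.get_property(ov::optimal_number_of_infer_requests) << std::endl;
    return 0;
}
```

Querying `ov::optimal_number_of_infer_requests` instead of hard-coding per-device request counts matches the performance-portable approach recommended in the MULTI documentation above.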
diff --git a/docs/OV_Runtime_UG/supported_plugins/HDDL.md b/docs/OV_Runtime_UG/supported_plugins/HDDL.md index 94fd2417a78..d4642f6fcae 100644 --- a/docs/OV_Runtime_UG/supported_plugins/HDDL.md +++ b/docs/OV_Runtime_UG/supported_plugins/HDDL.md @@ -2,7 +2,7 @@ ## Introducing the HDDL Plugin -The Inference Engine HDDL plugin was developed for inference with neural networks on Intel® Vision Accelerator Design with Intel® Movidius™ VPUs. It is designed for use cases that require large throughput for deep learning inference, up to dozens of times more than the MYRIAD Plugin. +The OpenVINO Runtime HDDL plugin was developed for inference with neural networks on Intel® Vision Accelerator Design with Intel® Movidius™ VPUs. It is designed for use cases that require large throughput for deep learning inference, up to dozens of times more than the MYRIAD Plugin. ## Configuring the HDDL Plugin diff --git a/docs/OV_Runtime_UG/supported_plugins/MYRIAD.md b/docs/OV_Runtime_UG/supported_plugins/MYRIAD.md index 8ccfe223e14..a5d1a46b67f 100644 --- a/docs/OV_Runtime_UG/supported_plugins/MYRIAD.md +++ b/docs/OV_Runtime_UG/supported_plugins/MYRIAD.md @@ -2,7 +2,7 @@ ## Introducing MYRIAD Plugin -The Inference Engine MYRIAD plugin has been developed for inference of neural networks on Intel® Neural Compute Stick 2. +The OpenVINO Runtime MYRIAD plugin has been developed for inference of neural networks on Intel® Neural Compute Stick 2. ## Configuring the MYRIAD Plugin diff --git a/docs/OV_Runtime_UG/supported_plugins/Supported_Devices.md b/docs/OV_Runtime_UG/supported_plugins/Supported_Devices.md index 54aee5a84be..c8914472035 100644 --- a/docs/OV_Runtime_UG/supported_plugins/Supported_Devices.md +++ b/docs/OV_Runtime_UG/supported_plugins/Supported_Devices.md @@ -1,11 +1,11 @@ Supported Devices {#openvino_docs_IE_DG_supported_plugins_Supported_Devices} ================== -The Inference Engine can infer models in different formats with various input and output formats. This section provides supported and optimal configurations per device. In OpenVINO™ documentation, "device" refers to an Intel® processors used for inference, which can be a supported CPU, GPU, VPU (vision processing unit), or GNA (Gaussian neural accelerator coprocessor), or a combination of those devices. +The OpenVINO Runtime can infer models in different formats with various input and output formats. This section provides supported and optimal configurations per device. In OpenVINO™ documentation, "device" refers to an Intel® processors used for inference, which can be a supported CPU, GPU, VPU (vision processing unit), or GNA (Gaussian neural accelerator coprocessor), or a combination of those devices. > **NOTE**: With OpenVINO™ 2020.4 release, Intel® Movidius™ Neural Compute Stick is no longer supported. -The Inference Engine provides unique capabilities to infer deep learning models on the following device types with corresponding plugins: +The OpenVINO Runtime provides unique capabilities to infer deep learning models on the following device types with corresponding plugins: | Plugin | Device types | |------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------| @@ -35,7 +35,7 @@ The table below shows the plugin libraries and additional dependencies for Linux ## Supported Configurations -The Inference Engine can inference models in different formats with various input and output formats. 
+The OpenVINO Runtime can inference models in different formats with various input and output formats. This page shows supported and optimal configurations for each plugin. ### Terminology diff --git a/docs/OV_Runtime_UG/supported_plugins/VPU.md b/docs/OV_Runtime_UG/supported_plugins/VPU.md index e7a0ce80c33..e1b2efcdf91 100644 --- a/docs/OV_Runtime_UG/supported_plugins/VPU.md +++ b/docs/OV_Runtime_UG/supported_plugins/VPU.md @@ -11,7 +11,7 @@ @endsphinxdirective -This chapter provides information on the Inference Engine plugins that enable inference of deep learning models on the supported VPU devices: +This chapter provides information on the OpenVINO Runtime plugins that enable inference of deep learning models on the supported VPU devices: * Intel® Neural Compute Stick 2 powered by the Intel® Movidius™ Myriad™ X — Supported by the [MYRIAD Plugin](MYRIAD.md) * Intel® Vision Accelerator Design with Intel® Movidius™ VPUs — Supported by the [HDDL Plugin](HDDL.md) diff --git a/docs/benchmarks/performance_benchmarks_faq.md b/docs/benchmarks/performance_benchmarks_faq.md index 2010111bf3b..b628b12f116 100644 --- a/docs/benchmarks/performance_benchmarks_faq.md +++ b/docs/benchmarks/performance_benchmarks_faq.md @@ -66,6 +66,6 @@ The CAFFE version of resnet-50, mobilenet-v1-1.0-224 and mobilenet-v2 have been The web site format has changed in order to support the more common search approach of looking for the performance of a given neural network model on different HW-platforms. As opposed to review a given HW-platform's performance on different neural network models. #### 13. How is Latency measured? -Latency is measured by running the OpenVINO™ inference engine in synchronous mode. In synchronous mode each frame or image is processed through the entire set of stages (pre-processing, inference, post-processing) before the next frame or image is processed. This KPI is relevant for applications where the inference on a single image is required, for example the analysis of an ultra sound image in a medical application or the analysis of a seismic image in the oil & gas industry. Other use cases include real-time or near real-time applications like an industrial robot's response to changes in its environment and obstacle avoidance for autonomous vehicles where a quick response to the result of the inference is required. +Latency is measured by running the OpenVINO™ Runtime in synchronous mode. In synchronous mode each frame or image is processed through the entire set of stages (pre-processing, inference, post-processing) before the next frame or image is processed. This KPI is relevant for applications where the inference on a single image is required, for example the analysis of an ultra sound image in a medical application or the analysis of a seismic image in the oil & gas industry. Other use cases include real-time or near real-time applications like an industrial robot's response to changes in its environment and obstacle avoidance for autonomous vehicles where a quick response to the result of the inference is required. For more complete information about performance and benchmark results, visit: [www.intel.com/benchmarks](https://www.intel.com/benchmarks) and [Optimization Notice](https://software.intel.com/articles/optimization-notice). [Legal Information](../Legal_Information.md). 
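The synchronous-latency definition referenced in the FAQ hunk above can be approximated with a simple timing loop around a blocking inference call. This is a rough sketch under assumed conditions (placeholder `model.xml`, CPU device, input tensors left at their defaults), not the measurement code used for the published numbers:

```cpp
#include <openvino/openvino.hpp>
#include <chrono>
#include <iostream>

int main() {
    ov::Core core;
    auto compiled = core.compile_model("model.xml", "CPU");  // placeholder model path
    auto request = compiled.create_infer_request();

    // In a real measurement the input tensors would be filled with preprocessed data first.
    request.infer();  // warm-up run, excluded from the measurement

    const auto start = std::chrono::steady_clock::now();
    request.infer();  // synchronous: returns only when the result is ready
    const auto stop = std::chrono::steady_clock::now();

    const double latency_ms = std::chrono::duration<double, std::milli>(stop - start).count();
    std::cout << "Single-frame latency: " << latency_ms << " ms" << std::endl;
    return 0;
}
```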
diff --git a/docs/benchmarks/performance_benchmarks_openvino.md b/docs/benchmarks/performance_benchmarks_openvino.md index ca55baf8c2c..89b282ebf6a 100644 --- a/docs/benchmarks/performance_benchmarks_openvino.md +++ b/docs/benchmarks/performance_benchmarks_openvino.md @@ -14,7 +14,7 @@ This benchmark setup includes a single machine on which both the benchmark application and the OpenVINO™ installation reside. -The benchmark application loads the Inference Engine (SW) at run time and executes inferences on the specified hardware inference engine, (CPU, GPU or VPU). The benchmark application measures the time spent on actual inferencing (excluding any pre or post processing) and then reports on the inferences per second (or Frames Per Second). For more information on the benchmark application, please also refer to the entry 5 of the [FAQ section](performance_benchmarks_faq.md). +The benchmark application loads the OpenVINO Runtime (SW) at runtime and executes inferences on the specified hardware (CPU, GPU or VPU). The benchmark application measures the time spent on actual inferencing (excluding any pre or post processing) and then reports on the inferences per second (or Frames Per Second). For more information on the benchmark application, please also refer to the entry 5 of the [FAQ section](performance_benchmarks_faq.md). Devices similar to the ones we have used for benchmarking can be accessed using [Intel® DevCloud for the Edge](https://devcloud.intel.com/edge/), a remote development environment with access to Intel® hardware and the latest versions of the Intel® Distribution of the OpenVINO™ Toolkit. [Learn more](https://devcloud.intel.com/edge/get_started/devcloud/) or [Register here](https://inteliot.force.com/DevcloudForEdge/s/). diff --git a/docs/benchmarks/performance_benchmarks_ovms.md b/docs/benchmarks/performance_benchmarks_ovms.md index 57495835d86..d7393aa5047 100644 --- a/docs/benchmarks/performance_benchmarks_ovms.md +++ b/docs/benchmarks/performance_benchmarks_ovms.md @@ -1,6 +1,6 @@ # OpenVINO™ Model Server Benchmark Results {#openvino_docs_performance_benchmarks_ovms} -OpenVINO™ Model Server is an open-source, production-grade inference platform that exposes a set of models via a convenient inference API over gRPC or HTTP/REST. It employs the inference engine libraries for from the Intel® Distribution of OpenVINO™ toolkit to extend workloads across Intel® hardware including CPU, GPU and others. +OpenVINO™ Model Server is an open-source, production-grade inference platform that exposes a set of models via a convenient inference API over gRPC or HTTP/REST. It employs the OpenVINO Runtime libraries for from the Intel® Distribution of OpenVINO™ toolkit to extend workloads across Intel® hardware including CPU, GPU and others. ![OpenVINO™ Model Server](../img/performance_benchmarks_ovms_01.png) diff --git a/docs/documentation.md b/docs/documentation.md index c42f93adaf4..8a3234ce8dc 100644 --- a/docs/documentation.md +++ b/docs/documentation.md @@ -52,8 +52,6 @@ Intel® Deep Learning Streamer openvino_docs_gapi_gapi_intro - OpenVX Developer Guide - OpenVX API Reference OpenCV* Developer Guide OpenCL™ Developer Guide @@ -108,5 +106,5 @@ The OpenVINO™ toolkit also works with the following media processing framework * [Intel® Deep Learning Streamer (Intel® DL Streamer)](@ref openvino_docs_dlstreamer) — A streaming media analytics framework based on GStreamer, for creating complex media analytics pipelines optimized for Intel hardware platforms. 
Go to the Intel® DL Streamer [documentation](https://dlstreamer.github.io/) website to learn more. * [Intel® oneAPI Video Processing Library (oneVPL)](https://www.intel.com/content/www/us/en/develop/documentation/oneapi-programming-guide/top/api-based-programming/intel-oneapi-video-processing-library-onevpl.html) — A programming interface for video decoding, encoding, and processing to build portable media pipelines on CPUs, GPUs, and other accelerators. -You can also add computer vision capabilities to your application using optimized versions of [OpenCV](https://opencv.org/) and [OpenVX](https://khronos.org/openvx). +You can also add computer vision capabilities to your application using optimized versions of [OpenCV](https://opencv.org/). diff --git a/docs/gapi/face_beautification.md b/docs/gapi/face_beautification.md index 25619ae8e0b..7026d9b98a0 100644 --- a/docs/gapi/face_beautification.md +++ b/docs/gapi/face_beautification.md @@ -338,7 +338,7 @@ The steps to get the masks are: Once the graph is fully expressed, we can finally compile it and run on real data. G-API graph compilation is the stage where the G-API framework actually understands which kernels and networks to use. This configuration happens via G-API compilation arguments. ### DNN Parameters -This sample is using OpenVINO™ Toolkit Inference Engine backend for DL inference, which is configured the following way: +This sample is using OpenVINO™ Toolkit OpenVINO Runtime backend for DL inference, which is configured the following way: ```cpp auto faceParams = cv::gapi::ie::Params { diff --git a/docs/gapi/gapi_face_analytics_pipeline.md b/docs/gapi/gapi_face_analytics_pipeline.md index 6b544485668..be07aaae573 100644 --- a/docs/gapi/gapi_face_analytics_pipeline.md +++ b/docs/gapi/gapi_face_analytics_pipeline.md @@ -41,7 +41,7 @@ Constructing a G-API graph for a video streaming case does not differ much from ### Declare Deep Learning topologies {#gapi_ifd_declaring_nets} -In contrast with traditional CV functions (see [core](https://docs.opencv.org/4.5.0/df/d1f/group__gapi__core.html) and [imgproc](https://docs.opencv.org/4.5.0/d2/d00/group__gapi__imgproc.html)) where G-API declares distinct operations for every function, inference in G-API is a single generic operation `cv::gapi::infer<>`. As usual, it is just an interface and it can be implemented in a number of ways under the hood. In OpenCV 4.2, only OpenVINO™ Inference Engine-based backend is available, and OpenCV's own DNN module-based backend is to come. +In contrast with traditional CV functions (see [core](https://docs.opencv.org/4.5.0/df/d1f/group__gapi__core.html) and [imgproc](https://docs.opencv.org/4.5.0/d2/d00/group__gapi__imgproc.html)) where G-API declares distinct operations for every function, inference in G-API is a single generic operation `cv::gapi::infer<>`. As usual, it is just an interface and it can be implemented in a number of ways under the hood. In OpenCV 4.2, only OpenVINO™ Runtime-based backend is available, and OpenCV's own DNN module-based backend is to come. `cv::gapi::infer<>` is _parametrized_ by the details of a topology we are going to execute. Like operations, topologies in G-API are strongly typed and are defined with a special macro `G_API_NET()`: @@ -126,7 +126,7 @@ G-API strictly separates construction from configuration -- with the idea to kee Platform-specific details arise when the pipeline is *compiled* -- i.e. is turned from a declarative to an executable form. 
The way *how* to run stuff is specified via compilation arguments, and new inference/streaming features are no exception from this rule. -G-API is built on backends which implement interfaces (see [Architecture](https://docs.opencv.org/4.5.0/de/d4d/gapi_hld.html) and [Kernels](kernel_api.md) for details) -- thus `cv::gapi::infer<>` is a function which can be implemented by different backends. In OpenCV 4.2, only OpenVINO™ Inference Engine backend for inference is available. Every inference backend in G-API has to provide a special parameterizable structure to express *backend-specific* neural network parameters -- and in this case, it is `cv::gapi::ie::Params`: +G-API is built on backends which implement interfaces (see [Architecture](https://docs.opencv.org/4.5.0/de/d4d/gapi_hld.html) and [Kernels](kernel_api.md) for details) -- thus `cv::gapi::infer<>` is a function which can be implemented by different backends. In OpenCV 4.2, only OpenVINO™ Runtime backend for inference is available. Every inference backend in G-API has to provide a special parameterizable structure to express *backend-specific* neural network parameters -- and in this case, it is `cv::gapi::ie::Params`: ```cpp auto det_net = cv::gapi::ie::Params { @@ -148,7 +148,7 @@ auto emo_net = cv::gapi::ie::Params { Here we define three parameter objects: `det_net`, `age_net`, and `emo_net`. Every object is a `cv::gapi::ie::Params` structure parametrization for each particular network we use. On a compilation stage, G-API automatically matches network parameters with their `cv::gapi::infer<>` calls in graph using this information. -Regardless of the topology, every parameter structure is constructed with three string arguments – specific to the OpenVINO™ Inference Engine: +Regardless of the topology, every parameter structure is constructed with three string arguments – specific to the OpenVINO™ Runtime: * Path to the topology's intermediate representation (.xml file); * Path to the topology's model weights (.bin file); diff --git a/docs/get_started.md b/docs/get_started.md index f576ab82b02..c55b1055421 100644 --- a/docs/get_started.md +++ b/docs/get_started.md @@ -41,7 +41,7 @@ .. toctree:: :maxdepth: 1 :hidden: - :caption: Inference Engine Code Samples + :caption: OpenVINO Code Samples openvino_docs_IE_DG_Samples_Overview @@ -87,7 +87,7 @@

Use a web-based version of OpenVINO with a Graphical User Interface. Installing a DL Workbench container is required.
-Inference Engine samples
+OpenVINO samples
See ready-made applications explaining OpenVINO features and various use-cases.

diff --git a/docs/get_started/get_started_demos.md b/docs/get_started/get_started_demos.md index 25eb5786d5e..73e6dc4c21e 100644 --- a/docs/get_started/get_started_demos.md +++ b/docs/get_started/get_started_demos.md @@ -39,15 +39,15 @@ To build OpenVINO samples: @sphinxdirective .. tab:: Linux - Go to the :doc:`Inference Engine Samples page ` and see the "Build the Sample Applications on Linux*" section. + Go to the :doc:`OpenVINO Samples page ` and see the "Build the Sample Applications on Linux*" section. .. tab:: Windows - Go to the :doc:`Inference Engine Samples page ` and see the "Build the Sample Applications on Microsoft Windows* OS" section. + Go to the :doc:`OpenVINO Samples page ` and see the "Build the Sample Applications on Microsoft Windows* OS" section. .. tab:: macOS - Go to the :doc:`Inference Engine Samples page ` and see the "Build the Sample Applications on macOS*" section. + Go to the :doc:`OpenVINO Samples page ` and see the "Build the Sample Applications on macOS*" section. @endsphinxdirective @@ -358,9 +358,9 @@ Your screen looks similar to this after the download: ## Step 2: Convert the Model with Model Optimizer -In this step, your trained models are ready to run through the Model Optimizer to convert them to the IR (Intermediate Representation) format. For most model types, this is required before using the Inference Engine with the model. +In this step, your trained models are ready to run through the Model Optimizer to convert them to the IR (Intermediate Representation) format. For most model types, this is required before using the OpenVINO Runtime with the model. -Models in the IR format always include an `.xml` and `.bin` file and may also include other files such as `.json` or `.mapping`. Make sure you have these files together in a single directory so the Inference Engine can find them. +Models in the IR format always include an `.xml` and `.bin` file and may also include other files such as `.json` or `.mapping`. Make sure you have these files together in a single directory so the OpenVINO Runtime can find them. REQUIRED: `model_name.xml` REQUIRED: `model_name.bin` @@ -393,7 +393,7 @@ Create an `` directory to contain the model's Intermediate Representatio @endsphinxdirective -The Inference Engine can perform inference on different precision formats, such as FP32, FP16, or INT8. To generate an IR with a specific precision, run the Model Optimizer with the appropriate `--data_type` option. +The OpenVINO Runtime can perform inference on different precision formats, such as FP32, FP16, or INT8. To generate an IR with a specific precision, run the Model Optimizer with the appropriate `--data_type` option. Generic Model Optimizer script: diff --git a/docs/glossary.md b/docs/glossary.md index 5e168add190..a0b9d690e5b 100644 --- a/docs/glossary.md +++ b/docs/glossary.md @@ -58,19 +58,19 @@ Glossary of terms used in the OpenVINO™ | Term | Description | | :--- |:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| Batch | Number of images to analyze during one call of infer. Maximum batch size is a property of the network and it is set before loading of the network to the plugin. 
In NHWC, NCHW and NCDHW image data layout representation, the N refers to the number of images in the batch | -| Tensor | Memory container used for storing inputs, outputs of the network, weights and biases of the layers | -| Device (Affinitity) | A preferred Intel(R) hardware device to run the inference (CPU, GPU, etc.) | -| Extensibility mechanism, Custom layers | The mechanism that provides you with capabilities to extend the OpenVINO™ Runtime and Model Optimizer so that they can work with topologies containing layers that are not yet supported | -| ov::Model | A class of the Model that OpenVINO™ Runtime reads from IR. Consists of topology, weights and biases | -| ov::CompiledModel | An instance of the loaded network which allows the OpenVINO™ Runtime to request (several) infer requests and perform inference synchronously or asynchronously | -| InferRequest | A class that represents the end point of inference on the model loaded to the plugin and represented by executable network. Inputs are set here, outputs should be requested from this interface as well | -| ov::ProfileInfo | Represents basic inference profiling information per layer | -| OpenVINO™ Runtime | A C++ library with a set of classes that you can use in your application to infer input data (images) and get the result | -| OpenVINO™ API | The basic default API for all supported devices, which allows you to load a model from Intermediate Representation, set input and output formats and execute the model on various devices | +| Batch | Number of images to analyze during one call of infer. Maximum batch size is a property of the model and it is set before compiling of the model by the device. In NHWC, NCHW and NCDHW image data layout representation, the N refers to the number of images in the batch | +| Tensor | Memory container used for storing inputs, outputs of the model, weights and biases of the operations | +| Device (Affinitity) | A preferred Intel(R) hardware device to run the inference (CPU, GPU, GNA, etc.) | +| Extensibility mechanism, Custom layers | The mechanism that provides you with capabilities to extend the OpenVINO™ Runtime and Model Optimizer so that they can work with models containing operations that are not yet supported | +| ov::Model | A class of the Model that OpenVINO™ Runtime reads from IR or converts from ONNX, PaddlePaddle formats. Consists of model structure, weights and biases | +| ov::CompiledModel | An instance of the compiled model which allows the OpenVINO™ Runtime to request (several) infer requests and perform inference synchronously or asynchronously | +| ov::InferRequest | A class that represents the end point of inference on the model compiled by the device and represented by a compiled model. Inputs are set here, outputs should be requested from this interface as well | +| ov::ProfilingInfo | Represents basic inference profiling information per operation | +| OpenVINO™ Runtime | A C++ library with a set of classes that you can use in your application to infer input tensors and get the results | +| OpenVINO™ API | The basic default API for all supported devices, which allows you to load a model from Intermediate Representation or convert from ONNX, PaddlePaddle file formars, set input and output formats and execute the model on various devices | | OpenVINO™ Core | OpenVINO™ Core is a software component that manages inference on certain Intel(R) hardware devices: CPU, GPU, MYRIAD, GNA, etc. | | ov::Layout | Image data layout refers to the representation of images batch. 
Layout shows a sequence of 4D or 5D tensor data in memory. A typical NCHW format represents pixel in horizontal direction, rows by vertical dimension, planes by channel and images into batch. See also [Layout API Overview](./OV_Runtime_UG/layout_overview.md) | -| ov::element::Type | Represents data element type. For example, f32 is 32-bit floating point, f16 is 16-bit floating point. Element type can be changed before loading the network to the plugin | +| ov::element::Type | Represents data element type. For example, f32 is 32-bit floating point, f16 is 16-bit floating point. | ## See Also diff --git a/docs/install_guides/deployment-manager-tool.md b/docs/install_guides/deployment-manager-tool.md index e64b8eb9aed..23e1ecebbae 100644 --- a/docs/install_guides/deployment-manager-tool.md +++ b/docs/install_guides/deployment-manager-tool.md @@ -157,7 +157,7 @@ The script successfully completes, and the deployment package is generated in th After the Deployment Manager has successfully completed, you can find the generated `.tar.gz` (for Linux or macOS) or `.zip` (for Windows) package in the output directory you specified. -To deploy the Inference Engine components from the development machine to the target system, perform the following steps: +To deploy the OpenVINO Runtime components from the development machine to the target system, perform the following steps: 1. Copy the generated archive to the target system using your preferred method. @@ -223,4 +223,4 @@ The package is unpacked to the destination directory and the following files and @endsphinxdirective -You have now finished the deployment of the Inference Engine components to the target system. +You have now finished the deployment of the OpenVINO Runtime components to the target system. diff --git a/docs/install_guides/installing-openvino-apt.md b/docs/install_guides/installing-openvino-apt.md index c2e45d71ad0..396f4c00735 100644 --- a/docs/install_guides/installing-openvino-apt.md +++ b/docs/install_guides/installing-openvino-apt.md @@ -140,5 +140,5 @@ If you are using Intel® Processor Graphics, Intel® Vision Accelerator Design w - OpenVINO™ toolkit online documentation: . - [Model Optimizer Developer Guide](../MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md). - [OpenVINO Runtime User Guide](../OV_Runtime_UG/OpenVINO_Runtime_User_Guide). -- For more information on Sample Applications, see the [Inference Engine Samples Overview](../OV_Runtime_UG/Samples_Overview.md). +- For more information on Sample Applications, see the [OpenVINO Samples Overview](../OV_Runtime_UG/Samples_Overview.md). - For IoT Libraries & Code Samples see the [Intel® IoT Developer Kit](https://github.com/intel-iot-devkit). diff --git a/docs/install_guides/installing-openvino-conda.md b/docs/install_guides/installing-openvino-conda.md index a6e7b55e5ef..38a77d7e45d 100644 --- a/docs/install_guides/installing-openvino-conda.md +++ b/docs/install_guides/installing-openvino-conda.md @@ -63,5 +63,5 @@ Now you can start developing your application. - OpenVINO™ toolkit online documentation: . - [Model Optimizer Developer Guide](../MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md). - [OpenVINO Runtime User Guide](../OV_Runtime_UG/OpenVINO_Runtime_User_Guide). -- For more information on Sample Applications, see the [Inference Engine Samples Overview](../OV_Runtime_UG/Samples_Overview.md). +- For more information on Sample Applications, see the [OpenVINO Samples Overview](../OV_Runtime_UG/Samples_Overview.md). 
- Intel® Distribution of OpenVINO™ toolkit Anaconda home page: [https://anaconda.org/intel/openvino-ie4py](https://anaconda.org/intel/openvino-ie4py) diff --git a/docs/install_guides/installing-openvino-images.md b/docs/install_guides/installing-openvino-images.md index 82cb0f4cdd5..101e247529b 100644 --- a/docs/install_guides/installing-openvino-images.md +++ b/docs/install_guides/installing-openvino-images.md @@ -10,5 +10,5 @@ You may install Intel® Distribution of OpenVINO™ toolkit from images and repo * [Yocto](installing-openvino-yocto.md) * [PyPI](installing-openvino-pip.md) -The open source version is available in the [OpenVINO™ toolkit GitHub repository](https://github.com/openvinotoolkit/openvino) and you can build it for supported platforms using the Inference Engine Build Instructions. +The open source version is available in the [OpenVINO™ toolkit GitHub repository](https://github.com/openvinotoolkit/openvino) and you can build it for supported platforms using the OpenVINO Build Instructions. diff --git a/docs/install_guides/installing-openvino-overview.md b/docs/install_guides/installing-openvino-overview.md index b3243efaa31..2160bc11d3f 100644 --- a/docs/install_guides/installing-openvino-overview.md +++ b/docs/install_guides/installing-openvino-overview.md @@ -34,4 +34,4 @@ For C++ developers, you may choose one of the following installation options to > **NOTE**: From the 2022.1 release, the OpenVINO Development Tools can **only** be installed via PyPI. See [Install OpenVINO Development Tools](installing-model-dev-tools.md) for detailed steps. -Besides, the open source version is also available in the [OpenVINO™ toolkit GitHub repository](https://github.com/openvinotoolkit/openvino/wiki/BuildingCode). You can build it for supported platforms using the Inference Engine Build Instructions. +Besides, the open source version is also available in the [OpenVINO™ toolkit GitHub repository](https://github.com/openvinotoolkit/openvino/). You can build it for supported platforms using the [OpenVINO Build Instructions](https://github.com/openvinotoolkit/openvino/wiki/BuildingCode). diff --git a/docs/install_guides/installing-openvino-pip.md b/docs/install_guides/installing-openvino-pip.md index 79606ae18a5..08e60700142 100644 --- a/docs/install_guides/installing-openvino-pip.md +++ b/docs/install_guides/installing-openvino-pip.md @@ -38,4 +38,4 @@ For system requirements and more detailed steps, see . - [Model Optimizer Developer Guide](../MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md). - [OpenVINO Runtime User Guide](../OV_Runtime_UG/OpenVINO_Runtime_User_Guide). -- For more information on Sample Applications, see the [Inference Engine Samples Overview](../OV_Runtime_UG/Samples_Overview.md). +- For more information on Sample Applications, see the [OpenVINO Samples Overview](../OV_Runtime_UG/Samples_Overview.md). - For IoT Libraries & Code Samples see the [Intel® IoT Developer Kit](https://github.com/intel-iot-devkit). 
diff --git a/docs/install_guides/pypi-openvino-dev.md b/docs/install_guides/pypi-openvino-dev.md index a7d39e61a70..4069e93f3e1 100644 --- a/docs/install_guides/pypi-openvino-dev.md +++ b/docs/install_guides/pypi-openvino-dev.md @@ -8,11 +8,11 @@ OpenVINO™ toolkit is a comprehensive toolkit for quickly developing applicatio | Component | Console Script | Description | |------------------|---------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| [Model Optimizer](https://docs.openvino.ai/latest/openvino_docs_MO_DG_Deep_Learning_Model_Optimizer_DevGuide.html) | `mo` |**Model Optimizer** imports, converts, and optimizes models that were trained in popular frameworks to a format usable by Intel tools, especially the Inference Engine. 
Supported frameworks include Caffe\*, TensorFlow\*, MXNet\*, and ONNX\*. | +| [Model Optimizer](https://docs.openvino.ai/latest/openvino_docs_MO_DG_Deep_Learning_Model_Optimizer_DevGuide.html) | `mo` |**Model Optimizer** imports, converts, and optimizes models that were trained in popular frameworks to a format usable by OpenVINO components. 
Supported frameworks include Caffe\*, TensorFlow\*, MXNet\*, PaddlePaddle\*, and ONNX\*. | | [Benchmark Tool](https://docs.openvino.ai/latest/openvino_inference_engine_tools_benchmark_tool_README.html)| `benchmark_app` | **Benchmark Application** allows you to estimate deep learning inference performance on supported devices for synchronous and asynchronous modes. | | [Accuracy Checker](https://docs.openvino.ai/latest/omz_tools_accuracy_checker.html) and
[Annotation Converter](https://docs.openvino.ai/latest/omz_tools_accuracy_checker_annotation_converters.html) | `accuracy_check`
`convert_annotation` |**Accuracy Checker** is a deep learning accuracy validation tool that allows you to collect accuracy metrics against popular datasets. The main advantages of the tool are the flexibility of configuration and a set of supported datasets, preprocessing, postprocessing, and metrics.
**Annotation Converter** is a utility that prepares datasets for evaluation with Accuracy Checker. | | [Post-Training Optimization Tool](https://docs.openvino.ai/latest/pot_README.html)| `pot` |**Post-Training Optimization Tool** allows you to optimize trained models with advanced capabilities, such as quantization and low-precision optimizations, without the need to retrain or fine-tune models. Optimizations are also available through the [API](https://docs.openvino.ai/latest/pot_compression_api_README.html). | -| [Model Downloader and other Open Model Zoo tools](https://docs.openvino.ai/latest/omz_tools_downloader.html)| `omz_downloader`
`omz_converter`
`omz_quantizer`
`omz_info_dumper`| **Model Downloader** is a tool for getting access to the collection of high-quality and extremely fast pre-trained deep learning [public](https://docs.openvino.ai/latest/omz_models_group_public.html) and [Intel](https://docs.openvino.ai/latest/omz_models_group_intel.html)-trained models. These free pre-trained models can be used to speed up the development and production deployment process without training your own models. The tool downloads model files from online sources and, if necessary, patches them to make them more usable with Model Optimizer. A number of additional tools are also provided to automate the process of working with downloaded models:
**Model Converter** is a tool for converting Open Model Zoo models that are stored in an original deep learning framework format into the Inference Engine Intermediate Representation (IR) using Model Optimizer.
**Model Quantizer** is a tool for automatic quantization of full-precision models in the IR format into low-precision versions using the Post-Training Optimization Tool.
**Model Information Dumper** is a helper utility for dumping information about the models to a stable, machine-readable format. +| [Model Downloader and other Open Model Zoo tools](https://docs.openvino.ai/latest/omz_tools_downloader.html)| `omz_downloader`
`omz_converter`
`omz_quantizer`
`omz_info_dumper`| **Model Downloader** is a tool for getting access to the collection of high-quality and extremely fast pre-trained deep learning [public](https://docs.openvino.ai/latest/omz_models_group_public.html) and [Intel](https://docs.openvino.ai/latest/omz_models_group_intel.html)-trained models. These free pre-trained models can be used to speed up the development and production deployment process without training your own models. The tool downloads model files from online sources and, if necessary, patches them to make them more usable with Model Optimizer. A number of additional tools are also provided to automate the process of working with downloaded models:
**Model Converter** is a tool for converting Open Model Zoo models that are stored in an original deep learning framework format into the OpenVINO Intermediate Representation (IR) using Model Optimizer.
**Model Quantizer** is a tool for automatic quantization of full-precision models in the IR format into low-precision versions using the Post-Training Optimization Tool.
**Model Information Dumper** is a helper utility for dumping information about the models to a stable, machine-readable format. The developer package also installs the OpenVINO™ Runtime package as a dependency. @@ -102,7 +102,7 @@ For example, to install and configure the components for working with TensorFlow ``` You will see the help message for Model Optimizer if installation finished successfully. -- To verify that Inference Engine from the **runtime package** is available, run the command below: +- To verify that OpenVINO Runtime from the **runtime package** is available, run the command below: ```sh python -c "from openvino.runtime import Core" ``` diff --git a/docs/optimization_guide/dldt_optimization_guide.md b/docs/optimization_guide/dldt_optimization_guide.md index 7381545800e..33b39bc1da8 100644 --- a/docs/optimization_guide/dldt_optimization_guide.md +++ b/docs/optimization_guide/dldt_optimization_guide.md @@ -15,7 +15,7 @@ Latency measures inference time (ms) required to process a single input. When it ## How to Improve Performance -> **NOTE**: Make sure that your model can be successfully inferred with OpenVINO Inference Engine. +> **NOTE**: Make sure that your model can be successfully inferred with OpenVINO Runtime. Inside OpenVINO there are two ways how to get better performance number: during developing and deployment your model. **It is possible to combine both developing and deployment optimizations**. From 53d3ef8eabe4f072d80a51906b94399d050d1680 Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Fri, 25 Feb 2022 07:02:09 +0300 Subject: [PATCH 101/310] Removed ngraph mentions (#10647) --- docs/Doxyfile.config | 10 ---------- docs/documentation.md | 2 +- docs/ops/opset1.md | 2 +- docs/ops/opset2.md | 2 +- docs/ops/opset3.md | 2 +- docs/ops/opset4.md | 2 +- docs/ops/opset5.md | 2 +- docs/ops/opset6.md | 2 +- docs/ops/opset7.md | 2 +- docs/ops/opset8.md | 2 +- 10 files changed, 9 insertions(+), 19 deletions(-) diff --git a/docs/Doxyfile.config b/docs/Doxyfile.config index 7c3614c5f10..056f005fc73 100644 --- a/docs/Doxyfile.config +++ b/docs/Doxyfile.config @@ -843,16 +843,6 @@ INPUT = "@MARKDOWN_INPUT@" \ "@OpenVINO_SOURCE_DIR@/src/common/transformations/include/" \ "@OpenVINO_SOURCE_DIR@/src/common/util/include/" \ "@OpenVINO_SOURCE_DIR@/src/core/include/" \ - "@OpenVINO_SOURCE_DIR@/src/core/include/ngraph/" \ - "@OpenVINO_SOURCE_DIR@/src/core/include/ngraph/descriptor" \ - "@OpenVINO_SOURCE_DIR@/src/core/include/ngraph/op/" \ - "@OpenVINO_SOURCE_DIR@/src/core/include/ngraph/op/util" \ - "@OpenVINO_SOURCE_DIR@/src/core/include/ngraph/opsets/" \ - "@OpenVINO_SOURCE_DIR@/src/core/include/ngraph/pass/" \ - "@OpenVINO_SOURCE_DIR@/src/core/include/ngraph/pattern/" \ - "@OpenVINO_SOURCE_DIR@/src/core/include/ngraph/pattern/op/" \ - "@OpenVINO_SOURCE_DIR@/src/core/include/ngraph/runtime/" \ - "@OpenVINO_SOURCE_DIR@/src/core/include/ngraph/type/" \ "@OpenVINO_SOURCE_DIR@/src/core/include/openvino/" \ "@OpenVINO_SOURCE_DIR@/src/core/include/openvino/core/" \ "@OpenVINO_SOURCE_DIR@/src/core/include/openvino/core/descriptor/" \ diff --git a/docs/documentation.md b/docs/documentation.md index 8a3234ce8dc..ba726707a03 100644 --- a/docs/documentation.md +++ b/docs/documentation.md @@ -91,7 +91,7 @@ This section provides reference documents that guide you through developing your With the [Model Downloader](@ref omz_tools_downloader) and [Model Optimizer](MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md) guides, you will learn to download pre-trained models and convert them for use with 
the OpenVINO™ toolkit. You can provide your own model or choose a public or Intel model from a broad selection provided in the [Open Model Zoo](model_zoo.md). ## Deploying Inference -The [OpenVINO™ Runtime User Guide](OV_Runtime_UG/openvino_intro.md) explains the process of creating your own application that runs inference with the OpenVINO™ toolkit. The [API Reference](./api_references.html) defines the Inference Engine API for Python, C++, and C and the nGraph API for Python and C++. The Inference Engine API is what you'll use to create an OpenVINO™ application, while the nGraph API is available for using enhanced operations sets and other features. After writing your application, you can use the [Deployment Manager](install_guides/deployment-manager-tool.md) for deploying to target devices. +The [OpenVINO™ Runtime User Guide](OV_Runtime_UG/openvino_intro.md) explains the process of creating your own application that runs inference with the OpenVINO™ toolkit. The [API Reference](./api_references.html) defines the OpenVINO Runtime API for Python, C++, and C. The OpenVINO Runtime API is what you'll use to create an OpenVINO™ inference application, use enhanced operations sets and other features. After writing your application, you can use the [Deployment Manager](install_guides/deployment-manager-tool.md) for deploying to target devices. ## Tuning for Performance The toolkit provides a [Performance Optimization Guide](optimization_guide/dldt_optimization_guide.md) and utilities for squeezing the best performance out of your application, including [Accuracy Checker](@ref omz_tools_accuracy_checker), [Post-Training Optimization Tool](@ref pot_README), and other tools for measuring accuracy, benchmarking performance, and tuning your application. diff --git a/docs/ops/opset1.md b/docs/ops/opset1.md index 3eee791065d..fc4db9ce049 100644 --- a/docs/ops/opset1.md +++ b/docs/ops/opset1.md @@ -3,7 +3,7 @@ This specification document describes `opset1` operation set supported in OpenVINO. Support for each particular operation from the list below depends on the capabilities available in a inference plugin and may vary among different hardware platforms and devices. Examples of operation instances are expressed as IR V10 xml -snippets. Such IR is generated by the Model Optimizer. The semantics match corresponding nGraph operation classes +snippets. Such IR is generated by the Model Optimizer. The semantics match corresponding OpenVINO operation classes declared in `namespace opset1`. diff --git a/docs/ops/opset2.md b/docs/ops/opset2.md index 04f9dfe048f..3ff00c6b762 100644 --- a/docs/ops/opset2.md +++ b/docs/ops/opset2.md @@ -3,7 +3,7 @@ This specification document describes `opset2` operation set supported in OpenVINO. Support for each particular operation from the list below depends on the capabilities available in a inference plugin and may vary among different hardware platforms and devices. Examples of operation instances are expressed as IR V10 xml -snippets. Such IR is generated by the Model Optimizer. The semantics match corresponding nGraph operation classes +snippets. Such IR is generated by the Model Optimizer. The semantics match corresponding OpenVINO operation classes declared in `namespace opset2`. diff --git a/docs/ops/opset3.md b/docs/ops/opset3.md index 525e25d3449..dfdf64710be 100644 --- a/docs/ops/opset3.md +++ b/docs/ops/opset3.md @@ -3,7 +3,7 @@ This specification document describes `opset3` operation set supported in OpenVINO. 
Support for each particular operation from the list below depends on the capabilities available in a inference plugin and may vary among different hardware platforms and devices. Examples of operation instances are expressed as IR V10 xml -snippets. Such IR is generated by the Model Optimizer. The semantics match corresponding nGraph operation classes +snippets. Such IR is generated by the Model Optimizer. The semantics match corresponding OpenVINO operation classes declared in `namespace opset3`. diff --git a/docs/ops/opset4.md b/docs/ops/opset4.md index b20fe4ac619..96e864bb5ae 100644 --- a/docs/ops/opset4.md +++ b/docs/ops/opset4.md @@ -3,7 +3,7 @@ This specification document describes `opset4` operation set supported in OpenVINO. Support for each particular operation from the list below depends on the capabilities available in a inference plugin and may vary among different hardware platforms and devices. Examples of operation instances are expressed as IR V10 xml -snippets. Such IR is generated by the Model Optimizer. The semantics match corresponding nGraph operation classes +snippets. Such IR is generated by the Model Optimizer. The semantics match corresponding OpenVINO operation classes declared in `namespace opset4`. diff --git a/docs/ops/opset5.md b/docs/ops/opset5.md index f980a96a043..d0c6653a0c6 100644 --- a/docs/ops/opset5.md +++ b/docs/ops/opset5.md @@ -3,7 +3,7 @@ This specification document describes `opset5` operation set supported in OpenVINO. Support for each particular operation from the list below depends on the capabilities available in a inference plugin and may vary among different hardware platforms and devices. Examples of operation instances are expressed as IR V10 xml -snippets. Such IR is generated by the Model Optimizer. The semantics match corresponding nGraph operation classes +snippets. Such IR is generated by the Model Optimizer. The semantics match corresponding OpenVINO operation classes declared in `namespace opset5`. diff --git a/docs/ops/opset6.md b/docs/ops/opset6.md index 3154484d56e..a2f35e51834 100644 --- a/docs/ops/opset6.md +++ b/docs/ops/opset6.md @@ -3,7 +3,7 @@ This specification document describes `opset6` operation set supported in OpenVINO. Support for each particular operation from the list below depends on the capabilities available in a inference plugin and may vary among different hardware platforms and devices. Examples of operation instances are expressed as IR V10 xml -snippets. Such IR is generated by the Model Optimizer. The semantics match corresponding nGraph operation classes +snippets. Such IR is generated by the Model Optimizer. The semantics match corresponding OpenVINO operation classes declared in `namespace opset6`. diff --git a/docs/ops/opset7.md b/docs/ops/opset7.md index 8a786fc2b39..95a0734fa89 100644 --- a/docs/ops/opset7.md +++ b/docs/ops/opset7.md @@ -3,7 +3,7 @@ This specification document describes the `opset7` operation set supported in OpenVINO™. Support for each particular operation from the list below depends on the capabilities available in an inference plugin and may vary among different hardware platforms and devices. Examples of operation instances are provided as IR V10 xml -snippets. Such IR is generated by the Model Optimizer. The semantics match corresponding nGraph operation classes +snippets. Such IR is generated by the Model Optimizer. The semantics match corresponding OpenVINO operation classes declared in `namespace opset7`. 
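The `namespace opsetN` wording used in these specification pages corresponds to the C++ opset headers. As a rough illustration (assumed, not taken from the patched files), an operation from `opset8` can be instantiated directly when building a model in code:

```cpp
#include <openvino/openvino.hpp>
#include <openvino/opsets/opset8.hpp>

// Builds a tiny model whose single Add node matches the "Add" operation
// described in the opset8 specification.
std::shared_ptr<ov::Model> make_add_model() {
    auto a = std::make_shared<ov::opset8::Parameter>(ov::element::f32, ov::Shape{1, 3});
    auto b = std::make_shared<ov::opset8::Parameter>(ov::element::f32, ov::Shape{1, 3});
    auto add = std::make_shared<ov::opset8::Add>(a, b);
    return std::make_shared<ov::Model>(ov::OutputVector{add}, ov::ParameterVector{a, b});
}
```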
diff --git a/docs/ops/opset8.md b/docs/ops/opset8.md index a6274cdf968..70a9e98fecb 100644 --- a/docs/ops/opset8.md +++ b/docs/ops/opset8.md @@ -3,7 +3,7 @@ This specification document describes the `opset8` operation set supported in OpenVINO™. Support for each particular operation from the list below depends on the capabilities of an inference plugin and may vary among different hardware platforms and devices. Examples of operation instances are provided as IR V10 xml -snippets. Such IR is generated by the Model Optimizer. The semantics match corresponding nGraph operation classes +snippets. Such IR is generated by the Model Optimizer. The semantics match corresponding OpenVINO operation classes declared in `namespace opset8`. From 6062e3d4b788b1d9bbc596d4dab277247df9e980 Mon Sep 17 00:00:00 2001 From: Nikolay Tyukaev Date: Fri, 25 Feb 2022 10:34:11 +0300 Subject: [PATCH 102/310] DOCS: benchmarks ovino vs tf (#10654) * benchmarks-ovino-vs-tf * minor fixes --- docs/_static/js/graphs_ov_tf.js | 109 ++++++++++++++++++++++++++++++++ docs/_templates/layout.html | 2 - docs/conf.py | 1 + 3 files changed, 110 insertions(+), 2 deletions(-) create mode 100644 docs/_static/js/graphs_ov_tf.js diff --git a/docs/_static/js/graphs_ov_tf.js b/docs/_static/js/graphs_ov_tf.js new file mode 100644 index 00000000000..bf16e9dacc5 --- /dev/null +++ b/docs/_static/js/graphs_ov_tf.js @@ -0,0 +1,109 @@ +$(document).ready(function () { + var chartBlock = $('.chart-block-tf-ov'); + chartBlock.each(function () { + var url = $(this).data('loadcsv'); + Papa.parse(url, { + download: true, + complete: renderData($(this)) + }) + }); + + function getLabels(data) { + return data + .map((item) => item[1]); + } + + function getChartOptions(title, displayLabels) { + return { + responsive: false, + maintainAspectRatio: false, + legend: { display: true, position: 'bottom' }, + title: { + display: true, + text: title + }, + scales: { + xAxes: [{ + ticks: { + beginAtZero: true + } + }], + yAxes: [{ + ticks: { + display: displayLabels, //this will remove only the label + beginAtZero: true + } + }] + }, + plugins: { + datalabels: { + color: "#4A4A4A", + anchor: "end", + align: "end", + clamp: false, + offset: 0, + display: true, + font: { + size: 8, + family: 'Roboto' + } + } + } + } + } + + function getChartData(data) { + function getDataset(data, col, label, color) { + return { + label: label, + data: data.map(function (item) { + return item[col] + }), + backgroundColor: color, + borderColor: 'rgba(170,170,170,0)', + barThickness: 12 + } + } + return { + labels: getLabels(data), + datasets: [getDataset(data, 2, 'openvino', '#00C7FD'), getDataset(data, 3, 'TF', '#8F5DA2')] + }; + } + + function renderData(currentChart) { + return function (result) { + var data = result.data; + // remove col names + data.shift(0); + var chartName = data[1][0]; + var chartSlug = chartName.replace(')', '').replace(' (', '-'); + var graphContainer = $('
<div>');
+      var chartContainer = $('
<div>');
+      graphContainer.attr('id', 'ov-graph-container-' + chartSlug);
+      chartContainer.addClass('chart-container');
+      chartContainer.addClass('container');
+      var chartWrap = $('
<div>');
+      chartWrap.addClass('chart-wrap');
+      chartWrap.addClass('container');
+      chartContainer.append(chartWrap);
+      var chart = $('
<div>');
+      chart.addClass('chart');
+      chart.addClass('col-md-12');
+      var canvas = $('<canvas>');
+      chart.append(canvas);
+      var container = $('<div>
'); + container.addClass('row'); + container.append(chart); + var context = canvas.get(0).getContext('2d'); + context.canvas.width = context.canvas.width * 2.5; + var chartTitle = chartName + ', Throughput (FPS) Precision: FP32 (Higher is better)'; + new Chart(context, { + type: 'horizontalBar', + data: getChartData(data), + options: getChartOptions(chartTitle, true) + }); + chartContainer.append(container); + currentChart.append(chartContainer); + } + } +}); diff --git a/docs/_templates/layout.html b/docs/_templates/layout.html index f12ea6528e0..de7a83e8c19 100644 --- a/docs/_templates/layout.html +++ b/docs/_templates/layout.html @@ -12,7 +12,5 @@ - - {% endblock %} diff --git a/docs/conf.py b/docs/conf.py index 49222306ee0..54cd19265c4 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -150,6 +150,7 @@ def setup(app): app.add_config_value('repositories', repositories, rebuild=True) app.add_js_file('js/custom.js') app.add_js_file('js/graphs.js') + app.add_js_file('js/graphs_ov_tf.js') try: shutil.copytree(os.path.join(app.srcdir, 'csv'), os.path.join(app.outdir, 'csv'), dirs_exist_ok=True) except FileNotFoundError: From 9e3610c02808df435eceab5a61368fce14a1c508 Mon Sep 17 00:00:00 2001 From: Maksim Kutakov Date: Fri, 25 Feb 2022 10:55:59 +0300 Subject: [PATCH 103/310] [CPU] Fix for subnormal numbers nullifying routine (#10622) --- src/plugins/intel_cpu/src/nodes/input.cpp | 2 +- .../cpu/subgraph_tests/src/denormal_check.cpp | 110 ++++++++++++++++++ .../desktop_references_config.xml | 6 +- 3 files changed, 114 insertions(+), 4 deletions(-) create mode 100644 src/tests/functional/plugin/cpu/subgraph_tests/src/denormal_check.cpp diff --git a/src/plugins/intel_cpu/src/nodes/input.cpp b/src/plugins/intel_cpu/src/nodes/input.cpp index bcbe95ddf91..83f2d098ef2 100644 --- a/src/plugins/intel_cpu/src/nodes/input.cpp +++ b/src/plugins/intel_cpu/src/nodes/input.cpp @@ -249,7 +249,7 @@ MKLDNNInputNode::MKLDNNInputNode(const std::shared_ptr& op, const void MKLDNNInputNode::cloneBlobIfRequired() { Shape shape(constOp->get_shape().empty() ? 
ngraph::Shape(1, 1) : constOp->get_shape()); const auto prec = convertPrecision(constOp->get_element_type()); - const size_t size = shape.getRank(); + const size_t size = shape.getElementsCount(); DnnlBlockedMemoryDesc memDesc(prec, shape); auto cloneBlob = [&, this] () { diff --git a/src/tests/functional/plugin/cpu/subgraph_tests/src/denormal_check.cpp b/src/tests/functional/plugin/cpu/subgraph_tests/src/denormal_check.cpp new file mode 100644 index 00000000000..6127fb524fb --- /dev/null +++ b/src/tests/functional/plugin/cpu/subgraph_tests/src/denormal_check.cpp @@ -0,0 +1,110 @@ +// Copyright (C) 2018-2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "shared_test_classes/base/layer_test_utils.hpp" +#include "shared_test_classes/base/ov_subgraph.hpp" +#include "ngraph_functions/utils/ngraph_helpers.hpp" +#include "ngraph_functions/builders.hpp" +#include "ngraph/runtime/aligned_buffer.hpp" + +using namespace InferenceEngine; +using namespace ov::test; +namespace SubgraphTestsDefinitions { + +template +class AlignedBufferWrapper { +public: + AlignedBufferWrapper(size_t size, size_t alignment) { + _buffer.reset(new ngraph::runtime::AlignedBuffer(size * sizeof(T), alignment)); + } + AlignedBufferWrapper(const AlignedBufferWrapper&) = delete; + AlignedBufferWrapper& operator=(const AlignedBufferWrapper&) = delete; + AlignedBufferWrapper(AlignedBufferWrapper&&) = default; + AlignedBufferWrapper& operator=(AlignedBufferWrapper&&) = default; + + T* get_ptr() { + return _buffer->get_ptr(); + } + + size_t size() const { + return _buffer->size() / sizeof(T); + } +private: + std::unique_ptr _buffer = nullptr; +}; + +class DenormalNullifyCheck : public SubgraphBaseTest { +protected: +std::unique_ptr> pConstStorage; + +void validate() override { + const auto& actualOutputs = get_plugin_outputs(); + ASSERT_FALSE(actualOutputs.empty()); + auto& outTensor = actualOutputs.front(); + ASSERT_EQ(ov::element::f32, outTensor.get_element_type()) << "Unexpected element type"; + const uint32_t* data = reinterpret_cast(outTensor.data()); + bool hasDenormals = false; + for (size_t i = 0; i < outTensor.get_size(); ++i) { + if (data[i] && (data[i] & (0xff << 23)) == 0) { + hasDenormals = true; + } + } + ASSERT_FALSE(hasDenormals); +} + + +void SetUp() override { + constexpr size_t alignment = 64; // bytes cache line size, to avoid denormals zeroing due to memory reallocation in the input node implementation + const ov::Shape inpShape = {1, 24, 3, 3}; + targetStaticShapes.push_back({inpShape}); + targetDevice = CommonTestUtils::DEVICE_CPU; + + const auto elemsCount = shape_size(inpShape); + const auto rtPrc = ov::element::f32; + auto params = ngraph::builder::makeParams(rtPrc, {inpShape}); + pConstStorage.reset(new AlignedBufferWrapper(elemsCount, alignment)); + + auto constTensor = std::make_shared(rtPrc, inpShape, pConstStorage->get_ptr()); + auto constNode = std::make_shared(constTensor); + ov::NodeVector input = {params[0], constNode}; + auto concat = std::make_shared(input, 1); + + ov::ResultVector results{std::make_shared(concat->output(0))}; + + function = std::make_shared(results, params, "denormal_check"); +} +}; + +TEST_F(DenormalNullifyCheck, smoke_CPU_Denormal_Check) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + using indexInterval = std::pair; + size_t elemsCount = pConstStorage->size(); + const indexInterval intervals[] = { + {0, elemsCount/2}, + {elemsCount/2, elemsCount}, + {0, elemsCount} + }; + + constexpr unsigned seed = 1u; + constexpr unsigned denormalsCount = 15u; + 
constexpr uint32_t denormalsRange = (0xffffffffu >> 9u) - 1; + testing::internal::Random random(seed); + auto randomRange = NGraphFunctions::Utils::generateVector(elemsCount, 10, -10); + + for (auto& interval : intervals) { + auto randomIndices = NGraphFunctions::Utils::generateVector(denormalsCount, interval.second, interval.first); + std::unordered_set randomIndexSet(randomIndices.begin(), randomIndices.end()); + for (size_t i = 0; i < elemsCount; ++i) { + if (randomIndexSet.count(i)) { + auto denormal = random.Generate(denormalsRange) + 1; + pConstStorage->get_ptr()[i] = *(reinterpret_cast(&denormal)); + } else { + pConstStorage->get_ptr()[i] = randomRange[i]; + } + } + + run(); + } +} +}// namespace SubgraphTestsDefinitions \ No newline at end of file diff --git a/tests/stress_tests/.automation/memcheck_tests/precommit_configs/desktop_references_config.xml b/tests/stress_tests/.automation/memcheck_tests/precommit_configs/desktop_references_config.xml index b0db2e425f4..6bc58fb4ce8 100644 --- a/tests/stress_tests/.automation/memcheck_tests/precommit_configs/desktop_references_config.xml +++ b/tests/stress_tests/.automation/memcheck_tests/precommit_configs/desktop_references_config.xml @@ -46,18 +46,18 @@ - + # values from {"commit_id": "25c76471d78628aa772f3a7e341ae915bdce6026", "commit_date": "2022-02-23 15:55"} - + # values from {"commit_id": "25c76471d78628aa772f3a7e341ae915bdce6026", "commit_date": "2022-02-23 15:55"} - + # values from {"commit_id": "25c76471d78628aa772f3a7e341ae915bdce6026", "commit_date": "2022-02-23 15:55"} From bacf597516447ecd196ba80b58a912e08be9a035 Mon Sep 17 00:00:00 2001 From: Dmitry Pigasin Date: Fri, 25 Feb 2022 11:25:35 +0300 Subject: [PATCH 104/310] [CPP Speech Sample] Improve `-o` and `-oname` flags (#10321) * Improve `-o` and `-oname` flags * Apply clang-format tool * fix saving output files * Apply clang-format * Fix error when `-oname` not specified * apply clang format * Fix error `-oname` * Use output name with port to find model output * fix comment line breaking * fix comparison with reference for multiple outputs * Fix output name printing error * try to fix clang format * fix problem with bs > 1 * minimal change to rerun test pipeline * clang format * Revert "Fix error `-oname`" This reverts commit c33d5f16e8b6a1c3f3863c52f6b50e52d99031b7. --- samples/cpp/speech_sample/README.md | 2 +- samples/cpp/speech_sample/main.cpp | 511 +++++++++++++++------------- 2 files changed, 273 insertions(+), 240 deletions(-) diff --git a/samples/cpp/speech_sample/README.md b/samples/cpp/speech_sample/README.md index 74ce696a106..dde7d168d04 100644 --- a/samples/cpp/speech_sample/README.md +++ b/samples/cpp/speech_sample/README.md @@ -1,6 +1,6 @@ # Automatic Speech Recognition C++ Sample {#openvino_inference_engine_samples_speech_sample_README} -This sample demonstrates how to execute an Asynchronous Inference of acoustic model based on Kaldi\* neural networks and speech feature vectors. +This sample demonstrates how to execute an Asynchronous Inference of acoustic model based on Kaldi\* neural networks and speech feature vectors. The sample works with Kaldi ARK or Numpy* uncompressed NPZ files, so it does not cover an end-to-end speech recognition scenario (speech to text), requiring additional preprocessing (feature extraction) to get a feature vector from a speech signal, as well as postprocessing (decoding) to produce text from scores. 
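Before the main.cpp diff below, a rough sketch of the flow the reworked `-o`/`-oname` handling enables: each name listed in `-oname` can be used to fetch that output's score tensor through the OpenVINO 2.0 C++ API. This is illustrative only and not code from the sample; `output_names` stands for a hypothetical vector of names parsed from `-oname`:

    #include <openvino/openvino.hpp>
    #include <string>
    #include <vector>

    // Illustrative only: fetch per-output score tensors by tensor name after inference.
    void fetch_scores(ov::CompiledModel& compiled_model,
                      ov::InferRequest& request,
                      const std::vector<std::string>& output_names) {  // assumed: parsed from -oname
        request.infer();
        for (const auto& name : output_names) {
            ov::Tensor scores = request.get_tensor(compiled_model.output(name));
            const float* data = scores.data<float>();
            // 'data' holds the scores for this output; the sample then saves them with -o
            // and/or compares them against the matching -r reference file.
            (void)data;
        }
    }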
diff --git a/samples/cpp/speech_sample/main.cpp b/samples/cpp/speech_sample/main.cpp index 93c13d53bc6..135f310aa26 100644 --- a/samples/cpp/speech_sample/main.cpp +++ b/samples/cpp/speech_sample/main.cpp @@ -86,10 +86,11 @@ int main(int argc, char* argv[]) { uint32_t batchSize = (FLAGS_cw_r > 0 || FLAGS_cw_l > 0 || !FLAGS_bs) ? 1 : (uint32_t)FLAGS_bs; std::shared_ptr model; std::vector outputs; + std::vector output_names; std::vector ports; // --------------------------- Processing custom outputs --------------------------------------------- if (!FLAGS_oname.empty()) { - std::vector output_names = convert_str_to_vector(FLAGS_oname); + output_names = convert_str_to_vector(FLAGS_oname); for (const auto& output_name : output_names) { auto pos_layer = output_name.rfind(":"); if (pos_layer == std::string::npos) { @@ -248,10 +249,9 @@ int main(int argc, char* argv[]) { auto t0 = Time::now(); ms loadTime = std::chrono::duration_cast(Time::now() - t0); slog::info << "Model loading time " << loadTime.count() << " ms" << slog::endl; - slog::info << "Loading model to the device " << FLAGS_d << slog::endl; ov::CompiledModel executableNet; if (!FLAGS_m.empty()) { - slog::info << "Loading model to the device" << slog::endl; + slog::info << "Loading model to the device " << FLAGS_d << slog::endl; executableNet = core.compile_model(model, deviceStr, genericPluginConfig); } else { slog::info << "Importing model to the device" << slog::endl; @@ -344,157 +344,184 @@ int main(int argc, char* argv[]) { } // ----------------------------------------------------------------------------------------------------- // --------------------------- Step 5. Do inference -------------------------------------------------------- - for (size_t next_output = 0; next_output < count_file; next_output++) { - std::vector> ptrUtterances; - std::vector ptrScores; - std::vector ptrReferenceScores; - ScoreErrorT frameError, totalError; - ptrUtterances.resize(inputFiles.size()); - // initialize memory state before starting - for (auto&& state : inferRequests.begin()->inferRequest.query_state()) { - state.reset(); - } - /** Work with each utterance **/ - for (uint32_t utteranceIndex = 0; utteranceIndex < numUtterances; ++utteranceIndex) { - std::map utterancePerfMap; - uint64_t totalNumberOfRunsOnHw = 0; - std::string uttName; - uint32_t numFrames(0), n(0); - std::vector numFrameElementsInput; - uint32_t numFramesReference(0), numFrameElementsReference(0), numBytesPerElementReference(0), - numBytesReferenceScoreThisUtterance(0); - auto dims = executableNet.outputs()[0].get_shape(); - const auto numScoresPerFrame = - std::accumulate(std::begin(dims), std::end(dims), size_t{1}, std::multiplies()); - slog::info << "Number scores per frame : " << numScoresPerFrame << slog::endl; - /** Get information from input file for current utterance **/ - numFrameElementsInput.resize(numInputFiles); - for (size_t i = 0; i < inputFiles.size(); i++) { - std::vector ptrUtterance; - auto inputFilename = inputFiles[i].c_str(); - uint32_t currentNumFrames(0), currentNumFrameElementsInput(0), currentNumBytesPerElementInput(0); - file->get_file_info(inputFilename, utteranceIndex, &n, &numBytesThisUtterance[i]); - ptrUtterance.resize(numBytesThisUtterance[i]); - file->load_file(inputFilename, - utteranceIndex, - uttName, - ptrUtterance, - ¤tNumFrames, - ¤tNumFrameElementsInput, - ¤tNumBytesPerElementInput); - if (numFrames == 0) { - numFrames = currentNumFrames; - } else if (numFrames != currentNumFrames) { - std::string errMessage("Number of frames in input 
files is different: " + - std::to_string(numFrames) + " and " + std::to_string(currentNumFrames)); - throw std::logic_error(errMessage); - } - ptrUtterances[i] = ptrUtterance; - numFrameElementsInput[i] = currentNumFrameElementsInput; - } - int i = 0; - for (auto& ptrInputBlob : ptrInputBlobs) { - if (ptrInputBlob.get_size() != numFrameElementsInput[i++] * batchSize) { - throw std::logic_error("network input size(" + std::to_string(ptrInputBlob.get_size()) + - ") mismatch to input file size (" + - std::to_string(numFrameElementsInput[i - 1] * batchSize) + ")"); - } - } - ptrScores.resize(numFrames * numScoresPerFrame * sizeof(float)); - if (!FLAGS_r.empty()) { - /** Read file with reference scores **/ - BaseFile* fileReferenceScores; - auto exReferenceScoresFile = fileExt(FLAGS_r); - if (exReferenceScoresFile == "ark") { - fileReferenceScores = &arkFile; - } else if (exReferenceScoresFile == "npz") { - fileReferenceScores = &numpyFile; - } else { - throw std::logic_error("Invalid Reference Scores file"); - } - std::string refUtteranceName; - fileReferenceScores->get_file_info(reference_name_files[next_output].c_str(), - utteranceIndex, - &n, - &numBytesReferenceScoreThisUtterance); - ptrReferenceScores.resize(numBytesReferenceScoreThisUtterance); - fileReferenceScores->load_file(reference_name_files[next_output].c_str(), - utteranceIndex, - refUtteranceName, - ptrReferenceScores, - &numFramesReference, - &numFrameElementsReference, - &numBytesPerElementReference); - } - double totalTime = 0.0; - std::cout << "Utterance " << utteranceIndex << ": " << std::endl; - clear_score_error(&totalError); - totalError.threshold = frameError.threshold = MAX_SCORE_DIFFERENCE; - auto outputFrame = &ptrScores.front(); - std::vector inputFrame; - for (auto& ut : ptrUtterances) { - inputFrame.push_back(&ut.front()); - } - std::map callPerfMap; - size_t frameIndex = 0; - uint32_t numFramesFile = numFrames; - numFrames += FLAGS_cw_l + FLAGS_cw_r; - uint32_t numFramesThisBatch{batchSize}; - auto t0 = Time::now(); - auto t1 = t0; - while (frameIndex <= numFrames) { - if (frameIndex == numFrames) { - if (std::find_if(inferRequests.begin(), inferRequests.end(), [&](InferRequestStruct x) { - return (x.frameIndex != -1); - }) == inferRequests.end()) { - break; - } - } - bool inferRequestFetched = false; - /** Start inference loop **/ - for (auto& inferRequest : inferRequests) { - if (frameIndex == numFrames) { - numFramesThisBatch = 1; - } else { - numFramesThisBatch = - (numFrames - frameIndex < batchSize) ? (numFrames - frameIndex) : batchSize; - } + std::vector> ptrUtterances; + std::vector> vectorPtrScores((outputs.size() == 0) ? 1 : outputs.size()); + std::vector numScoresPerOutput((outputs.size() == 0) ? 
1 : outputs.size()); + std::vector> vectorPtrReferenceScores(reference_name_files.size()); + std::vector vectorFrameError(reference_name_files.size()), + vectorTotalError(reference_name_files.size()); + ptrUtterances.resize(inputFiles.size()); + // initialize memory state before starting + for (auto&& state : inferRequests.begin()->inferRequest.query_state()) { + state.reset(); + } + /** Work with each utterance **/ + for (uint32_t utteranceIndex = 0; utteranceIndex < numUtterances; ++utteranceIndex) { + std::map utterancePerfMap; + uint64_t totalNumberOfRunsOnHw = 0; + std::string uttName; + uint32_t numFrames(0), n(0); + std::vector numFrameElementsInput; + std::vector numFramesReference(reference_name_files.size()), + numFrameElementsReference(reference_name_files.size()), + numBytesPerElementReference(reference_name_files.size()), + numBytesReferenceScoreThisUtterance(reference_name_files.size()); + + /** Get information from input file for current utterance **/ + numFrameElementsInput.resize(numInputFiles); + for (size_t i = 0; i < inputFiles.size(); i++) { + std::vector ptrUtterance; + auto inputFilename = inputFiles[i].c_str(); + uint32_t currentNumFrames(0), currentNumFrameElementsInput(0), currentNumBytesPerElementInput(0); + file->get_file_info(inputFilename, utteranceIndex, &n, &numBytesThisUtterance[i]); + ptrUtterance.resize(numBytesThisUtterance[i]); + file->load_file(inputFilename, + utteranceIndex, + uttName, + ptrUtterance, + ¤tNumFrames, + ¤tNumFrameElementsInput, + ¤tNumBytesPerElementInput); + if (numFrames == 0) { + numFrames = currentNumFrames; + } else if (numFrames != currentNumFrames) { + std::string errMessage("Number of frames in input files is different: " + + std::to_string(numFrames) + " and " + std::to_string(currentNumFrames)); + throw std::logic_error(errMessage); + } + ptrUtterances[i] = ptrUtterance; + numFrameElementsInput[i] = currentNumFrameElementsInput; + } + int i = 0; + for (auto& ptrInputBlob : ptrInputBlobs) { + if (ptrInputBlob.get_size() != numFrameElementsInput[i++] * batchSize) { + throw std::logic_error("network input size(" + std::to_string(ptrInputBlob.get_size()) + + ") mismatch to input file size (" + + std::to_string(numFrameElementsInput[i - 1] * batchSize) + ")"); + } + } + + double totalTime = 0.0; + + for (size_t errorIndex = 0; errorIndex < vectorFrameError.size(); errorIndex++) { + clear_score_error(&vectorTotalError[errorIndex]); + vectorTotalError[errorIndex].threshold = vectorFrameError[errorIndex].threshold = MAX_SCORE_DIFFERENCE; + } + + std::vector inputFrame; + for (auto& ut : ptrUtterances) { + inputFrame.push_back(&ut.front()); + } + std::map callPerfMap; + size_t frameIndex = 0; + uint32_t numFramesFile = numFrames; + numFrames += FLAGS_cw_l + FLAGS_cw_r; + uint32_t numFramesThisBatch{batchSize}; + auto t0 = Time::now(); + auto t1 = t0; + + BaseFile* fileReferenceScores; + std::string refUtteranceName; + + if (!FLAGS_r.empty()) { + /** Read file with reference scores **/ + auto exReferenceScoresFile = fileExt(FLAGS_r); + if (exReferenceScoresFile == "ark") { + fileReferenceScores = &arkFile; + } else if (exReferenceScoresFile == "npz") { + fileReferenceScores = &numpyFile; + } else { + throw std::logic_error("Invalid Reference Scores file"); + } + for (size_t next_output = 0; next_output < count_file; next_output++) { + if (fileReferenceScores != nullptr) { + fileReferenceScores->get_file_info(reference_name_files[next_output].c_str(), + utteranceIndex, + &n, + &numBytesReferenceScoreThisUtterance[next_output]); + 
vectorPtrReferenceScores[next_output].resize(numBytesReferenceScoreThisUtterance[next_output]); + fileReferenceScores->load_file(reference_name_files[next_output].c_str(), + utteranceIndex, + refUtteranceName, + vectorPtrReferenceScores[next_output], + &numFramesReference[next_output], + &numFrameElementsReference[next_output], + &numBytesPerElementReference[next_output]); + } + } + } + + while (frameIndex <= numFrames) { + if (frameIndex == numFrames) { + if (std::find_if(inferRequests.begin(), inferRequests.end(), [&](InferRequestStruct x) { + return (x.frameIndex != -1); + }) == inferRequests.end()) { + break; + } + } + bool inferRequestFetched = false; + /** Start inference loop **/ + for (auto& inferRequest : inferRequests) { + if (frameIndex == numFrames) { + numFramesThisBatch = 1; + } else { + numFramesThisBatch = + (numFrames - frameIndex < batchSize) ? (numFrames - frameIndex) : batchSize; + } + + /* waits until inference result becomes available */ + if (inferRequest.frameIndex != -1) { + inferRequest.inferRequest.wait(); + if (inferRequest.frameIndex >= 0) + for (size_t next_output = 0; next_output < count_file; next_output++) { + std::string outputName = (outputs.size() == 0) ? executableNet.output(0).get_any_name() + : output_names[next_output]; + auto dims = executableNet.output(outputName).get_shape(); + numScoresPerOutput[next_output] = std::accumulate(std::begin(dims), + std::end(dims), + size_t{1}, + std::multiplies()); + + vectorPtrScores[next_output].resize(numFramesFile * numScoresPerOutput[next_output] * + sizeof(float)); - /* waits until inference result becomes available */ - if (inferRequest.frameIndex != -1) { - inferRequest.inferRequest.wait(); - if (inferRequest.frameIndex >= 0) { if (!FLAGS_o.empty()) { /* Prepare output data for save to file in future */ - outputFrame = &ptrScores.front() + - numScoresPerFrame * sizeof(float) * (inferRequest.frameIndex); + auto outputFrame = + &vectorPtrScores[next_output].front() + + numScoresPerOutput[next_output] * sizeof(float) * (inferRequest.frameIndex); ov::Tensor outputBlob = - inferRequest.inferRequest.get_tensor(executableNet.outputs()[0]); + inferRequest.inferRequest.get_tensor(executableNet.output(outputName)); if (!outputs.empty()) { outputBlob = - inferRequest.inferRequest.get_tensor(executableNet.output(FLAGS_oname)); + inferRequest.inferRequest.get_tensor(executableNet.output(outputName)); } - // locked memory holder should be alive all time while access to its buffer - // happens - auto byteSize = numScoresPerFrame * sizeof(float); + // locked memory holder should be alive all time while access to its buffer happens + auto byteSize = numScoresPerOutput[next_output] * sizeof(float); std::memcpy(outputFrame, outputBlob.data(), byteSize); } if (!FLAGS_r.empty()) { /** Compare output data with reference scores **/ ov::Tensor outputBlob = - inferRequest.inferRequest.get_tensor(executableNet.outputs()[0]); - if (!FLAGS_oname.empty()) - outputBlob = - inferRequest.inferRequest.get_tensor(executableNet.output(FLAGS_oname)); - compare_scores( - outputBlob.data(), - &ptrReferenceScores[inferRequest.frameIndex * numFrameElementsReference * - numBytesPerElementReference], - &frameError, - inferRequest.numFramesThisBatch, - numFrameElementsReference); - update_score_error(&frameError, &totalError); + inferRequest.inferRequest.get_tensor(executableNet.output(outputName)); + + if (numScoresPerOutput[next_output] / numFrameElementsReference[next_output] == + batchSize) { + compare_scores( + outputBlob.data(), + 
&vectorPtrReferenceScores[next_output] + [inferRequest.frameIndex * + numFrameElementsReference[next_output] * + numBytesPerElementReference[next_output]], + &vectorFrameError[next_output], + inferRequest.numFramesThisBatch, + numFrameElementsReference[next_output]); + update_score_error(&vectorFrameError[next_output], + &vectorTotalError[next_output]); + } else { + throw std::logic_error("Number of output and reference frames does not match."); + } } if (FLAGS_pc) { // retrieve new counters @@ -503,90 +530,108 @@ int main(int argc, char* argv[]) { sum_performance_counters(callPerfMap, utterancePerfMap, totalNumberOfRunsOnHw); } } - // ----------------------------------------------------------------------------------------------------- - } - if (frameIndex == numFrames) { - inferRequest.frameIndex = -1; - continue; - } - ptrInputBlobs.clear(); - if (FLAGS_iname.empty()) { - for (auto& input : cInputInfo) { - ptrInputBlobs.push_back(inferRequest.inferRequest.get_tensor(input)); - } - } else { - std::vector inputNameBlobs = convert_str_to_vector(FLAGS_iname); - for (const auto& input : inputNameBlobs) { - ov::Tensor blob = inferRequests.begin()->inferRequest.get_tensor(input); - if (!blob) { - std::string errMessage("No blob with name : " + input); - throw std::logic_error(errMessage); - } - ptrInputBlobs.push_back(blob); - } - } - - /** Iterate over all the input blobs **/ - for (size_t i = 0; i < numInputFiles; ++i) { - ov::Tensor minput = ptrInputBlobs[i]; - if (!minput) { - std::string errMessage("We expect ptrInputBlobs[" + std::to_string(i) + - "] to be inherited from Tensor, " + - "but in fact we were not able to cast input to Tensor"); - throw std::logic_error(errMessage); - } - memcpy(minput.data(), inputFrame[i], minput.get_byte_size()); - // Used to infer fewer frames than the batch size - if (batchSize != numFramesThisBatch) { - memset(minput.data() + numFramesThisBatch * numFrameElementsInput[i], - 0, - (batchSize - numFramesThisBatch) * numFrameElementsInput[i]); - } - } // ----------------------------------------------------------------------------------------------------- - int index = static_cast(frameIndex) - (FLAGS_cw_l + FLAGS_cw_r); - /* Starting inference in asynchronous mode*/ - inferRequest.inferRequest.start_async(); - inferRequest.frameIndex = index < 0 ? 
-2 : index; - inferRequest.numFramesThisBatch = numFramesThisBatch; - frameIndex += numFramesThisBatch; - for (size_t j = 0; j < inputFiles.size(); j++) { - if (FLAGS_cw_l > 0 || FLAGS_cw_r > 0) { - int idx = frameIndex - FLAGS_cw_l; - if (idx > 0 && idx < static_cast(numFramesFile)) { - inputFrame[j] += sizeof(float) * numFrameElementsInput[j] * numFramesThisBatch; - } else if (idx >= static_cast(numFramesFile)) { - inputFrame[j] = &ptrUtterances[j].front() + (numFramesFile - 1) * sizeof(float) * - numFrameElementsInput[j] * - numFramesThisBatch; - } else if (idx <= 0) { - inputFrame[j] = &ptrUtterances[j].front(); - } - } else { - inputFrame[j] += sizeof(float) * numFrameElementsInput[j] * numFramesThisBatch; - } - } - inferRequestFetched |= true; } - /** Inference was finished for current frame **/ - if (!inferRequestFetched) { - std::this_thread::sleep_for(std::chrono::milliseconds(1)); + if (frameIndex == numFrames) { + inferRequest.frameIndex = -1; continue; } - } - t1 = Time::now(); - fsec fs = t1 - t0; - ms d = std::chrono::duration_cast(fs); - totalTime += d.count(); - // resetting state between utterances - for (auto&& state : inferRequests.begin()->inferRequest.query_state()) { - state.reset(); - } - // ----------------------------------------------------------------------------------------------------- + ptrInputBlobs.clear(); + if (FLAGS_iname.empty()) { + for (auto& input : cInputInfo) { + ptrInputBlobs.push_back(inferRequest.inferRequest.get_tensor(input)); + } + } else { + std::vector inputNameBlobs = convert_str_to_vector(FLAGS_iname); + for (const auto& input : inputNameBlobs) { + ov::Tensor blob = inferRequests.begin()->inferRequest.get_tensor(input); + if (!blob) { + std::string errMessage("No blob with name : " + input); + throw std::logic_error(errMessage); + } + ptrInputBlobs.push_back(blob); + } + } - // --------------------------- Step 6. Process output - // ------------------------------------------------------- + /** Iterate over all the input blobs **/ + for (size_t i = 0; i < numInputFiles; ++i) { + ov::Tensor minput = ptrInputBlobs[i]; + if (!minput) { + std::string errMessage("We expect ptrInputBlobs[" + std::to_string(i) + + "] to be inherited from Tensor, " + + "but in fact we were not able to cast input to Tensor"); + throw std::logic_error(errMessage); + } + memcpy(minput.data(), inputFrame[i], minput.get_byte_size()); + // Used to infer fewer frames than the batch size + if (batchSize != numFramesThisBatch) { + memset(minput.data() + numFramesThisBatch * numFrameElementsInput[i], + 0, + (batchSize - numFramesThisBatch) * numFrameElementsInput[i]); + } + } + // ----------------------------------------------------------------------------------------------------- + int index = static_cast(frameIndex) - (FLAGS_cw_l + FLAGS_cw_r); + /* Starting inference in asynchronous mode*/ + inferRequest.inferRequest.start_async(); + inferRequest.frameIndex = index < 0 ? 
-2 : index; + inferRequest.numFramesThisBatch = numFramesThisBatch; + frameIndex += numFramesThisBatch; + for (size_t j = 0; j < inputFiles.size(); j++) { + if (FLAGS_cw_l > 0 || FLAGS_cw_r > 0) { + int idx = frameIndex - FLAGS_cw_l; + if (idx > 0 && idx < static_cast(numFramesFile)) { + inputFrame[j] += sizeof(float) * numFrameElementsInput[j] * numFramesThisBatch; + } else if (idx >= static_cast(numFramesFile)) { + inputFrame[j] = &ptrUtterances[j].front() + (numFramesFile - 1) * sizeof(float) * + numFrameElementsInput[j] * + numFramesThisBatch; + } else if (idx <= 0) { + inputFrame[j] = &ptrUtterances[j].front(); + } + } else { + inputFrame[j] += sizeof(float) * numFrameElementsInput[j] * numFramesThisBatch; + } + } + inferRequestFetched |= true; + } + /** Inference was finished for current frame **/ + if (!inferRequestFetched) { + std::this_thread::sleep_for(std::chrono::milliseconds(1)); + continue; + } + } + t1 = Time::now(); + fsec fs = t1 - t0; + ms d = std::chrono::duration_cast(fs); + totalTime += d.count(); + // resetting state between utterances + for (auto&& state : inferRequests.begin()->inferRequest.query_state()) { + state.reset(); + } + // ----------------------------------------------------------------------------------------------------- + // --------------------------- Step 6. Process output + // ------------------------------------------------------- + + /** Show performance results **/ + std::cout << "Utterance " << utteranceIndex << ": " << std::endl; + std::cout << "Total time in Infer (HW and SW):\t" << totalTime << " ms" << std::endl; + std::cout << "Frames in utterance:\t\t\t" << numFrames << " frames" << std::endl; + std::cout << "Average Infer time per frame:\t\t" << totalTime / static_cast(numFrames) << " ms\n" + << std::endl; + + if (FLAGS_pc) { + // print performance results + print_performance_counters(utterancePerfMap, + frameIndex, + std::cout, + getFullDeviceName(core, FLAGS_d), + totalNumberOfRunsOnHw, + FLAGS_d); + } + + for (size_t next_output = 0; next_output < count_file; next_output++) { if (!FLAGS_o.empty()) { auto exOutputScoresFile = fileExt(FLAGS_o); if (exOutputScoresFile == "ark") { @@ -601,33 +646,21 @@ int main(int argc, char* argv[]) { fileOutput->save_file(output_name_files[next_output].c_str(), shouldAppend, uttName, - &ptrScores.front(), + &vectorPtrScores[next_output].front(), numFramesFile, - numScoresPerFrame); - } - /** Show performance results **/ - std::cout << "Total time in Infer (HW and SW):\t" << totalTime << " ms" << std::endl; - std::cout << "Frames in utterance:\t\t\t" << numFrames << " frames" << std::endl; - std::cout << "Average Infer time per frame:\t\t" << totalTime / static_cast(numFrames) << " ms" - << std::endl; - if (FLAGS_pc) { - // print performance results - print_performance_counters(utterancePerfMap, - frameIndex, - std::cout, - getFullDeviceName(core, FLAGS_d), - totalNumberOfRunsOnHw, - FLAGS_d); + numScoresPerOutput[next_output]); } if (!FLAGS_r.empty()) { // print statistical score error - print_reference_compare_results(totalError, numFrames, std::cout); + std::string outputName = + (outputs.size() == 0) ? 
executableNet.output(0).get_any_name() : output_names[next_output]; + std::cout << "Output name: " << outputName << std::endl; + std::cout << "Number scores per frame: " << numScoresPerOutput[next_output] / batchSize << std::endl + << std::endl; + print_reference_compare_results(vectorTotalError[next_output], numFrames, std::cout); } - std::cout << "End of Utterance " << utteranceIndex << std::endl << std::endl; - // ----------------------------------------------------------------------------------------------------- } } - // ----------------------------------------------------------------------------------------------------- } catch (const std::exception& error) { slog::err << error.what() << slog::endl; return 1; From a32ed5a07aafc3d1aedf8651bf363a12673bbd71 Mon Sep 17 00:00:00 2001 From: Ilya Churaev Date: Fri, 25 Feb 2022 11:41:23 +0300 Subject: [PATCH 105/310] Fixed build for CI (#10659) --- src/tests/functional/plugin/cpu/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tests/functional/plugin/cpu/CMakeLists.txt b/src/tests/functional/plugin/cpu/CMakeLists.txt index 6af1b7155cd..2790bf21525 100644 --- a/src/tests/functional/plugin/cpu/CMakeLists.txt +++ b/src/tests/functional/plugin/cpu/CMakeLists.txt @@ -14,7 +14,7 @@ set(LINK_LIBRARIES funcSharedTests cpuSpecificRtInfo) if (ENABLE_OV_ONNX_FRONTEND) list(APPEND DEFINES TEST_MODELS="${TEST_MODEL_ZOO}") else() - set(EXCLUDED_SOURCE_PATHS ${CMAKE_CURRENT_SOURCE_DIR}/onnx) + set(EXCLUDED_SOURCE_PATHS ${CMAKE_CURRENT_SOURCE_DIR}/extension ${CMAKE_CURRENT_SOURCE_DIR}/onnx) endif() addIeTargetTest( From 38d87dd9de186a6979dd2c0b1614a5f6f891eae1 Mon Sep 17 00:00:00 2001 From: Anton Pankratov Date: Fri, 25 Feb 2022 11:57:23 +0300 Subject: [PATCH 106/310] Removed stream enum (#10645) * Removed stream enum * Fixed build * fixed build * Fixed test --- .../include/openvino/runtime/properties.hpp | 31 +++++-------------- src/plugins/intel_cpu/src/plugin.cpp | 4 +-- .../ov_executable_network/properties.cpp | 12 +++---- 3 files changed, 15 insertions(+), 32 deletions(-) diff --git a/src/inference/include/openvino/runtime/properties.hpp b/src/inference/include/openvino/runtime/properties.hpp index 1c8632db316..7a3e7e0136c 100644 --- a/src/inference/include/openvino/runtime/properties.hpp +++ b/src/inference/include/openvino/runtime/properties.hpp @@ -652,23 +652,11 @@ namespace streams { struct Num { using Base = std::tuple; //!< NumStreams is representable as int32_t - /** - * @brief Special value for ov::execution::num_streams property. 
- */ - enum Special { - AUTO = -1, //!< Creates bare minimum of streams to improve the performance - NUMA = -2, //!< Creates as many streams as needed to accommodate NUMA and avoid associated penalties - }; - - constexpr Num() : num{AUTO} {}; + constexpr Num() : num{-1} {}; constexpr Num(const int32_t num_) : num{num_} {} - operator int32_t() { - return num; - } - - operator int32_t() const { + constexpr operator int32_t() const { return num; } @@ -680,16 +668,16 @@ struct Num { */ static constexpr Property num{"NUM_STREAMS"}; -static constexpr Num AUTO{Num::AUTO}; //!< Creates bare minimum of streams to improve the performance +static constexpr Num AUTO{-1}; //!< Creates bare minimum of streams to improve the performance static constexpr Num NUMA{ - Num::NUMA}; //!< Creates as many streams as needed to accommodate NUMA and avoid associated penalties + -2}; //!< Creates as many streams as needed to accommodate NUMA and avoid associated penalties /** @cond INTERNAL */ inline std::ostream& operator<<(std::ostream& os, const Num& num) { - switch (num.num) { - case Num::AUTO: + switch (num) { + case AUTO: return os << "AUTO"; - case Num::NUMA: + case NUMA: return os << "NUMA"; default: return os << num.num; @@ -715,11 +703,6 @@ inline std::istream& operator>>(std::istream& is, Num& num) { /** @endcond */ } // namespace streams -/** - * @brief Class to represent number of streams in streams executor - */ -using NumStreams = streams::Num; - /** * @brief The number of executor logical partitions */ diff --git a/src/plugins/intel_cpu/src/plugin.cpp b/src/plugins/intel_cpu/src/plugin.cpp index afe9058d4a7..cadea8efd78 100644 --- a/src/plugins/intel_cpu/src/plugin.cpp +++ b/src/plugins/intel_cpu/src/plugin.cpp @@ -569,7 +569,7 @@ void Engine::ApplyPerformanceHints(std::map &config, c if (mode_name == CONFIG_VALUE(LATENCY)) { config[CONFIG_KEY(CPU_THROUGHPUT_STREAMS)] = CONFIG_VALUE(CPU_THROUGHPUT_NUMA); - config[ov::num_streams.name()] = ov::util::to_string(ov::NumStreams(ov::NumStreams::NUMA)); + config[ov::num_streams.name()] = ov::util::to_string(ov::streams::NUMA); } else if (mode_name == CONFIG_VALUE(THROUGHPUT)) { const auto isa = dnnl::get_effective_cpu_isa(); float isaSpecificThreshold = 1.0f; @@ -627,7 +627,7 @@ void Engine::ApplyPerformanceHints(std::map &config, c engConfig.perfHintsConfig.ovPerfHintNumRequests); } config[CONFIG_KEY(CPU_THROUGHPUT_STREAMS)] = std::to_string(num_streams); - config[ov::num_streams.name()] = ov::util::to_string(ov::NumStreams(num_streams)); + config[ov::num_streams.name()] = ov::util::to_string(ov::streams::NUMA); } } diff --git a/src/tests/functional/plugin/cpu/shared_tests_instances/behavior/ov_executable_network/properties.cpp b/src/tests/functional/plugin/cpu/shared_tests_instances/behavior/ov_executable_network/properties.cpp index 96189a934cc..4b6066fb8b5 100644 --- a/src/tests/functional/plugin/cpu/shared_tests_instances/behavior/ov_executable_network/properties.cpp +++ b/src/tests/functional/plugin/cpu/shared_tests_instances/behavior/ov_executable_network/properties.cpp @@ -86,32 +86,32 @@ INSTANTIATE_TEST_SUITE_P(smoke_BehaviorTests, OVCompiledModelPropertiesDefaultTe OVCompiledModelPropertiesDefaultTests::getTestCaseName); const std::vector properties = { - {ov::num_streams(ov::NumStreams::AUTO)}, - {ov::num_streams(ov::NumStreams::NUMA)}, + {ov::num_streams(ov::streams::NUMA)}, + {ov::num_streams(ov::streams::AUTO)}, {ov::num_streams(0), ov::inference_num_threads(1)}, {ov::num_streams(1), ov::inference_num_threads(1)}, 
{{InferenceEngine::PluginConfigParams::KEY_CPU_THROUGHPUT_STREAMS, InferenceEngine::PluginConfigParams::CPU_THROUGHPUT_AUTO}} }; const std::vector hetero_properties = { - {ov::device::priorities(CommonTestUtils::DEVICE_CPU), ov::num_streams(ov::NumStreams::AUTO)}, + {ov::device::priorities(CommonTestUtils::DEVICE_CPU), ov::num_streams(ov::streams::AUTO)}, {ov::device::priorities(CommonTestUtils::DEVICE_CPU), {InferenceEngine::PluginConfigParams::KEY_CPU_THROUGHPUT_STREAMS, InferenceEngine::PluginConfigParams::CPU_THROUGHPUT_AUTO}}, }; const std::vector multi_properties = { - {ov::device::priorities(CommonTestUtils::DEVICE_CPU), ov::num_streams(ov::NumStreams::AUTO)}, + {ov::device::priorities(CommonTestUtils::DEVICE_CPU), ov::num_streams(ov::streams::AUTO)}, {ov::device::priorities(CommonTestUtils::DEVICE_CPU), {InferenceEngine::PluginConfigParams::KEY_CPU_THROUGHPUT_STREAMS, InferenceEngine::PluginConfigParams::CPU_THROUGHPUT_AUTO}}, }; const std::vector auto_batch_properties = { - {{CONFIG_KEY(AUTO_BATCH_DEVICE_CONFIG) , CommonTestUtils::DEVICE_CPU}, ov::num_streams(ov::NumStreams::AUTO)}, + {{CONFIG_KEY(AUTO_BATCH_DEVICE_CONFIG) , CommonTestUtils::DEVICE_CPU}, ov::num_streams(ov::streams::AUTO)}, {{CONFIG_KEY(AUTO_BATCH_DEVICE_CONFIG) , CommonTestUtils::DEVICE_CPU}, {InferenceEngine::PluginConfigParams::KEY_CPU_THROUGHPUT_STREAMS, InferenceEngine::PluginConfigParams::CPU_THROUGHPUT_AUTO}}, {{CONFIG_KEY(AUTO_BATCH_DEVICE_CONFIG) , CommonTestUtils::DEVICE_CPU}, - {CONFIG_KEY(AUTO_BATCH_TIMEOUT) , "1"}, ov::num_streams(ov::NumStreams::AUTO)}, + {CONFIG_KEY(AUTO_BATCH_TIMEOUT) , "1"}, ov::num_streams(ov::streams::AUTO)}, }; INSTANTIATE_TEST_SUITE_P(smoke_BehaviorTests, OVCompiledModelPropertiesTests, From 14d11a8998dcc6d8fe459c8afc80a7b5db9a68cc Mon Sep 17 00:00:00 2001 From: Yury Gaydaychuk Date: Fri, 25 Feb 2022 12:57:03 +0300 Subject: [PATCH 108/310] [CPU] Fix of invalid read in DefConv (#10481) --- src/plugins/intel_cpu/src/nodes/def_conv.cpp | 89 +++++++++++++++++++ .../skip_tests_config.cpp | 3 - 2 files changed, 89 insertions(+), 3 deletions(-) diff --git a/src/plugins/intel_cpu/src/nodes/def_conv.cpp b/src/plugins/intel_cpu/src/nodes/def_conv.cpp index fb82f296a80..87281c49beb 100644 --- a/src/plugins/intel_cpu/src/nodes/def_conv.cpp +++ b/src/plugins/intel_cpu/src/nodes/def_conv.cpp @@ -115,6 +115,11 @@ private: Xbyak::Label l_table; + inline void checkZeroWei(const Xbyak::Xmm &x1, Label &nullifyLabel) { + uni_vtestps(x1, x1); + jz(nullifyLabel); + } + void ow_loop() { Label ow_loop_main; Label ow_tail; @@ -280,6 +285,22 @@ private: Label ic_loop_main; Label ic_loop_tail; Label loop_end; + Label nullify_v1; + Label nullify_v2; + Label nullify_v3; + Label nullify_v4; + Label nullify_v1_end; + Label nullify_v2_end; + Label nullify_v3_end; + Label nullify_v4_end; + Label nullify_v1_tail; + Label nullify_v2_tail; + Label nullify_v3_tail; + Label nullify_v4_tail; + Label nullify_v1_end_tail; + Label nullify_v2_end_tail; + Label nullify_v3_end_tail; + Label nullify_v4_end_tail; mov(aux2_reg_input, aux_reg_input); add(aux2_reg_input, (ow * jcp_.stride_w * jcp_.ic) * jcp_.typesize_in); @@ -337,35 +358,69 @@ private: cmp(reg_ic_iter, simd_w); jl(ic_loop_tail, T_NEAR); + // check zero markers + uni_vbroadcastss(xmm_v1, dword[aux_reg_sampled_wei + ind_off_ll * jcp_.typesize_sampled_wei]); + uni_vbroadcastss(xmm_v2, dword[aux_reg_sampled_wei + ind_off_hl * jcp_.typesize_sampled_wei]); + uni_vbroadcastss(xmm_v3, dword[aux_reg_sampled_wei + ind_off_lh * jcp_.typesize_sampled_wei]); + 
uni_vbroadcastss(xmm_v4, dword[aux_reg_sampled_wei + ind_off_hh * jcp_.typesize_sampled_wei]); + size_t input_buffer_off = (size_t) kh * jcp_.kw * jcp_.ic + kw * jcp_.ic; uni_vpmovsxdq(xmm_v1_off, xmm_v1_off); uni_vmovq(reg_tmp_64, xmm_v1_off); imul(reg_tmp_64, reg_tmp_64, jcp_.ic * jcp_.typesize_in); add(reg_tmp_64, aux2_reg_input); + checkZeroWei(xmm_v1, nullify_v1); uni_vmovups(vmm_v1, ptr[reg_tmp_64]); uni_vmulps(vmm_v1, vmm_v1, vmm_w1); + jmp(nullify_v1_end, T_NEAR); + L(nullify_v1); + { + uni_vpxor(vmm_v1, vmm_v1, vmm_v1); + } + L(nullify_v1_end); uni_vpmovsxdq(xmm_v2_off, xmm_v2_off); uni_vmovq(reg_tmp_64, xmm_v2_off); imul(reg_tmp_64, reg_tmp_64, jcp_.ic * jcp_.typesize_in); add(reg_tmp_64, aux2_reg_input); + checkZeroWei(xmm_v2, nullify_v2); uni_vmovups(vmm_v2, ptr[reg_tmp_64]); uni_vmulps(vmm_v2, vmm_v2, vmm_w2); + jmp(nullify_v2_end, T_NEAR); + L(nullify_v2); + { + uni_vpxor(vmm_v2, vmm_v2, vmm_v2); + } + L(nullify_v2_end); uni_vpmovsxdq(xmm_v3_off, xmm_v3_off); uni_vmovq(reg_tmp_64, xmm_v3_off); imul(reg_tmp_64, reg_tmp_64, jcp_.ic * jcp_.typesize_in); add(reg_tmp_64, aux2_reg_input); + checkZeroWei(xmm_v3, nullify_v3); uni_vmovups(vmm_v3, ptr[reg_tmp_64]); uni_vmulps(vmm_v3, vmm_v3, vmm_w3); + jmp(nullify_v3_end, T_NEAR); + L(nullify_v3); + { + uni_vpxor(vmm_v3, vmm_v3, vmm_v3); + } + L(nullify_v3_end); uni_vpmovsxdq(xmm_v4_off, xmm_v4_off); uni_vmovq(reg_tmp_64, xmm_v4_off); imul(reg_tmp_64, reg_tmp_64, jcp_.ic * jcp_.typesize_in); add(reg_tmp_64, aux2_reg_input); + checkZeroWei(xmm_v4, nullify_v4); uni_vmovups(vmm_v4, ptr[reg_tmp_64]); uni_vmulps(vmm_v4, vmm_v4, vmm_w4); + jmp(nullify_v4_end, T_NEAR); + L(nullify_v4); + { + uni_vpxor(vmm_v4, vmm_v4, vmm_v4); + } + L(nullify_v4_end); uni_vaddps(vmm_v1, vmm_v1, vmm_v2); uni_vaddps(vmm_v1, vmm_v1, vmm_v3); @@ -383,34 +438,68 @@ private: cmp(reg_ic_iter, 1); jl(loop_end, T_NEAR); + // check zero markers + uni_vbroadcastss(xmm_v1, dword[aux_reg_sampled_wei + ind_off_ll * jcp_.typesize_sampled_wei]); + uni_vbroadcastss(xmm_v2, dword[aux_reg_sampled_wei + ind_off_hl * jcp_.typesize_sampled_wei]); + uni_vbroadcastss(xmm_v3, dword[aux_reg_sampled_wei + ind_off_lh * jcp_.typesize_sampled_wei]); + uni_vbroadcastss(xmm_v4, dword[aux_reg_sampled_wei + ind_off_hh * jcp_.typesize_sampled_wei]); + size_t input_buffer_off = (size_t) kh * jcp_.kw * jcp_.ic + kw * jcp_.ic; uni_vpmovsxdq(xmm_v1_off, xmm_v1_off); uni_vmovq(reg_tmp_64, xmm_v1_off); imul(reg_tmp_64, reg_tmp_64, jcp_.ic * jcp_.typesize_in); add(reg_tmp_64, aux2_reg_input); + checkZeroWei(xmm_v1, nullify_v1_tail); uni_vmovss(xmm_v1, ptr[reg_tmp_64]); uni_vmulss(xmm_v1, xmm_v1, xmm_w1); + jmp(nullify_v1_end_tail, T_NEAR); + L(nullify_v1_tail); + { + uni_vpxor(xmm_v1, xmm_v1, xmm_v1); + } + L(nullify_v1_end_tail); uni_vpmovsxdq(xmm_v2_off, xmm_v2_off); uni_vmovq(reg_tmp_64, xmm_v2_off); imul(reg_tmp_64, reg_tmp_64, jcp_.ic * jcp_.typesize_in); add(reg_tmp_64, aux2_reg_input); + checkZeroWei(xmm_v2, nullify_v2_tail); uni_vmovss(xmm_v2, ptr[reg_tmp_64]); uni_vmulss(xmm_v2, xmm_v2, xmm_w2); + jmp(nullify_v2_end_tail, T_NEAR); + L(nullify_v2_tail); + { + uni_vpxor(xmm_v2, xmm_v2, xmm_v2); + } + L(nullify_v2_end_tail); uni_vpmovsxdq(xmm_v3_off, xmm_v3_off); uni_vmovq(reg_tmp_64, xmm_v3_off); imul(reg_tmp_64, reg_tmp_64, jcp_.ic * jcp_.typesize_in); add(reg_tmp_64, aux2_reg_input); + checkZeroWei(xmm_v3, nullify_v3_tail); uni_vmovss(xmm_v3, ptr[reg_tmp_64]); uni_vmulss(xmm_v3, xmm_v3, xmm_w3); + jmp(nullify_v3_end_tail, T_NEAR); + L(nullify_v3_tail); + { + uni_vpxor(xmm_v3, xmm_v3, xmm_v3); 
+ } + L(nullify_v3_end_tail); uni_vpmovsxdq(xmm_v4_off, xmm_v4_off); uni_vmovq(reg_tmp_64, xmm_v4_off); imul(reg_tmp_64, reg_tmp_64, jcp_.ic * jcp_.typesize_in); add(reg_tmp_64, aux2_reg_input); + checkZeroWei(xmm_v4, nullify_v4_tail); uni_vmovss(xmm_v4, ptr[reg_tmp_64]); uni_vmulss(xmm_v4, xmm_v4, xmm_w4); + jmp(nullify_v4_end_tail, T_NEAR); + L(nullify_v4_tail); + { + uni_vpxor(xmm_v4, xmm_v4, xmm_v4); + } + L(nullify_v4_end_tail); uni_vaddss(xmm_v1, xmm_v1, xmm_v2); uni_vaddss(xmm_v1, xmm_v1, xmm_v3); diff --git a/src/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp b/src/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp index ff4f7c36838..d98c578ee67 100644 --- a/src/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp +++ b/src/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp @@ -141,9 +141,6 @@ std::vector disabledTestPatterns() { *IS=_TS=\(\(4\.5\.6\.7\)\)_RS=\(\(1\.1\.6\.1\)\)_\(\(1\.5\.6\.1\)\)_\(\(1\.1\.1\.1\)\)_\(\(1\.1\.6\.1\)\).*)", // Issue: 69222 R"(.*smoke_PriorBoxClustered.*PriorBoxClusteredLayerCPUTest.*_netPRC=f16_.*)", - // Issue: 74817 - // Sporadic failings with NAN on Dynamic shape cases with jit implementation - R"(.*DefConvLayoutTest7.*)", // Issue: 71968 R"(.*LSTMSequenceCommonZeroClip.*PURE.*CONST.*hidden_size=10.*sigmoid.sigmoid.sigmoid.*reverse.*FP32_targetDevice=CPU.*)", // Issue: 72005 From 54f39294de312b26c2ab816d158fb80fdb6e313e Mon Sep 17 00:00:00 2001 From: Jan Iwaszkiewicz Date: Fri, 25 Feb 2022 11:02:04 +0100 Subject: [PATCH 109/310] [PYTHON] Fix style in python doc strings (#10606) * Fix style in python doc strings * New line quotes --- .../python/src/openvino/runtime/ie_api.py | 13 +- .../python/src/openvino/runtime/opset1/ops.py | 212 +++++++++--------- .../python/src/openvino/runtime/opset2/ops.py | 28 +-- .../python/src/openvino/runtime/opset3/ops.py | 38 ++-- .../python/src/openvino/runtime/opset4/ops.py | 26 +-- .../python/src/openvino/runtime/opset5/ops.py | 18 +- .../python/src/openvino/runtime/opset6/ops.py | 10 +- .../python/src/openvino/runtime/opset7/ops.py | 12 +- .../python/src/openvino/runtime/opset8/ops.py | 34 +-- .../src/pyopenvino/core/async_infer_queue.cpp | 6 +- .../src/pyopenvino/core/compiled_model.cpp | 34 +-- .../python/src/pyopenvino/core/core.cpp | 29 ++- .../src/pyopenvino/core/infer_request.cpp | 30 +-- .../core/offline_transformations.cpp | 2 +- .../python/src/pyopenvino/core/tensor.cpp | 8 +- .../python/src/pyopenvino/graph/model.cpp | 2 +- .../src/pyopenvino/graph/node_output.hpp | 2 +- .../python/src/pyopenvino/graph/util.cpp | 2 +- 18 files changed, 254 insertions(+), 252 deletions(-) diff --git a/src/bindings/python/src/openvino/runtime/ie_api.py b/src/bindings/python/src/openvino/runtime/ie_api.py index 67bb84ef68a..b43d335463d 100644 --- a/src/bindings/python/src/openvino/runtime/ie_api.py +++ b/src/bindings/python/src/openvino/runtime/ie_api.py @@ -132,7 +132,7 @@ class InferRequest(InferRequestBase): :param inputs: Data to be set on input tensors. :type inputs: Union[Dict[keys, values], List[values]], optional - :param userdata: Any data that will be passed inside callback call. + :param userdata: Any data that will be passed inside the callback. :type userdata: Any """ super().start_async( @@ -164,8 +164,8 @@ class CompiledModel(CompiledModelBase): Blocks all methods of CompiledModel while request is running. Method creates new temporary InferRequest and run inference on it. 
- It is advised to use dedicated InferRequest class for performance, - optimizing workflows and creating advanced pipelines. + It is advised to use a dedicated InferRequest class for performance, + optimizing workflows, and creating advanced pipelines. The allowed types of keys in the `inputs` dictionary are: @@ -188,7 +188,10 @@ class CompiledModel(CompiledModelBase): ) def __call__(self, inputs: Union[dict, list] = None) -> dict: - """Callable infer wrapper for CompiledModel. Look at `infer_new_request` for reference.""" + """Callable infer wrapper for CompiledModel. + + Take a look at `infer_new_request` for reference. + """ return self.infer_new_request(inputs) @@ -245,7 +248,7 @@ class Core(CoreBase): """Core class represents OpenVINO runtime Core entity. User applications can create several Core class instances, but in this - case the underlying plugins are created multiple times and not shared + case, the underlying plugins are created multiple times and not shared between several Core instances. The recommended way is to have a single Core instance per application. """ diff --git a/src/bindings/python/src/openvino/runtime/opset1/ops.py b/src/bindings/python/src/openvino/runtime/opset1/ops.py index 7507acb2a3f..c05fe5159a2 100644 --- a/src/bindings/python/src/openvino/runtime/opset1/ops.py +++ b/src/bindings/python/src/openvino/runtime/opset1/ops.py @@ -43,7 +43,7 @@ def absolute(node: NodeInput, name: Optional[str] = None) -> Node: :param node: One of: input node, array or scalar. :param name: Optional new name for output node. - returns New node with Abs operation applied on it. + :return: New node with Abs operation applied on it. """ return _get_node_factory_opset1().create("Abs", [node]) @@ -54,7 +54,7 @@ def acos(node: NodeInput, name: Optional[str] = None) -> Node: :param node: One of: input node, array or scalar. :param name: Optional new name for output node. - returns New node with arccos operation applied on it. + :return: New node with arccos operation applied on it. """ return _get_node_factory_opset1().create("Acos", [node]) @@ -78,7 +78,7 @@ def asin(node: NodeInput, name: Optional[str] = None) -> Node: :param node: One of: input node, array or scalar. :param name: Optional new name for output node. - returns New node with arcsin operation applied on it. + :return: New node with arcsin operation applied on it. """ return _get_node_factory_opset1().create("Asin", [node]) @@ -89,7 +89,7 @@ def atan(node: NodeInput, name: Optional[str] = None) -> Node: :param node: One of: input node, array or scalar. :param name: Optional new name for output node. - returns New node with arctan operation applied on it. + :return: New node with arctan operation applied on it. """ return _get_node_factory_opset1().create("Atan", [node]) @@ -120,7 +120,7 @@ def avg_pool( [None, 'same_upper', 'same_lower', 'valid'] :param name: Optional name for the new output node. - returns New node with AvgPool operation applied on its data. + :return: New node with AvgPool operation applied on its data. """ if auto_pad is None: auto_pad = "explicit" @@ -159,7 +159,7 @@ def batch_norm_inference( :param epsilon: The number to be added to the variance to avoid division by zero when normalizing a value. :param name: The optional name of the output node. - returns The new node which performs BatchNormInference. + :return: The new node which performs BatchNormInference. 
""" inputs = as_nodes(gamma, beta, data, mean, variance) return _get_node_factory_opset1().create("BatchNormInference", inputs, {"epsilon": epsilon}) @@ -190,7 +190,7 @@ def binary_convolution( :param pad_value: Floating-point value used to fill pad area. :param auto_pad: The type of padding. Range of values: explicit, same_upper, same_lower, valid. :param name: The optional new name for output node. - returns New node performing binary convolution operation. + :return: New node performing binary convolution operation. """ return _get_node_factory_opset1().create( "BinaryConvolution", @@ -224,7 +224,7 @@ def broadcast( :param mode: The type of broadcasting that specifies mapping of input tensor axes to output shape axes. Range of values: NUMPY, EXPLICIT. :param name: Optional new name for output node. - returns New node with broadcast shape. + :return: New node with broadcast shape. """ inputs = as_nodes(data, target_shape) if mode.upper() == "EXPLICIT": @@ -247,7 +247,7 @@ def ctc_greedy_decoder( :param sequence_mask: The tensor with sequence masks for each sequence in the batch. :param merge_repeated: The flag for merging repeated labels during the CTC calculation. :param name: Optional name for output node. - returns The new node performing an CTCGreedyDecoder operation on input tensor. + :return: The new node performing an CTCGreedyDecoder operation on input tensor. """ node_inputs = as_nodes(data, sequence_mask) return _get_node_factory_opset1().create( @@ -261,7 +261,7 @@ def ceiling(node: NodeInput, name: Optional[str] = None) -> Node: :param node: The node providing data to ceiling operation. :param name: Optional name for output node. - returns The node performing element-wise ceiling. + :return: The node performing element-wise ceiling. """ return _get_node_factory_opset1().create("Ceiling", [node]) @@ -276,7 +276,7 @@ def clamp( :param min_value: The lower bound of the range. Scalar value. :param max_value: The upper bound of the range. Scalar value. :param name: Optional output node name. - returns The new node performing a clamp operation on its input data element-wise. + :return: The new node performing a clamp operation on its input data element-wise. Performs a clipping operation on an input value between a pair of boundary values. @@ -306,7 +306,7 @@ def concat(nodes: List[NodeInput], axis: int, name: Optional[str] = None) -> Nod :param nodes: The nodes we want concatenate into single new node. :param axis: The axis along which we want to concatenate input nodes. :param name: The optional new name for output node. - returns Return new node that is a concatenation of input nodes. + :return: Return new node that is a concatenation of input nodes. """ return _get_node_factory_opset1().create("Concat", as_nodes(*nodes), {"axis": axis}) @@ -322,7 +322,7 @@ def constant( :param value: One of: array of values or scalar to initialize node with. :param dtype: The data type of provided data. :param name: Optional name for output node. - returns The Constant node initialized with provided data. + :return: The Constant node initialized with provided data. """ return make_constant_node(value, dtype) @@ -336,7 +336,7 @@ def convert( :param data: Node which produces the input tensor. :param destination_type: Provides the target type for the conversion. :param name: Optional name for the output node. - returns New node performing the conversion operation. + :return: New node performing the conversion operation. 
""" if not isinstance(destination_type, str): destination_type = get_element_type_str(destination_type) @@ -352,7 +352,7 @@ def convert_like(data: NodeInput, like: NodeInput, name: Optional[str] = None) - :param data: Node which produces the input tensor :param like: Node which provides the target type information for the conversion :param name: Optional name for the output node. - returns New node performing the conversion operation. + :return: New node performing the conversion operation. """ return _get_node_factory_opset1().create("ConvertLike", [data, like]) @@ -378,7 +378,7 @@ def convolution( :param dilations: The data batch dilation strides. :param auto_pad: The type of padding. Range of values: explicit, same_upper, same_lower, valid. :param name: The optional new name for output node. - returns New node performing batched convolution operation. + :return: New node performing batched convolution operation. """ return _get_node_factory_opset1().create( "Convolution", @@ -419,7 +419,7 @@ def convolution_backprop_data( in the filter. :param name: The node name. - returns The node object representing ConvolutionBackpropData operation. + :return: The node object representing ConvolutionBackpropData operation. """ spatial_dim_count = len(strides) if pads_begin is None: @@ -456,7 +456,7 @@ def cos(node: NodeInput, name: Optional[str] = None) -> Node: :param node: One of: input node, array or scalar. :param name: Optional new name for output node. - returns New node with cos operation applied on it. + :return: New node with cos operation applied on it. """ return _get_node_factory_opset1().create("Cos", [node]) @@ -467,7 +467,7 @@ def cosh(node: NodeInput, name: Optional[str] = None) -> Node: :param node: One of: input node, array or scalar. :param name: Optional new name for output node. - returns New node with cosh operation applied on it. + :return: New node with cosh operation applied on it. """ return _get_node_factory_opset1().create("Cosh", [node]) @@ -499,7 +499,7 @@ def deformable_convolution( :param deformable_group: The number of groups which deformable values and output should be split into along the channel axis. :param name: The optional new name for output node. - returns New node performing deformable convolution operation. + :return: New node performing deformable convolution operation. """ return _get_node_factory_opset1().create( "DeformableConvolution", @@ -548,7 +548,7 @@ def deformable_psroi_pooling( :param part_size: The number of parts the output tensor spatial dimensions are divided into. :param offsets: Optional node. 4D input blob with transformation values (offsets). :param name: The optional new name for output node. - returns New node performing DeformablePSROIPooling operation. + :return: New node performing DeformablePSROIPooling operation. """ node_inputs = as_nodes(feature_maps, coords) if offsets is not None: @@ -592,7 +592,7 @@ def depth_to_space(node: Node, mode: str, block_size: int = 1, name: str = None) :param block_size: The size of the spatial block of values describing how the tensor's data is to be rearranged. :param name: Optional output node name. - returns The new node performing an DepthToSpace operation on its input tensor. + :return: The new node performing an DepthToSpace operation on its input tensor. 
""" return _get_node_factory_opset1().create( "DepthToSpace", [node], {"mode": mode, "block_size": block_size}, @@ -618,7 +618,7 @@ def detection_output( :param aux_class_preds: The 2D input tensor with additional class predictions information. :param aux_box_preds: The 2D input tensor with additional box predictions information. :param name: Optional name for the output node. - returns Node representing DetectionOutput operation. + :return: Node representing DetectionOutput operation. Available attributes are: @@ -774,7 +774,7 @@ def divide( :param right_node: The node providing divisor data. :param auto_broadcast: Specifies rules used for auto-broadcasting of input tensors. :param name: Optional name for output node. - returns The node performing element-wise division. + :return: The node performing element-wise division. """ return _get_node_factory_opset1().create( "Divide", [left_node, right_node], {"auto_broadcast": auto_broadcast.upper()} @@ -793,7 +793,7 @@ def elu(data: NodeInput, alpha: NumericType, name: Optional[str] = None) -> Node :param data: Input tensor. One of: input node, array or scalar. :param alpha: Scalar multiplier for negative values. :param name: Optional output node name. - returns The new node performing an ELU operation on its input data element-wise. + :return: The new node performing an ELU operation on its input data element-wise. """ return _get_node_factory_opset1().create("Elu", [as_node(data)], {"alpha": alpha}) @@ -812,7 +812,7 @@ def equal( :param auto_broadcast: The type of broadcasting specifies rules used for auto-broadcasting of input tensors. :param name: The optional name for output new node. - returns The node performing element-wise equality check. + :return: The node performing element-wise equality check. """ return _get_node_factory_opset1().create( "Equal", [left_node, right_node], {"auto_broadcast": auto_broadcast.upper()} @@ -825,7 +825,7 @@ def erf(node: NodeInput, name: Optional[str] = None) -> Node: :param node: The node providing data for operation. :param name: The optional name for new output node. - returns The new node performing element-wise Erf operation. + :return: The new node performing element-wise Erf operation. """ return _get_node_factory_opset1().create("Erf", [node]) @@ -836,7 +836,7 @@ def exp(node: NodeInput, name: Optional[str] = None) -> Node: :param node: The node providing data for operation. :param name: The optional name for new output node. - returns The new node performing natural exponential operation. + :return: The new node performing natural exponential operation. """ return _get_node_factory_opset1().create("Exp", [node]) @@ -862,7 +862,7 @@ def fake_quantize( :param levels: The number of quantization levels. Integer value. :param auto_broadcast: The type of broadcasting specifies rules used for auto-broadcasting of input tensors. - returns New node with quantized value. + :return: New node with quantized value. Input floating point values are quantized into a discrete set of floating point values. @@ -895,7 +895,7 @@ def floor(node: NodeInput, name: Optional[str] = None) -> Node: :param node: The input node providing data. :param name: The optional name for new output node. - returns The node performing element-wise floor operation. + :return: The node performing element-wise floor operation. """ return _get_node_factory_opset1().create("Floor", [node]) @@ -913,7 +913,7 @@ def floor_mod( :param right_node: The second input node for FloorMod operation. 
:param auto_broadcast: Specifies rules used for auto-broadcasting of input tensors. :param name: Optional name for output node. - returns The node performing element-wise FloorMod operation. + :return: The node performing element-wise FloorMod operation. """ return _get_node_factory_opset1().create( "FloorMod", [left_node, right_node], {"auto_broadcast": auto_broadcast.upper()} @@ -930,7 +930,7 @@ def gather( :param indices: Tensor with indexes to gather. :param axis: The dimension index to gather data from. :param name: Optional name for output node. - returns The new node performing a Gather operation on the data input tensor. + :return: The new node performing a Gather operation on the data input tensor. """ node_inputs = as_nodes(data, indices, axis) return _get_node_factory_opset1().create("Gather", node_inputs) @@ -951,7 +951,7 @@ def gather_tree( :param max_seq_len: The tensor with maximum lengths for each sequence in the batch. :param end_token: The scalar tensor with value of the end marker in a sequence. :param name: Optional name for output node. - returns The new node performing a GatherTree operation. + :return: The new node performing a GatherTree operation. The GatherTree node generates the complete beams from the indices per each step and the parent beam indices. @@ -988,7 +988,7 @@ def greater( :param auto_broadcast: The type of broadcasting specifies rules used for auto-broadcasting of input tensors. :param name: The optional new name for output node. - returns The node performing element-wise check whether left_node is greater than right_node. + :return: The node performing element-wise check whether left_node is greater than right_node. """ return _get_node_factory_opset1().create( "Greater", [left_node, right_node], {"auto_broadcast": auto_broadcast.upper()} @@ -1009,7 +1009,7 @@ def greater_equal( :param auto_broadcast: The type of broadcasting specifies rules used for auto-broadcasting of input tensors. :param name: The optional new name for output node. - returns The node performing element-wise check whether left_node is greater than or equal + :return: The node performing element-wise check whether left_node is greater than or equal right_node. """ return _get_node_factory_opset1().create( @@ -1027,7 +1027,7 @@ def grn(data: Node, bias: float, name: Optional[str] = None) -> Node: :param data: The node with data tensor. :param bias: The bias added to the variance. Scalar value. :param name: Optional output node name. - returns The new node performing a GRN operation on tensor's channels. + :return: The new node performing a GRN operation on tensor's channels. """ return _get_node_factory_opset1().create("GRN", [data], {"bias": bias}) @@ -1062,7 +1062,7 @@ def group_convolution( Ceil(num_dims/2) at the end VALID: No padding :param name: Optional output node name. - returns The new node performing a Group Convolution operation on tensor from input node. + :return: The new node performing a Group Convolution operation on tensor from input node. """ return _get_node_factory_opset1().create( "GroupConvolution", @@ -1113,7 +1113,7 @@ def group_convolution_backprop_data( :param output_padding: The additional amount of paddings added per each spatial axis in the output tensor. :param name: Optional output node name. - returns The new node performing a Group Convolution operation on tensor from input node. + :return: The new node performing a Group Convolution operation on tensor from input node. 
""" spatial_dim_count = len(strides) if dilations is None: @@ -1150,7 +1150,7 @@ def hard_sigmoid(data: Node, alpha: NodeInput, beta: NodeInput, name: Optional[s :param alpha: A node producing the alpha parameter. :param beta: A node producing the beta parameter :param name: Optional output node name. - returns The new node performing a Hard Sigmoid element-wise on input tensor. + :return: The new node performing a Hard Sigmoid element-wise on input tensor. Hard Sigmoid uses the following logic: @@ -1171,7 +1171,7 @@ def interpolate( :param output_shape: 1D tensor describing output shape for spatial axes. :param attrs: The dictionary containing key, value pairs for attributes. :param name: Optional name for the output node. - returns Node representing interpolation operation. + :return: Node representing interpolation operation. Available attributes are: @@ -1251,7 +1251,7 @@ def less( :param auto_broadcast: The type of broadcasting specifies rules used for auto-broadcasting of input tensors. :param name: The optional new name for output node. - returns The node performing element-wise check whether left_node is less than the right_node. + :return: The node performing element-wise check whether left_node is less than the right_node. """ return _get_node_factory_opset1().create( "Less", [left_node, right_node], {"auto_broadcast": auto_broadcast.upper()} @@ -1272,7 +1272,7 @@ def less_equal( :param auto_broadcast: The type of broadcasting specifies rules used for auto-broadcasting of input tensors. :param name: The optional new name for output node. - returns The node performing element-wise check whether left_node is less than or equal the + :return: The node performing element-wise check whether left_node is less than or equal the right_node. """ return _get_node_factory_opset1().create( @@ -1286,7 +1286,7 @@ def log(node: NodeInput, name: Optional[str] = None) -> Node: :param node: The input node providing data for operation. :param name: The optional new name for output node. - returns The new node performing log operation element-wise. + :return: The new node performing log operation element-wise. """ return _get_node_factory_opset1().create("Log", [node]) @@ -1305,7 +1305,7 @@ def logical_and( :param auto_broadcast: The type of broadcasting that specifies mapping of input tensor axes to output shape axes. Range of values: numpy, explicit. :param name: The optional new name for output node. - returns The node performing logical and operation on input nodes corresponding elements. + :return: The node performing logical and operation on input nodes corresponding elements. """ return _get_node_factory_opset1().create( "LogicalAnd", [left_node, right_node], {"auto_broadcast": auto_broadcast.upper()} @@ -1318,7 +1318,7 @@ def logical_not(node: NodeInput, name: Optional[str] = None) -> Node: :param node: The input node providing data. :param name: The optional new name for output node. - returns The node performing element-wise logical NOT operation with given tensor. + :return: The node performing element-wise logical NOT operation with given tensor. """ return _get_node_factory_opset1().create("LogicalNot", [node]) @@ -1337,7 +1337,7 @@ def logical_or( :param auto_broadcast: The type of broadcasting that specifies mapping of input tensor axes to output shape axes. Range of values: numpy, explicit. :param name: The optional new name for output node. - returns The node performing logical or operation on input nodes corresponding elements. 
+ :return: The node performing logical or operation on input nodes corresponding elements. """ return _get_node_factory_opset1().create( "LogicalOr", [left_node, right_node], {"auto_broadcast": auto_broadcast.upper()} @@ -1358,7 +1358,7 @@ def logical_xor( :param auto_broadcast: The type of broadcasting that specifies mapping of input tensor axes to output shape axes. Range of values: numpy, explicit. :param name: The optional new name for output node. - returns The node performing logical or operation on input nodes corresponding elements. + :return: The node performing logical or operation on input nodes corresponding elements. """ return _get_node_factory_opset1().create( "LogicalXor", [left_node, right_node], {"auto_broadcast": auto_broadcast.upper()} @@ -1383,7 +1383,7 @@ def lrn( :param bias: An offset (usually positive) to avoid dividing by 0. :param size: Width of the 1-D normalization window. :param name: An optional name of the output node. - returns The new node which performs LRN. + :return: The new node which performs LRN. """ attributes = {"alpha": alpha, "beta": beta, "bias": bias, "size": size} return _get_node_factory_opset1().create("LRN", as_nodes(data, axes), attributes) @@ -1419,7 +1419,7 @@ def lstm_cell( :param clip: Specifies bound values [-C, C] for tensor clipping performed before activations. :param name: An optional name of the output node. - returns The new node represents LSTMCell. Node outputs count: 2. + :return: The new node represents LSTMCell. Node outputs count: 2. """ if activations is None: activations = ["sigmoid", "tanh", "tanh"] @@ -1493,7 +1493,7 @@ def lstm_sequence( :param clip: Specifies bound values [-C, C] for tensor clipping performed before activations. :param name: An optional name of the output node. - returns The new node represents LSTMSequence. Node outputs count: 3. + :return: The new node represents LSTMSequence. Node outputs count: 3. """ if activations is None: activations = ["sigmoid", "tanh", "tanh"] @@ -1546,7 +1546,7 @@ def matmul( :param data_b: right-hand side matrix :param transpose_a: should the first matrix be transposed before operation :param transpose_b: should the second matrix be transposed - returns MatMul operation node + :return: MatMul operation node """ return _get_node_factory_opset1().create( "MatMul", as_nodes(data_a, data_b), {"transpose_a": transpose_a, "transpose_b": transpose_b} @@ -1578,7 +1578,7 @@ def max_pool( [None, 'same_upper', 'same_lower', 'valid'] :param name: The optional name for the created output node. - returns The new node performing max pooling operation. + :return: The new node performing max pooling operation. """ if auto_pad is None: auto_pad = "explicit" @@ -1635,7 +1635,7 @@ def mod( :param right_node: The second input node for mod operation. :param auto_broadcast: Specifies rules used for auto-broadcasting of input tensors. :param name: Optional name for output node. - returns The node performing element-wise Mod operation. + :return: The node performing element-wise Mod operation. """ return _get_node_factory_opset1().create( "Mod", [left_node, right_node], {"auto_broadcast": auto_broadcast.upper()} @@ -1683,7 +1683,7 @@ def non_max_suppression( :param box_encoding: Format of boxes data encoding. Range of values: corner or cente. :param sort_result_descending: Flag that specifies whenever it is necessary to sort selected boxes across batches or not. 
- returns The new node which performs NonMaxSuppression + :return: The new node which performs NonMaxSuppression """ if max_output_boxes_per_class is None: max_output_boxes_per_class = make_constant_node(0, np.int64) @@ -1711,7 +1711,7 @@ def normalize_l2( :param axes: Node indicating axes along which L2 reduction is calculated :param eps: The epsilon added to L2 norm :param eps_mode: how eps is combined with L2 value (`add` or `max`) - returns New node which performs the L2 normalization. + :return: New node which performs the L2 normalization. """ return _get_node_factory_opset1().create( "NormalizeL2", as_nodes(data, axes), {"eps": eps, "mode": eps_mode} @@ -1732,7 +1732,7 @@ def not_equal( :param auto_broadcast: The type of broadcasting specifies rules used for auto-broadcasting of input tensors. :param name: The optional name for output new node. - returns The node performing element-wise inequality check. + :return: The node performing element-wise inequality check. """ return _get_node_factory_opset1().create( "NotEqual", [left_node, right_node], {"auto_broadcast": auto_broadcast.upper()} @@ -1759,7 +1759,7 @@ def one_hot( by indices in input take. :param name: The optional name for new output node. - returns New node performing one-hot operation. + :return: New node performing one-hot operation. """ return _get_node_factory_opset1().create( "OneHot", as_nodes(indices, depth, on_value, off_value), {"axis": axis} @@ -1783,7 +1783,7 @@ def pad( :param pads_end: number of padding elements to be added after the last element. :param pad_mode: "constant", "edge", "reflect" or "symmetric" :param arg_pad_value: value used for padding if pad_mode is "constant" - returns Pad operation node. + :return: Pad operation node. """ input_nodes = as_nodes(arg, pads_begin, pads_end) if arg_pad_value: @@ -1818,7 +1818,7 @@ def power( :param name: The optional name for the new output node. :param auto_broadcast: The type of broadcasting specifies rules used for auto-broadcasting of input tensors. - returns The new node performing element-wise exponentiation operation on input nodes. + :return: The new node performing element-wise exponentiation operation on input nodes. """ return _get_node_factory_opset1().create( "Power", [left_node, right_node], {"auto_broadcast": auto_broadcast.upper()} @@ -1832,7 +1832,7 @@ def prelu(data: NodeInput, slope: NodeInput, name: Optional[str] = None) -> Node :param data: The node with data tensor. :param slope: The node with the multipliers for negative values. :param name: Optional output node name. - returns The new node performing a PRelu operation on tensor's channels. + :return: The new node performing a PRelu operation on tensor's channels. PRelu uses the following logic: @@ -1858,7 +1858,7 @@ def prior_box_clustered( specifies shape of the image for which boxes are generated. :param attrs: The dictionary containing key, value pairs for attributes. :param name: Optional name for the output node. - returns Node representing PriorBoxClustered operation. + :return: Node representing PriorBoxClustered operation. Available attributes are: @@ -1942,7 +1942,7 @@ def prior_box( :param image_shape: Shape of image to which prior boxes are scaled. :param attrs: The dictionary containing key, value pairs for attributes. :param name: Optional name for the output node. - returns Node representing prior box operation. + :return: Node representing prior box operation. 
Available attributes are: @@ -2062,7 +2062,7 @@ def proposal( :param image_shape: The 1D input tensor with 3 or 4 elements describing image shape. :param attrs: The dictionary containing key, value pairs for attributes. :param name: Optional name for the output node. - returns Node representing Proposal operation. + :return: Node representing Proposal operation. * base_size The size of the anchor to which scale and ratio attributes are applied. Range of values: a positive unsigned integer number @@ -2196,15 +2196,15 @@ def psroi_pooling( ) -> Node: """Return a node which produces a PSROIPooling operation. - :param input: Input feature map {N, C, ...} - :param coords: Coordinates of bounding boxes - :param output_dim: Output channel number - :param group_size: Number of groups to encode position-sensitive scores - :param spatial_scale: Ratio of input feature map over input image size - :param spatial_bins_x: Numbers of bins to divide the input feature maps over - :param spatial_bins_y: Numbers of bins to divide the input feature maps over - :param mode: Mode of pooling - "avg" or "bilinear" - returns PSROIPooling node + :param input: Input feature map `{N, C, ...}`. + :param coords: Coordinates of bounding boxes. + :param output_dim: Output channel number. + :param group_size: Number of groups to encode position-sensitive scores. + :param spatial_scale: Ratio of input feature map over input image size. + :param spatial_bins_x: Numbers of bins to divide the input feature maps over. + :param spatial_bins_y: Numbers of bins to divide the input feature maps over. + :param mode: Mode of pooling - "avg" or "bilinear". + :return: PSROIPooling node """ mode = mode.lower() return _get_node_factory_opset1().create( @@ -2225,11 +2225,11 @@ def psroi_pooling( def range(start: Node, stop: NodeInput, step: NodeInput, name: Optional[str] = None) -> Node: """Return a node which produces the Range operation. - :param start: The start value of the generated range - :param stop: The stop value of the generated range - :param step: The step value for the generated range + :param start: The start value of the generated range. + :param stop: The stop value of the generated range. + :param step: The step value for the generated range. :param name: Optional name for output node. - returns Range node + :return: Range node """ return _get_node_factory_opset1().create("Range", as_nodes(start, stop, step)) @@ -2240,7 +2240,7 @@ def relu(node: NodeInput, name: Optional[str] = None) -> Node: :param node: One of: input node, array or scalar. :param name: The optional output node name. - returns The new node performing relu operation on its input element-wise. + :return: The new node performing relu operation on its input element-wise. """ return _get_node_factory_opset1().create("Relu", [node]) @@ -2253,9 +2253,9 @@ def reduce_logical_and( :param node: The tensor we want to reduce. :param reduction_axes: The axes to eliminate through AND operation. - :param keep_dims: If set to True it holds axes that are used for reduction + :param keep_dims: If set to True it holds axes that are used for reduction. :param name: Optional name for output node. - returns The new node performing reduction operation. + :return: The new node performing reduction operation. """ return _get_node_factory_opset1().create( "ReduceLogicalAnd", as_nodes(node, reduction_axes), {"keep_dims": keep_dims} @@ -2270,9 +2270,9 @@ def reduce_logical_or( :param node: The tensor we want to reduce. 
:param reduction_axes: The axes to eliminate through OR operation. - :param keep_dims: If set to True it holds axes that are used for reduction + :param keep_dims: If set to True it holds axes that are used for reduction. :param name: Optional name for output node. - returns The new node performing reduction operation. + :return: The new node performing reduction operation. """ return _get_node_factory_opset1().create( "ReduceLogicalOr", as_nodes(node, reduction_axes), {"keep_dims": keep_dims} @@ -2287,7 +2287,7 @@ def reduce_max( :param node: The tensor we want to max-reduce. :param reduction_axes: The axes to eliminate through max operation. - :param keep_dims: If set to True it holds axes that are used for reduction + :param keep_dims: If set to True it holds axes that are used for reduction. :param name: Optional name for output node. """ return _get_node_factory_opset1().create( @@ -2303,9 +2303,9 @@ def reduce_mean( :param node: The tensor we want to mean-reduce. :param reduction_axes: The axes to eliminate through mean operation. - :param keep_dims: If set to True it holds axes that are used for reduction + :param keep_dims: If set to True it holds axes that are used for reduction. :param name: Optional name for output node. - returns The new node performing mean-reduction operation. + :return: The new node performing mean-reduction operation. """ return _get_node_factory_opset1().create( "ReduceMean", as_nodes(node, reduction_axes), {"keep_dims": keep_dims} @@ -2338,7 +2338,7 @@ def reduce_prod( :param reduction_axes: The axes to eliminate through product operation. :param keep_dims: If set to True it holds axes that are used for reduction :param name: Optional name for output node. - returns The new node performing product-reduction operation. + :return: The new node performing product-reduction operation. """ return _get_node_factory_opset1().create( "ReduceProd", as_nodes(node, reduction_axes), {"keep_dims": keep_dims} @@ -2355,7 +2355,7 @@ def reduce_sum( :param reduction_axes: The axes to eliminate through summation. :param keep_dims: If set to True it holds axes that are used for reduction :param name: The optional new name for output node. - returns The new node performing summation along `reduction_axes` element-wise. + :return: The new node performing summation along `reduction_axes` element-wise. """ return _get_node_factory_opset1().create( "ReduceSum", as_nodes(node, reduction_axes), {"keep_dims": keep_dims} @@ -2387,7 +2387,7 @@ def region_yolo( :param end_axis: Axis to end softmax on :param anchors: A flattened list of pairs `[width, height]` that describes prior box sizes :param name: Optional name for output node. - returns RegionYolo node + :return: RegionYolo node """ if anchors is None: anchors = [] @@ -2434,7 +2434,7 @@ def result(data: NodeInput, name: Optional[str] = None) -> Node: """Return a node which represents an output of a graph (Model). :param data: The tensor containing the input data - returns Result node + :return: Result node """ return _get_node_factory_opset1().create("Result", [data]) @@ -2453,7 +2453,7 @@ def reverse_sequence( :param seq_lengths: 1D tensor of integers with sequence lengths in the input tensor. :param batch_axis: index of the batch dimension. :param seq_axis: index of the sequence dimension. - returns ReverseSequence node + :return: ReverseSequence node """ return _get_node_factory_opset1().create( "ReverseSequence", @@ -2479,7 +2479,7 @@ def select( item value is `False`. 
:param auto_broadcast: Mode specifies rules used for auto-broadcasting of input tensors. :param name: The optional new name for output node. - returns The new node with values selected according to provided arguments. + :return: The new node with values selected according to provided arguments. """ inputs = as_nodes(cond, then_node, else_node) return _get_node_factory_opset1().create( @@ -2499,7 +2499,7 @@ def selu( :param alpha: Alpha coefficient of SELU operation :param lambda_value: Lambda coefficient of SELU operation :param name: The optional output node name. - returns The new node performing relu operation on its input element-wise. + :return: The new node performing relu operation on its input element-wise. """ return _get_node_factory_opset1().create("Selu", as_nodes(data, alpha, lambda_value)) @@ -2509,7 +2509,7 @@ def shape_of(data: NodeInput, name: Optional[str] = None) -> Node: """Return a node which produces a tensor containing the shape of its input data. :param data: The tensor containing the input data. - returns ShapeOf node + :return: ShapeOf node """ return _get_node_factory_opset1().create("ShapeOf", [as_node(data)]) @@ -2519,7 +2519,7 @@ def sigmoid(data: NodeInput, name: Optional[str] = None) -> Node: """Return a node which applies the sigmoid function element-wise. :param data: The tensor containing the input data - returns Sigmoid node + :return: Sigmoid node """ return _get_node_factory_opset1().create("Sigmoid", [data]) @@ -2530,7 +2530,7 @@ def sign(node: NodeInput, name: Optional[str] = None) -> Node: :param node: One of: input node, array or scalar. :param name: The optional new name for output node. - returns The node with mapped elements of the input tensor to -1 (if it is negative), + :return: The node with mapped elements of the input tensor to -1 (if it is negative), 0 (if it is zero), or 1 (if it is positive). """ return _get_node_factory_opset1().create("Sign", [node]) @@ -2542,7 +2542,7 @@ def sin(node: NodeInput, name: Optional[str] = None) -> Node: :param node: One of: input node, array or scalar. :param name: Optional new name for output node. - returns New node with sin operation applied on it. + :return: New node with sin operation applied on it. """ return _get_node_factory_opset1().create("Sin", [node]) @@ -2553,7 +2553,7 @@ def sinh(node: NodeInput, name: Optional[str] = None) -> Node: :param node: One of: input node, array or scalar. :param name: Optional new name for output node. - returns New node with sin operation applied on it. + :return: New node with sin operation applied on it. """ return _get_node_factory_opset1().create("Sinh", [node]) @@ -2564,7 +2564,7 @@ def softmax(data: NodeInput, axis: int, name: Optional[str] = None) -> Node: :param data: The tensor providing input data. :param axis: An axis along which Softmax should be calculated - returns The new node with softmax operation applied on each element. + :return: The new node with softmax operation applied on each element. """ return _get_node_factory_opset1().create("Softmax", [as_node(data)], {"axis": axis}) @@ -2574,7 +2574,7 @@ def space_to_depth(data: Node, mode: str, block_size: int = 1, name: str = None) """Perform SpaceToDepth operation on the input tensor. SpaceToDepth rearranges blocks of spatial data into depth. - The operator returns a copy of the input tensor where values from the height + The operator returns a copy of the input tensor where values from the height and width dimensions are moved to the depth dimension. :param data: The node with data tensor.
@@ -2585,7 +2585,7 @@ def space_to_depth(data: Node, mode: str, block_size: int = 1, name: str = None) :param block_size: The size of the block of values to be moved. Scalar value. :param name: Optional output node name. - returns The new node performing a SpaceToDepth operation on input tensor. + :return: The new node performing a SpaceToDepth operation on input tensor. """ return _get_node_factory_opset1().create( "SpaceToDepth", [data], {"mode": mode, "block_size": block_size}, @@ -2599,7 +2599,7 @@ def split(data: NodeInput, axis: NodeInput, num_splits: int, name: Optional[str] :param data: The input tensor to be split :param axis: Axis along which the input data will be split :param num_splits: Number of the output tensors that should be produced - returns Split node + :return: Split node """ return _get_node_factory_opset1().create( "Split", @@ -2614,7 +2614,7 @@ def sqrt(node: NodeInput, name: Optional[str] = None) -> Node: :param node: One of: input node, array or scalar. :param name: Optional new name for output node. - returns The new node with sqrt operation applied element-wise. + :return: The new node with sqrt operation applied element-wise. """ return _get_node_factory_opset1().create("Sqrt", [node]) @@ -2632,7 +2632,7 @@ def squared_difference( :param auto_broadcast: The type of broadcasting that specifies mapping of input tensor axes to output shape axes. Range of values: numpy, explicit. :param name: Optional new name for output node. - returns The new node performing a squared difference between two tensors. + :return: The new node performing a squared difference between two tensors. """ return _get_node_factory_opset1().create( "SquaredDifference", [x1, x2], {"auto_broadcast": auto_broadcast.upper()} @@ -2647,7 +2647,7 @@ def squeeze(data: NodeInput, axes: NodeInput, name: Optional[str] = None) -> Nod :param axes: List of non-negative integers, indicate the dimensions to squeeze. One of: input node or array. :param name: Optional new name for output node. - returns The new node performing a squeeze operation on input tensor. + :return: The new node performing a squeeze operation on input tensor. Remove single-dimensional entries from the shape of a tensor. Takes a parameter `axes` with a list of axes to squeeze. @@ -2690,7 +2690,7 @@ def strided_slice( :param new_axis_mask: A mask indicating dimensions where '1' should be inserted :param shrink_axis_mask: A mask indicating which dimensions should be deleted :param ellipsis_mask: Indicates positions where missing dimensions should be inserted - returns StridedSlice node + :return: StridedSlice node """ if new_axis_mask is None: new_axis_mask = [] @@ -2725,7 +2725,7 @@ def subtract( :param auto_broadcast: The type of broadcasting that specifies mapping of input tensor axes to output shape axes. Range of values: numpy, explicit. :param name: The optional name for output node. - returns The new output node performing subtraction operation on both tensors element-wise. + :return: The new output node performing subtraction operation on both tensors element-wise. """ return _get_node_factory_opset1().create( "Subtract", [left_node, right_node], {"auto_broadcast": auto_broadcast.upper()} @@ -2738,7 +2738,7 @@ def tan(node: NodeInput, name: Optional[str] = None) -> Node: :param node: One of: input node, array or scalar. :param name: Optional new name for output node. - returns New node with tan operation applied on it. + :return: New node with tan operation applied on it. 
""" return _get_node_factory_opset1().create("Tan", [node]) diff --git a/src/bindings/python/src/openvino/runtime/opset2/ops.py b/src/bindings/python/src/openvino/runtime/opset2/ops.py index 9d863962c8d..c833e3a31ea 100644 --- a/src/bindings/python/src/openvino/runtime/opset2/ops.py +++ b/src/bindings/python/src/openvino/runtime/opset2/ops.py @@ -54,7 +54,7 @@ def batch_to_space( :param crops_begin: Specifies the amount to crop from the beginning along each axis of `data`. :param crops_end: Specifies the amount to crop from the end along each axis of `data`. :param name: Optional output node name. - returns The new node performing a BatchToSpace operation. + :return: The new node performing a BatchToSpace operation. """ return _get_node_factory_opset2().create( "BatchToSpace", as_nodes(data, block_shape, crops_begin, crops_end) @@ -73,7 +73,7 @@ def gelu(node: NodeInput, name: Optional[str] = None) -> Node: :param node: Input tensor. One of: input node, array or scalar. :param name: Optional output node name. - returns The new node performing a GELU operation on its input data element-wise. + :return: The new node performing a GELU operation on its input data element-wise. """ return _get_node_factory_opset2().create("Gelu", [node]) @@ -96,9 +96,9 @@ def mvn( :param across_channels: Denotes if mean values are shared across channels. :param normalize_variance: Denotes whether to perform variance normalization. :param eps: The number added to the variance to avoid division by zero - when normalizing the value. Scalar value. + when normalizing the value. Scalar value. :param name: Optional output node name. - returns The new node performing a MVN operation on input tensor. + :return: The new node performing a MVN operation on input tensor. """ return _get_node_factory_opset2().create( "MVN", @@ -111,10 +111,10 @@ def mvn( def reorg_yolo(input: Node, stride: List[int], name: Optional[str] = None) -> Node: """Return a node which produces the ReorgYolo operation. - :param input: Input data - :param stride: Stride to reorganize input by + :param input: Input data. + :param stride: Stride to reorganize input by. :param name: Optional name for output node. - returns ReorgYolo node + :return: ReorgYolo node. """ return _get_node_factory_opset2().create("ReorgYolo", [input], {"stride": stride}) @@ -130,12 +130,12 @@ def roi_pooling( ) -> Node: """Return a node which produces an ROIPooling operation. - :param input: Input feature map {N, C, ...} - :param coords: Coordinates of bounding boxes - :param output_size: Height/Width of ROI output features (shape) - :param spatial_scale: Ratio of input feature map over input image size (float) - :param method: Method of pooling - string: "max" or "bilinear" - returns ROIPooling node + :param input: Input feature map `{N, C, ...}`. + :param coords: Coordinates of bounding boxes. + :param output_size: Height/Width of ROI output features (shape). + :param spatial_scale: Ratio of input feature map over input image size (float). + :param method: Method of pooling - string: "max" or "bilinear". + :return: ROIPooling node. """ method = method.lower() return _get_node_factory_opset2().create( @@ -164,7 +164,7 @@ def space_to_batch( :param pads_begin: Specifies the padding for the beginning along each axis of `data`. :param pads_end: Specifies the padding for the ending along each axis of `data`. :param name: Optional output node name. - returns The new node performing a SpaceToBatch operation. + :return: The new node performing a SpaceToBatch operation. 
""" return _get_node_factory_opset2().create( "SpaceToBatch", as_nodes(data, block_shape, pads_begin, pads_end) diff --git a/src/bindings/python/src/openvino/runtime/opset3/ops.py b/src/bindings/python/src/openvino/runtime/opset3/ops.py index 6afefdf20fb..47f745228ae 100644 --- a/src/bindings/python/src/openvino/runtime/opset3/ops.py +++ b/src/bindings/python/src/openvino/runtime/opset3/ops.py @@ -44,7 +44,7 @@ def assign(new_value: NodeInput, variable_id: str, name: Optional[str] = None) - :param new_value: Node producing a value to be assigned to a variable. :param variable_id: Id of a variable to be updated. :param name: Optional name for output node. - returns Assign node + :return: Assign node """ return _get_node_factory_opset3().create( "Assign", @@ -70,7 +70,7 @@ def broadcast( :param broadcast_spec: The type of broadcasting that specifies mapping of input tensor axes to output shape axes. Range of values: NUMPY, EXPLICIT, BIDIRECTIONAL. :param name: Optional new name for output node. - returns New node with broadcast shape. + :return: New node with broadcast shape. """ inputs = as_nodes(data, target_shape) if broadcast_spec.upper() == "EXPLICIT": @@ -96,7 +96,7 @@ def bucketize( :param with_right_bound: indicates whether bucket includes the right or left edge of interval. default true = includes right edge :param name: Optional name for output node. - returns Bucketize node + :return: Bucketize node """ return _get_node_factory_opset3().create( "Bucketize", @@ -119,7 +119,7 @@ def cum_sum( :param axis: zero dimension tensor specifying axis position along which sum will be performed. :param exclusive: if set to true, the top element is not included :param reverse: if set to true, will perform the sums in reverse direction - returns New node performing the operation + :return: New node performing the operation """ return _get_node_factory_opset3().create( "CumSum", as_nodes(arg, axis), {"exclusive": exclusive, "reverse": reverse} @@ -143,7 +143,7 @@ def embedding_bag_offsets_sum( :param per_sample_weights: Tensor with weights for each sample. :param default_index: Scalar containing default index in embedding table to fill empty bags. :param name: Optional name for output node. - returns The new node which performs EmbeddingBagOffsetsSum + :return: The new node which performs EmbeddingBagOffsetsSum """ inputs = [emb_table, as_node(indices), as_node(offsets)] if per_sample_weights is not None: @@ -171,7 +171,7 @@ def embedding_bag_packed_sum( :param indices: Tensor with indices. :param per_sample_weights: Weights to be multiplied with embedding table. :param name: Optional name for output node. - returns EmbeddingBagPackedSum node + :return: EmbeddingBagPackedSum node """ inputs = [as_node(emb_table), as_node(indices)] if per_sample_weights is not None: @@ -202,7 +202,7 @@ def embedding_segments_sum( :param default_index: Scalar containing default index in embedding table to fill empty bags. :param per_sample_weights: Weights to be multiplied with embedding table. :param name: Optional name for output node. - returns EmbeddingSegmentsSum node + :return: EmbeddingSegmentsSum node """ inputs = [as_node(emb_table), as_node(indices), as_node(segment_ids)] if per_sample_weights is not None: @@ -235,7 +235,7 @@ def extract_image_patches( :param rates: Element seleciton rate for creating a patch. :param auto_pad: Padding type. :param name: Optional name for output node. 
- returns ExtractImagePatches node + :return: ExtractImagePatches node """ return _get_node_factory_opset3().create( "ExtractImagePatches", @@ -288,7 +288,7 @@ def gru_cell( :param linear_before_reset: Flag denotes if the layer behaves according to the modification of GRUCell described in the formula in the ONNX documentation. :param name: Optional output node name. - returns The new node performing a GRUCell operation on tensor from input node. + :return: The new node performing a GRUCell operation on tensor from input node. """ if activations is None: activations = ["sigmoid", "tanh"] @@ -333,7 +333,7 @@ def non_max_suppression( :param sort_result_descending: Flag that specifies whenever it is necessary to sort selected boxes across batches or not. :param output_type: Output element type. - returns The new node which performs NonMaxSuppression + :return: The new node which performs NonMaxSuppression """ if max_output_boxes_per_class is None: max_output_boxes_per_class = make_constant_node(0, np.int64) @@ -359,7 +359,7 @@ def non_zero(data: NodeInput, output_type: str = "i64", name: Optional[str] = No :param data: Input data. :param output_type: Output tensor type. - returns The new node which performs NonZero + :return: The new node which performs NonZero """ return _get_node_factory_opset3().create( "NonZero", @@ -375,7 +375,7 @@ def read_value(init_value: NodeInput, variable_id: str, name: Optional[str] = No :param init_value: Node producing a value to be returned instead of an unassigned variable. :param variable_id: Id of a variable to be read. :param name: Optional name for output node. - returns ReadValue node + :return: ReadValue node """ return _get_node_factory_opset3().create( "ReadValue", @@ -422,7 +422,7 @@ def rnn_cell( :param clip: The value defining clipping range [-clip, clip] on input of activation functions. :param name: Optional output node name. - returns The new node performing a RNNCell operation on tensor from input node. + :return: The new node performing a RNNCell operation on tensor from input node. """ if activations is None: activations = ["tanh"] @@ -467,7 +467,7 @@ def roi_align( :param spatial_scale: Multiplicative spatial scale factor to translate ROI coordinates. :param mode: Method to perform pooling to produce output feature map elements. - returns The new node which performs ROIAlign + :return: The new node which performs ROIAlign """ inputs = as_nodes(data, rois, batch_indices) attributes = { @@ -494,7 +494,7 @@ def scatter_elements_update( :param indices: The tensor with indexes which will be updated. :param updates: The tensor with update values. :param axis: The axis for scatter. - returns ScatterElementsUpdate node + :return: ScatterElementsUpdate node ScatterElementsUpdate creates a copy of the first input tensor with updated elements specified with second and third input tensors. @@ -523,7 +523,7 @@ def scatter_update( :param indices: The tensor with indexes which will be updated. :param updates: The tensor with update values. :param axis: The axis at which elements will be updated. - returns ScatterUpdate node + :return: ScatterUpdate node """ return _get_node_factory_opset3().create( "ScatterUpdate", @@ -537,7 +537,7 @@ def shape_of(data: NodeInput, output_type: str = "i64", name: Optional[str] = No :param data: The tensor containing the input data. :param output_type: Output element type. 
- returns ShapeOf node + :return: ShapeOf node """ return _get_node_factory_opset3().create( "ShapeOf", @@ -557,7 +557,7 @@ def shuffle_channels(data: Node, axis: int, group: int, name: Optional[str] = No :param group: The channel dimension specified by the axis parameter should be split into this number of groups. :param name: Optional output node name. - returns The new node performing a permutation on data in the channel dimension + :return: The new node performing a permutation on data in the channel dimension of the input tensor. The operation is the equivalent with the following transformation of the input tensor @@ -617,7 +617,7 @@ def topk( :param mode: Compute TopK largest ('max') or smallest ('min') :param sort: Order of output elements (sort by: 'none', 'index' or 'value') :param index_element_type: Type of output tensor with indices. - returns The new node which performs TopK (both indices and values) + :return: The new node which performs TopK (both indices and values) """ return _get_node_factory_opset3().create( "TopK", diff --git a/src/bindings/python/src/openvino/runtime/opset4/ops.py b/src/bindings/python/src/openvino/runtime/opset4/ops.py index 8ee3ffef925..3c16762e549 100644 --- a/src/bindings/python/src/openvino/runtime/opset4/ops.py +++ b/src/bindings/python/src/openvino/runtime/opset4/ops.py @@ -59,7 +59,7 @@ def ctc_loss( :param preprocess_collapse_repeated: Flag for preprocessing labels before loss calculation. :param ctc_merge_repeated: Flag for merging repeated characters in a potential alignment. :param unique: Flag to find unique elements in a target. - returns The new node which performs CTCLoss + :return: The new node which performs CTCLoss """ if blank_index is not None: inputs = as_nodes(logits, logit_length, labels, label_length, blank_index) @@ -99,7 +99,7 @@ def non_max_suppression( :param sort_result_descending: Flag that specifies whenever it is necessary to sort selected boxes across batches or not. :param output_type: Output element type. - returns The new node which performs NonMaxSuppression + :return: The new node which performs NonMaxSuppression """ if max_output_boxes_per_class is None: max_output_boxes_per_class = make_constant_node(0, np.int64) @@ -123,7 +123,7 @@ def softplus(data: NodeInput, name: Optional[str] = None) -> Node: """Apply SoftPlus operation on each element of input tensor. :param data: The tensor providing input data. - returns The new node with SoftPlus operation applied on each element. + :return: The new node with SoftPlus operation applied on each element. """ return _get_node_factory_opset4().create("SoftPlus", as_nodes(data), {}) @@ -133,7 +133,7 @@ def mish(data: NodeInput, name: Optional[str] = None,) -> Node: """Return a node which performs Mish. :param data: Tensor with input data floating point type. - returns The new node which performs Mish + :return: The new node which performs Mish """ return _get_node_factory_opset4().create("Mish", as_nodes(data), {}) @@ -143,7 +143,7 @@ def hswish(data: NodeInput, name: Optional[str] = None,) -> Node: """Return a node which performs HSwish (hard version of Swish). :param data: Tensor with input data floating point type. - returns The new node which performs HSwish + :return: The new node which performs HSwish """ return _get_node_factory_opset4().create("HSwish", as_nodes(data), {}) @@ -157,7 +157,7 @@ def swish( """Return a node which performing Swish activation function Swish(x, beta=1.0) = x * sigmoid(x * beta)). :param data: Tensor with input data floating point type. 
- returns The new node which performs Swish + :return: The new node which performs Swish """ if beta is None: beta = make_constant_node(1.0, np.float32) @@ -170,7 +170,7 @@ def acosh(node: NodeInput, name: Optional[str] = None) -> Node: :param node: One of: input node, array or scalar. :param name: Optional new name for output node. - returns New node with arccosh operation applied on it. + :return: New node with arccosh operation applied on it. """ return _get_node_factory_opset4().create("Acosh", [node]) @@ -181,7 +181,7 @@ def asinh(node: NodeInput, name: Optional[str] = None) -> Node: :param node: One of: input node, array or scalar. :param name: Optional new name for output node. - returns New node with arcsinh operation applied on it. + :return: New node with arcsinh operation applied on it. """ return _get_node_factory_opset4().create("Asinh", [node]) @@ -192,7 +192,7 @@ def atanh(node: NodeInput, name: Optional[str] = None) -> Node: :param node: One of: input node, array or scalar. :param name: Optional new name for output node. - returns New node with arctanh operation applied on it. + :return: New node with arctanh operation applied on it. """ return _get_node_factory_opset4().create("Atanh", [node]) @@ -292,7 +292,7 @@ def proposal( } Optional attributes which are absent from dictionary will be set with corresponding default. - returns Node representing Proposal operation. + :return: Node representing Proposal operation. """ requirements = [ ("base_size", True, np.unsignedinteger, is_positive_value), @@ -328,7 +328,7 @@ def reduce_l1( :param reduction_axes: The axes to eliminate through mean operation. :param keep_dims: If set to True it holds axes that are used for reduction :param name: Optional name for output node. - returns The new node performing mean-reduction operation. + :return: The new node performing mean-reduction operation. """ return _get_node_factory_opset4().create( "ReduceL1", as_nodes(node, reduction_axes), {"keep_dims": keep_dims} @@ -345,7 +345,7 @@ def reduce_l2( :param reduction_axes: The axes to eliminate through mean operation. :param keep_dims: If set to True it holds axes that are used for reduction :param name: Optional name for output node. - returns The new node performing mean-reduction operation. + :return: The new node performing mean-reduction operation. """ return _get_node_factory_opset4().create( "ReduceL2", as_nodes(node, reduction_axes), {"keep_dims": keep_dims} @@ -382,7 +382,7 @@ def lstm_cell( :param clip: Specifies bound values [-C, C] for tensor clipping performed before activations. :param name: An optional name of the output node. - returns The new node represents LSTMCell. Node outputs count: 2. + :return: The new node represents LSTMCell. Node outputs count: 2. """ if activations is None: activations = ["sigmoid", "tanh", "tanh"] diff --git a/src/bindings/python/src/openvino/runtime/opset5/ops.py b/src/bindings/python/src/openvino/runtime/opset5/ops.py index b024210f4a8..f02200cf3ae 100644 --- a/src/bindings/python/src/openvino/runtime/opset5/ops.py +++ b/src/bindings/python/src/openvino/runtime/opset5/ops.py @@ -57,7 +57,7 @@ def batch_norm_inference( :param epsilon: The number to be added to the variance to avoid division by zero when normalizing a value. :param name: The optional name of the output node. - @return: The new node which performs BatchNormInference. + :return: The new node which performs BatchNormInference. 
""" inputs = as_nodes(data, gamma, beta, mean, variance) return _get_node_factory_opset5().create("BatchNormInference", inputs, {"epsilon": epsilon}) @@ -75,7 +75,7 @@ def gather_nd( :param data: N-D tensor with data for gathering :param indices: K-D tensor of tuples with indices by which data is gathered :param batch_dims: Scalar value of batch dimensions - @return: The new node which performs GatherND + :return: The new node which performs GatherND """ inputs = as_nodes(data, indices) @@ -92,7 +92,7 @@ def log_softmax(data: NodeInput, axis: int, name: Optional[str] = None) -> Node: :param data: The tensor providing input data. :param axis: An axis along which LogSoftmax should be calculated - @return: The new node with LogSoftmax operation applied on each element. + :return: The new node with LogSoftmax operation applied on each element. """ return _get_node_factory_opset5().create("LogSoftmax", [as_node(data)], {"axis": axis}) @@ -123,7 +123,7 @@ def non_max_suppression( :param sort_result_descending: Flag that specifies whenever it is necessary to sort selected boxes across batches or not. :param output_type: Output element type. - @return: The new node which performs NonMaxSuppression + :return: The new node which performs NonMaxSuppression """ if max_output_boxes_per_class is None: max_output_boxes_per_class = make_constant_node(0, np.int64) @@ -158,7 +158,7 @@ def round(data: NodeInput, mode: str = "half_to_even", name: Optional[str] = Non integer or rounding in such a way that the result heads away from zero if `mode` attribute is 'half_away_from_zero`. :param name: An optional name of the output node. - @return: The new node with Round operation applied on each element. + :return: The new node with Round operation applied on each element. """ return _get_node_factory_opset5().create("Round", as_nodes(data), {"mode": mode.upper()}) @@ -205,7 +205,7 @@ def lstm_sequence( :param clip: Specifies bound values [-C, C] for tensor clipping performed before activations. :param name: An optional name of the output node. - @return: The new node represents LSTMSequence. Node outputs count: 3. + :return: The new node represents LSTMSequence. Node outputs count: 3. """ if activations is None: activations = ["sigmoid", "tanh", "tanh"] @@ -231,7 +231,7 @@ def hsigmoid(data: NodeInput, name: Optional[str] = None,) -> Node: """Return a node which performs HSigmoid. :param data: Tensor with input data floating point type. - @return: The new node which performs HSigmoid + :return: The new node which performs HSigmoid """ return _get_node_factory_opset5().create("HSigmoid", as_nodes(data), {}) @@ -277,7 +277,7 @@ def gru_sequence( of GRU described in the formula in the ONNX documentation. :param name: An optional name of the output node. - @return: The new node represents GRUSequence. Node outputs count: 2. + :return: The new node represents GRUSequence. Node outputs count: 2. """ if activations is None: activations = ["sigmoid", "tanh"] @@ -337,7 +337,7 @@ def rnn_sequence( :param clip: Specifies bound values [-C, C] for tensor clipping performed before activations. :param name: An optional name of the output node. - @return: The new node represents RNNSequence. Node outputs count: 2. + :return: The new node represents RNNSequence. Node outputs count: 2. 
""" if activations is None: activations = ["tanh"] diff --git a/src/bindings/python/src/openvino/runtime/opset6/ops.py b/src/bindings/python/src/openvino/runtime/opset6/ops.py index 08d62ea1b3b..45b295a216e 100644 --- a/src/bindings/python/src/openvino/runtime/opset6/ops.py +++ b/src/bindings/python/src/openvino/runtime/opset6/ops.py @@ -53,7 +53,7 @@ def ctc_greedy_decoder_seq_len( :param sequence_length: Input 1D tensor with sequence length. Shape: [batch_size] :param blank_index: Scalar or 1D tensor with specifies the class index to use for the blank class. Optional parameter. Default value is num_classes-1. - @return: The new node which performs CTCGreedyDecoderSeqLen. + :return: The new node which performs CTCGreedyDecoderSeqLen. """ if blank_index is not None: inputs = as_nodes(data, sequence_length, blank_index) @@ -81,7 +81,7 @@ def gather_elements( :param data: N-D tensor with data for gathering :param indices: N-D tensor with indices by which data is gathered :param axis: axis along which elements are gathered - @return: The new node which performs GatherElements + :return: The new node which performs GatherElements """ inputs = as_nodes(data, indices) @@ -110,7 +110,7 @@ def mvn( when normalizing the value. Scalar value. :param eps_mode: how eps is applied (`inside_sqrt` or `outside_sqrt`) :param name: Optional output node name. - returns The new node performing a MVN operation on input tensor. + :return: The new node performing a MVN operation on input tensor. """ inputs = as_nodes(data, axes) @@ -130,7 +130,7 @@ def assign(new_value: NodeInput, variable_id: str, name: Optional[str] = None) - :param new_value: Node producing a value to be assigned to a variable. :param variable_id: Id of a variable to be updated. :param name: Optional name for output node. - returns Assign node + :return: Assign node """ return _get_node_factory_opset6().create( "Assign", @@ -146,7 +146,7 @@ def read_value(init_value: NodeInput, variable_id: str, name: Optional[str] = No :param init_value: Node producing a value to be returned instead of an unassigned variable. :param variable_id: Id of a variable to be read. :param name: Optional name for output node. - returns ReadValue node + :return: ReadValue node """ return _get_node_factory_opset6().create( "ReadValue", diff --git a/src/bindings/python/src/openvino/runtime/opset7/ops.py b/src/bindings/python/src/openvino/runtime/opset7/ops.py index b07772fb572..f5ee112beab 100644 --- a/src/bindings/python/src/openvino/runtime/opset7/ops.py +++ b/src/bindings/python/src/openvino/runtime/opset7/ops.py @@ -46,7 +46,7 @@ def einsum( :param inputs: The list of input nodes :param equation: Einsum equation - @return: The new node performing Einsum operation on the inputs + :return: The new node performing Einsum operation on the inputs """ attributes = { "equation": equation @@ -66,7 +66,7 @@ def gelu( :param data: The node with data tensor. :param approximation_mode: defines which approximation to use ('tanh' or 'erf') :param name: Optional output node name. - returns The new node performing a Gelu activation with the input tensor. + :return: The new node performing a Gelu activation with the input tensor. """ inputs = as_nodes(data) @@ -88,7 +88,7 @@ def roll( :param data: The node with data tensor. :param shift: The node with the tensor with numbers of places by which elements are shifted. :param axes: The node with the tensor with axes along which elements are shifted. - returns The new node performing a Roll operation on the input tensor. 
+ :return: The new node performing a Roll operation on the input tensor. """ inputs = as_nodes(data, shift, axes) @@ -108,7 +108,7 @@ def gather( :param indices: N-D tensor with indices by which data is gathered :param axis: axis along which elements are gathered :param batch_dims: number of batch dimensions - @return: The new node which performs Gather + :return: The new node which performs Gather """ inputs = as_nodes(data, indices, axis) attributes = { @@ -127,7 +127,7 @@ def dft( :param data: Tensor with transformed data. :param axes: Tensor with axes to transform. :param signal_size: Tensor specifying signal size with respect to axes from the input 'axes'. - @return: The new node which performs DFT operation on the input data tensor. + :return: The new node which performs DFT operation on the input data tensor. """ if signal_size is None: inputs = as_nodes(data, axes) @@ -148,7 +148,7 @@ def idft( :param data: Tensor with transformed data. :param axes: Tensor with axes to transform. :param signal_size: Tensor specifying signal size with respect to axes from the input 'axes'. - @return: The new node which performs IDFT operation on the input data tensor. + :return: The new node which performs IDFT operation on the input data tensor. """ if signal_size is None: inputs = as_nodes(data, axes) diff --git a/src/bindings/python/src/openvino/runtime/opset8/ops.py b/src/bindings/python/src/openvino/runtime/opset8/ops.py index 0a809f00bf5..0aa381592fb 100644 --- a/src/bindings/python/src/openvino/runtime/opset8/ops.py +++ b/src/bindings/python/src/openvino/runtime/opset8/ops.py @@ -62,7 +62,7 @@ def deformable_convolution( :param bilinear_interpolation_pad: The flag that determines the mode of bilinear interpolation execution. :param name: The optional new name for output node. - returns New node performing deformable convolution operation. + :return: New node performing deformable convolution operation. """ if mask is None: inputs = as_nodes(data, offsets, filters) @@ -94,7 +94,7 @@ def adaptive_avg_pool( :param data: The list of input nodes :param output_shape: the shape of spatial dimentions after operation - @return: The new node performing AdaptiveAvgPool operation on the data + :return: The new node performing AdaptiveAvgPool operation on the data """ inputs = as_nodes(data, output_shape) return _get_node_factory_opset8().create("AdaptiveAvgPool", inputs) @@ -111,7 +111,7 @@ def adaptive_max_pool( :param data: The list of input nodes :param output_shape: the shape of spatial dimentions after operation :param index_element_type: Type of indices output. 
- @return: The new node performing AdaptiveMaxPool operation on the data + :return: The new node performing AdaptiveMaxPool operation on the data """ inputs = as_nodes(data, output_shape) @@ -158,7 +158,7 @@ def multiclass_nms( :param background_class: Specifies the background class id, -1 meaning to keep all classes :param nms_eta: Specifies eta parameter for adpative NMS, in close range [0, 1.0] :param normalized: Specifies whether boxes are normalized or not - @return: The new node which performs MuticlassNms + :return: The new node which performs MuticlassNms """ inputs = as_nodes(boxes, scores) @@ -218,7 +218,7 @@ def matrix_nms( :param post_threshold: Specifies threshold to filter out boxes with low confidence score after decaying :param normalized: Specifies whether boxes are normalized or not - @return: The new node which performs MatrixNms + :return: The new node which performs MatrixNms """ inputs = as_nodes(boxes, scores) @@ -253,7 +253,7 @@ def gather( indicate reverse indexing from the end :param axis: axis along which elements are gathered :param batch_dims: number of batch dimensions - @return: The new node which performs Gather + :return: The new node which performs Gather """ inputs = as_nodes(data, indices, axis) attributes = { @@ -296,7 +296,7 @@ def max_pool( starting at the provided axis. Defaults to 0. :param name: The optional name for the created output node. - returns The new node performing max pooling operation. + :return: The new node performing max pooling operation. """ if auto_pad is None: auto_pad = "explicit" @@ -335,7 +335,7 @@ def random_uniform( 'i64', 'i32', 'f64', 'f32', 'f16', 'bf16'. :param global_seed: Specifies global seed value. Required to be a positive integer or 0. :param op_seed: Specifies operational seed value. Required to be a positive integer or 0. - returns The new node which performs generation of random values from uniform distribution. + :return: The new node which performs generation of random values from uniform distribution. """ inputs = as_nodes(output_shape, min_val, max_val) @@ -370,7 +370,7 @@ def slice( :param step: The node providing step values. :param axes: The optional node providing axes to slice, default [0, 1, ..., len(start)-1]. :param name: The optional name for the created output node. - returns The new node performing Slice operation. + :return: The new node performing Slice operation. """ if axes is None: inputs = as_nodes(data, start, stop, step) @@ -392,7 +392,7 @@ def gather_nd( :param data: N-D tensor with data for gathering :param indices: K-D tensor of tuples with indices by which data is gathered :param batch_dims: Scalar value of batch dimensions - @return: The new node which performs GatherND + :return: The new node which performs GatherND """ inputs = as_nodes(data, indices) @@ -413,7 +413,7 @@ def prior_box( :param image_shape: Shape of image to which prior boxes are scaled. :param attrs: The dictionary containing key, value pairs for attributes. :param name: Optional name for the output node. - returns Node representing prior box operation. + :return: Node representing prior box operation. Available attributes are: * min_size The minimum box size (in pixels). Range of values: positive floating point numbers @@ -524,7 +524,7 @@ def i420_to_bgr( :param arg_u: The node providing U plane data. Required for separate planes. :param arg_v: The node providing V plane data. Required for separate planes. :param name: The optional name for the created output node. 
- returns The new node performing I420toBGR operation. + :return: The new node performing I420toBGR operation. """ if arg_u is None and arg_v is None: inputs = as_nodes(arg) @@ -551,7 +551,7 @@ def i420_to_rgb( :param arg_u: The node providing U plane data. Required for separate planes. :param arg_v: The node providing V plane data. Required for separate planes. :param name: The optional name for the created output node. - returns The new node performing I420toRGB operation. + :return: The new node performing I420toRGB operation. """ if arg_u is None and arg_v is None: inputs = as_nodes(arg) @@ -576,7 +576,7 @@ def nv12_to_bgr( :param arg: The node providing single or Y plane data. :param arg_uv: The node providing UV plane data. Required for separate planes. :param name: The optional name for the created output node. - returns The new node performing NV12toBGR operation. + :return: The new node performing NV12toBGR operation. """ if arg_uv is None: inputs = as_nodes(arg) @@ -597,7 +597,7 @@ def nv12_to_rgb( :param arg: The node providing single or Y plane data. :param arg_uv: The node providing UV plane data. Required for separate planes. :param name: The optional name for the created output node. - returns The new node performing NV12toRGB operation. + :return: The new node performing NV12toRGB operation. """ if arg_uv is None: inputs = as_nodes(arg) @@ -626,7 +626,7 @@ def detection_output( :param aux_class_preds: The 2D input tensor with additional class predictions information. :param aux_box_preds: The 2D input tensor with additional box predictions information. :param name: Optional name for the output node. - returns Node representing DetectionOutput operation. + :return: Node representing DetectionOutput operation. Available attributes are: * background_label_id The background label id. Range of values: integer value @@ -751,6 +751,6 @@ def softmax(data: NodeInput, axis: int, name: Optional[str] = None) -> Node: :param data: The tensor providing input data. :param axis: An axis along which Softmax should be calculated. Can be positive or negative. :param name: Optional name for the node. - returns The new node with softmax operation applied on each element. + :return: The new node with softmax operation applied on each element. """ return _get_node_factory_opset8().create("Softmax", [as_node(data)], {"axis": axis}) diff --git a/src/bindings/python/src/pyopenvino/core/async_infer_queue.cpp b/src/bindings/python/src/pyopenvino/core/async_infer_queue.cpp index 6d69aa81c6e..c6438228ea1 100644 --- a/src/bindings/python/src/pyopenvino/core/async_infer_queue.cpp +++ b/src/bindings/python/src/pyopenvino/core/async_infer_queue.cpp @@ -202,7 +202,7 @@ void regclass_AsyncInferQueue(py::module m) { py::arg("inputs"), py::arg("userdata"), R"( - Run asynchronous inference using next available InferRequest. + Run asynchronous inference using the next available InferRequest. This function releases the GIL, so another Python thread can work while this function runs in the background. @@ -262,8 +262,8 @@ void regclass_AsyncInferQueue(py::module m) { }, R"( Sets unified callback on all InferRequests from queue's pool. - Signature of such function should have two arguments, where - first one is InferRequest object and second one is userdata + The signature of such function should have two arguments, where + the first one is InferRequest object and the second one is userdata connected to InferRequest from the AsyncInferQueue's pool. .. 
code-block:: python diff --git a/src/bindings/python/src/pyopenvino/core/compiled_model.cpp b/src/bindings/python/src/pyopenvino/core/compiled_model.cpp index 884bcf9b0bf..768768a1872 100644 --- a/src/bindings/python/src/pyopenvino/core/compiled_model.cpp +++ b/src/bindings/python/src/pyopenvino/core/compiled_model.cpp @@ -53,11 +53,11 @@ void regclass_CompiledModel(py::module m) { py::arg("inputs"), R"( Infers specified input(s) in synchronous mode. - Blocks all methods of CompiledModel while request is running. + Blocks all methods of CompiledModel while the request is running. Method creates new temporary InferRequest and run inference on it. - It is advised to use dedicated InferRequest class for performance, - optimizing workflows and creating advanced pipelines. + It is advised to use a dedicated InferRequest class for performance, + optimizing workflows, and creating advanced pipelines. :param inputs: Data to set on input tensors. :type inputs: Dict[Union[int, str, openvino.runtime.ConstOutput], openvino.runtime.Tensor] @@ -108,10 +108,10 @@ void regclass_CompiledModel(py::module m) { R"( Exports the compiled model to bytes/output stream. - Advanced version of `export_model`. It utilizes, streams from standard + Advanced version of `export_model`. It utilizes, streams from the standard Python library `io`. - Function performs flushing of the stream, writes to it and then rewinds + Function performs flushing of the stream, writes to it, and then rewinds the stream to the beginning (using seek(0)). :param model_stream: A stream object to which the model will be serialized. @@ -168,12 +168,12 @@ void regclass_CompiledModel(py::module m) { R"( Gets runtime model information from a device. - This object (returned model) represents the internal device specific model - which is optimized for particular accelerator. It contains device specific nodes, - runtime information and can be used only to understand how the source model - is optimized and which kernels, element types and layouts are selected. + This object (returned model) represents the internal device-specific model + which is optimized for the particular accelerator. It contains device-specific nodes, + runtime information, and can be used only to understand how the source model + is optimized and which kernels, element types, and layouts are selected. - :return: Model containing Executable Graph information. + :return: Model, containing Executable Graph information. :rtype: openvino.runtime.Model )"); @@ -201,7 +201,7 @@ void regclass_CompiledModel(py::module m) { py::arg("index"), R"( Gets input of a compiled model identified by an index. - If an input with given index is not found, this method throws an exception. + If the input with given index is not found, this method throws an exception. :param index: An input index. :type index: int @@ -214,9 +214,9 @@ void regclass_CompiledModel(py::module m) { py::arg("tensor_name"), R"( Gets input of a compiled model identified by a tensor_name. - If an input with given tensor name is not found, this method throws an exception. + If the input with given tensor name is not found, this method throws an exception. - :param tensor_name: An input tensor's name. + :param tensor_name: An input tensor name. :type tensor_name: str :return: A compiled model input. :rtype: openvino.runtime.ConstOutput @@ -235,7 +235,7 @@ void regclass_CompiledModel(py::module m) { (ov::Output(ov::CompiledModel::*)() const) & ov::CompiledModel::output, R"( Gets a single output of a compiled model. 
- If a model has more than one output, this method throws an exception. + If the model has more than one output, this method throws an exception. :return: A compiled model output. :rtype: openvino.runtime.ConstOutput @@ -246,7 +246,7 @@ void regclass_CompiledModel(py::module m) { py::arg("index"), R"( Gets output of a compiled model identified by an index. - If an output with given index is not found, this method throws an exception. + If the output with given index is not found, this method throws an exception. :param index: An output index. :type index: int @@ -259,9 +259,9 @@ void regclass_CompiledModel(py::module m) { py::arg("tensor_name"), R"( Gets output of a compiled model identified by a tensor_name. - If an output with given tensor name is not found, this method throws an exception. + If the output with given tensor name is not found, this method throws an exception. - :param tensor_name: An output tensor's name. + :param tensor_name: An output tensor name. :type tensor_name: str :return: A compiled model output. :rtype: openvino.runtime.ConstOutput diff --git a/src/bindings/python/src/pyopenvino/core/core.cpp b/src/bindings/python/src/pyopenvino/core/core.cpp index 20b32618bdf..7163bbe129b 100644 --- a/src/bindings/python/src/pyopenvino/core/core.cpp +++ b/src/bindings/python/src/pyopenvino/core/core.cpp @@ -27,7 +27,7 @@ void regclass_Core(py::module m) { py::class_> cls(m, "Core"); cls.doc() = "openvino.runtime.Core class represents OpenVINO runtime Core entity. User applications can create several " - "Core class instances, but in this case the underlying plugins are created multiple times and not shared " + "Core class instances, but in this case, the underlying plugins are created multiple times and not shared " "between several Core instances. The recommended way is to have a single Core instance per application."; cls.def(py::init(), py::arg("xml_config_file") = ""); @@ -82,12 +82,12 @@ void regclass_Core(py::module m) { py::arg("config") = py::dict(), R"( Creates a compiled model from a source model object. - Users can create as many compiled models as they need and use them simultaneously + Users can create as many compiled models as they need, and use them simultaneously (up to the limitation of the hardware resources). :param model: Model acquired from read_model function. :type model: openvino.runtime.Model - :param device_name: Name of the device to load the model to. + :param device_name: Name of the device which will load the model. :type device_name: str :param properties: Optional dict of pairs: (property name, property value) relevant only for this load operation. :type properties: dict @@ -106,7 +106,7 @@ void regclass_Core(py::module m) { py::arg("config") = py::dict(), R"( Creates and loads a compiled model from a source model to the default OpenVINO device - selected by AUTO plugin. Users can create as many compiled models as they need and use + selected by AUTO plugin. Users can create as many compiled models as they need, and use them simultaneously (up to the limitation of the hardware resources). :param model: Model acquired from read_model function. @@ -216,8 +216,8 @@ void regclass_Core(py::module m) { :param model: A path to a model in IR / ONNX / PDPD format. :type model: str :param weights: A path to a data file For IR format (*.bin): if path is empty, - will try to read bin file with the same name as xml and if bin - file with the same name was not found, will load IR without weights. 
+ it tries to read a bin file with the same name as xml and if the bin + file with the same name was not found, loads IR without weights. For ONNX format (*.onnx): weights parameter is not used. For PDPD format (*.pdmodel) weights parameter is not used. :type weights: str @@ -255,9 +255,8 @@ void regclass_Core(py::module m) { :param model: A string with model in IR / ONNX / PDPD format. :type model: str :param weights: A path to a data file For IR format (*.bin): if path is empty, - will try to read bin file with the same name as xml and if bin - file with the same name was not found, will load IR without weights. - For ONNX format (*.onnx): weights parameter is not used. + it tries to read a bin file with the same name as xml and if the bin + file with the same name was not found, loads IR without weights. For ONNX format (*.onnx): weights parameter is not used. For PDPD format (*.pdmodel) weights parameter is not used. :type weights: str :return: A model. @@ -280,10 +279,10 @@ void regclass_Core(py::module m) { R"( Imports a compiled model from a previously exported one. - :param model_stream: Input stream containing a model previously exported using export_model method. + :param model_stream: Input stream, containing a model previously exported, using export_model method. :type model_stream: bytes - :param device_name: Name of device to import compiled model for. - Note, if device_name device was not used to compile the original mode, an exception is thrown. + :param device_name: Name of device to which compiled model is imported. + Note: if device_name is not used to compile the original model, an exception is thrown. :type device_name: str :param properties: Optional map of pairs: (property name, property value) relevant only for this load operation. :type properties: dict, optional @@ -332,10 +331,10 @@ void regclass_Core(py::module m) { Python library `io`. - :param model_stream: Input stream containing a model previously exported using export_model method. + :param model_stream: Input stream, containing a model previously exported, using export_model method. :type model_stream: io.BytesIO - :param device_name: Name of device to import compiled model for. - Note, if device_name device was not used to compile the original mode, an exception is thrown. + :param device_name: Name of device to which compiled model is imported. + Note: if device_name is not used to compile the original model, an exception is thrown. :type device_name: str :param properties: Optional map of pairs: (property name, property value) relevant only for this load operation. :type properties: dict, optional diff --git a/src/bindings/python/src/pyopenvino/core/infer_request.cpp b/src/bindings/python/src/pyopenvino/core/infer_request.cpp index c9aac19a9e1..8d70c7bd23c 100644 --- a/src/bindings/python/src/pyopenvino/core/infer_request.cpp +++ b/src/bindings/python/src/pyopenvino/core/infer_request.cpp @@ -51,8 +51,8 @@ void regclass_InferRequest(py::module m) { py::arg("tensors"), R"( Sets batch of tensors for input data to infer by tensor name. - Model input shall have batch dimension and number of tensors shall - match with batch size. Current version supports set tensors to model inputs only. + Model input needs to have batch dimension and the number of tensors needs to be + matched with batch size. Current version supports set tensors to model inputs only. In case if `tensor_name` is associated with output (or any other non-input node), an exception will be thrown. 
@@ -60,7 +60,7 @@ void regclass_InferRequest(py::module m) { :type tensor_name: str :param tensors: Input tensors for batched infer request. The type of each tensor must match the model input element type and shape (except batch dimension). - Total size of tensors shall match with input's size. + Total size of tensors needs to match with input's size. :type tensors: List[openvino.runtime.Tensor] )"); @@ -73,8 +73,8 @@ void regclass_InferRequest(py::module m) { py::arg("tensors"), R"( Sets batch of tensors for input data to infer by tensor name. - Model input shall have batch dimension and number of tensors shall - match with batch size. Current version supports set tensors to model inputs only. + Model input needs to have batch dimension and the number of tensors needs to be + matched with batch size. Current version supports set tensors to model inputs only. In case if `port` is associated with output (or any other non-input node), an exception will be thrown. @@ -83,7 +83,7 @@ void regclass_InferRequest(py::module m) { :type port: openvino.runtime.ConstOutput :param tensors: Input tensors for batched infer request. The type of each tensor must match the model input element type and shape (except batch dimension). - Total size of tensors shall match with input's size. + Total size of tensors needs to match with input's size. :type tensors: List[openvino.runtime.Tensor] :rtype: None )"); @@ -130,12 +130,12 @@ void regclass_InferRequest(py::module m) { py::arg("tensors"), R"( Sets batch of tensors for single input data. - Model input shall have batch dimension and number of `tensors` - shall match with batch size. + Model input needs to have batch dimension and the number of `tensors` + needs to match with batch size. :param tensors: Input tensors for batched infer request. The type of each tensor must match the model input element type and shape (except batch dimension). - Total size of tensors shall match with input's size. + Total size of tensors needs to match with input's size. :type tensors: List[openvino.runtime.Tensor] )"); @@ -148,14 +148,14 @@ void regclass_InferRequest(py::module m) { py::arg("tensors"), R"( Sets batch of tensors for single input data to infer by index. - Model input shall have batch dimension and number of `tensors` - shall match with batch size. + Model input needs to have batch dimension and the number of `tensors` + needs to match with batch size. :param idx: Index of input tensor. :type idx: int :param tensors: Input tensors for batched infer request. The type of each tensor must match the model input element type and shape (except batch dimension). - Total size of tensors shall match with input's size. + Total size of tensors needs to match with input's size. )"); cls.def( @@ -513,8 +513,8 @@ void regclass_InferRequest(py::module m) { return self._request.get_profiling_info(); }, R"( - Queries performance measures per layer to get feedback of what - is the most time consuming operation, not all plugins provide + Queries performance is measured per layer to get feedback on what + is the most time-consuming operation, not all plugins provide meaningful data. :return: List of profiling information for operations in model. @@ -616,7 +616,7 @@ void regclass_InferRequest(py::module m) { return self._request.get_profiling_info(); }, R"( - Performance measures per layer to get feedback of what is the most time consuming operation. + Performance is measured per layer to get feedback on the most time-consuming operation. Not all plugins provide meaningful data! 
:return: Inference time. diff --git a/src/bindings/python/src/pyopenvino/core/offline_transformations.cpp b/src/bindings/python/src/pyopenvino/core/offline_transformations.cpp index 319f1ad2705..3495033cf26 100644 --- a/src/bindings/python/src/pyopenvino/core/offline_transformations.cpp +++ b/src/bindings/python/src/pyopenvino/core/offline_transformations.cpp @@ -146,7 +146,7 @@ void regmodule_offline_transformations(py::module m) { py::arg("weights_path"), py::arg("version") = "UNSPECIFIED", R"( - Serialize given model into IR. The generated .xml and .bin files will be save + Serialize given model into IR. The generated .xml and .bin files will be saved into provided paths. :param model: model which will be converted to IR representation diff --git a/src/bindings/python/src/pyopenvino/core/tensor.cpp b/src/bindings/python/src/pyopenvino/core/tensor.cpp index bedf9b2186d..7d3d77098f4 100644 --- a/src/bindings/python/src/pyopenvino/core/tensor.cpp +++ b/src/bindings/python/src/pyopenvino/core/tensor.cpp @@ -26,10 +26,10 @@ void regclass_Tensor(py::module m) { :param array: Array to create tensor from. :type array: numpy.array - :param shared_memory: If `True` this Tensor memory is being shared with a host, + :param shared_memory: If `True`, this Tensor memory is being shared with a host, that means the responsibility of keeping host memory is on the side of a user. Any action performed on the host - memory will be reflected on this Tensor's memory! + memory is reflected on this Tensor's memory! If `False`, data is being copied to this Tensor. Requires data to be C_CONTIGUOUS if `True`. :type shared_memory: bool @@ -43,8 +43,8 @@ void regclass_Tensor(py::module m) { R"( Another Tensor's special constructor. - It take an array or slice of it and shape that will be - selected starting from the first element of given array/slice. + It takes an array or slice of it, and shape that will be + selected, starting from the first element of the given array/slice. Please use it only in advanced cases if necessary! :param array: Underlaying methods will retrieve pointer on first element diff --git a/src/bindings/python/src/pyopenvino/graph/model.cpp b/src/bindings/python/src/pyopenvino/graph/model.cpp index d4a17bd0686..b2e3af2811a 100644 --- a/src/bindings/python/src/pyopenvino/graph/model.cpp +++ b/src/bindings/python/src/pyopenvino/graph/model.cpp @@ -631,7 +631,7 @@ void regclass_graph_Model(py::module m) { Return -1 if parameter not matched. - :param parameter: Parameter which index is to be found. + :param parameter: Parameter, which index is to be found. :type parameter: op.Parameter :return: Index for parameter :rtype: int diff --git a/src/bindings/python/src/pyopenvino/graph/node_output.hpp b/src/bindings/python/src/pyopenvino/graph/node_output.hpp index 30305555040..b891a828aa1 100644 --- a/src/bindings/python/src/pyopenvino/graph/node_output.hpp +++ b/src/bindings/python/src/pyopenvino/graph/node_output.hpp @@ -101,7 +101,7 @@ void regclass_graph_Output(py::module m, std::string typestring) output.def("get_target_inputs", &ov::Output::get_target_inputs, R"( - A set containing handles for all inputs targeted by the output + A set containing handles for all inputs, targeted by the output, referenced by this output handle. :return: Set of Inputs. 
diff --git a/src/bindings/python/src/pyopenvino/graph/util.cpp b/src/bindings/python/src/pyopenvino/graph/util.cpp index 5bfdf7f6a16..497cbe52fde 100644 --- a/src/bindings/python/src/pyopenvino/graph/util.cpp +++ b/src/bindings/python/src/pyopenvino/graph/util.cpp @@ -27,7 +27,7 @@ void regmodule_graph_util(py::module m) { :param index: Output node. :type index: openvino.runtime.Output :return: If it succeeded to calculate both bounds and - they are the same returns Constant operation + they are the same, returns Constant operation from the resulting bound, otherwise Null. :rtype: openvino.runtime.op.Constant or openvino.runtime.Node )"); From e9e59cb9547614aaa102f602ec9f955169c8eaa3 Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Fri, 25 Feb 2022 15:47:21 +0300 Subject: [PATCH 110/310] Moved ngraphConfig.cmake to root (#10618) --- CMakeLists.txt | 4 +++- .../developer_package/download/download_and_extract.cmake | 3 --- .../InferenceEngineDeveloperPackageConfig.cmake.in | 2 +- src/core/CMakeLists.txt | 8 ++++---- 4 files changed, 8 insertions(+), 9 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 8a3ae0134db..fe62c631885 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -34,7 +34,9 @@ endif() message (STATUS "PROJECT ............................... " ${PROJECT_NAME}) message (STATUS "CMAKE_VERSION ......................... " ${CMAKE_VERSION}) message (STATUS "CMAKE_BINARY_DIR ...................... " ${CMAKE_BINARY_DIR}) +message (STATUS "CMAKE_SOURCE_DIR ...................... " ${CMAKE_SOURCE_DIR}) message (STATUS "OpenVINO_SOURCE_DIR ................... " ${OpenVINO_SOURCE_DIR}) +message (STATUS "OpenVINO_BINARY_DIR ................... " ${OpenVINO_BINARY_DIR}) message (STATUS "CMAKE_GENERATOR ....................... " ${CMAKE_GENERATOR}) message (STATUS "CMAKE_C_COMPILER_ID ................... " ${CMAKE_C_COMPILER_ID}) message (STATUS "CMAKE_CXX_COMPILER_ID ................. " ${CMAKE_CXX_COMPILER_ID}) @@ -42,7 +44,7 @@ message (STATUS "CMAKE_BUILD_TYPE ...................... " ${CMAKE_BUILD_TYPE}) message (STATUS "CMAKE_TOOLCHAIN_FILE .................. 
" ${CMAKE_TOOLCHAIN_FILE}) # remove file with exported developer targets to force its regeneration -file(REMOVE "${CMAKE_BINARY_DIR}/ngraph/ngraphTargets.cmake") +file(REMOVE "${CMAKE_BINARY_DIR}/ngraphTargets.cmake") file(REMOVE "${CMAKE_BINARY_DIR}/InferenceEngineTargets.cmake") file(REMOVE "${CMAKE_BINARY_DIR}/OpenVINOTargets.cmake") foreach(component IN LISTS openvino_export_components) diff --git a/cmake/developer_package/download/download_and_extract.cmake b/cmake/developer_package/download/download_and_extract.cmake index a9b71ace064..01662589e2c 100644 --- a/cmake/developer_package/download/download_and_extract.cmake +++ b/cmake/developer_package/download/download_and_extract.cmake @@ -146,8 +146,6 @@ function (DownloadOrExtractInternal URL archive_path unpacked_path folder fattal endfunction(DownloadOrExtractInternal) -file(REMOVE ${CMAKE_BINARY_DIR}/dependencies_64.txt) - function (CheckOrDownloadAndExtract component RELATIVE_URL archive_name unpacked_path result_path folder fattal resultExt use_alternatives sha256 files_to_extract) set (archive_path ${TEMP}/download/${archive_name}) set (status "ON") @@ -164,7 +162,6 @@ function (CheckOrDownloadAndExtract component RELATIVE_URL archive_name unpacked if (${use_alternatives}) set(DEP_INFO "${component}=${URL}") debug_message (STATUS "DEPENDENCY_URL: ${DEP_INFO}") - file(APPEND ${CMAKE_BINARY_DIR}/dependencies_64.txt "${DEP_INFO}\n") endif() debug_message ("checking that unpacked directory exist: ${unpacked_path}") diff --git a/cmake/templates/InferenceEngineDeveloperPackageConfig.cmake.in b/cmake/templates/InferenceEngineDeveloperPackageConfig.cmake.in index 5a895708354..bcec89b34a0 100644 --- a/cmake/templates/InferenceEngineDeveloperPackageConfig.cmake.in +++ b/cmake/templates/InferenceEngineDeveloperPackageConfig.cmake.in @@ -44,7 +44,7 @@ find_dependency(InferenceEngine NO_DEFAULT_PATH) find_dependency(ngraph - PATHS "${CMAKE_CURRENT_LIST_DIR}/src/core" + PATHS "${CMAKE_CURRENT_LIST_DIR}" NO_CMAKE_FIND_ROOT_PATH NO_DEFAULT_PATH) diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index 6301fa70523..cc64b94ca32 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -121,14 +121,14 @@ install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/include/ PATTERN "*.h") configure_package_config_file(${OpenVINO_SOURCE_DIR}/cmake/templates/ngraphConfig.cmake.in - ${CMAKE_CURRENT_BINARY_DIR}/ngraphConfig.cmake + ${CMAKE_BINARY_DIR}/ngraphConfig.cmake INSTALL_DESTINATION cmake) -write_basic_package_version_file(${CMAKE_CURRENT_BINARY_DIR}/ngraphConfigVersion.cmake +write_basic_package_version_file(${CMAKE_BINARY_DIR}/ngraphConfigVersion.cmake VERSION ${IE_VERSION_MAJOR}.${IE_VERSION_MINOR}.${IE_VERSION_PATCH} COMPATIBILITY SameMajorVersion) -install(FILES ${CMAKE_CURRENT_BINARY_DIR}/ngraphConfig.cmake - ${CMAKE_CURRENT_BINARY_DIR}/ngraphConfigVersion.cmake +install(FILES ${CMAKE_BINARY_DIR}/ngraphConfig.cmake + ${CMAKE_BINARY_DIR}/ngraphConfigVersion.cmake DESTINATION "runtime/cmake" COMPONENT core_dev) From 94cbbe063b6bb10dabd5a438134cb490f2104070 Mon Sep 17 00:00:00 2001 From: Ilya Znamenskiy Date: Fri, 25 Feb 2022 15:48:17 +0300 Subject: [PATCH 111/310] [GPU] Cum sum int32/64 support (#10629) --- .../intel_gpu/src/graph/impls/ocl/cum_sum.cpp | 6 + .../cum_sum/cum_sum_kernel_base.cpp | 4 + .../cum_sum/cum_sum_kernel_partial_sum.cpp | 20 ++ .../cum_sum/cum_sum_kernel_partial_sum.h | 2 + .../tests/test_cases/cum_sum_gpu_test.cpp | 305 +++++++++--------- 5 files changed, 176 insertions(+), 161 deletions(-) diff --git 
a/src/plugins/intel_gpu/src/graph/impls/ocl/cum_sum.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/cum_sum.cpp index b4f98213e41..3a6b08e365f 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/cum_sum.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/cum_sum.cpp @@ -72,6 +72,12 @@ namespace detail { attach_cum_sum_impl::attach_cum_sum_impl() { implementation_map::add(impl_types::ocl, cum_sum_impl::create, { + std::make_tuple(data_types::i32, format::bfyx), + std::make_tuple(data_types::i32, format::bfzyx), + std::make_tuple(data_types::i32, format::bfwzyx), + std::make_tuple(data_types::i64, format::bfyx), + std::make_tuple(data_types::i64, format::bfzyx), + std::make_tuple(data_types::i64, format::bfwzyx), std::make_tuple(data_types::f16, format::bfyx), std::make_tuple(data_types::f16, format::bfzyx), std::make_tuple(data_types::f16, format::bfwzyx), diff --git a/src/plugins/intel_gpu/src/kernel_selector/core/actual_kernels/cum_sum/cum_sum_kernel_base.cpp b/src/plugins/intel_gpu/src/kernel_selector/core/actual_kernels/cum_sum/cum_sum_kernel_base.cpp index 7bd6402fb17..6268dd691bc 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/core/actual_kernels/cum_sum/cum_sum_kernel_base.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/core/actual_kernels/cum_sum/cum_sum_kernel_base.cpp @@ -42,8 +42,12 @@ ParamsKey CumSumKernelBase::GetSupportedKey() const { ParamsKey k; k.EnableInputDataType(Datatype::F16); k.EnableInputDataType(Datatype::F32); + k.EnableInputDataType(Datatype::INT32); + k.EnableInputDataType(Datatype::INT64); k.EnableOutputDataType(Datatype::F16); k.EnableOutputDataType(Datatype::F32); + k.EnableOutputDataType(Datatype::INT32); + k.EnableOutputDataType(Datatype::INT64); k.EnableInputLayout(DataLayout::bfyx); k.EnableInputLayout(DataLayout::bfzyx); diff --git a/src/plugins/intel_gpu/src/kernel_selector/core/actual_kernels/cum_sum/cum_sum_kernel_partial_sum.cpp b/src/plugins/intel_gpu/src/kernel_selector/core/actual_kernels/cum_sum/cum_sum_kernel_partial_sum.cpp index d61f244cfe4..e046bfebbf6 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/core/actual_kernels/cum_sum/cum_sum_kernel_partial_sum.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/core/actual_kernels/cum_sum/cum_sum_kernel_partial_sum.cpp @@ -13,6 +13,26 @@ namespace kernel_selector { static constexpr size_t simd = 16; static constexpr size_t BLOCK_SIZE = 16; +ParamsKey CumSumKernelPartialSum::GetSupportedKey() const { + ParamsKey k; + k.EnableInputDataType(Datatype::F16); + k.EnableInputDataType(Datatype::F32); + k.EnableOutputDataType(Datatype::F16); + k.EnableOutputDataType(Datatype::F32); + + k.EnableInputLayout(DataLayout::bfyx); + k.EnableInputLayout(DataLayout::bfzyx); + k.EnableInputLayout(DataLayout::bfwzyx); + k.EnableOutputLayout(DataLayout::bfyx); + k.EnableOutputLayout(DataLayout::bfzyx); + k.EnableOutputLayout(DataLayout::bfwzyx); + + k.EnableTensorOffset(); + k.EnableTensorPitches(); + k.EnableBatching(); + return k; +} + JitConstants CumSumKernelPartialSum::GetJitConstants(const cum_sum_params& params, DispatchData dispatchData) const { auto jits = CumSumKernelBase::GetJitConstants(params, dispatchData); diff --git a/src/plugins/intel_gpu/src/kernel_selector/core/actual_kernels/cum_sum/cum_sum_kernel_partial_sum.h b/src/plugins/intel_gpu/src/kernel_selector/core/actual_kernels/cum_sum/cum_sum_kernel_partial_sum.h index 26277d9d9ba..1734e10c0cb 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/core/actual_kernels/cum_sum/cum_sum_kernel_partial_sum.h +++ 
b/src/plugins/intel_gpu/src/kernel_selector/core/actual_kernels/cum_sum/cum_sum_kernel_partial_sum.h @@ -11,6 +11,8 @@ class CumSumKernelPartialSum : public CumSumKernelBase { public: CumSumKernelPartialSum() : CumSumKernelBase("cum_sum_partial_sum") {} virtual ~CumSumKernelPartialSum() = default; + + ParamsKey GetSupportedKey() const override; protected: struct MultiDispatchData { DispatchData stage_1; diff --git a/src/plugins/intel_gpu/tests/test_cases/cum_sum_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/cum_sum_gpu_test.cpp index 0fda0dda745..ed4f0c8b962 100644 --- a/src/plugins/intel_gpu/tests/test_cases/cum_sum_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/test_cases/cum_sum_gpu_test.cpp @@ -26,6 +26,7 @@ static std::vector cumsum(const std::vector& input, std::vector output(input.size()); int dimNum = 0; std::vector reordered_shape = shape; + if (format == format::bfwzyx) { dimNum = 6; } else if (format == format::bfzyx) { @@ -39,6 +40,7 @@ static std::vector cumsum(const std::vector& input, reordered_shape[i] = shape[i + 2]; } } + std::vector sizeDim(dimNum); sizeDim[dimNum - 1] = 1; for (size_t i = dimNum - 1, mult = 1; i > 0; --i) { @@ -55,6 +57,7 @@ static std::vector cumsum(const std::vector& input, } return fullInd; }; + auto getIndex = [&sizeDim](std::vector fullInd) { size_t index = 0; for (size_t i = 0; i < fullInd.size(); ++i) { @@ -69,22 +72,23 @@ static std::vector cumsum(const std::vector& input, int stopInd = fullInd[axis] + 1; if (reverse) { stopInd = reordered_shape[axis]; - if (exclusive) + if (exclusive) { ++fullInd[axis]; - } - else { + } + } else { fullInd[axis] = 0; if (exclusive) { --stopInd; } } - float res = 0.f; + T res = (T)0; for (; fullInd[axis] < stopInd; ++fullInd[axis]) { auto ind = getIndex(fullInd); res += input[ind]; } - output[i] = res; + + output[i] = (T)res; } return output; } @@ -98,16 +102,6 @@ static std::vector vectorCast(const std::vector& vec) { return ret; } -template -static std::vector generateVector(size_t sz) { - std::vector vec(sz); - T n = 0; - std::generate(vec.begin(), vec.end(), [&n]() { - return n++; - }); - return vec; -} - static cldnn::cum_sum::cum_sum_axis getCumSumAxis(int axis, unsigned sz) { unsigned cldnn_axis = axis; if (axis >= 2) { @@ -133,6 +127,31 @@ static cldnn::cum_sum::cum_sum_axis getCumSumAxis(int axis, unsigned sz) { } } +#define CASE_CUM_SUM_AXIS_0 ::testing::Values(5), ::testing::Values(1), ::testing::Values(1), \ + ::testing::Values(1), ::testing::Values(1), ::testing::Values(1), \ + ::testing::Values(format::bfyx), ::testing::ValuesIn(axes[0]), \ + ::testing::ValuesIn(variants), ::testing::ValuesIn(variants) +#define CASE_CUM_SUM_AXIS_1 ::testing::Values(2), ::testing::Values(5), ::testing::Values(1), \ + ::testing::Values(1), ::testing::Values(1), ::testing::Values(1), \ + ::testing::Values(format::bfyx), ::testing::ValuesIn(axes[1]), \ + ::testing::ValuesIn(variants), ::testing::ValuesIn(variants) +#define CASE_CUM_SUM_AXIS_2 ::testing::Values(5), ::testing::Values(5), ::testing::Values(1), \ + ::testing::Values(1), ::testing::Values(5), ::testing::Values(1), \ + ::testing::Values(format::bfyx), ::testing::ValuesIn(axes[2]), \ + ::testing::ValuesIn(variants), ::testing::ValuesIn(variants) +#define CASE_CUM_SUM_AXIS_3 ::testing::Values(5), ::testing::Values(5), ::testing::Values(1), \ + ::testing::Values(1), ::testing::Values(5), ::testing::Values(5), \ + ::testing::Values(format::bfyx), ::testing::ValuesIn(axes[3]), \ + ::testing::ValuesIn(variants), ::testing::ValuesIn(variants) +#define 
CASE_CUM_SUM_AXIS_4 ::testing::Values(5), ::testing::Values(5), ::testing::Values(1), \ + ::testing::Values(5), ::testing::Values(5), ::testing::Values(5), \ + ::testing::Values(format::bfzyx), ::testing::ValuesIn(axes[4]), \ + ::testing::ValuesIn(variants), ::testing::ValuesIn(variants) +#define CASE_CUM_SUM_AXIS_5 ::testing::Values(5), ::testing::Values(5), ::testing::Values(5), \ + ::testing::Values(5), ::testing::Values(5), ::testing::Values(5), \ + ::testing::Values(format::bfwzyx), ::testing::ValuesIn(axes[5]), \ + ::testing::ValuesIn(variants), ::testing::ValuesIn(variants) + using cum_sum_test_params = std::tuple; // reverse -class cum_sum_gpu : public ::testing::TestWithParam {}; -TEST_P(cum_sum_gpu, basic_test) { - auto p = GetParam(); - auto& engine = get_test_engine(); +template +class cum_sum_gpu : public ::testing::TestWithParam { +public: - auto b = std::get<0>(p); - auto f = std::get<1>(p); - auto w = std::get<2>(p); - auto z = std::get<3>(p); - auto y = std::get<4>(p); - auto x = std::get<5>(p); - tensor shape = tensor{batch(b), feature(f), spatial(x, y, z, w)}; - auto in_out_format = std::get<6>(p); - auto axis = std::get<7>(p); - auto exclusive = std::get<8>(p); - auto reverse = std::get<9>(p); - auto size = 4; - if (in_out_format == format::bfwzyx) - size = 6; - else if (in_out_format == format::bfzyx) - size = 5; - - auto input = engine.allocate_memory({ data_types::f32, in_out_format, shape }); - const int inputSize = b * f * w * z * y * x; - auto inputVals = generateVector(inputSize); - - set_values(input, inputVals); - - topology topology; - topology.add(input_layout("Input0", input->get_layout())); - topology.add(cum_sum("cum_sum", "Input0", getCumSumAxis(axis, size), exclusive, reverse)); - - network network(engine, topology); - - network.set_input_data("Input0", input); - - auto outputs = network.execute(); - - EXPECT_EQ(outputs.size(), size_t(1)); - EXPECT_EQ(outputs.begin()->first, "cum_sum"); - - auto output = outputs.at("cum_sum").get_memory(); - cldnn::mem_lock output_ptr(output, get_test_stream()); - - auto answers = cumsum(inputVals, in_out_format, { b, f, w, z, y, x }, axis, exclusive, reverse); - ASSERT_EQ(output_ptr.size(), answers.size()); - for (size_t i = 0; i < answers.size(); ++i) - { - EXPECT_TRUE(are_equal(answers[i], output_ptr[i])) << i; + data_types get_alloc_data_type(void) { + if (std::is_same::value) + return data_types::f32; + else if (std::is_same::value) + return data_types::f16; + else if (std::is_same::value) + return data_types::i32; + else if (std::is_same::value) + return data_types::i64; + else + throw std::runtime_error("Unsupported cum sum data type in cum_sum_gpu_test.cpp"); } -} + + void execute(cum_sum_params& p) { + auto& engine = get_test_engine(); + + auto b = std::get<0>(p); + auto f = std::get<1>(p); + auto w = std::get<2>(p); + auto z = std::get<3>(p); + auto y = std::get<4>(p); + auto x = std::get<5>(p); + tensor shape = tensor{ batch(b), feature(f), spatial(x, y, z, w) }; + + auto in_out_format = std::get<6>(p); + auto axis = std::get<7>(p); + auto exclusive = std::get<8>(p); + auto reverse = std::get<9>(p); + + auto size = 4; + if (in_out_format == format::bfzyx) + size = 5; + else if (in_out_format == format::bfwzyx) + size = 6; + + auto input = engine.allocate_memory({ get_alloc_data_type(), in_out_format, shape }); + const int inputSize = b * f * w * z * y * x; + VF inputVals = std::is_same::value ? 
+ generate_random_1d(inputSize, -1, 1, 1) : + generate_random_1d(inputSize, -100, 100, 8); + + set_values(input, inputVals); + + topology topology; + topology.add(input_layout("Input0", input->get_layout())); + topology.add(cum_sum("cum_sum", "Input0", getCumSumAxis(axis, size), exclusive, reverse)); + + network network(engine, topology); + + network.set_input_data("Input0", input); + + auto outputs = network.execute(); + + EXPECT_EQ(outputs.size(), size_t(1)); + EXPECT_EQ(outputs.begin()->first, "cum_sum"); + + auto output = outputs.at("cum_sum").get_memory(); + cldnn::mem_lock output_ptr(output, get_test_stream()); + + auto answers = cumsum(inputVals, in_out_format, { b, f, w, z, y, x }, axis, exclusive, reverse); + ASSERT_EQ(output_ptr.size(), answers.size()); + for (size_t i = 0; i < answers.size(); ++i) { + EXPECT_TRUE(are_equal(answers[i], output_ptr[i])) << i; + } + } +}; + +class cum_sum_gpu_fp16 : public ::cum_sum_gpu {}; +class cum_sum_gpu_fp32 : public ::cum_sum_gpu {}; +class cum_sum_gpu_int32 : public ::cum_sum_gpu {}; +class cum_sum_gpu_int64 : public ::cum_sum_gpu {}; + +TEST_P(cum_sum_gpu_fp16, basic) { auto p = GetParam(); execute(p); } +TEST_P(cum_sum_gpu_fp32, basic) { auto p = GetParam(); execute(p); } +TEST_P(cum_sum_gpu_int32, basic) { auto p = GetParam(); execute(p); } +TEST_P(cum_sum_gpu_int64, basic) { auto p = GetParam(); execute(p); } namespace { std::vector> axes = { - {0}, - {0, 1}, - {0, 1, 2}, - {0, 1, 2, 3}, - {0, 1, 2, 3, 4}, - {0, 1, 2, 3, 4, 5}, + { 0 }, + { 0, 1 }, + { 0, 1, 2 }, + { 0, 1, 2, 3 }, + { 0, 1, 2, 3, 4 }, + { 0, 1, 2, 3, 4, 5 }, }; - std::vector variants = {false, true}; + std::vector variants = { false, true }; } -INSTANTIATE_TEST_SUITE_P( - axis_0, - cum_sum_gpu, - ::testing::Combine( - ::testing::Values(5), - ::testing::Values(1), - ::testing::Values(1), - ::testing::Values(1), - ::testing::Values(1), - ::testing::Values(1), - ::testing::Values(format::bfyx), - ::testing::ValuesIn(axes[0]), - ::testing::ValuesIn(variants), - ::testing::ValuesIn(variants) - )); -INSTANTIATE_TEST_SUITE_P( - axis_1, - cum_sum_gpu, - ::testing::Combine( - ::testing::Values(2), - ::testing::Values(5), - ::testing::Values(1), - ::testing::Values(1), - ::testing::Values(1), - ::testing::Values(1), - ::testing::Values(format::bfyx), - ::testing::ValuesIn(axes[1]), - ::testing::ValuesIn(variants), - ::testing::ValuesIn(variants) - )); +INSTANTIATE_TEST_SUITE_P(axis_0, cum_sum_gpu_fp16, ::testing::Combine(CASE_CUM_SUM_AXIS_0)); +INSTANTIATE_TEST_SUITE_P(axis_0, cum_sum_gpu_fp32, ::testing::Combine(CASE_CUM_SUM_AXIS_0)); +INSTANTIATE_TEST_SUITE_P(axis_0, cum_sum_gpu_int32, ::testing::Combine(CASE_CUM_SUM_AXIS_0)); +INSTANTIATE_TEST_SUITE_P(axis_0, cum_sum_gpu_int64, ::testing::Combine(CASE_CUM_SUM_AXIS_0)); -INSTANTIATE_TEST_SUITE_P( - axis_2, - cum_sum_gpu, - ::testing::Combine( - ::testing::Values(5), - ::testing::Values(5), - ::testing::Values(1), - ::testing::Values(1), - ::testing::Values(5), - ::testing::Values(1), - ::testing::Values(format::bfyx), - ::testing::ValuesIn(axes[2]), - ::testing::ValuesIn(variants), - ::testing::ValuesIn(variants) - )); +INSTANTIATE_TEST_SUITE_P(axis_1, cum_sum_gpu_fp16, ::testing::Combine(CASE_CUM_SUM_AXIS_1)); +INSTANTIATE_TEST_SUITE_P(axis_1, cum_sum_gpu_fp32, ::testing::Combine(CASE_CUM_SUM_AXIS_1)); +INSTANTIATE_TEST_SUITE_P(axis_1, cum_sum_gpu_int32, ::testing::Combine(CASE_CUM_SUM_AXIS_1)); +INSTANTIATE_TEST_SUITE_P(axis_1, cum_sum_gpu_int64, ::testing::Combine(CASE_CUM_SUM_AXIS_1)); -INSTANTIATE_TEST_SUITE_P( - axis_3, - 
cum_sum_gpu, - ::testing::Combine( - ::testing::Values(5), - ::testing::Values(5), - ::testing::Values(1), - ::testing::Values(1), - ::testing::Values(5), - ::testing::Values(5), - ::testing::Values(format::bfyx), - ::testing::ValuesIn(axes[3]), - ::testing::ValuesIn(variants), - ::testing::ValuesIn(variants) - )); +INSTANTIATE_TEST_SUITE_P(axis_2, cum_sum_gpu_fp16, ::testing::Combine(CASE_CUM_SUM_AXIS_2)); +INSTANTIATE_TEST_SUITE_P(axis_2, cum_sum_gpu_fp32, ::testing::Combine(CASE_CUM_SUM_AXIS_2)); +INSTANTIATE_TEST_SUITE_P(axis_2, cum_sum_gpu_int32, ::testing::Combine(CASE_CUM_SUM_AXIS_2)); +INSTANTIATE_TEST_SUITE_P(axis_2, cum_sum_gpu_int64, ::testing::Combine(CASE_CUM_SUM_AXIS_2)); -INSTANTIATE_TEST_SUITE_P( - axis_4, - cum_sum_gpu, - ::testing::Combine( - ::testing::Values(5), - ::testing::Values(5), - ::testing::Values(1), - ::testing::Values(5), - ::testing::Values(5), - ::testing::Values(5), - ::testing::Values(format::bfzyx), - ::testing::ValuesIn(axes[4]), - ::testing::ValuesIn(variants), - ::testing::ValuesIn(variants) - )); +INSTANTIATE_TEST_SUITE_P(axis_3, cum_sum_gpu_fp16, ::testing::Combine(CASE_CUM_SUM_AXIS_3)); +INSTANTIATE_TEST_SUITE_P(axis_3, cum_sum_gpu_fp32, ::testing::Combine(CASE_CUM_SUM_AXIS_3)); +INSTANTIATE_TEST_SUITE_P(axis_3, cum_sum_gpu_int32, ::testing::Combine(CASE_CUM_SUM_AXIS_3)); +INSTANTIATE_TEST_SUITE_P(axis_3, cum_sum_gpu_int64, ::testing::Combine(CASE_CUM_SUM_AXIS_3)); -INSTANTIATE_TEST_SUITE_P( - axis_5, - cum_sum_gpu, - ::testing::Combine( - ::testing::Values(5), - ::testing::Values(5), - ::testing::Values(5), - ::testing::Values(5), - ::testing::Values(5), - ::testing::Values(5), - ::testing::Values(format::bfwzyx), - ::testing::ValuesIn(axes[5]), - ::testing::ValuesIn(variants), - ::testing::ValuesIn(variants) - )); +INSTANTIATE_TEST_SUITE_P(axis_4, cum_sum_gpu_fp16, ::testing::Combine(CASE_CUM_SUM_AXIS_4)); +INSTANTIATE_TEST_SUITE_P(axis_4, cum_sum_gpu_fp32, ::testing::Combine(CASE_CUM_SUM_AXIS_4)); +INSTANTIATE_TEST_SUITE_P(axis_4, cum_sum_gpu_int32, ::testing::Combine(CASE_CUM_SUM_AXIS_4)); +INSTANTIATE_TEST_SUITE_P(axis_4, cum_sum_gpu_int64, ::testing::Combine(CASE_CUM_SUM_AXIS_4)); + +INSTANTIATE_TEST_SUITE_P(axis_5, cum_sum_gpu_fp16, ::testing::Combine(CASE_CUM_SUM_AXIS_5)); +INSTANTIATE_TEST_SUITE_P(axis_5, cum_sum_gpu_fp32, ::testing::Combine(CASE_CUM_SUM_AXIS_5)); +INSTANTIATE_TEST_SUITE_P(axis_5, cum_sum_gpu_int32, ::testing::Combine(CASE_CUM_SUM_AXIS_5)); +INSTANTIATE_TEST_SUITE_P(axis_5, cum_sum_gpu_int64, ::testing::Combine(CASE_CUM_SUM_AXIS_5)); // FIXME: This test fails on some driver versions. 
Looks like UB in impl or driver issue TEST(cum_sum_gpu_f16, DISABLED_basic_1d) { From 18ff8afe639de70aad164efdb69a91200f58d35c Mon Sep 17 00:00:00 2001 From: Egor Duplensky Date: Fri, 25 Feb 2022 16:11:16 +0300 Subject: [PATCH 112/310] [IE TESTS] Avoid extra checks for test skipping (#10609) Avoid double iteration over skip patterns Skip test after first pattern match --- .../shared_test_classes/src/base/ov_subgraph.cpp | 14 +++++++++----- .../src/skip_tests_config.cpp | 10 +++++++--- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/src/tests/functional/shared_test_classes/src/base/ov_subgraph.cpp b/src/tests/functional/shared_test_classes/src/base/ov_subgraph.cpp index 3d5de5ec2d1..e3313db12b8 100644 --- a/src/tests/functional/shared_test_classes/src/base/ov_subgraph.cpp +++ b/src/tests/functional/shared_test_classes/src/base/ov_subgraph.cpp @@ -46,12 +46,16 @@ void SubgraphBaseTest::run() { #else if (sigsetjmp(CommonTestUtils::env, 1) == 0) { #endif - LayerTestsUtils::PassRate::Statuses status = FuncTestUtils::SkipTestsConfig::currentTestIsDisabled() - ? LayerTestsUtils::PassRate::Statuses::SKIPPED - : LayerTestsUtils::PassRate::Statuses::CRASHED; + bool isCurrentTestDisabled = FuncTestUtils::SkipTestsConfig::currentTestIsDisabled(); + + LayerTestsUtils::PassRate::Statuses status = isCurrentTestDisabled ? + LayerTestsUtils::PassRate::Statuses::SKIPPED : + LayerTestsUtils::PassRate::Statuses::CRASHED; summary.setDeviceName(targetDevice); summary.updateOPsStats(function, status); - SKIP_IF_CURRENT_TEST_IS_DISABLED(); + + if (isCurrentTestDisabled) + GTEST_SKIP() << "Disabled test due to configuration" << std::endl; ASSERT_FALSE(targetStaticShapes.empty()) << "Target Static Shape is empty!!!"; std::string errorMessage; @@ -68,7 +72,7 @@ void SubgraphBaseTest::run() { generate_inputs(targetStaticShapeVec); } catch (const std::exception& ex) { throw std::runtime_error("Incorrect target static shape: " + - CommonTestUtils::vec2str(targetStaticShapeVec) + " " + ex.what()); + CommonTestUtils::vec2str(targetStaticShapeVec) + " " + ex.what()); } infer(); validate(); diff --git a/src/tests/ie_test_utils/functional_test_utils/src/skip_tests_config.cpp b/src/tests/ie_test_utils/functional_test_utils/src/skip_tests_config.cpp index 56e90a7ce97..0eac0439e96 100644 --- a/src/tests/ie_test_utils/functional_test_utils/src/skip_tests_config.cpp +++ b/src/tests/ie_test_utils/functional_test_utils/src/skip_tests_config.cpp @@ -14,15 +14,19 @@ namespace SkipTestsConfig { bool disable_tests_skipping = false; bool currentTestIsDisabled() { - bool skip_test = false; + if (disable_tests_skipping) + return false; + const auto fullName = ::testing::UnitTest::GetInstance()->current_test_info()->test_case_name() + std::string(".") + ::testing::UnitTest::GetInstance()->current_test_info()->name(); + for (const auto &pattern : disabledTestPatterns()) { std::regex re(pattern); if (std::regex_match(fullName, re)) - skip_test = true; + return true; } - return skip_test && !disable_tests_skipping; + + return false; } std::vector readSkipTestConfigFiles(const std::vector& filePaths) { From 7b58f931b58742a8f3c0593f932949eca5371942 Mon Sep 17 00:00:00 2001 From: Tatiana Savina Date: Fri, 25 Feb 2022 18:22:13 +0300 Subject: [PATCH 113/310] [DOCS] Add wb images for nightly docs fix (#10663) * add img * wb img for input * dataset added * add img * wb img for input * dataset added * ov_fix --- docs/_static/images/DL_WB_start.png | 3 +++ docs/_static/images/accuracy_yolov4.png | 3 +++ 
 docs/_static/images/color_transformations.png | 3 +++
 docs/_static/images/configure_input.png | 3 +++
 docs/_static/images/convert_omz_to_IR.png | 3 +++
 docs/_static/images/create_project.png | 3 +++
 .../_static/images/create_project_selected.png | 3 +++
 docs/_static/images/custom_dataset.png | 3 +++
 docs/_static/images/dataset.png | 3 +++
 docs/_static/images/dataset_import.png | 3 +++
 docs/_static/images/dataset_imported.png | 3 +++
 docs/_static/images/dataset_selection.png | 3 +++
 .../images/generate_datasets_closeup.png | 3 +++
 docs/_static/images/github.png | 3 +++
 docs/_static/images/horizontal_flip.png | 3 +++
 .../_static/images/horizontal_flip_closeup.png | 3 +++
 .../images/import_annotated_dataset.png | 3 +++
 docs/_static/images/import_dataset_page.png | 3 +++
 docs/_static/images/import_image_dataset.png | 3 +++
 docs/_static/images/import_model.png | 3 +++
 docs/_static/images/import_tokenizer.png | 3 +++
 docs/_static/images/import_yolov4.png | 3 +++
 docs/_static/images/input_name.png | 3 +++
 docs/_static/images/input_shape.png | 3 +++
 docs/_static/images/inputs.png | 3 +++
 docs/_static/images/model_analysis.png | 3 +++
 docs/_static/images/model_analysis_details.png | 3 +++
 docs/_static/images/noise_injection.png | 3 +++
 docs/_static/images/omz_import.png | 3 +++
 docs/_static/images/optimization_settings.png | 3 +++
 .../_static/images/optimize_face_detection.png | 3 +++
 docs/_static/images/pack.png | 3 +++
 docs/_static/images/performance.png | 3 +++
 docs/_static/images/performance_change.jpeg | Bin 0 -> 166310 bytes
 docs/_static/images/random_erase.png | 3 +++
 docs/_static/images/rgb.png | 3 +++
 docs/_static/images/scales.png | 3 +++
 docs/_static/images/select_tokenizer.png | 3 +++
 docs/_static/images/start_page_dl_wb.png | 3 +++
 docs/_static/images/text_dataset.png | 3 +++
 docs/_static/images/tokenizer_fill.png | 3 +++
 docs/_static/images/vertical_flip.png | 3 +++
 docs/_static/images/yolov4_imported.png | 3 +++
 43 files changed, 126 insertions(+)
 create mode 100644 docs/_static/images/DL_WB_start.png
 create mode 100644 docs/_static/images/accuracy_yolov4.png
 create mode 100644 docs/_static/images/color_transformations.png
 create mode 100644 docs/_static/images/configure_input.png
 create mode 100644 docs/_static/images/convert_omz_to_IR.png
 create mode 100644 docs/_static/images/create_project.png
 create mode 100644 docs/_static/images/create_project_selected.png
 create mode 100644 docs/_static/images/custom_dataset.png
 create mode 100644 docs/_static/images/dataset.png
 create mode 100644 docs/_static/images/dataset_import.png
 create mode 100644 docs/_static/images/dataset_imported.png
 create mode 100644 docs/_static/images/dataset_selection.png
 create mode 100644 docs/_static/images/generate_datasets_closeup.png
 create mode 100644 docs/_static/images/github.png
 create mode 100644 docs/_static/images/horizontal_flip.png
 create mode 100644 docs/_static/images/horizontal_flip_closeup.png
 create mode 100644 docs/_static/images/import_annotated_dataset.png
 create mode 100644 docs/_static/images/import_dataset_page.png
 create mode 100644 docs/_static/images/import_image_dataset.png
 create mode 100644 docs/_static/images/import_model.png
 create mode 100644 docs/_static/images/import_tokenizer.png
 create mode 100644 docs/_static/images/import_yolov4.png
 create mode 100644 docs/_static/images/input_name.png
 create mode 100644 docs/_static/images/input_shape.png
 create mode 100644 docs/_static/images/inputs.png
 create mode 100644 docs/_static/images/model_analysis.png
 create mode 100644 docs/_static/images/model_analysis_details.png
 create mode 100644 docs/_static/images/noise_injection.png
 create mode 100644 docs/_static/images/omz_import.png
 create mode 100644 docs/_static/images/optimization_settings.png
 create mode 100644 docs/_static/images/optimize_face_detection.png
 create mode 100644 docs/_static/images/pack.png
 create mode 100644 docs/_static/images/performance.png
 create mode 100644 docs/_static/images/performance_change.jpeg
 create mode 100644 docs/_static/images/random_erase.png
 create mode 100644 docs/_static/images/rgb.png
 create mode 100644 docs/_static/images/scales.png
 create mode 100644 docs/_static/images/select_tokenizer.png
 create mode 100644 docs/_static/images/start_page_dl_wb.png
 create mode 100644 docs/_static/images/text_dataset.png
 create mode 100644 docs/_static/images/tokenizer_fill.png
 create mode 100644 docs/_static/images/vertical_flip.png
 create mode 100644 docs/_static/images/yolov4_imported.png

diff --git a/docs/_static/images/DL_WB_start.png b/docs/_static/images/DL_WB_start.png
new file mode 100644
index 00000000000..322c4480219
--- /dev/null
+++ b/docs/_static/images/DL_WB_start.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7e7dae1b0f7316d11d4d0459fc372d632e1b47edf0addcb90d26be7dc1492e21
+size 109620
diff --git a/docs/_static/images/accuracy_yolov4.png b/docs/_static/images/accuracy_yolov4.png
new file mode 100644
index 00000000000..281395e2c0b
--- /dev/null
+++ b/docs/_static/images/accuracy_yolov4.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e44a115c712032a30e705771a0f1a49221090c340901d0497b26d208fed6cc8c
+size 110224
diff --git a/docs/_static/images/color_transformations.png b/docs/_static/images/color_transformations.png
new file mode 100644
index 00000000000..08efcf7b536
--- /dev/null
+++ b/docs/_static/images/color_transformations.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5e4d3d29a4cec7dfffa3c743336925ce565722f9950c8cd3f9c6b6bf75c38c53
+size 508773
diff --git a/docs/_static/images/configure_input.png b/docs/_static/images/configure_input.png
new file mode 100644
index 00000000000..c6996203bbd
--- /dev/null
+++ b/docs/_static/images/configure_input.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:75196ce75c33936fb8b06bc27e7328b23cf08f066a6f996f6a413fee42aff6d6
+size 28423
diff --git a/docs/_static/images/convert_omz_to_IR.png b/docs/_static/images/convert_omz_to_IR.png
new file mode 100644
index 00000000000..963666d2ab6
--- /dev/null
+++ b/docs/_static/images/convert_omz_to_IR.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8f0cd7ef06dc11c0f25409b2625869f6b725a8a004d9016a858b25c59dd01465
+size 19178
diff --git a/docs/_static/images/create_project.png b/docs/_static/images/create_project.png
new file mode 100644
index 00000000000..9abf32f56da
--- /dev/null
+++ b/docs/_static/images/create_project.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:de6f29d1c04a9affba1ca4f065f6d403001033a58b0cb586f1e39dc3701034c0
+size 83105
diff --git a/docs/_static/images/create_project_selected.png b/docs/_static/images/create_project_selected.png
new file mode 100644
index 00000000000..ebd1deb909c
--- /dev/null
+++ b/docs/_static/images/create_project_selected.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6895aa172437708eda816bb50acce4307c58da3b85b095c7f3e3c1fc9f24140a
+size 121292
diff --git a/docs/_static/images/custom_dataset.png b/docs/_static/images/custom_dataset.png
new file mode 100644
index 00000000000..5a11ef4c81f
--- /dev/null
+++ b/docs/_static/images/custom_dataset.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b8bcf5b310b6d2d6cf4a14635f82491ec4fa8f1efbf6f0485b4968d3f01e36ee
+size 540797
diff --git a/docs/_static/images/dataset.png b/docs/_static/images/dataset.png
new file mode 100644
index 00000000000..960d4d032a7
--- /dev/null
+++ b/docs/_static/images/dataset.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:caf1538ca8b64cbc243ab8e4a87b38a7eb071c2f19955fe881cd221807f485b7
+size 312545
diff --git a/docs/_static/images/dataset_import.png b/docs/_static/images/dataset_import.png
new file mode 100644
index 00000000000..8495b48a5e8
--- /dev/null
+++ b/docs/_static/images/dataset_import.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5bea2db4a7618e4370398d29358bffaad8bb9fe61316012b38ba54d305615a77
+size 37413
diff --git a/docs/_static/images/dataset_imported.png b/docs/_static/images/dataset_imported.png
new file mode 100644
index 00000000000..8187b1a8c1a
--- /dev/null
+++ b/docs/_static/images/dataset_imported.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:974bdd3a7cfba238ec7647af349fbd5e291fb79f30f3ab337df274c338b6ee6e
+size 20895
diff --git a/docs/_static/images/dataset_selection.png b/docs/_static/images/dataset_selection.png
new file mode 100644
index 00000000000..64352e54679
--- /dev/null
+++ b/docs/_static/images/dataset_selection.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f551cf331e3dd23598f80511b6b4be27829a578eca9aa8a83e63f70c743822bb
+size 25656
diff --git a/docs/_static/images/generate_datasets_closeup.png b/docs/_static/images/generate_datasets_closeup.png
new file mode 100644
index 00000000000..051a6f3a43d
--- /dev/null
+++ b/docs/_static/images/generate_datasets_closeup.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1bef9577a4ff23b7f102a314c90fffd167fab56b0f6a9492970e02de3a0de998
+size 399477
diff --git a/docs/_static/images/github.png b/docs/_static/images/github.png
new file mode 100644
index 00000000000..4bf56a3d3e4
--- /dev/null
+++ b/docs/_static/images/github.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7b2749d93dff16fc9062a0fa01fd694bf19385a0b4f3d0e409eb56f2648e3cfc
+size 11929
diff --git a/docs/_static/images/horizontal_flip.png b/docs/_static/images/horizontal_flip.png
new file mode 100644
index 00000000000..ff4dc034864
--- /dev/null
+++ b/docs/_static/images/horizontal_flip.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f66d41137ef6cfd0fe4bd8f635814203bc963b546a20da9718b42f58b6fa4bd9
+size 168160
diff --git a/docs/_static/images/horizontal_flip_closeup.png b/docs/_static/images/horizontal_flip_closeup.png
new file mode 100644
index 00000000000..1ba92fd8615
--- /dev/null
+++ b/docs/_static/images/horizontal_flip_closeup.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f57758f726846dc6f83dc14ed0f96622a7d527287330b6c6f8ac95cecfd30cbf
+size 432513
diff --git a/docs/_static/images/import_annotated_dataset.png b/docs/_static/images/import_annotated_dataset.png
new file mode 100644
index 00000000000..12da21ab981
--- /dev/null
+++ b/docs/_static/images/import_annotated_dataset.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:51c33a7b19d2d21bab642372efcbeccdbcb6f8813d98cfe8fb9c03f801287474
+size 38600
diff --git a/docs/_static/images/import_dataset_page.png b/docs/_static/images/import_dataset_page.png
new file mode 100644
index 00000000000..18eac942c2f
--- /dev/null
+++ b/docs/_static/images/import_dataset_page.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3994e201f3077a11da2870ae6d13242d0d0d353d35f837cdd7e01686a445859d
+size 266463
diff --git a/docs/_static/images/import_image_dataset.png b/docs/_static/images/import_image_dataset.png
new file mode 100644
index 00000000000..144a8dd4de7
--- /dev/null
+++ b/docs/_static/images/import_image_dataset.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f055ebe36900ad24d4501d89f52e343f7c2b14a025e020b747c7272325d2c4a2
+size 10521
diff --git a/docs/_static/images/import_model.png b/docs/_static/images/import_model.png
new file mode 100644
index 00000000000..34c266a1a69
--- /dev/null
+++ b/docs/_static/images/import_model.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3e628143bbd02dc43ccff24e7f436f8f22fcffb3a606fdf65e49c18097741949
+size 50825
diff --git a/docs/_static/images/import_tokenizer.png b/docs/_static/images/import_tokenizer.png
new file mode 100644
index 00000000000..192c5a0823b
--- /dev/null
+++ b/docs/_static/images/import_tokenizer.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9f9b185567143692721ea68b0afd1c63d000df1d6b216c51567306d98bac5de8
+size 31986
diff --git a/docs/_static/images/import_yolov4.png b/docs/_static/images/import_yolov4.png
new file mode 100644
index 00000000000..77c88ad2ac3
--- /dev/null
+++ b/docs/_static/images/import_yolov4.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:94e10d4ec22ca3822faad9d0eea18661b3feec09611ae7dd19b64b8d636487cd
+size 47182
diff --git a/docs/_static/images/input_name.png b/docs/_static/images/input_name.png
new file mode 100644
index 00000000000..6ed2f94db7e
--- /dev/null
+++ b/docs/_static/images/input_name.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dcdb6eac41fb75ae1248c7103046fb51298f0d0c08b52ff22cc5f8c81b394741
+size 15202
diff --git a/docs/_static/images/input_shape.png b/docs/_static/images/input_shape.png
new file mode 100644
index 00000000000..d407c3b17c8
--- /dev/null
+++ b/docs/_static/images/input_shape.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:af7835134b35136d8b50e2b22944a88f2bcfb3ff114afdfd052d52b4358212b3
+size 22970
diff --git a/docs/_static/images/inputs.png b/docs/_static/images/inputs.png
new file mode 100644
index 00000000000..d4518d9de79
--- /dev/null
+++ b/docs/_static/images/inputs.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f1f8bc12837e03b1a2c1386c2bac512c21b1fb073d990379079c317488e9ce1c
+size 39015
diff --git a/docs/_static/images/model_analysis.png b/docs/_static/images/model_analysis.png
new file mode 100644
index 00000000000..55f0323e314
--- /dev/null
+++ b/docs/_static/images/model_analysis.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9640f8e9260a4ea796b7768de3368762719644319cfdad332fe1a8dc79946a4c
+size 49674
diff --git a/docs/_static/images/model_analysis_details.png b/docs/_static/images/model_analysis_details.png
new file mode 100644
index 00000000000..975b8714506
--- /dev/null
+++ b/docs/_static/images/model_analysis_details.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e02a91f6ae4a94f7a5bc08987b2bd5a239d13dd80ae485189f79a2ee30bc5528
+size 13546
diff --git a/docs/_static/images/noise_injection.png b/docs/_static/images/noise_injection.png
new file mode 100644
index 00000000000..3011823cf99
--- /dev/null
+++ b/docs/_static/images/noise_injection.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0cd558b8355e04507aedc1b18ca8075e74d934d311320ea25942a646d47f9a21
+size 366753
diff --git a/docs/_static/images/omz_import.png b/docs/_static/images/omz_import.png
new file mode 100644
index 00000000000..b66de979251
--- /dev/null
+++ b/docs/_static/images/omz_import.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:349e96028d651807b887cf73a4be79eb2123e62c6abe2ed3ec4b419e306ce3d5
+size 56444
diff --git a/docs/_static/images/optimization_settings.png b/docs/_static/images/optimization_settings.png
new file mode 100644
index 00000000000..91a3314121f
--- /dev/null
+++ b/docs/_static/images/optimization_settings.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d9c013e8d76cf87c461407132ef6eb59e57c14f8c4cf242f71ae617140bf4209
+size 120403
diff --git a/docs/_static/images/optimize_face_detection.png b/docs/_static/images/optimize_face_detection.png
new file mode 100644
index 00000000000..d984d5f1e53
--- /dev/null
+++ b/docs/_static/images/optimize_face_detection.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:499c44eb2099f8f0de23800b6dd75395c110783eb80f92f1bb91034c7f93724d
+size 132642
diff --git a/docs/_static/images/pack.png b/docs/_static/images/pack.png
new file mode 100644
index 00000000000..6e9c713f394
--- /dev/null
+++ b/docs/_static/images/pack.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:91b43730b70b6d83bffb6cfbe9982cf806b0ac7f8a58001ba74596cface2fe5d
+size 92680
diff --git a/docs/_static/images/performance.png b/docs/_static/images/performance.png
new file mode 100644
index 00000000000..e874daf3eb5
--- /dev/null
+++ b/docs/_static/images/performance.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b64c598656309f5156e291d93a2865537ebe2b87f62688b2d1fd2da35c63a471
+size 110383
diff --git a/docs/_static/images/performance_change.jpeg b/docs/_static/images/performance_change.jpeg
new file mode 100644
index 0000000000000000000000000000000000000000..71ba4eab00c9284d4bf5629e968a4f9a7dd5881d
GIT binary patch
literal 166310
z*nl-l8$92Q{Aa`5^`!CDEPr{(PkURHii`flYdsW*Oz60uS7^^p-`ES_CYFKwKv)4~ge?P9fQ&f+01T(`Ym0LI7CU4TdOej{}w zPv&&95oIKv&a;DyMLs5myB40&dj0GiN~vwxaIYl_VwSP;mfMie*g`nzh|#0UB0IO& z7mXGHLXPqW@#DSDvUii$x_IooXx+mAiRImNHds%SK%E>~WGs``zx2&Qq@ zdjq_XNd8R4mtwIW;Sjd>G>`Mrv-b5@^^8*_taxGxO;ghWBg*fj?SGN>NkXzMZK330 zopBVet=L4A@0yPn#}kDXE|dMJpQy~%`%Y{3M=Hq#*clG~EBdn?$a0G@@}7~f3|9VW$>N9 z?TIl zc%0tEJh7L9{dAGI0q9w;fN*&E4c1kQIdaE)d$-#Cq*3rGu zB_9xXZbRhj(=<~sy{5a28bcg+9NIIcsB*vC&WXVstX`~|q${rZ4YD|anA*6Nh;eAI zFd=SyYuvx(EvP(Q^UnFJSv|@VJ8>qNpuMDh@qezhv%}gMXvP>5Y%|qsFkq(z6Ei|x z9gZ+Z;^Ic_18^<1)4W@l?k79ubp;rWBA-fbg>8momfKP5*Q!S{e}16#eCTo#xdUB% zodBzSQ8Waho;XXLGsg>zwCcvm63~An}AxZoivD&aqyv65s2;* z{m#M~sRKsOl*4i<(EF4BO3>zb6D38Y4o`XE)7ISI9l1s2jiJ|oDqpdgdS7NE{?W~x z8hk761g*(YyR~)*Ej|Va)B|t10Ec_sM|y=Q5ODwO7ar@S?$X5gP(SbG*hQ}Lv+>)I zJ4Qfr)-g%ig!Q8&rDFkqN@@yIe)>FaGq+SzJAwkUM5O;1d^mHQ1~`$ls=*>Km?C7vU>PukkO2siY0c8`+BTfpRQyxBQ{R5X z-koY|&0u3$QC;Fr@CG{ML0B9H*@}vbzH zZxpfhXNRYR1m(&tL+OS9TYMdsW}5p?#TOomyX8kUYZiqgnC)&}KmvYoQDM;uRBiVq z3J5#bkjuYv{Y)N*fW4Bndsn6t+}&k2?aMqWnlOS27@*ZsJ&VXox{YxL;kvAUCA31y z+Iz%%k==7f<(Kx{PIA1OcFO*;dR|5dkFQ=9leYbM=8eZqp^8;RmVN+8uk|_^EMiV` z=V#wgcwXZbNRF2L#2<30y^>FByG_++FM$|pi8PiLAKRfzZ zBb#1z<`!e{?9R~D%Iw%KJP-5od|SWw+(bvV3*8F613RTpeZiYh+X3Lefnmd~chA#jWNnDki za!E>BMbNzYqu$6gL!aC3zGih{71y<-jWEjImc4!!6{YSu4a|jN0}u!7;1G>(S3Qh! zt&NxfR?b&2c!%eCDB`T(pwY!3G^7g6=S<|`&SnKAx3CH7X}dpYoUYP9UzQE^vtVB- z7-tDiN2S0;RNfgF6jV`v5EAJ1?N)AZjlHuTzRH9uh%1~6{akXX%s`{D_f7@^(;JRR zRhFB#yO<(nR%&ps01fo>V@0)I@V00%+n8IL4<>pYW{PG?sdB;P=+#>BLk9GW`*=UQ zryEd~@jJX&*7yaVCAqO`AKm*XeWDGOBQaX-7gM1hTtu4^h|1@d+KIy)YP6qK6ek>Z ze-%8s&`>t$6DP^xn}f;^uU4z|NGI7Ios-d`J(h9*nVIOaqDeYrXAoUUp>+uEg?^*d zYO|qHT>`|Ax7T!Nc}FgWW3kE=aYaOyEGRV7kL;K>-lZYbclsUOsSpuS_Q;ik>+fBR@VuQAiFgM29f~p*cqWUkNNtQwV^z*40ps|M@2+QiU|fyHzn4GjQXoTS z#$pUca-FTfqhQtoiwgeSfbK8>-WGLm1tAs7#1zJdQd>B&rtY7&I~>q$d=n*}Fm0o} zI5+(Km?w|RpQY1y>@e*^M8$M9Tn^;8t<2U#j$6FEUZ7q0)=M_Um;-V?Hx;l(SSld$Y9z-$ui^H6|0@X9Cb;S0Lqz2dLbi2^!Nk1cg z{qkKveuRC<%GOajky&Z_QZKU#C3yH|l{_BHD}x)uLIqM|fg!iqjb0<55tA2fBe6kaRcp=xky>3L;)n)UV3|pr>{(+zcJI!Q@U?S-(8J( zSLyiO@Kf%E?!L$A`hru-QNX-q1hO3$9)}@(&)3yxc0&U0HivQM?+jCXL3mM(y`02< zuEw1sx2nZ?D&gayl1C-9B}yb8{a2@r-Ee*N=H$to`3%iF_Yxy&_5?lJVvHxnW8s(`fY^beRcJzFkDj;M-~S;+cdmaoI!F3?Eg9B@PN^%N|(Y_nhW z@RT4tJ2D{!x+Sp!i}u=ZM{gVr!TsNtDrtEec;C2}e)I(0!#ckTMhAOmP~q`2nx4!t;O0t<+2 z6b!lMYoyraDLD27M=1G3?N&eWWXeIUy6VWCvMW{5(Qnr-OG{+r2g#;712MbH;=3Wt5~S)aJnu z?NEDVtC+xXb|5Zz>3{rp4wDZ27^~bbY|3r>`PS}vDq=G2x6-edj1eJqUNp+47T;h` zq8{QC$TM5Dt(p?#nkoWssFwH{?}^!?|6s%y?Om#2{Nze>kdfrNUr5-zWJ2KQhM!g`7cRCrUV_~|q0+KG5Rk+68p^8{ zk}4W{n&)_Jf}0njtspU7=i913x)Vtx6M)FOkt2r>08N(=V)%x7UE{V4Z+ol-1_95K zZn}H7{?BFAyfc8iS)$fp(aQDM(0tFEioJB_n-Ii9PGMqzz|?zVBXMDPs@FY(Jgtj| z4^zs&d8J#+e7H9`1~~D}Z)6fPdR5I=O`2Ai#`ZXFl+y!3|MROrbu&8wzMnpPy&yY~ zerM;OQJZkoI{=8KxuSU>P*Z1!9JgggpknZi$Afgs5xI~^)9k(nf5@i&Q-iDMq(JR= zc1CK?_sLo8T{ODxbX+(OuK-vEf<5)&(4}TZsyJ)73i0{E>VuVu>JY|AEuB93z-(9m zQ@M+F+NSvhft^G5%*P@AIx@!SN1xgk)6+FPGe5M{)K3ke{!o%^eKXqomAUAmc}w8v z2f>K_1k8YrOhDz)T_JjSw=Uh#(iu}uI{?hf%?E0rTLuGOlo4aHzWdW&s8e&C{?^3^ z!8i7dWj5Lv(=Aa=0*eRk~aavD$GIq0Wm1_=)JJvuS7lw^_8$p_>EMJ_8;Ja znw$NMk{|{p#6v*Sn%c;Ah1PW&`f5}R>gl_}#?>9hnZX{}y6R2G8o7(qk)|hP##ryh z!a0Ljw#{KOUF{+gDRkA+%ALcHIpg1c)PKguh5=kTYv>rujWGDlb6akP1C2ns1P}v@4~Hw-5O8;8oM?Lmq%j6;r-hz1)!lUr`cLQI;umgSZkLb_ANZ$+)tF*&G}`ZR|-#0dFzeSyel(P z{^a|U#NBvJyhl<0^$y%bKI!KS$=S+85dt8PtDE~M8|VX*t=ZZ1x$D*BtCYOxTMbQcbo``hR6kZ%cuQHmn1oA?uPaou6$C#&uD;Z|7^XEPH z-fz>>#mCK71>+0i7JO(d{eoaI4|l>!4EEK6!jYb}r)L&Mh^#b{J8d6!CRs(z>x%T( zZZU1?#?wr?I(cIF<+f|pe&snKM^wYX4((aBa%iy4r>f`%DvEv4nR=|al|Ntmw6$#s 
zie^JN*#ml;oZ%q{jfi|e*%f0jUAj9z=tcnNW@yD>g_~ptTK~82-lnftg4e#GB!Qzal=+$YF>fx$z^Qtr*pTO!DAlW4XW>YSVJn;LFm+|$Dsaz02eg7r_3 zvoeP5f}c%(a;Ll=(AZ-r<+CyB#DF#|R0M)*s*dDhw{gaAz{LM41)rPi$ zA_5H+6~2B{%WzytG&{d>@=U+tH@sSY>^c8F0peG&F|{tI8SSU}U?KaoU#s?Jp0FP4 zuM4}s|W;5z7<_C5FXV->DPg)$OynlnBfYrU8 zwdG8n)r?~`U-yPp)j*!vcpUe=UYlU9_@Tie=4|EkDCbCYtDoLikagJ{HXc4nMen?t zE4_{hM*q6{dgtw+L)&a8`gDk`{N@RWKDpw4jv11%!JAE_mm`YnHfpT7val^HP&6*mLgpi)%yJC?Ht` zCvZi6HjF{5&~pJBZx{SH&{4_<7;(U}2@wu5`uatb*()+PxyU4X39op0x%jEhTRpm% zZ!Gx~NImavHj#YRdu*lYdDqd#6i_)jH4R2;XHO#=!#*+mxxKP1k6-K=c9c7k#{t;P zH6T%GIADHDxHzHbK?vzx;(f=72g9cfY3i~m7cUq;>KVOsuG-?H$0pCZYz1mjJJaf3 zQ9*=o1T%Of(v^4rw_b&dIWe7dE@dsO5Q`;U5rn|=z1ob#X3Y8x=IAndq_P4g>*Au~ zaXr9YlgF(d=wh?eHTJ_c1qp@a4Z#e4ECF9f4&&sy>rjSP(9?TK!6Kjsk;(%r2(CnW z@74V142$R^GPgcy16%puH6M_w^V+JFcVW1DuY8)jaZxQua8Ar$7iW&dHG8l@@F4`B z%bjJ2s!{K1I;Ovcz?S*YdRmtjy$*q)M8}^Y0AM%v1#i>G+k5#QPAWR9fr-|5JopOh zwVIQ}PoJ@EuSY?smQ-1`2ewe32s2x%aMRD6t?vtuTcQq;QB-%Mvx+$0x{FZfb_Nh~ zGW28bOe+X=FZWuc{!iiC+y|d8?$OAMwH4J3f%Q&jrFxBqa;r}@H+_W#{GGQWaA#S~ zg7$Re2>$b!b3#n?{l7GoQ;g8TeYsH|-@W$v9!~P{)A73X#oTe#l9eybe-W2=o^Vv$ z^H%?B&E{0E;s3|Nt$3itLVr`6+PH5G82_EewSXg*`G@K+KdHzu8BhPX18m3*kS zUXYi;^n$(WH*!iWWN1<1SqFfN{kOKwn%kwpj@$=|e zGaL^)n0jrYH9vbK5Q(w*vQ)%h+bw~a^f14C%U>B%m7(*nrONkkXm;4aJYcEijXB5n z<=g_{kmy=A>3h^L9W%9xzh9I!tXb{V6!Fq2U3c~inyEquby2`!-=dK&E2fx~+3O^V zH_p|><-k#ZD)6|`b$)ta?^IN8?L_Y8kDYIa>-$TGw!jxQ_Od9mHPaz0a0Y=DLoFc; z4hO`~RZmBEcyKnyF@j)7U_u3t9v*2*{A+s`{duDI>v)r+1}4u*ATwu65ZRkxueW&Z zXPrIOuHBsLk*haE9KiY0J+_qfNkesNm~UdLF{FYXH=Ny$?J&>Euc!?>f;G=QWoz=? zIz3s#?#!`$hmW>gHH3*10N$N=tLd$(Fo6LQ-WX4OacXnJG}^JU7@QiNyT9C_ewejt)mwmoS_qz|~u6F1y# zI0339fU1ze9jXi{P>Xh?G`o-oG6-1(Qx?cpX2s@}{$YCiVqc(*8J#9%<<<}rNA=sg zv3=4Gd+kna!)mR17!wy4JzYhS#k#CwrA=)VdB#4scKU1+N`eDB;V&- z`BqOe7NlFPnN^aWLN@_EgUC4dHpL{JkbhN}67<9H<-~{Qe^FMxC`Y?l_!YK)y z4b15t0c9Mmu;FP;kUo8b7we~T;^ecb#h2Q;g&%cf_&%>O^Kx?(?2M1QF9KkYZyMPK zRr_(0e){d5OU=xB!`wweWxg%Vmb$>t72$OQZ;V=gUR4$(#OmhhQNOpE!%Y3cPb>~@dhm5w^;Zf zPHkA7e*G-&&DGpX{9O@c*HXUc>A$+8pG7Kl7X|zWSDf&d=8tX9MwVg;7K(l&P)e7(JHc`drs)y5)2 zJ~~V#WNyMw%Z+zM>^wrI)>mH&bdR9w(_6$ZWg zwO_GX^1?rB!K{eKrJfN-w%>o6(X1EzYq(Xbe?+7La*GdOSmW=fF}@$m;-^x7{UEQ8 zG2NFZ@{zN13$c=KZ(>el6sRoy*m8K{?w#5EKOm0(iBkPHz5K7>n4~zHm3&!ls0^!ipFwr?8c;E_4fPFzxb9R7 z32F7T(;cs*y547^+3!CeY{{kwg0l;7AU}?2fK#6@VvsP!I-X#&F%J!-*0oQ3A7Ct5 z#Ti}Rkv_R|@dCE?bR=?~JbIN${y!n}EAzitDTX1jnqMW3}!Y=PIUfX^wYCs`n4!m6pM zW?!ZFHootM#j&^Jzj9pyaMMFx>~%Iw1be%<)$A3R2pl{_#ZWp|yi&}bYy|<53f*JFZtq--s~-!i zS*_b8e%vXkisN~gEfPnTzU`+mjbeB=+7L&{(`Q7==F~E&v-acXKW6rBl=M zFESsb_k56@Pu}(FK-BTQi%+r(NBf=kUMVFyo03lv<7-RS7NZ3tf-^RC#vnsMvTVM-}yGD9@&i#-{xa0^F&rg z(dN?r4sDx{zw$@CDPF67%q1e0^?{fQD7?c_uTa|pwUv8kt6A!NbEjJTq^hdQzzD@=V|y%Ds{_`sJN zL2#qTE^CXM^_%gVT`(sfQ};_R^6!kf^cyL;O_8-o#xu98}-LPObAMgBxapb4$UG#pZs%hlHUa zZ&CY1GVCVKpowhK5FuQ;t*X2*`Ho`dXssenz3`kTYJiZ+(Az z_siaw!LJTbjzDkhR8BJd?&+!2FCh`J(`tEDc za(tcFP;Tk;kaM$8Ir=6njx=v%XV!^XlnI0^gWvyHPXSrs*O!k2@6Wl8SN=UU%&qY- zlloDf=y9w>cANU^6S2}(tE?>qwP6|=?mcFu&yER1#r0U1^Z4_DhWecE%0%=YFefte zfJxe;Q1;}1RO)*DJWJ3LpG)c=S)Xf|q8zfTVuN}|WG>6y1Bk{wJP`w0_aqb-2Q^ew#_Vvl%($x{##q02n zBJ!4jAnp^1F*nEiLP}IDm^_nJYTlPVR`X=>#+{MvHrk`UFajC$7QP^&3MP3Y-lCVJ zA^hD=y*8sO8WSJw-yGlkQ39Wd z4`U?~qbe#-vMvutQFC1$H6#7bZ#uBPgM1Q+?;Ip7deY^$bw?JK+^2o6bj4dOZ4rS& zK&<9>FVgaixW>0u?AO2r==|r23a{qBB$?_XGQIh_l!+7alZo3@cWN%oSAk7OI)0hg zzXa<$&ADP1ynWp9-D4<{`$ZRs_N1Oy7;?Ip{EHQ2<|q3DDa@_l6Bub=~>iF()YFD_V34EWI#*XLfKQc=Sd0oX+ zo+&>@!NWL|vN$MLV z#l?8NIDVxW2#Ogk-#|N;x3SxqUy~H)M}nQ+)dN^1+H~135r)P-XxAu!HO@+(ra6vH zj@5t6S&r>6=b-O4+`2TsF!GB#m{~!IG|Jy$3e_JN% zlTbFe_r%JxsMM#=@4x7;QMA+wqbX@k9PifA3oap7U)Ye+4@|GKDbqoSRra(hMKO6} 
z!~_(W?TijJ_}G8*kRi(F`{9-3eNIVQS-mYci!@+8Q4-b)#bz{Ke)h2N-n3Y$jU2Qx zo8x5rlZkk`4x%v?AR;3Nr<=TnKur%GpU_a$S1L^pF7ylvk|a7V8tE&&-Fx?GFWWvc z(Kxi8ExHb%m%>hsBt)P6GT+-kP3#-htfjckFkpd1{CN*nIN`JZgG9wwFUqhJLFpk` zKRrXzRYH78=bA!nVeihoB%#~VD82+eJ$-Rvmmj2(ES8e=&Ils>%753Q`M_H6F{0DF zsn`*^Fw^zdZaq#CEkHX@!8cPU12xT%1aZl2Mk47JvC1y5Nl>nQTIi?T5-DNdu6}H-ib4Hm!4j*$lUmHsIlA4 zXa4pM6>YIcQC~c0vo>Hw?6K(QO<*zBz#WJ!fH^#Ql(T_@k3g&_N{jzIuEKI)?w(1Y z2JJ6i7fyS6X2({2VQg!CaO;IKqGn(?Sq;nW-pW`mE&C0B4PYU|U# zrJ&dIQ^Tfh3{Xc1`@Nu6>mbLU5cO6AYIna`eLR@)TKW9q#DVKCL>K5+qO(&OgW(G~ z@@vzAv%I{9Cd5I8jsw*a}VF*!t4w{X1pszSo(UkIVJb> zsxDD93Ua|OyvX$|>NO_)#TNGqZyh?IzXv=sSldB+#%Tkb`u6=(``A&`sK$aIy0U;m znQHZGT7F2TfMEz8xlS-!fv@Ga7sc8%1c%Ffd{N$Y0XMYy>b?BKZ@EE^@=2Tdz86mp zj~aeR-8t499-;~_AK9e*)k66;Tp0FeiH5KvOWA<4P>%S_D@F1$UUogyry-N7J(=OF z|E#$Hw3_*LHSZ$&x(w1^k!_%0?DM2qMjv9UwLk-K5~7`EJu51XqnxuRD4{4FF!Y$` zoy|#Qn{$4C(7$KnK8#^QC*H_83d#ZfWqcRp??C%_SI+LLT3e45BX3D1ImF1 zlhXG0<_v*;qxb^{IU+pd=#j-*;zN33>>Yg ziGYpE^nAJc*fR>Lqm^m&sLSKcI+K{(Z9!WvIr3h!YS@P z(NniE>U*?PyhDE#JQJ4R=V1ITGvk=7zN2OHRzBmwFRAd{m&UG0)rEFfT>n5W2>u@> z(nPZ7)NRGjhD`(QyE@ui);$mk0e-!In+G&r=eeJ~@nm=1v(eGF=cA>!X<18R@k?$_ zTX26S@L!2-+2KPDams`G^UZBJFoalUuYsQ70p-g6?!CCrJko@t`JSi?-*%u+250W3 zUf*?pMk%AapU>Jm6d+7ZiCq-?ct2^zEKPmGraHhxt{j+osT*>{Q>(Xm#8bDDz?wxJ z;y%b5RD(ieeEV2zXXRDf-&(uwnA8ZOZwxjzb!GTISAu%WuY~k9{1aqVKi&12OaAA& z_#Zx&;yoyideU*yu{mctCl)A9RX_vTv)eMy(M{s&SlwCOub;%IU28S|`Fxk?X~~YC z7*F6gyodWR!EJ?cI47_&wVl|WE_^abnLh%0QOC&lGbeu z;+EjO^D!3q&2PE?yjBWd6cXlZypQ4jpm*1+5y=Ub`(8`hD;-KWjKu`)e7y6TBg4>g z?}skn7z|kI+&(fb*3mLtm?IVZ^UL_*(HM3^FvW}!r|%Vb$a>_hZTMjud{sN%u8ot~vEL_p5(z&)&XfL-S|LY_OGT`B*?WRhIPLVy`| zfZtBi3(SvaSj+l}2MS0T`xGy;e$}kX_h&A{(vLYT zO)eZJcho&jeppbkfM;#4>=ZQC^YI)zemQ7_KG1)-WXziD9&EtGD8N3D# z4CanNiPNklRz;gaarl_u{=|zOvc4c?M26+vN83JY6!+)l^gjR6 zMTU$uI_!A%3As=1^2sz4LwA*yRku;!-*yq@ui?fGRVG86$tKQchv@KAh3&dlY?No= zs#m*6mjp0}UMQwSN5|=V=YEQLJdwhU-~#)kmebDEw5&cC*ni!=OXpxjaszd{(CXWrHiuV1P6pI;WmVU{p`mM(z@{g=u@@ts zDI*n>eYf1thbX7QP8jabG<}0H*TiQN#E3bBGF_QIfA1U;2WACi%!s^4W7%KcSUbq- zHve_qma(hVGwYh^yAyQ9m7dOso?b_{?uoGymE`z&L7Sg*w0}Qsy3CtQ4i7&Y`?Bp< zS#x3O+J#|A4S6_#ZS?m+l9av{0G4=p<>OGo2?M5M-}cyhqPtB9|VSL+nxX8oR- zJ%we6&%L9OC%Y#Oi0#Mj@NYhOc0872nV%LuTm_9m(YnloCrX3j=QnaqNOn-3xMPb= zqY%t4)LwQi)rGGm`#Bs&kdF+Xcz0U)-k||AJ||}{LGmD1ViIoaHw`CK zIE35y8iZz9nzyjpDw=|0ZkiHti&_bYzR9p-C&(|?FPSF4R&eVcq`zQeXEQZ2iGLUE zcLYiA&W8SwKT;n2uf*isiID)WX$~X~riHR8h>3RTs|6dZF6-!H5!H9_%`Q5zRSAeM zqk)Gmwnr3d5~25frF&{y2WcL`GL*@CI@hWWC`-IKRSd)j8mD6$^4J&zbIp`BI2)`_ zoEk!-bRB-h4P)}1wQe`-`OH8c2=+SYwGHmPopCLncplMW)b_#ybF(v8{^Xb+z^&N;@mp(A0x-Z}#>;m(+nFabb_!7`PMfu+4qZHW=^R+Rp z;~!nO#mE~$MQ}eI`||SjC?}^`nkIr2J6ekShrzWjAkFZrr#AgrVv|{J|5~RfdSwDO zS9DT^nE1=mS83JwblMxVb+!A0-na0o#LBuMT&%_V1*=7r_*ptF-3O_SopK5rj9nkw za<&kH82zq!F`!Eo8Lz9ING6UqUis2YZ)ulg9rH@xu0I0JlT^4RWp-ht=w!PO3K;gS zjuO>>jk17O9`&8An8#Pd2m^ZmQiM4{2AEd7u%oP5H45w$u2aPQY5gpRN^X4NUl3yq zM_+AKQqK_wW!blR5lQBr3g}z_S467Fj-_g}$MAkK`l=pKe?!BGB4|iWJc4v~eqwlx zh&_Mx*66jKMRi*1_)d(xyW`5E5{0Nfy2PO$xIS=|eu|3@5^`#JiaJh=9um?wEuFkX~??<|#c^)vbJcHKT}g^{4VnwQ~2t_USDa_Rei@GLiRHlACx-fjD>`OYe1~jJ6{c8)yMEumJTPHQliA zTUT!d?z#X1Q%&u2SH|P-{yk%UK+hs;aJ$Z}7c+0Rha+zM-gfOrVqh3XQ_uS>xw(y% zs~Z?kLZpBwmDw|89G$>YXY+*45CoT&hs#YaCy+2dNf`Kz(92y07-2d7S(JNR9X`qP z`^1yYW$8VYgh;Wftz@1Yzfgn+Sadw6?9XW^yOuM&R$kFi3g!A)-K`(IKbyRLEN1=N zMZLcYUHZqwLaaKpMb_xsmTjJ{_93f}#|aM?q*(!9pOV>E@{tIJ$Z6zGee&fzUMU~f zUi_1QS@R{%VKyB08TAa=S-W5QtNzjRs4MTS<0!(atX4M0AqaCby%VO)VsTp501mkR z6(SZgUxOsD%&1GvG1Obluvi=BEWZKe`i;3+{*sQ~bK~?e%S&+O+^DzWo6z(5B}L)x z6Edz<#;p0+v$uyP=a@=X2N%a2xFp3yTQZNN+;_;O`Q~{BK@1~Gjx0tv 
zo($|2e%~B9k?0XOtC#Owz9!^!I0SPF^PQ&enlH=_`mId&1yiFT5S00yCSpKgL&xar zMJ%v3Z@fi1>}ok?|2YiYXYky~gVSw}G$4(NGcp+HNEjWZHUmMI#PQQ!SjlcOFv zx2VdE0m)&!KJ2f*8s;+Hf9$LquZHZEuGu5;_)C;Yc$SQunZ(%7U9+ zO5nl1+2DFk@fE#%SNjV341+={`bSxtSkgjJ2&rG=8y5o@ZRPn}-xvNsbk4En1*ghFi4vHL^ z+t7N%j1tCfBH}8bJE$Z2y-^v3N37gB;!|eC#1ka7+*SU{FJrw0;+TnsUZXWYZBqLJ z3UfjC5MxK$Gp%?jh(Roq)8(vRucsB?OXE}yi7$djLCq^!kOoaz%gC@^+6)81WbS%V z8tk5Rb;!Cwf+T6AjW+b{RLHy{`G!O3W7#SLwB*)7pf0*g@_BF6=(GWX%?p9q5d;;u zfO=q_-w5{{4;p?ul*LPZr>qfG(^C(K-aCv1%yufia+*4FY4hYWiT^LR%R5Gn&lGPmx9lIC6l*PvqN%#;zBkgJhO2n* zIg@;%w^2L78_wM4^0V^>iUH)hqcjbuT@@m;;&H3>#+1SKz>ksH>kP3Lf=-n^2`8=? zb;5W2x%%OdeYu^n)PWi+Ix)yVcE^4+(Fpp-<-Y@{Inp=@O8vA&z>{n zIC$RIzLcw@d$+Jc$BtLoi1b2wHC-`;9Kl^ThIA`r_`JR}bY#EsWyKd3o?fy^7t3FV zzU>tJY#FBK^{#1|+nj~w**S?PchY?%V`2I`P2ckv-T|}Z((mrF#`d+UJ&z4-&`Ro| zU~lfk^o}_;D#6viyi+;d;weLVRRteEb{C}dk}Gd0x{JkTBSE9$B(pO^FdgtT%(N*= zM2IEHD-?|Yv77h9MqYw!JeSn|jFKqgbe`)PB&kU2>iicXvz3e1O)W_S*sZ9$J-udQ zvgc>qVzn#lNl>o_0*;!`oW9#omfObWPV3rW(5cJLx@>3AxBEx+mX6;he34@I|LARK z-57Z(3v(|D=nCN_KI}f2_A~Z+Musf*?~|m$1jLUD21lR38J)fj42V8X(A0~K|LXxA z@R(a-(C~1=va(WDD}nsvrdj_wW;9nvP3~5O+g8YrXD6hld}F^gqzLZpu@f<`VMeyL z!I(dm)3Gy7Kw?(G-Svl{Sz@=xIZf6XGnZTIs&yYdBL*hVmE~rby}V(^`Xm*p4&VDx z2Y&ioRBQNdq1Z7u0~byrv%?0svp2A9(=baguYzAWQdC2m4jF8^?)I**Uah_9`|W4J zY2_6djf_iA(`V}4FjitobXVJulGVT%yX)*iGn4FO+@9+WnTE14 z-DccrD?w?zFfvxRs}_3aSOatsT*VTbh^qK$c3vgjQ~ZQS=xcbMadU%lcU-wuLfKbq zV%&WIBymIX_kyn;x;o*n3&%14pKBq&15WCyKj5Bcizx=Tb(*Sk&!!|6X-mHx-vAw zOwF|)$`0J)eDB@`!6jfrr}K3rxQIMQWeaZUsr3sV47okJ#Mn!zzC=0apmFaZC2NBe zasFD*)!=iB!tq;-@oRxg3vS54`mmxwtB1LiID^{@FT(Ta8xdiwD!Q|6?Q|TA?W``gnOA&N=!V}09Dz@$NR*p7e&{xjE=Q+`q2a2?=l#a8jk*srY=Ika zyt;+W6fBH5zqnt#ztGs@m|eDp%N)t-0>jtkSB(Q!lDzOCw;$GfVK{NjV!c|c%*|~G zpR2d#H|whhXEQth<1P^LM=E$G0f<<;=@6JIVWi_-%Mmx;XbkMM{nxfAtlfjrCu!$u zbAm&9Ruv63R3ff4M7J}Xwp^^4P_gF*8am_&0}bnE=nJ*Z9ed^(9sD?veLbKEJwdl?;2e3i-^*xv2`1#`A!!z9c3`)^Cb5a%K5? z_K*`q6OXP|Mo?hH6)yeOI8Jjct9iV0h18ZJxGj>|cIb-F#=x1@)gzi4^*)>LwF3ld ze!^A~V~b(p!k+(v=W+~6HW#4N(YtG*aDdJ%aX~a6O(o!LWTW%@gN~i&!vE}z>%4oD zEpixWnN&4fq*%KhN9OjP_1@ZL35Y)!Xw%SAOciI{<+-o_q$O->vL{gci!T-V=)c~t z2+i+VIIQ_r=9tvU_5fuWGl?C+84ece3uNcba#tcE$H*wnQV}wmGpZ^sdoIzo@_f7cmqRehys_ zh)coDZ_waa{OkZsrS-@r`L!Z_upOkefuCC`b{||IXk|cNH)r1sS4zASb22?uvgy=K zNe5{|&$6_P)Dudl{5sCIe{*-ARcLJp2jStxHGCFf# zxAyqEL$`upVJl7hzep+fTK(P}wjr8Y^wCfI zUkOBc)l4~O^QadjtRXtS?2IXBQ_Z69T_RwAotH3Kb=dPYk9b8_`N(lA$+UNq63+u5 z16+Oz!?>x#W7R{mvVZ0@tM)8&O_Q9>v7F`_=i^4ooZB-*Lhq)apQ5+>j~X||C0TiD zolJfGs!1kMve-~2(Q!QJ(bWOoGesa!%BP$6zN)q!%lmxyueaMGt^Y!QWy6G~u8oM0T(O{0e7mr1 z$S-bO>a$ylL+c~ZYOY~%o>T)4A zbuesgx}>O2BO88v^-TT3zY>~w%4~)1@ja;?>$sjkwznfe(fZyC&pRd(yVGQ10t0!cOYi)!-P02AYI`B!#*U%V} zZLHX6YN){XNbJ&OukPy+JlzhPmGF7_va;KBHitAiS6xk7%Z_ddAXF;DMoa1m-J7WK zLc=KvtBKOyfQXlVbuDJ7F64XY5%G-UmIqHuKUrbxTZ}|-@~@g_S%liy%(zz0hqf#! 
zM3c-^Z8KH9r(oBOM|U=_)U2VGEnMiL(!q`Sj0@r<#cKL=SMG+d!%K$Xvn7U;0pQMKTn}cCqbmj3u{bw-7}xIE;%(^d@&lfVcC&D1xbJiC8A)b0gTbg33CzSVb6eE5ExAsi2^C+>cmtoII-O&yFsZk9< zehk-@_61w~ERh<#%&oa`aczG9fY2@Y(00_LZA6*OB%w%9taS`XSMySLrAZ z>B6f=O&x-p?p(1)Q_;TY>cn*uJymvbi=R0IgIQKV>YAsz&AhIit#yDF4HfTbE;lq; z5qkBcFM*6QP8;7mx+)-aLuU;M^81;1wZ^ArU|mL|Ak9KPVhFm@KUhXuES5gA+6s14 zy{WOM#Ztj6ub9b7FUQ-wY^zOq-(xB?)KNQeXt&KSqa6{hh?D=fV~{snR%S4}yu zN^nO3Y>aRY*_<4SXmNFg%aqW@RI~L~W(+c;WiMwbSg#)C47pgHS3EsDQD!}uIaKCT#(0dbH7{8jIkk{&v?(Iiy8 zNp2G<@cL@HtZXnU*_14&&Fc_j5}msR%G+{j`ulO`aHB}^mj4*eF6nlDd~c>D;kR2q z)8gsqZe~NSIGq74-reNXn=K6$7&bRy&=~h*r?V8tcitgR%Johz>9?$3_J|&%SuVd$ zYRn{6hu}fx5LL0eNm?xv6QFgj9k5~4`tXpB+PgGH?b% z)>{>pZt0mn9w5KxJZ}NTQ|rv%2X0_h5YGCnr`I}f@!rMXNGUqxmZ5HOa#I&sroe3+ zx3o?VE9fsBZcoPvXI&59Sok)&KdATw8oojfX4+NxY2l-fC&1!>M9|%3>jGVdj+1Q8 z!1=b7ufT+7AL@7i0Cetl`rTzUM$eDxv~8ARS{)7n12RNB70#^$oG zYAQ65jVjOXKTNq94M-}z?srV0?=O2>FhU`I_u)?>ObepM6Wo_IkD2Vq(}C-x)og zBh!%T*0=ufZ`0qajC646&<@n5ZVj_4W1EJ^F5|!&W`kgbF0B=+Mo5TTsw>T~rBNiY zlv^CH5qs}#r#7kaT!8_WTo9Ekxs{gMB%mp`(@LDMNzz^*=U26~UZt5tU+8rWC5A|h z(=~l)KjtIjJceXV{eLKB;Yndk?0e(YpJ!OC_V8w2p7*n*raLvq_Oe}Gj!%?BSs{yv zJf4NpOl~91+D2vg7R|~{+33mbe7R~5rL2a$>9j*{?J!XwIlZEmO0Hfk9%zbvu7%H>|HOJ&~5t$tmC|Imw4+IJ}#frs2F@p7Cf z%;1w%NJ&TReS~>Q;pmd?LlaK$uzw- zM05?rUSAuAW3&|hgM{&aQx@oyOLS%QCV1;CAojmPZTx>z@g#Qu_Hx~7e=%D6#>p={ z|3D%B$7%}yFaP!bx@F}5c|42%vlWeh)m$IFzJuFM0vP@a`C-G}r&-+@x9LDs2b4P4 z@~*#Ax*;q1OmS^--|gpI=;D`bv7)A%N=o(80#z3Tio;{n`L@rN;)=fHFx?eew-@;tmA)UchlP&2&nuE_>Dimw<^>;lv%j5r}?#pQ|J&WWSXzbTPYcI5Mazw z2rv$_=Skm=1tdQ`Xgk{Y2k>M|2)|!#kkWH!L2G!g`^+Bf1J4!+t8Q2r``%rvZYzk7 zH}P?{NQLG`8fFzVK|&TCt0^#-ot{o}3x{zCKYGil?-ghu4bYV-fA~d3LOA@Ep~S|! zH?Tr_=*}O2RJh-oQO`dR8vc3)?VV))ZR_LL`rdA7N78q9n|TGf>>9acL~TZN+yr>s zt}L5c(C9o#eE%ks~ zr>aF_kSl*{S*)3HhK_1}3jHSLDCKQyW2xR3P^telmuJz$wuMBS&WVmePK#gKR^G)E zXS&P(S*B943wp7s(z?FqJrV`+@l{hPJX;V;V-uZ$n}GR{hW2mN==l2$M6m1%ia7lb z-o-?CXn^#?E2P_{g4c_yOcN8c=M@||d2`x&V$E-{Uq5^0egmv>sAjMM_|Jdr$}}wN zlxcFWbd=(8n#gM6FMpi(YJ^}>k@M^%|DdB23}i4_LnR;+Y9hW@(oHo z`TZou;Cbr5W;Moa-0k-P%Dzt`3MCLwVY z%yp_?CAVKXVXi$o5l;OBNYw|JC=JmnF!qjLJd{4kiAf2hJ?@q|VRNIXeu18ugO-{3 zI9QLI#v_N6on-zQ>_f1eCY)ASKe0d6-SV&Ac7B~BV(U5wn}tKntH^Dj?)|^oe_yCa z77~AqIQ*a}eL3Vt1b;V84@H%p@WQ?A0@QCDQRoR$*Zq9Q=%`&Ef#jxHbS$s6b_HJBuO1Fax|D3} zWo+*|Fb;^ZO%XkOkez!nARyr8obP-Hp*{8w0Ck6EN!rct!~Dm+Cyn*}##8Kd!%@AyFQd7B~qHAI-C{$y3Krj<)|9;CX`SrKtwOX&fjZtO^ zc6-ZdGITt%COc#ter+2iXI=|^d$OKRyTH|uTr)F!d&u#?$5`Hz;U{lO+?~LW>S<1F zM7b|*>$i4`JH5RPa-KPq?DAr=SJXl3+Y`ku?1^yRl#HeP z`!y#hHg$p}Vtt9eqpl4f(1tG(ula}$3fFovjoLFfu=yKK~Z({10CGWExqBY^~=h15} zgWV`%fhCK3>K!O26XCDT#a6{qFE6fM^$D-Nenn+$vPdQ%=y88*rrc)oZgU}}RBeAMBn5}p$Hh0;EY%;P2{VF+q6 zGh^SuY(Enk-e?4`{_3h`j(&B?%cf5nt9`-5{(@slku>n<6)Wx_lIEU`s?>ThzvNT) zse9(my%Gy$yq^}noxf$9a-yybK|a`9f%s=Kcm#-o?`%!q3nU7v2R6R|%B=$r1TLF^ zmDfX8oZFj^n6KIII-CE33N`Q^di9M5V|DLpwncFcTG^38s;4K$+UAKkz6cHxsMbAX z+9kCu*i{KF^J$UMTZoev`0u_@|Le%;v`RipYY(ea+`ZM>?W}ZMjp%;`q5n`19rWNG zaU4-wOD~&wQbgE0W7S?Z`3Ep~o?FdGiOu^1fN<@wDPeLiJO2O{N4WJ2mQ?2ZKY+js zTYt|WRf-Adx_fqT7MT#2<*9a&R4V(a{3Q!}|EyjmrsmVn-Di@v(i<)dK~?yNZLJd|=-VNzZgpP|PIuUs@j`eb6-PZ}?Nx{IB|Kcy zrMoSDXwjho+^w-OC?%2x{+OjgzaiTrBQ#-+4d6EXTp8HEFB6|DS9rGP*z(n6eRgxR zC$2Ew8Q;rMd|>eSdZ>ClLwJkiG|{7)<8W!s;(Exg-ehxRG|-;-Dg%3a#68I%NX)L7 zpm!$snV0fPGCy{Tml9j^JP#y0>xb8fT*4L2Bd`*U8)Rk6#hx_}Z<4D+!#?tgrm_tb zKGFANU-nGGO7F!JTMzGlxSBZ4L*jTS=2gE-Z`@Gb&$arQgO1z;YmzDUcSmjA7$v_u zG0{KIn^ZPvT-;IJKoo8C=Oc4MX4##~w4AQaus;9?R@n8}o^IyXk7++x6xt4$w7fws z`wp9fiKC$pBD9ltxM?iT9?f-6wSVUC%(NTCqk+=WrJ2Y}Vm-@g@OP>2x_F3(1U}|I z>;7hsFCala7Ec!;GGka~bov&YL{6N;`O1zNtZz?z*j%#$uX(GyDYt~5eOR2<*H}+X 
z&0{l8JDt;C=oONVESY@y%0)6UfXm19n`MvWfE6aO%xZ9aa*r1F4?v`bYQ&#C^Yr3o z@K%B4VpcD@G(W-O^tu=Q7AP_N*<5xA+Wy%ayNodngWJd7bsisgP(mrjx`F^|?lvwp z`bekuOt+AcMEIt*Ps`kz(B_ZYGPZ8xFavKA{XPuUd~D6T8xQ>!W@EkKej-o4^XJo) zqba&wHb;7zjXtoThDM@m+n?O`bjvvl#hl%q@fOpdv6aR3Fmrrf6@1=v+^2Gsu&JVK zv{5cizr5f{Pc`X zA554m>*;H-;wjr98P%=nCFHEc@p|&)72_4Y8dXn%44e5tZdGs;Zp~}0aJt8|=8~wr z2Ueq^2H{99vkYP{_e^tv9<|6PMp2NB2NA9P$iHref>w<*u&v`3yWmBc!kfkN_3`}| z!fvG5keq~jr4jsypT$@54eZ>n$8~P>Vn&QeckA1C^fKDX-6y)|utQ_re)3gU`-7_v zMXPvil~vKJAdRW+Ju}vl89%cfnn_J7GcUS2uSxFv@qs4JPi)KvDZHIn#ZEp51nC&T z#O}@@+u2)NoMC38e>Is6c73l(*a)oXX>mdu9e30Fz-DtSJ(sjCp>NJcc4vp3TL^Pc zzgE-LT_0-AN_%@+t|{Q)Pn6o>!t>s$&^tEBT5TC^^jbJqR7$5WoZ1O;4jP4mJ;}vI zNKM2h0b37}giP1Xw*+I3q!eE!g*|?X`E~$xBrU*uO*M6~L*73#%CQPcvUXl0uVc4~ z&4J9>OZyIRr_i~~Hy;Ua!pg*78NPbg80+?X8}qA>sq#59qMXE+m?=jp6gN~As~Tdc z-mauPftkw_iqi{PH|&Epi_nw(*JI7?=SMMC%R0qBChc>$9>p?5!@Ks4$$UwzXVlx~ zt3f7Qf6!i+*FsfYkl3F(chh^kZt%t^`v`B`%}jG6UL5`kMj zCnnyvD)u7sx8Cjdu4?VHr?qe}LY+(q48Y9SCy?2+2b8cpVdL3gn zcH51rT_HSF)HMth**tK|{Jgr28I9`$GiWvWCOvkr?_EAr`{9m>SBT|_LYo=0Mr`tA zYekuNy5|xu*x5YqE|ZZyjyeY(evSqWW=77>6LOpdP-+8bK_0$dk94dE=D}=Q<|#R> z=x(D_%KHexuw6Y9&mS~$dzNgy zT5a85qbgJRrZvi?CP_l}cJz&hT8CeptPktE)@mv$abJr78aS&EI<_vsg%m6_P9d_j z*kl%((G7+Ir$-uHyY-e~@_Ni38k|DKxcmnz=<)pc=&P0_kUUUqXJe;a)F^?N%fO$?NE*Ft7NE7Yca`Z(=V` zvn!G)G0v;jHYmmdCR1}8y^+A_uO*jrY3ZuxCh^V?jgYV~_}EW`*>=^X>eRyp25$m| z+*&Kd(k*2Uuh=Wy0__yzeeHFMpQWFtxBV=qWvy8Jy+;jdU?gB*_O$qj`8f&+T2e?R zg0f}8v4m0EkRf0h!X8haY6vI#1m`8#Kzo%-Uk#fFO8080u*XluUNRLW@OP1-=4eFm zDF1sT>^#Q0wuj(48ab4+>$<1RT{?#O$}N^wp%u~dU8>#ebp!?8eMz~$uTU}Jmqt|x zGHm)YU+Poz?aI_o_Xj>-F-Eby)FXB>9vu`j+=u8-vY-(&Q^5Aji4HgOh-(A$_0zaz zk)D&S5d{r47HzG=p<|1MBj%6QV%XoT0_r21rT#Yi?vXh8j?ZY|iz(1ECctcuVp|^F zTf9Fq8?8}hCleLl4QEX_mypdPUqQlmM0@s$sO3m|98q$)6}wzsj5>4EABTC!)0DoH zT9g{6+DQfQC-}WwUHYFd^aO|f|*xEA~{{clYzP4k2Dr>O%>(n z2H7fD2f99A2;D4N1zj#Vx-9jqS9AEr3pVLpGT#t(D(c34U4E;sr}xJCgGQhc1`~!5 zFzCYq6^K(!_%TYxTzTHkS4JwHSuS0$M=>_R5m!~}z49z(j+Qf&hO4O8l=-V`>fWqK zg&w|qtEE{ssL5+O<)FwV=`v}?dNX8w6JpxQd=QGRd3nQid)#Vr6`e*dV;{7yR1D16%s!O+==(7_{sc-^566L8UaKN1*`VyB zp>(Yiu3q)l3HFrnr(XTD94&lHD9em>R36DuF&OboR?e(Fg(Rh)Q1+CQ+L2A0=J>wz zL|*ChzOeainLqza+Kx3GM~kS3E7CGC=3xIq4w^8znd>kS$U+0jH0|9UD0#bE1||)* zkI~DsACvcd_yjyT)tgc1ePMZkK4A<1GwHgd%cmpjGM~x!nhHD8+Yp5Z)S0wH*=D;L1OM~0J9FHu{7(4q=#HE3y1a= z+QE0*meMKUAkx7`am<*;tR{Rbqq(pEZE3(IdN8h@KkZbzaWg5~c;)Z&+F2JH+)YOs zGVZy>fF@1p&v)LPWRh)!@M#s2B60E{IISLClO$9t3epa(7<65}<(5PD-#{5bbCw$1 zCa;ZORFv~r8TIlYUY6{CE(00l6+_i-W z$GOp$ofCuSN)*ks!|{a_vjz4}7NZ*!QsV5q&WaBVbyfzCsYd@S^`mc%1<@yN2K=gs z9UUsm8W_hw1xLN$%V)AHPoCyA{v0GXe}~M1!R>&xK5|X+I;+Sb>N2sitp&X4968@l z^rS#1%xKX5P>fkGoPSVtn-sd5@ofZWXrj>;o!N{6q>1^O4QWwH)!naiTnGP*%zU03Ge_+8TS-T%$-s-&Wx*y!kL zx2owxx#5*s|E8vsA~tx$ui?ax`{ep*yq!J-xDo8RAJrXY<<%Gt{Ch1Vd)`Jq{K4?< zFbdtcM6(ChVbBkkR6Y&&D)$YWTx_lB>a7p8Tj~DF|7!d=Ad2+riBsv6Fc$ZHYkjm? 
z?k*-84W*$MQ5En@(~LNKmi_!0yN+5-cnCQU=ZQIGmC*}D9sc2iU7v=2>FW^P%eVIu zL{JliU)?FMeEqmd1dxQWE&ztStcPpk*2|!7JKaPY9)BebA(Up1sS=%ldiBfnZpuW} z*31;VC`A=?#pO9G`ee0Js1!{I&5#{;4kU3h%6P9!f(UfDmSgt7&C6tYhFj5&o~Qa;IW|q9 z(@bAV#cqWCkYUq{ga1-B_cDq-p`03e&1y!q=e$z?-P3)|s)%qE5PNjQ#VZOk?z_{{ zNHEOd!qy^AOP}lg!3rc&!HcS4)IHRmt@XrHBjD)(hFvjT6LX}0i4Fv5{*0)B2y;t)5*pE}q-FaB!qd}s81N|2c z-oKO7Qpi>OZxyDQ%{!@veBK!+g$AM6HPGIl9f+;GoQ4d>0z@}t*c5ja9^G-LWUN5E zF5<6cOFM?a*@usn1T*Wp1}FX1zdyQ&^LuM$71=qyf@PrWts*Jmq?x>V5H32DQ2{Gi z-=)%8zI2RSgdvypy1cT0c#KG*Qq37fB(nF>{{qHXt}nGS2Ccznn08e8H*z(a&Q5b+ zhgP&T9M!Uc)8iCD))xcD*8eKP!g5d{J4zeBgf=bC#vfRrKoGV|jRT$a=Wkb{>K5Qzd z8Nog~a4zRlV7}!9Lg4R^{gn(()x?DINhYyNdHHiT!^3azRK#rH%?FpcPTUHoe7`ooMPOFOc{P9LodwLmauNI+=A?TVv#}76EAMccB)Z1$ED(h z2@Y?Po?{&=U&T}*?F7`G%Z8lfni<1F6cjA?MrBlBYn#hwm^p6fI|aL~Xq^#gFr!vg zxrK48+s)2%6eH%Lk!t7_Cb4P}cx~nOuhzkfS5IgMZE=IW1owXr_R6^`G1l0cZr=K4 zhK!8jA3!qKm9>ADnodEWHpBe5G+N7AW!(5A!feM$(=$QjjL^mTP4jE@gAt!xj44+x z9F`5!`UG-pjXUkI>Gx`OY%%DCFp)D4WCIm4FMf zO-1LFsE}*ztWnDtzmb`C()m4=Bfy--H<9I~e+y004TFnheY*uMkL(E0fS+yQ%=eD^ za^=CfvPgy8Rdbd`dPMOXdXDOk)%7Q3(K_g>@ZBkt46BcHmaDCiBFpZR9HZwhk`$uP z?=!0Y07@%!-|nB{%E_;nTf4=h#D;B@wn5#wwJJeQUXBPR?KGC9mNFBR1O3gL5tq zW#C_wu40W89yMqU_gqi{-Jie`68j=9Z)1N`^?GCtL8!umI+nUY3Dd+Jv4H(!Q|Lxh!o~_h@TRhp%A1>9( z=#+QT=uX3FOxL7E@n$z8Kad`Tsv^`{JLI(b*<1wzX4Qu)!m7_BxNUc|Sp)d;WI|Bw zwG_TQyI+3w7rTBwvbOcVLpf=Be`Y2D#`oEAN7r%_+T)P4!OUnzw}3H&-=S3`ODmAP zu@)iqr3p921NXYU9ICRf3Y7We9Jw@oJBsqLt2yJCT1hbuFfMNdqqhJQ_KRa4OpDF1w2 z-v{G+r#Jq7684_j7ay9rVC=YFp0??ypZ)Ao3-T-x1JNx|9Ja{3Ez%I<)^CRzy&9Ud z-L%sk!t9&fy%vkkfi=52WUPH)WkXH9g$~WRK}Dr9I_Y0il79QOZ1`Dfp-YrC z+vpQ_8cQt2J9-M+os7)*RTf)NEre}f*ut6N>(4vBhC1alj;It@A&^RShXNC)*Va4W z8bx+S@a5OI=>;RrdAPp7J&pyjrR=Vo=3h?RbhnZIk!(-Mt#$7_SIW!<{x}!oqF#cM zz`bW7k+a-KgP6>%I`^R1{C-aV-3(?m1YsVL7K+Ac)|*n${cyi)Jydkh71R!8CDK&P z;JwUNJji`j6<&xjR7&)SwzW zWvs&|lEcmJf-9|IgnBvF-NxkXVmUW3e z+-4}qE4Awoj_)4ON4);Cz)c@UlA=E9bIA4Y+w8 zjaA)`ved81kQ-3r0&2aNN2`p;Tqwd!=AK?6X`-e6Nk)DG-C+sW@6)iLAy*@H$|zv& zy{EV!;vB2+kZ^jKm+3Sxjcgcm>#g{U4z~#iM+Ob#O6N$OW}erir&wpv}=h?8V^QPE_*&N}s*M6k~LZ({*18$mBSB-H(OT-WRhf`_3s z{h4~MmDq}L(6&x5q90LU;*FR)Gl;Hq*;-^x8p4sny#u@nz%9l6@^8-#_r@ewos9>s zqzBt%L-og-(h^n^EZlIlBXO?kPhD}w;hPQ#Hm+6RU=)OZDE*vxLEoYDz4DXCndCsZ zZvV>wA0@LCVTl->YDQgQMjGp5F{7KBm%~mQt*J7{^`F9<)q>^i;L6<8NXg7t+?qEw z1@p0@CsS3TL9?3onmnsh(DlaY6s{ZyW7nI~`=RiV9v*B? z9t3O~f15~HtojXZfRW(%{R!`gxJ!<&^TD3?GBI4gi7rn8MsVF{pt-KCkyJ`qU}D8J zXNSP2?b%=`UR&ny7;Y7~GqFPSe!7;B8=V1bTaB4DhfB*tcJ=gp`;R`^kcWRGHe+n} zzu0;~#U^-&sfnu6YABZ<7WE?1Qo%=Kj;|)IyWsJ2TOOqe#y{A#?fqKaz9sPU?6u|a zR0oKI`aEG{C~Is}h}H!4#u_)77fB3Fib>`8kseRi9iI4c@TI&~Oo>ysv`)`LI+kJ^ zMs~K|oKjv7YH?OHzrA%=3xqc1rk3#5%gBVu^iZ@D88S*w^CYNi6-p4j$cvKut`)J;eK=a*Y(a(T=99xIP7&%|t9wMA9ohK5VviSd^={X+%d^ zZrU>~jt`o&z({&wo>IL)LDRh}*c+gCbAJuu;ns2kAphd=($dSQ__mFmCFq#(<9|x& z|7nHKe}frQfXr`DW1h9d%8y#wn#F%Rm&NlKFx6CoAV&}Gi=F6w{mbXT$Bp|E?gH=? 
zf@&slhonY4_q9Wt6XCqQ<^K1_{5In zH*y-5i5bJr|Ik~Rf#48kv_5esb28SVWMUFDYr(4yN+K~dyx&b9>j1r58<1B;*%h^f zqwJyEr5PlrQ1NjSLe(@UWR=ujB?MO)_+e$>cmd;T5Sk>mZ^OUa0s8jHD#qqOaX8yjpil$g2j6l(41wf<2@R#qZ+db5;!fxwC=R9IPQt;N$u zGwo2ff@n828H#_If*0{wAFNLMb`oAKDIr70MFK7=Iwc$uw6GcJRvX|9#P)j8h6$fK z2M#+JN_cs7uFEw=0Ez-S#rfYDKX?R~J9K`wf0fKxj6{;xj}exU*cPrvhvqQp*%DdJ z_QZSs51JP#&*X{5D8Yc{My&Mm$U)5k!${mA6AaD(|2P~rSa z??#+5j^S=7c2=~PEh!!dnGhA#<$BNk4j$Wv;yBUZ#f1|Lit{`i4M&>$n(t-$%;dbh zcwQ%^(@I$zD=A8#Pe>*Q>D?jI8|m|G(jeBkZ;ha6^{>XmW70C0N0{>($M|TI+ z84>;_lxbB--+~L;*FP9gm~{}Q+Fq3 zZ2HAG@r7>)eZA~6q{1Qn_ls3rlIkz$fv47Ic?6Y3`J0TTV(xgbfL9I}AOCAzBRW%c zkK~D)_sk&F-(>dFw@8eAF1vdDykzU0&I?JWFCOGMedErFd#^pXOZf!kT;Rb` zh+J_CS~@w8#dfm{dDS`{4Mvhqqbd%8eE|sG1L9)52aX#VFu-AMCJ3sp`Rdw_XKV)54IQ$61XnwZRJ|1vt22>jXIK0_6mZ~^z=2Oe z0f!Gic>LMyD!8IkuNM{|kT_?(wj@Po2xV@VlU zddp!pCu+~wAPWjC{Qm7(sr@8S?M=Yfg8}Z^hd-1LJa}~E{;grSHf>;eZxgj1`F^oz zeE;xA*6?!K{|q+0*ifS@?ikZMcunxu5Z(9vLiNUy)G*HeLGXnaF6RUaHH8EdgMM>O zYRPi0%M{8Etrl`~5w=F_!iqb>Cm3=Rtqh{Bot+Ak;SNxW9Iy~qu_M&bHJO|RN#wXu zv`qb0@ReZPai+PELTcjzf7U0z`pZh-L(TpIk7GWU9HpBnnj5P{Mbuf7*73WK7bXjw z%*=bUE$v-1iCFj3v~$`*RUJII60AyKK~dyVmm4 zAAom)pUNki;9<4K`}~Oqo`~A;NO_1JIT&BH*ta?E+A_Dk+D7iiXDZMvuKEtUJpk2e zyvVU4ggqNJ6wEW`EfMZI@;U->a1N@h3Ri6}C}$9?N*T9fDMBRzP<#RUSXIoo*yF~R z!2!pbR#T1#9TIn@YpsyrG&~JC+V-6}XflI?M>&T>-)dH?wl?}kuqH0qS*`;f)qd`K zEp&N86o}Hmm9)5StPZXZa~+;22`e1ep^Y{T9qF5gvQCP9um*P)EAbcS}cL{q`nDU#)_*bHqFyKl)KoU^2 zhH@qTvn4j$-Vo^jVnn;<`0XUZj{3ZUXjqGl-1m|zy`g__DnKHi=M6Npv28!<4LNOs ziyY-vaXX{&VQP^@qN|~*S&1Ivzeu7}wU*o|UoP1sy4?wpMex)u@GM&X0X!=h zP;T1mCy=zLr`x&aMUNG)lYs@nc%^QrKuJPpqUW7tvx>sohg)T=4M_ z2~V$SD{&o*v}ZUl7{zzy352;iBLVBqWT36%key3#z-xeL=)Ef(yU13;N(+=WsznxO zTM$>717m*sNwO=qqP-@zN~gM>4*=EDdM#Mbjg^U^CLX!TBbg*Z4q0QAex0mUP{r1+ z`2!fO9-pK{%X|%=-ze`iij`vRe)yot+Z}W5Lm=ii5V7+W=)?P^5#NYkM&vTjFvr{P zgmcE2u~qEw8v2n0Q{js*zVOBQzO(zF#w+yXy!?e%p7*{L+%={_y?Rsa)&xA>dk7 zm2Bl<1f3Q?w!!FjyXCrB!lBQjxtSz|wuH#jGy*A8OJw|+^AEWhwcIzUr+W(QCXWxl zoAxMC3|WLlkiw5--fW*3BZY>k!N1l`_5K~^7gUkx;ht|7ez5Db!{XTd?$p2ChPdJJ zKY&ZW+kdK%x3R!j-MSftw$9_EM|zII_wy6=1iO~a2P^mPT5&9Djlb-+<0Qv_q^&zB zM0-(-V;_Eh=Ho%?TS*JuJt^^)Cr~sXLPVm(@?Jf(!nc$R+SlQWqjr91_@Ol20HJ>+X&*|*poR?xVCIXm6$QM z6$**rM(eC-d#3fzdHc|XZsC)iH9dVIz&m|qK@EV{BKClaYfrjnlBzQ;9MY(ZgvVqW zz3RHgrd3zuKpyO2&*sUFD<(Y}sZ|-A6)bPe6~Vxc-c8H`2LH@`z-=vL$6~Lpw5zQ*myf>=*a0S zFGfA@ITDTreHA-+=%v}2mUMe;qWkWaJGV*-+d4SfY*3$`nW>RmO;$x-rZnnBDhpQ{ z50@+W2v&BztMeCOy;VPa^xVID5On+(4}zdbFL_lJsIF7EE5mE9aR(OW{P0b@uIJ8* zf&$5>AnTF9HF+N_P#mL(7O!QU;Z#Uml>>nmT; zeyx81lKVT{H3t5)P~P>-L&4fCM!1Zwp!Ue&yu75|T{80u6B53V9lljM%+9$q=28X6 zQF=?-OmD*YtBeH?m^!vBiv}?tzU>qx{yLSrS%Dwtl2|M_#46-S6*n9zB?p64xsO=P z%?-!U;wWeT7ZpjNH6x0fW&-!k{s8_G{p?poU)5h9uc;`j&4ypUG^ABu&;0^Pu8Ukj zWwYd10W6ZZe7VDbOAeQIphh|b=v+IVPKfA@+B2ch^+e)hv{KX(JqY@8;dAeyb;gnk zJ@il`qyVfHgm*U*A7{56c-)NEw*ET2%tD7joM`y5m%TSQv+V^C^kM}ND?+I%PBrdT zJ8a}>1C#Aj8|P2OhWaM3cuUp(eRgKjCi@;URe+_^Y= ztMXeUle=F~Tz_2h7z4uFI0TLSY*pqwSqc2oaj|0`=c;k<)i*fr$tH=wcXAWb2E^V+)78U?|dw2(Rb$TLH1ZhNHKTTnFn&ldd6k~ zDaBJV#xo!Cb*}|Mow<}cI*P&tgIe9-T4IBd%7o?mos=>Bv%($SBmt>If*Ft;m-ni*C*bwd$+Efzatl^a&n{wJl;t@cF@8ikS zH)Lh`kK8whfs0yJP~>r+NF)>TdAsVmg&kRmO3XPsro~=XG!hFt)f*~Tl6Sr7vltgk z>xt2?02{Yxhqh=DkR-!}0=vSpFK1Th%Dk{yqg0 z;KmFoeM7fB;iTL`tRA0duZOG$z(JNJCCJ6!YYl^Umqz#esqNn5T(1P~D{f)DXyG}z zSP02HtDOr-Qz(0Q5MW@^2LJ*l01`F?>^zz_Xb~CgyruGy=1L&;Li9Dcp@PU*%7knN z{H&p9_g`l#_J&cUz*V< zeS0|?>?>J2Y&1rxljGaLOS2$gf20;V{3~ro(2GLF~-Ix zWJiYEhzn_;4Xcp3?Ke(-B+jf)bieFEY-tWu_@%m`P_NuY6T#O$N6NhqDxHW(%g#~M z*~@I1+cyfaSs+9@6bZrADc@~$@5SDJevZ3I-m5**I>6@w0ML?Ss%o{VT%U_cu!?J5 zOJqmd|IRM)g9i^!uu0rU>U`Pj|KfMiz*cI#AOq 
z%-l04lxcxkMGU#Zg};vrJC=+rM2y`(Y9!h#{-{$cjz{|U$9oNcGc(>t)9jZYu0Se^ zr|?VM<4mR}ZDuYT)|C;`-|y8$MV4+@D7%p#HsQcCu7!C~nl2hws%PAQ;0!B`Hu|oh zEXKjxEjs5d;O4!L!la-@^cFbt2iHaw63);ff7#$xIol#=&f}wvYx6#6parbD;{hyIIJyXFuCjs z7Ms~)^DNUrGI*zv+`43Sjwo*V79{*7&b->jtg_-lqR73&s~2}G``SEx#cnECiGivh z^S-MSoZb>{O*3P7)XlD_!H_!|`wL`^P?GJTRukUvs=2 zN9tBSXEh1EuK&wk`J2}R!_B)vly0s31?3XdzId6;FPUMJ`-*LLH8tAc2e7jK(zM^m zarfV2@ZS2E(9Z~>vaC2Sl7z5)(DcmvpeXf9{-b7eJbdqY&n_2h!hx&q%WV#sFsF$$ z2iI)nNbvB`hB~+R@MLpC_RRiu1@p+7U!S{Ab{*{e?Je=t9XQJ|xpaib4~MYbSMjoh zq;I9W6{wQM+!{`^nW`UqW{HF3e?RnLEz&0Cm9PkuhacoFp&)Y4Q ztiE_sh zU_Sk|x2Ia&z#9~58Cov^R@MTdwIIeJddx~zWH7L8WUQNVI+-h?73dYdsSW=d5#;%B z$X3L9RR@?NeClqbGvqWA6Rw)mQ15^kv=9R;l2w*k>9u8r^rc32{P~}$ANKDMDWp_&UwRi?D6rBd$M!Az76T zB1nGr=cJ)*|0(=4#?g-ShLrd&k9S@l&8kZ;4QBIeJrrc?3R7CM=U$g)Pn1si%#Gmc zBl;mSp|XYTu2J?aWl(xJL))3!y@(RWwgFE_+;`7ka1wI55w9~moqSiUD{Lc1=N-d* z{h+LG!o&mpbou^v`Hr5AExis%)qHS{0+rUfV5IsJbg7o+7B>OK<$%66{sNxCiJH#b zZnpQ83Auw2hV;EgsvkZmJA3bFL4;P^SunBdO0~RYP*PSX*U(EMYZMaUKd| zx9gv6X=%Z8l^j1)b6|Y6#j#;O9@-dV)eujV-?=HX4LP!+w^wMFDR%i zR%WLqxK=8!(Zi_dU=oJtbL2E|KZpn^$e5@8P8RQcEESK^w*D*jG{CgiU-^PMKr8>X z2r%-?XDK^8tnvP(ZS;IoEkqfy?*JuV|0&yBfAVfon)bHlE zGtc|ZzVGbp%rmpQ^XxuK;Hs~X#~0|?FEBvDfO#2_^`k{MY8-ik7{FGuFp+V!<(aJ1TwE8ouwBf`HsHh*WipBa(ao7$tj)i97p?@m4^h-;h#2%rdmvp`a{u#Kyzi~l zvxw6T%32E9dKZ0lwINMvT-SBo(B<58`(cvp@=359G%`UGtdA@u(=mC~tYTb8%b&AQ z$86zjx<&nfZbNUytNk+J?*wK&<){D~k0Uxq1|`E2q5&7dH5J-`Il}Pd0)oA7Pz@s!E_u;mIva;>?Y_(eWs#j1iUi@4rlR+&OS6fr8OiSl#(+5upCXvU*lPa z2{xthZNt)e&m$Vliq^EmSRLN@3UN7|2bPwAm>&6BXIcn(eCU4OckiYo!FGbP)I-o& z?Jhb4$_fGZVYGF&I%HZ(FVvR3rp;L_zvm$b5$YlcJzrmxWBF7j=@`GPtZVEXX{i`8qsZ)^MFvU zm#4H#EiZ!Tlu+(wE5CI5r6t89xK4?m_LWaO(=4XLXLsU6hn9@G5d~SN>$CSOb7;~i zrghetM@ij)K`OriR+^BufD1QJ97~JVR|-{2#o-^fErbro3mEE-jehdIXcJUs>2Ov0 z=Jo@Qp4+wgB6J|mii@CJf0o0^1-I>)tl3BMo<`>X^=TAKMVm~&ZePyBH9&NaW1R)B z;}q0YPfGWl`>p@t@&{jYv8u<^4@aEw`?9mj^64bSbqi89g|gUqL9J{g+-b6gU_O8( zt>8xcyI!}8T?{q+@wPqlb_hT8v^({;TTr^6Ra;opNc$;`tnPE>-!OOCe!Z=C4sz!Zee;I-(Sqw^7k;h60`yPIuv#Oh5_?h4CjgE7G* z-H;=iaB4b%KHR)@Tyit${wlF0)^{I&;mn)?wZ!4E z!7bn%>q-{%`onaBQpLXkb*oHi|MI##pm0Y9jOWNIxZ0NJrRB_xNa6WECsck*C)rIv zS~8PuzjjC#A3|boXJ}6qI$Z0MD#%a+aXN#O^NNqk4u@PNwqjAuRWewp#&V53=b6hm zLk>qa6!RVh^Za!>OWqZ3MiWkQPZw3pNGwBY#4C8kxdZW&v)(TQ;wl-_e&SbpoG6ex z+48K<9kCHj+HzGuv|BTZukI}PAUu?gs#9O9W;wd<}E({_9A~MNC@k4OML_;8KvRr!0>8iHg zVo%v9XsO6kZPy&yxB2i9gK&@aGf24T+UI=G6WDM=+_B0e#N|^)2Yj-fKpSY+d=_PU zy|wkeg*g}!rnv++jdWt0T?(u7nQ=0OL^Vm~8qP@@1y@R*m_2w&_0?HXZ*eh-e{&jnbbOmVWA+Xs@JqOv>CMs8JliaicBX82}X*~ zKB(*e&^?$oCE^B{)P(cBP?6gNLtFflUt?12ud2g+%ZY=v6B?wQMsD>o3)|@FKiCJ> zvxlc_5b|4j>K9!l!%HuJ1bysMG5XV^sG{=DaYTKp^QN}D$xbi8p-)*V=?4O1Vr3Wf zTJ^Q7i=Mq%$@of+{XHR~rHglWfjKOcJ-mp?%gY&AXZ+FPye%f=c|no+`!2scYHJ`} z%l`OX+;O-+IH1SYv*S#Q@WxymtbEwNDX(>4Jg*iInr-$BC@Z+>HDVTy+l+Q1<#1i{ z@na1LtjR{^Dz1^O-u|#Z=*XUBqfke-rBIcZFb|w?`3WJ=T?umADuGmt+`+JX=`Lf4 zX{gHO&7u+(wv`tVUv6P|y!vxocMJwQPrhabnO6*wgdgs|+#r7L<|#dw91i%oRfzTP z)oHJa#)%2^d+N$2I?-fcG61Kr2`h$|IHBlD$7q-w3~glfukk;8UknN$XCev~jM}F> z5jrj7mYtd_pC=piQAzM}xV_psWWeFx^Le0O%GehRr>(m+ngk-8osu8Q#NZiQC)Q`f$edN`&;K1sk%j?4l+Ywc@ zI*7UK=4OblrC?`c=E!ESp;lu5y(EUs-3#5v^n8V`DfSX4G?K1=jyWM5J(o#tw&%&Y zwgOlvx6+lG>E;}I4^M2hzRV)xDB$&T*~=EnnRWdhtM@PY9pg!Wj#{sonqZ=MVT4FhEp!R0qcQE4@N-Pj`bepgWNziA zl`wzm$Ka*ZYTJ^#_FcVQQt}&#=_AAQ1p5mY$Y$9{n2s)?0au{RZ2~Xkv3ysDaXc%C z7W7*LlEWKpo;6d9Qo3E@3OTtNi2Qxc0X}m%#5|VBTR@Ghkt{J`);wDwYW+wJ6fhB0 zppW{^D^w__@6_%Vot;mZVD0>H6!UV=IEHP>#8?Wie5q>gZolx}#wRyR3bJO_28695 zPzpWDuVf3au1b@^;Bj7@+isUuP3yXJ>dVp@u(A56h4>q3lP8(i0v&g{lI;aKtSRRY zS8RU#)Rk}%GQ;6YtKGu;(;?Ed%nHHK1!_Y%T(GN^GgtPams_5;^=&@JS(@678;2D& 
z`fIJ@uoB2gwt{n{cBttjb6&27OlxZ&&T7B*W%-N@tei=L^QoX#s>xj&ti{WIN|xa9VOo8uslZa)5og9Cs+!R!^#&S*Sg z)N9%WIX-z{1h_o<`q%Q_?sME9elZp4gMb8Mh^NA3OGeKrJHM%_4^!RLZx!BN{PE?b zwb9zn?Ag*jWt~nZkWPg2#ccIjmE^V#F}XGnpx7mJDs?F^`R3wtFV6m~1AcUSEq=Lx z3tt7=G)&UI{kg-9fa9`c@~rc!3$o|N71nWIH__WS_wpHRL=$CW^+S*P=d-q=TtQW- zdk-ze-lWA-Nvm=OXLrX>c)^YOtN3zo>_)3#)O-4dIhAzjYjiEHhKp;)7msF75bbg3 z=D$s0-hy}9D>fiJxcJ*l>d>mEjl!u=pI5o}dO=#WI?M0-G9TRNXNxF6dRZ~K6Xty} z$vRX_hyscApdrH=O!A!Qb*|aPnK^M`I+9yZ%PxUhx!?)ppP{S!V%&!tb_%ZX|2%ov zkGG*>V!^itYU9<<1DU;hOJ))5)n3fpyMhimBVLU540JVd)4_{=B|-AG;mxXZQvJh~ zX~()Xm43T8rcA%7HkCR!n_^^yl@+Vc0&ImSK&C5+ILtZ}thR|p(=F#rv2DPB5qlzJ zk~ci@#UM3HKsq<40eY_M+*4wY(3AcWmE2483vXoSY;BdTa16Wg%GZR?+J;+Bv~}DX z{40`(l77eAnW{@fumPowR4dD0a`$hwheULG!hyf@!?TNn+}88$_6st2M=;v7e7QYW zmx%PpP)iuEAi@IrmY}W+8gs3=`04E)Eee@5Iy#SX%v=uh9+SfSO0Y$R`%z?%)VSC> zRPJnRPw@f1qz|$_dguzF$8{)ls~{A$G7Uf?!qoqFoy}IF0j&41sS$ay7Pv?TAQgY zV>4zQY`XNQlcnIw(gQaHMcooelj=A>Vye({_A$TEiYKKA&+eelyl z@lJ-f0+~PMY@Fa#^&_k7i>_W|TPPD1qd_))*r9Ej?i=G0smY91H(%tMFTBg(~ z@(Y4Zp+wmYuoH&deo}q;6u5l|8WgblD#pB1w|XDOLH@<#oS6VJ< zrpvVsU$wxF zbt;dJNFzOtJov6vfx_0LSq7s@`)gXLNO+H?_rmu!Z4dyr=~93;S+W}mcTIE&(?K@w zeVa~XdA1eHOva z@_71d8M6^kmXvgGG6%mvPN)LsR3?Hf&& zPGflc)h`~|zvNgyvjVP)PiE%cYj3!see8{aW6FtkeRZGg#@t2|WZT^)k1hXo7*W=n z)CeqpB*z8-Z{V{{dyq7^%~;F*jX%y7K6>9IQz&;OgykH$rx;1ygC%?#&+Y6KxKBKA zJ_QPHoONqd>?pBxZgY1M7LG*|bINMk=BLV6ov9l*5`&c9F!zl%R!bQiWl5_kI-Rnq z?C$kSI|H%BJSfQFuYPvIG5p8==#O_|AaYyNLt#t43zvK>phgxvS(VxJ5R|gGt$a5}@4B=#lX zes^HKWu?(ekRINod@);NEhznOK<8RU)?|WNYhR|aYjYC{Z{Gmgrig;uEhRCJ%E3=0 z?*d;Y_4Y41A~C7$Ht&3RI`4hpAbyWzRv&gMGf&$pn0NFpleZDoOAY|LZaK;>mQ2}V zqi3`6T=Na5XE9RocX?Be0MK6l75RM6zB(^TTg&9*7l6s}Yt>Zz&sU_Tur_rp1z4ep za+GNkCO{4hH?Gwu#`5A;Um9C$_fC)@;$f&#|20nSF1a-}1Y|+&@3f zkiR9^-RIPpIjCg6kl|IRux$())ZEHJ!S(=1~GDx1LpCvFN7Z^N@sN*ha>7&Q6qa}*e?-a$|q0-Svp{Nh`COd}JN%@i3I zp;Q+!7vvaVrMUuT))!MLzs>a7aMBIHRuKM2Lfg|^>}d85VcU+xo0NOpSPR9Ku6^94 zZc5XdWqrdr5TZ-T%ChmLmxbti;~oXQ*`5L+ydsSj?t7_ab$?va0X6sQmt=nOtUuM6 zxx>NsOCn+ln=bz8WE&QgH?$qof|Z3Q_ZUsfEK`Qn#@SxWGK2$N{Dp3?v4Q=8O($A? 
zOrxfhS917;BF=|>BFzi8j%kz2CG%U6F}` zPW(Ywd<}=Li~v8^e!cnf3BaMu*P-0K)u#H%tK%?+KWRf9dcP=n(}tnIm> zT&KN>Va&jff6i-+m?R4`fF<|lGUOxtoUhnPcZ+9npX!``8)zzLRr>N@vSV41*g2~p z!oWg4r4&WGvQ1d>TP4Nzm&0!o zoZZZc(2+QpsghuR{Lp!=B{|umFyXMpj3zH4*4iFPG#`SNvBcG@Y#6{Bt|u&Gq5aw{EOjWO*(U1tx{x@C_yVT^=Th59Rnh0H8$MTl)v7;22^Dg@}H;6)u) zh*9c^#Q(!r8gi}+jOqB=Bg|2uMO^>#KHuZDyoP*Na3>#N=naRZ&8OWPNB(-;e_E%t zcep~0fU8E^(OQ$IEI^^NjoF4Er!LO={pYWn(&G=l}Vo7e0} z%^{A;E>2Fa1Cz_k-^0F_W%W6fosK}?m6dO`e8fe#p6`Aq@{lxR1Gpy;tdedw7pY_q zh^cdlY8;pKg#~C-Z`xti@|v{)-Y}j`$VFzI7kvF= zzRgT-)Z`uO5voo0J}D9Wvz84@}F%~(RZHa74!R`N6alsU=ra7I%1FJ zT-BUWJ4c_YsZU8$$kUI4P>%3K|GIUA?V~w8R)ee9P5Yi@|AFE&HO+L@ zd9}C+Xc{J6GT!XB(w^C832`S~@{RbliYW$48cF3m+{aUTs$NMBzVCkt_$BBz7ff&{ zp;IbCWU`<-PP=E?3}}u_$W_2>v0USH9vcP0<fKEnOY)-fa8&AA`lg7$z7L97f0JG$BLC-WL!TF6S4A-8mT};IM!sr zT&APLOf;7M=s>RqH&#p|$)Pw1gld86gN1$>3Ds;GbyW-K2)_~6@96NoohoHWQO$a# zXN}qP0;WQi{QY^FW9>C~R4dyjZ^k0Wr^)H~lI0hg=ZPMTDN?|^rEzg%q(s5**NWDe z&l>Io@uxZ~j+!Z4(_c&wcd{V9-6xUput!{7&{g#XNSYj`Udo=!Cs`-KW=9!;Xu2|^ zcio@1QjCyQSW{uO0E5m^#J!BrSH)Mp`*MpT`_8&$colb?lECj25`TxawGEA8XtB{1 zOUwbg>eis)&=j|@4n$}=?;=Btfco0EPg#_*vGKIOKEHm=4&cX!C^H|$gv2!WtEEYF zyU{)v6q-~xYJB)nl&L6raVaq}xQR~fduu+xBB^|Q9vo!p4_pvpv;Og7>iCK8pRR#G-*&YeCjc_aTA+S2C z95(TFx}aPWn9b}hY#w2LbgWtu^<&~yzRO4v8t~I{$`@K~@1lb8zDTcnO&wA_?=L(f zorK%4M{;vG2&gi8E`0M0jD}e=9dd#1O4R>rlx2HcCoSy4dO;>f8gaZ!K}A8T{)77> zTEyF%`5~S^DQ@3;dviOAiN5SbsJ=Q|2YW+@Tkezf2v5#N+glL6iWa2Xk!2+*DLboV6TQvNL=vw(79*Gq^pK*(c0^qM0_pz*QhPf zDt)*)c`|_12jGC9(_m=X^VSzZelUPt3g|K_{*6#~IqSabor@wp-@9h*I)9F5teQC~ z_5IxNFDPjKF^Z>fh&+v$GR&Hrj`$TaPq2u$G}&G#!Qt^#%Pox;dbxkps-GG24__#K zJpky+F7Y%Z+QBqt9lP(}V3eq!Eb;`@LPqs7ezRH?{P(pTRn2 zhft3~|Gq6Ac$o@{`>ylrQRh&GZzliB@y^J6`FXZ0&2L+O8^w*>LcJb>>FE8#x6Y+R zHI=cJc-`l9`L3cxQKt71aKx{=SHa0@Y4IN|ac+`ym!7kXeaFJ{+P&>6hyz1Ca+?r{ zPB@fiQG(c^n33qNs~No0x4DFE8f9^00T-2jKD-(I6T2*{c^|E6US|B{i~Nm3PKZj> zyLZ5S0w7(8(YN7%lG&2RbhA~*niLkr9%<&ss=_eXVGK*cWb?3oGudu}TE#?l1hjd4 zPZDC}tNI_NL&nG6*)B#)TsT#C^ME66K(0dl)%b}nXKRO6kov$HUN_y$(`9C4%X21d+v#K?MJZO^U zx;RS_eWmy2p{j~zx*w4G&@)R&OxoMH@9Ef&c|znk$s1l*lC}FaX_SYM#k7WhAnLPi zWB;mHqIdGPAyw*5Jp+Gk+>Xp}?3g8uzcRVkJaBk*RbkU)4~4x{Axu`ZktHZA5PUD%BU%@F z>wnKsT)-Bj`+;Bhs|HK8Hp*rS z$Fi-Xe`;HYz#Cl1Ed|6Xy*Jf8G1u*=@+OaUF6gpi?=kcC%sULUi;I!R&fZ`p5Y|~dvdh%LD#?s9c3Pz zVa(1M4kD8d4BhW&G{s%w;fWE!|K_p09Dw@hbD-&Nk{2@i(`?;;+d*)r&_o5TM{is zl|0e_A2}JNZ}3+xyn)+yyF00En*7LDIjXR_?_+cRKYTcR{%9uM3}rgqH*FUkum(PT zuxFWkFGSg9H;)(aBU~8Y_TTa_{HYC`5cLbFy#Vwigf;P3s)6gI?>|gU4L{XwtNGCU zj~QD5CM$;W&rV1jH7!F!TpXucUpGXbUt1U};i-|v>3Gy8bd3JJ1@Gql%H2amtnbl3 ze1V^XGQMjDSRE36Jg0X;;;g*uYU?6*e2I)Dfa%nltY)c|ssProUqQLex|y3#aD#&j zbK2)Xp8)q~;nRW4dqtMeYjS|l69-NWw4*$MoI<|1U%LxfN-~&G_Byk7YdVWK&MLyC zJDDxFA**rYHYxaF1)72qnF!0QiBeae-?IHeH4TJEZN%LtHCf0RNGc9zb{n1ma9aQJ zK*k9f84)mS#%OEypotll<%4z^-)PO4R%e%ghC+XbAaI*ar7>hXDmZ9?E81$-QEivn zcl6C!DcbpiGLva@ie8(KJ74s33<5;Hx16hLT$}jA2p^$yRf3#YmHu0_KC@T2Q8234 zgu)UNizb*6*Xo^C^jsu4b9ZjCPa_-hr&v>byqOoiZt0@;6o=G)Bi2(b@m|g3d%*d# z()8>O8zO2_Q{Ah!bT877@*tg}nt`3&3`ieg)55=FmE*mdZVA*Rv`v2LPs@}v=nX%3 z`Oqyw6aU($L8xOZ#0I@>T`pI?+hc<%A?kTGuX9=hxguI^4dNYCiD{0xm2s_JC6{OhlqarZxzMOkD7Kv9db~#&!GCq`j<89ZPOAVyLCA6Dwf1zUuxGi} zl%ZdR@*Fy=$P(99`{}~8GS{XR?%cc?r4ysH^>CV3Z!P|uJr2DBeNzs8b^Ps!+N1BE zp7yyX>`$$(cYpfytfI3VDh0^wyQ&GB&B4^}36GfC`e9$_M}U-ZCv5VEqwu5SRfJ-5 z3zWh#*kRi7-XDX9+iF>Y+DPTO%5Lgxl98CFcjCd+UX6-54TWA)S`whq+9}M5vWrD& z)P^=|76PY=iHzxDkYSt7dpgCDL=pEaqf6KssosPU{e|U(G+e%_9KD33 zpAHYqo`1o^My1^`$0sCYWhU%2FHKO#H@$~0Hq4Vt8T}JEi_7_$E)v&&e3LN2rK*z> zHbqBeDUz?x^ljHy_TOwgE!4JjU7E=2?+D7*lWrt4AL)vcanmrh5sgu^(D6~b3Vxgk zq>O8_9k~RDF6U=6{fF)o$2^{_Bw2Qe6dPa49PDSQndD5V$Qe{>8H{+L!+-FFVI`XT 
zSRj*&CLCh;wEfvhuwk@Ie}$gI2=r%S(#~2U2uU$t*T-H&;D6wRmbL`E2LL@ypvC7WCY5!j1L7a2SV68mIl*9Urdi2=2V*=Dii71XkE+ z7>_aqv!SZy!|K4xw_$q&QicKU-o9yKL6{POQjcKa{VJuw5qve8r}4a$@iEI*){P)E zqI3_a(Yiv_^e)e+Bmzas7Mimvdu-!98BZ!EQp2G6Lm1w6b z!vLFI1~22QZG|%AP%Z3g=*il;%e8ghp3xrDK1p9U+-`uPzBu>Q8$$$KUBA7ZYCjq^ zMEr+Okr#rtPk4s6oE@#SWF+pfFtsEKGx~5#2_b!sT#TF}pq60R82q?e-|;2kl%6Dg z@5d@H4))i}97#42IV$d)+<(z*`;jf}YgU_RDe;9;j%!aygkNnRZ;7s=m|HGs^4!Nw z>ho3gk&ux&n;W8qi4NWxzZA~;Ja=@x%{lz6J;|nXKT@%XvsLq%*0%V?B2v4vI zgiWLLT*+>9(rDs?4A}3=*3m)wUgk>E_7Y=afd2Gm9p#EC3WI;z`v`tK0%|pPZKHL;6O<8Sxd6a914;WO)JpICf!t^U$p$x1wjQdSASCtr- zEre5U2CBEEN!NXX%^LaM`z;MQJ)EhD$O<}mjPKUZ&Z!LQxKdqN&APwMubG(K9@=_J z&pJdW=FIyMCz*0-*6u`g+gmvCde%e%Y^?exL=^PXjQr$~aEUkIW&Lc%p+EtD5WtiZ z=$D+qklVQ$JG&9|VkonOQ8w`!)8m$AxMS`F-n_`eQWK)PzwA!u80}#_)qCPm9{FL_ ztsd3SDz)VPG&jiCIai_2Dm;ALo0g3!Y0B~y#Sc$1=W;!E#>wD}^~9%aQIf~nrW$2) zx-_ZE<=nD4LF~|}X&5Tg#`m(6)0_Qq-A(S_d#>DaCh#wdNEr1s`YY!yhU`x4pjPga zR^VCx)xGGb-)Hg7G*4*7Fuzw-Y*HDZqS~O9d2Lqh{rgkJE02cS>06Ua);@@QUNkxAzPE zl{v@iVDApAD1Ec&ZV$fWAB-}xMicDMji0lCqIxvOR~qZ5-YRH!>Uy4C8qH2R6BDyE z4B^Se?-Y$MufSdDPz&Yg#?IUInYQj8Xq9)Bb^Pq5I5GCay2jFyrsm*YT|!Xo^HyHC zt86{-b*|gq{*_=#Yk@pi#Z*$JG#Uwb?g7A~D-Q5aiR4*L%NWbQ^U8bY{-7x5x$el^ zJoAx7-ViAh|2o1**ENL|VgvOSx%B5&`N27cdaWN}SR*VljKy(;I-hTY? zfENGEfe-4iCNnc0SiIY8r>umosLvuCqZF6fJLTO_CA&$5^gdTP| znei~OsyM>>)GZ$YqS4RSZ#;8H2%o0Ixu^jD3qE zMdrV;AA)|>ixw&aCo~U7nVzZkQEY#S_EGuxMbzreAXux&^sUK-^SSA$#iH!kzsDL{ zvm$jAKDIg&vI6Qp zN%$Ny44KE>HeaOhngY_1=b8H1j@NhJkzK%gj`(XcD>E@OxxVn}Uqd3Nxe-ytwy{4# zGP0)fTci6jTj>61o`qannad$Y;Z@nymrO%qIJc9OvmfnDguN{JaKsUiDit9ccPF*` zto&?dhg_?M=@Me z1=i6qhQ4CfA&%_KNqe)u4|BzhPfOx4-xGVd!!2lIezSgL$!txti`qzJ$#@TZHGPD> zIBL7{=J9o#D!t=Q5(l4|=gI(l+(NrGM=qp{8Z)cH^XoX4eP2t3!aiSL4+HWGL{4QXEh_ z-2!EJ?{avG;nDre&rRb$o!=jn)`XAi0@j5z$|vS49c7E+M(v1D@0jeAlV}q&zmohM zCc>1ctkh|^@kIC2;{be<18gTVWhN;3((&zCmy`>S`5$6V+_=}IZmYX#gB~sLow(7% zsEQGhg}dRI@_&WQ#n$FWB4Zdt7esq>QtCB;E*UW`uWbHxLiq zgt_OQpHC8s`G>C&Kl;p*DOUlWox-7bHtuu7#@jf6zO4**g10zT z)WNT>DIweDQ9ZGvgV0iD|8j2#Q?vP11bpY^tY`zcy^dI)`9_I(AzMve&v2iJ8(*DL z*Zwk-KiMfM{_-@TL_`dhK+2^0+qfw_Uw@FL8V$zbCn3>FE7PXXh9GycsGt|v^X4Jo z(%^_S{O7maFjj$ob6ZO=w7MpJom7xMx#^tXMzpk8E@|Z0jjmm1t$~ze9Mz9t?^;rK z3wmCwlst{p;)UX$*d9BnbW?AnKYWYe!;o#sbs-RxH3*B->b2fQtHXfaLgBUN>yyM1 zEIiQ^7ak6$q(khH;X9zD} zWR|pN!?a;pg#wW69clOl6m^plkP~c0gH?`GEAIbwvLSrLCx3ed!@9r9xx7LU?0a19 zU)M!bG#ENBu)wH7J8gSgDg%}q+hT*(77Nm@1{CaU=Sr;R1=Q6(A>F?y~^R@9kJA5NKE9$ zs@zbK1)$|ayxGV!u)8N9z_B>>DQ7-rOp5PxZq)Z#x$T;~z4nsnU6On0BCS2@mZ_0>i*|N*(z_oBSm@}gs^$H;o!&gP= zvvip=gCEMKUclyVRQf5;A;=ro*)KMv6! 
z9v<6Wc%u+)$#q{V8ZatQX~C2Ol&~FlTn9!i+Z=k?_NXj#>xM?BAhXbxT&gQZdSqeD zu6gQv;_Xz)<+#CqY`;(jo_~Lx1~}wpF}+LHW7A(Ow`tH&)>jOho%$`0M{!GQ}oAnp4C zqPm{jMJVbA6bR2yB}X&7W+*z)yjh3Y)3mD zO);Kq*<<`ZA>mnx={}#z)fSq(bsG}KwSq;IHDOrM*7fN}QG>;MkdQk)RqC-aD<$cZ z)1faqel{SGoFj^5Ah^J<2y4yLE3oCq5xzl!1;Cxz$+QgEN9pLXqBTo17x)C$4XTdP z#Ewkz2s-O@F354YI>W6J;yU3(sIaZBY5eN#w`Yk@;Bw*OI}>(}Z*5cG*~%Y1`0O>2 zkL5gkGUXV!G9)E00-`nD%Y@a`qJW?s_4CN!Bph?VNw8KwVqtC6)P?4hXxM6mfA&Ui z01~?T;-qZ$B0cDsbswd=6@HR^RzsDuCap_8tlVHWI$GP*U0@oJ<3wTG36b-RSs+u_ zf);D&5O|U~td1Self9Gw8A~y`eK~$kg91)hV@hIrQ}$N~=AH_AY^;XnI$v@MEnI!( z?9B-u{c{gcGrhVIvT2$ZNr~MG`Zj5@8*mtBt7Ot)VA!` z5S+BH1>&Pxnt>Aa&O@+s<6U6F9|!oL*-CB`ZXsum!21$nrI^NVbtMaA4SPwSQ{c|i z;}f)!Nu4??X+K9hEyqBMZZ8jqxWDIdMbX(H<1o>DtnMXDH3k=1Vme+v!DRl75zyk$2k@YHd($zTz6|bAw6n}eg zV^j3BboF+5|2Sp^5={*LNG+Y#BrR3)PM*nyF{^j-e;xmeDHe=;(1rdk@i0NPjnTcC zn)Z0gv(=!qTz=1~g*{3!ijB@H-C9>oV1V>D*LzkQ$uzdI?waVo-c8>U@e+)X2VFFE zKbUe*=Xc4|H#uhot{LkFX66>q-k0p^*6QUWccO5|5%Z?$^L=u=Wb`UWs%cnh)^WM` zdNi72Ev2LS%Jak>lY}S7^nRTn^L7YRRWNa>a>>kZSt`-4@U@BWd(wgJ?!HRD%!0Sy-lk!zn*5~GZmz>Ei&PD zOaRa+I@h(r2{h?SJq*SwTcCG6$m-eb;+1Tu*;?VHcqJj70w55W#Bu5qevs*qLH!t9 zbJrv$Dti63w)WL7wE|@YT4!;7vw~Kq2ADE|s6@CZduz0*OqvwPWE1Idp5(`rQEp1q zjJXg|Q$p0((3D|%0yi*p_fUfueR`HEod$;=UOS((BJt7EL^I)UL~aJJ@~1GXHeGa> z;l+3?adO?V!G44bQ)BHe>4)27QjdoexjxGtneN`c$vqm>G&w%XeI?a-+SB`T20-G` zCmoYqc=?6%+W2RiEtL5XLb~&S8+a0iDw#kt=nojn=^HUa7Bc8jQz)r=V;w49b6=Gz zyo&W+74ZUCel|`Xe`}k-a8CUU7C2;+G=>HA6SZ*olpzbX8L_#V=Be^(=Sz=pOJe0c zW#qwKMfbjU^ud(T;}9p)hQNtsPK3~v=bopD5nkw|14-?@9k=A0zPt+NU^i_hNb{Ni zY23IhRJUe%CEYVT80ChZU#Pd@emRAlN%{jdK8_#r)B>$HmQ1UxjnWX!b0aHG&3S0&K_Sz@W9KyIdsjx5S>HVW z`uto$d*G|Bemh5(Cgx)(ICPY{^#zGQqI-907s-6V$2iU*Wk5F38g&nGl+3{%=lf!xA=`$4^dt#@-*I7KbFEK90`f=%|6iCl! zcCDGzMvAsC#aW=3@RHKLqCH;U;4!hSb!CD=nJy$ec<|rLSo|*{Km6|_Tl}}4$bS#v z_CFR?Gp2ALO_E zI|%XT^@Cz*mwF%nOJx85K|t*j-=5i>*n=7f{l5ju|7mM9f8cJ%KYXFF@_heNw`b4n zTK~h>Jl*|`Uybhpzc2eW7rcq(1O2<;`~M;CnbTjnV!OjT@AsSh&-?G&w6(bC9sU?| z`^3|xoBRHv_453|gsNddg*S*wH;NI#-m+r@DHV8|?r~@VtbeuB$bj8-p=Jtu8%%(% zwXN^M9*x)dZBeV-$m<_#vDng`x~Y4jQ%27ZP3`pW>!y^x7GL^Q7+u1(0!4z|+GBes z27!AwVh$#U1n_8(7)Iwj5>}4b)Txt$plS-!})|RKD03uX_jmX&?7k+`HAm zotS6VrgTPZ@AOa927&pZwv-&&c4=tK)6MO}s|2-oDs@&GZTx{DoHg!3#Alv$5C(5f zW`=Hw!q;`6r?5Al+~Iue6Vf$!V6OWo0i|c(2LC*=%O7nwp>=RL?XKA;xVgnUKKRi4 zPGfCh{cvNlyVGK`I0gP5!S=Z~9o<|cEMf3ji#B#>DPPdWo+$IL3!tvdxl5=tZD_*| zS46z0GJiZhvT)^BBY;B?Yo)ZzArU8_>j(Mq@x}& z*6j+=8d~-bAGtpP2p@E3oqy6&*c4A5yx}F-^sr#%=|uW)h%%EzI)H7%3UvHA($uKh zX(cW)elPXj;y&pUmiwe12R$}mClH~RG=2Xrv<*FU`^B+$i?3x;vzf(@uT~2wCT~f15*u%3qA|v+9 z-RQ6O*1QvyRocy};DRu(DPmh&=Lrk5x7-esNEe<}-PI|Ev?a(?vfUx9PU@S+NU?B4 zogV&cJx$%R66XUCSt!0cp=+kcE*ej)XMgyxWEawXCFOxL`mn%0sSh6e3j)sMkHKq* zZVcu}UA0Z&!aw}>$~+Lr*^CUI6L%hh02!C%#V)93UP5L~20O)KmJ0^ALlt;)ebKx2 z^EDms*ZtgaDZdu)UUXl4^1EvnBNd|?tS?`Is-c5+nRLJHA67Y?75tWZ z_SR?jvp>?4L=FVrTNp0qFvFK{Hn2U5%|gTt7_B>S~I9i@7~w>>58RXviO77*1IK zyLr@qnIHkFqPSg8IeOrlc&E6zuF((Gzdk8h3%pMZ`X#ad_U0#1Epht;HQE3Jx4);) zhm1U*)hydR%EBd@;=SSx`XASwefVk7$biNhTO5#(RZ5Kg3;T*JB19^?xx6eQSNnt| z0}d8EKj;vQ@_Ns=Ep&3;Okv@9D2}II{D+S}bIHE3K2`~`zg6|I_y^sMr?0z&3BxDd zKTZIF8k_Y8g_PE{JdSO}y?gy`5oOr`t=tXWYyG{z^VR;I-r9s~%s4hQQs=sbo8u1% zn2&x--6y5@x7cT9+;6ANV4=Xc4$xTz^-EN&=R-$-QK7zgf9z;i>ovJa4(YRh4PI`c z`RO$7vvR9i*6G}R&t_8|pZU9LgL#)j=U6cMIExC~13>JhPVU9#Wtp`}_utnp%`sM! 
zbC7}h)IrLdZ8DTGhehs?4l5(+s7*J7Ye8(9<_0Y_t9#YHe{jluMlYw1p#NK`C9DuN zeR7>Sb&1#-|7;aduU3EI{Zqw2bG{96{nQc~VXyYxNGlz&GM)j%Rp~VGU+Wbgy8JL6 z_4^)QfqMvfq>|QrCbX?~X=^kv+Q0W|RD8za!;qgJP8J5h4wGe5*0M;1^##q9l^f9> zBNJ|tjvCz>J4A=wl|136e=q9%%61;?Ls942cWG|Wo%lYi<1OV>PG$FrBVXMw-Tnl- zxG8mk2}DY8!-eL-4UNbI$o^(eDQJ3k_d@y2NZ$*8Ksqe?bRf?vTN`!jVEKP(@5|$% zY}@`XOGYJ9h%7@CQpr-*F(iAk6%~;^$u2vWrLrXZQo<;ELS)H0se~j#_BH!5ma)%r z`CWBC_kBOz&+_*Bz4!b6-uHR>r_apDIIr_O&g1x=$MHSBXPm*tJrM-28}m+H_g&5k z)Hg?Ka>)e-S(qh*d}46#JWjHXtaEmSP%WfLJorwXdiWCvm`_lH$OSQ7LAhSslYYta~2v8LZ?v{*grwvb( zXv0S6d)#6SDv@Ju1||3B2?fp0LRZZ`E|YRB=%5FdAEn0}0f=oZH4gWAM|)fA z=d~WYCkXr>#@o4x z+^jG>Xy?$vyfHMTsA~G3g#-T`LDzh)tI*Su&xsC82P~EZMdq@-TFdo1bplokP9m#c zHMr%_y;>9s=BYfrLXH>^Giok9=Y8pB-HvZ!;c# zTIXIdQDzwIZWW%!9_T-cSej6yrkL^fciI3 z#(>JLTq8{eflm!VffcW)B)PfL3#77{udAV_Ov2z+lfXU1Pvv-Q3qwu3D_PBc!I|f} zy3ojTm5-_;`*yP7KKhD<+$$ILt~xbY(y`bLapVf&ON(pBx4M@#AlBC}_14uED5L4R z)jtJ_#5XnVo*ppoX4V*7S6hvBn@{@W#|}P7t^HIup$^&1Z37I1Mg;X5ww?m3Kl2nPwBV}BH_)thF zE?hV5`tv9RVFW~q+yzevf#q6V=veR}rZ;zb8<0%W->@-t$}vI@Dzd#=P~~QL8zsbq zV~~(KJm;b&)M+vj3B~R!i}t0iz4`IA?c5!g3Ea&c7x~WaG%}}3^&D}Duqn87iqo~q zcbK#rw_#(ZR~?&>lu@0yRl5zyJ!?MT2lgt&9*cc^U7zuwV!|}50IAreNj5B_X0zYK z9lTINmNT}{7)k1##m)GF1(QYbrWKt$K6%Y-7Od!#mwIC8MTRQ~`X+c>0jxnJCFEz- zx*6?zFw8ht7;kB7&0_U_!H1k;YB3miM(TJIuuqg07~9gvWeS=+<4$Z+(W=}AJYx*P z%GH`mXIDXRVp_qIIOpA;Q@2->&v54s_r1enfKwfR;aZDD;?dhCE9DMHQm_n-?Hh5) zZrW;uA#^-u$l)qbnUX3=9tet%+~q;IC>#Nfnh^BFVC|m^!g%Y`t^&O_R@%+ zZhy?=Pq|Ni1E7~0t3f4?uNmc=RJoUbf$8~liks%lH67&^`5=DFi>*p!o!t@LHMP6~ zKt{fTH8Vcg^)Fp(z9vxLO%s8u${ciHx;h%j571uEgBZv!bHHZI@-~3WsWZ$e5O2JB zXh(rYAd?{LR?p%`k#MJEve&_G&$@yHY)tvq+~&h0>1-zXxT(U`T7BVJ11p((At|-3 z4_;|U-L(zRC4Rh+4ARa740CtM)Dy7o7Vt$1hKaV4!^IobtCc1w*?FZ*|CJtV#NOua zR0SVDlT3R^5DY{tFLwmJ>c~h0>biR87mSMVdtR@W z{t{3pk)G%C0}%nMa{QR2Y=u3*?Y1$ZuV#GeP`Ov;#QW=g=aW;LVMVV16qX7Y*FhfNR8k z^oX2%iJb8=b%U*Wtm5%pJUQz#mJ?)$j@Xe$J@`ZWn?NxL6~jzLkvL`DpPV}7$TwE> zETPS}e5Sef9=;sodSf*%VfbDmW)5G`HZyWMSBPWZ@h^3R+t95S9ev<^zjIA?S|nty ze|nSJvxO{=%j0Hddl91JfovvhFwNnCH z-GG`yad0PU@Q-g{-#0Ib_c=S{_xX01bU+WS#%uE59$?@#eB0G{Pas^Cg6gm;_n2Io z_0q@VGt!|mo>x8d(O2zYJ?6+4nwiDjO>>0dn3%q${WUX3-Hn{Z zzvh&WEgO)w`IyqKfeekcO;wh78YE;qAm&< zIC^Ds%?^ic1H5Pc;j>Z1oLCd;aO7QK&9!}aYET^Jp6Ska5W^~OVb*;s4Ht$d0wv7Z zn)tKMOA|hq`}!n~JUgS`(YVmACqh~(&bQ`A z{AH6ELdzvIYWJL0W8$zpY{fP*c4#0R_J;MII`tpbmokr3KLX>luz7DbePkpB+rTt4n!LdeFU)6Ot8CI%A`BB7AAn{f@K3B2;oYuslK+1-9_#24j@HRlEgkRT=Zv*3Aaxe5CpJq6ojVlrl zAhrQTY2=m`9Jqr?$6&o@!VdVgFMQQUFM0Nr&(<1z5PrVN{x61RcXT^NW6 z1IFlSLW|bcif{w`l40HmNv4|yC(w{F^7}{rw3dWfnKX&q1`Z*6UZM$XZQHL^HD(1KLb=T4!gK46j1S#*&jxGMH|8;IKmwmiVsZ?W4z+J$XkV*o>nROz|_ zwm$SjlR|#Z-p|?lX?s6+?oYq>Giv;d8b4$1Z@B6wV)=>p{$J7Z#t(qyMAt~9M$T}3 zw$bHNxer7J@Era*^Ym|a?b~^#qH(dC_37Osx=EQw?T-(;_e#77n>P(>vN2sM73mal zEIep0n4@$BA7^fV{d0__Aiu5KqRXec7BBvW-9MJk|1*gqY)R=%A!F_SvevH>G~I8G zo(CRcSE&odc90|>{;jjb2>xxr2eu_+URa0dXlf|gA1a-}&?9>o=HwCURD?DP_>&R< zyoSyPet!7hIw4es3XWn)z{*Bwezn?-3_Py`nZ|2{A6nD;T26!Yb!u9>W<%*b3!ijZgduFhHlY3{wZ&lr(GCx~Dd&gA zK|4yk2Tem4C}1AjA1?NzZ5>7~l&@U*#{c<|=b#dXJ5Mh7#ZgRxbm*}@EyBVuf%?|u zH7q>Bx7&aK#tHs+5k5fAy#-e%^m@K7|Brm{4?V~aE&bnFBpS61O~9iJ_9(nc zoCs}{;El_$`hf?y_yt_P0Q~Sf3bWlW?OnzjW9;UN;)^gw`@?{n-DT?yH``U%8Ixt6 zCVP!U*8_Wp`Ug(;SVkznb1Zn~H0Q6Dwi_l)QphuB)s&aXSj9;8gpUqh7=tzY1%SKj zL!)Ji$DB`#-MM8RX+)_BT!!=X-#aOxfvaz>OPrfy#k~x`WO+eM6bU{W*}`1zkQ5K3 zCVx-AuRf)>pDE|kDm9LOnw=V=3`n~08^LmiO`#nwqBk?*G_R!fMai`~-Y_7TsM`v8 zzk!>W_iqDXX;lC8?e4u--?(6ooQ-w38lE3c7)jQqUj6#*dpubV9~(a!kr4$DV(S4u z=VMPfX)R>~Gxqx3cDzop3Upb9vzRf)W?E+?G49CDG0FV-JeP@5K@OkB^zkT!xLh<_VkmOr4=r9^jI zG&)M5k++kvE)DS)@@?-ZthdK}*UMQp^HwMqj 
zupmq@4A}-|-e*5I$nuf=WsDS&-i4QY#%1n0k#?>iA2$HoK(!L36#Fo%mT>dUCyeB= z6^S`~WET6JywOC#3Cl{m6&cl`XKFAx>-m=lkygdqK%@tDE(x4I6s?r>SG6BHJC_Rw zuU42srO>4Ytjnq~)`)(U7Ju4o`~^phv)Y2D2v^Fe@Ftv@FS}0`+{1~Yl6^h)Mgp%s zpL6oVTc5eM7!@yzo?wX4PLX&IPSR&@wy`j5DwQ%U2$R<_WAc~Kqb!~1m)H%sKuKYK zWc~eBIjy7Ds75#_=_zKCwGj>@{Z^{u$xFq1^A^jc&;@8|Mdj=e*Fv0t1UGs0ve1G| z;aLCh>~*RS^9!l(ER*Ee7_@W-kY!b!jL#SDS0a^WzLe%7Pip*|3E-eZk#3D|hWDHt+j zg18zTvn;sUjBMQ**(|;z+*d`VqP=k?aY7cM7th}zyVo}a(e5u%EmK&ndpy6H<$mo2 zGf%x9YhrdbN)o4P{Y3lZFFT*Ax%v{ZFa(&}JfnJ#}m z=v~b=kP4SthH3m6=52?!Muo{s*Qtt@QsUO{I>?=lbUbHIZO#57y#1hzG>zrZ-01`k z4ekMSfS(oNnk!uVzUJMAIP5*fh7`IUC_%vEGizT_B zG!l@-`?&;O{$O$)*@nqhn{qF0-S|NOZwIPNTh{GD-#T&`Rbv2Ak&4d6g0H;_NiW~~ zp0j`~!5orr?jFuTcuGe}m!IO0rXhGo3*L?#lZM>&hI~qEiN`b6dM|f@pZPyb^%s*3 zvpe`pMYzhI?%hEJ&;c?Os*kV#0{Q;MVJ|!XAX<@>L-jsiR>IrO<;8oGMK%03FjigO z5$@f=H#<+_wWFb%$a2uC#70SN0}VwuRX5o$3U|5P__sP3ar!39iwDRqT4ZWnffv7@ z)J4@;m`_aI$l;6ap)a8r_sS>EZpgDvNo-ii#B#dul7uL<#HYmSRFAI){h6X0XW za+2o?_60ag!`T#tKvvK(1FEG37tjE4Ks$_H?S6o^To))_kKojv2dNl z{&=IFQYBMYU?r0I2KIXFd^9qKcV(=K;CmpuTQ2TxqpV1#Z<#}hD*D#W#ZQQPmaLY5j%-}zx$gyQEvm?W&KK>9R>SF!BH@-_2nC|@!kQLIx+@J z)0?(hl@o(zj2|wI!$JEu74*Mb{r>jhpV~@7k@$2Qc#2jBR?v5m%Q$|ry91e0dgY|a z8}JKU_I~*V+;|VJeL=(R)AP)5X6T!Rd>aPhuEs|HESMsyb>knzM5>@=tNd-?ga}fR z2l>Ec7NnSye=xjrMkca9lY=g#Zf)9dX&SXMBhSlQkH+tfoyHJgFIljZNP;6ozcLhe zHzGCQ&e!Qguzw*dz7MtAOKSINfu^U?`$;+bMM=D-Q|G$R$z8g8fIJ3KZG};A-Issu zFLbppb6%4==;#UP(n)5huy-xTcrNiy2NmWnXjPy`Ri6z0y1c*61VsqM z*)e!e{PHn1v}}qN|7H$j5r^%*kT%l`03wZwG9WUI*8Yg$^*)199S!6wZA949($1<2 zmKpYlDkM}Rs%0CjFruFt}N&~Jvg68Zs z!s_u&SFWwq3~~1ox?fWg6fwRX@?6JZ7}@yAZW}O)#HUs_Z38th!(lfm%l92SboLUK zuni!|LiFCJumec-)z;u6e*@sZ2Nr*M_b0M+=xPkeIs|RRLlQhUkv8bLDsY;y`(Yr& z4&UoWdg9vvqx@QU%!Q1wMpV4kwZe0oj=v95R@9_+SdTOl<9#uakfhFZ`W@sHY-AVOk140zh}s5NB+$f$*ezGWE~SGPzWN)E?68vhiXfau03UB^ z@AQN;w_Wq)@eWLN4BHYSrJ zsN(d9?>Kuv+I3E{R(k;bQfk~)sGoaF?~ussVE?o<1DFX!Dy02Fa1VFfxynTG#b9qU z#~ll0Wit;;ZoDsTi|F~lj@Rz)ySUCg59%#8Tn4WB?xc2S+7W6mi@dcjA++5HWH_qT zA?s(M%Nb$8c5|X}kf{ihF39xOt04I4ece5{x4HP2Ts058B~}_U#BO}`Bot!@3rT*O zFl!tpR-6%ULaCgASD6(YKz2ndmww&ZRDAa>bt6Qd(Q;Xw z^k9$K;!@d-|AbzpI2Zf3 zv4Nu50}f#GWkY8;WJ$hzeHU8;Hj}8GYgwNYEqmOMd`KzSr_hqNNP?kU*{W&Zw(80- z^X!nhZx}EB@rT>)_RVm+(X1~Nw=)@xBg%f@^v4bMOMVoa3p0}*-gN_O9Xs!giBXK> ze?jnN-y+-W;kx(t&7WT8ZPG?>KuH<8T2_W(ge!=gK1@s>PUB7ZB<}v%a!P0yG6lP? 
zvj8eI&F^NMT|WCfE#f5eg=-XTFh*bfHT2)6>hfoS&1baQ)aIUJr#2xr{# z(mrD4q3psrdaoozEae)D*j2fNN#u-gEh_xqw+I^W|TCJGJhT1{!1 zlx^U)BkIyB)f}n#I#okIv9tI z{ys_%3#@N#_ErbqdY?O9oQso!v`PLg0j*LKA7$+4p6 z7Ie?Dxx)h)a(JU`LMqmYFw1Su=_~OL5(j+YJ^{#U0EBsc_W|7`wZmo|EY>KE7{mV9 z7C>vq=T4!rorlkUbPOM{#sXvF6vLako|9pUGBQzP?9KaVWbDQ7Mic}Ds{{Bw&f5U- z%*;5_$?q)OTkK9cuQhfqw#`S%be&!4*hxGIw@LoA$OT5Z?jBHCP ziu9YBuJL=xIn6ksU4MA~iFmUbulcN&F8EOVt=H4(=lO{}-i*!e!`)yvI-BXP=f#%l z1Tk?fe6=+)@r+!JLKbgl38(fAw~-?9yoUi-?R}WI4S8mWIsXFK{(4O*K5PaX9d!2G zoBVMM+C)@rpkJf+zZ0b$b*&M-YHXJll{Tp&JA#~Scuvw{%i`@iZf9P6-}JVf5(lX3 ztF~anbm`XsyQ|y4=~Bbw$Ns&0#R5H1k^rXN|L7Il)9%Hd+G;g$`=ybi^qRH+l`g!0 zCVj5d-Bd+sA^_%=?_V~V$ffm$9KmEia)jG-f|W@8wnSZTStRQ-fD0J z?T6Ds5!D|KEDdjBCiX8#V#s(5*&`B4e(JpgP6>X)?cdCRVl;-lPQXl^i|u=c9UBrR zOkxPM89G+*BGqHNapF_>jAI|AZ?iRYN1^E^0}isdN1G@g2`CpFYYQ6K>xEot4@k)Q zC53@6WPV+QOIxy|GOhU{mQOXj}K8M<>R2#Pr zOoAIK4sS>N;EWeS{i_K8-M2zDD*B%8ntKRpJ+{4sd5(@D&(eCrzJCVoWG(C zO7?ppI9|~Yb&OuH6Yd(v$V;Wsd7~M7hg5mayl;8EU_>M5KaTgdW+SxmdIdbJta)5h z62nZacg8N#t4xLaiBn8S`}W&(3!X-QF6ZrZHQlma&QhCaDc3PpK71!g$jfb=w+J4- z&b8!QX$+^9m7gURAFt+jb=s#vAKnmwlIO6l;4c-`@S`t2JTw#ho~C|-e+QN)U=vQA z5olm&g$TcaUEY!)-vY_B9eit?yCGTFB|C!fslOBcsqhzHgL!Ros#vHMC-#mECKo>C z^#yQ&Sv|Gs(yvms%*mfQwKlzycbTCghN#Q#n}*w+1t?abn%0^_caoS=4hqs-tfTfm z2rLikY5ATWO|uV*mwW1koc68DQXBLzyht=Mv5bQ7Jou^2*zzwmat}5##Bas@dO-QN zz-jEU8r8>C1MRb+%h*CrMtQuXN%gdr0 zW>{$M?-Y>?F*QkJcF(L_>Mu{-#m4Ta*9nL;&TD1E#{tb>@DNptkBX@aYNc97b@q&D zJZl7#brfxyiq5D9U-^sPqa}7V5v01;tunDE4gj8$PPd3YYw@9oP_9!D;o z9OFn0en}a?4zY9`d{2{bQSexHI^Ev%G;X}8GkBDOQ}b(LnJRkuY`17yNm-bI{-p8+ zGm&0r8dU2D>N?{G8)>=?gI(o}O-vdqBDX9Xk*`G&{Mno|aPB?luM?sawA9z|djL=S zqO?|9N2O{9KFyUA^;Z3uQAq?VE;Su~R^i>vw}8`$BI+TcpI*Rer3*19#X7ym-I?x2 zy!%6ph<3bEO>)u?ue`3Cd#xZAx+=9QHkpz~O!1 z9RPU^CcLK_AUED(M&%JbaQDDed;@uze^VP_J1WT^U`0Hm`cUxxY;!8RvV zax(*4BJ!p=k7oB=_&9@~vs@fSolW2I@p4)^U1jEgWA85j^jds$7wM&HZ*mx0&b)m(6fW^gh85zS^oPEzi`b@HhePOhDY>ig9?(_oErjC_6GUpoY z`dHV~K{f1|37&`eS&@3jvVg1OPA_prcZ|kdIKt@&RA;(oT!$BGY7~fND?M$$o?X5& z5ptQoXvikavdZ#k><{N0u|i$Wh{c?!Ahce8QVB!8^0{0fwIc-TKl8ItTjzO!$P z`daFMKf`E}v`J>)R{k4n`!cP?o}%9msPKRP<*fDZveOXgFOgda>}T{UkM3$5x}G4K z(7sC6!Q{>A828;Q@g2QP_X_bv&L!FIuuJ&J6tkvx6ZS4)y{Zqof=;RId4Etb-p2@w zI_?b`da{|FVu>u1zb5vWUDt%$(Ot-1hE%LNpde}55v@p+SQIj`Pau{~%wQiKB`dMscxtC-#UtnkU9GRi8H07Yn^|S2QlG?5uWg?8URa0iX2hp&Hu2We z{z)u)c5v5eGFyng!gwuns2M`CPErzOfTG#K3|{iHb?O-GCk{PARfnVJr)6I~I^Xqt zC1hZrQCeiG+_cS0&&%6np+>d@s>K_Ss-Tq@tEIS^Cqf^uT*B41zdB>Ll+PU!^VJSa+XO0HD2BUMACRnvFbfC#N#F`=AkLX(f`Z@JApAKTv_y{`FM?U{SoZWb#( z7TFkgbIFF$^CoyaYJcQp;id%mFxl_ToTqT*iRQ)9n!R-sHxuV&sxigzl@5#MEx457 zhSf2)1}}bF^X#KOQ^_gO4wd2Rk z_cWo{lj+664lhgF+VCSQ4TwWn$MlQ~g!){cd~1U6!$Q!)&eg44Cnh?V+iQVtYNd&w z`{?tjV%vo)M%*PUHrN^zT5+Fu$RgjvCsw`dXLTDhqJuRDd@>5Hc+0%LggWql>eO_z zg>Mrl^f>kHewx*>=)JnqHEttrV|O&hiYD#o514ZJWF*k4eiS|LG|YU(JHljj*>+F> zPg(@ysFtJ?d4A$ zrYd606zO>#+e$wG$IO(J_$2MJ&KUBUyD=b^^3?Hx3Ehc9%-p_}P#$#KP{0@>KF}5t}8Z z=R3~x!`J6MnuI+F700z}=3%{s-ic4bF)cY|(3-Qst6^i0!bI`Jaf6Zo(@RIrh3=RM z;cR*9N%Xg79bcc~N7?e>8ew6Z9}L4WUkQV9!$o{gX64jo1%|YHb)vKovZ&=ySif_9fCk^+qIB6=i=)vf zez+=YfGX(K%$4K&%ewUEF^MS!7?KR{)7lNcvTpEr{^qP=9vOo(@N@GSjP{fq+*n`l zf9G`zm+by5-P;>`KrS`W0IJ!;HTP(AJf7_3S9|rH@t7;tVR$|Ai0}*8^I=Qu`y(VA zh`ozR+1|BU_b}P`VGY-E;Wa1W7cEP86tTY$&O?t(i_S?U;;NwjGUSUfKbL@%M;;#+ zYZjATwDQI`D+bBJ+3&l{>Q@NO-P|9S`*zXwJjc$nD7k61o?~YKB1lE zd|rOF!ON-qRC>8=DIUUuR{ieAXPZa6w|KKtAAw%Pt%2T8Pwo!EB%sH91G~Z*i7x`3 z`l;i^)0Zy0#$vO3$r=f%h0<99(t3SJHEJtd3gTt~7v)6zA}ebtEA!6ShH1o9QbFkPeZ9N-$~&w6OOyis4QdmAwA1gc z_P^}i{|;J9e4f)zxQu_-!^Eh1Y2J>uD9QJu30m6xXgVt%L#$}UsxM8S&!m0nEhtkp 
z)Z(hKWn#W|u61`%i;xQA8i8rcCdYY2F9j#4zOh~e*|J$>%jLby7gVXS(Wm6X7ss~J}*M$fO_?m?tY`ir_gki7yWXU5DlYFKw!CNtgey*!5=$5`5(>(P>kJ z0V47=0zf^$nnuM0nEfyK=&ws~|ELc5tx=d#((em=EeksS+f6LrU!q14o)M)`$A*gz zigcQmX`?hH8$erV(nWw3gmoHfVOd3Hc^c$ra1mx-)oW7(ubB{GuLXBR|NM_Xr{||p g{M-uvDxN}xj0ZQL0$w@Vz%&i|C~yU4 Date: Fri, 25 Feb 2022 18:55:15 +0300 Subject: [PATCH 114/310] [GNA] Update documentation (#10570) --- docs/OV_Runtime_UG/supported_plugins/GNA.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/OV_Runtime_UG/supported_plugins/GNA.md b/docs/OV_Runtime_UG/supported_plugins/GNA.md index 3426b10549f..d54ff3c5e48 100644 --- a/docs/OV_Runtime_UG/supported_plugins/GNA.md +++ b/docs/OV_Runtime_UG/supported_plugins/GNA.md @@ -225,10 +225,10 @@ For the list of supported layers, see the **GNA** column of the **Supported Laye Limitations include: -- Only 1D convolutions are natively supported. +- Only 1D convolutions are natively supported on the HW prior to GNA 3.0; 2D convolutions have specific limitations (see the table above). - The number of output channels for convolutions must be a multiple of 4. - The maximum number of filters is 65532 for GNA 2.0 and 8192 for GNA 3.0. -- Permute layer support is limited to the cases where no data reordering is needed or when reordering is happening for two dimensions, at least one of which is not greater than 8. +- Transpose layer support is limited to the cases where no data reordering is needed or when reordering is happening for two dimensions, at least one of which is not greater than 8. - Splits and concatenations are supported for continuous portions of memory (e.g., split of 1,2,3,4 to 1,1,3,4 and 1,1,3,4 or concats of 1,2,3,4 and 1,2,3,5 to 2,2,3,4). - For Multiply, Add and Subtract layers, auto broadcasting is only supported for constant inputs. @@ -236,9 +236,9 @@ Limitations include: The Intel® GNA 1.0 and 2.0 hardware natively supports only 1D convolutions. -However, 2D convolutions can be mapped to 1D when a convolution kernel moves in a single direction. GNA Plugin performs such a transformation for Kaldi `nnet1` convolution. From this perspective, the Intel® GNA hardware convolution operation accepts an `NHWC` input and produces an `NHWC` output. Because OpenVINO™ only supports the `NCHW` layout, you may need to insert `Permute` layers before or after convolutions. +However, 2D convolutions can be mapped to 1D when a convolution kernel moves in a single direction. GNA Plugin performs such a transformation for Kaldi `nnet1` convolution. From this perspective, the Intel® GNA hardware convolution operation accepts an `NHWC` input and produces an `NHWC` output. Because OpenVINO™ only supports the `NCHW` layout, you may need to insert `Transpose` layers before or after convolutions. -For example, the Kaldi model optimizer inserts such a permute after convolution for the [rm_cnn4a network](https://storage.openvinotoolkit.org/models_contrib/speech/2021.2/rm_cnn4a_smbr/). This `Permute` layer is automatically removed by the GNA Plugin, because the Intel® GNA hardware convolution layer already produces the required `NHWC` result. +For example, the Kaldi model optimizer inserts such a transpose after convolution for the [rm_cnn4a network](https://storage.openvinotoolkit.org/models_contrib/speech/2021.2/rm_cnn4a_smbr/). This `Transpose` layer is automatically removed by the GNA Plugin, because the Intel® GNA hardware convolution layer already produces the required `NHWC` result. 
## Operation Precision From 5724c5ac44e912ae1cd11543fae00526be436ba8 Mon Sep 17 00:00:00 2001 From: Andrey Zaytsev Date: Fri, 25 Feb 2022 23:42:00 +0300 Subject: [PATCH 115/310] Image added (#10674) --- docs/_static/images/custom_dataset_imported.png | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 docs/_static/images/custom_dataset_imported.png diff --git a/docs/_static/images/custom_dataset_imported.png b/docs/_static/images/custom_dataset_imported.png new file mode 100644 index 00000000000..2497a10a381 --- /dev/null +++ b/docs/_static/images/custom_dataset_imported.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aed8f8a3d61e764b12cf8f1c99db3d4c42f17008d4b3367a36aefec761e7840b +size 19079 From f55e69d65610a17632dfe950f2922fbd792e4516 Mon Sep 17 00:00:00 2001 From: Fedor Zharinov Date: Mon, 28 Feb 2022 12:26:41 +0300 Subject: [PATCH 116/310] Legacy benchmark_app is added (#10239) * Legacy benchmark_app is added * apply fix for supporting multiple -i arguments * new CMakeLists.txt with OpenCV auto detection * fixes * docs * docs2 * Docs changes * docs * CMakeLists.txt modification * Update tools/legacy/benchmark_app/README.md Co-authored-by: ivikhrev Co-authored-by: Vladimir Dudnik --- tools/CMakeLists.txt | 1 + tools/legacy/benchmark_app/CMakeLists.txt | 37 + tools/legacy/benchmark_app/README.md | 186 +++++ tools/legacy/benchmark_app/args_helper.cpp | 216 ++++++ tools/legacy/benchmark_app/args_helper.hpp | 32 + tools/legacy/benchmark_app/benchmark_app.hpp | 299 ++++++++ tools/legacy/benchmark_app/common.hpp | 166 +++++ .../legacy/benchmark_app/console_progress.hpp | 107 +++ tools/legacy/benchmark_app/csv_dumper.hpp | 98 +++ .../benchmark_app/infer_request_wrap.hpp | 146 ++++ tools/legacy/benchmark_app/inputs_filling.cpp | 356 +++++++++ tools/legacy/benchmark_app/inputs_filling.hpp | 15 + tools/legacy/benchmark_app/main.cpp | 704 ++++++++++++++++++ tools/legacy/benchmark_app/progress_bar.hpp | 51 ++ tools/legacy/benchmark_app/slog.cpp | 33 + tools/legacy/benchmark_app/slog.hpp | 74 ++ .../benchmark_app/statistics_report.cpp | 143 ++++ .../benchmark_app/statistics_report.hpp | 70 ++ tools/legacy/benchmark_app/utils.cpp | 194 +++++ tools/legacy/benchmark_app/utils.hpp | 123 +++ tools/legacy/benchmark_app/w_dirent.h | 176 +++++ 21 files changed, 3227 insertions(+) create mode 100644 tools/legacy/benchmark_app/CMakeLists.txt create mode 100644 tools/legacy/benchmark_app/README.md create mode 100644 tools/legacy/benchmark_app/args_helper.cpp create mode 100644 tools/legacy/benchmark_app/args_helper.hpp create mode 100644 tools/legacy/benchmark_app/benchmark_app.hpp create mode 100644 tools/legacy/benchmark_app/common.hpp create mode 100644 tools/legacy/benchmark_app/console_progress.hpp create mode 100644 tools/legacy/benchmark_app/csv_dumper.hpp create mode 100644 tools/legacy/benchmark_app/infer_request_wrap.hpp create mode 100644 tools/legacy/benchmark_app/inputs_filling.cpp create mode 100644 tools/legacy/benchmark_app/inputs_filling.hpp create mode 100644 tools/legacy/benchmark_app/main.cpp create mode 100644 tools/legacy/benchmark_app/progress_bar.hpp create mode 100644 tools/legacy/benchmark_app/slog.cpp create mode 100644 tools/legacy/benchmark_app/slog.hpp create mode 100644 tools/legacy/benchmark_app/statistics_report.cpp create mode 100644 tools/legacy/benchmark_app/statistics_report.hpp create mode 100644 tools/legacy/benchmark_app/utils.cpp create mode 100644 tools/legacy/benchmark_app/utils.hpp create mode 100644 
tools/legacy/benchmark_app/w_dirent.h diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt index 0afffbb05c9..f3c19240b57 100644 --- a/tools/CMakeLists.txt +++ b/tools/CMakeLists.txt @@ -6,6 +6,7 @@ # add_subdirectory(compile_tool) +add_subdirectory(legacy/benchmark_app) # # Python tools diff --git a/tools/legacy/benchmark_app/CMakeLists.txt b/tools/legacy/benchmark_app/CMakeLists.txt new file mode 100644 index 00000000000..732058d89b1 --- /dev/null +++ b/tools/legacy/benchmark_app/CMakeLists.txt @@ -0,0 +1,37 @@ +# Copyright (C) 2018-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +set(TARGET_NAME benchmark_app_legacy) + +file (GLOB SRC ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp) +file (GLOB HDR ${CMAKE_CURRENT_SOURCE_DIR}/*.hpp) + +if(OpenVINO_SOURCE_DIR) + set(OpenVINO_DIR "${CMAKE_BINARY_DIR}") +endif() + +source_group("src" FILES ${SRC}) +source_group("include" FILES ${HDR}) + +add_executable(${TARGET_NAME} ${SRC} ${HDR}) + +find_package(OpenVINO REQUIRED COMPONENTS Runtime) + +set_target_properties(${TARGET_NAME} PROPERTIES + COMPILE_PDB_NAME ${TARGET_NAME} + FOLDER tools +) + +if (CMAKE_COMPILER_IS_GNUCXX) + target_compile_options(${TARGET_NAME} PRIVATE -Wall) +endif() + +find_package(OpenCV COMPONENTS core QUIET) +if(NOT OpenCV_FOUND) + message(WARNING "OpenCV is disabled or not found, ${TARGET_NAME} will be built without OpenCV support. Set OpenCV_DIR") + target_link_libraries(${TARGET_NAME} PRIVATE nlohmann_json_schema_validator openvino::runtime format_reader gflags) +else() + target_compile_definitions(${TARGET_NAME} PRIVATE USE_OPENCV) + target_link_libraries(${TARGET_NAME} PRIVATE opencv_core openvino::runtime format_reader gflags) +endif() diff --git a/tools/legacy/benchmark_app/README.md b/tools/legacy/benchmark_app/README.md new file mode 100644 index 00000000000..4651447ce7b --- /dev/null +++ b/tools/legacy/benchmark_app/README.md @@ -0,0 +1,186 @@ +# Benchmark C++ Tool + +This topic demonstrates how to use the Benchmark C++ Tool to estimate deep learning inference performance on supported devices. Performance can be measured for two inference modes: synchronous (latency-oriented) and asynchronous (throughput-oriented). + +> **NOTE**: This topic describes usage of C++ implementation of the Benchmark Tool. For the Python* implementation, refer to [Benchmark Python* Tool](../../benchmark_tool/README.md). + + +## How It Works + +Upon start-up, the application reads command-line parameters and loads a network and images/binary files to the Inference Engine plugin, which is chosen depending on a specified device. The number of infer requests and execution approach depend on the mode defined with the `-api` command-line parameter. + +> **NOTE**: By default, Inference Engine samples, tools and demos expect input with BGR channels order. If you trained your model to work with RGB order, you need to manually rearrange the default channels order in the sample or demo application or reconvert your model using the Model Optimizer tool with `--reverse_input_channels` argument specified. For more information about the argument, refer to **When to Reverse Input Channels** section of [Converting a Model to Intermediate Representation (IR)](../../../docs/MO_DG/prepare_model/convert_model/Converting_Model.md). + +If you run the application in the synchronous mode, it creates one infer request and executes the `Infer` method. 
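+As a rough illustration, the two execution modes map onto the following Inference Engine calls. This is a minimal sketch with a placeholder model path, device and request count rather than the tool's actual code; the asynchronous mode is described in more detail below.
+
+```cpp
+#include <inference_engine.hpp>
+
+#include <vector>
+
+int main() {
+    InferenceEngine::Core core;
+    auto network = core.ReadNetwork("model.xml");       // placeholder model path
+    auto exec_net = core.LoadNetwork(network, "CPU");   // placeholder device
+
+    // Synchronous mode: a single request, Infer() blocks until the result is ready.
+    auto sync_request = exec_net.CreateInferRequest();
+    sync_request.Infer();
+
+    // Asynchronous mode: several requests started with StartAsync() and awaited.
+    const size_t nireq = 4;                              // placeholder request count
+    std::vector<InferenceEngine::InferRequest> requests;
+    for (size_t i = 0; i < nireq; ++i)
+        requests.push_back(exec_net.CreateInferRequest());
+    for (auto& request : requests)
+        request.StartAsync();
+    for (auto& request : requests)
+        request.Wait(InferenceEngine::InferRequest::RESULT_READY);
+    return 0;
+}
+```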
+If you run the application in the asynchronous mode, it creates as many infer requests as specified in the `-nireq` command-line parameter and executes the `StartAsync` method for each of them. If `-nireq` is not set, the application will use the default value for specified device. + +A number of execution steps is defined by one of the following parameters: +* Number of iterations specified with the `-niter` command-line argument +* Time duration specified with the `-t` command-line argument +* Both of them (execution will continue until both conditions are met) +* Predefined duration if `-niter` and `-t` are not specified. Predefined duration value depends on a device. + +During the execution, the application collects latency for each executed infer request. + +Reported latency value is calculated as a median value of all collected latencies. Reported throughput value is reported +in frames per second (FPS) and calculated as a derivative from: +* Reported latency in the Sync mode +* The total execution time in the Async mode + +Throughput value also depends on batch size. + +The application also collects per-layer Performance Measurement (PM) counters for each executed infer request if you +enable statistics dumping by setting the `-report_type` parameter to one of the possible values: +* `no_counters` report includes configuration options specified, resulting FPS and latency. +* `average_counters` report extends the `no_counters` report and additionally includes average PM counters values for each layer from the network. +* `detailed_counters` report extends the `average_counters` report and additionally includes per-layer PM counters and latency for each executed infer request. + +Depending on the type, the report is stored to `benchmark_no_counters_report.csv`, `benchmark_average_counters_report.csv`, +or `benchmark_detailed_counters_report.csv` file located in the path specified in `-report_folder`. + +The application also saves executable graph information serialized to an XML file if you specify a path to it with the +`-exec_graph_path` parameter. + +## Building +This tools can be built as part of OpenVINO, during standard building process. More information about building OpenVINO can be found here[Build OpenVINO Inference Engine](https://github.com/openvinotoolkit/openvino/wiki/BuildingCode) + +## Run the Tool + +Note that the benchmark_app usually produces optimal performance for any device out of the box. + +**So in most cases you don't need to play the app options explicitly and the plain device name is enough**, for example, for CPU: +```sh +./benchmark_app -m -i -d CPU +``` + +But it is still may be sub-optimal for some cases, especially for very small networks. More details can read in [Performance Optimization Guide](../../../docs/optimization_guide/dldt_optimization_guide.md). + +As explained in the [Performance Optimization Guide](../../../docs/optimization_guide/dldt_optimization_guide.md) section, for all devices, including new [MULTI device](../../../docs/OV_Runtime_UG/supported_plugins/MULTI.md) it is preferable to use the FP16 IR for the model. +Also if latency of the CPU inference on the multi-socket machines is of concern, please refer to the same +[Performance Optimization Guide](../../../docs/optimization_guide/dldt_optimization_guide.md). + +Running the application with the `-h` option yields the following usage message: +``` +./benchmark_app -h +InferenceEngine: + API version ............ + Build .................. 
+[ INFO ] Parsing input parameters + +benchmark_app [OPTION] +Options: + + -h, --help Print a usage message + -m "" Required. Path to an .xml/.onnx/.prototxt file with a trained model or to a .blob files with a trained compiled model. + -i "" Optional. Path to a folder with images and/or binaries or to specific image or binary file. + -d "" Optional. Specify a target device to infer on (the list of available devices is shown below). Default value is CPU. + Use "-d HETERO:" format to specify HETERO plugin. + Use "-d MULTI:" format to specify MULTI plugin. + The application looks for a suitable plugin for the specified device. + -l "" Required for CPU custom layers. Absolute path to a shared library with the kernels implementations. + Or + -c "" Required for GPU custom kernels. Absolute path to an .xml file with the kernels description. + -api "" Optional. Enable Sync/Async API. Default value is "async". + -niter "" Optional. Number of iterations. If not specified, the number of iterations is calculated depending on a device. + -nireq "" Optional. Number of infer requests. Default value is determined automatically for a device. + -b "" Optional. Batch size value. If not specified, the batch size value is determined from Intermediate Representation. + -stream_output Optional. Print progress as a plain text. When specified, an interactive progress bar is replaced with a multiline output. + -t Optional. Time, in seconds, to execute topology. + -progress Optional. Show progress bar (can affect performance measurement). Default values is "false". + -shape Optional. Set shape for input. For example, "input1[1,3,224,224],input2[1,4]" or "[1,3,224,224]" in case of one input size. + -layout Optional. Prompts how network layouts should be treated by application. For example, "input1[NCHW],input2[NC]" or "[NCHW]" in case of one input size. + -cache_dir "" Optional. Enables caching of loaded models to specified directory. + -load_from_file Optional. Loads model from file directly without ReadNetwork. + + CPU-specific performance options: + -nstreams "" Optional. Number of streams to use for inference on the CPU, GPU or MYRIAD devices + (for HETERO and MULTI device cases use format :,: or just ). + Default value is determined automatically for a device. + Please note that although the automatic selection usually provides a reasonable performance, + it still may be non-optimal for some cases, especially for very small networks. + Also, using nstreams>1 is inherently throughput-oriented option, while for the best-latency + estimations the number of streams should be set to 1. + -nthreads "" Optional. Number of threads to use for inference on the CPU (including HETERO and MULTI cases). + -enforcebf16="" Optional. By default floating point operations execution in bfloat16 precision are enforced if supported by platform. + -pin "YES"/"HYBRID_AWARE"/"NUMA"/"NO" + Optional. Explicit inference threads binding options (leave empty to let the OpenVINO to make a choice): + enabling threads->cores pinning ("YES", which is already default for a conventional CPU), + letting the runtime to decide on the threads->different core types ("HYBRID_AWARE", which is default on the hybrid CPUs) + threads->(NUMA)nodes ("NUMA") or + completely disable ("NO") CPU inference threads pinning. + -ip "U8"/"FP16"/"FP32" Optional. Specifies precision for all input layers of the network. + -op "U8"/"FP16"/"FP32" Optional. Specifies precision for all output layers of the network. + -iop Optional. 
Specifies precision for input and output layers by name. Example: -iop "input:FP16, output:FP16". Notice that quotes are required. Overwrites precision from ip and op options for specified layers. + + Statistics dumping options: + -report_type "" Optional. Enable collecting statistics report. "no_counters" report contains configuration options specified, resulting FPS and latency. "average_counters" report extends "no_counters" report and additionally includes average PM counters values for each layer from the network. "detailed_counters" report extends "average_counters" report and additionally includes per-layer PM counters and latency for each executed infer request. + -report_folder Optional. Path to a folder where statistics report is stored. + -exec_graph_path Optional. Path to a file where to store executable graph information serialized. + -pc Optional. Report performance counters. + -dump_config Optional. Path to XML/YAML/JSON file to dump IE parameters, which were set by application. + -load_config Optional. Path to XML/YAML/JSON file to load custom IE parameters. Please note, command line parameters have higher priority then parameters from configuration file. +``` + +Running the application with the empty list of options yields the usage message given above and an error message. + +Application supports topologies with one or more inputs. If a topology is not data-sensitive, you can skip the input parameter. In this case, inputs are filled with random values. +If a model has only image input(s), please provide a folder with images or a path to an image as input. +If a model has some specific input(s) (not images), please prepare a binary file(s) that is filled with data of appropriate precision and provide a path to them as input. +If a model has mixed input types, input folder should contain all required files. Image inputs are filled with image files one by one. Binary inputs are filled with binary inputs one by one. + +To run the tool, you can use [public](@ref omz_models_group_public) or [Intel's](@ref omz_models_group_intel) pre-trained models from the Open Model Zoo. The models can be downloaded using the [Model Downloader](@ref omz_tools_downloader). + +> **NOTE**: Before running the tool with a trained model, make sure the model is converted to the Inference Engine format (\*.xml + \*.bin) using the [Model Optimizer tool](../../../docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md). +> +> The sample accepts models in ONNX format (.onnx) that do not require preprocessing. + +## Examples of Running the Tool + +This section provides step-by-step instructions on how to run the Benchmark Tool with the `googlenet-v1` public model on CPU or GPU devices. As an input, the `car.png` file from the `/samples/scripts/` directory is used. + +> **NOTE**: The Internet access is required to execute the following steps successfully. If you have access to the Internet through the proxy server only, please make sure that it is configured in your OS environment. + +1. Download the model. Install openvino-dev package into Python virtual environment from PyPi and run omz_downloader: + ```sh + omz_downloader --name googlenet-v1 -o +2. Convert the model to the Inference Engine IR format. Run the Model Optimizer using the `mo` command with the path to the model, model format (which must be FP32 for CPU and FPG) and output directory to generate the IR files: + ```sh + mo --input_model /public/googlenet-v1/googlenet-v1.caffemodel --data_type FP32 --output_dir + ``` +3. 
Run the tool with specifying the `/samples/scripts/car.png` file as an input image, the IR of the `googlenet-v1` model and a device to perform inference on. The following commands demonstrate running the Benchmark Tool in the asynchronous mode on CPU and GPU devices: + + * On CPU: + ```sh + ./benchmark_app -m /googlenet-v1.xml -i /samples/scripts/car.png -d CPU -api async --progress true + ``` + * On GPU: + ```sh + ./benchmark_app -m /googlenet-v1.xml -i /samples/scripts/car.png -d GPU -api async --progress true + ``` + +The application outputs the number of executed iterations, total duration of execution, latency, and throughput. +Additionally, if you set the `-report_type` parameter, the application outputs statistics report. If you set the `-pc` parameter, the application outputs performance counters. If you set `-exec_graph_path`, the application reports executable graph information serialized. All measurements including per-layer PM counters are reported in milliseconds. + +Below are fragments of sample output: + + ``` + [Step 10/11] Measuring performance (Start inference asynchronously, 4 inference requests using 4 streams for CPU, limits: 60000 ms duration) + [ INFO ] BENCHMARK IS IN INFERENCE ONLY MODE. + [ INFO ] Input blobs will be filled once before performance measurements. + [ INFO ] First inference took 26.26 ms + Progress: [................... ] 99% done + + [Step 11/11] Dumping statistics report + [ INFO ] Count: 6640 iterations + [ INFO ] Duration: 60039.70 ms + [ INFO ] Latency: + [ INFO ] Median: 35.36 ms + [ INFO ] Avg: 36.12 ms + [ INFO ] Min: 18.55 ms + [ INFO ] Max: 88.96 ms + [ INFO ] Throughput: 110.59 FPS + ``` + +## See Also +* [Model Optimizer](../../../docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md) +* [Model Downloader](@ref omz_tools_downloader) diff --git a/tools/legacy/benchmark_app/args_helper.cpp b/tools/legacy/benchmark_app/args_helper.cpp new file mode 100644 index 00000000000..ba6e2fe9c65 --- /dev/null +++ b/tools/legacy/benchmark_app/args_helper.cpp @@ -0,0 +1,216 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "args_helper.hpp" + +#include +#include + +#include +#include "slog.hpp" + +#ifdef _WIN32 + #include "w_dirent.h" +#else + #include +#endif + +/** + * @brief Checks input file argument and add it to files vector + * @param files reference to vector to store file names + * @param arg file or folder name + * @return none + */ +void readInputFilesArguments(std::vector& files, const std::string& arg) { + struct stat sb; + if (stat(arg.c_str(), &sb) != 0) { + slog::warn << "File " << arg << " cannot be opened!" << slog::endl; + return; + } + if (S_ISDIR(sb.st_mode)) { + struct CloseDir { + void operator()(DIR* d) const noexcept { + if (d) { + closedir(d); + } + } + }; + using Dir = std::unique_ptr; + Dir dp(opendir(arg.c_str())); + if (dp == nullptr) { + slog::warn << "Directory " << arg << " cannot be opened!" << slog::endl; + return; + } + + struct dirent* ep; + while (nullptr != (ep = readdir(dp.get()))) { + std::string fileName = ep->d_name; + if (fileName == "." || fileName == "..") + continue; + files.push_back(arg + "/" + ep->d_name); + } + } else { + files.push_back(arg); + } +} + +/** + * @brief This function find -i key in input args. It's necessary to process multiple values for + * single key + * @param files reference to vector + * @return none. 
+ */ +void parseInputFilesArguments(std::vector& files) { + std::vector args = gflags::GetArgvs(); + auto args_it = begin(args); + const auto is_image_arg = [](const std::string& s) { + return s == "-i" || s == "--images"; + }; + const auto is_arg = [](const std::string& s) { + return s.front() == '-'; + }; + + while (args_it != args.end()) { + const auto img_start = std::find_if(args_it, end(args), is_image_arg); + if (img_start == end(args)) { + break; + } + const auto img_begin = std::next(img_start); + const auto img_end = std::find_if(img_begin, end(args), is_arg); + for (auto img = img_begin; img != img_end; ++img) { + readInputFilesArguments(files, *img); + } + args_it = img_end; + } + + if (files.empty()) { + return; + } + size_t max_files = 20; + if (files.size() < max_files) { + slog::info << "Files were added: " << files.size() << slog::endl; + for (const auto& filePath : files) { + slog::info << " " << filePath << slog::endl; + } + } else { + slog::info << "Files were added: " << files.size() << ". Too many to display each of them." << slog::endl; + } +} + +namespace { +std::vector splitStringList(const std::string& str, char delim) { + if (str.empty()) + return {}; + + std::istringstream istr(str); + + std::vector result; + std::string elem; + while (std::getline(istr, elem, delim)) { + if (elem.empty()) { + continue; + } + result.emplace_back(std::move(elem)); + } + + return result; +} + +std::map parseArgMap(std::string argMap) { + argMap.erase(std::remove_if(argMap.begin(), argMap.end(), ::isspace), argMap.end()); + + const auto pairs = splitStringList(argMap, ','); + + std::map parsedMap; + for (auto&& pair : pairs) { + const auto keyValue = splitStringList(pair, ':'); + if (keyValue.size() != 2) { + throw std::invalid_argument("Invalid key/value pair " + pair + ". 
Expected :"); + } + + parsedMap[keyValue[0]] = keyValue[1]; + } + + return parsedMap; +} + +using supported_precisions_t = std::unordered_map; + +InferenceEngine::Precision getPrecision(std::string value, const supported_precisions_t& supported_precisions) { + std::transform(value.begin(), value.end(), value.begin(), ::toupper); + + const auto precision = supported_precisions.find(value); + if (precision == supported_precisions.end()) { + throw std::logic_error("\"" + value + "\"" + " is not a valid precision"); + } + + return precision->second; +} + +InferenceEngine::Precision getPrecision(const std::string& value) { + static const supported_precisions_t supported_precisions = { + {"FP32", InferenceEngine::Precision::FP32}, {"FP16", InferenceEngine::Precision::FP16}, {"BF16", InferenceEngine::Precision::BF16}, + {"U64", InferenceEngine::Precision::U64}, {"I64", InferenceEngine::Precision::I64}, {"U32", InferenceEngine::Precision::U32}, + {"I32", InferenceEngine::Precision::I32}, {"U16", InferenceEngine::Precision::U16}, {"I16", InferenceEngine::Precision::I16}, + {"U8", InferenceEngine::Precision::U8}, {"I8", InferenceEngine::Precision::I8}, {"BOOL", InferenceEngine::Precision::BOOL}, + }; + + return getPrecision(value, supported_precisions); +} + +void setPrecisions(const InferenceEngine::CNNNetwork& network, const std::string& iop) { + const auto user_precisions_map = parseArgMap(iop); + + auto inputs = network.getInputsInfo(); + auto outputs = network.getOutputsInfo(); + + for (auto&& item : user_precisions_map) { + const auto& layer_name = item.first; + const auto& user_precision = item.second; + + const auto input = inputs.find(layer_name); + const auto output = outputs.find(layer_name); + + if (input != inputs.end()) { + input->second->setPrecision(getPrecision(user_precision)); + } else if (output != outputs.end()) { + output->second->setPrecision(getPrecision(user_precision)); + } else { + throw std::logic_error(layer_name + " is not an input neither output"); + } + } +} + +} // namespace + +void processPrecision(InferenceEngine::CNNNetwork& network, const std::string& ip, const std::string& op, const std::string& iop) { + if (!ip.empty()) { + const auto user_precision = getPrecision(ip); + for (auto&& layer : network.getInputsInfo()) { + layer.second->setPrecision(user_precision); + } + } + + if (!op.empty()) { + auto user_precision = getPrecision(op); + for (auto&& layer : network.getOutputsInfo()) { + layer.second->setPrecision(user_precision); + } + } + + if (!iop.empty()) { + setPrecisions(network, iop); + } +} + +void printInputAndOutputsInfo(const InferenceEngine::CNNNetwork& network) { + std::cout << "Network inputs:" << std::endl; + for (auto&& layer : network.getInputsInfo()) { + std::cout << " " << layer.first << " : " << layer.second->getPrecision() << " / " << layer.second->getLayout() << std::endl; + } + std::cout << "Network outputs:" << std::endl; + for (auto&& layer : network.getOutputsInfo()) { + std::cout << " " << layer.first << " : " << layer.second->getPrecision() << " / " << layer.second->getLayout() << std::endl; + } +} diff --git a/tools/legacy/benchmark_app/args_helper.hpp b/tools/legacy/benchmark_app/args_helper.hpp new file mode 100644 index 00000000000..2ffabc0a90a --- /dev/null +++ b/tools/legacy/benchmark_app/args_helper.hpp @@ -0,0 +1,32 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +/** + * @brief a header file with common samples functionality + * @file args_helper.hpp + */ + +#pragma once + 
+#include +#include +#include + +/** + * @brief This function checks input args and existence of specified files in a given folder + * @param arg path to a file to be checked for existence + * @return files updated vector of verified input files + */ +void readInputFilesArguments(std::vector& files, const std::string& arg); + +/** + * @brief This function find -i/--images key in input args + * It's necessary to process multiple values for single key + * @return files updated vector of verified input files + */ +void parseInputFilesArguments(std::vector& files); + +void processPrecision(InferenceEngine::CNNNetwork& network, const std::string& ip, const std::string& op, const std::string& iop); + +void printInputAndOutputsInfo(const InferenceEngine::CNNNetwork& network); diff --git a/tools/legacy/benchmark_app/benchmark_app.hpp b/tools/legacy/benchmark_app/benchmark_app.hpp new file mode 100644 index 00000000000..af18c908e31 --- /dev/null +++ b/tools/legacy/benchmark_app/benchmark_app.hpp @@ -0,0 +1,299 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +#include +#include +#include + +/// @brief message for help argument +static const char help_message[] = "Print a usage message"; + +/// @brief message for images argument +static const char input_message[] = "Optional. Path to a folder with images and/or binaries or to specific image or binary file."; + +/// @brief message for model argument +static const char model_message[] = "Required. Path to an .xml/.onnx/.prototxt file with a trained model or to a .blob files with " + "a trained compiled model."; + +/// @brief message for execution mode +static const char api_message[] = "Optional. Enable Sync/Async API. Default value is \"async\"."; + +/// @brief message for assigning cnn calculation to device +static const char target_device_message[] = "Optional. Specify a target device to infer on (the list of available devices is shown below). " + "Default value is CPU. Use \"-d HETERO:\" format to specify " + "HETERO plugin. " + "Use \"-d MULTI:\" format to specify MULTI plugin. " + "The application looks for a suitable plugin for the specified device."; + +/// @brief message for iterations count +static const char iterations_count_message[] = "Optional. Number of iterations. " + "If not specified, the number of iterations is calculated depending on a device."; + +/// @brief message for requests count +static const char infer_requests_count_message[] = "Optional. Number of infer requests. Default value is determined automatically for device."; + +/// @brief message for execution time +static const char execution_time_message[] = "Optional. Time in seconds to execute topology."; + +/// @brief message for #threads for CPU inference +static const char infer_num_threads_message[] = "Optional. Number of threads to use for inference on the CPU " + "(including HETERO and MULTI cases)."; + +/// @brief message for #streams for CPU inference +static const char infer_num_streams_message[] = "Optional. Number of streams to use for inference on the CPU, GPU or MYRIAD devices " + "(for HETERO and MULTI device cases use format :,: or just " + "). " + "Default value is determined automatically for a device.Please note that although the " + "automatic selection " + "usually provides a reasonable performance, it still may be non - optimal for some cases, " + "especially for " + "very small networks. See sample's README for more details. 
" + "Also, using nstreams>1 is inherently throughput-oriented option, " + "while for the best-latency estimations the number of streams should be set to 1."; + +/// @brief message for enforcing of BF16 execution where it is possible +static const char enforce_bf16_message[] = "Optional. By default floating point operations execution in bfloat16 precision are enforced " + "if supported by platform.\n" + " 'true' - enable bfloat16 regardless of platform support\n" + " 'false' - disable bfloat16 regardless of platform support"; + +/// @brief message for user library argument +static const char custom_cpu_library_message[] = "Required for CPU custom layers. Absolute path to a shared library with the kernels " + "implementations."; + +/// @brief message for clDNN custom kernels desc +static const char custom_cldnn_message[] = "Required for GPU custom kernels. Absolute path to an .xml file with the kernels description."; + +static const char batch_size_message[] = "Optional. Batch size value. If not specified, the batch size value is determined from " + "Intermediate Representation."; + +// @brief message for CPU threads pinning option +static const char infer_threads_pinning_message[] = + "Optional. Explicit inference threads binding options (leave empty to let the OpenVINO to make a choice):\n" + "\t\t\t\tenabling threads->cores pinning(\"YES\", which is already default for any conventional CPU), \n" + "\t\t\t\tletting the runtime to decide on the threads->different core types(\"HYBRID_AWARE\", which is default on the hybrid CPUs) \n" + "\t\t\t\tthreads->(NUMA)nodes(\"NUMA\") or \n" + "\t\t\t\tcompletely disable(\"NO\") CPU inference threads pinning"; +// @brief message for stream_output option +static const char stream_output_message[] = "Optional. Print progress as a plain text. When specified, an interactive progress bar is " + "replaced with a " + "multiline output."; + +// @brief message for report_type option +static const char report_type_message[] = "Optional. Enable collecting statistics report. \"no_counters\" report contains " + "configuration options specified, resulting FPS and latency. \"average_counters\" " + "report extends \"no_counters\" report and additionally includes average PM " + "counters values for each layer from the network. \"detailed_counters\" report " + "extends \"average_counters\" report and additionally includes per-layer PM " + "counters and latency for each executed infer request."; + +// @brief message for report_folder option +static const char report_folder_message[] = "Optional. Path to a folder where statistics report is stored."; + +// @brief message for exec_graph_path option +static const char exec_graph_path_message[] = "Optional. Path to a file where to store executable graph information serialized."; + +// @brief message for progress bar option +static const char progress_message[] = "Optional. Show progress bar (can affect performance measurement). Default values is " + "\"false\"."; + +// @brief message for performance counters option +static const char pc_message[] = "Optional. Report performance counters."; + +#ifdef USE_OPENCV +// @brief message for load config option +static const char load_config_message[] = "Optional. Path to XML/YAML/JSON file to load custom IE parameters." + " Please note, command line parameters have higher priority then parameters from configuration " + "file."; + +// @brief message for dump config option +static const char dump_config_message[] = "Optional. 
Path to XML/YAML/JSON file to dump IE parameters, which were set by application."; +#endif + +static const char shape_message[] = "Optional. Set shape for input. For example, \"input1[1,3,224,224],input2[1,4]\" or " + "\"[1,3,224,224]\"" + " in case of one input size."; + +static const char layout_message[] = "Optional. Prompts how network layouts should be treated by application. " + "For example, \"input1[NCHW],input2[NC]\" or \"[NCHW]\" in case of one input size."; + +// @brief message for enabling caching +static const char cache_dir_message[] = "Optional. Enables caching of loaded models to specified directory. " + "List of devices which support caching is shown at the end of this message."; + +// @brief message for single load network +static const char load_from_file_message[] = "Optional. Loads model from file directly without ReadNetwork." + "All CNNNetwork options (like re-shape) will be ignored"; + +// @brief message for quantization bits +static const char gna_qb_message[] = "Optional. Weight bits for quantization: 8 or 16 (default)"; + +static constexpr char inputs_precision_message[] = "Optional. Specifies precision for all input layers of the network."; + +static constexpr char outputs_precision_message[] = "Optional. Specifies precision for all output layers of the network."; + +static constexpr char iop_message[] = "Optional. Specifies precision for input and output layers by name.\n" + " Example: -iop \"input:FP16, output:FP16\".\n" + " Notice that quotes are required.\n" + " Overwrites precision from ip and op options for " + "specified layers."; + +/// @brief Define flag for showing help message
+DEFINE_bool(h, false, help_message); + +/// @brief Declare flag for showing help message
+DECLARE_bool(help); + +/// @brief Define parameter for set image file
+/// i or mif is a required parameter +DEFINE_string(i, "", input_message); + +/// @brief Define parameter for set model file
+/// It is a required parameter +DEFINE_string(m, "", model_message); + +/// @brief Define execution mode +DEFINE_string(api, "async", api_message); + +/// @brief device the target device to infer on
+DEFINE_string(d, "CPU", target_device_message); + +/// @brief Absolute path to CPU library with user layers
+/// It is a required parameter +DEFINE_string(l, "", custom_cpu_library_message); + +/// @brief Define parameter for clDNN custom kernels path
+/// Default is ./lib +DEFINE_string(c, "", custom_cldnn_message); + +/// @brief Iterations count (default 0) +/// Sync mode: iterations count +/// Async mode: StartAsync counts +DEFINE_uint32(niter, 0, iterations_count_message); + +/// @brief Time to execute topology in seconds +DEFINE_uint32(t, 0, execution_time_message); + +/// @brief Number of infer requests in parallel +DEFINE_uint32(nireq, 0, infer_requests_count_message); + +/// @brief Number of threads to use for inference on the CPU in throughput mode (also affects Hetero +/// cases) +DEFINE_uint32(nthreads, 0, infer_num_threads_message); + +/// @brief Number of streams to use for inference on the CPU (also affects Hetero cases) +DEFINE_string(nstreams, "", infer_num_streams_message); + +/// @brief Enforces bf16 execution with bfloat16 precision on systems having this capability +DEFINE_bool(enforcebf16, false, enforce_bf16_message); + +/// @brief Define parameter for batch size
+/// Default is 0 (that means don't specify) +DEFINE_uint32(b, 0, batch_size_message); + +// @brief Enable plugin messages +DEFINE_string(pin, "", infer_threads_pinning_message); + +/// @brief Enables multiline text output instead of progress bar +DEFINE_bool(stream_output, false, stream_output_message); + +/// @brief Enables statistics report collecting +DEFINE_string(report_type, "", report_type_message); + +/// @brief Path to a folder where statistics report is stored +DEFINE_string(report_folder, "", report_folder_message); + +/// @brief Path to a file where to store executable graph information serialized +DEFINE_string(exec_graph_path, "", exec_graph_path_message); + +/// @brief Define flag for showing progress bar
+DEFINE_bool(progress, false, progress_message); + +/// @brief Define flag for showing performance counters
+DEFINE_bool(pc, false, pc_message); + +#ifdef USE_OPENCV +/// @brief Define flag for loading configuration file
+DEFINE_string(load_config, "", load_config_message); + +/// @brief Define flag for dumping configuration file
+DEFINE_string(dump_config, "", dump_config_message); +#endif + +/// @brief Define flag for input shape
+DEFINE_string(shape, "", shape_message); + +/// @brief Define flag for layout shape
+DEFINE_string(layout, "", layout_message);
+
+/// @brief Define flag for quantization bits (default 16)
+DEFINE_int32(qb, 16, gna_qb_message);
+
+/// @brief Specify precision for all input layers of the network
+DEFINE_string(ip, "", inputs_precision_message);
+
+/// @brief Specify precision for all output layers of the network
+DEFINE_string(op, "", outputs_precision_message);
+
+/// @brief Specify precision for input and output layers by name.
+/// Example: -iop "input:FP16, output:FP16".
+/// Notice that quotes are required.
+/// Overwrites precision from ip and op options for specified layers.
+DEFINE_string(iop, "", iop_message);
+
+/// @brief Define parameter for cache model dir
+DEFINE_string(cache_dir, "", cache_dir_message); + +/// @brief Define flag for load network from model file by name without ReadNetwork
+DEFINE_bool(load_from_file, false, load_from_file_message); + +/** + * @brief This function show a help message + */ +static void showUsage() { + std::cout << std::endl; + std::cout << "benchmark_app [OPTION]" << std::endl; + std::cout << "Options:" << std::endl; + std::cout << std::endl; + std::cout << " -h, --help " << help_message << std::endl; + std::cout << " -m \"\" " << model_message << std::endl; + std::cout << " -i \"\" " << input_message << std::endl; + std::cout << " -d \"\" " << target_device_message << std::endl; + std::cout << " -l \"\" " << custom_cpu_library_message << std::endl; + std::cout << " Or" << std::endl; + std::cout << " -c \"\" " << custom_cldnn_message << std::endl; + std::cout << " -api \"\" " << api_message << std::endl; + std::cout << " -niter \"\" " << iterations_count_message << std::endl; + std::cout << " -nireq \"\" " << infer_requests_count_message << std::endl; + std::cout << " -b \"\" " << batch_size_message << std::endl; + std::cout << " -stream_output " << stream_output_message << std::endl; + std::cout << " -t " << execution_time_message << std::endl; + std::cout << " -progress " << progress_message << std::endl; + std::cout << " -shape " << shape_message << std::endl; + std::cout << " -layout " << layout_message << std::endl; + std::cout << " -cache_dir \"\" " << cache_dir_message << std::endl; + std::cout << " -load_from_file " << load_from_file_message << std::endl; + std::cout << std::endl << " device-specific performance options:" << std::endl; + std::cout << " -nstreams \"\" " << infer_num_streams_message << std::endl; + std::cout << " -nthreads \"\" " << infer_num_threads_message << std::endl; + std::cout << " -enforcebf16= " << enforce_bf16_message << std::endl; + std::cout << " -pin \"YES\"/\"HYBRID_AWARE\"/\"NO\"/\"NUMA\" " << infer_threads_pinning_message << std::endl; + std::cout << std::endl << " Statistics dumping options:" << std::endl; + std::cout << " -report_type \"\" " << report_type_message << std::endl; + std::cout << " -report_folder " << report_folder_message << std::endl; + std::cout << " -exec_graph_path " << exec_graph_path_message << std::endl; + std::cout << " -pc " << pc_message << std::endl; +#ifdef USE_OPENCV + std::cout << " -dump_config " << dump_config_message << std::endl; + std::cout << " -load_config " << load_config_message << std::endl; +#endif + std::cout << " -qb " << gna_qb_message << std::endl; + std::cout << " -ip " << inputs_precision_message << std::endl; + std::cout << " -op " << outputs_precision_message << std::endl; + std::cout << " -iop \"\" " << iop_message << std::endl; +} diff --git a/tools/legacy/benchmark_app/common.hpp b/tools/legacy/benchmark_app/common.hpp new file mode 100644 index 00000000000..ac423db96c8 --- /dev/null +++ b/tools/legacy/benchmark_app/common.hpp @@ -0,0 +1,166 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +/** + * @brief a header file with common samples functionality + * @file common.hpp + */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifndef UNUSED + #if defined(_MSC_VER) && !defined(__clang__) + #define UNUSED + #else + #define UNUSED __attribute__((unused)) + #endif +#endif +\ + +/** + * @brief Get extension from filename + * @param filename - name of the file which extension should be extracted + * @return string with extracted file extension + */ +inline std::string fileExt(const std::string& filename) { + auto 
pos = filename.rfind('.'); + if (pos == std::string::npos) + return ""; + return filename.substr(pos + 1); +} + +inline std::ostream& operator<<(std::ostream& os, const InferenceEngine::Version& version) { + os << "\t" << version.description << " version ......... "; + os << IE_VERSION_MAJOR << "." << IE_VERSION_MINOR << "." << IE_VERSION_PATCH; + + os << "\n\tBuild ........... "; + os << version.buildNumber; + + return os; +} + +inline std::ostream& operator<<(std::ostream& os, const InferenceEngine::Version* version) { + if (nullptr != version) { + os << std::endl << *version; + } + return os; +} + +inline std::ostream& operator<<(std::ostream& os, const std::map& versions) { + for (auto&& version : versions) { + os << "\t" << version.first << std::endl; + os << version.second << std::endl; + } + + return os; +} + +static std::vector> perfCountersSorted( + std::map perfMap) { + using perfItem = std::pair; + std::vector sorted; + for (auto& kvp : perfMap) + sorted.push_back(kvp); + + std::stable_sort(sorted.begin(), sorted.end(), [](const perfItem& l, const perfItem& r) { + return l.second.execution_index < r.second.execution_index; + }); + + return sorted; +} + +static UNUSED void printPerformanceCounts(const std::map& performanceMap, std::ostream& stream, + std::string deviceName, bool bshowHeader = true) { + long long totalTime = 0; + // Print performance counts + if (bshowHeader) { + stream << std::endl << "performance counts:" << std::endl << std::endl; + } + + auto performanceMapSorted = perfCountersSorted(performanceMap); + + for (const auto& it : performanceMapSorted) { + std::string toPrint(it.first); + const int maxLayerName = 30; + + if (it.first.length() >= maxLayerName) { + toPrint = it.first.substr(0, maxLayerName - 4); + toPrint += "..."; + } + + stream << std::setw(maxLayerName) << std::left << toPrint; + switch (it.second.status) { + case InferenceEngine::InferenceEngineProfileInfo::EXECUTED: + stream << std::setw(15) << std::left << "EXECUTED"; + break; + case InferenceEngine::InferenceEngineProfileInfo::NOT_RUN: + stream << std::setw(15) << std::left << "NOT_RUN"; + break; + case InferenceEngine::InferenceEngineProfileInfo::OPTIMIZED_OUT: + stream << std::setw(15) << std::left << "OPTIMIZED_OUT"; + break; + } + stream << std::setw(30) << std::left << "layerType: " + std::string(it.second.layer_type) + " "; + stream << std::setw(20) << std::left << "realTime: " + std::to_string(it.second.realTime_uSec); + stream << std::setw(20) << std::left << "cpu: " + std::to_string(it.second.cpu_uSec); + stream << " execType: " << it.second.exec_type << std::endl; + if (it.second.realTime_uSec > 0) { + totalTime += it.second.realTime_uSec; + } + } + stream << std::setw(20) << std::left << "Total time: " + std::to_string(totalTime) << " microseconds" << std::endl; + std::cout << std::endl; + std::cout << "Full device name: " << deviceName << std::endl; + std::cout << std::endl; +} + +static UNUSED void printPerformanceCounts(InferenceEngine::InferRequest request, std::ostream& stream, std::string deviceName, bool bshowHeader = true) { + auto performanceMap = request.GetPerformanceCounts(); + printPerformanceCounts(performanceMap, stream, deviceName, bshowHeader); +} + +inline std::string getFullDeviceName(std::map& devicesMap, std::string device) { + std::map::iterator it = devicesMap.find(device); + if (it != devicesMap.end()) { + return it->second; + } else { + return ""; + } +} + +inline std::string getFullDeviceName(InferenceEngine::Core& ie, std::string device) { + 
InferenceEngine::Parameter p; + try { + p = ie.GetMetric(device, METRIC_KEY(FULL_DEVICE_NAME)); + return p.as(); + } catch (InferenceEngine::Exception&) { + return ""; + } +} + +inline void showAvailableDevices() { + InferenceEngine::Core ie; + std::vector devices = ie.GetAvailableDevices(); + + std::cout << std::endl; + std::cout << "Available target devices:"; + for (const auto& device : devices) { + std::cout << " " << device; + } + std::cout << std::endl; +} diff --git a/tools/legacy/benchmark_app/console_progress.hpp b/tools/legacy/benchmark_app/console_progress.hpp new file mode 100644 index 00000000000..f62aeed37d1 --- /dev/null +++ b/tools/legacy/benchmark_app/console_progress.hpp @@ -0,0 +1,107 @@ +// Copyright (C) 2018-2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include + +/** + * @class ConsoleProgress + * @brief A ConsoleProgress class provides functionality for printing progress dynamics + */ +class ConsoleProgress { + static const size_t DEFAULT_DETALIZATION = 20; + static const size_t DEFAULT_PERCENT_TO_UPDATE_PROGRESS = 1; + + size_t total; + size_t cur_progress = 0; + size_t prev_progress = 0; + bool stream_output; + size_t detalization; + size_t percent_to_update; + +public: + /** + * @brief A constructor of ConsoleProgress class + * @param _total - maximum value that is correspondent to 100% + * @param _detalization - number of symbols(.) to use to represent progress + */ + explicit ConsoleProgress(size_t _total, + bool _stream_output = false, + size_t _percent_to_update = DEFAULT_PERCENT_TO_UPDATE_PROGRESS, + size_t _detalization = DEFAULT_DETALIZATION) + : total(_total), + detalization(_detalization), + percent_to_update(_percent_to_update) { + stream_output = _stream_output; + if (total == 0) { + total = 1; + } + } + + /** + * @brief Shows progress with current data. Progress is shown from the beginning of the current + * line. + */ + void showProgress() const { + std::stringstream strm; + if (!stream_output) { + strm << '\r'; + } + strm << "Progress: ["; + size_t i = 0; + for (; i < detalization * cur_progress / total; i++) { + strm << "."; + } + for (; i < detalization; i++) { + strm << " "; + } + strm << "] " << std::setw(3) << 100 * cur_progress / total << "% done"; + if (stream_output) { + strm << std::endl; + } + std::fputs(strm.str().c_str(), stdout); + std::fflush(stdout); + } + + /** + * @brief Updates current value and progressbar + */ + void updateProgress() { + if (cur_progress > total) + cur_progress = total; + size_t prev_percent = 100 * prev_progress / total; + size_t cur_percent = 100 * cur_progress / total; + + if (prev_progress == 0 || cur_progress == total || prev_percent + percent_to_update <= cur_percent) { + showProgress(); + prev_progress = cur_progress; + } + } + + /** + * @brief Adds value to currently represented and redraw progressbar + * @param add - value to add + */ + void addProgress(int add) { + if (add < 0 && -add > static_cast(cur_progress)) { + add = -static_cast(cur_progress); + } + cur_progress += add; + updateProgress(); + } + + /** + * @brief Output end line. 
+ * @return + */ + void finish() { + std::stringstream strm; + strm << std::endl; + std::fputs(strm.str().c_str(), stdout); + std::fflush(stdout); + } +}; diff --git a/tools/legacy/benchmark_app/csv_dumper.hpp b/tools/legacy/benchmark_app/csv_dumper.hpp new file mode 100644 index 00000000000..5e0bc039eae --- /dev/null +++ b/tools/legacy/benchmark_app/csv_dumper.hpp @@ -0,0 +1,98 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +#include +#include +#include "slog.hpp" +#include +#include + +/** + * @class CsvDumper + * @brief A CsvDumper class provides functionality for dumping the values in CSV files + */ +class CsvDumper { + std::ofstream file; + std::string filename; + bool canDump = true; + char delimiter = ';'; + + std::string generateFilename() { + std::stringstream filename; + filename << "dumpfile-"; + filename << time(nullptr); + filename << ".csv"; + return filename.str(); + } + +public: + /** + * @brief A constructor. Disables dumping in case dump file cannot be created + * @param enabled - True if dumping is enabled by default. + * @param name - name of file to dump to. File won't be created if first parameter is false. + */ + explicit CsvDumper(bool enabled = true, const std::string& name = ""): canDump(enabled) { + if (!canDump) { + return; + } + filename = (name == "" ? generateFilename() : name); + file.open(filename, std::ios::out); + if (!file) { + slog::warn << "Cannot create dump file! Disabling dump." << slog::endl; + canDump = false; + } + } + + /** + * @brief Sets a delimiter to use in csv file + * @param c - Delimiter char + * @return + */ + void setDelimiter(char c) { + delimiter = c; + } + + /** + * @brief Overloads operator to organize streaming values to file. Does nothing if dumping is + * disabled Adds delimiter at the end of value provided + * @param add - value to add to dump + * @return reference to same object + */ + template + CsvDumper& operator<<(const T& add) { + if (canDump) { + file << add << delimiter; + } + return *this; + } + + /** + * @brief Finishes line in dump file. Does nothing if dumping is disabled + */ + void endLine() { + if (canDump) { + file << "\n"; + } + } + + /** + * @brief Gets information if dump is enabled. + * @return true if dump is enabled and file was successfully created + */ + bool dumpEnabled() { + return canDump; + } + + /** + * @brief Gets name of a dump file + * @return name of a dump file + */ + std::string getFilename() const { + return filename; + } +}; diff --git a/tools/legacy/benchmark_app/infer_request_wrap.hpp b/tools/legacy/benchmark_app/infer_request_wrap.hpp new file mode 100644 index 00000000000..741b2ad7f13 --- /dev/null +++ b/tools/legacy/benchmark_app/infer_request_wrap.hpp @@ -0,0 +1,146 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "statistics_report.hpp" + +typedef std::chrono::high_resolution_clock Time; +typedef std::chrono::nanoseconds ns; + +typedef std::function QueueCallbackFunction; + +/// @brief Wrapper class for InferenceEngine::InferRequest. Handles asynchronous callbacks and calculates execution time. 
+class InferReqWrap final { +public: + using Ptr = std::shared_ptr; + + ~InferReqWrap() = default; + + explicit InferReqWrap(InferenceEngine::ExecutableNetwork& net, size_t id, QueueCallbackFunction callbackQueue) + : _request(net.CreateInferRequest()), _id(id), _callbackQueue(callbackQueue) { + _request.SetCompletionCallback([&]() { + _endTime = Time::now(); + _callbackQueue(_id, getExecutionTimeInMilliseconds()); + }); + } + + void startAsync() { + _startTime = Time::now(); + _request.StartAsync(); + } + + void wait() { + _request.Wait(InferenceEngine::InferRequest::RESULT_READY); + } + + void infer() { + _startTime = Time::now(); + _request.Infer(); + _endTime = Time::now(); + _callbackQueue(_id, getExecutionTimeInMilliseconds()); + } + + std::map getPerformanceCounts() { + return _request.GetPerformanceCounts(); + } + + InferenceEngine::Blob::Ptr getBlob(const std::string& name) { + return _request.GetBlob(name); + } + + double getExecutionTimeInMilliseconds() const { + auto execTime = std::chrono::duration_cast(_endTime - _startTime); + return static_cast(execTime.count()) * 0.000001; + } + +private: + InferenceEngine::InferRequest _request; + Time::time_point _startTime; + Time::time_point _endTime; + size_t _id; + QueueCallbackFunction _callbackQueue; +}; + +class InferRequestsQueue final { +public: + InferRequestsQueue(InferenceEngine::ExecutableNetwork& net, size_t nireq) { + for (size_t id = 0; id < nireq; id++) { + requests.push_back( + std::make_shared(net, id, std::bind(&InferRequestsQueue::putIdleRequest, this, std::placeholders::_1, std::placeholders::_2))); + _idleIds.push(id); + } + resetTimes(); + } + ~InferRequestsQueue() { + // Inference Request guarantee that it will wait for all asynchronous internal tasks in destructor + // So it should be released before any context that the request can use inside internal asynchronous tasks + // For example all members of InferRequestsQueue would be destroyed before `requests` vector + // So requests can try to use this members from `putIdleRequest()` that would be called from request callback + // To avoid this we should move this vector declaration after all members declaration or just clear it manually in destructor + requests.clear(); + } + + void resetTimes() { + _startTime = Time::time_point::max(); + _endTime = Time::time_point::min(); + _latencies.clear(); + } + + double getDurationInMilliseconds() { + return std::chrono::duration_cast(_endTime - _startTime).count() * 0.000001; + } + + void putIdleRequest(size_t id, const double latency) { + std::unique_lock lock(_mutex); + _latencies.push_back(latency); + _idleIds.push(id); + _endTime = std::max(Time::now(), _endTime); + _cv.notify_one(); + } + + InferReqWrap::Ptr getIdleRequest() { + std::unique_lock lock(_mutex); + _cv.wait(lock, [this] { + return _idleIds.size() > 0; + }); + auto request = requests.at(_idleIds.front()); + _idleIds.pop(); + _startTime = std::min(Time::now(), _startTime); + return request; + } + + void waitAll() { + std::unique_lock lock(_mutex); + _cv.wait(lock, [this] { + return _idleIds.size() == requests.size(); + }); + } + + std::vector getLatencies() { + return _latencies; + } + + std::vector requests; + +private: + std::queue _idleIds; + std::mutex _mutex; + std::condition_variable _cv; + Time::time_point _startTime; + Time::time_point _endTime; + std::vector _latencies; +}; diff --git a/tools/legacy/benchmark_app/inputs_filling.cpp b/tools/legacy/benchmark_app/inputs_filling.cpp new file mode 100644 index 00000000000..4ffa8f8ffdd --- /dev/null +++ 
b/tools/legacy/benchmark_app/inputs_filling.cpp @@ -0,0 +1,356 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "inputs_filling.hpp" + +#include + +#include +#include +#include "slog.hpp" +#include +#include +#include + +using namespace InferenceEngine; + +#ifdef USE_OPENCV +static const std::vector supported_image_extensions = {"bmp", "dib", "jpeg", "jpg", "jpe", "jp2", "png", + "pbm", "pgm", "ppm", "sr", "ras", "tiff", "tif"}; +#else +static const std::vector supported_image_extensions = {"bmp"}; +#endif +static const std::vector supported_binary_extensions = {"bin"}; + +std::vector filterFilesByExtensions(const std::vector& filePaths, const std::vector& extensions) { + std::vector filtered; + auto getExtension = [](const std::string& name) { + auto extensionPosition = name.rfind('.', name.size()); + return extensionPosition == std::string::npos ? "" : name.substr(extensionPosition + 1, name.size() - 1); + }; + for (auto& filePath : filePaths) { + auto extension = getExtension(filePath); + std::transform(extension.begin(), extension.end(), extension.begin(), ::tolower); + if (std::find(extensions.begin(), extensions.end(), extension) != extensions.end()) { + filtered.push_back(filePath); + } + } + return filtered; +} + +template +void fillBlobImage(Blob::Ptr& inputBlob, const std::vector& filePaths, const size_t& batchSize, const benchmark_app::InputInfo& app_info, + const size_t& requestId, const size_t& inputId, const size_t& inputSize) { + MemoryBlob::Ptr minput = as(inputBlob); + if (!minput) { + IE_THROW() << "We expect inputBlob to be inherited from MemoryBlob in " + "fillBlobImage, " + << "but by fact we were not able to cast inputBlob to MemoryBlob"; + } + // locked memory holder should be alive all time while access to its buffer + // happens + auto minputHolder = minput->wmap(); + auto inputBlobData = minputHolder.as(); + + /** Collect images data ptrs **/ + std::vector> vreader; + vreader.reserve(batchSize); + + for (size_t i = 0ULL, inputIndex = requestId * batchSize * inputSize + inputId; i < batchSize; i++, inputIndex += inputSize) { + inputIndex %= filePaths.size(); + + slog::info << "Prepare image " << filePaths[inputIndex] << slog::endl; + FormatReader::ReaderPtr reader(filePaths[inputIndex].c_str()); + if (reader.get() == nullptr) { + slog::warn << "Image " << filePaths[inputIndex] << " cannot be read!" << slog::endl << slog::endl; + continue; + } + + /** Getting image data **/ + std::shared_ptr imageData(reader->getData(app_info.width(), app_info.height())); + if (imageData) { + vreader.push_back(imageData); + } + } + + /** Fill input tensor with images. First b channel, then g and r channels **/ + const size_t numChannels = app_info.channels(); + const size_t width = app_info.width(); + const size_t height = app_info.height(); + /** Iterate over all input images **/ + for (size_t imageId = 0; imageId < vreader.size(); ++imageId) { + /** Iterate over all width **/ + for (size_t w = 0; w < app_info.width(); ++w) { + /** Iterate over all height **/ + for (size_t h = 0; h < app_info.height(); ++h) { + /** Iterate over all channels **/ + for (size_t ch = 0; ch < numChannels; ++ch) { + /** [images stride + channels stride + pixel id ] all in + * bytes **/ + size_t offset = imageId * numChannels * width * height + (((app_info.layout == "NCHW") || (app_info.layout == "CHW")) + ? 
(ch * width * height + h * width + w) + : (h * width * numChannels + w * numChannels + ch)); + inputBlobData[offset] = static_cast(vreader.at(imageId).get()[h * width * numChannels + w * numChannels + ch]); + } + } + } + } +} + +template +void fillBlobBinary(Blob::Ptr& inputBlob, const std::vector& filePaths, const size_t& batchSize, const size_t& requestId, const size_t& inputId, + const size_t& inputSize) { + MemoryBlob::Ptr minput = as(inputBlob); + if (!minput) { + IE_THROW() << "We expect inputBlob to be inherited from MemoryBlob in " + "fillBlobBinary, " + << "but by fact we were not able to cast inputBlob to MemoryBlob"; + } + // locked memory holder should be alive all time while access to its buffer + // happens + auto minputHolder = minput->wmap(); + + auto inputBlobData = minputHolder.as(); + for (size_t i = 0ULL, inputIndex = requestId * batchSize * inputSize + inputId; i < batchSize; i++, inputIndex += inputSize) { + inputIndex %= filePaths.size(); + + slog::info << "Prepare binary file " << filePaths[inputIndex] << slog::endl; + std::ifstream binaryFile(filePaths[inputIndex], std::ios_base::binary | std::ios_base::ate); + if (!binaryFile) { + IE_THROW() << "Cannot open " << filePaths[inputIndex]; + } + + auto fileSize = static_cast(binaryFile.tellg()); + binaryFile.seekg(0, std::ios_base::beg); + if (!binaryFile.good()) { + IE_THROW() << "Can not read " << filePaths[inputIndex]; + } + auto inputSize = inputBlob->size() * sizeof(T) / batchSize; + if (fileSize != inputSize) { + IE_THROW() << "File " << filePaths[inputIndex] << " contains " << std::to_string(fileSize) + << " bytes " + "but the network expects " + << std::to_string(inputSize); + } + binaryFile.read(&inputBlobData[i * inputSize], inputSize); + } +} + +template +using uniformDistribution = + typename std::conditional::value, std::uniform_real_distribution, + typename std::conditional::value, std::uniform_int_distribution, void>::type>::type; + +template +void fillBlobRandom(Blob::Ptr& inputBlob, T rand_min = std::numeric_limits::min(), T rand_max = std::numeric_limits::max()) { + MemoryBlob::Ptr minput = as(inputBlob); + if (!minput) { + IE_THROW() << "We expect inputBlob to be inherited from MemoryBlob in " + "fillBlobRandom, " + << "but by fact we were not able to cast inputBlob to MemoryBlob"; + } + // locked memory holder should be alive all time while access to its buffer + // happens + auto minputHolder = minput->wmap(); + + auto inputBlobData = minputHolder.as(); + std::mt19937 gen(0); + uniformDistribution distribution(rand_min, rand_max); + for (size_t i = 0; i < inputBlob->size(); i++) { + inputBlobData[i] = static_cast(distribution(gen)); + } +} + +template +void fillBlobImInfo(Blob::Ptr& inputBlob, const size_t& batchSize, std::pair image_size) { + MemoryBlob::Ptr minput = as(inputBlob); + if (!minput) { + IE_THROW() << "We expect inputBlob to be inherited from MemoryBlob in " + "fillBlobImInfo, " + << "but by fact we were not able to cast inputBlob to MemoryBlob"; + } + // locked memory holder should be alive all time while access to its buffer + // happens + auto minputHolder = minput->wmap(); + + auto inputBlobData = minputHolder.as(); + for (size_t b = 0; b < batchSize; b++) { + size_t iminfoSize = inputBlob->size() / batchSize; + for (size_t i = 0; i < iminfoSize; i++) { + size_t index = b * iminfoSize + i; + if (0 == i) + inputBlobData[index] = static_cast(image_size.first); + else if (1 == i) + inputBlobData[index] = static_cast(image_size.second); + else + inputBlobData[index] = 1; + } + } +} + 
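
For reference, the uniformDistribution alias defined above selects std::uniform_real_distribution for floating-point element types and std::uniform_int_distribution for integral ones (the alias in the patch additionally falls back to void for non-arithmetic types), and fillBlobRandom draws every element from the chosen distribution with a fixed seed so that generated inputs are reproducible. A rough, self-contained sketch of the same pattern on a plain std::vector — illustrative names and defaults only, not part of this patch, and assuming T is one of the types the standard distributions actually support:

#include <limits>
#include <random>
#include <type_traits>
#include <vector>

// Picks the distribution type for T: real distribution for floating-point, integer otherwise.
template <typename T>
using uniform_distribution_t =
    typename std::conditional<std::is_floating_point<T>::value,
                              std::uniform_real_distribution<T>,
                              std::uniform_int_distribution<T>>::type;

// Fills a buffer with deterministic pseudo-random values; the fixed seed keeps
// the generated benchmark inputs identical from run to run.
template <typename T>
void fill_random(std::vector<T>& data,
                 T rand_min = std::numeric_limits<T>::min(),
                 T rand_max = std::numeric_limits<T>::max()) {
    std::mt19937 gen(0);
    uniform_distribution_t<T> distribution(rand_min, rand_max);
    for (auto& value : data) {
        value = static_cast<T>(distribution(gen));
    }
}

As the comments in the precision dispatch of fillBlobs below point out, the C++ standard does not define std::uniform_int_distribution for 8-bit element types, so the U8 and I8 branches cannot use it directly and have to be handled specially (for example by drawing from a wider integer type and casting).
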
+void fillBlobs(const std::vector& inputFiles, const size_t& batchSize, benchmark_app::InputsInfo& app_inputs_info, + std::vector requests) { + std::vector> input_image_sizes; + for (auto& item : app_inputs_info) { + if (item.second.isImage()) { + input_image_sizes.push_back(std::make_pair(item.second.width(), item.second.height())); + } + slog::info << "Network input '" << item.first << "' precision " << item.second.precision << ", dimensions (" << item.second.layout << "): "; + for (const auto& i : item.second.shape) { + slog::info << i << " "; + } + slog::info << slog::endl; + } + + size_t imageInputCount = input_image_sizes.size(); + size_t binaryInputCount = app_inputs_info.size() - imageInputCount; + + std::vector binaryFiles; + std::vector imageFiles; + + if (inputFiles.empty()) { + slog::warn << "No input files were given: all inputs will be filled with " + "random values!" + << slog::endl; + } else { + binaryFiles = filterFilesByExtensions(inputFiles, supported_binary_extensions); + std::sort(std::begin(binaryFiles), std::end(binaryFiles)); + + auto binaryToBeUsed = binaryInputCount * batchSize * requests.size(); + if (binaryToBeUsed > 0 && binaryFiles.empty()) { + std::stringstream ss; + for (auto& ext : supported_binary_extensions) { + if (!ss.str().empty()) { + ss << ", "; + } + ss << ext; + } + slog::warn << "No supported binary inputs found! Please check your file " + "extensions: " + << ss.str() << slog::endl; + } else if (binaryToBeUsed > binaryFiles.size()) { + slog::warn << "Some binary input files will be duplicated: " << binaryToBeUsed << " files are required but only " << binaryFiles.size() + << " are provided" << slog::endl; + } else if (binaryToBeUsed < binaryFiles.size()) { + slog::warn << "Some binary input files will be ignored: only " << binaryToBeUsed << " are required from " << binaryFiles.size() << slog::endl; + } + + imageFiles = filterFilesByExtensions(inputFiles, supported_image_extensions); + std::sort(std::begin(imageFiles), std::end(imageFiles)); + + auto imagesToBeUsed = imageInputCount * batchSize * requests.size(); + if (imagesToBeUsed > 0 && imageFiles.empty()) { + std::stringstream ss; + for (auto& ext : supported_image_extensions) { + if (!ss.str().empty()) { + ss << ", "; + } + ss << ext; + } + slog::warn << "No supported image inputs found! 
Please check your file " + "extensions: " + << ss.str() << slog::endl; + } else if (imagesToBeUsed > imageFiles.size()) { + slog::warn << "Some image input files will be duplicated: " << imagesToBeUsed << " files are required but only " << imageFiles.size() + << " are provided" << slog::endl; + } else if (imagesToBeUsed < imageFiles.size()) { + slog::warn << "Some image input files will be ignored: only " << imagesToBeUsed << " are required from " << imageFiles.size() << slog::endl; + } + } + + for (size_t requestId = 0; requestId < requests.size(); requestId++) { + slog::info << "Infer Request " << requestId << " filling" << slog::endl; + + size_t imageInputId = 0; + size_t binaryInputId = 0; + for (auto& item : app_inputs_info) { + Blob::Ptr inputBlob = requests.at(requestId)->getBlob(item.first); + auto app_info = app_inputs_info.at(item.first); + auto precision = app_info.precision; + if (app_info.isImage()) { + if (!imageFiles.empty()) { + // Fill with Images + if (precision == InferenceEngine::Precision::FP32) { + fillBlobImage(inputBlob, imageFiles, batchSize, app_info, requestId, imageInputId++, imageInputCount); + } else if (precision == InferenceEngine::Precision::FP16) { + fillBlobImage(inputBlob, imageFiles, batchSize, app_info, requestId, imageInputId++, imageInputCount); + } else if (precision == InferenceEngine::Precision::I32) { + fillBlobImage(inputBlob, imageFiles, batchSize, app_info, requestId, imageInputId++, imageInputCount); + } else if (precision == InferenceEngine::Precision::I64) { + fillBlobImage(inputBlob, imageFiles, batchSize, app_info, requestId, imageInputId++, imageInputCount); + } else if (precision == InferenceEngine::Precision::U8) { + fillBlobImage(inputBlob, imageFiles, batchSize, app_info, requestId, imageInputId++, imageInputCount); + } else { + IE_THROW() << "Input precision is not supported for " << item.first; + } + continue; + } + } else { + if (!binaryFiles.empty()) { + // Fill with binary files + if (precision == InferenceEngine::Precision::FP32) { + fillBlobBinary(inputBlob, binaryFiles, batchSize, requestId, binaryInputId++, binaryInputCount); + } else if (precision == InferenceEngine::Precision::FP16) { + fillBlobBinary(inputBlob, binaryFiles, batchSize, requestId, binaryInputId++, binaryInputCount); + } else if (precision == InferenceEngine::Precision::I32) { + fillBlobBinary(inputBlob, binaryFiles, batchSize, requestId, binaryInputId++, binaryInputCount); + } else if (precision == InferenceEngine::Precision::I64) { + fillBlobBinary(inputBlob, binaryFiles, batchSize, requestId, binaryInputId++, binaryInputCount); + } else if ((precision == InferenceEngine::Precision::U8) || (precision == InferenceEngine::Precision::BOOL)) { + fillBlobBinary(inputBlob, binaryFiles, batchSize, requestId, binaryInputId++, binaryInputCount); + } else { + IE_THROW() << "Input precision is not supported for " << item.first; + } + continue; + } + + if (app_info.isImageInfo() && (input_image_sizes.size() == 1)) { + // Most likely it is image info: fill with image information + auto image_size = input_image_sizes.at(0); + slog::info << "Fill input '" << item.first << "' with image size " << image_size.first << "x" << image_size.second << slog::endl; + if (precision == InferenceEngine::Precision::FP32) { + fillBlobImInfo(inputBlob, batchSize, image_size); + } else if (precision == InferenceEngine::Precision::FP16) { + fillBlobImInfo(inputBlob, batchSize, image_size); + } else if (precision == InferenceEngine::Precision::I32) { + fillBlobImInfo(inputBlob, batchSize, 
image_size); + } else if (precision == InferenceEngine::Precision::I64) { + fillBlobImInfo(inputBlob, batchSize, image_size); + } else { + IE_THROW() << "Input precision is not supported for image info!"; + } + continue; + } + } + // Fill random + slog::info << "Fill input '" << item.first << "' with random values (" << std::string((app_info.isImage() ? "image" : "some binary data")) + << " is expected)" << slog::endl; + if (precision == InferenceEngine::Precision::FP32) { + fillBlobRandom(inputBlob); + } else if (precision == InferenceEngine::Precision::FP16) { + fillBlobRandom(inputBlob); + } else if (precision == InferenceEngine::Precision::I32) { + fillBlobRandom(inputBlob); + } else if (precision == InferenceEngine::Precision::I64) { + fillBlobRandom(inputBlob); + } else if (precision == InferenceEngine::Precision::U8) { + // uniform_int_distribution is not allowed in the C++17 + // standard and vs2017/19 + fillBlobRandom(inputBlob); + } else if (precision == InferenceEngine::Precision::I8) { + // uniform_int_distribution is not allowed in the C++17 standard + // and vs2017/19 + fillBlobRandom(inputBlob); + } else if (precision == InferenceEngine::Precision::U16) { + fillBlobRandom(inputBlob); + } else if (precision == InferenceEngine::Precision::I16) { + fillBlobRandom(inputBlob); + } else if (precision == InferenceEngine::Precision::BOOL) { + fillBlobRandom(inputBlob, 0, 1); + } else { + IE_THROW() << "Input precision is not supported for " << item.first; + } + } + } +} diff --git a/tools/legacy/benchmark_app/inputs_filling.hpp b/tools/legacy/benchmark_app/inputs_filling.hpp new file mode 100644 index 00000000000..4410faae11e --- /dev/null +++ b/tools/legacy/benchmark_app/inputs_filling.hpp @@ -0,0 +1,15 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include + +#include "infer_request_wrap.hpp" +#include "utils.hpp" + +void fillBlobs(const std::vector& inputFiles, const size_t& batchSize, benchmark_app::InputsInfo& app_inputs_info, + std::vector requests); diff --git a/tools/legacy/benchmark_app/main.cpp b/tools/legacy/benchmark_app/main.cpp new file mode 100644 index 00000000000..51ccba5d39a --- /dev/null +++ b/tools/legacy/benchmark_app/main.cpp @@ -0,0 +1,704 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include +#include +#include +#include +#include +#include +#include "args_helper.hpp" +#include "common.hpp" +#include "slog.hpp" +#include +#include +#include +#include + +#include "benchmark_app.hpp" +#include "infer_request_wrap.hpp" +#include "inputs_filling.hpp" +#include "progress_bar.hpp" +#include "statistics_report.hpp" +#include "utils.hpp" + +using namespace InferenceEngine; + +static const size_t progressBarDefaultTotalCount = 1000; + +uint64_t getDurationInMilliseconds(uint32_t duration) { + return duration * 1000LL; +} + +uint64_t getDurationInNanoseconds(uint32_t duration) { + return duration * 1000000000LL; +} + +bool ParseAndCheckCommandLine(int argc, char* argv[]) { + // ---------------------------Parsing and validating input + // arguments-------------------------------------- + slog::info << "Parsing input parameters" << slog::endl; + gflags::ParseCommandLineNonHelpFlags(&argc, &argv, true); + if (FLAGS_help || FLAGS_h) { + showUsage(); + showAvailableDevices(); + return false; + } + + if (FLAGS_m.empty()) { + showUsage(); + throw std::logic_error("Model is required but not set. 
Please set -m option."); + } + + if (FLAGS_api != "async" && FLAGS_api != "sync") { + throw std::logic_error("Incorrect API. Please set -api option to `sync` or `async` value."); + } + + if (!FLAGS_report_type.empty() && FLAGS_report_type != noCntReport && FLAGS_report_type != averageCntReport && FLAGS_report_type != detailedCntReport) { + std::string err = "only " + std::string(noCntReport) + "/" + std::string(averageCntReport) + "/" + std::string(detailedCntReport) + + " report types are supported (invalid -report_type option value)"; + throw std::logic_error(err); + } + + if ((FLAGS_report_type == averageCntReport) && ((FLAGS_d.find("MULTI") != std::string::npos))) { + throw std::logic_error("only " + std::string(detailedCntReport) + " report type is supported for MULTI device"); + } + + bool isNetworkCompiled = fileExt(FLAGS_m) == "blob"; + bool isPrecisionSet = !(FLAGS_ip.empty() && FLAGS_op.empty() && FLAGS_iop.empty()); + if (isNetworkCompiled && isPrecisionSet) { + std::string err = std::string("Cannot set precision for a compiled network. ") + std::string("Please re-compile your network with required precision " + "using compile_tool"); + + throw std::logic_error(err); + } + return true; +} + +static void next_step(const std::string additional_info = "") { + static size_t step_id = 0; + static const std::map step_names = {{1, "Parsing and validating input arguments"}, + {2, "Loading Inference Engine"}, + {3, "Setting device configuration"}, + {4, "Reading network files"}, + {5, "Resizing network to match image sizes and given batch"}, + {6, "Configuring input of the model"}, + {7, "Loading the model to the device"}, + {8, "Setting optimal runtime parameters"}, + {9, "Creating infer requests and filling input blobs with images"}, + {10, "Measuring performance"}, + {11, "Dumping statistics report"}}; + + step_id++; + if (step_names.count(step_id) == 0) + IE_THROW() << "Step ID " << step_id << " is out of total steps number " << step_names.size(); + + std::cout << "[Step " << step_id << "/" << step_names.size() << "] " << step_names.at(step_id) + << (additional_info.empty() ? "" : " (" + additional_info + ")") << std::endl; +} + +template +T getMedianValue(const std::vector& vec) { + std::vector sortedVec(vec); + std::sort(sortedVec.begin(), sortedVec.end()); + return (sortedVec.size() % 2 != 0) ? sortedVec[sortedVec.size() / 2ULL] + : (sortedVec[sortedVec.size() / 2ULL] + sortedVec[sortedVec.size() / 2ULL - 1ULL]) / static_cast(2.0); +} + +/** + * @brief The entry point of the benchmark application + */ +int main(int argc, char* argv[]) { + std::shared_ptr statistics; + try { + ExecutableNetwork exeNetwork; + + // ----------------- 1. 
Parsing and validating input arguments + // ------------------------------------------------- + next_step(); + + if (!ParseAndCheckCommandLine(argc, argv)) { + return 0; + } + + bool isNetworkCompiled = fileExt(FLAGS_m) == "blob"; + if (isNetworkCompiled) { + slog::info << "Network is compiled" << slog::endl; + } + + std::vector flags; + StatisticsReport::Parameters command_line_arguments; + gflags::GetAllFlags(&flags); + for (auto& flag : flags) { + if (!flag.is_default) { + command_line_arguments.push_back({flag.name, flag.current_value}); + } + } + if (!FLAGS_report_type.empty()) { + statistics = std::make_shared(StatisticsReport::Config {FLAGS_report_type, FLAGS_report_folder}); + statistics->addParameters(StatisticsReport::Category::COMMAND_LINE_PARAMETERS, command_line_arguments); + } + auto isFlagSetInCommandLine = [&command_line_arguments](const std::string& name) { + return (std::find_if(command_line_arguments.begin(), command_line_arguments.end(), [name](const std::pair& p) { + return p.first == name; + }) != command_line_arguments.end()); + }; + + std::string device_name = FLAGS_d; + + // Parse devices + auto devices = parseDevices(device_name); + + // Parse nstreams per device + std::map device_nstreams = parseNStreamsValuePerDevice(devices, FLAGS_nstreams); + + // Load device config file if specified + std::map> config; +#ifdef USE_OPENCV + if (!FLAGS_load_config.empty()) { + load_config(FLAGS_load_config, config); + } +#endif + /** This vector stores paths to the processed images **/ + std::vector inputFiles; + parseInputFilesArguments(inputFiles); + + // ----------------- 2. Loading the Inference Engine + // ----------------------------------------------------------- + next_step(); + + Core ie; + if (FLAGS_d.find("CPU") != std::string::npos && !FLAGS_l.empty()) { + // CPU (MKLDNN) extensions is loaded as a shared library and passed as a + // pointer to base extension + const auto extension_ptr = std::make_shared(FLAGS_l); + ie.AddExtension(extension_ptr); + slog::info << "CPU (MKLDNN) extensions is loaded " << FLAGS_l << slog::endl; + } + + // Load clDNN Extensions + if ((FLAGS_d.find("GPU") != std::string::npos) && !FLAGS_c.empty()) { + // Override config if command line parameter is specified + if (!config.count("GPU")) + config["GPU"] = {}; + config["GPU"][CONFIG_KEY(CONFIG_FILE)] = FLAGS_c; + } + if (config.count("GPU") && config.at("GPU").count(CONFIG_KEY(CONFIG_FILE))) { + auto ext = config.at("GPU").at(CONFIG_KEY(CONFIG_FILE)); + ie.SetConfig({{CONFIG_KEY(CONFIG_FILE), ext}}, "GPU"); + slog::info << "GPU extensions is loaded " << ext << slog::endl; + } + + slog::info << "InferenceEngine: " << GetInferenceEngineVersion() << slog::endl; + slog::info << "Device info: " << slog::endl; + std::cout << ie.GetVersions(device_name) << std::endl; + + // ----------------- 3. Setting device configuration + // ----------------------------------------------------------- + next_step(); + + bool perf_counts = false; + // Update config per device according to command line parameters + for (auto& device : devices) { + if (!config.count(device)) + config[device] = {}; + std::map& device_config = config.at(device); + + // Set performance counter + if (isFlagSetInCommandLine("pc")) { + // set to user defined value + device_config[CONFIG_KEY(PERF_COUNT)] = FLAGS_pc ? 
CONFIG_VALUE(YES) : CONFIG_VALUE(NO); + } else if (device_config.count(CONFIG_KEY(PERF_COUNT)) && (device_config.at(CONFIG_KEY(PERF_COUNT)) == "YES")) { + slog::warn << "Performance counters for " << device << " device is turned on. To print results use -pc option." << slog::endl; + } else if (FLAGS_report_type == detailedCntReport || FLAGS_report_type == averageCntReport) { + slog::warn << "Turn on performance counters for " << device << " device since report type is " << FLAGS_report_type << "." << slog::endl; + device_config[CONFIG_KEY(PERF_COUNT)] = CONFIG_VALUE(YES); + } else if (!FLAGS_exec_graph_path.empty()) { + slog::warn << "Turn on performance counters for " << device << " device due to execution graph dumping." << slog::endl; + device_config[CONFIG_KEY(PERF_COUNT)] = CONFIG_VALUE(YES); + } else { + // set to default value + device_config[CONFIG_KEY(PERF_COUNT)] = FLAGS_pc ? CONFIG_VALUE(YES) : CONFIG_VALUE(NO); + } + perf_counts = (device_config.at(CONFIG_KEY(PERF_COUNT)) == CONFIG_VALUE(YES)) ? true : perf_counts; + + auto setThroughputStreams = [&]() { + const std::string key = device + "_THROUGHPUT_STREAMS"; + if (device_nstreams.count(device)) { + // set to user defined value + std::vector supported_config_keys = ie.GetMetric(device, METRIC_KEY(SUPPORTED_CONFIG_KEYS)); + if (std::find(supported_config_keys.begin(), supported_config_keys.end(), key) == supported_config_keys.end()) { + throw std::logic_error("Device " + device + " doesn't support config key '" + key + "'! " + + "Please specify -nstreams for correct devices in format " + ":,:" + + " or via configuration file."); + } + device_config[key] = device_nstreams.at(device); + } else if (!device_config.count(key) && (FLAGS_api == "async")) { + slog::warn << "-nstreams default value is determined automatically for " << device + << " device. " + "Although the automatic selection usually provides a " + "reasonable performance, " + "but it still may be non-optimal for some cases, for more " + "information look at README." + << slog::endl; + if (std::string::npos == device.find("MYRIAD")) // MYRIAD sets the default number of + // streams implicitly (without _AUTO) + device_config[key] = std::string(device + "_THROUGHPUT_AUTO"); + } + if (device_config.count(key)) + device_nstreams[device] = device_config.at(key); + }; + + if (device == "CPU") { // CPU supports few special performance-oriented keys + // limit threading for CPU portion of inference + if (isFlagSetInCommandLine("nthreads")) + device_config[CONFIG_KEY(CPU_THREADS_NUM)] = std::to_string(FLAGS_nthreads); + + if (isFlagSetInCommandLine("enforcebf16")) + device_config[CONFIG_KEY(ENFORCE_BF16)] = FLAGS_enforcebf16 ? CONFIG_VALUE(YES) : CONFIG_VALUE(NO); + + if (isFlagSetInCommandLine("pin")) { + // set to user defined value + device_config[CONFIG_KEY(CPU_BIND_THREAD)] = FLAGS_pin; + } else if (!device_config.count(CONFIG_KEY(CPU_BIND_THREAD))) { + if ((device_name.find("MULTI") != std::string::npos) && (device_name.find("GPU") != std::string::npos)) { + slog::warn << "Turn off threads pinning for " << device << " device since multi-scenario with GPU device is used." 
<< slog::endl; + device_config[CONFIG_KEY(CPU_BIND_THREAD)] = CONFIG_VALUE(NO); + } + } + + // for CPU execution, more throughput-oriented execution via streams + setThroughputStreams(); + } else if (device == ("GPU")) { + // for GPU execution, more throughput-oriented execution via streams + setThroughputStreams(); + + if ((device_name.find("MULTI") != std::string::npos) && (device_name.find("CPU") != std::string::npos)) { + slog::warn << "Turn on GPU trottling. Multi-device execution with " + "the CPU + GPU performs best with GPU trottling hint," + << "which releases another CPU thread (that is otherwise " + "used by the GPU driver for active polling)" + << slog::endl; + device_config[GPU_CONFIG_KEY(PLUGIN_THROTTLE)] = "1"; + } + } else if (device == "MYRIAD") { + device_config[CONFIG_KEY(LOG_LEVEL)] = CONFIG_VALUE(LOG_WARNING); + setThroughputStreams(); + } else if (device == "GNA") { + if (FLAGS_qb == 8) + device_config[GNA_CONFIG_KEY(PRECISION)] = "I8"; + else + device_config[GNA_CONFIG_KEY(PRECISION)] = "I16"; + + if (isFlagSetInCommandLine("nthreads")) + device_config[GNA_CONFIG_KEY(LIB_N_THREADS)] = std::to_string(FLAGS_nthreads); + } else { + std::vector supported_config_keys = ie.GetMetric(device, METRIC_KEY(SUPPORTED_CONFIG_KEYS)); + auto supported = [&](const std::string& key) { + return std::find(std::begin(supported_config_keys), std::end(supported_config_keys), key) != std::end(supported_config_keys); + }; + if (supported(CONFIG_KEY(CPU_THREADS_NUM)) && isFlagSetInCommandLine("nthreads")) { + device_config[CONFIG_KEY(CPU_THREADS_NUM)] = std::to_string(FLAGS_nthreads); + } + if (supported(CONFIG_KEY(CPU_THROUGHPUT_STREAMS)) && isFlagSetInCommandLine("nstreams")) { + device_config[CONFIG_KEY(CPU_THROUGHPUT_STREAMS)] = FLAGS_nstreams; + } + if (supported(CONFIG_KEY(CPU_BIND_THREAD)) && isFlagSetInCommandLine("pin")) { + device_config[CONFIG_KEY(CPU_BIND_THREAD)] = FLAGS_pin; + } + } + } + + for (auto&& item : config) { + ie.SetConfig(item.second, item.first); + } + + auto double_to_string = [](const double number) { + std::stringstream ss; + ss << std::fixed << std::setprecision(2) << number; + return ss.str(); + }; + auto get_total_ms_time = [](Time::time_point& startTime) { + return std::chrono::duration_cast(Time::now() - startTime).count() * 0.000001; + }; + + size_t batchSize = FLAGS_b; + Precision precision = Precision::UNSPECIFIED; + std::string topology_name = ""; + benchmark_app::InputsInfo app_inputs_info; + std::string output_name; + + // Takes priority over config from file + if (!FLAGS_cache_dir.empty()) { + ie.SetConfig({{CONFIG_KEY(CACHE_DIR), FLAGS_cache_dir}}); + } + + if (FLAGS_load_from_file && !isNetworkCompiled) { + next_step(); + slog::info << "Skipping the step for loading network from file" << slog::endl; + next_step(); + slog::info << "Skipping the step for loading network from file" << slog::endl; + next_step(); + slog::info << "Skipping the step for loading network from file" << slog::endl; + auto startTime = Time::now(); + exeNetwork = ie.LoadNetwork(FLAGS_m, device_name); + auto duration_ms = double_to_string(get_total_ms_time(startTime)); + slog::info << "Load network took " << duration_ms << " ms" << slog::endl; + if (statistics) + statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS, {{"load network time (ms)", duration_ms}}); + if (batchSize == 0) { + batchSize = 1; + } + } else if (!isNetworkCompiled) { + // ----------------- 4. 
Reading the Intermediate Representation network + // ---------------------------------------- + next_step(); + + slog::info << "Loading network files" << slog::endl; + + auto startTime = Time::now(); + CNNNetwork cnnNetwork = ie.ReadNetwork(FLAGS_m); + auto duration_ms = double_to_string(get_total_ms_time(startTime)); + slog::info << "Read network took " << duration_ms << " ms" << slog::endl; + if (statistics) + statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS, {{"read network time (ms)", duration_ms}}); + + const InputsDataMap inputInfo(cnnNetwork.getInputsInfo()); + if (inputInfo.empty()) { + throw std::logic_error("no inputs info is provided"); + } + + // ----------------- 5. Resizing network to match image sizes and given + // batch ---------------------------------- + next_step(); + batchSize = cnnNetwork.getBatchSize(); + // Parse input shapes if specified + bool reshape = false; + app_inputs_info = getInputsInfo(FLAGS_shape, FLAGS_layout, FLAGS_b, inputInfo, reshape); + if (reshape) { + InferenceEngine::ICNNNetwork::InputShapes shapes = {}; + for (auto& item : app_inputs_info) + shapes[item.first] = item.second.shape; + slog::info << "Reshaping network: " << getShapesString(shapes) << slog::endl; + startTime = Time::now(); + cnnNetwork.reshape(shapes); + duration_ms = double_to_string(get_total_ms_time(startTime)); + slog::info << "Reshape network took " << duration_ms << " ms" << slog::endl; + if (statistics) + statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS, {{"reshape network time (ms)", duration_ms}}); + } + // use batch size according to provided layout and shapes + batchSize = (!FLAGS_layout.empty()) ? getBatchSize(app_inputs_info) : cnnNetwork.getBatchSize(); + + topology_name = cnnNetwork.getName(); + slog::info << (FLAGS_b != 0 ? "Network batch size was changed to: " : "Network batch size: ") << batchSize << slog::endl; + + // ----------------- 6. Configuring inputs and outputs + // ---------------------------------------------------------------------- + next_step(); + + processPrecision(cnnNetwork, FLAGS_ip, FLAGS_op, FLAGS_iop); + for (auto& item : cnnNetwork.getInputsInfo()) { + // if precision for input set by user, then set it to app_inputs + // if it an image, set U8 + if (!FLAGS_ip.empty() || FLAGS_iop.find(item.first) != std::string::npos) { + app_inputs_info.at(item.first).precision = item.second->getPrecision(); + } else if (app_inputs_info.at(item.first).isImage()) { + app_inputs_info.at(item.first).precision = Precision::U8; + item.second->setPrecision(app_inputs_info.at(item.first).precision); + } + } + + printInputAndOutputsInfo(cnnNetwork); + // ----------------- 7. Loading the model to the device + // -------------------------------------------------------- + next_step(); + startTime = Time::now(); + exeNetwork = ie.LoadNetwork(cnnNetwork, device_name); + duration_ms = double_to_string(get_total_ms_time(startTime)); + slog::info << "Load network took " << duration_ms << " ms" << slog::endl; + if (statistics) + statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS, {{"load network time (ms)", duration_ms}}); + } else { + next_step(); + slog::info << "Skipping the step for compiled network" << slog::endl; + next_step(); + slog::info << "Skipping the step for compiled network" << slog::endl; + next_step(); + slog::info << "Skipping the step for compiled network" << slog::endl; + // ----------------- 7. 
Loading the model to the device + // -------------------------------------------------------- + next_step(); + auto startTime = Time::now(); + exeNetwork = ie.ImportNetwork(FLAGS_m, device_name, {}); + auto duration_ms = double_to_string(get_total_ms_time(startTime)); + slog::info << "Import network took " << duration_ms << " ms" << slog::endl; + if (statistics) + statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS, {{"import network time (ms)", duration_ms}}); + app_inputs_info = getInputsInfo(FLAGS_shape, FLAGS_layout, FLAGS_b, exeNetwork.GetInputsInfo()); + if (batchSize == 0) { + batchSize = 1; + } + } + // ----------------- 8. Setting optimal runtime parameters + // ----------------------------------------------------- + next_step(); + + // Update number of streams + for (auto&& ds : device_nstreams) { + const std::string key = ds.first + "_THROUGHPUT_STREAMS"; + device_nstreams[ds.first] = ie.GetConfig(ds.first, key).as(); + } + + // Number of requests + uint32_t nireq = FLAGS_nireq; + if (nireq == 0) { + if (FLAGS_api == "sync") { + nireq = 1; + } else { + std::string key = METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS); + try { + nireq = exeNetwork.GetMetric(key).as(); + } catch (const std::exception& ex) { + IE_THROW() << "Every device used with the benchmark_app should " + << "support OPTIMAL_NUMBER_OF_INFER_REQUESTS " + "ExecutableNetwork metric. " + << "Failed to query the metric for the " << device_name << " with error:" << ex.what(); + } + } + } + + // Iteration limit + uint32_t niter = FLAGS_niter; + if ((niter > 0) && (FLAGS_api == "async")) { + niter = ((niter + nireq - 1) / nireq) * nireq; + if (FLAGS_niter != niter) { + slog::warn << "Number of iterations was aligned by request number from " << FLAGS_niter << " to " << niter << " using number of requests " + << nireq << slog::endl; + } + } + + // Time limit + uint32_t duration_seconds = 0; + if (FLAGS_t != 0) { + // time limit + duration_seconds = FLAGS_t; + } else if (FLAGS_niter == 0) { + // default time limit + duration_seconds = deviceDefaultDeviceDurationInSeconds(device_name); + } + uint64_t duration_nanoseconds = getDurationInNanoseconds(duration_seconds); + + if (statistics) { + statistics->addParameters(StatisticsReport::Category::RUNTIME_CONFIG, + { + {"topology", topology_name}, + {"target device", device_name}, + {"API", FLAGS_api}, + {"precision", std::string(precision.name())}, + {"batch size", std::to_string(batchSize)}, + {"number of iterations", std::to_string(niter)}, + {"number of parallel infer requests", std::to_string(nireq)}, + {"duration (ms)", std::to_string(getDurationInMilliseconds(duration_seconds))}, + }); + for (auto& nstreams : device_nstreams) { + std::stringstream ss; + ss << "number of " << nstreams.first << " streams"; + statistics->addParameters(StatisticsReport::Category::RUNTIME_CONFIG, { + {ss.str(), nstreams.second}, + }); + } + } + + // ----------------- 9. Creating infer requests and filling input blobs + // ---------------------------------------- + next_step(); + + InferRequestsQueue inferRequestsQueue(exeNetwork, nireq); + fillBlobs(inputFiles, batchSize, app_inputs_info, inferRequestsQueue.requests); + + // ----------------- 10. 
Measuring performance + // ------------------------------------------------------------------ + size_t progressCnt = 0; + size_t progressBarTotalCount = progressBarDefaultTotalCount; + size_t iteration = 0; + + std::stringstream ss; + ss << "Start inference " << FLAGS_api << "hronously"; + if (FLAGS_api == "async") { + if (!ss.str().empty()) { + ss << ", "; + } + ss << nireq << " inference requests"; + std::stringstream device_ss; + for (auto& nstreams : device_nstreams) { + if (!device_ss.str().empty()) { + device_ss << ", "; + } + device_ss << nstreams.second << " streams for " << nstreams.first; + } + if (!device_ss.str().empty()) { + ss << " using " << device_ss.str(); + } + } + ss << ", limits: "; + if (duration_seconds > 0) { + ss << getDurationInMilliseconds(duration_seconds) << " ms duration"; + } + if (niter != 0) { + if (duration_seconds == 0) { + progressBarTotalCount = niter; + } + if (duration_seconds > 0) { + ss << ", "; + } + ss << niter << " iterations"; + } + next_step(ss.str()); + + // warming up - out of scope + auto inferRequest = inferRequestsQueue.getIdleRequest(); + if (!inferRequest) { + IE_THROW() << "No idle Infer Requests!"; + } + if (FLAGS_api == "sync") { + inferRequest->infer(); + } else { + inferRequest->startAsync(); + } + inferRequestsQueue.waitAll(); + auto duration_ms = double_to_string(inferRequestsQueue.getLatencies()[0]); + slog::info << "First inference took " << duration_ms << " ms" << slog::endl; + if (statistics) + statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS, {{"first inference time (ms)", duration_ms}}); + inferRequestsQueue.resetTimes(); + + auto startTime = Time::now(); + auto execTime = std::chrono::duration_cast(Time::now() - startTime).count(); + + /** Start inference & calculate performance **/ + /** to align number if iterations to guarantee that last infer requests are + * executed in the same conditions **/ + ProgressBar progressBar(progressBarTotalCount, FLAGS_stream_output, FLAGS_progress); + + while ((niter != 0LL && iteration < niter) || (duration_nanoseconds != 0LL && (uint64_t)execTime < duration_nanoseconds) || + (FLAGS_api == "async" && iteration % nireq != 0)) { + inferRequest = inferRequestsQueue.getIdleRequest(); + if (!inferRequest) { + IE_THROW() << "No idle Infer Requests!"; + } + + if (FLAGS_api == "sync") { + inferRequest->infer(); + } else { + // As the inference request is currently idle, the wait() adds no + // additional overhead (and should return immediately). The primary + // reason for calling the method is exception checking/re-throwing. + // Callback, that governs the actual execution can handle errors as + // well, but as it uses just error codes it has no details like ‘what()’ + // method of `std::exception` So, rechecking for any exceptions here. + inferRequest->wait(); + inferRequest->startAsync(); + } + iteration++; + + execTime = std::chrono::duration_cast(Time::now() - startTime).count(); + + if (niter > 0) { + progressBar.addProgress(1); + } else { + // calculate how many progress intervals are covered by current + // iteration. depends on the current iteration time and time of each + // progress interval. Previously covered progress intervals must be + // skipped. 
+ auto progressIntervalTime = duration_nanoseconds / progressBarTotalCount; + size_t newProgress = execTime / progressIntervalTime - progressCnt; + progressBar.addProgress(newProgress); + progressCnt += newProgress; + } + } + + // wait the latest inference executions + inferRequestsQueue.waitAll(); + + double latency = getMedianValue(inferRequestsQueue.getLatencies()); + double totalDuration = inferRequestsQueue.getDurationInMilliseconds(); + double fps = (FLAGS_api == "sync") ? batchSize * 1000.0 / latency : batchSize * 1000.0 * iteration / totalDuration; + + if (statistics) { + statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS, { + {"total execution time (ms)", double_to_string(totalDuration)}, + {"total number of iterations", std::to_string(iteration)}, + }); + if (device_name.find("MULTI") == std::string::npos) { + statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS, { + {"latency (ms)", double_to_string(latency)}, + }); + } + statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS, {{"throughput", double_to_string(fps)}}); + } + + progressBar.finish(); + + // ----------------- 11. Dumping statistics report + // ------------------------------------------------------------- + next_step(); + +#ifdef USE_OPENCV + if (!FLAGS_dump_config.empty()) { + dump_config(FLAGS_dump_config, config); + slog::info << "Inference Engine configuration settings were dumped to " << FLAGS_dump_config << slog::endl; + } +#endif + + if (!FLAGS_exec_graph_path.empty()) { + try { + CNNNetwork execGraphInfo = exeNetwork.GetExecGraphInfo(); + execGraphInfo.serialize(FLAGS_exec_graph_path); + slog::info << "executable graph is stored to " << FLAGS_exec_graph_path << slog::endl; + } catch (const std::exception& ex) { + slog::err << "Can't get executable graph: " << ex.what() << slog::endl; + } + } + + if (perf_counts) { + std::vector> perfCounts; + for (size_t ireq = 0; ireq < nireq; ireq++) { + auto reqPerfCounts = inferRequestsQueue.requests[ireq]->getPerformanceCounts(); + if (FLAGS_pc) { + slog::info << "Performance counts for " << ireq << "-th infer request:" << slog::endl; + printPerformanceCounts(reqPerfCounts, std::cout, getFullDeviceName(ie, FLAGS_d), false); + } + perfCounts.push_back(reqPerfCounts); + } + if (statistics) { + statistics->dumpPerformanceCounters(perfCounts); + } + } + + if (statistics) + statistics->dump(); + + std::cout << "Count: " << iteration << " iterations" << std::endl; + std::cout << "Duration: " << double_to_string(totalDuration) << " ms" << std::endl; + if (device_name.find("MULTI") == std::string::npos) + std::cout << "Latency: " << double_to_string(latency) << " ms" << std::endl; + std::cout << "Throughput: " << double_to_string(fps) << " FPS" << std::endl; + } catch (const std::exception& ex) { + slog::err << ex.what() << slog::endl; + + if (statistics) { + statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS, { + {"error", ex.what()}, + }); + statistics->dump(); + } + + return 3; + } + + return 0; +} diff --git a/tools/legacy/benchmark_app/progress_bar.hpp b/tools/legacy/benchmark_app/progress_bar.hpp new file mode 100644 index 00000000000..2cd1ebaa69e --- /dev/null +++ b/tools/legacy/benchmark_app/progress_bar.hpp @@ -0,0 +1,51 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include "console_progress.hpp" + +/// @brief Responsible for progress bar handling within the benchmark_app +class ProgressBar { +public: + explicit 
ProgressBar(size_t totalNum, bool streamOutput = false, bool progressEnabled = false) { + _bar.reset(new ConsoleProgress(totalNum, streamOutput)); + _streamOutput = streamOutput; + _isFinished = true; + _progressEnabled = progressEnabled; + } + + void addProgress(size_t num) { + _isFinished = false; + if (_progressEnabled) { + _bar->addProgress(num); + } + } + + void finish(size_t num = 0) { + if (num > 0) { + addProgress(num); + } + _isFinished = true; + _bar->finish(); + if (_progressEnabled) { + std::cout << std::endl; + } + } + + void newBar(size_t totalNum) { + if (_isFinished) { + _bar.reset(new ConsoleProgress(totalNum, _streamOutput)); + } else { + throw std::logic_error("Cannot create a new bar. Current bar is still in progress"); + } + } + +private: + std::unique_ptr _bar; + bool _streamOutput; + bool _isFinished; + bool _progressEnabled; +}; diff --git a/tools/legacy/benchmark_app/slog.cpp b/tools/legacy/benchmark_app/slog.cpp new file mode 100644 index 00000000000..0fc7d391ef3 --- /dev/null +++ b/tools/legacy/benchmark_app/slog.cpp @@ -0,0 +1,33 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "slog.hpp" + +#include + +namespace slog { + +LogStream info("INFO", std::cout); +LogStream warn("WARNING", std::cout); +LogStream err("ERROR", std::cerr); + +LogStream::LogStream(const std::string& prefix, std::ostream& log_stream): _prefix(prefix), _new_line(true) { + _log_stream = &log_stream; +} + +// Specializing for LogStreamEndLine to support slog::endl +LogStream& LogStream::operator<<(const LogStreamEndLine& /*arg*/) { + _new_line = true; + + (*_log_stream) << std::endl; + return *this; +} + +// Specializing for LogStreamBoolAlpha to support slog::boolalpha +LogStream& LogStream::operator<<(const LogStreamBoolAlpha& /*arg*/) { + (*_log_stream) << std::boolalpha; + return *this; +} + +} // namespace slog \ No newline at end of file diff --git a/tools/legacy/benchmark_app/slog.hpp b/tools/legacy/benchmark_app/slog.hpp new file mode 100644 index 00000000000..0f5150b8ef0 --- /dev/null +++ b/tools/legacy/benchmark_app/slog.hpp @@ -0,0 +1,74 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +/** + * @brief a header file with logging facility for common samples + * @file log.hpp + */ + +#pragma once + +#include +#include + +namespace slog { +/** + * @class LogStreamEndLine + * @brief The LogStreamEndLine class implements an end line marker for a log stream + */ +class LogStreamEndLine {}; + +static constexpr LogStreamEndLine endl; + +/** + * @class LogStreamBoolAlpha + * @brief The LogStreamBoolAlpha class implements bool printing for a log stream + */ +class LogStreamBoolAlpha {}; + +static constexpr LogStreamBoolAlpha boolalpha; + +/** + * @class LogStream + * @brief The LogStream class implements a stream for sample logging + */ +class LogStream { + std::string _prefix; + std::ostream* _log_stream; + bool _new_line; + +public: + /** + * @brief A constructor. 
Creates an LogStream object + * @param prefix The prefix to print + */ + LogStream(const std::string& prefix, std::ostream& log_stream); + + /** + * @brief A stream output operator to be used within the logger + * @param arg Object for serialization in the logger message + */ + template + LogStream& operator<<(const T& arg) { + if (_new_line) { + (*_log_stream) << "[ " << _prefix << " ] "; + _new_line = false; + } + + (*_log_stream) << arg; + return *this; + } + + // Specializing for LogStreamEndLine to support slog::endl + LogStream& operator<<(const LogStreamEndLine&); + + // Specializing for LogStreamBoolAlpha to support slog::boolalpha + LogStream& operator<<(const LogStreamBoolAlpha&); +}; + +extern LogStream info; +extern LogStream warn; +extern LogStream err; + +} // namespace slog diff --git a/tools/legacy/benchmark_app/statistics_report.cpp b/tools/legacy/benchmark_app/statistics_report.cpp new file mode 100644 index 00000000000..8ad8443804f --- /dev/null +++ b/tools/legacy/benchmark_app/statistics_report.cpp @@ -0,0 +1,143 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "statistics_report.hpp" + +#include +#include +#include +#include +#include + +void StatisticsReport::addParameters(const Category& category, const Parameters& parameters) { + if (_parameters.count(category) == 0) + _parameters[category] = parameters; + else + _parameters[category].insert(_parameters[category].end(), parameters.begin(), parameters.end()); +} + +void StatisticsReport::dump() { + CsvDumper dumper(true, _config.report_folder + _separator + "benchmark_report.csv"); + + auto dump_parameters = [&dumper](const Parameters& parameters) { + for (auto& parameter : parameters) { + dumper << parameter.first << parameter.second; + dumper.endLine(); + } + }; + if (_parameters.count(Category::COMMAND_LINE_PARAMETERS)) { + dumper << "Command line parameters"; + dumper.endLine(); + + dump_parameters(_parameters.at(Category::COMMAND_LINE_PARAMETERS)); + dumper.endLine(); + } + + if (_parameters.count(Category::RUNTIME_CONFIG)) { + dumper << "Configuration setup"; + dumper.endLine(); + + dump_parameters(_parameters.at(Category::RUNTIME_CONFIG)); + dumper.endLine(); + } + + if (_parameters.count(Category::EXECUTION_RESULTS)) { + dumper << "Execution results"; + dumper.endLine(); + + dump_parameters(_parameters.at(Category::EXECUTION_RESULTS)); + dumper.endLine(); + } + + slog::info << "Statistics report is stored to " << dumper.getFilename() << slog::endl; +} + +void StatisticsReport::dumpPerformanceCountersRequest(CsvDumper& dumper, const PerformaceCounters& perfCounts) { + auto performanceMapSorted = perfCountersSorted(perfCounts); + + long long total = 0L; + long long total_cpu = 0L; + + dumper << "layerName" + << "execStatus" + << "layerType" + << "execType"; + dumper << "realTime (ms)" + << "cpuTime (ms)"; + dumper.endLine(); + + for (const auto& layer : performanceMapSorted) { + dumper << layer.first; // layer name + + switch (layer.second.status) { + case InferenceEngine::InferenceEngineProfileInfo::EXECUTED: + dumper << "EXECUTED"; + break; + case InferenceEngine::InferenceEngineProfileInfo::NOT_RUN: + dumper << "NOT_RUN"; + break; + case InferenceEngine::InferenceEngineProfileInfo::OPTIMIZED_OUT: + dumper << "OPTIMIZED_OUT"; + break; + } + dumper << layer.second.layer_type << layer.second.exec_type; + dumper << std::to_string(layer.second.realTime_uSec / 1000.0) << std::to_string(layer.second.cpu_uSec / 1000.0); + total += layer.second.realTime_uSec; + 
total_cpu += layer.second.cpu_uSec; + dumper.endLine(); + } + dumper << "Total" + << "" + << "" + << ""; + dumper << total / 1000.0 << total_cpu / 1000.0; + dumper.endLine(); + dumper.endLine(); +} + +void StatisticsReport::dumpPerformanceCounters(const std::vector& perfCounts) { + if ((_config.report_type.empty()) || (_config.report_type == noCntReport)) { + slog::info << "Statistics collecting for performance counters was not " + "requested. No reports are dumped." + << slog::endl; + return; + } + if (perfCounts.empty()) { + slog::info << "Performance counters are empty. No reports are dumped." << slog::endl; + return; + } + CsvDumper dumper(true, _config.report_folder + _separator + "benchmark_" + _config.report_type + "_report.csv"); + if (_config.report_type == detailedCntReport) { + for (auto& pc : perfCounts) { + dumpPerformanceCountersRequest(dumper, pc); + } + } else if (_config.report_type == averageCntReport) { + auto getAveragePerformanceCounters = [&perfCounts]() { + std::map performanceCountersAvg; + // iterate over each processed infer request and handle its PM data + for (size_t i = 0; i < perfCounts.size(); i++) { + auto performanceMapSorted = perfCountersSorted(perfCounts[i]); + // iterate over each layer from sorted vector and add required PM data + // to the per-layer maps + for (const auto& pm : performanceMapSorted) { + if (performanceCountersAvg.count(pm.first) == 0) { + performanceCountersAvg[pm.first] = perfCounts.at(i).at(pm.first); + } else { + performanceCountersAvg[pm.first].realTime_uSec += perfCounts.at(i).at(pm.first).realTime_uSec; + performanceCountersAvg[pm.first].cpu_uSec += perfCounts.at(i).at(pm.first).cpu_uSec; + } + } + } + for (auto& pm : performanceCountersAvg) { + pm.second.realTime_uSec /= perfCounts.size(); + pm.second.cpu_uSec /= perfCounts.size(); + } + return performanceCountersAvg; + }; + dumpPerformanceCountersRequest(dumper, getAveragePerformanceCounters()); + } else { + throw std::logic_error("PM data can only be collected for average or detailed report types"); + } + slog::info << "Performance counters report is stored to " << dumper.getFilename() << slog::endl; +} diff --git a/tools/legacy/benchmark_app/statistics_report.hpp b/tools/legacy/benchmark_app/statistics_report.hpp new file mode 100644 index 00000000000..bb4877da89a --- /dev/null +++ b/tools/legacy/benchmark_app/statistics_report.hpp @@ -0,0 +1,70 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include "common.hpp" +#include "csv_dumper.hpp" +#include "slog.hpp" +#include +#include +#include + +// @brief statistics reports types +static constexpr char noCntReport[] = "no_counters"; +static constexpr char averageCntReport[] = "average_counters"; +static constexpr char detailedCntReport[] = "detailed_counters"; + +/// @brief Responsible for collecting of statistics and dumping to .csv file +class StatisticsReport { +public: + typedef std::map PerformaceCounters; + typedef std::vector> Parameters; + + struct Config { + std::string report_type; + std::string report_folder; + }; + + enum class Category { + COMMAND_LINE_PARAMETERS, + RUNTIME_CONFIG, + EXECUTION_RESULTS, + }; + + explicit StatisticsReport(Config config): _config(std::move(config)) { + _separator = +#if defined _WIN32 || defined __CYGWIN__ + #if defined UNICODE + L"\\"; + #else + "\\"; + #endif +#else + "/"; +#endif + if (_config.report_folder.empty()) + _separator = ""; + } + + void addParameters(const Category& category, const 
Parameters& parameters); + + void dump(); + + void dumpPerformanceCounters(const std::vector& perfCounts); + +private: + void dumpPerformanceCountersRequest(CsvDumper& dumper, const PerformaceCounters& perfCounts); + + // configuration of current benchmark execution + const Config _config; + + // parameters + std::map _parameters; + + // csv separator + std::string _separator; +}; diff --git a/tools/legacy/benchmark_app/utils.cpp b/tools/legacy/benchmark_app/utils.cpp new file mode 100644 index 00000000000..43e2d27fc19 --- /dev/null +++ b/tools/legacy/benchmark_app/utils.cpp @@ -0,0 +1,194 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +// clang-format off +#include +#include +#include +#include "common.hpp" +#include "slog.hpp" +#include +#include +#include + +#include "utils.hpp" +// clang-format on + +#ifdef USE_OPENCV + #include +#endif + +namespace benchmark_app { +bool InputInfo::isImage() const { + if ((layout != "NCHW") && (layout != "NHWC") && (layout != "CHW") && (layout != "HWC")) + return false; + return (channels() == 3); +} +bool InputInfo::isImageInfo() const { + if (layout != "NC") + return false; + return (channels() >= 2); +} +size_t InputInfo::getDimentionByLayout(char character) const { + size_t pos = layout.find(character); + if (pos == std::string::npos) + throw std::runtime_error("Error: Can't get " + std::string(character, 1) + " from layout " + layout); + return shape.at(pos); +} +size_t InputInfo::width() const { + return getDimentionByLayout('W'); +} +size_t InputInfo::height() const { + return getDimentionByLayout('H'); +} +size_t InputInfo::channels() const { + return getDimentionByLayout('C'); +} +size_t InputInfo::batch() const { + return getDimentionByLayout('N'); +} +size_t InputInfo::depth() const { + return getDimentionByLayout('D'); +} +} // namespace benchmark_app + +uint32_t deviceDefaultDeviceDurationInSeconds(const std::string& device) { + static const std::map deviceDefaultDurationInSeconds {{"CPU", 60}, {"GPU", 60}, {"VPU", 60}, {"MYRIAD", 60}, + {"HDDL", 60}, {"FPGA", 120}, {"UNKNOWN", 120}}; + uint32_t duration = 0; + for (const auto& deviceDurationInSeconds : deviceDefaultDurationInSeconds) { + if (device.find(deviceDurationInSeconds.first) != std::string::npos) { + duration = std::max(duration, deviceDurationInSeconds.second); + } + } + if (duration == 0) { + const auto unknownDeviceIt = + find_if(deviceDefaultDurationInSeconds.begin(), deviceDefaultDurationInSeconds.end(), [](std::pair deviceDuration) { + return deviceDuration.first == "UNKNOWN"; + }); + + if (unknownDeviceIt == deviceDefaultDurationInSeconds.end()) { + throw std::logic_error("UNKNOWN device was not found in the device duration list"); + } + duration = unknownDeviceIt->second; + slog::warn << "Default duration " << duration << " seconds for unknown device '" << device << "' is used" << slog::endl; + } + return duration; +} + +std::vector split(const std::string& s, char delim) { + std::vector result; + std::stringstream ss(s); + std::string item; + + while (getline(ss, item, delim)) { + result.push_back(item); + } + return result; +} + +std::vector parseDevices(const std::string& device_string) { + std::string comma_separated_devices = device_string; + if (comma_separated_devices.find(":") != std::string::npos) { + comma_separated_devices = comma_separated_devices.substr(comma_separated_devices.find(":") + 1); + } + if ((comma_separated_devices == "MULTI") || (comma_separated_devices == "HETERO")) + return std::vector(); + auto 
devices = split(comma_separated_devices, ','); + for (auto& device : devices) + device = device.substr(0, device.find_first_of(".(")); + return devices; +} + +std::map parseNStreamsValuePerDevice(const std::vector& devices, const std::string& values_string) { + // Format: :,: or just + std::map result; + auto device_value_strings = split(values_string, ','); + for (auto& device_value_string : device_value_strings) { + auto device_value_vec = split(device_value_string, ':'); + if (device_value_vec.size() == 2) { + auto device_name = device_value_vec.at(0); + auto nstreams = device_value_vec.at(1); + auto it = std::find(devices.begin(), devices.end(), device_name); + if (it != devices.end()) { + result[device_name] = nstreams; + } else { + throw std::logic_error("Can't set nstreams value " + std::string(nstreams) + " for device '" + device_name + "'! Incorrect device name!"); + } + } else if (device_value_vec.size() == 1) { + auto value = device_value_vec.at(0); + for (auto& device : devices) { + result[device] = value; + } + } else if (device_value_vec.size() != 0) { + throw std::runtime_error("Unknown string format: " + values_string); + } + } + return result; +} + +size_t getBatchSize(const benchmark_app::InputsInfo& inputs_info) { + size_t batch_size = 0; + for (auto& info : inputs_info) { + std::size_t batch_index = info.second.layout.find("N"); + if (batch_index != std::string::npos) { + if (batch_size == 0) + batch_size = info.second.shape[batch_index]; + else if (batch_size != info.second.shape[batch_index]) + throw std::logic_error("Can't deterimine batch size: batch is " + "different for different inputs!"); + } + } + if (batch_size == 0) + batch_size = 1; + return batch_size; +} + +std::string getShapesString(const InferenceEngine::ICNNNetwork::InputShapes& shapes) { + std::stringstream ss; + for (auto& shape : shapes) { + if (!ss.str().empty()) + ss << ", "; + ss << "\'" << shape.first << "': ["; + for (size_t i = 0; i < shape.second.size(); i++) { + if (i > 0) + ss << ", "; + ss << shape.second.at(i); + } + ss << "]"; + } + return ss.str(); +} + +#ifdef USE_OPENCV +void dump_config(const std::string& filename, const std::map>& config) { + cv::FileStorage fs(filename, cv::FileStorage::WRITE); + if (!fs.isOpened()) + throw std::runtime_error("Error: Can't open config file : " + filename); + for (auto device_it = config.begin(); device_it != config.end(); ++device_it) { + fs << device_it->first << "{:"; + for (auto param_it = device_it->second.begin(); param_it != device_it->second.end(); ++param_it) + fs << param_it->first << param_it->second; + fs << "}"; + } + fs.release(); +} + +void load_config(const std::string& filename, std::map>& config) { + cv::FileStorage fs(filename, cv::FileStorage::READ); + if (!fs.isOpened()) + throw std::runtime_error("Error: Can't load config file : " + filename); + cv::FileNode root = fs.root(); + for (auto it = root.begin(); it != root.end(); ++it) { + auto device = *it; + if (!device.isMap()) { + throw std::runtime_error("Error: Can't parse config file : " + filename); + } + for (auto iit = device.begin(); iit != device.end(); ++iit) { + auto item = *iit; + config[device.name()][item.name()] = item.string(); + } + } +} +#endif \ No newline at end of file diff --git a/tools/legacy/benchmark_app/utils.hpp b/tools/legacy/benchmark_app/utils.hpp new file mode 100644 index 00000000000..d2923002242 --- /dev/null +++ b/tools/legacy/benchmark_app/utils.hpp @@ -0,0 +1,123 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: 
Apache-2.0 +// + +#pragma once + +#include +#include +#include +#include + +namespace benchmark_app { +struct InputInfo { + InferenceEngine::Precision precision; + InferenceEngine::SizeVector shape; + std::string layout; + bool isImage() const; + bool isImageInfo() const; + size_t getDimentionByLayout(char character) const; + size_t width() const; + size_t height() const; + size_t channels() const; + size_t batch() const; + size_t depth() const; +}; +using InputsInfo = std::map; +} // namespace benchmark_app + +std::vector parseDevices(const std::string& device_string); +uint32_t deviceDefaultDeviceDurationInSeconds(const std::string& device); +std::map parseNStreamsValuePerDevice(const std::vector& devices, const std::string& values_string); +std::string getShapesString(const InferenceEngine::ICNNNetwork::InputShapes& shapes); +size_t getBatchSize(const benchmark_app::InputsInfo& inputs_info); +std::vector split(const std::string& s, char delim); + +template +std::map parseInputParameters(const std::string parameter_string, const std::map& input_info) { + // Parse parameter string like "input0[value0],input1[value1]" or "[value]" (applied to all + // inputs) + std::map return_value; + std::string search_string = parameter_string; + auto start_pos = search_string.find_first_of('['); + while (start_pos != std::string::npos) { + auto end_pos = search_string.find_first_of(']'); + if (end_pos == std::string::npos) + break; + auto input_name = search_string.substr(0, start_pos); + auto input_value = search_string.substr(start_pos + 1, end_pos - start_pos - 1); + if (!input_name.empty()) { + return_value[input_name] = input_value; + } else { + for (auto& item : input_info) { + return_value[item.first] = input_value; + } + } + search_string = search_string.substr(end_pos + 1); + if (search_string.empty() || search_string.front() != ',') + break; + search_string = search_string.substr(1); + start_pos = search_string.find_first_of('['); + } + if (!search_string.empty()) + throw std::logic_error("Can't parse input parameter string: " + parameter_string); + return return_value; +} + +template +benchmark_app::InputsInfo getInputsInfo(const std::string& shape_string, const std::string& layout_string, const size_t batch_size, + const std::map& input_info, bool& reshape_required) { + std::map shape_map = parseInputParameters(shape_string, input_info); + std::map layout_map = parseInputParameters(layout_string, input_info); + reshape_required = false; + benchmark_app::InputsInfo info_map; + for (auto& item : input_info) { + benchmark_app::InputInfo info; + auto name = item.first; + auto descriptor = item.second->getTensorDesc(); + // Precision + info.precision = descriptor.getPrecision(); + // Shape + if (shape_map.count(name)) { + std::vector parsed_shape; + for (auto& dim : split(shape_map.at(name), ',')) { + parsed_shape.push_back(std::stoi(dim)); + } + info.shape = parsed_shape; + reshape_required = true; + } else { + info.shape = descriptor.getDims(); + } + // Layout + if (layout_map.count(name)) { + info.layout = layout_map.at(name); + std::transform(info.layout.begin(), info.layout.end(), info.layout.begin(), ::toupper); + } else { + std::stringstream ss; + ss << descriptor.getLayout(); + info.layout = ss.str(); + } + // Update shape with batch if needed + if (batch_size != 0) { + std::size_t batch_index = info.layout.find("N"); + if ((batch_index != std::string::npos) && (info.shape.at(batch_index) != batch_size)) { + info.shape[batch_index] = batch_size; + reshape_required = true; + } + } + 
info_map[name] = info; + } + return info_map; +} + +template +benchmark_app::InputsInfo getInputsInfo(const std::string& shape_string, const std::string& layout_string, const size_t batch_size, + const std::map& input_info) { + bool reshape_required = false; + return getInputsInfo(shape_string, layout_string, batch_size, input_info, reshape_required); +} + +#ifdef USE_OPENCV +void dump_config(const std::string& filename, const std::map>& config); +void load_config(const std::string& filename, std::map>& config); +#endif \ No newline at end of file diff --git a/tools/legacy/benchmark_app/w_dirent.h b/tools/legacy/benchmark_app/w_dirent.h new file mode 100644 index 00000000000..5352a8f8b13 --- /dev/null +++ b/tools/legacy/benchmark_app/w_dirent.h @@ -0,0 +1,176 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#if defined(_WIN32) + + #ifndef WIN32_LEAN_AND_MEAN + #define WIN32_LEAN_AND_MEAN + #define WIN32_LEAN_AND_MEAN_UNDEF + #endif + + #ifndef NOMINMAX + #define NOMINMAX + #define NOMINMAX_UNDEF + #endif + + #if defined(_M_IX86) && !defined(_X86_) && !defined(_AMD64_) + #define _X86_ + #endif + + #if defined(_M_X64) && !defined(_X86_) && !defined(_AMD64_) + #define _AMD64_ + #endif + + #if defined(_M_ARM) && !defined(_ARM_) && !defined(_ARM64_) + #define _ARM_ + #endif + + #if defined(_M_ARM64) && !defined(_ARM_) && !defined(_ARM64_) + #define _ARM64_ + #endif + + // clang-format off + #include + #include + #include + #include + #include + // clang-format on + + // Copied from linux libc sys/stat.h: + #define S_ISREG(m) (((m)&S_IFMT) == S_IFREG) + #define S_ISDIR(m) (((m)&S_IFMT) == S_IFDIR) + +/// @brief structure to store directory names +struct dirent { + char* d_name; + + explicit dirent(const wchar_t* wsFilePath) { + size_t i; + auto slen = wcslen(wsFilePath); + d_name = static_cast(malloc(slen + 1)); + wcstombs_s(&i, d_name, slen + 1, wsFilePath, slen); + } + ~dirent() { + free(d_name); + } +}; + +/// @brief class to store directory data (files meta) +class DIR { + WIN32_FIND_DATAA FindFileData; + HANDLE hFind; + dirent* next; + + static inline bool endsWith(const std::string& src, const char* with) { + int wl = static_cast(strlen(with)); + int so = static_cast(src.length()) - wl; + if (so < 0) + return false; + return 0 == strncmp(with, &src[so], wl); + } + +public: + DIR(const DIR& other) = delete; + DIR(DIR&& other) = delete; + DIR& operator=(const DIR& other) = delete; + DIR& operator=(DIR&& other) = delete; + + explicit DIR(const char* dirPath): next(nullptr) { + std::string ws = dirPath; + if (endsWith(ws, "\\")) + ws += "*"; + else + ws += "\\*"; + hFind = FindFirstFileA(ws.c_str(), &FindFileData); + FindFileData.dwReserved0 = hFind != INVALID_HANDLE_VALUE; + } + + ~DIR() { + if (!next) + delete next; + next = nullptr; + FindClose(hFind); + } + + /** + * @brief Check file handler is valid + * @return status True(success) or False(fail) + */ + bool isValid() const { + return (hFind != INVALID_HANDLE_VALUE && FindFileData.dwReserved0); + } + + /** + * @brief Add directory to directory names struct + * @return pointer to directory names struct + */ + dirent* nextEnt() { + if (next != nullptr) + delete next; + next = nullptr; + + if (!FindFileData.dwReserved0) + return nullptr; + + wchar_t wbuf[4096]; + + size_t outSize; + mbstowcs_s(&outSize, wbuf, 4094, FindFileData.cFileName, 4094); + next = new dirent(wbuf); + FindFileData.dwReserved0 = FindNextFileA(hFind, &FindFileData); + return next; + } +}; + +/** + * @brief 
Create directory data struct element + * @param string directory path + * @return pointer to directory data struct element + */ +static DIR* opendir(const char* dirPath) { + auto dp = new DIR(dirPath); + if (!dp->isValid()) { + delete dp; + return nullptr; + } + return dp; +} + +/** + * @brief Walk throw directory data struct + * @param pointer to directory data struct + * @return pointer to directory data struct next element + */ +static struct dirent* readdir(DIR* dp) { + return dp->nextEnt(); +} + +/** + * @brief Remove directory data struct + * @param pointer to struct directory data + * @return void + */ +static void closedir(DIR* dp) { + delete dp; +} + + #ifdef WIN32_LEAN_AND_MEAN_UNDEF + #undef WIN32_LEAN_AND_MEAN + #undef WIN32_LEAN_AND_MEAN_UNDEF + #endif + + #ifdef NOMINMAX_UNDEF + #undef NOMINMAX_UNDEF + #undef NOMINMAX + #endif + +#else + + #include + #include + +#endif From d4f77f1d3e880fb893f61f0c2fc90a5c7d4f2cc0 Mon Sep 17 00:00:00 2001 From: Vitaliy Urusovskij Date: Mon, 28 Feb 2022 12:30:21 +0300 Subject: [PATCH 117/310] Mute 'maybe-uninitialized' error for RELWITHDEBINFO in intel_gpu (#10682) --- src/plugins/intel_gpu/CMakeLists.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/plugins/intel_gpu/CMakeLists.txt b/src/plugins/intel_gpu/CMakeLists.txt index 06bf07bb4c8..9b30bbd67a5 100644 --- a/src/plugins/intel_gpu/CMakeLists.txt +++ b/src/plugins/intel_gpu/CMakeLists.txt @@ -46,7 +46,8 @@ set_target_properties(${TARGET_NAME} PROPERTIES INTERPROCEDURAL_OPTIMIZATION_REL # Workaround to avoid warnings during LTO build if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") - set_target_properties(${TARGET_NAME} PROPERTIES LINK_FLAGS_RELEASE "-Wno-error=maybe-uninitialized -Wno-maybe-uninitialized") + set_target_properties(${TARGET_NAME} PROPERTIES LINK_FLAGS_RELEASE "-Wno-error=maybe-uninitialized -Wno-maybe-uninitialized" + LINK_FLAGS_RELWITHDEBINFO "-Wno-error=maybe-uninitialized -Wno-maybe-uninitialized") endif() if(ENABLE_TESTS) From b9ef57112e73dd11c4da3c3aeed476fcc6cc0b5f Mon Sep 17 00:00:00 2001 From: Maxim Gordeev Date: Mon, 28 Feb 2022 12:31:01 +0300 Subject: [PATCH 118/310] [IE Samples] Fixed memory allocation problem for speech sample (#10671) --- samples/cpp/speech_sample/main.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/samples/cpp/speech_sample/main.cpp b/samples/cpp/speech_sample/main.cpp index 135f310aa26..f62d9f009f0 100644 --- a/samples/cpp/speech_sample/main.cpp +++ b/samples/cpp/speech_sample/main.cpp @@ -562,7 +562,9 @@ int main(int argc, char* argv[]) { "but in fact we were not able to cast input to Tensor"); throw std::logic_error(errMessage); } - memcpy(minput.data(), inputFrame[i], minput.get_byte_size()); + memcpy(minput.data(), + inputFrame[i], + numFramesThisBatch * numFrameElementsInput[i] * sizeof(float)); // Used to infer fewer frames than the batch size if (batchSize != numFramesThisBatch) { memset(minput.data() + numFramesThisBatch * numFrameElementsInput[i], From 1ceb9729e949e56b43b70d32ec6c7b7f606a4ac4 Mon Sep 17 00:00:00 2001 From: Vladislav Golubev Date: Mon, 28 Feb 2022 14:06:17 +0300 Subject: [PATCH 119/310] [CPU] friendly name duplication fixed for the TypeRelaxed case (#10486) --- .../move_eltwise_up_data_movement.cpp | 3 ++ .../move_eltwise_up_data_movement_test.cpp | 33 +++++++++++++++++++ 2 files changed, 36 insertions(+) diff --git a/src/plugins/intel_cpu/src/ngraph_transformations/move_eltwise_up_data_movement.cpp 
b/src/plugins/intel_cpu/src/ngraph_transformations/move_eltwise_up_data_movement.cpp index 74eb8dc90cb..ab5717900f4 100644 --- a/src/plugins/intel_cpu/src/ngraph_transformations/move_eltwise_up_data_movement.cpp +++ b/src/plugins/intel_cpu/src/ngraph_transformations/move_eltwise_up_data_movement.cpp @@ -95,6 +95,9 @@ ov::intel_cpu::MoveEltwiseUpThroughDataMov::MoveEltwiseUpThroughDataMov() { ngraph::OutputVector eltwiseInputs = eltwise->input_values(); eltwiseInputs[0] = child->input_value(0); auto newEltwise = eltwise->clone_with_new_inputs(eltwiseInputs); + // WA: it's necessary to set empty friendly name here + // to avoid name duplication in TypeRelaxed cases + newEltwise->set_friendly_name(""); ngraph::copy_runtime_info(eltwise, newEltwise); ngraph::OutputVector childInputs = child->input_values(); diff --git a/src/tests/unit/cpu/ngraph_transformations/move_eltwise_up_data_movement_test.cpp b/src/tests/unit/cpu/ngraph_transformations/move_eltwise_up_data_movement_test.cpp index 1358440ec98..969aa008c98 100644 --- a/src/tests/unit/cpu/ngraph_transformations/move_eltwise_up_data_movement_test.cpp +++ b/src/tests/unit/cpu/ngraph_transformations/move_eltwise_up_data_movement_test.cpp @@ -6,6 +6,7 @@ #include #include +#include "ngraph_ops/type_relaxed.hpp" #include #include @@ -49,6 +50,38 @@ TEST_F(MoveEltwiseUpThroughDataMovTest, SingleUnaryEltwise) { } } +TEST_F(MoveEltwiseUpThroughDataMovTest, TypeRelaxedEltwise) { + const ngraph::Shape shape{1, 3, 224, 224}; + const std::vector input_order = {3, 2, 1, 0}; + { + auto input = std::make_shared(ngraph::element::f32, shape); + auto intermediate_op = std::make_shared(input, 0, 6); + + auto transpose_const = + ngraph::opset8::Constant::create(ngraph::element::i64, ngraph::Shape{input_order.size()}, input_order); + auto transpose = std::make_shared(intermediate_op, transpose_const); + + auto mul_const = ngraph::opset8::Constant::create(ngraph::element::f32, {}, {2.f}); + auto multiply = std::make_shared>(transpose, mul_const); + + function = std::make_shared(ngraph::NodeVector{multiply}, ngraph::ParameterVector{input}); + manager.register_pass(); + } + { + auto input = std::make_shared(ngraph::element::f32, shape); + auto intermediate_op = std::make_shared(input, 0, 6); + + auto mul_const = ngraph::opset8::Constant::create(ngraph::element::f32, {}, {2.f}); + auto multiply = std::make_shared>(intermediate_op, mul_const); + + auto transpose_const = ngraph::opset8::Constant::create(ngraph::element::i64, ngraph::Shape{input_order.size()}, input_order); + auto transpose = std::make_shared(multiply, transpose_const); + + function_ref = + std::make_shared(ngraph::NodeVector{transpose}, ngraph::ParameterVector{input}); + } +} + TEST_F(MoveEltwiseUpThroughDataMovTest, EltwiseSequence) { const ngraph::Shape shape{1, 3, 224, 224}; const std::vector input_order = {1, 2, 0, 3}; From bed0adf5ef2d84d57c8c06e2740a578829b3a044 Mon Sep 17 00:00:00 2001 From: Maxim Shevtsov Date: Mon, 28 Feb 2022 15:04:03 +0300 Subject: [PATCH 120/310] creating remote ocl buffer/tensor per request, to avoid simulteneous locking of the same ocl buffer when auto-batching is used (#10607) --- samples/cpp/benchmark_app/main.cpp | 7 ++- .../benchmark_app/remote_tensors_filling.cpp | 63 ++++++++++--------- .../benchmark_app/remote_tensors_filling.hpp | 3 +- 3 files changed, 40 insertions(+), 33 deletions(-) diff --git a/samples/cpp/benchmark_app/main.cpp b/samples/cpp/benchmark_app/main.cpp index b4613fac299..3518ffc5eee 100644 --- a/samples/cpp/benchmark_app/main.cpp +++ 
b/samples/cpp/benchmark_app/main.cpp @@ -789,8 +789,11 @@ int main(int argc, char* argv[]) { std::map inputsData; if (isFlagSetInCommandLine("use_device_mem")) { if (device_name.find("GPU") == 0) { - inputsData = - ::gpu::get_remote_input_tensors(inputFiles, app_inputs_info, compiledModel, clInputsBuffer); + inputsData = ::gpu::get_remote_input_tensors(inputFiles, + app_inputs_info, + compiledModel, + clInputsBuffer, + inferRequestsQueue.requests.size()); useGpuMem = true; } else if (device_name.find("CPU") == 0) { if (newInputType) { diff --git a/samples/cpp/benchmark_app/remote_tensors_filling.cpp b/samples/cpp/benchmark_app/remote_tensors_filling.cpp index 0f2065c2979..40bc581d153 100644 --- a/samples/cpp/benchmark_app/remote_tensors_filling.cpp +++ b/samples/cpp/benchmark_app/remote_tensors_filling.cpp @@ -69,7 +69,8 @@ std::map get_remote_input_tensors( const std::map>& inputFiles, const std::vector& app_inputs_info, const ov::CompiledModel& compiledModel, - std::vector& clBuffer) { + std::vector& clBuffer, + size_t num_requests) { #ifdef HAVE_DEVICE_MEM_SUPPORT slog::info << "Device memory will be used for input and output blobs" << slog::endl; if (inputFiles.size()) { @@ -82,43 +83,45 @@ std::map get_remote_input_tensors( auto& oclContext = static_cast(context); auto oclInstance = std::make_shared(oclContext.get()); - for (auto& inputs_info : app_inputs_info) { - for (auto& input : inputs_info) { - // Fill random - slog::info << "Prepare remote blob for input '" << input.first << "' with random values (" - << std::string((input.second.is_image() ? "image" : "some binary data")) << " is expected)" - << slog::endl; + for (int i = 0; i < num_requests; i++) { + for (auto& inputs_info : app_inputs_info) { + for (auto& input : inputs_info) { + // Fill random + slog::info << "Prepare remote blob for input '" << input.first << "' with random values (" + << std::string((input.second.is_image() ? 
"image" : "some binary data")) << " is expected)" + << slog::endl; - // Creating and filling shared buffers - cl_int err; - auto elementsNum = std::accumulate(begin(input.second.dataShape), - end(input.second.dataShape), - 1, - std::multiplies()); - auto inputSize = elementsNum * input.second.type.bitwidth() / 8; + // Creating and filling shared buffers + cl_int err; + auto elementsNum = std::accumulate(begin(input.second.dataShape), + end(input.second.dataShape), + 1, + std::multiplies()); + auto inputSize = elementsNum * input.second.type.bitwidth() / 8; - clBuffer.push_back( - cl::Buffer(oclInstance->_context, CL_MEM_READ_WRITE, (cl::size_type)inputSize, NULL, &err)); + clBuffer.push_back( + cl::Buffer(oclInstance->_context, CL_MEM_READ_WRITE, (cl::size_type)inputSize, NULL, &err)); - void* mappedPtr = oclInstance->_queue.enqueueMapBuffer(clBuffer.back(), - CL_TRUE, - CL_MEM_READ_WRITE, - 0, - (cl::size_type)inputSize); + void* mappedPtr = oclInstance->_queue.enqueueMapBuffer(clBuffer.back(), + CL_TRUE, + CL_MEM_READ_WRITE, + 0, + (cl::size_type)inputSize); - auto tensor = oclContext.create_tensor(input.second.type, input.second.dataShape, clBuffer.back().get()); - remoteTensors[input.first].push_back(tensor); + auto tensor = + oclContext.create_tensor(input.second.type, input.second.dataShape, clBuffer.back().get()); + remoteTensors[input.first].push_back(tensor); - if (inputFiles.empty()) { - // Filling in random data - fill_buffer(mappedPtr, elementsNum, input.second.type); - } else { - // TODO: add filling with real image data + if (inputFiles.empty()) { + // Filling in random data + fill_buffer(mappedPtr, elementsNum, input.second.type); + } else { + // TODO: add filling with real image data + } + oclInstance->_queue.enqueueUnmapMemObject(clBuffer.back(), mappedPtr); } - oclInstance->_queue.enqueueUnmapMemObject(clBuffer.back(), mappedPtr); } } - return remoteTensors; #else IE_THROW() << "Device memory requested for GPU device, but OpenCL was not linked"; diff --git a/samples/cpp/benchmark_app/remote_tensors_filling.hpp b/samples/cpp/benchmark_app/remote_tensors_filling.hpp index 7cb919f565f..4e8555b844e 100644 --- a/samples/cpp/benchmark_app/remote_tensors_filling.hpp +++ b/samples/cpp/benchmark_app/remote_tensors_filling.hpp @@ -61,7 +61,8 @@ std::map get_remote_input_tensors( const std::map>& inputFiles, const std::vector& app_inputs_info, const ov::CompiledModel& compiledModel, - std::vector& clBuffer); + std::vector& clBuffer, + size_t num_requests); std::map get_remote_output_tensors(const ov::CompiledModel& compiledModel, std::map& clBuffer); From f6fbef1f66beb0d558574dc108bc3b4022e39400 Mon Sep 17 00:00:00 2001 From: Irina Efode Date: Mon, 28 Feb 2022 15:06:03 +0300 Subject: [PATCH 121/310] Allow to specify conformance by shape_type (#10667) * Init * the solution * Remove extra * Update CMakeLists.txt * Readme * fix build * dd --- .../conformance/subgraphs_dumper/CMakeLists.txt | 14 ++++++++++---- .../plugin/conformance/test_runner/README.md | 2 ++ .../conformance_infra/include/gflag_config.hpp | 4 ++++ .../include/read_ir_test/read_ir.hpp | 8 ++++++++ .../test_runner/conformance_infra/src/main.cpp | 15 ++++++++++++++- .../src/read_ir_test/read_ir.cpp | 17 +++++++++-------- 6 files changed, 47 insertions(+), 13 deletions(-) diff --git a/src/tests/functional/plugin/conformance/subgraphs_dumper/CMakeLists.txt b/src/tests/functional/plugin/conformance/subgraphs_dumper/CMakeLists.txt index 7570bcabcb9..a0cc8d349e8 100644 --- 
a/src/tests/functional/plugin/conformance/subgraphs_dumper/CMakeLists.txt +++ b/src/tests/functional/plugin/conformance/subgraphs_dumper/CMakeLists.txt @@ -4,6 +4,13 @@ set(TARGET_NAME subgraphsDumper) +list(APPEND DEPENDENCIES + gflags + inference_engine + commonTestUtils + pugixml::static +) + addIeTargetTest( NAME ${TARGET_NAME} ROOT ${CMAKE_CURRENT_SOURCE_DIR} @@ -11,10 +18,9 @@ addIeTargetTest( ${CMAKE_CURRENT_SOURCE_DIR}/include LINK_LIBRARIES PRIVATE - gflags - inference_engine - commonTestUtils - pugixml::static + ${DEPENDENCIES} + DEPENDENCIES + ${DEPENDENCIES} ADD_CPPLINT ) diff --git a/src/tests/functional/plugin/conformance/test_runner/README.md b/src/tests/functional/plugin/conformance/test_runner/README.md index 2e05c79aaec..1e56d86461d 100644 --- a/src/tests/functional/plugin/conformance/test_runner/README.md +++ b/src/tests/functional/plugin/conformance/test_runner/README.md @@ -45,6 +45,8 @@ The target is able to take the following command-line arguments: * `--save_report_timeout` allows to try to save report in cycle using timeout (in seconds). * `--output_folder` Paths to the output folder to save report. * `--extract_body` allows to count extracted operation bodies to report. +* `--shape_mode` Optional. Allows to run `static`, `dynamic` or both scenarios. Default value is empty string allows to run both scenarios. Possible values + are `static`, `dynamic`, `` * All `gtest` command-line parameters The result of execution is `report.xml` file. It demonstrates tests statistic like pass rate, passed, crashed, skipped failed tests and plugin implementation diff --git a/src/tests/functional/plugin/conformance/test_runner/conformance_infra/include/gflag_config.hpp b/src/tests/functional/plugin/conformance/test_runner/conformance_infra/include/gflag_config.hpp index 8a0c9f6b67b..836bf0a0c4f 100644 --- a/src/tests/functional/plugin/conformance/test_runner/conformance_infra/include/gflag_config.hpp +++ b/src/tests/functional/plugin/conformance/test_runner/conformance_infra/include/gflag_config.hpp @@ -33,6 +33,8 @@ static const char skip_config_path_message[] = "Optional. Allows to specify path static const char config_path_message[] = "Optional. Allows to specify path to file contains plugin config. " "Default value is empty string."; static const char extract_body_message[] = "Optional. Allows to count extracted operation bodies to report. Default value is false."; +static const char shape_mode_message[] = "Optional. Allows to run `static`, `dynamic` or both scenarios. Default value is empty string allows to run both" + " scenarios. 
Possible values are `static`, `dynamic`, ``"; DEFINE_bool(h, false, help_message); @@ -47,6 +49,7 @@ DEFINE_bool(disable_test_config, true, disable_test_config_message); DEFINE_bool(extend_report, false, extend_report_config_message); DEFINE_bool(report_unique_name, false, report_unique_name_message); DEFINE_bool(extract_body, false, extract_body_message); +DEFINE_string(shape_mode, "", shape_mode_message); /** * @brief This function shows a help message @@ -68,6 +71,7 @@ static void showUsage() { std::cout << " --input_folders \"\" " << input_folders_message << std::endl; std::cout << " --output_folder \"\" " << output_folder_message << std::endl; std::cout << " --plugin_lib_name " << output_folder_message << std::endl; + std::cout << " --shape_mode \"\" " << shape_mode_message << std::endl; } } // namespace conformance diff --git a/src/tests/functional/plugin/conformance/test_runner/conformance_infra/include/read_ir_test/read_ir.hpp b/src/tests/functional/plugin/conformance/test_runner/conformance_infra/include/read_ir_test/read_ir.hpp index f2c7ea1903b..c0dab47a33c 100644 --- a/src/tests/functional/plugin/conformance/test_runner/conformance_infra/include/read_ir_test/read_ir.hpp +++ b/src/tests/functional/plugin/conformance/test_runner/conformance_infra/include/read_ir_test/read_ir.hpp @@ -10,6 +10,14 @@ namespace ov { namespace test { namespace subgraph { +enum ShapeMode { + DYNAMIC, + STATIC, + BOTH +}; + +extern ShapeMode shapeMode; + using ReadIRParams = std::tuple< std::string, // IR path std::string, // Target Device diff --git a/src/tests/functional/plugin/conformance/test_runner/conformance_infra/src/main.cpp b/src/tests/functional/plugin/conformance/test_runner/conformance_infra/src/main.cpp index 0aaa25761ef..3ec2b46441b 100644 --- a/src/tests/functional/plugin/conformance/test_runner/conformance_infra/src/main.cpp +++ b/src/tests/functional/plugin/conformance/test_runner/conformance_infra/src/main.cpp @@ -2,12 +2,18 @@ // SPDX-License-Identifier: Apache-2.0 // +#include +#ifdef _WIN32 +#include +#endif + #include "gtest/gtest.h" #include "common_test_utils/file_utils.hpp" #include "functional_test_utils/skip_tests_config.hpp" -#include "shared_test_classes/base/layer_test_utils.hpp" +#include "functional_test_utils/layer_test_utils/environment.hpp" +#include "read_ir_test/read_ir.hpp" #include "gflag_config.hpp" #include "conformance.hpp" @@ -42,6 +48,13 @@ int main(int argc, char* argv[]) { LayerTestsUtils::Summary::setSaveReportWithUniqueName(FLAGS_report_unique_name); LayerTestsUtils::Summary::setOutputFolder(FLAGS_output_folder); LayerTestsUtils::Summary::setSaveReportTimeout(FLAGS_save_report_timeout); + if (FLAGS_shape_mode == std::string("static")) { + ov::test::subgraph::shapeMode = ov::test::subgraph::ShapeMode::STATIC; + } else if (FLAGS_shape_mode == std::string("dynamic")) { + ov::test::subgraph::shapeMode = ov::test::subgraph::ShapeMode::DYNAMIC; + } else if (FLAGS_shape_mode != std::string("")) { + throw std::runtime_error("Incorrect value for `--shape_mode`. Should be `dynamic`, `static` or ``. 
Current value is `" + FLAGS_shape_mode + "`"); + } // ---------------------------Initialization of Gtest env ----------------------------------------------- ov::test::conformance::targetDevice = FLAGS_device.c_str(); diff --git a/src/tests/functional/plugin/conformance/test_runner/conformance_infra/src/read_ir_test/read_ir.cpp b/src/tests/functional/plugin/conformance/test_runner/conformance_infra/src/read_ir_test/read_ir.cpp index 44a36a53f0b..551bf062d28 100644 --- a/src/tests/functional/plugin/conformance/test_runner/conformance_infra/src/read_ir_test/read_ir.cpp +++ b/src/tests/functional/plugin/conformance/test_runner/conformance_infra/src/read_ir_test/read_ir.cpp @@ -24,6 +24,9 @@ namespace ov { namespace test { namespace subgraph { + +ShapeMode shapeMode = ShapeMode::BOTH; + std::string ReadIRTest::getTestCaseName(const testing::TestParamInfo &obj) { using namespace CommonTestUtils; std::string pathToModel, deviceName; @@ -158,19 +161,17 @@ void ReadIRTest::SetUp() { } } } - std::vector staticShapes; - for (const auto param : function->get_parameters()) { - if (param->get_partial_shape().is_static()) { - staticShapes.push_back(param->get_shape()); - } else { - staticShapes.push_back(param->get_partial_shape().get_max_shape()); - } - } std::vector inputShapes; for (const auto& param : function -> get_parameters()) { if (param->get_partial_shape().is_static()) { + if (ov::test::subgraph::shapeMode == ov::test::subgraph::ShapeMode::DYNAMIC) { + GTEST_SKIP() << "Static cases are skipped according `shape_mode`"; + } inputShapes.push_back(InputShape{{}, {param->get_shape()}}); } else { + if (ov::test::subgraph::shapeMode == ov::test::subgraph::ShapeMode::STATIC) { + GTEST_SKIP() << "Dynamic cases are skipped according `shape_mode`"; + } ov::Shape midShape; for (const auto s : param->get_partial_shape()) { int dimValue = s.get_length(); From 7d0d950b9a1278aafa54925903e4e113f4dd45dd Mon Sep 17 00:00:00 2001 From: Maxim Vafin Date: Mon, 28 Feb 2022 15:30:33 +0300 Subject: [PATCH 122/310] Add pytorch Resnext101 from fb into documentation (#10665) --- .../prepare_model/convert_model/Convert_Model_From_PyTorch.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_PyTorch.md b/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_PyTorch.md index 35aa0741a88..5915feb05c2 100644 --- a/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_PyTorch.md +++ b/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_PyTorch.md @@ -40,6 +40,8 @@ Here is the list of models that are tested and guaranteed to be supported. Howev instruction which is used instead of steps 2 and 3 of [regular instructions](#typical-pytorch). * [BERT_NER](https://github.com/kamalkraj/BERT-NER) topology can be converted using steps described in [Convert PyTorch* BERT-NER to the IR](pytorch_specific/Convert_Bert_ner.md) instruction which is used instead of steps 2 and 3 of [regular instructions](#typical-pytorch). +* ResNeXt-101 from [facebookresearch/semi-supervised-ImageNet1K-models](https://github.com/facebookresearch/semi-supervised-ImageNet1K-models) + can be converted using [regular instructions](#typical-pytorch). 
## Typical steps to convert PyTorch\* model From 33ad1b96d4aa5f9ff7904c0b45fc88169e71c3fd Mon Sep 17 00:00:00 2001 From: Nikita Malinin Date: Mon, 28 Feb 2022 16:26:07 +0300 Subject: [PATCH 123/310] [POT] Update samples and samplers with the new DataLoader format (#10595) * Update samples and samplers with the new DataLoader format * Update with utils * Pylint updates * Update metric with the exception * Pylint * Update with the exception * Pylint * Revert index sampler changes * Update ImageLoader & SimplifiedEngine * Update with the different solution * Remove utils * Pylint * Remove list wrapping * Remove list from meta_data --- tools/pot/openvino/tools/pot/api/metric.py | 3 ++- .../3d_segmentation/3d_segmentation_sample.py | 11 ++--------- .../classification/classification_sample.py | 10 ++-------- .../face_detection/face_detection_sample.py | 12 +----------- .../api/samples/object_detection/data_loader.py | 8 ++++---- .../samples/segmentation/segmentation_sample.py | 14 +++----------- tools/pot/openvino/tools/pot/engines/ie_engine.py | 8 ++++++-- 7 files changed, 20 insertions(+), 46 deletions(-) diff --git a/tools/pot/openvino/tools/pot/api/metric.py b/tools/pot/openvino/tools/pot/api/metric.py index f5cb021cd2c..bff089f5b64 100644 --- a/tools/pot/openvino/tools/pot/api/metric.py +++ b/tools/pot/openvino/tools/pot/api/metric.py @@ -12,9 +12,10 @@ class Metric(ABC): self.reset() @property - @abstractmethod def value(self): """ Returns accuracy metric value for the last model output. """ + raise Exception('The value() property should be implemented to use this metric ' + 'with AccuracyAwareQuantization algorithm!') @property @abstractmethod diff --git a/tools/pot/openvino/tools/pot/api/samples/3d_segmentation/3d_segmentation_sample.py b/tools/pot/openvino/tools/pot/api/samples/3d_segmentation/3d_segmentation_sample.py index 2056d7e215a..4d0906f68fc 100644 --- a/tools/pot/openvino/tools/pot/api/samples/3d_segmentation/3d_segmentation_sample.py +++ b/tools/pot/openvino/tools/pot/api/samples/3d_segmentation/3d_segmentation_sample.py @@ -38,9 +38,8 @@ class BRATSDataLoader(DataLoader): mask_path = os.path.join(self.config.mask_dir, self._img_ids[index]) image_path = os.path.join(self.config.data_source, self._img_ids[index]) - annotation = (index, self._read_image(mask_path)) image, image_meta = self._preprocess_image(self._read_image(image_path)) - return annotation, image, image_meta + return image, self._read_image(mask_path), image_meta def __len__(self): """ Returns size of the dataset """ @@ -120,13 +119,6 @@ class DiceIndex(Metric): self._name = 'dice_index' self._overall_metric = [] - @property - def value(self): - """ Returns accuracy metric value for the last model output. - Possible format: {metric_name: [metric_values_per_image]} - """ - return {self._name: [np.mean(self._overall_metric[-1])]} - @property def avg_value(self): """ Returns accuracy metric value for all model outputs. 
@@ -195,6 +187,7 @@ class SegmentationEngine(IEEngine): processed_outputs = [] for output, meta in zip(outputs.values(), metadata): # Resize to bounding box size and extend to mask size + output = output[0] low = meta['bbox'][0] high = meta['bbox'][1] box_shape = tuple((high - low).astype(np.int32)) diff --git a/tools/pot/openvino/tools/pot/api/samples/classification/classification_sample.py b/tools/pot/openvino/tools/pot/api/samples/classification/classification_sample.py index 4cc2b7db1d5..33db1bdf51f 100644 --- a/tools/pot/openvino/tools/pot/api/samples/classification/classification_sample.py +++ b/tools/pot/openvino/tools/pot/api/samples/classification/classification_sample.py @@ -29,9 +29,8 @@ class ImageNetDataLoader(DataLoader): if index >= len(self): raise IndexError - annotation = (index, self._annotations[self._img_ids[index]])\ - if self._annotations else (index, None) - return annotation, self._read_image(self._img_ids[index]) + annotation = self._annotations[self._img_ids[index]] if self._annotations else None + return self._read_image(self._img_ids[index]), annotation # Methods specific to the current implementation @staticmethod @@ -84,11 +83,6 @@ class Accuracy(Metric): self._name = 'accuracy@top{}'.format(self._top_k) self._matches = [] - @property - def value(self): - """ Returns accuracy metric value for the last model output. """ - return {self._name: self._matches[-1]} - @property def avg_value(self): """ Returns accuracy metric value for all model outputs. """ diff --git a/tools/pot/openvino/tools/pot/api/samples/face_detection/face_detection_sample.py b/tools/pot/openvino/tools/pot/api/samples/face_detection/face_detection_sample.py index d9e6cc90aec..c7051ba090e 100644 --- a/tools/pot/openvino/tools/pot/api/samples/face_detection/face_detection_sample.py +++ b/tools/pot/openvino/tools/pot/api/samples/face_detection/face_detection_sample.py @@ -46,8 +46,7 @@ class WiderFaceLoader(DataLoader): if index >= len(self): raise IndexError - annotation = (index, self._annotations[self._img_ids[index]]) - return annotation, self._read_image(self._img_ids[index]) + return self._read_image(self._img_ids[index]), self._annotations[self._img_ids[index]] def __len__(self): """ Returns size of the dataset """ @@ -312,15 +311,6 @@ class Recall(Metric): self._n_recorded_faces = [] self._n_total_preds = [] - @property - def value(self): - """ Returns metric value for the last model output. - Possible format: {metric_name: [metric_values_per_image]} - """ - tp = np.cumsum(self._true_positives[-1])[np.arange(self._n_total_preds[-1])] - recalls = tp / np.maximum(self._n_recorded_faces[-1], np.finfo(np.float64).eps) - return {self._name: [recalls[-1]]} - @property def avg_value(self): """ Returns average metric value for all model outputs. 
diff --git a/tools/pot/openvino/tools/pot/api/samples/object_detection/data_loader.py b/tools/pot/openvino/tools/pot/api/samples/object_detection/data_loader.py index 0a400fd9919..3c2856d794d 100644 --- a/tools/pot/openvino/tools/pot/api/samples/object_detection/data_loader.py +++ b/tools/pot/openvino/tools/pot/api/samples/object_detection/data_loader.py @@ -12,8 +12,8 @@ from openvino.tools.pot import DataLoader class COCOLoader(DataLoader): def __init__(self, config): super().__init__(config) - self.images_path = config.images_path - self.annotation_path = config.annotation_path + self.images_path = self.config.images_path + self.annotation_path = self.config.annotation_path self.images = os.listdir(self.images_path) self.labels = None self.data, self.bbox = self.prepare_annotation() @@ -61,8 +61,8 @@ class COCOLoader(DataLoader): annotation = {'boxes': bbox, 'labels': labels, 'iscrowd': iscrowd, 'x_maxs': x_maxs, 'x_mins': x_mins, 'y_maxs': y_maxs, 'y_mins': y_mins} - annotation = (index, [annotation, shape_image]) - return annotation, self._read_and_preprocess_image(self.images_path + self.data[index]['file_name']) + annotation = [annotation, shape_image] + return self._read_and_preprocess_image(self.images_path + self.data[index]['file_name']), annotation def __len__(self): return len(self.images) diff --git a/tools/pot/openvino/tools/pot/api/samples/segmentation/segmentation_sample.py b/tools/pot/openvino/tools/pot/api/samples/segmentation/segmentation_sample.py index 0eacbe8937d..ec20ee4615a 100644 --- a/tools/pot/openvino/tools/pot/api/samples/segmentation/segmentation_sample.py +++ b/tools/pot/openvino/tools/pot/api/samples/segmentation/segmentation_sample.py @@ -33,8 +33,8 @@ class VOCSegmentationLoader(DataLoader): # Required methods: def __init__(self, config): super().__init__(config) - self._image_size = config.image_size - self._img_ids = self._read_img_ids(config) + self._image_size = self.config.image_size + self._img_ids = self._read_img_ids(self.config) def __getitem__(self, index): """ @@ -49,8 +49,7 @@ class VOCSegmentationLoader(DataLoader): mask_path = os.path.join(self.config.mask_dir, self._img_ids[index] + '.png') image_path = os.path.join(self.config.data_source, self._img_ids[index] + '.jpg') - annotation = (index, self._read_and_preprocess_mask(mask_path)) - return annotation, self._read_and_preprocess_image(image_path) + return self._read_and_preprocess_image(image_path), self._read_and_preprocess_mask(mask_path) def __len__(self): """ Returns size of the dataset """ @@ -93,13 +92,6 @@ class MeanIOU(Metric): self._current_cm = [] self._total_cm = np.zeros((self._classes_num, self._classes_num)) - @property - def value(self): - """ Returns metric value for the last model output. - Possible format: {metric_name: [metric_values_per_image]} - """ - return {self._name: [self._evaluate(cm) for cm in self._current_cm]} - @property def avg_value(self): """ Returns average metric value for all model outputs. diff --git a/tools/pot/openvino/tools/pot/engines/ie_engine.py b/tools/pot/openvino/tools/pot/engines/ie_engine.py index bff5a1e0d4d..d5d80c3055f 100644 --- a/tools/pot/openvino/tools/pot/engines/ie_engine.py +++ b/tools/pot/openvino/tools/pot/engines/ie_engine.py @@ -403,11 +403,15 @@ class IEEngine(Engine): raise RuntimeError('Inconsistent data in the batch. 
' 'Some items contain annotation, and some do not.') + if not all([isinstance(item[0], tuple) for item in batch]): + images, image_annotation = [data[0] for data in batch], [(idx, data[1]) for idx, data in enumerate(batch)] + else: + images, image_annotation = [data[1] for data in batch], [data[0] for data in batch] + if all([len(item) == 2 for item in batch]): - image_annotation, images = map(list, zip(*batch)) meta_data = [{}]*len(images) elif all([len(item) == 3 for item in batch]): - image_annotation, images, meta_data = map(list, zip(*batch)) + meta_data = [data[2] for data in batch] else: raise RuntimeError('Inconsistent data in the batch. ' 'Some items contain meta data, and some do not.') From 4a8b142fefcaa190378312cae42dd8864b846f5e Mon Sep 17 00:00:00 2001 From: Mateusz Tabaka Date: Mon, 28 Feb 2022 15:00:51 +0100 Subject: [PATCH 124/310] [PYTHON] fix importing lstm_sequence for opsets >= 5 (#10637) * [PYTHON] fix importing lstm_sequence for opsets >= 5 * update compat opsets --- src/bindings/python/src/compatibility/ngraph/opset5/__init__.py | 2 +- src/bindings/python/src/compatibility/ngraph/opset6/__init__.py | 2 +- src/bindings/python/src/compatibility/ngraph/opset7/__init__.py | 2 +- src/bindings/python/src/compatibility/ngraph/opset8/__init__.py | 2 +- src/bindings/python/src/openvino/runtime/opset5/__init__.py | 2 +- src/bindings/python/src/openvino/runtime/opset6/__init__.py | 2 +- src/bindings/python/src/openvino/runtime/opset7/__init__.py | 2 +- src/bindings/python/src/openvino/runtime/opset8/__init__.py | 2 +- 8 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/bindings/python/src/compatibility/ngraph/opset5/__init__.py b/src/bindings/python/src/compatibility/ngraph/opset5/__init__.py index 4605a5faa65..5a60a9ff2aa 100644 --- a/src/bindings/python/src/compatibility/ngraph/opset5/__init__.py +++ b/src/bindings/python/src/compatibility/ngraph/opset5/__init__.py @@ -74,7 +74,7 @@ from ngraph.opset5.ops import log_softmax from ngraph.opset5.ops import loop from ngraph.opset1.ops import lrn from ngraph.opset4.ops import lstm_cell -from ngraph.opset1.ops import lstm_sequence +from ngraph.opset5.ops import lstm_sequence from ngraph.opset1.ops import matmul from ngraph.opset1.ops import max_pool from ngraph.opset1.ops import maximum diff --git a/src/bindings/python/src/compatibility/ngraph/opset6/__init__.py b/src/bindings/python/src/compatibility/ngraph/opset6/__init__.py index f1e175aa6f6..6a15cdfcaa3 100644 --- a/src/bindings/python/src/compatibility/ngraph/opset6/__init__.py +++ b/src/bindings/python/src/compatibility/ngraph/opset6/__init__.py @@ -76,7 +76,7 @@ from ngraph.opset5.ops import log_softmax from ngraph.opset5.ops import loop from ngraph.opset1.ops import lrn from ngraph.opset4.ops import lstm_cell -from ngraph.opset1.ops import lstm_sequence +from ngraph.opset5.ops import lstm_sequence from ngraph.opset1.ops import matmul from ngraph.opset1.ops import max_pool from ngraph.opset1.ops import maximum diff --git a/src/bindings/python/src/compatibility/ngraph/opset7/__init__.py b/src/bindings/python/src/compatibility/ngraph/opset7/__init__.py index 13a39ef7815..ca7be65d95b 100644 --- a/src/bindings/python/src/compatibility/ngraph/opset7/__init__.py +++ b/src/bindings/python/src/compatibility/ngraph/opset7/__init__.py @@ -79,7 +79,7 @@ from ngraph.opset5.ops import log_softmax from ngraph.opset5.ops import loop from ngraph.opset1.ops import lrn from ngraph.opset4.ops import lstm_cell -from ngraph.opset1.ops import lstm_sequence +from ngraph.opset5.ops 
import lstm_sequence from ngraph.opset1.ops import matmul from ngraph.opset1.ops import max_pool from ngraph.opset1.ops import maximum diff --git a/src/bindings/python/src/compatibility/ngraph/opset8/__init__.py b/src/bindings/python/src/compatibility/ngraph/opset8/__init__.py index fc94b1243b4..d6e225e665f 100644 --- a/src/bindings/python/src/compatibility/ngraph/opset8/__init__.py +++ b/src/bindings/python/src/compatibility/ngraph/opset8/__init__.py @@ -84,7 +84,7 @@ from ngraph.opset5.ops import log_softmax from ngraph.opset5.ops import loop from ngraph.opset1.ops import lrn from ngraph.opset4.ops import lstm_cell -from ngraph.opset1.ops import lstm_sequence +from ngraph.opset5.ops import lstm_sequence from ngraph.opset1.ops import matmul from ngraph.opset8.ops import matrix_nms from ngraph.opset8.ops import max_pool diff --git a/src/bindings/python/src/openvino/runtime/opset5/__init__.py b/src/bindings/python/src/openvino/runtime/opset5/__init__.py index 74fb610a02b..f18d61e927d 100644 --- a/src/bindings/python/src/openvino/runtime/opset5/__init__.py +++ b/src/bindings/python/src/openvino/runtime/opset5/__init__.py @@ -74,7 +74,7 @@ from openvino.runtime.opset5.ops import log_softmax from openvino.runtime.opset5.ops import loop from openvino.runtime.opset1.ops import lrn from openvino.runtime.opset4.ops import lstm_cell -from openvino.runtime.opset1.ops import lstm_sequence +from openvino.runtime.opset5.ops import lstm_sequence from openvino.runtime.opset1.ops import matmul from openvino.runtime.opset1.ops import max_pool from openvino.runtime.opset1.ops import maximum diff --git a/src/bindings/python/src/openvino/runtime/opset6/__init__.py b/src/bindings/python/src/openvino/runtime/opset6/__init__.py index 3153669ea09..9721ad311b0 100644 --- a/src/bindings/python/src/openvino/runtime/opset6/__init__.py +++ b/src/bindings/python/src/openvino/runtime/opset6/__init__.py @@ -76,7 +76,7 @@ from openvino.runtime.opset5.ops import log_softmax from openvino.runtime.opset5.ops import loop from openvino.runtime.opset1.ops import lrn from openvino.runtime.opset4.ops import lstm_cell -from openvino.runtime.opset1.ops import lstm_sequence +from openvino.runtime.opset5.ops import lstm_sequence from openvino.runtime.opset1.ops import matmul from openvino.runtime.opset1.ops import max_pool from openvino.runtime.opset1.ops import maximum diff --git a/src/bindings/python/src/openvino/runtime/opset7/__init__.py b/src/bindings/python/src/openvino/runtime/opset7/__init__.py index 5c794e986ef..ec91b1b0f74 100644 --- a/src/bindings/python/src/openvino/runtime/opset7/__init__.py +++ b/src/bindings/python/src/openvino/runtime/opset7/__init__.py @@ -79,7 +79,7 @@ from openvino.runtime.opset5.ops import log_softmax from openvino.runtime.opset5.ops import loop from openvino.runtime.opset1.ops import lrn from openvino.runtime.opset4.ops import lstm_cell -from openvino.runtime.opset1.ops import lstm_sequence +from openvino.runtime.opset5.ops import lstm_sequence from openvino.runtime.opset1.ops import matmul from openvino.runtime.opset1.ops import max_pool from openvino.runtime.opset1.ops import maximum diff --git a/src/bindings/python/src/openvino/runtime/opset8/__init__.py b/src/bindings/python/src/openvino/runtime/opset8/__init__.py index cce21756f76..9fe98e1a76a 100644 --- a/src/bindings/python/src/openvino/runtime/opset8/__init__.py +++ b/src/bindings/python/src/openvino/runtime/opset8/__init__.py @@ -84,7 +84,7 @@ from openvino.runtime.opset5.ops import log_softmax from openvino.runtime.opset5.ops import 
loop from openvino.runtime.opset1.ops import lrn from openvino.runtime.opset4.ops import lstm_cell -from openvino.runtime.opset1.ops import lstm_sequence +from openvino.runtime.opset5.ops import lstm_sequence from openvino.runtime.opset1.ops import matmul from openvino.runtime.opset8.ops import matrix_nms from openvino.runtime.opset8.ops import max_pool From b319acc672e4d2ae81caa806ffd4fd08395aed6e Mon Sep 17 00:00:00 2001 From: Maxim Andronov Date: Mon, 28 Feb 2022 17:01:18 +0300 Subject: [PATCH 125/310] [CPU] Prohibit to load model with dynamic output shapes (#10643) --- src/plugins/intel_cpu/src/plugin.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/plugins/intel_cpu/src/plugin.cpp b/src/plugins/intel_cpu/src/plugin.cpp index cadea8efd78..0e7daa2e707 100644 --- a/src/plugins/intel_cpu/src/plugin.cpp +++ b/src/plugins/intel_cpu/src/plugin.cpp @@ -676,6 +676,16 @@ Engine::LoadExeNetworkImpl(const InferenceEngine::CNNNetwork &network, const std auto nGraphFunc = clonedNetwork.getFunction(); TransformationUpToCPUSpecificOpSet(nGraphFunc, enableLPT, enableSnippets, isLegacyAPI()); + // need to check that all outputs have static shapes + // checking that all inputs have static shapes is performed in the common part + if (isLegacyAPI()) { + for (const auto& res : nGraphFunc->get_results()) { + if (res->get_input_partial_shape(0).is_dynamic()) { + IE_THROW() << "CPU plug-in can't load a model with dynamic output shapes via legacy API."; + } + } + } + ApplyPerformanceHints(config, nGraphFunc); ConvertToCPUSpecificOpset(nGraphFunc); From 173f328c53d39dd42ecdb9de9e04f9d2c266683f Mon Sep 17 00:00:00 2001 From: Mikhail Nosov Date: Mon, 28 Feb 2022 17:04:59 +0300 Subject: [PATCH 126/310] Checking compatibility between 'pyopenvino' and 'libopenvino' (#10668) * Checking compatibility between 'pyopenvino' and 'libopenvino' on 'import phase' This fix is to prevent undefined behavior when user loads OpenVINO from python, but pyopenvino loads different version of 'libopenvino' This may happen if user has several releases installed and played around PATH/PYTHONPATH environment variables. In such case, user may have undefined behavior - application may crash in the middle of the usage or use incorrect release. Fix checks build versions for pyopenvino and ov::get_openvino_version. If mismatch occurs, exception is thrown. 
This logic is disabled if user has built OpenVINO locally, experienced developers probably know what they're doing, so if version has 'custom_' prefix - this logic is disabled * Removed custom logic for CI_BUILD_NUMBER, it is reused from already included version.cmake * Use addVersionDefines macro --- src/bindings/python/src/pyopenvino/CMakeLists.txt | 2 ++ src/bindings/python/src/pyopenvino/pyopenvino.cpp | 15 +++++++++++++++ 2 files changed, 17 insertions(+) diff --git a/src/bindings/python/src/pyopenvino/CMakeLists.txt b/src/bindings/python/src/pyopenvino/CMakeLists.txt index 7e645628278..c4c14077828 100644 --- a/src/bindings/python/src/pyopenvino/CMakeLists.txt +++ b/src/bindings/python/src/pyopenvino/CMakeLists.txt @@ -78,6 +78,8 @@ endif() target_include_directories(${PROJECT_NAME} PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/..") target_link_libraries(${PROJECT_NAME} PRIVATE openvino::runtime ${OFFLINE_TRANSFORMATIONS_LIB}) +addVersionDefines(pyopenvino.cpp CI_BUILD_NUMBER) + # perform copy if(OpenVINO_SOURCE_DIR) add_custom_command(TARGET ${PROJECT_NAME} diff --git a/src/bindings/python/src/pyopenvino/pyopenvino.cpp b/src/bindings/python/src/pyopenvino/pyopenvino.cpp index 012a5bf6fc5..53ac0003301 100644 --- a/src/bindings/python/src/pyopenvino/pyopenvino.cpp +++ b/src/bindings/python/src/pyopenvino/pyopenvino.cpp @@ -69,6 +69,21 @@ std::string get_version() { PYBIND11_MODULE(pyopenvino, m) { m.doc() = "Package openvino.pyopenvino which wraps openvino C++ APIs"; + std::string pyopenvino_version = CI_BUILD_NUMBER; + std::string runtime_version = get_version(); + bool is_custom_pyopenvino_version = pyopenvino_version.empty() || pyopenvino_version.find("custom_") == 0; + bool is_custom_runtime_version = runtime_version.empty() || runtime_version.find("custom_") == 0; + auto versions_compatible = + is_custom_pyopenvino_version || is_custom_runtime_version || pyopenvino_version == runtime_version; + OPENVINO_ASSERT(versions_compatible, + "OpenVINO Python version (", + pyopenvino_version, + ") mismatches with OpenVINO Runtime library version (", + runtime_version, + "). It can happen if you have 2 or more different versions of OpenVINO installed in system. " + "Please ensure that environment variables (e.g. 
PATH, PYTHONPATH) are set correctly so that " + "OpenVINO Runtime and Python libraries point to same release."); + m.def("get_version", &get_version); m.def("get_batch", &ov::get_batch); m.def("set_batch", &ov::set_batch); From 4b29eed013e1e86bab079831b5be7f126890daba Mon Sep 17 00:00:00 2001 From: Andrei Kochin Date: Mon, 28 Feb 2022 18:55:44 +0300 Subject: [PATCH 127/310] Update MO requirements to allow TF1.15 if already installed (#10673) * Update MO requirements to allow TF1.15 if already installed * Removing pyhton version check as redundant * Updating requirements.txt as well --- tools/mo/requirements.txt | 3 +-- tools/mo/requirements_tf.txt | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/tools/mo/requirements.txt b/tools/mo/requirements.txt index b672d62af54..7f5243164f5 100644 --- a/tools/mo/requirements.txt +++ b/tools/mo/requirements.txt @@ -1,5 +1,4 @@ -tensorflow~=2.5.0; python_version <= "3.6" -tensorflow~=2.5.3; python_version > "3.6" +tensorflow>=1.15.5,<2.6 mxnet~=1.2.0; sys_platform == 'win32' mxnet~=1.7.0.post2; sys_platform != 'win32' networkx~=2.5; python_version <= "3.6" diff --git a/tools/mo/requirements_tf.txt b/tools/mo/requirements_tf.txt index 6ba68c986cc..86a0c421c4d 100644 --- a/tools/mo/requirements_tf.txt +++ b/tools/mo/requirements_tf.txt @@ -1,5 +1,4 @@ -tensorflow~=2.5.0; python_version <= "3.6" -tensorflow~=2.5.3; python_version > "3.6" +tensorflow>=1.15.5,<2.6 networkx~=2.5; python_version <= "3.6" networkx~=2.6; python_version > "3.6" numpy>=1.16.6,<1.20 From 9da124544a1a790a97d1d9899e8365759560503d Mon Sep 17 00:00:00 2001 From: Ilya Churaev Date: Tue, 1 Mar 2022 09:03:59 +0300 Subject: [PATCH 128/310] Transformation guide (#10628) * Fixed some comments about transformations * Changed transformation guide * Fixed typo * Moved transformation doc to extensibility * Moved images to Extensibility_UG * Added separate document for each pass * Added see also section * Fixed comments --- docs/Extensibility_UG/Intro.md | 1 + docs/Extensibility_UG/graph_rewrite_pass.md | 28 ++ .../img/graph_rewrite_efficient_search.png | 0 .../img/graph_rewrite_execution.png | 0 .../img/ngraph_insert_node.png | 0 .../img/ngraph_replace_node.png | 0 .../img/register_new_node.png | 0 .../img/transformations_structure.png | 0 docs/Extensibility_UG/matcher_pass.md | 101 +++++ docs/Extensibility_UG/model_pass.md | 17 + docs/Extensibility_UG/ov_transformations.md | 172 ++++++++ docs/IE_PLUGIN_DG/PluginTesting.md | 8 +- docs/OV_Runtime_UG/model_representation.md | 2 +- docs/OV_Runtime_UG/openvino_temporary.md | 1 - docs/OV_Runtime_UG/ov_transformations.md | 407 ------------------ docs/documentation.md | 1 + docs/snippets/example_ngraph_utils.cpp | 116 ----- docs/snippets/ov_model_snippets.cpp | 229 ++++++++++ .../template_model_transformation.cpp | 6 +- .../template_model_transformation.hpp | 8 +- .../template_pattern_transformation.cpp | 13 +- .../template_pattern_transformation.hpp | 2 +- src/core/include/openvino/core/core.hpp | 1 + 23 files changed, 567 insertions(+), 546 deletions(-) create mode 100644 docs/Extensibility_UG/graph_rewrite_pass.md rename docs/{OV_Runtime_UG => Extensibility_UG}/img/graph_rewrite_efficient_search.png (100%) rename docs/{OV_Runtime_UG => Extensibility_UG}/img/graph_rewrite_execution.png (100%) rename docs/{OV_Runtime_UG => Extensibility_UG}/img/ngraph_insert_node.png (100%) rename docs/{OV_Runtime_UG => Extensibility_UG}/img/ngraph_replace_node.png (100%) rename docs/{OV_Runtime_UG => Extensibility_UG}/img/register_new_node.png 
(100%)
 rename docs/{OV_Runtime_UG => Extensibility_UG}/img/transformations_structure.png (100%)
 create mode 100644 docs/Extensibility_UG/matcher_pass.md
 create mode 100644 docs/Extensibility_UG/model_pass.md
 create mode 100644 docs/Extensibility_UG/ov_transformations.md
 delete mode 100644 docs/OV_Runtime_UG/ov_transformations.md
 create mode 100644 docs/snippets/ov_model_snippets.cpp
diff --git a/docs/Extensibility_UG/Intro.md b/docs/Extensibility_UG/Intro.md
index a738f3dfd77..67902dd56e6 100644
--- a/docs/Extensibility_UG/Intro.md
+++ b/docs/Extensibility_UG/Intro.md
@@ -110,5 +110,6 @@ After the build you can use path to your extension library to load your extensio
 
 ## See Also
 
+* [OpenVINO Transformations](./ov_transformations.md)
 * [Using Inference Engine Samples](../OV_Runtime_UG/Samples_Overview.md)
 * [Hello Shape Infer SSD sample](../../samples/cpp/hello_reshape_ssd/README.md)
diff --git a/docs/Extensibility_UG/graph_rewrite_pass.md b/docs/Extensibility_UG/graph_rewrite_pass.md
new file mode 100644
index 00000000000..11f178be4e3
--- /dev/null
+++ b/docs/Extensibility_UG/graph_rewrite_pass.md
@@ -0,0 +1,28 @@
+# OpenVINO Graph Rewrite Pass {#openvino_docs_Extensibility_UG_graph_rewrite_pass}
+
+`ov::pass::GraphRewrite` runs multiple matcher passes on `ov::Model` in a single graph traversal.
+Example:
+
+@snippet src/transformations/template_pattern_transformation.cpp matcher_pass:graph_rewrite
+
+In addition, GraphRewrite handles nodes that were registered by MatcherPasses during their execution. These nodes are added to the beginning of the sequence of nodes used for pattern matching.
+
+> **NOTE**: when using `ov::pass::Manager`, a temporary GraphRewrite is used to execute a single MatcherPass.
+
+GraphRewrite has two algorithms for MatcherPass execution. The first algorithm is straightforward: it applies each MatcherPass, in registration order, to the current node.
+
+![graph_rewrite_execution]
+
+However, this approach is not efficient when many passes are registered. Therefore, GraphRewrite first checks whether every MatcherPass pattern has a type-based root node (that is, the type of this node is not hidden inside a predicate).
+It then builds a map from the registered MatcherPasses, which avoids the cost of applying every MatcherPass to every node.
+
+![graph_rewrite_efficient_search]
+
+> **NOTE**: The GraphRewrite execution algorithm cannot be set manually; it depends only on the root nodes registered inside MatcherPasses.
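+
+For illustration only, the sketch below shows how two matcher passes might be grouped into one GraphRewrite so that both run in a single traversal of the model. The pass names `FuseMultiplyToConv` and `FuseAddToConv` are hypothetical placeholders for your own MatcherPass subclasses, and the header paths are assumed from the current source layout:
+
+```cpp
+#include <memory>
+#include <openvino/core/model.hpp>
+#include <openvino/pass/graph_rewrite.hpp>
+#include <openvino/pass/manager.hpp>
+
+void run_fusions(const std::shared_ptr<ov::Model>& model) {
+    ov::pass::Manager manager;
+    // Both hypothetical matcher passes are executed in a single traversal of the model.
+    auto fusions = manager.register_pass<ov::pass::GraphRewrite>();
+    fusions->add_matcher<FuseMultiplyToConv>();  // hypothetical MatcherPass subclass
+    fusions->add_matcher<FuseAddToConv>();       // hypothetical MatcherPass subclass
+    manager.run_passes(model);
+}
+```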
+ +## See Also + +* [OpenVINO™ Transformations](./ov_transformations.md) + +[graph_rewrite_execution]: ./img/graph_rewrite_execution.png +[graph_rewrite_efficient_search]: ./img/graph_rewrite_efficient_search.png diff --git a/docs/OV_Runtime_UG/img/graph_rewrite_efficient_search.png b/docs/Extensibility_UG/img/graph_rewrite_efficient_search.png similarity index 100% rename from docs/OV_Runtime_UG/img/graph_rewrite_efficient_search.png rename to docs/Extensibility_UG/img/graph_rewrite_efficient_search.png diff --git a/docs/OV_Runtime_UG/img/graph_rewrite_execution.png b/docs/Extensibility_UG/img/graph_rewrite_execution.png similarity index 100% rename from docs/OV_Runtime_UG/img/graph_rewrite_execution.png rename to docs/Extensibility_UG/img/graph_rewrite_execution.png diff --git a/docs/OV_Runtime_UG/img/ngraph_insert_node.png b/docs/Extensibility_UG/img/ngraph_insert_node.png similarity index 100% rename from docs/OV_Runtime_UG/img/ngraph_insert_node.png rename to docs/Extensibility_UG/img/ngraph_insert_node.png diff --git a/docs/OV_Runtime_UG/img/ngraph_replace_node.png b/docs/Extensibility_UG/img/ngraph_replace_node.png similarity index 100% rename from docs/OV_Runtime_UG/img/ngraph_replace_node.png rename to docs/Extensibility_UG/img/ngraph_replace_node.png diff --git a/docs/OV_Runtime_UG/img/register_new_node.png b/docs/Extensibility_UG/img/register_new_node.png similarity index 100% rename from docs/OV_Runtime_UG/img/register_new_node.png rename to docs/Extensibility_UG/img/register_new_node.png diff --git a/docs/OV_Runtime_UG/img/transformations_structure.png b/docs/Extensibility_UG/img/transformations_structure.png similarity index 100% rename from docs/OV_Runtime_UG/img/transformations_structure.png rename to docs/Extensibility_UG/img/transformations_structure.png diff --git a/docs/Extensibility_UG/matcher_pass.md b/docs/Extensibility_UG/matcher_pass.md new file mode 100644 index 00000000000..f85d0ecaefe --- /dev/null +++ b/docs/Extensibility_UG/matcher_pass.md @@ -0,0 +1,101 @@ +# OpenVINO Matcher Pass {#openvino_docs_Extensibility_UG_matcher_pass} + +`ov::pass::MatcherPass` is used for pattern-based transformations. + +Template for MatcherPass transformation class +@snippet src/transformations/template_pattern_transformation.hpp graph_rewrite:template_transformation_hpp + +@snippet src/transformations/template_pattern_transformation.cpp graph_rewrite:template_transformation_cpp + +To use `ov::pass::MatcherPass`, you need to complete these steps: +1. Create a pattern +2. Implement a callback +3. Register the pattern and Matcher +4. Execute MatcherPass + +So let's go through each of these steps. + +## Create a pattern + +Pattern is a single root `ov::Model`. But the only difference is that you do not need to create a model object, you just need to create and connect opset or special pattern operations. +Then you need to take the last created operation and put it as a root of the pattern. This root node will be used as a root node in pattern matching. +> **NOTE**: Any nodes in a pattern that have no consumers and are not registered as root will not be used in pattern matching. + +@snippet ov_model_snippets.cpp pattern:simple_example + +The `Parameter` operation in the example above has type and shape specified. These attributes are needed only to create Parameter operation class and will not be used in pattern matching. + +For more pattern examples, refer to the [pattern matching](#pattern_matching) section. 
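+
+As a rough sketch complementing the snippet referenced above (exact header paths are assumed), a pattern with a Relu root node could be built directly from opset operations like this:
+
+```cpp
+#include <memory>
+#include <openvino/opsets/opset8.hpp>
+#include <openvino/pass/pattern/matcher.hpp>
+
+std::shared_ptr<ov::pass::pattern::Matcher> create_relu_matcher() {
+    // Pattern: Parameter -> Relu; 'relu' is used as the pattern root.
+    // The Parameter type/shape are needed only to construct the node; they are not matched.
+    auto data = std::make_shared<ov::opset8::Parameter>(ov::element::f32, ov::Shape{1, 3, 224, 224});
+    auto relu = std::make_shared<ov::opset8::Relu>(data);
+    return std::make_shared<ov::pass::pattern::Matcher>(relu, "ReluPattern");
+}
+```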
+
+## Implement callback
+
+A callback is an action applied to every occurrence of the pattern. In general, the callback is a lambda function that takes a Matcher object holding the detected subgraph.
+
+@snippet ov_model_snippets.cpp pattern:callback_example
+
+The example above shows the callback structure and how the Matcher can be used to access the nodes detected by the pattern.
+The callback returns `true` if the root node was replaced and another pattern cannot be applied to the same root node; otherwise, it returns `false`.
+> **NOTE**: It is not recommended to manipulate nodes that are below the root node. This may affect GraphRewrite execution, as all nodes that come after the root node in topological order are expected to be valid and usable in pattern matching.
+
+MatcherPass also provides functionality to report newly created nodes that can take part in additional pattern matching.
+If the MatcherPass was registered in `ov::pass::Manager` or `ov::pass::GraphRewrite`, these registered nodes will be added for additional pattern matching.
+That means that matcher passes registered in `ov::pass::GraphRewrite` will be applied to these nodes.
+
+The example below shows how a single MatcherPass can fuse a sequence of operations using the `register_new_node` method.
+
+@snippet src/transformations/template_pattern_transformation.cpp matcher_pass:relu_fusion
+
+> **NOTE**: If you register multiple nodes, please add them in topological order. We do not topologically sort these nodes, as it is a time-consuming operation.
+
+## Register pattern and Matcher
+
+The last step is to register the Matcher and callback inside the MatcherPass. To do this, call the `register_matcher` method.
+> **NOTE**: Only one matcher can be registered for a single MatcherPass class.
+
+```cpp
+// Register matcher and callback
+register_matcher(m, callback);
+```
+## Execute MatcherPass
+
+MatcherPass has multiple ways to be executed:
+* Run on a single node - it can be useful if you want to run MatcherPass inside another transformation.
+@snippet src/transformations/template_pattern_transformation.cpp matcher_pass:run_on_node
+* Run on `ov::Model` using GraphRewrite - this approach gives the ability to run MatcherPass on a whole `ov::Model`. Moreover, multiple MatcherPass transformations can be registered in a single GraphRewrite to be executed in a single graph traversal.
+@snippet src/transformations/template_pattern_transformation.cpp matcher_pass:graph_rewrite
+* Run on `ov::Model` using `ov::pass::Manager` - this approach helps you to register MatcherPass for execution on `ov::Model` like any other transformation type.
+@snippet src/transformations/template_pattern_transformation.cpp matcher_pass:manager
+
+## Pattern Matching
+
+Sometimes a pattern cannot be expressed via regular operations, or expressing it is too complicated.
+For example, you may want to detect a **Convolution->Add** sub-graph without specifying a particular input type for the Convolution operation, or to create a pattern where some operations can have different types.
+For these cases, OpenVINO™ provides additional helpers to construct patterns for GraphRewrite transformations.
+
+There are two main helpers:
+1. `ov::pass::pattern::any_input` - helps to express inputs if their types are undefined.
+2. `ov::pass::pattern::wrap_type` - helps to express nodes of the pattern without specifying node attributes.
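+
+For instance, a sketch of a **Convolution->Add** pattern built with these helpers (header paths are assumed) might look as follows; the concrete snippets below go into more detail:
+
+```cpp
+#include <memory>
+#include <openvino/opsets/opset8.hpp>
+#include <openvino/pass/pattern/matcher.hpp>
+#include <openvino/pass/pattern/op/label.hpp>      // any_input
+#include <openvino/pass/pattern/op/wrap_type.hpp>  // wrap_type
+
+std::shared_ptr<ov::pass::pattern::Matcher> create_conv_add_matcher() {
+    // Convolution with two arbitrary inputs (data and weights), followed by Add with an arbitrary second input.
+    auto conv = ov::pass::pattern::wrap_type<ov::opset8::Convolution>(
+        {ov::pass::pattern::any_input(), ov::pass::pattern::any_input()});
+    auto add = ov::pass::pattern::wrap_type<ov::opset8::Add>({conv, ov::pass::pattern::any_input()});
+    // 'add' is the pattern root.
+    return std::make_shared<ov::pass::pattern::Matcher>(add, "ConvAddPattern");
+}
+```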
+
+Let's go through the examples to get a better understanding of how this works:
+
+> **NOTE**: Node attributes do not participate in pattern matching and are needed only for operation creation. Only operation types participate in pattern matching.
+
+The example below shows basic usage of `ov::pass::pattern::any_input`.
+Here we construct a Multiply pattern with an arbitrary first input and a Constant as the second input.
+Also, as Multiply is a commutative operation, it does not matter in which order we set the inputs (any_input/Constant or Constant/any_input) because both cases will be matched.
+
+@snippet ov_model_snippets.cpp pattern:label_example
+
+This example shows how we can construct a pattern when an operation has an arbitrary number of inputs.
+
+@snippet ov_model_snippets.cpp pattern:concat_example
+
+This example shows how to use a predicate to construct a pattern. It also shows how to match a pattern manually on a given node.
+
+@snippet ov_model_snippets.cpp pattern:predicate_example
+
+> **NOTE**: Be careful with manual matching because the Matcher object holds matched nodes. To clear a match, use the `m->clear_state()` method.
+
+## See Also
+
+* [OpenVINO™ Transformations](./ov_transformations.md)
diff --git a/docs/Extensibility_UG/model_pass.md b/docs/Extensibility_UG/model_pass.md
new file mode 100644
index 00000000000..d2add64d3a5
--- /dev/null
+++ b/docs/Extensibility_UG/model_pass.md
@@ -0,0 +1,17 @@
+# OpenVINO Model Pass {#openvino_docs_Extensibility_UG_model_pass}
+
+`ov::pass::ModelPass` is used for transformations that take the entire `ov::Model` as an input and process it.
+
+Template for a ModelPass transformation class:
+
+@snippet src/transformations/template_model_transformation.hpp model_pass:template_transformation_hpp
+
+@snippet src/transformations/template_model_transformation.cpp model_pass:template_transformation_cpp
+
+To use `ov::pass::ModelPass`, you need to override the `run_on_model` method where you will write the transformation code.
+The return value is `true` if the original model has changed during the transformation (a new operation was added, an operation replacement was made, or node attributes were changed); otherwise, it is `false`.
+`ov::pass::ModelPass`-based transformations can also be executed via `ov::pass::Manager`.
+
+## See Also
+
+* [OpenVINO™ Transformations](./ov_transformations.md)
diff --git a/docs/Extensibility_UG/ov_transformations.md b/docs/Extensibility_UG/ov_transformations.md
new file mode 100644
index 00000000000..3a39866eb95
--- /dev/null
+++ b/docs/Extensibility_UG/ov_transformations.md
@@ -0,0 +1,172 @@
+# Overview of Transformations API {#openvino_docs_transformations}
+
+@sphinxdirective
+
+.. toctree::
+   :maxdepth: 1
+   :hidden:
+
+   openvino_docs_Extensibility_UG_model_pass
+   openvino_docs_Extensibility_UG_matcher_pass
+   openvino_docs_Extensibility_UG_graph_rewrite_pass
+
+@endsphinxdirective
+
+This guide contains all the necessary information you need to start implementing OpenVINO™ transformations.
+
+## Working with Model
+
+Before moving on to the transformations themselves, it is worth saying a few words about the functions that allow you to modify `ov::Model`.
+This chapter extends the [model representation guide](../OV_Runtime_UG/model_representation.md) and shows an API that allows us to manipulate an `ov::Model`.
+
+### Working with node input and output ports
+
+First of all, let's talk about `ov::Node` input/output ports. Each OpenVINO™ operation has input and output ports, except for operations of `Parameter` or `Constant` type.
+
+Every port belongs to its node, so using a port we can access the parent node, get the shape and type of a particular input/output, get all consumers in the case of an output port, and get the producer node in the case of an input port.
+With an output port, we can set inputs for newly created operations.
+
+Let's look at the code example.
+
+@snippet ov_model_snippets.cpp ov:ports_example
+
+### Node replacement
+
+OpenVINO™ provides two ways for node replacement: via an OpenVINO™ helper function and directly via port methods. We are going to review both of them.
+
+Let's start with OpenVINO™ helper functions. The most popular function is `ov::replace_node(old_node, new_node)`.
+
+We will review a real replacement case where a Negative operation is replaced with Multiply.
+
+![ngraph_replace_node]
+
+@snippet ov_model_snippets.cpp ov:replace_node
+
+`ov::replace_node` has a constraint that the number of output ports for both operations must be the same; otherwise, it raises an exception.
+
+
+The alternative way to do the same replacement is the following:
+
+@snippet ov_model_snippets.cpp ov:manual_replace
+
+Another transformation example is insertion.
+
+![ngraph_insert_node]
+
+@snippet ov_model_snippets.cpp ov:insert_node
+
+An alternative way to insert an operation is to make a node copy and use `ov::replace_node()`:
+
+@snippet ov_model_snippets.cpp ov:insert_node_with_copy
+
+### Node elimination
+
+Another type of node replacement is its elimination.
+
+To eliminate an operation, OpenVINO™ has a special method that considers all limitations related to OpenVINO™ Runtime.
+
+@snippet ov_model_snippets.cpp ov:eliminate_node
+
+In case of successful replacement, `ov::replace_output_update_name()` automatically preserves the friendly name and runtime info.
+
+## Transformations types
+
+OpenVINO™ Runtime has three main transformation types:
+
+* [Model pass](./model_pass.md) - straightforward way to work with `ov::Model` directly
+* [Matcher pass](./matcher_pass.md) - pattern-based transformation approach
+* [Graph rewrite pass](./graph_rewrite_pass.md) - container for matcher passes needed for efficient execution
+
+![transformations_structure]
+
+## Transformation conditional compilation
+
+The transformation library has two internal macros to support the conditional compilation feature.
+
+* `MATCHER_SCOPE(region)` - allows disabling the MatcherPass if the matcher isn't used. The region name should be unique. This macro creates a local variable `matcher_name` which you should use as a matcher name.
+* `RUN_ON_MODEL_SCOPE(region)` - allows disabling the run_on_model pass if it isn't used. The region name should be unique.
+
+## Transformation writing essentials
+
+When developing a transformation, you need to follow these transformation rules:
+
+### 1. Friendly Names
+
+Each `ov::Node` has a unique name and a friendly name. In transformations, we care only about the friendly name because it represents the name from the model.
+To avoid losing the friendly name when replacing a node with another node or subgraph, set the original friendly name on the last node of the replacing subgraph. See the example below.
+
+@snippet ov_model_snippets.cpp ov:replace_friendly_name
+
+In more advanced cases, when the replaced operation has several outputs and we add additional consumers to its outputs, we decide how to set the friendly name by arrangement.
+
+### 2. Runtime Info
+
+Runtime info is a map (`std::map`) located inside the `ov::Node` class. It represents additional attributes of an `ov::Node`.
+These attributes can be set by users or by plugins, and when executing a transformation that changes `ov::Model`, we need to preserve these attributes as they will not be automatically propagated.
+In most cases, transformations have the following types: 1:1 (replace node with another node), 1:N (replace node with a sub-graph), N:1 (fuse sub-graph into a single node), N:M (any other transformation).
+Currently, there is no mechanism that automatically detects transformation types, so we need to propagate this runtime information manually. See the examples below.
+
+@snippet ov_model_snippets.cpp ov:copy_runtime_info
+
+When a transformation has multiple fusions or decompositions, `ov::copy_runtime_info` must be called multiple times, once for each case.
+
+> **NOTE**: `copy_runtime_info` removes rt_info from destination nodes. If you want to keep it, you need to include those nodes among the source nodes like this: `copy_runtime_info({a, b, c}, {a, b})`
+
+### 3. Constant Folding
+
+If your transformation inserts constant sub-graphs that need to be folded, do not forget to use `ov::pass::ConstantFolding()` after your transformation or call constant folding directly on the operation.
+The example below shows how a constant subgraph can be constructed.
+
+@snippet ov_model_snippets.cpp ov:constant_subgraph
+
+Manual constant folding is preferable to `ov::pass::ConstantFolding()` because it is much faster.
+
+Below you can find an example of manual constant folding:
+
+@snippet src/transformations/template_pattern_transformation.cpp manual_constant_folding
+
+## Common mistakes in transformations
+
+In the transformation development process:
+
+* Do not use deprecated OpenVINO™ API. Deprecated methods have the `OPENVINO_DEPRECATED` macro in their definition.
+* Do not pass `shared_ptr` as an input to another node if the type of the node is unknown or if it has multiple outputs. Use an explicit output port.
+* If you replace a node with another node that produces a different shape, remember that the new shape will not be propagated until the first `validate_nodes_and_infer_types` call for `ov::Model`. If you are using `ov::pass::Manager`, it will automatically call this method after each transformation execution.
+* Do not forget to call the `ov::pass::ConstantFolding` pass if your transformation creates constant subgraphs.
+* Use the latest OpSet if you are not developing a downgrade transformation pass.
+* When developing a callback for `ov::pass::MatcherPass`, do not change nodes that come after the root node in topological order.
+
+## Using pass manager
+
+`ov::pass::Manager` is a container class that can store a list of transformations and execute them. The main idea of this class is to provide a high-level representation for a grouped list of transformations.
+It can register and apply any [transformation pass](#transformations_types) on a model.
+In addition, `ov::pass::Manager` has extended debug capabilities (find more information in the [how to debug transformations](#how_to_debug_transformations) section).
+
+The example below shows basic usage of `ov::pass::Manager`
+
+@snippet src/transformations/template_pattern_transformation.cpp matcher_pass:manager3
+
+Another example shows how multiple matcher passes can be united into a single GraphRewrite.
+
+@snippet src/transformations/template_pattern_transformation.cpp matcher_pass:manager2
+
+## How to debug transformations
+
+If you are using `ov::pass::Manager` to run a sequence of transformations, you can get additional debug capabilities by using the following environment variables:
+
+```
+OV_PROFILE_PASS_ENABLE=1 - enables performance measurement for each transformation and prints execution status
+OV_ENABLE_VISUALIZE_TRACING=1 - enables visualization after each transformation. By default, it saves dot and svg files.
+```
+
+> **Note**: Make sure that you have dot installed on your machine; otherwise, it will silently save only the dot file without the svg file.
+
+## See Also
+
+* [OpenVINO™ Model Representation](../OV_Runtime_UG/model_representation.md)
+* [OpenVINO™ Extensions](./Intro.md)
+
+[ngraph_replace_node]: ./img/ngraph_replace_node.png
+[ngraph_insert_node]: ./img/ngraph_insert_node.png
+[transformations_structure]: ./img/transformations_structure.png
+[register_new_node]: ./img/register_new_node.png
diff --git a/docs/IE_PLUGIN_DG/PluginTesting.md b/docs/IE_PLUGIN_DG/PluginTesting.md
index 9ed3fa8911f..f985ad57771 100644
--- a/docs/IE_PLUGIN_DG/PluginTesting.md
+++ b/docs/IE_PLUGIN_DG/PluginTesting.md
@@ -41,18 +41,14 @@ To use these tests for your own plugin development, link the `IE::funcSharedTest
 To build test binaries together with other build artifacts, use the `make all` command. For details, see
 [Build Plugin Using CMake*](@ref openvino_docs_ie_plugin_dg_plugin_build).
 
-### Tests for plugin-specific OpenVINO™ transformations
-
-Please, refer to [Transformation testing](@ref openvino_docs_transformations) guide.
-
 ### How to Extend Inference Engine Plugin Tests
 
 Inference Engine Plugin tests are open for contribution.
 Add common test case definitions applicable for all plugins to the `IE::funcSharedTests` target within the DLDT repository. Then, any other
 plugin supporting corresponding functionality can instantiate the new test.
 
-All Inference Engine per-layer tests check test layers functionality. They are developed using ov::Models
+All Inference Engine per-layer tests check test layers functionality. They are developed using ov::Model
 as input graphs used by tests. In this case, to test a new layer with layer tests, extend
-the `IE::ngraphFunctions` library, which is also included in the Inference Engine Developer package, with a new OpenVINO™ Model
+the `IE::ngraphFunctions` library, which is also included in the Inference Engine Developer package, with a new model
 including the corresponding operation.
 
 > **NOTE**: When implementing a new subgraph test, add new single-layer tests for each operation of the subgraph if such test does not exist.
diff --git a/docs/OV_Runtime_UG/model_representation.md b/docs/OV_Runtime_UG/model_representation.md
index 9f24bc0d9c3..f3edf5e2f24 100644
--- a/docs/OV_Runtime_UG/model_representation.md
+++ b/docs/OV_Runtime_UG/model_representation.md
@@ -83,7 +83,7 @@ The following code creates a model with several outputs:
 @snippet example_ngraph_utils.cpp ov:serialize
 
 ### How can I develop my own transformation pass?
- See the [Transformations Developer Guide](./ov_transformations.md).
+ See the [Transformations Developer Guide](./../Extensibility_UG/ov_transformations.md).
## See Also diff --git a/docs/OV_Runtime_UG/openvino_temporary.md b/docs/OV_Runtime_UG/openvino_temporary.md index 073ca36706a..aa2c6adb66f 100644 --- a/docs/OV_Runtime_UG/openvino_temporary.md +++ b/docs/OV_Runtime_UG/openvino_temporary.md @@ -12,7 +12,6 @@ openvino_docs_IE_DG_Model_caching_overview openvino_docs_IE_DG_Int8Inference openvino_docs_IE_DG_Bfloat16Inference - openvino_docs_transformations @endsphinxdirective diff --git a/docs/OV_Runtime_UG/ov_transformations.md b/docs/OV_Runtime_UG/ov_transformations.md deleted file mode 100644 index 14a65c34d4c..00000000000 --- a/docs/OV_Runtime_UG/ov_transformations.md +++ /dev/null @@ -1,407 +0,0 @@ -# Overview of Transformations API {#openvino_docs_transformations} - -This guide contains all necessary information that you need to start implementing OpenVINO™ transformations. - -## Transformations types - -OpenVINO™ Runtime has three main transformation types: - -* `ov::pass::ModelPass` - straightforward way to work with `ov::Model` directly -* `ov::pass::MatcherPass` - pattern-based transformation approach -* `ov::pass::GraphRewrite` - container for matcher passes needed for efficient execution - -![transformations_structure] - -### ov::pass::ModelPass - -`ov::pass::ModelPass` is used for transformations that take entire `ov::Model` as an input and process it. - -Template for FunctionPass transformation class - -@snippet src/transformations/template_model_transformation.hpp model_pass:template_transformation_hpp - -@snippet src/transformations/template_model_transformation.cpp model_pass:template_transformation_cpp - -Using `ov::pass::ModelPass`, you need to override the `run_on_model` method where you will write the transformation code. -Return value is `true` if the original function has changed during transformation (new operation was added, or operations replacement was made, or node attributes were changed); otherwise, it is `false`. -For transformation API, please follow the [working with ov::Model](#working_with_ov_model) section. -Also `ov::pass::ModelPass` based transformations can be executed via `ov::pass::Manager`. See the examples in the [Using pass manager](#using_pass_manager) section. - -### ov::pass::MatcherPass - -`ov::pass::MatcherPass` is used for pattern-based transformations. - -Template for MatcherPass transformation class -@snippet src/transformations/template_pattern_transformation.hpp graph_rewrite:template_transformation_hpp - -@snippet src/transformations/template_pattern_transformation.cpp graph_rewrite:template_transformation_cpp - -To use `ov::pass::MatcherPass`, you need to complete these steps: -1. Create a pattern -2. Implement a callback -3. Register the pattern and Matcher -4. Execute MatcherPass - -So let's go through each of these steps. - -### Create a pattern -Pattern is a single root `ov::Model`. But the only difference is that you do not need to create a function object, you just need to create and connect opset or special pattern operations. -Then you need to take the last created operation and put it as a root of the pattern. This root node will be used as a root node in pattern matching. -> **NOTE**: Any nodes in a pattern that have no consumers and are not registered as root will not be used in pattern matching. - -@snippet example_ngraph_utils.cpp pattern:simple_example - -The `Parameter` operation in the example above has type and shape specified. These attributes are needed only to create Parameter operation class and will not be used in pattern matching. 
- -For more pattern examples, refer to the [pattern matching](#pattern_matching) section. - -### Implement callback -Callback is an action applied to every pattern entrance. In general, callback is the lambda function that takes Matcher object with detected subgraph. - -@snippet example_ngraph_utils.cpp pattern:callback_example - -The example above shows the callback structure and how Matcher can be used for accessing nodes detected by pattern. -Callback return value is `true` if root node was replaced and another pattern cannot be applied to the same root node; otherwise, it is `false`. -> **NOTE**: It is not recommended to manipulate with nodes that are under root node. This may affect GraphRewrite execution as it is expected that all nodes that come after root node in topological order are valid and can be used in pattern matching. - -MatcherPass also provides functionality that allows reporting of the newly created nodes that can be used in additional pattern matching. -If MatcherPass was registered in `pass::Manager` or `pass::GraphRewrite`, these registered nodes will be added for additional pattern matching. -That means that matcher passes registered in `pass::GraphRewrite` will be applied to these nodes. - -The example below shows how single MatcherPass can fuse sequence of operations using the `register_new_node` method. - -@snippet src/transformations/template_pattern_transformation.cpp matcher_pass:relu_fusion - -> **NOTE**: If you register multiple nodes, please add them in topological order. We do not topologically sort these nodes as it is a time-consuming operation. - -### Register pattern and Matcher -The last step is to register Matcher and callback inside the MatcherPass pass. To do this, call the `register_matcher` method. -> **NOTE**: Only one matcher can be registered for a single MatcherPass class. - -```cpp -// Register matcher and callback -register_matcher(m, callback); -``` -### Execute MatcherPass -MatcherPass has multiple ways to be executed: -* Run on a single node - it can be useful if you want to run MatcherPass inside another transformation. -@snippet src/transformations/template_pattern_transformation.cpp matcher_pass:run_on_node -* Run on `ngraph::Function` using GraphRewrite - this approach gives ability to run MatcherPass on whole `ngraph::Function`. Moreover, multiple MatcherPass transformation can be registered in a single GraphRewite to be executed in a single graph traversal. -@snippet src/transformations/template_pattern_transformation.cpp matcher_pass:graph_rewrite -* Run on `ngraph::Function` using `pass::Manager` - this approach helps you to register MatcherPass for execution on `ngraph::Function` as another transformation types. -@snippet src/transformations/template_pattern_transformation.cpp matcher_pass:manager - - -### ngraph::pass::GraphRewrite - -GraphRewrite pass serves for running multiple matcher passes on `ngraph::Function` in a single graph traversal. -Example: - -@snippet src/transformations/template_pattern_transformation.cpp matcher_pass:graph_rewrite - -In addition, GraphRewrite handles nodes that were registered by MatcherPasses during their execution. This nodes will be added to the beginning of the sequence with nodes for pattern matching. - -> **NOTE**: when using `pass::Manager` temporary GraphRewrite is used to execute single MatcherPass. - -GraphRewrite has two algorithms for MatcherPasses execution. First algorithm is straightforward. It applies each MatcherPass in registration order to current node. 
- -![graph_rewrite_execution] - -But it is not really efficient when you have a lot of registered passes. So first of all GraphRewrite checks that all MatcherPass patterns has type-based root node (it means that type of this node is not hidden into predicate). -And then creates map from registered MatcherPasses. That helps to avoid additional cost of applying each MatcherPass for each node. - -![graph_rewrite_efficient_search] - -> **NOTE**: GraphRewrite execution algorithm cannot be set manually and depends only on root nodes registered inside MatcherPasses. - -## Pattern Matching - -Sometimes patterns cannot be expressed via regular nGraph operations or it is too complicated. -For example, if you want to detect Convolution->Add sub-graph without specifying particular input type for Convolution operation or you want to create a pattern where some of operations can have different types. -And for these cases nGraph provides additional helpers to construct patterns for GraphRewrite transformations. - -There are two main helpers: -1. `ngraph::pattern::any_input` - helps to express inputs if their types are undefined. -2. `ngraph::pattern::wrap_type` - helps to express nodes of pattern without specifying node attributes. - -Let's go through the example to have better understanding of how it works: - -> **NOTE**: Node attributes do not participate in pattern matching and are needed only for operations creation. Only operation types participate in pattern matching. - -The example below shows basic usage of `pattern::any_input`. -Here we construct Multiply pattern with arbitrary first input and Constant as a second input. -Also as Multiply is commutative operation, it does not matter in which order we set inputs (any_input/Constant or Constant/any_input) because both cases will be matched. - -@snippet example_ngraph_utils.cpp pattern:label_example - -This example shows how we can construct a pattern when operation has arbitrary number of inputs. - -@snippet example_ngraph_utils.cpp pattern:concat_example - -This example shows how to use predicate to construct a pattern. Also it shows how to match pattern manually on given node. - -@snippet example_ngraph_utils.cpp pattern:predicate_example - -> **NOTE**: Be careful with manual matching because Matcher object holds matched nodes. To clear a match, use the m->clear_state() method. - -## Working with ngraph::Function - -In this chapter we will review nGraph API that allows us to manipulate with `ngraph::Function`. - -### ngraph::Node input and output ports - -First of all let's talk about `ngraph::Node` input/output ports. Each nGraph operation has input and output ports except cases when operation has `Result`, `Parameter`, or `Constant` type. - -Every port belongs to its node, so using a port we can access parent node, get shape and type for particular input/output, get all consumers in case of output port, and get producer node in case of input port. -With output port we can set inputs for newly created operations. - -Lets look at the code example. - -@snippet example_ngraph_utils.cpp ngraph:ports_example - -You may notice that we usually construct operations in this way: -```cpp -std::shared_ptr neg_const = opset1::Constant::create(sub->get_input_element_type(1), Shape{1}, {-1})); -Output data = node->input_value(0); -auto neg = std::make_shared(data, neg_const); -``` -In this example, the `opset3::Multiply` operation takes `Output` and `std::shared_ptr` as inputs. But the constructor takes both as `Output`. 
-In this case, `std::shared_ptr` will be automatically converted to `Output` if node has exactly one output port; otherwise, conversion raises an exception. - -### ngraph::Node replacement - -nGraph provides two ways for node replacement: via nGraph helper function and directly via port methods. We are going to review both of them. - -Let's start with nGraph helper functions. The most popular function is `ngraph::replace_node(old_node, new_node)`. - -We will review real replacement case where Negative operation is replaced with Multiply. - -![ngraph_replace_node] - -@snippet example_ngraph_utils.cpp ngraph:replace_node - -`ngraph::replace_node` has a constraint that number of output ports for both of ops must be the same; otherwise, it raises an exception. - - -The alternative way to do the same replacement is the following: -```cpp -// All neg->output(0) consumers will be moved to mul->output(0) port -neg->output(0).replace(mul->output(0)); -``` - -Another transformation example is insertion. - -![ngraph_insert_node] - -@snippet example_ngraph_utils.cpp ngraph:insert_node - -The alternative way to the insert operation is to make a node copy and use `replace_node`: - -@snippet example_ngraph_utils.cpp ngraph:insert_node_with_copy - -### ngraph::Node elimination - -Another type of node replacement is its elimination. - -To eliminate operation, nGraph has special method that considers all limitations related to InferenceEngine. - -@snippet example_ngraph_utils.cpp ngraph:eliminate_node - -`replace_output_update_name` in case of successful replacement it automatically preserves friendly name and runtime info. - - -## Transformation conditional compilation - -Transformation library has two internal macros to support conditional compilation feature. - -* `MATCHER_SCOPE(region)` - allows to disable the MatcherPass if matcher isn't used. The region name should be unique. This macro creates a local variable `matcher_name` which you should use as a matcher name. -* `RUN_ON_FUNCTION_SCOPE(region)` - allows to disable run_on_function pass if it isn't used. The region name should be unique. - -## Transformation writing essentials - -When developing a transformation, you need to follow these transformation rules: - -###1. Operation Set (OpSet) - -Use the latest version of OpSet in your transformation. An exception is op_conversion transformations, where different opsets can be used. - -@snippet example_ngraph_utils.cpp ov:include - -###2. Dynamic Shape and Rank - -nGraph has two types for shape representation: -`ngraph::Shape` - represents static shape. -`ngraph::PartialShape` - represents dynamic shape. It means that rank or some of dimensions are dynamic (undefined). -`ngraph::PartialShape` can be converted to `ngraph::Shape` using the `get_shape()` method if all dimensions are static; otherwise, conversion raises an exception. - -@snippet example_ngraph_utils.cpp ngraph:shape - -But in most cases before getting static shape using `get_shape()` method, you need to check that shape is static. - -Also if your transformation requires only input shape rank or particular dimension value, please do not use the `get_shape()` method. See the example below demonstrating how to avoid using `get_shape()` - -@snippet example_ngraph_utils.cpp ngraph:shape_check - -Not using `get_shape()` method makes your transformation more flexible and applicable for more cases. - -###3. Friendly Names - -Each `ngraph::Node` has a unique name (used for nGraph internals) and a friendly name. 
In transformations we care only about friendly name because it represents the name from intermediate representation (IR). -Also friendly name is used as output tensor name (until we do not have other way to represent output tensor name) and user code that requests intermediate outputs based on these names. -To avoid losing friendly name when replacing node with other node or subgraph, set the original friendly name to the latest node in replacing subgraph. See the example below. - -```cpp -// Replace Div operation with Power and Multiply sub-graph and set original friendly name to Multiply operation -auto pow = std::make_shared(div->input(1).get_source_output(), - op::Constant::create(div->get_input_element_type(1), Shape{1}, {-1})); -auto mul = std::make_shared(div->input(0).get_source_output(), pow); -mul->set_friendly_name(div->get_friendly_name()); -ngraph::replace_node(div, mul); -``` - -In more advanced cases, when replaced operation has several outputs and we add additional consumers to its outputs, we make a decision how to set friendly name by arrangement. - -###4. Runtime Info - -Runtime info is a map `std::map` located inside `ngraph::Node` class. It represents additional attributes in `ngraph::Node`. -These attributes can be set by users or by plugins and when executing transformation that changes `ngraph::Function` we need to preserve these attributes as they will not be automatically propagated. -In most cases, transformations have the following types: 1:1 (replace node with another node), 1:N (replace node with a sub-graph), N:1 (fuse sub-graph into a single node), N:M (any other transformation). -Currently, there is no mechanism that automatically detects transformation types, so we need to propagate this runtime information manually. See the examples below. - -```cpp -// Replace Transpose with Reshape operation (1:1) -ngraph::copy_runtime_info(transpose, reshape); -``` - -```cpp -// Replace Div operation with Power and Multiply sub-graph (1:N) -ngraph::copy_runtime_info(div, {pow, mul}); -``` - -```cpp -// Fuse Convolution with Add operation (N:1) -ngraph::copy_runtime_info({conv, bias}, {conv_ie}); -``` - -```cpp -// Any other transformation that replaces one sub-graph with another sub-graph (N:M) -ngraph::copy_runtime_info({a, b, c}, {e, f}); -``` - -When transformation has multiple fusions or decompositions, `ngraph::copy_runtime_info` must be called multiple times for each case. - -> **Note**: copy_runtime_info removes rt_info from destination nodes. If you want to keep it, you need to specify them in source nodes like this: copy_runtime_info({a, b, c}, {a, b}) - -###5. Constant Folding - -If your transformation inserts constant sub-graphs that need to be folded, do not forget to use `ngraph::pass::ConstantFolding()` after your transformation or call constant folding directly for operation. -The example below shows how constant subgraph can be constructed. - -```cpp -// After ConstantFolding pass Power will be replaced with Constant -auto pow = std::make_shared( - opset3::Constant::create(element::f32, Shape{1}, {2}) - opset3::Constant::create(element::f32, Shape{1}, {3})); -auto mul = std::make_shared(input /* not constant input */, pow); -``` - -Manual constant folding is more preferable than `ngraph::pass::ConstantFolding()` because it is much faster. 
- -Below you can find an example of manual constant folding: - -@snippet src/transformations/template_pattern_transformation.cpp manual_constant_folding - -## Common mistakes in transformations - -In transformation development process: - -* Do not use deprecated nGraph API. Deprecated methods has the `NGRAPH_DEPRECATED` macros in its definition. -* Do not pass `shared_ptr` as an input for other node if type of node is unknown or it has multiple outputs. Use explicit output port. -* If you replace node with another node that produces different shape, remember that new shape will not be propagated until the first `validate_nodes_and_infer_types` call for `ngraph::Function`. If you are using `pass::Manager`, it will automatically call this method after each transformation execution. -* Do not forget to call the `ngraph::ConstantFolding` pass if your transformation creates constant subgraphs. -* Use latest OpSet if you are not developing downgrade transformation pass. -* When developing a callback for `ngraph::pass::MatcherPass`, do not change nodes that come after the root node in topological order. - -## Using pass manager - -`ngraph::pass::Manager` is a container class that can store the list of transformations and execute them. The main idea of this class is to have high-level representation for grouped list of transformations. -It can register and apply any [transformation types](#transformations_types) on function. -In addition, `ngraph::pass::Manager` has extended debug capabilities (find more information in the [how to debug transformations](#how_to_debug_transformations) section). - -The example below shows basic usage of `ngraph::pass::Manager` - -@snippet src/transformations/template_pattern_transformation.cpp matcher_pass:manager3 - -Another example shows how multiple matcher passes can be united into single GraphRewrite. - -@snippet src/transformations/template_pattern_transformation.cpp matcher_pass:manager2 - -> **NOTE**: nGraph used to have the `pass::PassConfig` class for transformation pipeline manipulation. -This mechanism is now obsolete and the `pass::PassConfig` class will be removed in future release. - -## How to debug transformations - -The most popular tool for transformations debugging is the `ngraph::pass::VisualizeTree` transformation, which visualizes ngraph::Function. - -Usage example: - -@snippet example_ngraph_utils.cpp ov:visualize - -`ngraph::pass::VisualizeTree` can be parametrized via environment variables: - -``` -OV_VISUALIZE_TREE_OUTPUT_SHAPES=1 - visualize shapes -OV_VISUALIZE_TREE_OUTPUT_TYPES=1 - visualize types -OV_VISUALIZE_TREE_MIN_MAX_DENORMAL=1 - pretty denormal values -OV_VISUALIZE_TREE_RUNTIME_INFO=1 - print runtime information -OV_VISUALIZE_TREE_IO=1 - print I/O ports -OV_VISUALIZE_TREE_MEMBERS_NAME=1 - print member names -``` - -> **Note**: current VisualTree does not have user-friendly interface and it will be changed in the nearest future. The intention is to move visualization abilities inside transformations. - -If you are using `ngraph::pass::Manager` to run sequence of transformations, you can get additional debug capabilities by using the following environment variables: - -``` -OV_PROFILE_PASS_ENABLE=1 - enables performance measurement for each transformation and prints execution status -OV_ENABLE_VISUALIZE_TRACING=1 - enables visualization after each transformation. By default, it saves dot and svg files. 
-``` - -> **Note**: Make sure that you have dot installed on your machine; otherwise, it will silently save only dot file without svg file. - -## Disabling/Enabling specific transformations for plugin X - -In transformation library, we provide plugins transformations like CommonOptimizations, which contains predefined sequence of transformations. -We also provide a tool that helps to disable or partially disable particular transformations in a transformation pipeline. -For example, if a plugin uses the CommonOptimization transformation and needs to disable the ConvertGELU transformation, then inside the plugin we have to take the PassConfig instance -from pass::Manger and call disable method. - -@snippet example_ngraph_utils.cpp ngraph:disable_gelu - -In some cases, we need to disable transformation for some condition: - -@snippet example_ngraph_utils.cpp ngraph:disable_callback - -In some cases, pass::Manager pipelines inside transformations may have transformations disabled by default but enabled inside plugins. - -@snippet example_ngraph_utils.cpp ngraph:disabled_by_default - -PassConfig instance taken from pass::Manager is shared across all registered transformations including nested transformations. So it does not matter where we work with this object (before passes registration or after). - -## Transformations testing - -If you are developing new transformation inside plugin, you need to add test into the `template_plugin/tests/functional/transformations` folder. -We have two types of tests: nGraph reader tests located in `src/tests/functional/inference_engine/ngraph_reader` and transformation tests located in `src/tests/functional/inference_engine/transformations` -Reader tests are IR based and test end-to-end conversion from IR to CNNNetwork. Transformation tests test single ngraph transformations or low-level functions that are used inside transformations. - -The basic transformation test looks like this: - -@snippet tests/functional/transformations/template_transformations_test.cpp transformation:test - - -[ngraph_replace_node]: ./img/ngraph_replace_node.png -[ngraph_insert_node]: ./img/ngraph_insert_node.png -[transformations_structure]: ./img/transformations_structure.png -[register_new_node]: ./img/register_new_node.png -[graph_rewrite_execution]: ./img/graph_rewrite_execution.png -[graph_rewrite_efficient_search]: ./img/graph_rewrite_efficient_search.png diff --git a/docs/documentation.md b/docs/documentation.md index ba726707a03..ea26b3f22ff 100644 --- a/docs/documentation.md +++ b/docs/documentation.md @@ -69,6 +69,7 @@ :hidden: openvino_docs_Extensibility_UG_Intro + openvino_docs_transformations Inference Engine Plugin Developer Guide groupie_dev_api Plugin Transformation Pipeline diff --git a/docs/snippets/example_ngraph_utils.cpp b/docs/snippets/example_ngraph_utils.cpp index c6d32373551..185e9f425cf 100644 --- a/docs/snippets/example_ngraph_utils.cpp +++ b/docs/snippets/example_ngraph_utils.cpp @@ -70,65 +70,6 @@ std::shared_ptr create_advanced_function() { } // ! [ov:create_advanced_model] -void pattern_matcher_examples(std::shared_ptr node) { -{ -// ! [pattern:simple_example] -// Pattern example -auto input = std::make_shared(ov::element::i64, ov::Shape{1}); -auto shapeof = std::make_shared(input); - -// Create Matcher with Parameter->ShapeOf pattern -auto m = std::make_shared(shapeof, "MyPatternBasedTransformation"); -// ! [pattern:simple_example] - -// ! 
[pattern:callback_example] -ov::graph_rewrite_callback callback = [](ov::pass::pattern::Matcher& m) { - // Get root node - std::shared_ptr root_node = m.get_match_root(); - - // Get all nodes matched by pattern - ov::NodeVector nodes = m.get_matched_nodes(); - - // Transformation code - return false; -}; -// ! [pattern:callback_example] -} - -{ -// ! [pattern:label_example] -// Detect Multiply with arbitrary first input and second as Constant -// ov::pattern::op::Label - represent arbitrary input -auto input = ov::pass::pattern::any_input(); -auto value = ov::opset8::Constant::create(ov::element::f32, ov::Shape{1}, {0.5}); -auto mul = std::make_shared(input, value); -auto m = std::make_shared(mul, "MultiplyMatcher"); -// ! [pattern:label_example] -} - -{ -// ! [pattern:concat_example] -// Detect Concat operation with arbitrary number of inputs -auto concat = ov::pass::pattern::wrap_type(); -auto m = std::make_shared(concat, "ConcatMatcher"); -// ! [pattern:concat_example] -} - -{ -// ! [pattern:predicate_example] -// Detect Multiply->Add sequence where mul has exactly one consumer -auto mul = ov::pass::pattern::wrap_type(ov::pass::pattern::consumers_count(1)/*сheck consumers count*/); -auto add = ov::pass::pattern::wrap_type({mul, ov::pass::pattern::any_input()}); -auto m = std::make_shared(add, "MultiplyAddMatcher"); -// Matcher can be used to match pattern manually on given node -if (m->match(node->output(0))) { - // Successfully matched -} -// ! [pattern:predicate_example] -} - -} - bool ngraph_api_examples(std::shared_ptr node) { { // ! [ngraph:ports_example] @@ -185,63 +126,6 @@ auto dim = partial_shape[1].get_length(); return true; } -// ! [ngraph:replace_node] -bool ngraph_replace_node(std::shared_ptr node) { - // Step 1. Verify that node has opset8::Negative type - auto neg = std::dynamic_pointer_cast(node); - if (!neg) { - return false; - } - - // Step 2. Create opset8::Multiply operation where the first input is negative operation input and second as Constant with -1 value - auto mul = std::make_shared(neg->input_value(0), - ov::opset8::Constant::create(neg->get_element_type(), ov::Shape{1}, {-1})); - - mul->set_friendly_name(neg->get_friendly_name()); - // TODO: Move to new API - ngraph::copy_runtime_info(neg, mul); - - // Step 3. Replace Negative operation with Multiply operation - ov::replace_node(neg, mul); - return true; - - // Step 4. Negative operation will be removed automatically because all consumers was moved to Multiply operation -} -// ! [ngraph:replace_node] - -// ! [ngraph:insert_node] -// Step 1. Lets suppose that we have a node with single output port and we want to insert additional operation new_node after it -void insert_example(std::shared_ptr node) { - // Get all consumers for node - auto consumers = node->output(0).get_target_inputs(); - - // Step 2. Create new node. Let it be opset1::Relu. - auto new_node = std::make_shared(node); - - // Step 3. Reconnect all consumers to new_node - for (auto input : consumers) { - input.replace_source_output(new_node); - } -} -// ! [ngraph:insert_node] - -// ! [ngraph:insert_node_with_copy] -void insert_example_with_copy(std::shared_ptr node) { - // Make a node copy - auto node_copy = node->clone_with_new_inputs(node->input_values()); - // Create new node - auto new_node = std::make_shared(node_copy); - ov::replace_node(node, new_node); -} -// ! [ngraph:insert_node_with_copy] - -void eliminate_example(std::shared_ptr node) { -// ! 
[ngraph:eliminate_node] -// Suppose we have a node that we want to remove -bool success = replace_output_update_name(node->output(0), node->input_value(0)); -// ! [ngraph:eliminate_node] -} - // ! [ov:serialize] void serialize_example(std::shared_ptr f) { ov::pass::Manager manager; diff --git a/docs/snippets/ov_model_snippets.cpp b/docs/snippets/ov_model_snippets.cpp new file mode 100644 index 00000000000..e5ccca530c6 --- /dev/null +++ b/docs/snippets/ov_model_snippets.cpp @@ -0,0 +1,229 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include +#include +#include +#include +#include + +void pattern_matcher_examples(std::shared_ptr node) { +{ +// ! [pattern:simple_example] +// Pattern example +auto input = std::make_shared(ov::element::i64, ov::Shape{1}); +auto shapeof = std::make_shared(input); + +// Create Matcher with Parameter->ShapeOf pattern +auto m = std::make_shared(shapeof, "MyPatternBasedTransformation"); +// ! [pattern:simple_example] + +// ! [pattern:callback_example] +ov::graph_rewrite_callback callback = [](ov::pass::pattern::Matcher& m) { + // Get root node + std::shared_ptr root_node = m.get_match_root(); + + // Get all nodes matched by pattern + ov::NodeVector nodes = m.get_matched_nodes(); + + // Transformation code + return false; +}; +// ! [pattern:callback_example] +} + +{ +// ! [pattern:label_example] +// Detect Multiply with arbitrary first input and second as Constant +// ov::pattern::op::Label - represent arbitrary input +auto input = ov::pass::pattern::any_input(); +auto value = ov::opset8::Constant::create(ov::element::f32, ov::Shape{1}, {0.5}); +auto mul = std::make_shared(input, value); +auto m = std::make_shared(mul, "MultiplyMatcher"); +// ! [pattern:label_example] +} + +{ +// ! [pattern:concat_example] +// Detect Concat operation with arbitrary number of inputs +auto concat = ov::pass::pattern::wrap_type(); +auto m = std::make_shared(concat, "ConcatMatcher"); +// ! [pattern:concat_example] +} + +{ +// ! [pattern:predicate_example] +// Detect Multiply->Add sequence where mul has exactly one consumer +auto mul = ov::pass::pattern::wrap_type(ov::pass::pattern::consumers_count(1)/*сheck consumers count*/); +auto add = ov::pass::pattern::wrap_type({mul, ov::pass::pattern::any_input()}); +auto m = std::make_shared(add, "MultiplyAddMatcher"); +// Matcher can be used to match pattern manually on given node +if (m->match(node->output(0))) { + // Successfully matched +} +// ! [pattern:predicate_example] +} + +} + +bool openvino_api_examples(std::shared_ptr node) { +{ +// ! [ov:ports_example] +// Let's suppose that node is opset8::Convolution operation +// as we know opset8::Convolution has two input ports (data, weights) and one output port +ov::Input data = node->input(0); +ov::Input weights = node->input(1); +ov::Output output = node->output(0); + +// Getting shape and type +auto pshape = data.get_partial_shape(); +auto el_type = data.get_element_type(); + +// Getting parent for input port +ov::Output parent_output; +parent_output = data.get_source_output(); + +// Another short way to get partent for output port +parent_output = node->input_value(0); + +// Getting all consumers for output port +auto consumers = output.get_target_inputs(); +// ! [ov:ports_example] +} + +{ +// ! 
[ngraph:shape_check] +auto partial_shape = node->input(0).get_partial_shape(); // get zero input partial shape + +// Check that input shape rank is static +if (!partial_shape.rank().is_static()) { + return false; +} +auto rank_size = partial_shape.rank().get_length(); + +// Check that second dimension is not dynamic +if (rank_size < 2 || partial_shape[1].is_dynamic()) { + return false; +} +auto dim = partial_shape[1].get_length(); +// ! [ngraph:shape_check] +} + +return true; +} + +// ! [ov:replace_node] +bool ov_replace_node(std::shared_ptr node) { + // Step 1. Verify that node has opset8::Negative type + auto neg = std::dynamic_pointer_cast(node); + if (!neg) { + return false; + } + + // Step 2. Create opset8::Multiply operation where the first input is negative operation input and second as Constant with -1 value + auto mul = std::make_shared(neg->input_value(0), + ov::opset8::Constant::create(neg->get_element_type(), ov::Shape{1}, {-1})); + + mul->set_friendly_name(neg->get_friendly_name()); + ov::copy_runtime_info(neg, mul); + + // Step 3. Replace Negative operation with Multiply operation + ov::replace_node(neg, mul); + return true; + + // Step 4. Negative operation will be removed automatically because all consumers was moved to Multiply operation +} +// ! [ov:replace_node] + +bool ov_manual_replace_node(std::shared_ptr node) { +auto neg = std::dynamic_pointer_cast(node); +if (!neg) { + return false; +} + +auto mul = std::make_shared(neg->input_value(0), + ov::opset8::Constant::create(neg->get_element_type(), ov::Shape{1}, {-1})); + +mul->set_friendly_name(neg->get_friendly_name()); +ov::copy_runtime_info(neg, mul); + +// ! [ov:manual_replace] +// All neg->output(0) consumers will be moved to mul->output(0) port +neg->output(0).replace(mul->output(0)); +// ! [ov:manual_replace] +return true; +} + +// ! [ov:insert_node] +// Step 1. Lets suppose that we have a node with single output port and we want to insert additional operation new_node after it +void insert_example(std::shared_ptr node) { + // Get all consumers for node + auto consumers = node->output(0).get_target_inputs(); + + // Step 2. Create new node. Let it be opset8::Relu. + auto new_node = std::make_shared(node); + + // Step 3. Reconnect all consumers to new_node + for (auto input : consumers) { + input.replace_source_output(new_node); + } +} +// ! [ov:insert_node] + +// ! [ov:insert_node_with_copy] +void insert_example_with_copy(std::shared_ptr node) { + // Make a node copy + auto node_copy = node->clone_with_new_inputs(node->input_values()); + // Create new node + auto new_node = std::make_shared(node_copy); + ov::replace_node(node, new_node); +} +// ! [ov:insert_node_with_copy] + +void eliminate_example(std::shared_ptr node) { +// ! [ov:eliminate_node] +// Suppose we have a node that we want to remove +bool success = ov::replace_output_update_name(node->output(0), node->input_value(0)); +// ! [ov:eliminate_node] +} + +void replace_friendly_name() { +auto div = std::make_shared(); +// ! [ov:replace_friendly_name] +// Replace Div operation with Power and Multiply sub-graph and set original friendly name to Multiply operation +auto pow = std::make_shared(div->input(1).get_source_output(), + ov::op::v0::Constant::create(div->get_input_element_type(1), ov::Shape{1}, {-1})); +auto mul = std::make_shared(div->input(0).get_source_output(), pow); +mul->set_friendly_name(div->get_friendly_name()); +ngraph::replace_node(div, mul); +// ! [ov:replace_friendly_name] +} + +void constant_subgraph() { +// ! 
[ov:constant_subgraph] +// After ConstantFolding pass Power will be replaced with Constant +auto input = std::make_shared(ov::element::f32, ov::Shape{1}); +auto pow = std::make_shared(ov::opset8::Constant::create(ov::element::f32, ov::Shape{1}, {2}), + ov::opset8::Constant::create(ov::element::f32, ov::Shape{1}, {3})); +auto mul = std::make_shared(input /* not constant input */, pow); +// ! [ov:constant_subgraph] +} + +void copy_runtime_info_snippet() { +std::shared_ptr transpose, reshape, div, pow, mul, conv, bias, conv_fused, a, b, c, e, f; +// ! [ov:copy_runtime_info] +// Replace Transpose with Reshape operation (1:1) +ov::copy_runtime_info(transpose, reshape); + +// Replace Div operation with Power and Multiply sub-graph (1:N) +ov::copy_runtime_info(div, {pow, mul}); + +// Fuse Convolution with Add operation (N:1) +ov::copy_runtime_info({conv, bias}, {conv_fused}); + +// Any other transformation that replaces one sub-graph with another sub-graph (N:M) +ov::copy_runtime_info({a, b, c}, {e, f}); +// ! [ov:copy_runtime_info] +} diff --git a/docs/template_plugin/src/transformations/template_model_transformation.cpp b/docs/template_plugin/src/transformations/template_model_transformation.cpp index 4319da6fcbe..dd6e56b1c42 100644 --- a/docs/template_plugin/src/transformations/template_model_transformation.cpp +++ b/docs/template_plugin/src/transformations/template_model_transformation.cpp @@ -4,13 +4,13 @@ #include "template_model_transformation.hpp" -#include +#include "openvino/cc/pass/itt.hpp" // ! [model_pass:template_transformation_cpp] // template_function_transformation.cpp -bool ov::pass::MyFunctionTransformation::run_on_model(const std::shared_ptr& f) { - RUN_ON_MODEL_SCOPE(MyFunctionTransformation); +bool ov::pass::MyModelTransformation::run_on_model(const std::shared_ptr& f) { + RUN_ON_MODEL_SCOPE(MyModelTransformation); // Example transformation code NodeVector nodes; diff --git a/docs/template_plugin/src/transformations/template_model_transformation.hpp b/docs/template_plugin/src/transformations/template_model_transformation.hpp index 7a6c7cc97b9..66c5a63bfa9 100644 --- a/docs/template_plugin/src/transformations/template_model_transformation.hpp +++ b/docs/template_plugin/src/transformations/template_model_transformation.hpp @@ -4,21 +4,21 @@ #pragma once -#include +#include "openvino/pass/pass.hpp" namespace ov { namespace pass { -class MyFunctionTransformation; +class MyModelTransformation; } // namespace pass } // namespace ov // ! [model_pass:template_transformation_hpp] // template_model_transformation.hpp -class ov::pass::MyFunctionTransformation : public ov::pass::ModelPass { +class ov::pass::MyModelTransformation : public ov::pass::ModelPass { public: - OPENVINO_RTTI("MyFunctionTransformation", "0"); + OPENVINO_RTTI("MyModelTransformation", "0"); bool run_on_model(const std::shared_ptr& f) override; }; // ! 
[model_pass:template_transformation_hpp] diff --git a/docs/template_plugin/src/transformations/template_pattern_transformation.cpp b/docs/template_plugin/src/transformations/template_pattern_transformation.cpp index 21da6aa2d64..7c451be5317 100644 --- a/docs/template_plugin/src/transformations/template_pattern_transformation.cpp +++ b/docs/template_plugin/src/transformations/template_pattern_transformation.cpp @@ -4,12 +4,11 @@ #include "transformations/template_pattern_transformation.hpp" -#include -#include -#include -#include -#include - +#include "openvino/cc/pass/itt.hpp" +#include "openvino/core/rt_info.hpp" +#include "openvino/opsets/opset3.hpp" +#include "openvino/pass/manager.hpp" +#include "openvino/pass/pattern/op/wrap_type.hpp" #include "transformations/template_model_transformation.hpp" // ! [graph_rewrite:template_transformation_cpp] @@ -122,7 +121,7 @@ void run_matcher_with_manager2(std::shared_ptr f) { void run_matcher_with_manager3(std::shared_ptr f) { // ! [matcher_pass:manager3] ov::pass::Manager manager; - manager.register_pass(); + manager.register_pass(); // Two matchers will run independently (two independent graph traversals) // pass::Manager automatically creates GraphRewrite container for each MatcherPass manager.register_pass(); diff --git a/docs/template_plugin/src/transformations/template_pattern_transformation.hpp b/docs/template_plugin/src/transformations/template_pattern_transformation.hpp index 7f62a4bd916..5a16133b451 100644 --- a/docs/template_plugin/src/transformations/template_pattern_transformation.hpp +++ b/docs/template_plugin/src/transformations/template_pattern_transformation.hpp @@ -4,7 +4,7 @@ #pragma once -#include +#include "openvino/pass/graph_rewrite.hpp" namespace ov { namespace pass { diff --git a/src/core/include/openvino/core/core.hpp b/src/core/include/openvino/core/core.hpp index c25d5940d87..e0c3e46b5fd 100644 --- a/src/core/include/openvino/core/core.hpp +++ b/src/core/include/openvino/core/core.hpp @@ -28,6 +28,7 @@ #include "openvino/core/node_vector.hpp" #include "openvino/core/partial_shape.hpp" #include "openvino/core/rank.hpp" +#include "openvino/core/rt_info.hpp" #include "openvino/core/rtti.hpp" #include "openvino/core/runtime_attribute.hpp" #include "openvino/core/shape.hpp" From 6c787157494167db021f320349a49fff4aacdfc9 Mon Sep 17 00:00:00 2001 From: Roman Kazantsev Date: Tue, 1 Mar 2022 10:57:24 +0300 Subject: [PATCH 129/310] [MO] Clean up Model Optimizer options, help, and documentation (#10653) * [MO] Clean-up MO cmd-line options Remove the following Model Optimizer deprecated options that are no longer used for several releases: disable_fusing, disable_gfusing, generate_deprecated_IR_V7, legacy_ir_generation, keep_shape_ops, move_to_preprocess Deprecate through CLI the following options for which functionality triggered from POT or automatically: disable_weights_compression, disable_nhwc_to_nchw, disable_resnet_optimization, finegrain_fusing. Correct and extend description of each MO option to be printed during model conversion. 
Signed-off-by: Roman Kazantsev * Correct documentation about input shapes Signed-off-by: Roman Kazantsev * Perform final corrections in documentation Signed-off-by: Roman Kazantsev * Remove legacy_ir_generation overall Signed-off-by: Roman Kazantsev * Clean-up tests from deprecated options Signed-off-by: Roman Kazantsev * Recover disable_fusing option as deprecated Signed-off-by: Roman Kazantsev * Fix keys for static_shape and extensions Signed-off-by: Roman Kazantsev * Remove extension key that does not work Signed-off-by: Roman Kazantsev * Apply feedback: remove disable_gfusing, correct docs Signed-off-by: Roman Kazantsev * Recover disable_fusing option for unit-tests Signed-off-by: Roman Kazantsev * Apply feedback for documentation Signed-off-by: Roman Kazantsev * Apply feedback about parameters use_legacy_frontend and use_new_frontend Signed-off-by: Roman Kazantsev * DO minor fixes for indentation of MO logs Signed-off-by: Roman Kazantsev * Revert log.error for fallback message Signed-off-by: Roman Kazantsev * Revert disable_weights_compression parameter for tests Signed-off-by: Roman Kazantsev --- .../convert_model/Converting_Model.md | 56 +++++++-------- tools/mo/openvino/tools/mo/main.py | 21 +++--- tools/mo/openvino/tools/mo/middle/fusings.py | 9 ++- .../tools/mo/utils/check_ie_bindings.py | 2 +- .../mo/openvino/tools/mo/utils/cli_parser.py | 70 ++++++++++--------- .../mo/frontend_ngraph_test_actual.py | 2 - .../mo/utils/mo_fallback_test_actual.py | 2 - .../mo/utils/test_mo_model_analysis_actual.py | 2 - 8 files changed, 79 insertions(+), 85 deletions(-) diff --git a/docs/MO_DG/prepare_model/convert_model/Converting_Model.md b/docs/MO_DG/prepare_model/convert_model/Converting_Model.md index 2602d789f68..60d4ad1f934 100644 --- a/docs/MO_DG/prepare_model/convert_model/Converting_Model.md +++ b/docs/MO_DG/prepare_model/convert_model/Converting_Model.md @@ -24,7 +24,7 @@ To convert the model to the Intermediate Representation (IR), run Model Optimizer using the following command: ```sh -mo --input_model INPUT_MODEL --output_dir +mo --input_model INPUT_MODEL ``` The output directory must have write permissions, so you can run Model Optimizer from the output directory or specify an output path with the `--output_dir` option. @@ -71,12 +71,9 @@ Framework-agnostic parameters: square brackets, for example [1,3,227,227] or (1,227,227,3), where the order of dimensions depends on the framework input layout of the model. For - example, [N,C,H,W] is used for Caffe* models and - [N,H,W,C] for TensorFlow* models. Model Optimizer - performs necessary transformations to convert the - shape to the layout required by Inference Engine - (N,C,H,W). The shape should not contain undefined - dimensions (? or -1) and should fit the dimensions + example, [N,C,H,W] is used for ONNX* models and + [N,H,W,C] for TensorFlow* models. The shape can contain + undefined dimensions (? or -1) and should fit the dimensions defined in the input operation of the graph. Boundaries of undefined dimension can be specified with ellipsis, for example [1,1..10,128,128]. One boundary can be undefined, @@ -156,13 +153,12 @@ Framework-agnostic parameters: original model is in FP32 and --data_type=FP16 is specified, all model weights and biases are compressed to FP16. - --disable_fusing Turn off fusing of linear operations to Convolution + --disable_fusing [DEPRECATED] Turn off fusing of linear operations to Convolution. --disable_resnet_optimization - Turn off resnet optimization + [DEPRECATED] Turn off ResNet optimization. 
--finegrain_fusing FINEGRAIN_FUSING - Regex for layers/operations that won't be fused. + [DEPRECATED] Regex for layers/operations that won't be fused. Example: --finegrain_fusing Convolution1,.*Scale.* - --disable_gfusing Turn off fusing of grouped convolutions --enable_concat_optimization Turn on Concat optimization. --extensions EXTENSIONS @@ -187,7 +183,7 @@ Framework-agnostic parameters: to `Constant`). Changing model input shape using the OpenVINO Runtime API in runtime may fail for such an IR. --disable_weights_compression - Disable compression and store weights with original + [DEPRECATED] Disable compression and store weights with original precision. --progress Enable model conversion progress display. --stream_output Switch model conversion progress display to a @@ -195,8 +191,13 @@ Framework-agnostic parameters: --transformations_config TRANSFORMATIONS_CONFIG Use the configuration file with transformations description. - --use_new_frontend Force the usage of new frontend API for model processing. - --use_legacy_frontend Force the usage of legacy API for model processing. + --use_new_frontend Force the usage of new Frontend of Model Optimizer for model conversion into IR. + The new Frontend is C++ based and is available for ONNX* and PaddlePaddle* models. + Model optimizer uses new Frontend for ONNX* and PaddlePaddle* by default that means + `--use_new_frontend` and `--use_legacy_frontend` options are not specified. + --use_legacy_frontend Force the usage of legacy Frontend of Model Optimizer for model conversion into IR. + The legacy Frontend is Python based and is available for TensorFlow*, ONNX*, MXNet*, + Caffe*, and Kaldi* models. ``` The sections below provide details on using particular parameters and examples of CLI commands. @@ -218,7 +219,8 @@ There is no a universal recipe for determining the mean/scale values for a parti * Open the model in a visualization tool and check for layers performing subtraction or multiplication (like `Sub`, `Mul`, `ScaleShift`, `Eltwise` etc) of the input data. If such layers exist, pre-processing is probably part of the model. ## When to Specify Input Shapes -There are situations when the input data shape for the model is not fixed, like for the fully-convolutional neural networks. In this case, for example, TensorFlow\* models contain `-1` values in the `shape` attribute of the `Placeholder` operation. Inference Engine does not support input layers with undefined size, so if the input shapes are not defined in the model, the Model Optimizer fails to convert the model. The solution is to provide the input shape(s) using the `--input` or `--input_shape` command line parameter for all input(s) of the model or provide the batch size using the `-b` command line parameter if the model contains just one input with undefined batch size only. In the latter case, the `Placeholder` shape for the TensorFlow\* model looks like this `[-1, 224, 224, 3]`. +There are situations when Model Optimizer is unable to deduce input shapes of the model, for example, in case of model cutting due to unsupported operations. +The solution is to provide input shapes of a static rank explicitly. ## When to Reverse Input Channels Input data for your application can be of RGB or BRG color input order. For example, OpenVINO Samples load input images in the BGR channels order. However, the model may be trained on images loaded with the opposite order (for example, most TensorFlow\* models are trained with images in RGB order). 
In this case, inference results using the OpenVINO samples may be incorrect. The solution is to provide `--reverse_input_channels` command line parameter. Taking this parameter, the Model Optimizer performs first convolution or other channel dependent operation weights modification so these operations output will be like the image is passed with RGB channels order. @@ -231,54 +233,50 @@ Resulting Intermediate Representation will not be resizable with the help of Ope Launch the Model Optimizer for the Caffe bvlc_alexnet model with debug log level: ```sh -mo --input_model bvlc_alexnet.caffemodel --log_level DEBUG --output_dir +mo --input_model bvlc_alexnet.caffemodel --log_level DEBUG ``` Launch the Model Optimizer for the Caffe bvlc_alexnet model with the output IR called `result.*` in the specified `output_dir`: ```sh -mo --input_model bvlc_alexnet.caffemodel --model_name result --output_dir /../../models/ +mo --input_model bvlc_alexnet.caffemodel --model_name result --output_dir ``` Launch the Model Optimizer for the Caffe bvlc_alexnet model with one input with scale values: ```sh -mo --input_model bvlc_alexnet.caffemodel --scale_values [59,59,59] --output_dir +mo --input_model bvlc_alexnet.caffemodel --scale_values [59,59,59] ``` Launch the Model Optimizer for the Caffe bvlc_alexnet model with multiple inputs with scale values: ```sh -mo --input_model bvlc_alexnet.caffemodel --input data,rois --scale_values [59,59,59],[5,5,5] --output_dir +mo --input_model bvlc_alexnet.caffemodel --input data,rois --scale_values [59,59,59],[5,5,5] ``` Launch the Model Optimizer for the Caffe bvlc_alexnet model with multiple inputs with scale and mean values specified for the particular nodes: ```sh -mo --input_model bvlc_alexnet.caffemodel --input data,rois --mean_values data[59,59,59] --scale_values rois[5,5,5] --output_dir +mo --input_model bvlc_alexnet.caffemodel --input data,rois --mean_values data[59,59,59] --scale_values rois[5,5,5] ``` Launch the Model Optimizer for the Caffe bvlc_alexnet model with specified input layer, overridden input shape, scale 5, batch 8 and specified name of an output operation: ```sh -mo --input_model bvlc_alexnet.caffemodel --input "data[1 3 224 224]" --output pool5 -s 5 -b 8 --output_dir -``` -Launch the Model Optimizer for the Caffe bvlc_alexnet model with disabled fusing for linear operations to Convolution and grouped convolutions: -```sh -mo --input_model bvlc_alexnet.caffemodel --disable_fusing --disable_gfusing --output_dir +mo --input_model bvlc_alexnet.caffemodel --input data --output pool5 -s 5 -b 8 ``` Launch the Model Optimizer for the Caffe bvlc_alexnet model with reversed input channels order between RGB and BGR, specified mean values to be used for the input image per channel and specified data type for input tensor values: ```sh -mo --input_model bvlc_alexnet.caffemodel --reverse_input_channels --mean_values [255,255,255] --data_type FP16 --output_dir +mo --input_model bvlc_alexnet.caffemodel --reverse_input_channels --mean_values [255,255,255] --data_type FP16 ``` Launch the Model Optimizer for the Caffe bvlc_alexnet model with extensions listed in specified directories, specified mean_images binaryproto file. For more information about extensions, please refer to the [OpenVINO™ Extensibility Mechanism](../../../Extensibility_UG/Intro.md). 
```sh -mo --input_model bvlc_alexnet.caffemodel --extensions /home/,/some/other/path/ --mean_file /path/to/binaryproto --output_dir +mo --input_model bvlc_alexnet.caffemodel --extensions /home/,/some/other/path/ --mean_file /path/to/binaryproto ``` Launch the Model Optimizer for TensorFlow* FaceNet* model with a placeholder freezing value. It replaces the placeholder with a constant layer that contains the passed value. For more information about FaceNet conversion, please refer to [this](tf_specific/Convert_FaceNet_From_Tensorflow.md) page. ```sh -mo --input_model FaceNet.pb --input "phase_train->False" --output_dir +mo --input_model FaceNet.pb --input "phase_train->False" ``` Launch the Model Optimizer for any model with a placeholder freezing tensor of values. It replaces the placeholder with a constant layer that contains the passed values. @@ -287,7 +285,7 @@ Tensor here is represented in square brackets with each value separated from ano If data type is set in the model, this tensor will be reshaped to a placeholder shape and casted to placeholder data type. Otherwise, it will be casted to data type passed to `--data_type` parameter (by default, it is FP32). ```sh -mo --input_model FaceNet.pb --input "placeholder_layer_name->[0.1 1.2 2.3]" --output_dir +mo --input_model FaceNet.pb --input "placeholder_layer_name->[0.1 1.2 2.3]" ``` diff --git a/tools/mo/openvino/tools/mo/main.py b/tools/mo/openvino/tools/mo/main.py index 1f20f8d0c47..208877049cc 100644 --- a/tools/mo/openvino/tools/mo/main.py +++ b/tools/mo/openvino/tools/mo/main.py @@ -28,7 +28,8 @@ from openvino.tools.mo.middle.pattern_match import for_graph_and_each_sub_graph_ from openvino.tools.mo.pipeline.common import prepare_emit_ir, get_ir_version from openvino.tools.mo.pipeline.unified import unified_pipeline from openvino.tools.mo.utils import import_extensions -from openvino.tools.mo.utils.cli_parser import check_available_transforms, get_caffe_cli_options, \ +from openvino.tools.mo.utils.cli_parser import check_available_transforms, \ + get_advanced_cli_options, get_caffe_cli_options, \ get_common_cli_options, get_freeze_placeholder_values, get_kaldi_cli_options, get_layout_values, \ get_mean_scale_dictionary, get_meta_info, get_model_name, get_mxnet_cli_options, get_onnx_cli_options, \ get_placeholder_shapes, get_tf_cli_options, get_tuple_values, parse_transform, parse_tuple_pairs @@ -61,6 +62,7 @@ def print_argv(argv: argparse.Namespace, is_caffe: bool, is_tf: bool, is_mxnet: print('Model Optimizer arguments:') props = OrderedDict() props['common_args'] = get_common_cli_options(model_name) + props['advanced_args'] = get_advanced_cli_options() if is_caffe: props['caffe_args'] = get_caffe_cli_options() if is_tf: @@ -74,6 +76,7 @@ def print_argv(argv: argparse.Namespace, is_caffe: bool, is_tf: bool, is_mxnet: framework_specifics_map = { 'common_args': 'Common parameters:', + 'advanced_args': 'Advanced parameters:', 'caffe_args': 'Caffe specific parameters:', 'tf_args': 'TensorFlow specific parameters:', 'mxnet_args': 'MXNet specific parameters:', @@ -245,9 +248,6 @@ def arguments_post_parsing(argv: argparse.Namespace): # This is just to check that transform key is valid and transformations are available check_available_transforms(parse_transform(argv.transform)) - if argv.legacy_ir_generation and len(argv.transform) != 0: - raise Error("--legacy_ir_generation and --transform keys can not be used at the same time.") - # For C++ frontends there are no specific Python installation requirements, check only generic ones if 
moc_front_end: ret_code = check_requirements() @@ -434,13 +434,12 @@ def emit_ir(graph: Graph, argv: argparse.Namespace): return_code = "not executed" try: - if not argv.legacy_ir_generation: - from openvino.tools.mo.back.offline_transformations import apply_offline_transformations - apply_offline_transformations(orig_model_name, argv) - if "compress_fp16" in argv and argv.compress_fp16: - # restore data_type cmd parameter - argv.data_type = 'FP16' - return_code = 0 + from openvino.tools.mo.back.offline_transformations import apply_offline_transformations + apply_offline_transformations(orig_model_name, argv) + if "compress_fp16" in argv and argv.compress_fp16: + # restore data_type cmd parameter + argv.data_type = 'FP16' + return_code = 0 except Exception as e: return_code = "failed" log.error(e) diff --git a/tools/mo/openvino/tools/mo/middle/fusings.py b/tools/mo/openvino/tools/mo/middle/fusings.py index 238dfed3260..c313e6bfc2e 100644 --- a/tools/mo/openvino/tools/mo/middle/fusings.py +++ b/tools/mo/openvino/tools/mo/middle/fusings.py @@ -77,12 +77,11 @@ class Fusing(MiddleReplacementPattern): for_graph_and_each_sub_graph_recursively(graph, fuse_linear_ops) for_graph_and_each_sub_graph_recursively(graph, lambda G: G.clean_up()) - if not argv.disable_gfusing: - for_graph_and_each_sub_graph_recursively(graph, grouped_convolutions_fusing) + for_graph_and_each_sub_graph_recursively(graph, grouped_convolutions_fusing) + for_graph_and_each_sub_graph_recursively(graph, lambda G: G.clean_up()) + if not argv.disable_fusing: + for_graph_and_each_sub_graph_recursively(graph, fuse_linear_ops) for_graph_and_each_sub_graph_recursively(graph, lambda G: G.clean_up()) - if not argv.disable_fusing: - for_graph_and_each_sub_graph_recursively(graph, fuse_linear_ops) - for_graph_and_each_sub_graph_recursively(graph, lambda G: G.clean_up()) for_graph_and_each_sub_graph_recursively(graph, normalize_eltwise_inputs) for_graph_and_each_sub_graph_recursively(graph, lambda G: G.clean_up()) diff --git a/tools/mo/openvino/tools/mo/utils/check_ie_bindings.py b/tools/mo/openvino/tools/mo/utils/check_ie_bindings.py index acc20097c14..22d3ac39c40 100644 --- a/tools/mo/openvino/tools/mo/utils/check_ie_bindings.py +++ b/tools/mo/openvino/tools/mo/utils/check_ie_bindings.py @@ -66,7 +66,7 @@ def import_core_modules(silent: bool, path_to_module: str): ie_version = str(get_version()) mo_version = str(v.get_version()) # pylint: disable=no-member,no-name-in-module - print("\t- {}: \t{}".format("OpenVINO runtime found in", os.path.dirname(openvino.__file__))) + print("{}: \t{}".format("OpenVINO runtime found in", os.path.dirname(openvino.__file__))) print("{}: \t{}".format("OpenVINO runtime version", ie_version)) print("{}: \t{}".format("Model Optimizer version", mo_version)) diff --git a/tools/mo/openvino/tools/mo/utils/cli_parser.py b/tools/mo/openvino/tools/mo/utils/cli_parser.py index 0b75e063fd4..fafde3d9eb5 100644 --- a/tools/mo/openvino/tools/mo/utils/cli_parser.py +++ b/tools/mo/openvino/tools/mo/utils/cli_parser.py @@ -34,6 +34,13 @@ class DeprecatedStoreTrue(argparse.Action): setattr(namespace, self.dest, True) +class DeprecatedOptionCommon(argparse.Action): + def __call__(self, parser, args, values, option_string): + dep_msg = "Use of deprecated cli option {} detected. Option use in the following releases will be fatal. 
".format(option_string) + log.error(dep_msg, extra={'is_warning': True}) + setattr(args, self.dest, values) + + class IgnoredAction(argparse.Action): def __init__(self, nargs=0, **kw): super().__init__(nargs=nargs, **kw) @@ -254,10 +261,8 @@ def get_common_cli_parser(parser: argparse.ArgumentParser = None): 'Shape is defined as a comma-separated list of integer numbers enclosed in ' 'parentheses or square brackets, for example [1,3,227,227] or (1,227,227,3), where ' 'the order of dimensions depends on the framework input layout of the model. ' - 'For example, [N,C,H,W] is used for Caffe* models and [N,H,W,C] for TensorFlow* ' - 'models. Model Optimizer performs necessary transformations to convert the shape to ' - 'the layout required by Inference Engine (N,C,H,W). The shape could contain ' - 'undefined dimensions (-1) and should fit the dimensions defined in the input ' + 'For example, [N,C,H,W] is used for ONNX* models and [N,H,W,C] for TensorFlow* ' + 'models. The shape can contain undefined dimensions (? or -1) and should fit the dimensions defined in the input ' 'operation of the graph. Boundaries of undefined dimension can be specified with ' 'ellipsis, for example [1,1..10,128,128]. One boundary can be undefined, for ' 'example [1,..100] or [1,3,1..,1..]. If there are multiple inputs in the model, ' @@ -371,23 +376,18 @@ def get_common_cli_parser(parser: argparse.ArgumentParser = None): 'Available transformations: "LowLatency2", "MakeStateful"', default="") common_group.add_argument('--disable_fusing', - help='Turn off fusing of linear operations to Convolution', + help='[DEPRECATED] Turn off fusing of linear operations to Convolution.', action=DeprecatedStoreTrue) common_group.add_argument('--disable_resnet_optimization', - help='Turn off resnet optimization', - action='store_true') + help='[DEPRECATED] Turn off ResNet optimization.', + action=DeprecatedStoreTrue, default=False) common_group.add_argument('--finegrain_fusing', - help='Regex for layers/operations that won\'t be fused. ' + - 'Example: --finegrain_fusing Convolution1,.*Scale.*') - common_group.add_argument('--disable_gfusing', - help='Turn off fusing of grouped convolutions', - action=DeprecatedStoreTrue) + help='[DEPRECATED] Regex for layers/operations that won\'t be fused. ' + + 'Example: --finegrain_fusing Convolution1,.*Scale.*', + action=DeprecatedOptionCommon) common_group.add_argument('--enable_concat_optimization', - help='Turn on Concat optimization.', - action='store_true') - common_group.add_argument('--move_to_preprocess', - help='Move mean values to IR preprocess section', - action=DeprecatedStoreTrue) + help='[DEPRECATED] Turn on Concat optimization.', + action=DeprecatedStoreTrue, default=False) # we use CanonicalizeDirCheckExistenceAction instead of readable_dirs to handle empty strings common_group.add_argument("--extensions", help="Directory or a comma separated list of directories with extensions. To disable all " @@ -417,19 +417,14 @@ def get_common_cli_parser(parser: argparse.ArgumentParser = None): 'It will be DEPRECATED in future releases. ' 'Use --input option to specify a value for freezing.', default=None) - common_group.add_argument('--generate_deprecated_IR_V7', - help=argparse.SUPPRESS, action=IgnoredAction, default=False) common_group.add_argument('--static_shape', help='Enables IR generation for fixed input shape (folding `ShapeOf` operations and ' 'shape-calculating sub-graphs to `Constant`). 
Changing model input shape using ' - 'the Inference Engine API in runtime may fail for such an IR.', + 'the OpenVINO Runtime API in runtime may fail for such an IR.', action='store_true', default=False) - common_group.add_argument('--keep_shape_ops', - help=argparse.SUPPRESS, - action=IgnoredAction, default=True) common_group.add_argument('--disable_weights_compression', - help='Disable compression and store weights with original precision.', - action='store_true', default=False) + help='[DEPRECATED] Disable compression and store weights with original precision.', + action=DeprecatedStoreTrue, default=False) common_group.add_argument('--progress', help='Enable model conversion progress display.', action='store_true', default=False) @@ -442,13 +437,16 @@ def get_common_cli_parser(parser: argparse.ArgumentParser = None): 'from the current directory, as absolute path or as a' 'relative path from the mo root directory', action=CanonicalizeTransformationPathCheckExistenceAction) - common_group.add_argument('--legacy_ir_generation', - help=argparse.SUPPRESS, action=DeprecatedStoreTrue, default=False) common_group.add_argument("--use_new_frontend", - help="Force the usage of new frontend API for model processing", + help='Force the usage of new Frontend of Model Optimizer for model conversion into IR. ' + 'The new Frontend is C++ based and is available for ONNX* and PaddlePaddle* models. ' + 'Model optimizer uses new Frontend for ONNX* and PaddlePaddle* by default that means ' + '`--use_new_frontend` and `--use_legacy_frontend` options are not specified.', action='store_true', default=False) common_group.add_argument("--use_legacy_frontend", - help="Force the usage of legacy API for model processing", + help='Force the usage of legacy Frontend of Model Optimizer for model conversion into IR. ' + 'The legacy Frontend is Python based and is available for TensorFlow*, ONNX*, MXNet*, ' + 'Caffe*, and Kaldi* models.', action='store_true', default=False) return parser @@ -471,14 +469,20 @@ def get_common_cli_options(model_name): d['scale'] = ['- Scale factor', lambda x: x if x else 'Not specified'] d['data_type'] = ['- Precision of IR', lambda x: 'FP32' if x == 'float' else 'FP16' if x == 'half' else x] d['disable_fusing'] = ['- Enable fusing', lambda x: not x] - d['disable_gfusing'] = ['- Enable grouped convolutions fusing', lambda x: not x] - d['move_to_preprocess'] = '- Move mean values to preprocess section' + d['transform'] = ['- User transformations', lambda x: x if x else 'Not specified'] d['reverse_input_channels'] = '- Reverse input channels' - d['use_legacy_frontend'] = '- Use legacy API for model processing' + d['static_shape'] = '- Enable IR generation for fixed input shape' d['transformations_config'] = '- Use the transformations config file' return d +def get_advanced_cli_options(): + d = OrderedDict() + d['use_legacy_frontend'] = '- Force the usage of legacy Frontend of Model Optimizer for model conversion into IR' + d['use_new_frontend'] = '- Force the usage of new Frontend of Model Optimizer for model conversion into IR' + return d + + def get_caffe_cli_options(): d = { 'input_proto': ['- Path to the Input prototxt', lambda x: x], @@ -657,7 +661,7 @@ def get_tf_cli_parser(parser: argparse.ArgumentParser = None): tf_group.add_argument('--disable_nhwc_to_nchw', help='[DEPRECATED] Disables the default translation from NHWC to NCHW. 
Since 2022.1 this option ' 'is deprecated and used only to maintain backward compatibility with previous releases.', - action='store_true') + action=DeprecatedStoreTrue, default=False) return parser diff --git a/tools/mo/unit_tests/mo/frontend_ngraph_test_actual.py b/tools/mo/unit_tests/mo/frontend_ngraph_test_actual.py index 8653190f1b3..97f2f2c6397 100644 --- a/tools/mo/unit_tests/mo/frontend_ngraph_test_actual.py +++ b/tools/mo/unit_tests/mo/frontend_ngraph_test_actual.py @@ -48,7 +48,6 @@ def replaceArgsHelper(log_level='DEBUG', model_name='abc', input_model='abc.test_mo_mock_mdl', transform=[], - legacy_ir_generation=False, scale=None, output=None, _input=None, @@ -67,7 +66,6 @@ def replaceArgsHelper(log_level='DEBUG', model_name=model_name, input_model=input_model, transform=transform, - legacy_ir_generation=legacy_ir_generation, scale=scale, output=output, input=_input, diff --git a/tools/mo/unit_tests/mo/utils/mo_fallback_test_actual.py b/tools/mo/unit_tests/mo/utils/mo_fallback_test_actual.py index 168c35a54ae..87297b7005c 100644 --- a/tools/mo/unit_tests/mo/utils/mo_fallback_test_actual.py +++ b/tools/mo/unit_tests/mo/utils/mo_fallback_test_actual.py @@ -34,7 +34,6 @@ def base_args_config(use_legacy_fe:bool=None, use_new_fe:bool=None): args.input_model = None args.silent = True args.transform=[] - args.legacy_ir_generation = False args.scale = None args.output=None args.input=None @@ -47,7 +46,6 @@ def base_args_config(use_legacy_fe:bool=None, use_new_fe:bool=None): args.transformations_config = None args.disable_fusing = None args.finegrain_fusing = None - args.disable_gfusing = None args.disable_resnet_optimization = None args.enable_concat_optimization = None args.static_shape = None diff --git a/tools/mo/unit_tests/mo/utils/test_mo_model_analysis_actual.py b/tools/mo/unit_tests/mo/utils/test_mo_model_analysis_actual.py index 9a8ec8a741d..19d7f782b84 100644 --- a/tools/mo/unit_tests/mo/utils/test_mo_model_analysis_actual.py +++ b/tools/mo/unit_tests/mo/utils/test_mo_model_analysis_actual.py @@ -29,7 +29,6 @@ def base_args_config(): args.input_model = None args.silent = True args.transform=[] - args.legacy_ir_generation = False args.scale = None args.output=None args.input=None @@ -42,7 +41,6 @@ def base_args_config(): args.transformations_config = None args.disable_fusing = None args.finegrain_fusing = None - args.disable_gfusing = None args.disable_resnet_optimization = None args.enable_concat_optimization = None args.static_shape = None From 5f40ba9a236a3dbd202abd0feeead9588d0a4395 Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Tue, 1 Mar 2022 11:12:12 +0300 Subject: [PATCH 130/310] Fixed onecoreuap.toolchain.cmake (#10646) * Fixed onecoreuap.toolchain.cmake * Updated mt.runtime.win32.toolchain.cmake --- .../mt.runtime.win32.toolchain.cmake | 2 +- cmake/toolchains/oecore.arm64.toolchain.cmake | 41 ------------------- cmake/toolchains/onecoreuap.toolchain.cmake | 18 ++++---- 3 files changed, 10 insertions(+), 51 deletions(-) delete mode 100644 cmake/toolchains/oecore.arm64.toolchain.cmake diff --git a/cmake/toolchains/mt.runtime.win32.toolchain.cmake b/cmake/toolchains/mt.runtime.win32.toolchain.cmake index 0da0ccf1e6e..62619cd5683 100644 --- a/cmake/toolchains/mt.runtime.win32.toolchain.cmake +++ b/cmake/toolchains/mt.runtime.win32.toolchain.cmake @@ -25,7 +25,7 @@ endif() if(use_static_runtime) foreach(lang C CXX) foreach(build_type "" "_DEBUG" "_MINSIZEREL" "_RELEASE" "_RELWITHDEBINFO") - set(flag_var "CMAKE_${lang}_FLAGS${build_type}") + set(flag_var 
"CMAKE_${lang}_FLAGS${build_type}_INIT") string(REPLACE "/MD" "/MT" ${flag_var} "${${flag_var}}") endforeach() endforeach() diff --git a/cmake/toolchains/oecore.arm64.toolchain.cmake b/cmake/toolchains/oecore.arm64.toolchain.cmake deleted file mode 100644 index 0d0b63398b7..00000000000 --- a/cmake/toolchains/oecore.arm64.toolchain.cmake +++ /dev/null @@ -1,41 +0,0 @@ -# Copyright (C) 2018-2022 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 -# - -if(DEFINED OECORE_BASE_DIR) - # OECORE_BASE_DIR was passed via CMake command line, nothing to do -elseif(DEFINED ENV{OECORE_BASE_DIR}) - # User sets OECORE_BASE_DIR environment variable - set(OECORE_BASE_DIR $ENV{OECORE_BASE_DIR}) -elseif(DEFINED ENV{OECORE_NATIVE_SYSROOT}) - # OECORE_NATIVE_SYSROOT is a default environment variable for the OECore toolchain - set(OECORE_BASE_DIR "$ENV{OECORE_NATIVE_SYSROOT}/../..") -else() - # Use default value - set(OECORE_BASE_DIR "/usr/local/oecore-x86_64") -endif() - -set(OECORE_TARGET_NAME "aarch64-ese-linux") -set(OECORE_TARGET_SYSROOT "${OECORE_BASE_DIR}/sysroots/${OECORE_TARGET_NAME}") -set(OECORE_HOST_SYSROOT "${OECORE_BASE_DIR}/sysroots/x86_64-esesdk-linux") -set(OECORE_HOST_COMPILER_BIN_DIR "${OECORE_HOST_SYSROOT}/usr/bin/${OECORE_TARGET_NAME}") - -set(CMAKE_SYSTEM_NAME "Linux") -set(CMAKE_SYSTEM_PROCESSOR "aarch64") - -set(CMAKE_SYSROOT "${OECORE_TARGET_SYSROOT}") - -set(CMAKE_C_COMPILER "${OECORE_HOST_COMPILER_BIN_DIR}/aarch64-ese-linux-gcc") -set(CMAKE_CXX_COMPILER "${OECORE_HOST_COMPILER_BIN_DIR}/aarch64-ese-linux-g++") - -set(CMAKE_C_FLAGS_INIT "-mcpu=cortex-a53 -mtune=cortex-a53 --sysroot=${OECORE_TARGET_SYSROOT}") -set(CMAKE_CXX_FLAGS_INIT "-mcpu=cortex-a53 -mtune=cortex-a53 --sysroot=${OECORE_TARGET_SYSROOT}") - -set(CMAKE_EXE_LINKER_FLAGS_INIT "-Wl,-O1 -Wl,--hash-style=gnu -Wl,--as-needed --sysroot=${OECORE_TARGET_SYSROOT}") -set(CMAKE_SHARED_LINKER_FLAGS_INIT "-Wl,-O1 -Wl,--hash-style=gnu -Wl,--as-needed --sysroot=${OECORE_TARGET_SYSROOT}") -set(CMAKE_MODULE_LINKER_FLAGS_INIT "-Wl,-O1 -Wl,--hash-style=gnu -Wl,--as-needed --sysroot=${OECORE_TARGET_SYSROOT}") - -set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) -set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY) -set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) -set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY) diff --git a/cmake/toolchains/onecoreuap.toolchain.cmake b/cmake/toolchains/onecoreuap.toolchain.cmake index 0accc4ed7da..c97bca379ed 100644 --- a/cmake/toolchains/onecoreuap.toolchain.cmake +++ b/cmake/toolchains/onecoreuap.toolchain.cmake @@ -35,14 +35,14 @@ if(_onecoreuap_arch STREQUAL "x64") # Forcefull make VS search for C++ libraries in these folders prior to other c++ standard libraries localizations. 
add_link_options("/LIBPATH:\"\$\(VC_LibraryPath_VC_x64_OneCore\)\"") - set(CMAKE_C_STANDARD_LIBRARIES "\$\(UCRTContentRoot\)lib/\$\(TargetUniversalCRTVersion\)/um/\$\(Platform\)/OneCoreUap.lib" CACHE STRING "" FORCE) - set(CMAKE_CXX_STANDARD_LIBRARIES "\$\(UCRTContentRoot\)lib/\$\(TargetUniversalCRTVersion\)/um/\$\(Platform\)/OneCoreUap.lib" CACHE STRING "" FORCE) + set(CMAKE_C_STANDARD_LIBRARIES_INIT "\$\(UCRTContentRoot\)lib/\$\(TargetUniversalCRTVersion\)/um/\$\(Platform\)/OneCoreUap.lib" CACHE STRING "" FORCE) + set(CMAKE_CXX_STANDARD_LIBRARIES_INIT "\$\(UCRTContentRoot\)lib/\$\(TargetUniversalCRTVersion\)/um/\$\(Platform\)/OneCoreUap.lib" CACHE STRING "" FORCE) elseif(_onecoreuap_arch STREQUAL "X86") add_link_options("/LIBPATH:\"\$\(VCInstallDir\)lib/onecore\"") add_link_options("/LIBPATH:\"\$\(VC_LibraryPath_VC_x86_OneCore\)\"") - set(CMAKE_C_STANDARD_LIBRARIES "\$\(UCRTContentRoot\)lib/\$\(TargetUniversalCRTVersion\)/um/x86/OneCoreUap.lib" CACHE STRING "" FORCE) - set(CMAKE_CXX_STANDARD_LIBRARIES "\$\(UCRTContentRoot\)lib/\$\(TargetUniversalCRTVersion\)/um/x86/OneCoreUap.lib" CACHE STRING "" FORCE) + set(CMAKE_C_STANDARD_LIBRARIES_INIT "\$\(UCRTContentRoot\)lib/\$\(TargetUniversalCRTVersion\)/um/x86/OneCoreUap.lib" CACHE STRING "" FORCE) + set(CMAKE_CXX_STANDARD_LIBRARIES_INIT "\$\(UCRTContentRoot\)lib/\$\(TargetUniversalCRTVersion\)/um/x86/OneCoreUap.lib" CACHE STRING "" FORCE) else() message(FATAL_ERROR "Unsupported architecture ${_onecoreuap_arch}. Only X86 or X86_64 are supported") endif() @@ -52,8 +52,8 @@ unset(_onecoreuap_arch) # compile flags set(includes "/I\"\$\(UniversalCRT_IncludePath\)\"") -set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${includes}") -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${includes}") +set(CMAKE_C_FLAGS_INIT "${CMAKE_C_FLAGS_INIT} ${includes}") +set(CMAKE_CXX_FLAGS_INIT "${CMAKE_CXX_FLAGS_INIT} ${includes}") unset(includes) # linker flags @@ -62,9 +62,9 @@ foreach(lib kernel32 user32 advapi32 ole32 mscoree combase) set(linker_flags "/NODEFAULTLIB:${lib}.lib ${linker_flags}") endforeach() -set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${linker_flags}") -set(CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} ${linker_flags}") -set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${linker_flags}") +set(CMAKE_SHARED_LINKER_FLAGS_INIT "${CMAKE_SHARED_LINKER_FLAGS_INIT} ${linker_flags}") +set(CMAKE_MODULE_LINKER_FLAGS_INIT "${CMAKE_MODULE_LINKER_FLAGS_INIT} ${linker_flags}") +set(CMAKE_EXE_LINKER_FLAGS_INIT "${CMAKE_EXE_LINKER_FLAGS_INIT} ${linker_flags}") unset(linker_flags) # From 64fca57af45095a158005313a2141fdd9864e863 Mon Sep 17 00:00:00 2001 From: Nikita Semaev Date: Tue, 1 Mar 2022 12:14:45 +0300 Subject: [PATCH 131/310] Fix NMS Conformance tests for Template plugin (#9273) * Added inputs argument to all compare() function overloads * Rewritten compare() function for NMS * Implemented sorting by name of expected outputs * Implemented sorting by name of actual outputs * Added accounting for simultaneous dynamism and the need to convert outputs in Template plugin * Added a separate case to the GetBlob function for correct dimensions * Rewritten Expected outputs sorting to work correctly on cpuFuncTests * Fixing code style problems * Implemented sorting by name of actual outputs for functional tests * Debug prints removed * Replacing a raw pointer with a vector * Fixing code style problems * Shifting the sorting place Expected outputs * Added sorting of Expected exits in one more place * Quality transition to SLT2.0 * Removing unnecessary code 
after SLT2.0 * Fix soft_nms_sigma argument * Removing unnecessary parts after SLT2.0 * Remove unnecessary outputs sorting * Removing parts from the code for debugging * Fix for NMS * Trying to make CI green * Checking test passage without adding convert precision * Checking CI * There is an algorithm that adds Convert only if there is f16, fp16 in inputs * Add Convert Op in cases where inputs are not already installed f32 * Check that the CI will go away if you put everything back * Revert changes, validate f32 change on ci * Adding Convert f16-f32 only if there is a function parameter of type f16 * The presence of f16/bf16 as a parameter type is now mandatory to add Convert * Added prints for params, inputs, outputs * Logic checking the absence of Convert * Cosmetic fixes * Setting the correct value for selected_scores_type NMS-5 * Fix bf * Increased readability * Missing parts added * Removed the static for the vector --- .../template_plugin/backend/evaluates_map.cpp | 2 +- .../src/base/ov_subgraph.cpp | 21 +++++++++++++++---- 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/docs/template_plugin/backend/evaluates_map.cpp b/docs/template_plugin/backend/evaluates_map.cpp index af0d8c37dcb..1a95c4e9e95 100644 --- a/docs/template_plugin/backend/evaluates_map.cpp +++ b/docs/template_plugin/backend/evaluates_map.cpp @@ -857,7 +857,7 @@ bool evaluate(const shared_ptr& op, &valid_outputs, info.sort_result_descending); - auto selected_scores_type = (inputs.size() < 4) ? element::f32 : inputs[3]->get_element_type(); + auto selected_scores_type = (outputs.size() < 3) ? element::f32 : outputs[1]->get_element_type(); runtime::reference::nms5_postprocessing(outputs, info.output_type, diff --git a/src/tests/functional/shared_test_classes/src/base/ov_subgraph.cpp b/src/tests/functional/shared_test_classes/src/base/ov_subgraph.cpp index e3313db12b8..2700cf0eb45 100644 --- a/src/tests/functional/shared_test_classes/src/base/ov_subgraph.cpp +++ b/src/tests/functional/shared_test_classes/src/base/ov_subgraph.cpp @@ -239,11 +239,24 @@ std::vector SubgraphBaseTest::calculate_refs() { auto functionToProcess = ov::clone_model(*functionRefs); //TODO: remove this conversions as soon as function interpreter fully support bf16 and f16 - static const precisions_array precisions = { - { ngraph::element::bf16, ngraph::element::f32 }, - { ngraph::element::f16, ngraph::element::f32} + precisions_array precisions = { + { ngraph::element::bf16, ngraph::element::f32 } }; - + auto convert_added = false; + for (const auto ¶m : function->get_parameters()) { + for (size_t i = 0; i < param->get_output_size(); i++) { + for (const auto &node : param->get_output_target_inputs(i)) { + std::shared_ptr nodePtr = node.get_node()->shared_from_this(); + if (std::dynamic_pointer_cast(nodePtr)) { + convert_added = true; + break; + } + } + } + } + if (!convert_added) { + precisions.push_back({ ngraph::element::f16, ngraph::element::f32}); + } pass::Manager manager; manager.register_pass(precisions); manager.run_passes(functionToProcess); From 8e0978818c1f1a63dbdeed8deac8c8539a992eae Mon Sep 17 00:00:00 2001 From: Maxim Andronov Date: Tue, 1 Mar 2022 14:31:21 +0300 Subject: [PATCH 132/310] [CPU] Prevent internalBlobs cleanup for dynamic deconv node (#10697) --- src/plugins/intel_cpu/src/nodes/deconv.cpp | 14 ++++++++++++++ src/plugins/intel_cpu/src/nodes/deconv.h | 2 ++ 2 files changed, 16 insertions(+) diff --git a/src/plugins/intel_cpu/src/nodes/deconv.cpp b/src/plugins/intel_cpu/src/nodes/deconv.cpp index 
1000c0850c8..adb012da93a 100644 --- a/src/plugins/intel_cpu/src/nodes/deconv.cpp +++ b/src/plugins/intel_cpu/src/nodes/deconv.cpp @@ -466,6 +466,20 @@ void MKLDNNDeconvolutionNode::setDynamicBatchLim(int lim) { MKLDNNNode::setDynamicBatchLim(lim); } +void MKLDNNDeconvolutionNode::cleanup() { + if (!isDynamicNode()) { + internalBlobs.clear(); + } + + for (auto it : fusedWith) { + it->cleanup(); + } + + for (auto it : mergedWith) { + it->cleanup(); + } +} + void MKLDNNDeconvolutionNode::execute(mkldnn::stream strm) { if (!execPtr) { IE_THROW() << "Can't execute Deconvolution node with name: " << getName() << ", because executor is not compiled"; diff --git a/src/plugins/intel_cpu/src/nodes/deconv.h b/src/plugins/intel_cpu/src/nodes/deconv.h index 25496851aad..151a737fcb7 100644 --- a/src/plugins/intel_cpu/src/nodes/deconv.h +++ b/src/plugins/intel_cpu/src/nodes/deconv.h @@ -56,6 +56,8 @@ public: void setDynamicBatchLim(int lim) override; + void cleanup() override; + protected: AttrPtr initPrimitiveAttr() override; AttrPtr makePrimitiveAttr(const VectorDims& dims); From 1d469a2b874a4cd8bdf6f2fa1bf77544288c312c Mon Sep 17 00:00:00 2001 From: Karol Blaszczak Date: Tue, 1 Mar 2022 13:00:38 +0100 Subject: [PATCH 133/310] [DOCS] hddl update (#10616) * [DOCS] hddl update include info on hddl and myriad working at the same time * Update docs/OV_Runtime_UG/supported_plugins/MYRIAD.md Co-authored-by: Andrey Zaytsev * Update HDDL.md * Update MYRIAD.md Co-authored-by: Ilya Lavrenov Co-authored-by: Andrey Zaytsev --- docs/OV_Runtime_UG/supported_plugins/HDDL.md | 4 ++++ docs/OV_Runtime_UG/supported_plugins/MYRIAD.md | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/docs/OV_Runtime_UG/supported_plugins/HDDL.md b/docs/OV_Runtime_UG/supported_plugins/HDDL.md index d4642f6fcae..bb05719a223 100644 --- a/docs/OV_Runtime_UG/supported_plugins/HDDL.md +++ b/docs/OV_Runtime_UG/supported_plugins/HDDL.md @@ -8,6 +8,10 @@ The OpenVINO Runtime HDDL plugin was developed for inference with neural network To configure your Intel® Vision Accelerator Design With Intel® Movidius™ on supported operating systems, refer to the Steps for Intel® Vision Accelerator Design with Intel® Movidius™ VPUs section in the installation guides for [Linux](../../install_guides/installing-openvino-linux.md) or [Windows](../../install_guides/installing-openvino-windows.md). +> **NOTE**: The HDDL and Myriad plugins may cause conflicts when used at the same time. +> To ensure proper operation in such a case, the number of booted devices needs to be limited in the 'hddl_autoboot.config' file. +> Otherwise, the HDDL plugin will boot all available Intel® Movidius™ Myriad™ X devices. + ## Supported networks To see the list of supported networks for the HDDL plugin, refer to the list on the [MYRIAD Plugin page](MYRIAD.md). diff --git a/docs/OV_Runtime_UG/supported_plugins/MYRIAD.md b/docs/OV_Runtime_UG/supported_plugins/MYRIAD.md index a5d1a46b67f..d6ecdf5ce51 100644 --- a/docs/OV_Runtime_UG/supported_plugins/MYRIAD.md +++ b/docs/OV_Runtime_UG/supported_plugins/MYRIAD.md @@ -8,6 +8,10 @@ The OpenVINO Runtime MYRIAD plugin has been developed for inference of neural ne To configure your Intel® Vision Accelerator Design With Intel® Movidius™ on supported operating systemss, refer to the Steps for Intel® Vision Accelerator Design with Intel® Movidius™ VPUs section in the installation guides for [Linux](../../install_guides/installing-openvino-linux.md) or [Windows](../../install_guides/installing-openvino-windows.md). 
+ > **NOTE**: The HDDL and MYRIAD plugins may cause conflicts when used at the same time. +> To ensure proper operation in such a case, the number of booted devices needs to be limited in the 'hddl_autoboot.config' file. +> Otherwise, the HDDL plugin will boot all available Intel® Movidius™ Myriad™ X devices. + ## Supported Configuration Parameters See VPU common configuration parameters for the [VPU Plugins](VPU.md). From 6c6aa8fa957677bea8db9129c5ee8fef35753fee Mon Sep 17 00:00:00 2001 From: Sergey Shlyapnikov Date: Tue, 1 Mar 2022 15:15:04 +0300 Subject: [PATCH 134/310] [GPU] Fix RemoteBlob lock() and ulock() behaviour in case of multiple threads (#10685) * [GPU] Fix RemoteBlob lock() and ulock() behaviour in case of multiple threads and add tests --- .../intel_gpu/plugin/remote_context.hpp | 2 + .../intel_gpu/src/plugin/remote_context.cpp | 20 +++++-- .../cldnn_remote_blob_tests.cpp | 59 +++++++++++++++++++ 3 files changed, 75 insertions(+), 6 deletions(-) diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/remote_context.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/remote_context.hpp index c8334220e17..5ddefc3dc70 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/remote_context.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/remote_context.hpp @@ -89,6 +89,8 @@ protected: cldnn::memory::ptr m_memObject; + mutable std::mutex lockedMutex; + mutable size_t lockedCounter; mutable std::unique_ptr> lockedHolder; mutable void* _handle; mutable std::shared_ptr _allocator; diff --git a/src/plugins/intel_gpu/src/plugin/remote_context.cpp b/src/plugins/intel_gpu/src/plugin/remote_context.cpp index 0720ec68eb4..1eaf01aea0b 100644 --- a/src/plugins/intel_gpu/src/plugin/remote_context.cpp +++ b/src/plugins/intel_gpu/src/plugin/remote_context.cpp @@ -25,7 +25,7 @@ RemoteBlobImpl::RemoteBlobImpl(ClContext::Ptr context, uint32_t plane, BlobType mem_type) : m_context(context), m_stream(stream), m_layout(layout), m_mem_type(mem_type), m_mem(mem), m_surf(surf), m_plane(plane), - _handle(nullptr), _allocator(nullptr), m_memObject(nullptr), lockedHolder(nullptr) { + _handle(nullptr), _allocator(nullptr), m_memObject(nullptr), lockedCounter(0), lockedHolder(nullptr) { auto _impl = getContextImpl(m_context.lock()); auto eng = _impl->GetEngine(); @@ -189,14 +189,22 @@ void RemoteBlobImpl::lock() const { if (!is_allocated()) { IE_THROW(NotAllocated) << "[GPU] Remote blob can't be locked as it's not allocated"; } - lockedHolder = std::unique_ptr>(new cldnn::mem_lock(m_memObject, m_stream)); - auto ptr = lockedHolder->data(); - _handle = reinterpret_cast(ptr); - m_allocator.regLockedBlob(_handle, this); + + std::lock_guard locker(lockedMutex); + if (lockedCounter == 0) { + lockedHolder = std::unique_ptr>(new cldnn::mem_lock(m_memObject, m_stream)); + auto ptr = lockedHolder->data(); + _handle = reinterpret_cast(ptr); + m_allocator.regLockedBlob(_handle, this); + } + lockedCounter++; } void RemoteBlobImpl::unlock() const { - lockedHolder.reset(); + std::lock_guard locker(lockedMutex); + lockedCounter--; + if (lockedCounter == 0) + lockedHolder.reset(); } LockedMemory RemoteBlobImpl::buffer() noexcept { diff --git a/src/tests/functional/plugin/gpu/remote_blob_tests/cldnn_remote_blob_tests.cpp b/src/tests/functional/plugin/gpu/remote_blob_tests/cldnn_remote_blob_tests.cpp index c3f30667002..0cab1150cf9 100644 --- a/src/tests/functional/plugin/gpu/remote_blob_tests/cldnn_remote_blob_tests.cpp +++ b/src/tests/functional/plugin/gpu/remote_blob_tests/cldnn_remote_blob_tests.cpp @@ 
-6,6 +6,7 @@ #include #include #include +#include #include @@ -98,6 +99,64 @@ TEST_P(RemoteBlob_Test, smoke_canInputUserBlob) { } } +TEST_P(RemoteBlob_Test, smoke_canUseRemoteBlobSimultaneously) { +#if defined(ANDROID) + GTEST_SKIP(); +#endif + const int batch = 2; + const int channels = 3; + const int height = 512; + const int width = 512; + const size_t img_size = batch * channels * height * width; + cl_int err; + + const InferenceEngine::TensorDesc tensor_desc{InferenceEngine::Precision::U8, + {batch, channels, height, width}, + InferenceEngine::Layout::NHWC}; + + InferenceEngine::Blob::Ptr ref_blob = FuncTestUtils::createAndFillBlob(tensor_desc); + + auto ie = PluginCache::get().ie(); + auto ocl_instance = std::make_shared(); + ocl_instance->_queue = cl::CommandQueue(ocl_instance->_context, ocl_instance->_device); + + // Allocate OpenCL buffer for data + cl::Buffer shared_buffer(ocl_instance->_context, CL_MEM_READ_WRITE, img_size, NULL, &err); + + // Create shared context + auto remote_context = make_shared_context(*ie, deviceName, ocl_instance->_queue.get()); + + // Wrap buffer above with IE blob + Blob::Ptr shared_blob = make_shared_blob(tensor_desc, remote_context, shared_buffer); + // Allocate is needed to actually trigger memory handle sharing. For other buffers it's called inside SetBlob impl + // TODO: Why do we need to call it explicitly? Consider doing it internally + shared_blob->allocate(); + + // Copy data from ordinary blob to OpenCL buffer + { + void* buffer = ref_blob->buffer(); + ocl_instance->_queue.enqueueWriteBuffer(shared_buffer, true, 0, img_size, buffer); + } + + // Lock remote buffer in multiple threads and compare data with ordinary one + const int threads_num = 8; + std::vector threads; + for (int i = 0; i < threads_num; i++) { + threads.emplace_back(std::thread{[&] { + auto ref_blob_buf = ref_blob->cbuffer(); + auto ref_blob_ptr = ref_blob_buf.as(); + auto remote_blob_buf = shared_blob->cbuffer(); + auto remote_blob_ptr = remote_blob_buf.as(); + ASSERT_EQ(ref_blob->size(), shared_blob->size()); + for (size_t j = 0; j < ref_blob->size(); j++) { + ASSERT_EQ(ref_blob_ptr[j], remote_blob_ptr[j]); + } + }}); + } + + for (auto& t : threads) + t.join(); +} TEST_P(RemoteBlob_Test, smoke_canInputPluginRemoteBlob) { #if defined(ANDROID) From 9eca8515b88088c3a2889c75e6c86e2a43ba73b9 Mon Sep 17 00:00:00 2001 From: Irina Efode Date: Tue, 1 Mar 2022 16:01:30 +0300 Subject: [PATCH 135/310] [IE TESTS] Extend EvaluatorMaps by Greater, If, Equal (#10026) * [IE TESTS] Extend EvaluatesMap * fix code style --- .../template_plugin/backend/evaluates_map.cpp | 185 +++++++++++++++++- .../template_plugin/backend/opset_int_tbl.hpp | 2 + 2 files changed, 185 insertions(+), 2 deletions(-) diff --git a/docs/template_plugin/backend/evaluates_map.cpp b/docs/template_plugin/backend/evaluates_map.cpp index 1a95c4e9e95..53101ae7d47 100644 --- a/docs/template_plugin/backend/evaluates_map.cpp +++ b/docs/template_plugin/backend/evaluates_map.cpp @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -40,6 +41,7 @@ #include #include #include +#include #include #include #include @@ -399,6 +401,42 @@ bool evaluate(const shared_ptr& op, return true; } +template +bool evaluate(const shared_ptr& op, const HostTensorVector& outputs, const HostTensorVector& inputs) { + const auto in0_data_ptr = inputs[0]->get_data_ptr(); + const auto in1_data_ptr = inputs[1]->get_data_ptr(); + const auto out_data_ptr = outputs[0]->get_data_ptr(); + const auto in0_shape = 
inputs[0]->get_shape(); + const auto in1_shape = inputs[1]->get_shape(); + const auto broadcast_spec = op->get_autob(); + runtime::reference::greater::value_type, + typename element_type_traits::value_type>(in0_data_ptr, + in1_data_ptr, + out_data_ptr, + in0_shape, + in1_shape, + broadcast_spec); + return true; +} + +template +bool evaluate(const shared_ptr& op, const HostTensorVector& outputs, const HostTensorVector& inputs) { + const auto in0_data_ptr = inputs[0]->get_data_ptr(); + const auto in1_data_ptr = inputs[1]->get_data_ptr(); + const auto out_data_ptr = outputs[0]->get_data_ptr(); + const auto in0_shape = inputs[0]->get_shape(); + const auto in1_shape = inputs[1]->get_shape(); + const auto broadcast_spec = op->get_autob(); + runtime::reference::equal::value_type, + typename element_type_traits::value_type>(in0_data_ptr, + in1_data_ptr, + out_data_ptr, + in0_shape, + in1_shape, + broadcast_spec); + return true; +} + namespace cum_sum_v0 { template inline void evaluate(const shared_ptr& op, @@ -428,6 +466,145 @@ bool evaluate(const shared_ptr& op, const HostTensorVector& outp return true; } +namespace if_op { +bool call(const HostTensorVector& func_outputs, + const HostTensorVector& func_inputs, + const std::shared_ptr& function) { + // map function params -> HostTensor + std::unordered_map> tensor_map; + size_t input_count = 0; + for (const auto& param : function->get_parameters()) { + for (size_t i = 0; i < param->get_output_size(); ++i) { + descriptor::Tensor* tensor = ¶m->output(i).get_tensor(); + tensor_map.insert({tensor, func_inputs[input_count++]}); + } + } + + std::unordered_map, size_t> results_map; + // map function outputs -> HostTensor + for (size_t output_count = 0; output_count < function->get_results().size(); ++output_count) { + auto output = function->get_results()[output_count]; + results_map[output] = output_count; + } + + // for each ordered op in the graph + for (const auto& op : function->get_ordered_ops()) { + if (op::is_parameter(op)) { + continue; + } + + // get op inputs from map + std::vector> op_inputs; + for (auto input : op->inputs()) { + descriptor::Tensor* tensor = &input.get_tensor(); + op_inputs.push_back(tensor_map.at(tensor)); + } + + // get op outputs from map or create + std::vector> op_outputs; + for (size_t i = 0; i < op->get_output_size(); ++i) { + descriptor::Tensor* tensor = &op->output(i).get_tensor(); + std::shared_ptr host_tensor; + auto it = tensor_map.find(tensor); + if (op::is_output(op)) { + host_tensor = func_outputs[results_map[op]]; + } else if (it == tensor_map.end()) { + host_tensor = std::make_shared(op->output(i)); + tensor_map.insert({tensor, host_tensor}); + } else { + host_tensor = it->second; + } + op_outputs.push_back(host_tensor); + } + op->validate_and_infer_types(); + OPENVINO_SUPPRESS_DEPRECATED_START + if (!op->evaluate(op_outputs, op_inputs)) { + auto evaluates_map = ngraph::runtime::interpreter::get_evaluators_map(); + auto it = evaluates_map.find(op->get_type_info()); + if (!it->second(op, op_outputs, op_inputs)) { + return false; + } + } + OPENVINO_SUPPRESS_DEPRECATED_END + } + return true; +} + +void function(const std::shared_ptr& function, + const HostTensorVector& inputs, + HostTensorVector& outputs) { + const auto& parameters = function->get_parameters(); + const auto& parametersNumber = parameters.size(); + const auto& inputsNumber = inputs.size(); + NGRAPH_CHECK(parametersNumber == inputsNumber, + "Got function (", + function->get_friendly_name(), + ") with ", + parametersNumber, + " parameters, but ", + 
inputsNumber, + " input blobs"); + + for (const auto& parameter : parameters) { + const auto& parameterIndex = function->get_parameter_index(parameter); + const auto& parameterShape = parameter->get_shape(); + const auto& parameterType = parameter->get_element_type(); + const auto& parameterSize = shape_size(parameterShape) * parameterType.size(); + + const auto& input = inputs[parameterIndex]; + const auto& inputSize = input->get_size_in_bytes(); + NGRAPH_CHECK(parameterSize == inputSize, + "Got parameter (", + parameter->get_friendly_name(), + ") of size ", + parameterSize, + " bytes, but corresponding input with index ", + parameterIndex, + " has ", + inputSize, + " bytes"); + } + + const auto& results = function->get_results(); + outputs.reserve(results.size()); + for (size_t i = 0; i < results.size(); ++i) { + outputs.push_back(std::make_shared()); + } + call(outputs, inputs, function); +} + +void if_reference(const std::vector>& bodies, + const std::vector& out_descs, + const std::vector& input_descs, + const HostTensorVector& out, + const HostTensorVector& args) { + NGRAPH_CHECK(args.size() > 0, "If operation must have input condition value"); + + auto condition_value = args[0]->get_data_ptr()[0]; + auto branch_index = (condition_value) ? op::v8::If::THEN_BODY_INDEX : op::v8::If::ELSE_BODY_INDEX; + HostTensorVector inputs_to_body; + HostTensorVector outs_from_body; + inputs_to_body.resize(input_descs[branch_index].size()); + auto inputs_size = args.size(); + auto output_size = out.size(); + for (const auto& input_desc : input_descs[branch_index]) { + NGRAPH_CHECK(inputs_size > input_desc->m_input_index, + "Incorrect associating! If has not input with id ", + input_desc->m_input_index); + inputs_to_body[input_desc->m_body_parameter_index] = args[input_desc->m_input_index]; + } + function(bodies[branch_index], inputs_to_body, outs_from_body); + for (const auto& out_descr : out_descs[branch_index]) { + NGRAPH_CHECK(output_size > out_descr->m_output_index, + "Incorrect associating! If has not output with id ", + out_descr->m_output_index); + auto res = outs_from_body[out_descr->m_body_value_index]; + out[out_descr->m_output_index]->set_shape(res->get_shape()); + out[out_descr->m_output_index]->write(res->get_data_ptr(), res->get_size_in_bytes()); + } +} +} // namespace if_op + template bool evaluate(const shared_ptr& op, const HostTensorVector& outputs, const HostTensorVector& inputs) { std::vector> bodies; @@ -442,7 +619,11 @@ bool evaluate(const shared_ptr& op, const HostTensorVector& outputs, for (size_t i = 0; i < op->get_output_descriptions_size(); i++) { out_descs.emplace_back(op->get_output_descriptions(i)); } - runtime::reference::if_reference(bodies, out_descs, in_descs, outputs, inputs); + try { + runtime::reference::if_reference(bodies, out_descs, in_descs, outputs, inputs); + } catch (...) 
{ + if_op::if_reference(bodies, out_descs, in_descs, outputs, inputs); + } return true; } @@ -3430,7 +3611,7 @@ bool evaluate(const shared_ptr& op, template bool evaluate_node(std::shared_ptr node, const HostTensorVector& outputs, const HostTensorVector& inputs) { auto element_type = node->get_output_element_type(0); - if (ov::is_type(node)) + if (ov::is_type(node) || ov::is_type(node)) element_type = node->get_input_element_type(1); switch (element_type) { diff --git a/docs/template_plugin/backend/opset_int_tbl.hpp b/docs/template_plugin/backend/opset_int_tbl.hpp index d8edd45d4b9..952488d28d6 100644 --- a/docs/template_plugin/backend/opset_int_tbl.hpp +++ b/docs/template_plugin/backend/opset_int_tbl.hpp @@ -45,6 +45,8 @@ NGRAPH_OP(ConvertLike, op::v1) NGRAPH_OP(Convolution, ngraph::op::v1) NGRAPH_OP(ConvolutionBackpropData, ngraph::op::v1) NGRAPH_OP(DeformablePSROIPooling, ngraph::op::v1) +NGRAPH_OP(Equal, ngraph::op::v1) +NGRAPH_OP(Greater, ngraph::op::v1) NGRAPH_OP(GroupConvolution, ngraph::op::v1) NGRAPH_OP(GroupConvolutionBackpropData, ngraph::op::v1) NGRAPH_OP(DeformableConvolution, ngraph::op::v1) From 3f941e3c5ff34ce775d02da1864cdea4a647a488 Mon Sep 17 00:00:00 2001 From: Anastasia Popova Date: Tue, 1 Mar 2022 16:03:09 +0300 Subject: [PATCH 136/310] Corrected layout parsing error message. (#10651) * Corrected error message. * Corrected message. * Small correction * Corrected error message for source and target layout. --- tools/mo/openvino/tools/mo/utils/cli_parser.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tools/mo/openvino/tools/mo/utils/cli_parser.py b/tools/mo/openvino/tools/mo/utils/cli_parser.py index fafde3d9eb5..29b2ff096dc 100644 --- a/tools/mo/openvino/tools/mo/utils/cli_parser.py +++ b/tools/mo/openvino/tools/mo/utils/cli_parser.py @@ -1000,8 +1000,14 @@ def parse_layouts_by_destination(s: str, parsed: dict, dest: str = None) -> None elif m2: found_g = m2.groups() else: - raise Error("More then one layout provided for --{}layout without providing name.".format( - dest + '_' if dest else '')) + error_msg = "Invalid usage of --{}layout parameter. 
Please use following syntax for each tensor " \ + "or operation name:" \ + "\n name(nchw)" \ + "\n name[n,c,h,w]".format(dest + '_' if dest else '') + if dest is None: + error_msg += "\n name(nhwc->[n,h,w,c])" \ + "\n name[n,h,w,c]->[n,c,h,w]" + raise Error(error_msg) write_found_layout(found_g[0], found_g[1], parsed, dest) From 0f409ccea9670aa3f009cc7bfe621a3795a85f01 Mon Sep 17 00:00:00 2001 From: Anastasia Kuporosova Date: Tue, 1 Mar 2022 16:11:57 +0300 Subject: [PATCH 137/310] [Python API] Fix typo in method name (#10707) --- src/bindings/python/src/pyopenvino/graph/descriptors/tensor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bindings/python/src/pyopenvino/graph/descriptors/tensor.cpp b/src/bindings/python/src/pyopenvino/graph/descriptors/tensor.cpp index 7e73074576b..6f28ea5619f 100644 --- a/src/bindings/python/src/pyopenvino/graph/descriptors/tensor.cpp +++ b/src/bindings/python/src/pyopenvino/graph/descriptors/tensor.cpp @@ -101,7 +101,7 @@ void regclass_graph_descriptor_Tensor(py::module m) { Set of names )"); - tensor.def("set_names", + tensor.def("add_names", &ov::descriptor::Tensor::add_names, py::arg("names"), R"( From 18035209a0b26c722541a65a112212afcae29d8b Mon Sep 17 00:00:00 2001 From: David Nam Date: Tue, 1 Mar 2022 22:27:11 +0900 Subject: [PATCH 138/310] Add op impl checkt tests (#10414) * Add op impl checkt tests * Add op impl check tests * Add op impl check tests * Add op impl check test * Add op impl check tests * Add op impl check tests * Fix usage of makeConstant() * Fix build error in ubuntu18_i386 * Fix error in linux-macos Co-authored-by: PVA-CI --- .../op_impl_check/single_op_graph.cpp | 687 +++++++++++++++++- 1 file changed, 681 insertions(+), 6 deletions(-) diff --git a/src/tests/functional/plugin/shared/src/single_layer_tests/op_impl_check/single_op_graph.cpp b/src/tests/functional/plugin/shared/src/single_layer_tests/op_impl_check/single_op_graph.cpp index f0e5cbedde6..ed5a17b7d4a 100644 --- a/src/tests/functional/plugin/shared/src/single_layer_tests/op_impl_check/single_op_graph.cpp +++ b/src/tests/functional/plugin/shared/src/single_layer_tests/op_impl_check/single_op_graph.cpp @@ -14,9 +14,530 @@ std::shared_ptr generate(const std::shared_ptr &node) { return nullptr; } +std::shared_ptr generate(const std::shared_ptr &node) { + const auto params = ngraph::builder::makeDynamicParams(ov::element::f32, {{3, 2}}); + const auto elu = std::make_shared(params[0], 0.5f); + ov::ResultVector results{std::make_shared(elu)}; + return std::make_shared(results, params, "ElueGraph"); +} + +std::shared_ptr generate(const std::shared_ptr &node) { + const auto params = ngraph::builder::makeDynamicParams(ov::element::f32, {{5, 2}}); + const auto indices = ngraph::builder::makeConstant(ov::element::i32, {4}, {0, 2, 3, 4}); + const auto segment_ids = ngraph::builder::makeConstant(ov::element::i32, {4}, {0, 0, 2, 2}); + const auto num_segments = ngraph::builder::makeConstant(ov::element::i32, {}, {3}); + const auto default_index = ngraph::builder::makeConstant(ov::element::i32, {}, {0}); + const auto per_sample_weights = + ngraph::builder::makeConstant(ov::element::f32, {4}, {0.5, 0.5, 0.5, 0.5}); + const auto embed_seg_sum = std::make_shared(params[0], + indices, + segment_ids, + num_segments, + default_index, + per_sample_weights); + ov::ResultVector results{std::make_shared(embed_seg_sum)}; + return std::make_shared(results, params, "EmbeddingSegmentsSum"); +} + +std::shared_ptr generate(const std::shared_ptr &node) { + const auto rois = 
ngraph::builder::makeConstant( + ov::element::f32, + {{16, 4}}, + {1.0f, 1.0f, 10.0f, 10.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 4.0f, 1.0f, 8.0f, 5.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f}); + const auto deltas = ngraph::builder::makeConstant( + ov::element::f32, + {{16, 8}}, + {5.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f, 4.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 8.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f}); + const auto scores = ngraph::builder::makeConstant( + ov::element::f32, + {{16, 2}}, + {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f}); + const auto im_info = ngraph::builder::makeConstant(ov::element::f32, {{1, 3}}, {1.0f, 1.0f, 1.0f}); + const auto attrs = ov::op::v6::ExperimentalDetectronDetectionOutput::Attributes{0.01000000074505806f, + 0.2f, + 2.0f, + 2, + 500, + 5, + true, + {10.0f, 10.0f, 5.0f, 5.0f}}; + const auto exp_detection_output = + std::make_shared(rois, deltas, scores, im_info, attrs); + ov::ResultVector results{std::make_shared(exp_detection_output)}; + return std::make_shared(results, + ngraph::ParameterVector{}, + "ExperimentalDetectronDetectionOutput"); +} + +std::shared_ptr generate( + const std::shared_ptr &node) { + const auto im_info = ngraph::builder::makeConstant(ov::element::f32, {3}, {1.0f, 1.0f, 1.0f}); + const auto anchors = ngraph::builder::makeConstant( + ov::element::f32, + {{36, 4}}, + {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f}); + const auto deltas = ngraph::builder::makeConstant( + ov::element::f32, + {{12, 2, 6}}, + {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 
1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f}); + const auto scores = ngraph::builder::makeConstant( + ov::element::f32, + {{3, 2, 6}}, + {5.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 4.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 8.0f, 1.0f}); + const auto attrs = + ov::op::v6::ExperimentalDetectronGenerateProposalsSingleImage::Attributes{0, 0.699999988079071, 6, 1000}; + const auto exp_gen_prop_sing_img = + std::make_shared(im_info, + anchors, + deltas, + scores, + attrs); + ov::ResultVector results{std::make_shared(exp_gen_prop_sing_img)}; + return std::make_shared(results, + ngraph::ParameterVector{}, + "ExperimentalDetectronGenerateProposalsSingleImage"); +} + +std::shared_ptr generate( + const std::shared_ptr &node) { + const auto params = + ngraph::builder::makeDynamicParams(ov::element::f32, {{3, 4}, {1, 16, 4, 5}, {1, 3, 100, 200}}); + const auto attrs = ov::op::v6::ExperimentalDetectronPriorGridGenerator::Attributes{true, 0, 0, 4.0f, 4.0f}; + const auto exp_prior_grid_gen = std::make_shared(params[0], + params[1], + params[2], + attrs); + ov::ResultVector results{std::make_shared(exp_prior_grid_gen)}; + return std::make_shared(results, params, "ExperimentalDetectronPriorGridGenerator"); +} + +std::shared_ptr generate( + const std::shared_ptr &node) { + const auto params = ngraph::builder::makeDynamicParams(ov::element::f32, {{2, 4}, {1, 2, 2, 3}}); + const auto attrs = ov::op::v6::ExperimentalDetectronROIFeatureExtractor::Attributes{3, 2, {4}, false}; + const auto exp_roi_feature_ext = + std::make_shared(NodeVector{params[0], params[1]}, + attrs); + ov::ResultVector results{std::make_shared(exp_roi_feature_ext)}; + return std::make_shared(results, params, "ExperimentalDetectronROIFeatureExtractor"); +} + +std::shared_ptr generate(const std::shared_ptr &node) { + const auto params = ngraph::builder::makeDynamicParams(ov::element::f32, {{2, 4}, {2}}); + const auto exp_topk_rois = std::make_shared(params[0], params[1], 1); + ov::ResultVector results{std::make_shared(exp_topk_rois)}; + return std::make_shared(results, params, "ExperimentalDetectronTopKROIs"); +} + +std::shared_ptr generate(const std::shared_ptr &node) { + const auto params = ngraph::builder::makeDynamicParams(ov::element::f32, {{1, 1, 10, 10}}); + const auto ext_img_patch = std::make_shared(params[0], + ov::Shape{3, 3}, + ov::Strides{5, 5}, + ov::Shape{1, 1}, + ov::op::PadType::VALID); + ov::ResultVector results{std::make_shared(ext_img_patch)}; + return std::make_shared(results, params, "ExtractImagePatches"); +} + +std::shared_ptr generate(const std::shared_ptr &node) { + const auto params = ngraph::builder::makeDynamicParams(ov::element::f32, {{1, 2, 3, 4}}); + const auto input_low = 
ngraph::builder::makeConstant(ov::element::f32, {}, {0.f}); + const auto input_high = ngraph::builder::makeConstant(ov::element::f32, {}, {23.f}); + const auto output_low = ngraph::builder::makeConstant(ov::element::f32, {}, {2.f}); + const auto output_high = ngraph::builder::makeConstant(ov::element::f32, {}, {16.f}); + const auto fake_quantize = std::make_shared(params[0], input_low, input_high, output_low, output_high, 4); + ov::ResultVector results{std::make_shared(fake_quantize)}; + return std::make_shared(results, params, "FakeQuantize"); +} + +std::shared_ptr generate(const std::shared_ptr &node) { + const auto params = ngraph::builder::makeDynamicParams(ov::element::f32, {{3, 4}}); + const auto grn = std::make_shared(params[0], 1e-6); + ov::ResultVector results{std::make_shared(grn)}; + return std::make_shared(results, params, "GRN"); +} + +std::shared_ptr generate(const std::shared_ptr &node) { + const auto params = + ngraph::builder::makeDynamicParams({ov::element::f32, ov::element::f32, ov::element::i64}, + {{5, 10, 10}, {5, 1, 10}, {5}}); + const auto W = ngraph::builder::makeConstant(ov::element::f32, {1, 30, 10}, {}, true); + const auto R = ngraph::builder::makeConstant(ov::element::f32, {1, 30, 10}, {}, true); + const auto B = ngraph::builder::makeConstant(ov::element::f32, {1, 30}, {}, true); + const size_t hidden_size = 10; + const auto gru_sequence = + std::make_shared(params[0], + params[1], + params[2], + W, + R, + B, + hidden_size, + ov::op::RecurrentSequenceDirection::FORWARD); + ov::ResultVector results{std::make_shared(gru_sequence)}; + return std::make_shared(results, params, "GRUSequence"); +} + +std::shared_ptr generate(const std::shared_ptr &node) { + const auto params = ngraph::builder::makeDynamicParams({ov::element::f32, ov::element::i32}, {{3}, {7}}); + const auto gather_elements = std::make_shared(params[0], params[1], 0); + ov::ResultVector results{std::make_shared(gather_elements)}; + return std::make_shared(results, params, "GatherElements"); +} + +std::shared_ptr generate(const std::shared_ptr &node) { + const auto params = ngraph::builder::makeDynamicParams(ov::element::f32, {{1, 1, 10}, {1, 1, 10}, {1}, {}}); + const auto gather_tree = std::make_shared(params[0], params[1], params[2], params[3]); + ov::ResultVector results{std::make_shared(gather_tree)}; + return std::make_shared(results, params, "GatherTree"); +} + +std::shared_ptr generate(const std::shared_ptr &node) { + const auto params = ngraph::builder::makeDynamicParams(ov::element::f32, {{8}}); + const auto gelu = std::make_shared(params[0]); + ov::ResultVector results{std::make_shared(gelu)}; + return std::make_shared(results, params, "Gelu"); +} + +std::shared_ptr generate(const std::shared_ptr &node) { + const auto params = ngraph::builder::makeDynamicParams(ov::element::f32, {{1, 1, 6}, {1, 1, 1, 3}}); + const auto group_convolution = std::make_shared(params[0], + params[1], + ov::Strides{1}, + ov::CoordinateDiff{0}, + ov::CoordinateDiff{0}, + ov::Strides{1}); + ov::ResultVector results{std::make_shared(group_convolution)}; + return std::make_shared(results, params, "GroupConvolution"); +} + +std::shared_ptr generate(const std::shared_ptr &node) { + const auto params = ngraph::builder::makeDynamicParams(ov::element::f32, {{1, 1, 4}, {1, 1, 1, 3}}); + const auto group_convolution = std::make_shared(params[0], + params[1], + ov::Strides{1}, + ov::CoordinateDiff{0}, + ov::CoordinateDiff{0}, + ov::Strides{1}, + ov::op::PadType{ov::op::PadType::EXPLICIT}); + ov::ResultVector 
results{std::make_shared(group_convolution)}; + return std::make_shared(results, params, "GroupConvolutionBackpropData"); +} + +std::shared_ptr generate(const std::shared_ptr &node) { + const auto params = ngraph::builder::makeDynamicParams(ov::element::f32, {{3}}); + const auto alpha = ngraph::builder::makeConstant(ov::element::f32, {}, {std::vector{0.5}}); + const auto beta = ngraph::builder::makeConstant(ov::element::f32, {}, {std::vector{0.6}}); + const auto hard_sigmoid = std::make_shared(params[0], alpha, beta); + ov::ResultVector results{std::make_shared(hard_sigmoid)}; + return std::make_shared(results, params, "HardSigmoid"); +} + +std::shared_ptr generate(const std::shared_ptr &node) { + const auto params = ngraph::builder::makeDynamicParams(ov::element::f32, {{1, 1, 2, 4}}); + const auto out_shape_in = ngraph::builder::makeConstant(ov::element::i64, {4}, {1, 1, 1, 2}); + ov::op::v0::Interpolate::Attributes attrs; + attrs.axes = ov::AxisSet{0, 1, 2, 3}; + attrs.mode = "nearest"; + attrs.align_corners = false; + attrs.antialias = false; + attrs.pads_begin = std::vector{0, 0, 0, 0}; + attrs.pads_end = std::vector{0, 0, 0, 0}; + const auto interpolate = std::make_shared(params[0], out_shape_in, attrs); + ov::ResultVector results{std::make_shared(interpolate)}; + return std::make_shared(results, params, "Interpolat-1"); +} + +std::shared_ptr generate(const std::shared_ptr &node) { + using InterpolateAttrs = op::v4::Interpolate::InterpolateAttrs; + using InterpolateMode = op::v4::Interpolate::InterpolateMode; + using ShapeCalcMode = op::v4::Interpolate::ShapeCalcMode; + using TransformMode = op::v4::Interpolate::CoordinateTransformMode; + using NearestMode = op::v4::Interpolate::NearestMode; + + const auto params = ngraph::builder::makeDynamicParams(ov::element::f32, {{1, 1, 2, 4}}); + const auto out_shape_in = ngraph::builder::makeConstant(ov::element::i64, {4}, {1, 1, 1, 2}); + const auto scales = ngraph::builder::makeConstant(ov::element::f32, {1}, {1.0}); + const InterpolateAttrs attrs{InterpolateMode::NEAREST, + ShapeCalcMode::SIZES, + std::vector{0, 0, 0, 0}, + std::vector{0, 0, 0, 0}, + TransformMode::HALF_PIXEL, + NearestMode::ROUND_PREFER_FLOOR, + false, + -0.75}; + const auto interpolate = std::make_shared(params[0], out_shape_in, scales, attrs); + ov::ResultVector results{std::make_shared(interpolate)}; + return std::make_shared(results, params, "Interpolate-4"); +} + +std::shared_ptr generate(const std::shared_ptr &node) { + auto params = ngraph::builder::makeDynamicParams(ov::element::f32, {{1}}); + auto read_value = std::make_shared(params[0], "v0"); + auto add = std::make_shared(read_value, params[0]); + auto assign = std::make_shared(add, "v0"); + ov::ResultVector results{std::make_shared(add)}; + return std::make_shared(results, SinkVector{assign}, params, "Assign-3"); +} + +std::shared_ptr generate(const std::shared_ptr &node) { + auto params = ngraph::builder::makeDynamicParams(ov::element::f32, {{1}}); + auto variable = std::make_shared( + ov::op::util::VariableInfo{ov::PartialShape::dynamic(), ov::element::dynamic, "v0"}); + auto read_value = std::make_shared(params[0], variable); + auto add = std::make_shared(read_value, params[0]); + auto assign = std::make_shared(add, variable); + ov::ResultVector results{std::make_shared(add)}; + return std::make_shared(results, SinkVector{assign}, params, "Assign-6"); +} + +std::shared_ptr generate(const std::shared_ptr &node) { + const auto params = ngraph::builder::makeDynamicParams(ov::element::f32, {{2, 3, 2, 1}}); + 
const auto axes = ngraph::builder::makeConstant(ov::element::i64, {1}, std::vector{2}); + const auto lrn = std::make_shared(params[0], axes, 3, 0.5, 1, 3); + ov::ResultVector results{std::make_shared(lrn)}; + return std::make_shared(results, params, "LRN"); +} + +std::shared_ptr generate(const std::shared_ptr &node) { + const auto params = + ngraph::builder::makeDynamicParams({ov::element::f32, ov::element::f32, ov::element::f32, ov::element::i32}, + {{5, 10, 10}, {5, 1, 10}, {5, 1, 10}, {5}}); + const auto W = ngraph::builder::makeConstant(ov::element::f32, {1, 40, 10}, {}, true); + const auto R = ngraph::builder::makeConstant(ov::element::f32, {1, 40, 10}, {}, true); + const auto B = ngraph::builder::makeConstant(ov::element::f32, {1, 40}, {}, true); + const auto P = ngraph::builder::makeConstant(ov::element::f32, {1, 30}, {}, true); + const int64_t hidden_size = 10; + const auto lstm_sequence = + std::make_shared(params[0], + params[1], + params[2], + params[3], + W, + R, + B, + P, + hidden_size, + ov::op::RecurrentSequenceDirection::FORWARD); + ov::ResultVector results{std::make_shared(lstm_sequence->output(0)), + std::make_shared(lstm_sequence->output(1)), + std::make_shared(lstm_sequence->output(2))}; + return std::make_shared(results, params, "LSTMSequence"); +} + +std::shared_ptr generate(const std::shared_ptr &node) { + const auto params = ngraph::builder::makeDynamicParams(ov::element::f32, {{1}}); + const auto lsm = std::make_shared(params[0], 0); + ov::ResultVector results{std::make_shared(lsm)}; + return std::make_shared(results, params, "LogSoftmax"); +} + +std::shared_ptr generate(const std::shared_ptr &node) { + const auto params = ngraph::builder::makeDynamicParams(ov::element::boolean, {{1, 2}}); + const auto logical_not = std::make_shared(params[0]); + ov::ResultVector results{std::make_shared(logical_not)}; + return std::make_shared(results, params, "LogicalNot"); +} + +std::shared_ptr generate(const std::shared_ptr &node) { + const auto params = ngraph::builder::makeDynamicParams(ov::element::f32, {{1, 3, 3, 3}}); + const auto mvn = std::make_shared(params[0], false, false, 1e-9); + ov::ResultVector results{std::make_shared(mvn)}; + return std::make_shared(results, params, "MVN-2"); +} + +std::shared_ptr generate(const std::shared_ptr &node) { + const auto params = ngraph::builder::makeDynamicParams(ov::element::f32, {{1, 3, 3, 3}}); + const auto axes = ngraph::builder::makeConstant(ov::element::i64, {2}, std::vector{2, 3}); + const auto mvn = std::make_shared(params[0], axes, false, 1e-9, ov::op::MVNEpsMode::OUTSIDE_SQRT); + ov::ResultVector results{std::make_shared(mvn)}; + return std::make_shared(results, params, "MVN-6"); +} + +std::shared_ptr generate(const std::shared_ptr &node) { + const auto params = ngraph::builder::makeDynamicParams(ov::element::f32, {{2, 2}, {2, 2}}); + const auto matmul = std::make_shared(params[0], params[1], false, false); + ov::ResultVector results{std::make_shared(matmul)}; + return std::make_shared(results, params, "MatMul-1"); +} + +std::shared_ptr generate(const std::shared_ptr &node) { + const auto params = ngraph::builder::makeDynamicParams(ov::element::f32, {{2, 2}}); + const auto mish = std::make_shared(params[0]); + ov::ResultVector results{std::make_shared(mish)}; + return std::make_shared(results, params, "Mish-4"); +} + +std::shared_ptr generate(const std::shared_ptr &node) { + const auto params = ngraph::builder::makeDynamicParams( + {ov::element::f32, ov::element::f32, ov::element::i32, ov::element::f32, ov::element::f32}, 
+ {{1, 6, 4}, {1, 1, 6}, {}, {}, {}}); + auto nms = std::make_shared(params[0], + params[1], + params[2], + params[3], + params[4], + ov::op::v1::NonMaxSuppression::BoxEncodingType::CENTER, + false); + ov::ResultVector results{std::make_shared(nms)}; + return std::make_shared(results, params, "NonMaxSuppression-1"); +} + +std::shared_ptr generate(const std::shared_ptr &node) { + const auto params = ngraph::builder::makeDynamicParams( + {ov::element::f32, ov::element::f32, ov::element::i32, ov::element::f32, ov::element::f32}, + {{1, 6, 4}, {1, 1, 6}, {}, {}, {}}); + auto nms = std::make_shared(params[0], + params[1], + params[2], + params[3], + params[4], + ov::op::v3::NonMaxSuppression::BoxEncodingType::CENTER, + false); + ov::ResultVector results{std::make_shared(nms)}; + return std::make_shared(results, params, "NonMaxSuppression-1"); +} + +std::shared_ptr generate(const std::shared_ptr &node) { + const auto params = ngraph::builder::makeDynamicParams( + {ov::element::f32, ov::element::f32, ov::element::i32, ov::element::f32, ov::element::f32}, + {{1, 6, 4}, {1, 1, 6}, {}, {}, {}}); + auto nms = std::make_shared(params[0], + params[1], + params[2], + params[3], + params[4], + ov::op::v4::NonMaxSuppression::BoxEncodingType::CENTER, + false); + ov::ResultVector results{std::make_shared(nms)}; + return std::make_shared(results, params, "NonMaxSuppression-1"); +} + +std::shared_ptr generate(const std::shared_ptr &node) { + const auto params = ngraph::builder::makeDynamicParams( + {ov::element::f32, ov::element::f32, ov::element::i32, ov::element::f32, ov::element::f32}, + {{1, 6, 4}, {1, 1, 6}, {}, {}, {}}); + auto nms = std::make_shared(params[0], + params[1], + params[2], + params[3], + params[4], + ov::op::v5::NonMaxSuppression::BoxEncodingType::CENTER, + false); + ov::ResultVector results{std::make_shared(nms)}; + return std::make_shared(results, params, "NonMaxSuppression-1"); +} + +std::shared_ptr generate(const std::shared_ptr &node) { + const auto params = ngraph::builder::makeDynamicParams(ov::element::f32, {{3, 2}}); + auto nonzero = std::make_shared(params[0], ov::element::i32); + ov::ResultVector results{std::make_shared(nonzero)}; + return std::make_shared(results, params, "NonZero-3"); +} + +std::shared_ptr generate(const std::shared_ptr &node) { + const auto params = ngraph::builder::makeDynamicParams(ov::element::f32, {{4}}); + const auto axes = ngraph::builder::makeConstant(ov::element::i64, {0}, std::vector{}); + auto normalize = std::make_shared(params[0], axes, 1e-7, ov::op::EpsMode::ADD); + ov::ResultVector results{std::make_shared(normalize)}; + return std::make_shared(results, params, "NormalizeL2-1"); +} + +std::shared_ptr generate(const std::shared_ptr &node) { + const auto params = ngraph::builder::makeDynamicParams(ov::element::i32, {{}}); + const auto depth = ngraph::builder::makeConstant(ov::element::i32, {}, std::vector{3}); + const auto onvalue = ngraph::builder::makeConstant(ov::element::i32, {}, std::vector{1}); + const auto offvalue = ngraph::builder::makeConstant(ov::element::i32, {}, std::vector{0}); + const int32_t axes = 0; + const auto onehot = std::make_shared(params[0], depth, onvalue, offvalue, axes); + ov::ResultVector results{std::make_shared(onehot)}; + return std::make_shared(results, params, "OneHot-1"); +} + +std::shared_ptr generate(const std::shared_ptr &node) { + const auto params = ngraph::builder::makeDynamicParams(ov::element::f32, {{6}}); + const auto slope = ngraph::builder::makeConstant(ov::element::f32, {1}, {2}); + const auto prelu 
= std::make_shared(params[0], slope); + ov::ResultVector results{std::make_shared(prelu)}; + return std::make_shared(results, params, "PRelu-1"); +} + +std::shared_ptr generate(const std::shared_ptr &node) { + const std::string mode = "average"; + const size_t n_channel = 8; + const size_t n_group = 2; + const size_t n_boxes = 3; + const size_t spatial_bin_x = 1; + const size_t spatial_bin_y = 1; + const float spatial_scale = 1; + const size_t output_dim = n_channel / (n_group * n_group); + const auto params = ngraph::builder::makeDynamicParams(ov::element::f32, {{2, n_channel, 20, 20}}); + const auto coordi = ngraph::builder::makeConstant(ov::element::f32, + {n_boxes, 5}, + {0, 1, 2, 4, 6, 1, 0, 3, 10, 4, 0, 10, 7, 11, 13}); + const auto psroi_pooling = std::make_shared(params[0], + coordi, + output_dim, + n_group, + spatial_scale, + spatial_bin_x, + spatial_bin_y, + mode); + ov::ResultVector results{std::make_shared(psroi_pooling)}; + return std::make_shared(results, params, "PSROIPooling-1"); +} + +std::shared_ptr generate(const std::shared_ptr &node) { + const auto params = ngraph::builder::makeDynamicParams(ov::element::f32, {{6}}); + const auto pad_begin = ngraph::builder::makeConstant(ov::element::i64, {1}, {4}); + const auto pad_end = ngraph::builder::makeConstant(ov::element::i64, {1}, {5}); + const auto pad = std::make_shared(params[0], pad_begin, pad_end, ov::op::PadMode::CONSTANT); + ov::ResultVector results{std::make_shared(pad)}; + return std::make_shared(results, params, "Pad-1"); +} + +std::shared_ptr generate(const std::shared_ptr &node) { + const auto in = std::make_shared(ov::element::f32, Shape{3, 4}); + return std::make_shared(in, ParameterVector{in}, "Parameter-1"); +} + std::shared_ptr generateBinaryEltwise(const std::shared_ptr &node) { - const auto params = ngraph::builder::makeDynamicParams(ov::element::f32, {{1, 2}, - {1, 2}}); + const auto params = ngraph::builder::makeDynamicParams(ov::element::f32, {{1, 2}, {1, 2}}); std::shared_ptr eltwiseNode; if (ov::is_type(node)) { eltwiseNode = std::make_shared(params.front(), params.back()); @@ -36,21 +557,175 @@ std::shared_ptr generateBinaryEltwise(const std::shared_ptr(params.front(), params.back()); } else if (ov::is_type(node)) { eltwiseNode = std::make_shared(params.front(), params.back()); + } else if (ov::is_type(node)) { + eltwiseNode = std::make_shared(params.front(), params.back()); } else { return nullptr; } - ngraph::ResultVector results{std::make_shared(eltwiseNode)}; - return std::make_shared(results, params, "BinaryEltwiseGraph"); + ov::ResultVector results{std::make_shared(eltwiseNode)}; + return std::make_shared(results, params, "BinaryEltwiseGraph"); } -} // namespace + +std::shared_ptr generateBinaryEltwiseComp(const std::shared_ptr &node) { + const auto params = ngraph::builder::makeDynamicParams(ov::element::f32, {{2}, {2}}); + std::shared_ptr eltwise; + if (ov::is_type(node)) { + eltwise = std::make_shared(params[0], params[1]); + } else if (ov::is_type(node)) { + eltwise = std::make_shared(params[0], params[1]); + } else if (ov::is_type(node)) { + eltwise = std::make_shared(params[0], params[1]); + } else if (ov::is_type(node)) { + eltwise = std::make_shared(params[0], params[1]); + } else if (ov::is_type(node)) { + eltwise = std::make_shared(params[0], params[1]); + } else if (ov::is_type(node)) { + eltwise = std::make_shared(params[0], params[1]); + } else { + return nullptr; + } + + ov::ResultVector results{std::make_shared(eltwise)}; + return std::make_shared(results, params, 
"BinaryEltwiseComparisonGraph"); +} + +std::shared_ptr generateBinaryEltwiseLogical(const std::shared_ptr &node) { + const auto params = ngraph::builder::makeDynamicParams(ov::element::boolean, {{1}, {1}}); + std::shared_ptr eltwise; + if (ov::is_type(node)) { + eltwise = std::make_shared(params[0], params[1]); + } else if (ov::is_type(node)) { + eltwise = std::make_shared(params[0], params[1]); + } else if (ov::is_type(node)) { + eltwise = std::make_shared(params[0], params[1]); + } else if (ov::is_type(node)) { + eltwise = std::make_shared(params[0], params[1]); + } else { + return nullptr; + } + + ov::ResultVector results{std::make_shared(eltwise)}; + return std::make_shared(results, ngraph::ParameterVector{params}, "BinaryEltwiseLogicalGraph"); +} + +std::shared_ptr generateBroadcast(const std::shared_ptr &node) { + const ov::Shape input_shape{}; + const auto params = ngraph::builder::makeDynamicParams(ov::element::f32, {input_shape}); + const auto shape_const = + ngraph::builder::makeConstant(ov::element::u64, {4}, {5, 4, 3, 2}); + std::shared_ptr broadcast; + if (ov::is_type(node)) { + broadcast = std::make_shared(params[0], shape_const); + } else if (ov::is_type(node)) { + broadcast = std::make_shared(params[0], shape_const); + } else { + return nullptr; + } + + return std::make_shared(broadcast, ParameterVector{params}, "BroadcastGraph"); +} + +std::shared_ptr generateConvertColor(const std::shared_ptr &node) { + const auto params = std::make_shared(ov::element::u8, Shape{1, 3, 2, 1}); + std::shared_ptr convert; + if (ov::is_type(node)) { + convert = std::make_shared(params); + } else if (ov::is_type(node)) { + convert = std::make_shared(params); + } else if (ov::is_type(node)) { + convert = std::make_shared(params); + } else if (ov::is_type(node)) { + convert = std::make_shared(params); + } else { + return nullptr; + } + + ov::ResultVector results{std::make_shared(convert)}; + return std::make_shared(results, ParameterVector{params}, "ConvertColorGraph"); +} + +std::shared_ptr generateMultiSubGraph(const std::shared_ptr &node) { + if (ov::is_type(node)) { + auto cond = std::make_shared(ov::element::boolean, Shape{1}); + auto A = std::make_shared(ov::element::f32, ov::Shape{1}, 8.0); + auto B = std::make_shared(element::f32, ov::Shape{1}, 2.0); + auto A_res = std::make_shared(A); + auto B_res = std::make_shared(B); + auto then_body = std::make_shared(OutputVector{A_res}, ParameterVector{}); + auto else_body = std::make_shared(OutputVector{B_res}, ParameterVector{}); + auto if_op = std::make_shared(cond); + if_op->set_then_body(then_body); + if_op->set_else_body(else_body); + auto res = if_op->set_output(A_res, B_res); + return std::make_shared(OutputVector{res}, ParameterVector{cond}, "MultiSubGraphOp"); + } else { + return nullptr; + } +} + +std::shared_ptr generateNmsBase(const std::shared_ptr &node) { + const auto params = ngraph::builder::makeDynamicParams(ov::element::f32, {{1, 2, 4}, {1, 2, 2}}); + const auto outputs = + ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(params)); + if (ov::is_type(node)) { + const auto nms = + std::make_shared(outputs[0], outputs[1], ov::op::v8::MatrixNms::Attributes()); + ov::ResultVector results{std::make_shared(nms)}; + return std::make_shared(results, params, "MatrixNms"); + } else if (ov::is_type(node)) { + const auto nms = std::make_shared(outputs[0], outputs[1], ov::op::v8::MulticlassNms::Attributes()); + ov::ResultVector results{std::make_shared(nms)}; + return std::make_shared(results, params, "MulticlassNms"); + } 
else { + return nullptr; + } +} + +std::shared_ptr generateReadValueBase(const std::shared_ptr &node) { + auto params = ngraph::builder::makeDynamicParams(ov::element::f32, {{1}}); + if (ov::is_type(node)) { + auto read_value = std::make_shared(params[0], "v0"); + auto add = std::make_shared(read_value, params[0]); + auto assign = std::make_shared(add, "v0"); + ov::ResultVector results{std::make_shared(add)}; + return std::make_shared(results, SinkVector{assign}, params, "ReadValue-3"); + } else if (ov::is_type(node)) { + auto variable = std::make_shared( + ov::op::util::VariableInfo{ov::PartialShape::dynamic(), ov::element::dynamic, "v0"}); + auto read_value = std::make_shared(params[0], variable); + auto add = std::make_shared(read_value, params[0]); + auto assign = std::make_shared(add, variable); + ov::ResultVector results{std::make_shared(add)}; + return std::make_shared(results, SinkVector{assign}, params, "ReadValue-6"); + } else { + return nullptr; + } +} +} // namespace template std::shared_ptr generateGraph() { - std::shared_ptr node = std::shared_ptr(new T); + std::shared_ptr node = std::shared_ptr(new T); if (ov::is_type(node)) { return generateBinaryEltwise(node); + } else if (ov::is_type(node)) { + return generateBinaryEltwiseComp(node); + } else if (ov::is_type(node)) { + return generateBinaryEltwiseLogical(node); + } else if (ov::is_type(node)) { + return generateBroadcast(node); + } else if (ov::is_type(node) || + ov::is_type(node)) { + return generateConvertColor(node); + } else if (ov::is_type(node)) { + return generateMultiSubGraph(node); + } else if (ov::is_type(node)) { + return generateNmsBase(node); + } else if (ov::is_type(node)) { + return generateReadValueBase(node); } + return generate(node); } From 6b067bc0ed3e6f08272375f179b531447048c42d Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Tue, 1 Mar 2022 16:56:15 +0300 Subject: [PATCH 139/310] Fixed install on Apple (#8302) * Fixed Apple install * Update path to libs in setupvars.sh * Fix IE_CPACK_RUNTIME_PATH for Apple * Fix wheels packaging Co-authored-by: Alexey Suhov --- cmake/developer_package/packaging.cmake | 4 ++++ scripts/setupvars/setupvars.sh | 4 ++-- src/bindings/python/wheel/setup.py | 2 +- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/cmake/developer_package/packaging.cmake b/cmake/developer_package/packaging.cmake index 0a9b5eb5106..cc287ff1f3b 100644 --- a/cmake/developer_package/packaging.cmake +++ b/cmake/developer_package/packaging.cmake @@ -15,6 +15,10 @@ function(ie_cpack_set_library_dir) set(IE_CPACK_LIBRARY_PATH runtime/lib/${ARCH_FOLDER}/$ PARENT_SCOPE) set(IE_CPACK_RUNTIME_PATH runtime/bin/${ARCH_FOLDER}/$ PARENT_SCOPE) set(IE_CPACK_ARCHIVE_PATH runtime/lib/${ARCH_FOLDER}/$ PARENT_SCOPE) + elseif(APPLE) + set(IE_CPACK_LIBRARY_PATH runtime/lib/${ARCH_FOLDER}/$ PARENT_SCOPE) + set(IE_CPACK_RUNTIME_PATH runtime/lib/${ARCH_FOLDER}/$ PARENT_SCOPE) + set(IE_CPACK_ARCHIVE_PATH runtime/lib/${ARCH_FOLDER}/$ PARENT_SCOPE) else() set(IE_CPACK_LIBRARY_PATH runtime/lib/${ARCH_FOLDER} PARENT_SCOPE) set(IE_CPACK_RUNTIME_PATH runtime/lib/${ARCH_FOLDER} PARENT_SCOPE) diff --git a/scripts/setupvars/setupvars.sh b/scripts/setupvars/setupvars.sh index fdfcc0f34f3..bc46b5aac94 100755 --- a/scripts/setupvars/setupvars.sh +++ b/scripts/setupvars/setupvars.sh @@ -34,8 +34,8 @@ if [ -e "$INSTALLDIR/runtime" ]; then export HDDL_INSTALL_DIR=$INSTALLDIR/runtime/3rdparty/hddl if [[ "$OSTYPE" == "darwin"* ]]; then - export DYLD_LIBRARY_PATH=${IE_PLUGINS_PATH}${DYLD_LIBRARY_PATH:+:DYLD_LIBRARY_PATH} - 
export LD_LIBRARY_PATH=${IE_PLUGINS_PATH}${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH} + export DYLD_LIBRARY_PATH=${IE_PLUGINS_PATH}/Release:${IE_PLUGINS_PATH}/Debug${DYLD_LIBRARY_PATH:+:DYLD_LIBRARY_PATH} + export LD_LIBRARY_PATH=${IE_PLUGINS_PATH}/Release:${IE_PLUGINS_PATH}/Debug${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH} else export LD_LIBRARY_PATH=$HDDL_INSTALL_DIR/lib:${IE_PLUGINS_PATH}${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH} fi diff --git a/src/bindings/python/wheel/setup.py b/src/bindings/python/wheel/setup.py index 7e3b41cfdab..bbc9465a415 100644 --- a/src/bindings/python/wheel/setup.py +++ b/src/bindings/python/wheel/setup.py @@ -27,7 +27,7 @@ WHEEL_LIBS_PACKAGE = 'openvino.libs' PYTHON_VERSION = f'python{sys.version_info.major}.{sys.version_info.minor}' LIBS_DIR = 'bin' if platform.system() == 'Windows' else 'lib' -CONFIG = 'Release' if platform.system() == 'Windows' else '' +CONFIG = 'Release' if platform.system() == 'Windows' or platform.system() == 'Darwin' else '' machine = platform.machine() if machine == 'x86_64' or machine == 'AMD64': From 5e023ebdd9568a59f7b93337c9e8ed9159d555cc Mon Sep 17 00:00:00 2001 From: Mikhail Nosov Date: Tue, 1 Mar 2022 17:32:36 +0300 Subject: [PATCH 140/310] Fix issue with default arguments in preprocessing python bindings (#10702) * Fix in Preprocessing python bindings - add correct default arguments for: - PreProcessSteps::convert_element_type - PostProcessSteps::convert_element_type - InputTensorInfo::set_color_format Otherwise, python users must always specify optional params E.g. instead of writing `tensor().set_color_format(ColorFormat.RGB)` python users will have to write `tensor().set_color_format(ColorFormat.RGB, [])` * Corrected 'help' output * Exposing 'openvino.runtime.Type.undefined' and use it in 'convert_element_type' documentation --- .../graph/preprocess/pre_post_process.cpp | 18 +++++++++--------- .../pyopenvino/graph/types/element_type.cpp | 5 ++++- .../tests/test_ngraph/test_preprocess.py | 10 +++++++--- src/core/include/openvino/core/model.hpp | 4 ++-- 4 files changed, 22 insertions(+), 15 deletions(-) diff --git a/src/bindings/python/src/pyopenvino/graph/preprocess/pre_post_process.cpp b/src/bindings/python/src/pyopenvino/graph/preprocess/pre_post_process.cpp index 56ef90f5db8..d7ec912524f 100644 --- a/src/bindings/python/src/pyopenvino/graph/preprocess/pre_post_process.cpp +++ b/src/bindings/python/src/pyopenvino/graph/preprocess/pre_post_process.cpp @@ -109,18 +109,18 @@ static void regclass_graph_PreProcessSteps(py::module m) { steps.def( "convert_element_type", - [](ov::preprocess::PreProcessSteps& self, ov::element::Type type) { + [](ov::preprocess::PreProcessSteps& self, ov::element::Type type = {}) { return &self.convert_element_type(type); }, - py::arg("type"), + py::arg_v("type", ov::element::undefined, "openvino.runtime.Type.undefined"), R"( Converts input tensor element type to specified type. - Input tensor must have openvino.Type.f32 data type. + Input tensor must have openvino.Type data type. Parameters ---------- type : openvino.runtime.Type - Destination type. + Destination type. If not specified, type will be taken from model input's element type. 
Returns ---------- @@ -200,18 +200,18 @@ static void regclass_graph_PostProcessSteps(py::module m) { steps.def( "convert_element_type", - [](ov::preprocess::PostProcessSteps& self, ov::element::Type type) { + [](ov::preprocess::PostProcessSteps& self, ov::element::Type type = {}) { return &self.convert_element_type(type); }, - py::arg("type"), + py::arg_v("type", ov::element::undefined, "openvino.runtime.Type.undefined"), R"( Converts tensor element type to specified type. - Tensor must have openvino.Type.f32 data type. + Tensor must have openvino.Type data type. Parameters ---------- type : Type - Destination type. + Destination type. If not specified, type will be taken from model output's element type. Returns ---------- @@ -322,7 +322,7 @@ static void regclass_graph_InputTensorInfo(py::module m) { return &self.set_color_format(format, sub_names); }, py::arg("format"), - py::arg("sub_names")); + py::arg("sub_names") = std::vector{}); info.def( "set_memory_type", diff --git a/src/bindings/python/src/pyopenvino/graph/types/element_type.cpp b/src/bindings/python/src/pyopenvino/graph/types/element_type.cpp index 204d125a330..b41d5c000d5 100644 --- a/src/bindings/python/src/pyopenvino/graph/types/element_type.cpp +++ b/src/bindings/python/src/pyopenvino/graph/types/element_type.cpp @@ -29,10 +29,13 @@ void regclass_graph_Type(py::module m) { type.attr("u32") = ov::element::u32; type.attr("u64") = ov::element::u64; type.attr("bf16") = ov::element::bf16; + type.attr("undefined") = ov::element::undefined; type.def("__repr__", [](const ov::element::Type& self) { std::string bitwidth = std::to_string(self.bitwidth()); - if (self.is_signed()) { + if (self == ov::element::undefined) { + return ""; + } else if (self.is_signed()) { return ""; } return ""; diff --git a/src/bindings/python/tests/test_ngraph/test_preprocess.py b/src/bindings/python/tests/test_ngraph/test_preprocess.py index bf0741cbfdb..c8636410789 100644 --- a/src/bindings/python/tests/test_ngraph/test_preprocess.py +++ b/src/bindings/python/tests/test_ngraph/test_preprocess.py @@ -90,7 +90,7 @@ def test_ngraph_preprocess_mean_scale_convert(): p = PrePostProcessor(function) inp2 = p.input(1) inp2.tensor().set_element_type(Type.i32) - inp2.preprocess().convert_element_type(Type.f32).mean(1.).scale(2.) 
+ inp2.preprocess().convert_element_type(Type.f32).mean(1.).scale(2.).convert_element_type() inp1 = p.input(0) inp1.preprocess().convert_element_type(Type.f32).mean(1.).custom(custom_preprocess) function = p.build() @@ -159,10 +159,14 @@ def test_ngraph_preprocess_output_postprocess(): inp.tensor().set_layout(layout1) inp.preprocess().convert_element_type(Type.f32).mean([1., 2., 3.]) out = p.output() + out.tensor().set_element_type(Type.f32) out.model().set_layout(layout1) out.postprocess().convert_element_type(Type.f32) \ .convert_layout(layout2) \ - .convert_layout(layout3).custom(custom_postprocess) + .convert_layout(layout3) \ + .custom(custom_postprocess) \ + .convert_element_type(Type.f16) \ + .convert_element_type() function = p.build() input_data = np.array([[-1, -2, -3], [-4, -5, -6]]).astype(np.int32) @@ -185,7 +189,7 @@ def test_ngraph_preprocess_spatial_static_shape(): p = PrePostProcessor(function) inp = p.input() - inp.tensor().set_layout(layout).set_spatial_static_shape(2, 2).set_color_format(color_format, []) + inp.tensor().set_layout(layout).set_spatial_static_shape(2, 2).set_color_format(color_format) inp.preprocess().convert_element_type(Type.f32).mean([1., 2.]) inp.model().set_layout(layout) out = p.output() diff --git a/src/core/include/openvino/core/model.hpp b/src/core/include/openvino/core/model.hpp index f3e6751e850..a5a357ad865 100644 --- a/src/core/include/openvino/core/model.hpp +++ b/src/core/include/openvino/core/model.hpp @@ -398,8 +398,8 @@ OPENVINO_API ov::Dimension get_batch(const std::shared_ptr& f); /// applied. Possible reason could be that layout was not set for some parameters, or batch size can't be applied to /// model at all /// -/// \param f model where to set batch_size value +/// \param model model where to set batch_size value /// \param batch_size Batch size value. For dynamic batch size, Dimension::dynamic() can be passed. -OPENVINO_API void set_batch(const std::shared_ptr& f, ov::Dimension batch_size); +OPENVINO_API void set_batch(const std::shared_ptr& model, ov::Dimension batch_size); } // namespace ov From 81cd9d86d12293d1c2fe4da77f6641d674ba0307 Mon Sep 17 00:00:00 2001 From: Nikolay Tyukaev Date: Tue, 1 Mar 2022 22:11:37 +0300 Subject: [PATCH 141/310] sphinxdirective: allow commented blocks (#10720) * sphinxdirective: allow commented blocks * minor correction --- docs/Doxyfile.config | 4 +++- docs/scripts/doxy_md_filter.py | 8 ++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/docs/Doxyfile.config b/docs/Doxyfile.config index 056f005fc73..3ee9ead3cb5 100644 --- a/docs/Doxyfile.config +++ b/docs/Doxyfile.config @@ -907,7 +907,9 @@ RECURSIVE = YES # Note that relative paths are relative to the directory from which doxygen is # run. 
-EXCLUDE = +EXCLUDE = "@OpenVINO_SOURCE_DIR@/thirdparty" \ + "@OpenVINO_SOURCE_DIR@/temp" \ + "@OpenVINO_SOURCE_DIR@/bin" # The EXCLUDE_SYMLINKS tag can be used to select whether or not files or # directories that are symbolic links (a Unix file system feature) are excluded diff --git a/docs/scripts/doxy_md_filter.py b/docs/scripts/doxy_md_filter.py index 59219ed6b2e..4ed1b50196f 100644 --- a/docs/scripts/doxy_md_filter.py +++ b/docs/scripts/doxy_md_filter.py @@ -71,6 +71,13 @@ class DoxyMDFilter: rel_path = os.path.relpath(link_path, self.input_dir).replace('\\', '/') self.content = self.content.replace(link, rel_path) + def remove_comment_block_sphinxdirective(self): + """ + Remove comment blocks from `sphinxdirective` + """ + self.content = re.sub(r'\<\!\-\-\s*?\@sphinxdirective', '@sphinxdirective', self.content) + self.content = re.sub(r'\@endsphinxdirective\s*?\-\-\>', '@endsphinxdirective', self.content) + def copy_images(self): """ Go through image links and copy them into output_folder @@ -97,6 +104,7 @@ class DoxyMDFilter: Do all processing operations on a markdown file """ self.replace_image_links() + self.remove_comment_block_sphinxdirective() self.replace_md_links() self.copy_markdown() self.copy_images() From e75ee60bec8109df810ef560ea41f6305a8dffbb Mon Sep 17 00:00:00 2001 From: Vladislav Golubev Date: Tue, 1 Mar 2022 22:33:42 +0300 Subject: [PATCH 142/310] [CPU] Disabled sequences decomposition for dynamic case (#10710) --- src/plugins/intel_cpu/src/plugin.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/plugins/intel_cpu/src/plugin.cpp b/src/plugins/intel_cpu/src/plugin.cpp index 0e7daa2e707..49372ce44af 100644 --- a/src/plugins/intel_cpu/src/plugin.cpp +++ b/src/plugins/intel_cpu/src/plugin.cpp @@ -285,6 +285,10 @@ static void TransformationUpToCPUSpecificOpSet(std::shared_ptr auto isSequencePrimitiveSupported = [](const_node_ptr &node) -> bool { const auto& data = node->input(0); const auto& data_pshape = data.get_partial_shape(); + // WA: dynamic shapes make impossible to check seq_len due to shapeOf subgraphs + // but the sequence is still supported in CPU and doesn't need to be decomposed + if (data_pshape.is_dynamic()) + return true; if (data_pshape.rank().is_static() && data_pshape.rank().get_length() > 1 && !data_pshape[1].is_static()) return false; auto max_seq_len = data.get_shape().at(1); From 9cd3bff7df95ef5f00e64edd57307894d1213242 Mon Sep 17 00:00:00 2001 From: Pavel Zamelin Date: Wed, 2 Mar 2022 03:39:30 +0300 Subject: [PATCH 143/310] Fix install failures for static libs with `EXCLUDE_FROM_ALL` (#10706) * Remove EXCLUDE_FROM_ALL for some static targets * Add install check for static libs --- cmake/developer_package/IEDevScriptsConfig.cmake | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/cmake/developer_package/IEDevScriptsConfig.cmake b/cmake/developer_package/IEDevScriptsConfig.cmake index e7f342a1b6c..29ec0fbf7af 100644 --- a/cmake/developer_package/IEDevScriptsConfig.cmake +++ b/cmake/developer_package/IEDevScriptsConfig.cmake @@ -212,6 +212,10 @@ endif() macro(ov_install_static_lib target comp) if(NOT BUILD_SHARED_LIBS) + get_target_property(target_type ${target} TYPE) + if(${target_type} STREQUAL "STATIC_LIBRARY") + set_target_properties(${target} PROPERTIES EXCLUDE_FROM_ALL FALSE) + endif() install(TARGETS ${target} EXPORT OpenVINOTargets ARCHIVE DESTINATION ${IE_CPACK_ARCHIVE_PATH} COMPONENT ${comp} ${ARGN}) endif() From d1bcb6d0fca51fcd94c9fd2c1aa0e95c9a6da74b Mon Sep 17 00:00:00 2001 From: Yuan Xu Date: Wed, 2 Mar 2022 16:10:58 
+0800 Subject: [PATCH 144/310] CVS-80445 (#10723) * Add Overview page * Revert "Add Overview page" * fix format * test formatting * test formatting * update * test formatting * minor changes --- .../installing-openvino-linux.md | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/docs/install_guides/installing-openvino-linux.md b/docs/install_guides/installing-openvino-linux.md index 4ea4225234e..7221a204ddd 100644 --- a/docs/install_guides/installing-openvino-linux.md +++ b/docs/install_guides/installing-openvino-linux.md @@ -85,14 +85,8 @@ This guide provides step-by-step instructions on how to install the Intel® Dist ```sh ./l_openvino_toolkit_p_.sh -a --cli ``` - - @sphinxdirective + > **NOTE**: To get additional information on all parameters that can be used, check up the help option: `--help`. Among others, you can find there `-s` option which offers silent mode, which together with `--eula approve` allows you to run whole installation with default values without any user inference. - .. note:: - To get additional information on all parameters that can be used, check up the help option: `--help`. Among others, you can find there `-s` option which offers silent mode, which together with `--eula approve` allows you to run whole installation with default values without any user inference. - - @endsphinxdirective - 6. Follow the instructions on your screen. During the installation you will be asked to accept the license agreement. The acceptance is required to continue. Check out the installation process on the image below:
![](../img/openvino-install-linux-run-boostrapper-script.gif) @@ -137,7 +131,7 @@ If you have more than one OpenVINO™ version on your machine, you can easily sw The environment variables are set. Next, you can download some additional tools. -## Step 4 (Optional): Download additional components +## Step 4 (Optional): Download Additional Components > **NOTE**: Since the OpenVINO™ 2022.1 release, the following development tools: Model Optimizer, Post-Training Optimization Tool, Model Downloader and other Open Model Zoo tools, Accuracy Checker, and Annotation Converter are not part of the installer. The OpenVINO™ Development Tools can only be installed via PyPI now. See [Install OpenVINO™ Development Tools](installing-model-dev-tools.md) for detailed steps. @@ -154,7 +148,7 @@ The environment variables are set. Next, you can download some additional tools. @endsphinxdirective -## Step 5 (Optional): Configure Inference on non-CPU Devices +## Step 5 (Optional): Configure Inference on Non-CPU Devices @sphinxdirective .. tab:: GPU @@ -164,7 +158,7 @@ The environment variables are set. Next, you can download some additional tools. .. tab:: NCS 2 Only if you want to perform inference on Intel® Neural Compute Stick 2 powered by the Intel® Movidius™ Myriad™ X VPU, follow the steps on :ref:`NCS2 Setup Guide `. - For more details, see the `Get Started page for Intel® Neural Compute Stick 2 `_. + .. tab:: VPU @@ -176,12 +170,12 @@ The environment variables are set. Next, you can download some additional tools. @endsphinxdirective -## Step 6: What's next? +## Step 6: What's Next? Now you are ready to try out the toolkit. Developing in Python: - * [Start with tensorflow models with OpenVINO™](https://docs.openvino.ai/latest/notebooks/101-tensorflow-to-openvino-with-output.html) + * [Start with TensorFlow models with OpenVINO™](https://docs.openvino.ai/latest/notebooks/101-tensorflow-to-openvino-with-output.html) * [Start with ONNX and PyTorch models with OpenVINO™](https://docs.openvino.ai/latest/notebooks/102-pytorch-onnx-to-openvino-with-output.html) * [Start with PaddlePaddle models with OpenVINO™](https://docs.openvino.ai/latest/notebooks/103-paddle-onnx-to-openvino-classification-with-output.html) From 86b175534acabf9c99983d2ec1ab0f8715457a95 Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Wed, 2 Mar 2022 12:16:58 +0300 Subject: [PATCH 145/310] Docs: complete migration guide (#10652) * Updated glossary * Removed references to OpenVX * Moved migration_ov_2_0 to OpenVINO User guide * Replaced IE with OV runtime * Complete migration guide * Migration 2.0 * Self-review * Added property migration guide * Fixed table * Added preprocessing migration * Update docs/OV_Runtime_UG/migration_ov_2_0/preprocessing.md Co-authored-by: Mikhail Nosov * Update docs/OV_Runtime_UG/migration_ov_2_0/preprocessing.md Co-authored-by: Mikhail Nosov * Update docs/snippets/ov_preprocessing_migration.cpp Co-authored-by: Mikhail Nosov * reivew fixes * Preprocessing intro updated * Updated config migration guide * Updates * Fixes Co-authored-by: Mikhail Nosov --- .../common_inference_pipeline.md | 200 ++++++++++++++++++ .../migration_ov_2_0/configure_devices.md | 129 +++++++++++ .../docs/common_inference_pipeline.md | 161 -------------- .../docs/graph_construction.md | 12 -- .../migration_ov_2_0/docs/intro.md | 24 --- .../migration_ov_2_0/graph_construction.md | 16 ++ docs/OV_Runtime_UG/migration_ov_2_0/intro.md | 81 +++++++ .../migration_ov_2_0/preprocessing.md | 64 ++++++ docs/img/tf_openvino.png | 3 + 
 docs/snippets/ie_common.cpp | 4 +-
 docs/snippets/ov_common.cpp | 9 +-
 docs/snippets/ov_preprocessing_migration.cpp | 124 +++++++++++
 docs/snippets/ov_properties_migration.cpp | 95 +++++++++
 13 files changed, 715 insertions(+), 207 deletions(-)
 create mode 100644 docs/OV_Runtime_UG/migration_ov_2_0/common_inference_pipeline.md
 create mode 100644 docs/OV_Runtime_UG/migration_ov_2_0/configure_devices.md
 delete mode 100644 docs/OV_Runtime_UG/migration_ov_2_0/docs/common_inference_pipeline.md
 delete mode 100644 docs/OV_Runtime_UG/migration_ov_2_0/docs/graph_construction.md
 delete mode 100644 docs/OV_Runtime_UG/migration_ov_2_0/docs/intro.md
 create mode 100644 docs/OV_Runtime_UG/migration_ov_2_0/graph_construction.md
 create mode 100644 docs/OV_Runtime_UG/migration_ov_2_0/intro.md
 create mode 100644 docs/OV_Runtime_UG/migration_ov_2_0/preprocessing.md
 create mode 100644 docs/img/tf_openvino.png
 create mode 100644 docs/snippets/ov_preprocessing_migration.cpp
 create mode 100644 docs/snippets/ov_properties_migration.cpp

diff --git a/docs/OV_Runtime_UG/migration_ov_2_0/common_inference_pipeline.md b/docs/OV_Runtime_UG/migration_ov_2_0/common_inference_pipeline.md
new file mode 100644
index 00000000000..fe1b0d3541a
--- /dev/null
+++ b/docs/OV_Runtime_UG/migration_ov_2_0/common_inference_pipeline.md
@@ -0,0 +1,200 @@
+# Inference Pipeline {#openvino_2_0_inference_pipeline}
+
+To infer a model with the OpenVINO™ Runtime, a user typically needs to perform the following steps in the application pipeline:
+- 1. Create Core object
+- 2. Read model from the disk
+  - 2.1. (Optional) Model preprocessing
+- 3. Load the model to the device
+- 4. Create an inference request
+- 5. Fill input tensors with data
+- 6. Start inference
+- 7. Process the inference results
+
+The code snippets below cover these steps and show how application code should be changed for migration to OpenVINO™ Runtime 2.0.
+
+## 1. Create Core
+
+Inference Engine API:
+
+@snippet docs/snippets/ie_common.cpp ie:create_core
+
+OpenVINO™ Runtime API 2.0:
+
+@snippet docs/snippets/ov_common.cpp ov_api_2_0:create_core
+
+## 2. Read model from the disk
+
+Inference Engine API:
+
+@snippet docs/snippets/ie_common.cpp ie:read_model
+
+OpenVINO™ Runtime API 2.0:
+
+@snippet docs/snippets/ov_common.cpp ov_api_2_0:read_model
+
+The read model has the same structure as the example in the [Model Creation](./graph_construction.md) migration guide.
+
+Note that you can combine the read and compile stages into a single call: `ov::Core::compile_model(filename, devicename)`.
+
+### 2.1 (Optional) Model preprocessing
+
+When the application's input data does not perfectly match the model's input format, preprocessing steps may need to be added.
+See the detailed guide on [how to migrate preprocessing to OpenVINO Runtime API 2.0](./preprocessing.md).
+
+## 3. Load the Model to the Device
+
+Inference Engine API:
+
+@snippet docs/snippets/ie_common.cpp ie:compile_model
+
+OpenVINO™ Runtime API 2.0:
+
+@snippet docs/snippets/ov_common.cpp ov_api_2_0:compile_model
+
+If you need to configure OpenVINO Runtime devices with additional configuration parameters, refer to the [Configure devices](./configure_devices.md) migration guide.
+
+## 4. Create an Inference Request
+
+Inference Engine API:
+
+@snippet docs/snippets/ie_common.cpp ie:create_infer_request
+
+OpenVINO™ Runtime API 2.0:
+
+@snippet docs/snippets/ov_common.cpp ov_api_2_0:create_infer_request
+
+## 5.
Fill input tensors + +Inference Engine API fills inputs as `I32` precision (**not** aligned with the original model): + +@sphinxdirective + +.. tab:: IR v10 + + .. doxygensnippet:: docs/snippets/ie_common.cpp + :language: cpp + :fragment: [ie:get_input_tensor] + +.. tab:: IR v11 + + .. doxygensnippet:: docs/snippets/ie_common.cpp + :language: cpp + :fragment: [ie:get_input_tensor] + +.. tab:: ONNX + + .. doxygensnippet:: docs/snippets/ie_common.cpp + :language: cpp + :fragment: [ie:get_input_tensor] + +.. tab:: Model created in code + + .. doxygensnippet:: docs/snippets/ie_common.cpp + :language: cpp + :fragment: [ie:get_input_tensor] + +@endsphinxdirective + +OpenVINO™ Runtime API 2.0 fills inputs as `I64` precision (aligned with the original model):: + +@sphinxdirective + +.. tab:: IR v10 + + .. doxygensnippet:: docs/snippets/ov_common.cpp + :language: cpp + :fragment: [ov_api_2_0:get_input_tensor_v10] + +.. tab:: IR v11 + + .. doxygensnippet:: docs/snippets/ov_common.cpp + :language: cpp + :fragment: [ov_api_2_0:get_input_tensor_aligned] + +.. tab:: ONNX + + .. doxygensnippet:: docs/snippets/ov_common.cpp + :language: cpp + :fragment: [ov_api_2_0:get_input_tensor_aligned] + +.. tab:: Model created in code + + .. doxygensnippet:: docs/snippets/ov_common.cpp + :language: cpp + :fragment: [ov_api_2_0:get_input_tensor_aligned] + +@endsphinxdirective + +## 6. Start Inference + +Inference Engine API: + +@snippet docs/snippets/ie_common.cpp ie:inference + +OpenVINO™ Runtime API 2.0: + +@snippet docs/snippets/ov_common.cpp ov_api_2_0:inference + +## 7. Process the Inference Results + +Inference Engine API processes outputs as `I32` precision (**not** aligned with the original model): + +@sphinxdirective + +.. tab:: IR v10 + + .. doxygensnippet:: docs/snippets/ie_common.cpp + :language: cpp + :fragment: [ie:get_output_tensor] + +.. tab:: IR v11 + + .. doxygensnippet:: docs/snippets/ie_common.cpp + :language: cpp + :fragment: [ie:get_output_tensor] + +.. tab:: ONNX + + .. doxygensnippet:: docs/snippets/ie_common.cpp + :language: cpp + :fragment: [ie:get_output_tensor] + +.. tab:: Model created in code + + .. doxygensnippet:: docs/snippets/ie_common.cpp + :language: cpp + :fragment: [ie:get_output_tensor] + +@endsphinxdirective + +OpenVINO™ Runtime API 2.0 processes outputs: +- For IR v10 as `I32` precision (**not** aligned with the original model) to match **old** behavior +- For IR v11, ONNX, ov::Model, Paddle as `I64` precision (aligned with the original model) to match **new** behavior + +@sphinxdirective + +.. tab:: IR v10 + + .. doxygensnippet:: docs/snippets/ov_common.cpp + :language: cpp + :fragment: [ov_api_2_0:get_output_tensor_v10] + +.. tab:: IR v11 + + .. doxygensnippet:: docs/snippets/ov_common.cpp + :language: cpp + :fragment: [ov_api_2_0:get_output_tensor_aligned] + +.. tab:: ONNX + + .. doxygensnippet:: docs/snippets/ov_common.cpp + :language: cpp + :fragment: [ov_api_2_0:get_output_tensor_aligned] + +.. tab:: Model created in code + + .. 
doxygensnippet:: docs/snippets/ov_common.cpp + :language: cpp + :fragment: [ov_api_2_0:get_output_tensor_aligned] + +@endsphinxdirective diff --git a/docs/OV_Runtime_UG/migration_ov_2_0/configure_devices.md b/docs/OV_Runtime_UG/migration_ov_2_0/configure_devices.md new file mode 100644 index 00000000000..1286d2d6746 --- /dev/null +++ b/docs/OV_Runtime_UG/migration_ov_2_0/configure_devices.md @@ -0,0 +1,129 @@ +# Configure devices {#openvino_2_0_configure_devices} + +### Introduction + +Inference Engine API provides an [ability to configure devices](https://docs.openvino.ai/2021.4/openvino_docs_IE_DG_InferenceEngine_QueryAPI.html) via configuration keys and [get device specific metrics](https://docs.openvino.ai/2021.4/openvino_docs_IE_DG_InferenceEngine_QueryAPI.html#getmetric). The values taken from `InferenceEngine::Core::GetConfig` are requested by its string name, while return type is `InferenceEngine::Parameter` and users don't know what is the actual type is stored in this parameter. + +OpenVINO Runtime API 2.0 solves these issues by introducing [properties](../supported_plugins/config_properties.md), which unify metrics and configuration key concepts, but the main advantage of properties - they have C++ type: + +``` +static constexpr Property full_name{"FULL_DEVICE_NAME"}; +``` + +And the property can be requested from an inference device as: + +@snippet ov_properties_migration.cpp core_get_ro_property + +The snippets below show how to migrate from Inference Engine device configuration to OpenVINO Runtime API 2.0 steps. + +### Set configuration values + +Inference Engine API: + +@sphinxdirective + +.. tab:: Devices + + .. doxygensnippet:: docs/snippets/ov_properties_migration.cpp + :language: cpp + :fragment: [core_set_config] + +.. tab:: Model Loading + + .. doxygensnippet:: docs/snippets/ov_properties_migration.cpp + :language: cpp + :fragment: [core_load_network] + +.. tab:: Execution + + .. doxygensnippet:: docs/snippets/ov_properties_migration.cpp + :language: cpp + :fragment: [executable_network_set_config] + +@endsphinxdirective + +OpenVINO Runtime API 2.0: + +@sphinxdirective + +.. tab:: Devices + + .. doxygensnippet:: docs/snippets/ov_properties_migration.cpp + :language: cpp + :fragment: [core_set_property] + +.. tab:: Model Loading + + .. doxygensnippet:: docs/snippets/ov_properties_migration.cpp + :language: cpp + :fragment: [core_compile_model] + +.. tab:: Execution + + .. doxygensnippet:: docs/snippets/ov_properties_migration.cpp + :language: cpp + :fragment: [compiled_model_set_property] + +@endsphinxdirective + +### Get information + +Inference Engine API: + +@sphinxdirective + +.. tab:: Device configuration + + .. doxygensnippet:: docs/snippets/ov_properties_migration.cpp + :language: cpp + :fragment: [core_get_config] + +.. tab:: Device metrics + + .. doxygensnippet:: docs/snippets/ov_properties_migration.cpp + :language: cpp + :fragment: [core_get_metric] + +.. tab:: Execution config + + .. doxygensnippet:: docs/snippets/ov_properties_migration.cpp + :language: cpp + :fragment: [executable_network_get_metric] + +.. tab:: Execution metrics + + .. doxygensnippet:: docs/snippets/ov_properties_migration.cpp + :language: cpp + :fragment: [executable_network_get_config] + +@endsphinxdirective + +OpenVINO Runtime API 2.0: + +@sphinxdirective + +.. tab:: Device configuration + + .. doxygensnippet:: docs/snippets/ov_properties_migration.cpp + :language: cpp + :fragment: [core_get_rw_property] + +.. tab:: Device metrics + + .. 
doxygensnippet:: docs/snippets/ov_properties_migration.cpp + :language: cpp + :fragment: [core_get_ro_property] + +.. tab:: Execution config + + .. doxygensnippet:: docs/snippets/ov_properties_migration.cpp + :language: cpp + :fragment: [compiled_model_get_rw_property] + +.. tab:: Execution metrics + + .. doxygensnippet:: docs/snippets/ov_properties_migration.cpp + :language: cpp + :fragment: [compiled_model_get_ro_property] + +@endsphinxdirective diff --git a/docs/OV_Runtime_UG/migration_ov_2_0/docs/common_inference_pipeline.md b/docs/OV_Runtime_UG/migration_ov_2_0/docs/common_inference_pipeline.md deleted file mode 100644 index 7a524ea9008..00000000000 --- a/docs/OV_Runtime_UG/migration_ov_2_0/docs/common_inference_pipeline.md +++ /dev/null @@ -1,161 +0,0 @@ -# OpenVINO™ Inference Pipeline {#openvino_inference_pipeline} - -Usually to inference network with the OpenVINO™ toolkit users need to do next steps: - 1. Create Core - 2. (Optional) Read model from the disk - 2.1. Configure Input and Output of the Model - 3. Load the Model to the Device - 4. Create an Inference Request - 5. Prepare Input - 6. Start Inference - 7. Process the Inference Results - -Code snippets below cover these steps and show how application code should be changed for migration to OpenVINO™ 2.0. - -## 1. Create Core - -Inference Engine API: - -@snippet snippets/ie_common.cpp ie:create_core - -OpenVINO™ 2.0 API: - -@snippet snippets/ov_common.cpp ov_api_2_0:create_core - -## 2. (Optional) Read model from the disk - -Inference Engine API: - -@snippet snippets/ie_common.cpp ie:read_model - -OpenVINO™ 2.0 API: - -@snippet snippets/ov_common.cpp ov_api_2_0:read_model - -Read model has the same structure as in the example from [OpenVINO™ Graph Construction](@ref openvino_graph_construction) guide. - -### 2.1 Configure Input and Output of the Model - -Inference Engine API: - -@snippet snippets/ie_common.cpp ie:get_inputs_outputs - -OpenVINO™ 2.0 API: - -@snippet snippets/ov_common.cpp ov_api_2_0:get_inputs_outputs - -## 3. Load the Model to the Device - -Inference Engine API: - -@snippet snippets/ie_common.cpp ie:compile_model - -OpenVINO™ 2.0 API: - -@snippet snippets/ov_common.cpp ov_api_2_0:compile_model - -## 4. Create an Inference Request - -Inference Engine API: - -@snippet snippets/ie_common.cpp ie:create_infer_request - -OpenVINO™ 2.0 API: - -@snippet snippets/ov_common.cpp ov_api_2_0:create_infer_request - -## 5. Prepare input - -### IR v10 - -Inference Engine API: - -@snippet snippets/ie_common.cpp ie:get_input_tensor - -OpenVINO™ 2.0 API: - -@snippet snippets/ov_common.cpp ov_api_2_0:get_input_tensor_v10 - -### IR v11 - -Inference Engine API: - -@snippet snippets/ie_common.cpp ie:get_input_tensor - -OpenVINO™ 2.0 API: - -@snippet snippets/ov_common.cpp ov_api_2_0:get_input_tensor_aligned - -### ONNX - -Inference Engine API: - -@snippet snippets/ie_common.cpp ie:get_input_tensor - -OpenVINO™ 2.0 API: - -@snippet snippets/ov_common.cpp ov_api_2_0:get_input_tensor_aligned - -### From Function - -Inference Engine API: - -@snippet snippets/ie_common.cpp ie:get_input_tensor - -OpenVINO™ 2.0 API: - -@snippet snippets/ov_common.cpp ov_api_2_0:get_input_tensor_aligned - -## 6. Start Inference - -Inference Engine API: - -@snippet snippets/ie_common.cpp ie:inference - -OpenVINO™ 2.0 API: - -@snippet snippets/ov_common.cpp ov_api_2_0:inference - - -## 7. 
Process the Inference Results - -### IR v10 - -Inference Engine API: - -@snippet snippets/ie_common.cpp ie:get_output_tensor - -OpenVINO™ 2.0 API: - -@snippet snippets/ov_common.cpp ov_api_2_0:get_output_tensor_v10 - -### IR v11 - -Inference Engine API: - -@snippet snippets/ie_common.cpp ie:get_output_tensor - -OpenVINO™ 2.0 API: - -@snippet snippets/ov_common.cpp ov_api_2_0:get_output_tensor_aligned - -### ONNX - -Inference Engine API: - -@snippet snippets/ie_common.cpp ie:get_output_tensor - -OpenVINO™ 2.0 API: - -@snippet snippets/ov_common.cpp ov_api_2_0:get_output_tensor_aligned - -### From Function - -Inference Engine API: - -@snippet snippets/ie_common.cpp ie:get_output_tensor - -OpenVINO™ 2.0 API: - -@snippet snippets/ov_common.cpp ov_api_2_0:get_output_tensor_aligned - diff --git a/docs/OV_Runtime_UG/migration_ov_2_0/docs/graph_construction.md b/docs/OV_Runtime_UG/migration_ov_2_0/docs/graph_construction.md deleted file mode 100644 index 8c7f22c17df..00000000000 --- a/docs/OV_Runtime_UG/migration_ov_2_0/docs/graph_construction.md +++ /dev/null @@ -1,12 +0,0 @@ -# OpenVINO™ graph construction {#openvino_graph_construction} - -OpenVINO™ 2.0 includes nGraph engine in a common part. The `ngraph` namespace was changed to `ov`. -Code snippets below show how application code should be changed for migration to OpenVINO™ 2.0. - -nGraph API: - -@snippet snippets/ngraph.cpp ngraph:graph - -OpenVINO™ 2.0 API: - -@snippet snippets/ov_graph.cpp ov:graph diff --git a/docs/OV_Runtime_UG/migration_ov_2_0/docs/intro.md b/docs/OV_Runtime_UG/migration_ov_2_0/docs/intro.md deleted file mode 100644 index 90ac48cdff9..00000000000 --- a/docs/OV_Runtime_UG/migration_ov_2_0/docs/intro.md +++ /dev/null @@ -1,24 +0,0 @@ -# OpenVINO™ API 2.0 Transition Guide {#openvino_2_0_transition_guide} - -@sphinxdirective - -.. toctree:: - :maxdepth: 1 - :hidden: - - openvino_inference_pipeline - openvino_graph_construction - -@endsphinxdirective - -The OpenVINO™ API 2.0 introduced in order to simplify migration from other frameworks and make the OpenVINO™ API more user-friendly. -The list with differences between APIs below: - - - OpenVINO™ API 2.0 uses tensor names or indexes to work with Inputs or Outputs, the old API works with operation names. - - Structures for Shapes, element types were changed. - - Naming style was changed. The old API uses CamelCaseStyle and OpenVINO™ API 2.0 uses snake_case for function names. - - Namespaces were aligned between components. - -Please look at next transition guides to understand how transit own application to OpenVINO™ API 2.0. - - [OpenVINO™ Graph Construction](graph_construction.md) - - [OpenVINO™ Common Inference pipeline](common_inference_pipeline.md) diff --git a/docs/OV_Runtime_UG/migration_ov_2_0/graph_construction.md b/docs/OV_Runtime_UG/migration_ov_2_0/graph_construction.md new file mode 100644 index 00000000000..0ec5ec644ee --- /dev/null +++ b/docs/OV_Runtime_UG/migration_ov_2_0/graph_construction.md @@ -0,0 +1,16 @@ +# Model creation in runtime {#openvino_2_0_model_creation} + +OpenVINO™ Runtime API 2.0 includes nGraph engine as a common part. The `ngraph` namespace was changed to `ov`, all other ngraph API is preserved as is. +Code snippets below show how application code should be changed for migration to OpenVINO™ Runtime API 2.0. 
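+
+For orientation, here is a minimal, self-contained sketch of what model creation with the `ov` namespace looks like. It is an illustrative example only - the chosen operations, shapes, and model name are assumptions made for this guide, not the code behind the snippets referenced below:
+
+```
+#include <openvino/openvino.hpp>
+#include <openvino/opsets/opset8.hpp>
+
+// Sketch: build a tiny Parameter -> ReLU -> Result model using the ov namespace
+// (the equivalent calls previously lived in the ngraph namespace).
+std::shared_ptr<ov::Model> make_toy_model() {
+    auto input = std::make_shared<ov::opset8::Parameter>(ov::element::f32, ov::Shape{1, 3, 224, 224});
+    auto relu = std::make_shared<ov::opset8::Relu>(input);
+    auto result = std::make_shared<ov::opset8::Result>(relu);
+    return std::make_shared<ov::Model>(ov::ResultVector{result}, ov::ParameterVector{input}, "toy_model");
+}
+```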
+ +### nGraph API + +@snippet snippets/ngraph.cpp ngraph:graph + +### OpenVINO™ Runtime API 2.0 + +@snippet snippets/ov_graph.cpp ov:graph + +**See also:** +- [Hello Model Creation C++ Sample](../../../samples/cpp/model_creation_sample/README.md) +- [Hello Model Creation Python Sample](../../../samples/python/model_creation_sample/README.md) diff --git a/docs/OV_Runtime_UG/migration_ov_2_0/intro.md b/docs/OV_Runtime_UG/migration_ov_2_0/intro.md new file mode 100644 index 00000000000..74dfa850149 --- /dev/null +++ b/docs/OV_Runtime_UG/migration_ov_2_0/intro.md @@ -0,0 +1,81 @@ +# OpenVINO™ 2.0 Transition Guide {#openvino_2_0_transition_guide} + +@sphinxdirective + +.. toctree:: + :maxdepth: 1 + :hidden: + + openvino_2_0_inference_pipeline + openvino_2_0_configure_devices + openvino_2_0_preprocessing + openvino_2_0_model_creation + +@endsphinxdirective + +### Introduction + +Older versions of OpenVINO (prior to 2022.1) required changes to application logic when a user migrates from frameworks like TensorFlow, ONNX Runtime, PyTorch, PaddlePaddle, etc. The changes in application logic are caused by the following: + +- Model Optimizer changed input precisions for some inputs. For example, natural language processing models with `I64` inputs get an `I32` input element type. +- Model Optimizer changed layouts for TensorFlow models (see [Layouts in OpenVINO](../layout_overview.md)). As a result, a user needs to use a different layout for the input data compared to the original framework: +![tf_openvino] +- Inference Engine API (`InferenceEngine::CNNNetwork`) also applied some conversion rules for input and output precisions because of device plugin limitations. +- Users need to specify input shapes during model conversion in Model Optimizer and work with static shapes in the application. + +OpenVINO Runtime API 2.0 is introduced to align the logic of working with models with the original frameworks - no layout and precision changes, and inputs and outputs are addressed by tensor names and indices. OpenVINO Runtime is composed of the Inference Engine API used for inference and the ngraph API targeted at working with models and operations. OpenVINO API 2.0 has a common structure, naming convention, and namespaces, and removes duplicated structures. See [How to migrate to OpenVINO 2.0 API](./common_inference_pipeline.md) for details. + +> **NOTE**: Most importantly, your existing application can continue working with OpenVINO Runtime 2.0 as it used to, but we recommend migrating to the new API to unlock additional features like [Preprocessing](../preprocessing_overview.md) and [Dynamic shapes support](../DynamicBatching.md). + +### Introducing IR v11 + +To support these features, OpenVINO introduced IR v11, which is generated by Model Optimizer by default since 2022.1. The model represented in IR v11 fully matches the original model in the original framework format in terms of inputs and outputs. Also, a user does not have to specify input shapes during the conversion, so the resulting IR v11 contains `-1` to denote undefined dimensions (see [Working with dynamic shapes](../DynamicBatching.md) to fully utilize this feature; or [Changing input shapes](../ShapeInference.md) to reshape to static shapes in the application). + +It is also important to mention that IR v11 is fully compatible with old applications written with the Inference Engine API from older versions of OpenVINO.
This is achieved by adding additional runtime information to IR v11 that is responsible for backward-compatible behavior: once an IR v11 model is read by an old Inference Engine based application, it is internally converted to IR v10. + +IR v11 is supported by all OpenVINO Development Tools, including the Post-Training Optimization Tool, Benchmark app, etc. + +### IR v10 compatibility + +OpenVINO Runtime API 2.0 also supports models in IR v10 for backward compatibility. So, if a user has an IR v10 model, it can be fed to OpenVINO Runtime as well (see [migration steps](./common_inference_pipeline.md)). + +Some OpenVINO Development Tools also support both IR v10 and IR v11 as input: +- Accuracy Checker supports IR v10, but requires an additional option to denote which API is used underneath. +- [Compile tool](../../../tools/compile_tool/README.md) compiles the model to be used with the OpenVINO 2.0 API by default. If a user wants to use the resulting compiled blob with the Inference Engine API, the additional `ov_api_1_0` option should be passed. + +However, the following OpenVINO tools don't support IR v10 as input and require an IR v11 regenerated from the original model with the latest Model Optimizer: +- Post-Training Optimization Tool +- Deep Learning Workbench + +### Differences between Inference Engine and OpenVINO Runtime 2.0 + +The Inference Engine and ngraph APIs are not deprecated; they are fully functional and can be used in applications. However, we recommend migrating to the new OpenVINO Runtime API 2.0, because it already provides additional features, and this list will be extended later. The new API supports the following additional features: +- [Working with dynamic shapes](../DynamicBatching.md). The feature is quite useful for getting the best performance from NLP (Natural Language Processing) models, super-resolution models, and others that accept dynamic input shapes. +- [Preprocessing of the model](../preprocessing_overview.md) to add preprocessing operations to inference models, fully occupying the accelerator and freeing CPU resources. + +To define the API-level differences between Inference Engine and OpenVINO Runtime API 2.0, let's define two types of behavior: +- **Old behavior** of OpenVINO assumes that: + - Model Optimizer can change input element types and the order of dimensions (layouts) compared to the model from the original framework. + - Inference Engine can override input and output element types. + - Inference Engine API operates with operation names to address inputs and outputs (e.g. InferenceEngine::InferRequest::GetBlob). + - Compiling models with dynamic input shapes is not supported. +- **New behavior** assumes full model alignment with the framework and is implemented in OpenVINO 2.0: + - Model Optimizer preserves input element types and the order of dimensions (layouts), and stores tensor names from the original models. + - OpenVINO Runtime 2.0 reads models in any format (IR v10, IR v11, ONNX, PaddlePaddle, etc.) as is. + - OpenVINO Runtime API 2.0 operates with tensor names. Note that the difference between tensor names and operation names is that if a single operation has several output tensors, such tensors cannot be identified uniquely by an operation name, so tensor names are used for addressing, as is usually done in the frameworks. + - OpenVINO Runtime API 2.0 can also address input and output tensors by index.
Some model formats like ONNX are sensitive to the order of inputs and outputs, and this order is preserved by OpenVINO Runtime 2.0. + +The table below shows which behavior, **old** or **new**, is used depending on the model source and the API used. + +| API | IR v10 | IR v11 | ONNX file | Model created in code | +|-------------------------------|---------|---------|-----------|-----------------------| +|Inference Engine / ngraph APIs | Old | Old | Old | Old | +|OpenVINO Runtime API 2.0 | Old | New | New | New | + +See the following transition guides to understand how to migrate an Inference Engine-based application to OpenVINO™ Runtime API 2.0: + - [OpenVINO™ Common Inference pipeline](common_inference_pipeline.md) + - [Preprocess your model](./preprocessing.md) + - [Configure device](./configure_devices.md) + - [OpenVINO™ Model Creation](graph_construction.md) + +[tf_openvino]: ../../img/tf_openvino.png diff --git a/docs/OV_Runtime_UG/migration_ov_2_0/preprocessing.md b/docs/OV_Runtime_UG/migration_ov_2_0/preprocessing.md new file mode 100644 index 00000000000..9288249dbe9 --- /dev/null +++ b/docs/OV_Runtime_UG/migration_ov_2_0/preprocessing.md @@ -0,0 +1,64 @@ +# Preprocessing {#openvino_2_0_preprocessing} + +### Introduction + +The Inference Engine API has preprocessing capabilities in the `InferenceEngine::CNNNetwork` class. Such preprocessing information is not a part of the main inference graph executed by the [OpenVINO devices](../supported_plugins/Device_Plugins.md), so it is stored and executed separately before the inference stage: +- Preprocessing operations are executed on the CPU for most of the OpenVINO inference plugins. So, instead of keeping accelerators fully occupied, the CPU is also loaded with computational tasks. +- Preprocessing information stored in `InferenceEngine::CNNNetwork` is lost when the model is saved back to the IR file format. + +OpenVINO Runtime API 2.0 introduces a [new way of adding preprocessing operations to the model](../preprocessing_overview.md) - each preprocessing or postprocessing operation is integrated directly into the model and compiled together with the inference graph: +- First, add preprocessing operations using `ov::preprocess::PrePostProcessor` +- Then, compile the model on the target device using `ov::Core::compile_model` + +Having preprocessing operations as a part of the OpenVINO opset makes it possible to read and serialize the preprocessed model in the IR file format. + +It is also important to mention that, since OpenVINO 2.0, the Runtime API does not assume any default layouts like Inference Engine did: for example, Inference Engine supposed both the `{ 1, 224, 224, 3 }` and `{ 1, 3, 224, 224 }` shapes to have the `NCHW` layout, while only the latter actually is `NCHW`. So, some preprocessing capabilities in OpenVINO Runtime API 2.0 require explicitly set layouts; see [Layout overview](../layout_overview.md) for how to do it. For example, to perform image resizing by the `H` and `W` dimensions, preprocessing needs to know which dimensions are `H` and `W`. + +> **NOTE**: Use Model Optimizer preprocessing capabilities to insert preprocessing operations into the model and optimize them. In this case you don't need to read the model in the runtime application and set up preprocessing there, and you can use the [model caching feature](../Model_caching_overview.md) to improve time to inference. + +The steps below demonstrate how to migrate preprocessing scenarios from the Inference Engine API to OpenVINO Runtime API 2.0.
+The snippets suppose we need to preprocess a model input with tensor name `tensor_name`, in Inferenece Engine API using operation names to address the data, it's called `operation_name`. + +### Mean and scale values + +Inference Engine API: + +@snippet docs/snippets/ov_preprocessing_migration.cpp mean_scale + +OpenVINO Runtime API 2.0: + +@snippet docs/snippets/ov_preprocessing_migration.cpp ov_mean_scale + +### Precision and layout conversions + +Inference Engine API: + +@snippet docs/snippets/ov_preprocessing_migration.cpp conversions + +OpenVINO Runtime API 2.0: + +@snippet docs/snippets/ov_preprocessing_migration.cpp ov_conversions + +### Image scaling + +Inference Engine API: + +@snippet docs/snippets/ov_preprocessing_migration.cpp image_scale + +OpenVINO Runtime API 2.0: + +@snippet docs/snippets/ov_preprocessing_migration.cpp ov_image_scale + +### Color space conversions + +Inference Engine API: + +@snippet docs/snippets/ov_preprocessing_migration.cpp color_space + +OpenVINO Runtime API 2.0: + +@snippet docs/snippets/ov_preprocessing_migration.cpp ov_color_space + +**See also:** +- [Preprocessing details](../preprocessing_details.md) +- [NV12 classification sample](../../../samples/cpp/hello_nv12_input_classification/README.md) diff --git a/docs/img/tf_openvino.png b/docs/img/tf_openvino.png new file mode 100644 index 00000000000..62e43e48907 --- /dev/null +++ b/docs/img/tf_openvino.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c369ce9a1b7f24929aa2f7d954ff577e0f439ea049296dd13741838b91615f38 +size 47087 diff --git a/docs/snippets/ie_common.cpp b/docs/snippets/ie_common.cpp index 25cf78a0c43..7c408b14bf0 100644 --- a/docs/snippets/ie_common.cpp +++ b/docs/snippets/ie_common.cpp @@ -13,10 +13,8 @@ int main() { InferenceEngine::CNNNetwork network = core.ReadNetwork("model.xml"); //! [ie:read_model] - //! [ie:get_inputs_outputs] InferenceEngine::InputsDataMap inputs = network.getInputsInfo(); InferenceEngine::OutputsDataMap outputs = network.getOutputsInfo(); - //! [ie:get_inputs_outputs] //! [ie:compile_model] InferenceEngine::ExecutableNetwork exec_network = core.LoadNetwork(network, "CPU"); @@ -29,7 +27,6 @@ int main() { //! [ie:get_input_tensor] InferenceEngine::Blob::Ptr input_blob1 = infer_request.GetBlob(inputs.begin()->first); // fill first blob - InferenceEngine::SizeVector dims1 = input_blob1->getTensorDesc().getDims(); InferenceEngine::MemoryBlob::Ptr minput1 = InferenceEngine::as(input_blob1); if (minput1) { // locked memory holder should be alive all time while access to its @@ -39,6 +36,7 @@ int main() { auto data = minputHolder.as::value_type*>(); // Fill data ... } + InferenceEngine::Blob::Ptr input_blob2 = infer_request.GetBlob("data2"); // fill first blob InferenceEngine::MemoryBlob::Ptr minput2 = InferenceEngine::as(input_blob2); diff --git a/docs/snippets/ov_common.cpp b/docs/snippets/ov_common.cpp index ca3c8f83fa1..00b834954b9 100644 --- a/docs/snippets/ov_common.cpp +++ b/docs/snippets/ov_common.cpp @@ -62,16 +62,11 @@ int main() { //! [ov_api_2_0:create_core] //! [ov_api_2_0:read_model] - std::shared_ptr network = core.read_model("model.xml"); + std::shared_ptr model = core.read_model("model.xml"); //! [ov_api_2_0:read_model] - //! [ov_api_2_0:get_inputs_outputs] - std::vector> inputs = network->inputs(); - std::vector> outputs = network->outputs(); - //! [ov_api_2_0:get_inputs_outputs] - //! 
[ov_api_2_0:compile_model] - ov::CompiledModel compiled_model = core.compile_model(network, "CPU"); + ov::CompiledModel compiled_model = core.compile_model(model, "CPU"); //! [ov_api_2_0:compile_model] //! [ov_api_2_0:create_infer_request] diff --git a/docs/snippets/ov_preprocessing_migration.cpp b/docs/snippets/ov_preprocessing_migration.cpp new file mode 100644 index 00000000000..032afb9c8b8 --- /dev/null +++ b/docs/snippets/ov_preprocessing_migration.cpp @@ -0,0 +1,124 @@ +// Copyright (C) 2018-2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// +#include +#include +#include + +#include "inference_engine.hpp" + +int main_new() { + std::string model_path; + std::string tensor_name; + + ov::Core core; + std::shared_ptr model = core.read_model(model_path); + ov::preprocess::PrePostProcessor ppp(model); + + { + //! [ov_mean_scale] +ov::preprocess::PrePostProcessor ppp(model); +ov::preprocess::InputInfo& input = ppp.input(tensor_name); +// we only need to know where is C dimension +input.model().set_layout("...C"); +// specify scale and mean values, order of operations is important +input.preprocess().mean(116.78f).scale({ 57.21f, 57.45f, 57.73f }); +// insert preprocessing operations to the 'model' +model = ppp.build(); + //! [ov_mean_scale] + } + + { + //! [ov_conversions] +ov::preprocess::PrePostProcessor ppp(model); +ov::preprocess::InputInfo& input = ppp.input(tensor_name); +input.tensor().set_layout("NHWC").set_element_type(ov::element::u8); +input.model().set_layout("NCHW"); +// layout and precision conversion is inserted automatically, +// because tensor format != model input format +model = ppp.build(); + //! [ov_conversions] + } + + { + //! [ov_color_space] +ov::preprocess::PrePostProcessor ppp(model); +ov::preprocess::InputInfo& input = ppp.input(tensor_name); +input.tensor().set_color_format(ov::preprocess::ColorFormat::NV12_TWO_PLANES); +// add NV12 to BGR conversion +input.preprocess().convert_color(ov::preprocess::ColorFormat::BGR); +// and insert operations to the model +model = ppp.build(); + //! [ov_color_space] + } + + { + //! [ov_image_scale] +ov::preprocess::PrePostProcessor ppp(model); +ov::preprocess::InputInfo& input = ppp.input(tensor_name); +// scale from the specified tensor size +input.tensor().set_spatial_static_shape(448, 448); +// need to specify H and W dimensions in model, others are not important +input.model().set_layout("??HW"); +// scale to model shape +input.preprocess().resize(ov::preprocess::ResizeAlgorithm::RESIZE_LINEAR); +// and insert operations to the model +model = ppp.build(); + //! [ov_image_scale] + } + +return 0; +} + +int main_old() { + std::string model_path; + std::string operation_name; + + InferenceEngine::Core core; + InferenceEngine::CNNNetwork network = core.ReadNetwork(model_path); + + { + //! [mean_scale] +auto preProcess = network.getInputsInfo()[operation_name]->getPreProcess(); +preProcess.init(3); +preProcess[0]->meanValue = 116.78f; +preProcess[1]->meanValue = 116.78f; +preProcess[2]->meanValue = 116.78f; +preProcess[0]->stdScale = 57.21f; +preProcess[1]->stdScale = 57.45f; +preProcess[2]->stdScale = 57.73f; +preProcess.setVariant(InferenceEngine::MEAN_VALUE); + //! [mean_scale] + } + + { + //! [conversions] +auto inputInfo = network.getInputsInfo()[operation_name]; +inputInfo->setPrecision(InferenceEngine::Precision::U8); +inputInfo->setLayout(InferenceEngine::Layout::NHWC); +// model input layout is always NCHW in Inference Engine +// for shapes with 4 dimensions + //! [conversions] + } + + { + //! 
[color_space] +auto preProcess = network.getInputsInfo()[operation_name]->getPreProcess(); +// Inference Engine supposes NV12 as two inputs which need to be passed +// as InferenceEngine::NV12Blob composed of two Y and UV planes +preProcess.setColorFormat(InferenceEngine::NV12); + //! [color_space] + } + + { + //! [image_scale] +auto preProcess = network.getInputsInfo()[operation_name]->getPreProcess(); +// Inference Engine supposes input for resize is always in NCHW layout +// while for OpenVINO Runtime API 2.0 `H` and `W` dimensions must be specified +// Also, current code snippet supposed resize from dynamic shapes +preProcess.setResizeAlgorithm(InferenceEngine::ResizeAlgorithm::RESIZE_BILINEAR); + //! [image_scale] + } + + return 0; +} diff --git a/docs/snippets/ov_properties_migration.cpp b/docs/snippets/ov_properties_migration.cpp new file mode 100644 index 00000000000..101200a3f12 --- /dev/null +++ b/docs/snippets/ov_properties_migration.cpp @@ -0,0 +1,95 @@ +#include +#include + +int main_new() { + ov::Core core; + +//! [core_get_ro_property] +// 'auto' is automatically deduced as std::string +// since the type is stored in the property +auto full_device_name = core.get_property("CPU", ov::device::full_name); +//! [core_get_ro_property] + +//! [core_get_rw_property] +// 'auto' is automatically deduced as ov::streams::Num +// since the type is stored in the property +auto num_streams = core.get_property("CPU", ov::streams::num); +//! [core_get_rw_property] + +//! [core_set_property] +core.set_property("CPU", ov::enable_profiling(true)); +//! [core_set_property] + +auto model = core.read_model("sample.xml"); +//! [core_compile_model] +auto compiled_model = core.compile_model(model, "MULTI", + ov::device::priorities("GPU", "CPU"), + ov::hint::performance_mode(ov::hint::PerformanceMode::THROUGHPUT), + ov::hint::inference_precision(ov::element::f32)); +//! [core_compile_model] + +//! [compiled_model_set_property] +// turn CPU off for multi-device execution +compiled_model.set_property(ov::device::priorities("GPU")); +//! [compiled_model_set_property] + +{ +//! [compiled_model_get_ro_property] +// 'auto' is deduced to 'uint32_t' +auto nireq = compiled_model.get_property(ov::optimal_number_of_infer_requests); +//! [compiled_model_get_ro_property] +} + +{ +//! [compiled_model_get_rw_property] +ov::hint::PerformanceMode perf_model = compiled_model.get_property(ov::hint::performance_mode); +//! [compiled_model_get_rw_property] +} + + +return 0; +} + + +int main_old() { + InferenceEngine::Core core; +//! [core_get_metric] +auto full_device_name = core.GetConfig("CPU", METRIC_KEY(FULL_DEVICE_NAME)).as(); +//! [core_get_metric] + +//! [core_get_config] +// a user has to parse std::string after +auto num_streams = core.GetMetric("CPU", CONFIG_KEY(CPU_THROUGHPUT_STREAMS)).as(); +//! [core_get_config] + +//! [core_set_config] +core.SetConfig({ { CONFIG_KEY(PERF_COUNT), CONFIG_VALUE(YES) } }, "CPU"); +//! [core_set_config] + +auto model = core.ReadNetwork("sample.xml"); +//! [core_load_network] +auto exec_network = core.LoadNetwork(model, "MULTI", { + { MULTI_CONFIG_KEY(DEVICE_PRIORITIES), "CPU, GPU" }, + { CONFIG_KEY(PERFORMANCE_HINT), CONFIG_VALUE(THROUGHPUT) }, + { CONFIG_KEY(ENFORCE_BF16), CONFIG_VALUE(NO) } }); +//! [core_load_network] + +//! [executable_network_set_config] +// turn CPU off for multi-device execution +exec_network.SetConfig({ { MULTI_CONFIG_KEY(DEVICE_PRIORITIES), "GPU" } }); +//! [executable_network_set_config] + +{ +//! 
[executable_network_get_metric] +auto nireq = exec_network.GetMetric(EXEC_NETWORK_METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS)).as(); +//! [executable_network_get_metric] +} + +{ +//! [executable_network_get_config] +std::string perf_model = exec_network.GetConfig(CONFIG_KEY(PERFORMANCE_HINT)).as(); +//! [executable_network_get_config] +} + +return 0; +} From 969060c8db5f06e7251d0979925ab57a946c0147 Mon Sep 17 00:00:00 2001 From: Wilson Seok Date: Wed, 2 Mar 2022 01:50:31 -0800 Subject: [PATCH 146/310] Add op impl check tests (#10339) * Remove fp16 of Convert layer test from skip_tests.config.cpp as it works now * update repo * add initial op impl check tests * add op imple check tests * add op impl check tests * add rnn cell based ops * modify lstmsequence * update rnn cell base op test * add priorbox, priorboxclustered, proposal * add ROIAlign to ReverseSequence * add Roll to ScatterElementsUpdate * add select to swish tests * add tensoriterator to variadicsplit test * temporary block of LSTMCell v1 due to crash in mkldnn * use ov namespace instead of ngraph as possible * update indexing of vector array * update multiple parameter vector * add loop test * fix cpplint errors * fix build error --- .../op_impl_check/single_op_graph.cpp | 667 +++++++++++++++++- 1 file changed, 666 insertions(+), 1 deletion(-) diff --git a/src/tests/functional/plugin/shared/src/single_layer_tests/op_impl_check/single_op_graph.cpp b/src/tests/functional/plugin/shared/src/single_layer_tests/op_impl_check/single_op_graph.cpp index ed5a17b7d4a..c78823e79f6 100644 --- a/src/tests/functional/plugin/shared/src/single_layer_tests/op_impl_check/single_op_graph.cpp +++ b/src/tests/functional/plugin/shared/src/single_layer_tests/op_impl_check/single_op_graph.cpp @@ -536,6 +536,397 @@ std::shared_ptr generate(const std::shared_ptr return std::make_shared(in, ParameterVector{in}, "Parameter-1"); } +std::shared_ptr generate(const std::shared_ptr &node) { + ov::op::v0::PriorBox::Attributes attrs; + attrs.min_size = {2.0f}; + attrs.aspect_ratio = {1.5f}; + attrs.scale_all_sizes = false; + const auto LS = ngraph::builder::makeConstant(ov::element::i32, {2}, {2, 2}); + const auto IS = ngraph::builder::makeConstant(ov::element::i32, {2}, {10, 10}); + auto Node = std::make_shared(LS, IS, attrs); + ov::ResultVector results{std::make_shared(Node)}; + return std::make_shared(results, ov::ParameterVector{}, "PrioBoxGraph"); +} + +std::shared_ptr generate(const std::shared_ptr &node) { + ov::op::v8::PriorBox::Attributes attrs; + attrs.min_size = {2.0f}; + attrs.max_size = {5.0f}; + attrs.aspect_ratio = {1.5f}; + attrs.scale_all_sizes = true; + attrs.min_max_aspect_ratios_order = false; + const auto LS = ngraph::builder::makeConstant(ov::element::i32, {2}, {2, 2}); + const auto IS = ngraph::builder::makeConstant(ov::element::i32, {2}, {10, 10}); + auto Node = std::make_shared(LS, IS, attrs); + ov::ResultVector results{std::make_shared(Node)}; + return std::make_shared(results, ov::ParameterVector{}, "PrioBoxGraph"); +} + +std::shared_ptr generate(const std::shared_ptr &node) { + ov::op::v0::PriorBoxClustered::Attributes attrs; + attrs.widths = {3.0f}; + attrs.heights = {3.0f}; + attrs.clip = true; + const auto LS = ngraph::builder::makeConstant(ov::element::i32, {2}, {2, 2}); + const auto IS = ngraph::builder::makeConstant(ov::element::i32, {2}, {10, 10}); + auto Node = std::make_shared(LS, IS, attrs); + ov::ResultVector results{std::make_shared(Node)}; + return std::make_shared(results, ov::ParameterVector{}, "PrioBoxClustedGraph"); 
+} + +std::shared_ptr generate(const std::shared_ptr &node) { + ov::op::v0::Proposal::Attributes attrs; + attrs.base_size = 16; + attrs.min_size = 16; + attrs.pre_nms_topn = 6000; + attrs.post_nms_topn = 10; + attrs.nms_thresh = 0.7f; + attrs.feat_stride = 16; + attrs.min_size = 16; + attrs.ratio = {0.5f}; + attrs.scale = {32.0f}; + attrs.clip_before_nms = true; + attrs.clip_after_nms = false; + attrs.normalize = false; + attrs.box_size_scale = 1.0f; + attrs.box_coordinate_scale = 1.0f; + attrs.framework = ""; + attrs.infer_probs = false; + const auto params = ngraph::builder::makeDynamicParams(ov::element::f32, {{1, 2, 10, 10}, + {1, 4, 10, 10}, + {3}}); + auto Node = std::make_shared(params.at(0), params.at(1), params.at(2), attrs); + ov::ResultVector results{std::make_shared(Node)}; + return std::make_shared(results, params, "ProposalGraph"); +} + +std::shared_ptr generate(const std::shared_ptr &node) { + ov::op::v4::Proposal::Attributes attrs; + attrs.base_size = 16; + attrs.min_size = 16; + attrs.pre_nms_topn = 6000; + attrs.post_nms_topn = 10; + attrs.nms_thresh = 0.7f; + attrs.feat_stride = 16; + attrs.min_size = 16; + attrs.ratio = {0.5f}; + attrs.scale = {32.0f}; + attrs.clip_before_nms = true; + attrs.clip_after_nms = false; + attrs.normalize = false; + attrs.box_size_scale = 1.0f; + attrs.box_coordinate_scale = 1.0f; + attrs.framework = ""; + attrs.infer_probs = true; + const auto params = ngraph::builder::makeDynamicParams(ov::element::f32, {{1, 2, 10, 10}, + {1, 4, 10, 10}, + {3}}); + auto Node = std::make_shared(params.at(0), params.at(1), params.at(2), attrs); + ov::ResultVector results{std::make_shared(Node)}; + return std::make_shared(results, params, "ProposalGraph"); +} + +std::shared_ptr generate(const std::shared_ptr &node) { + const auto params = ngraph::builder::makeDynamicParams(ov::element::f32, {{2, 1, 16, 16}}); + const auto coords = ngraph::builder::makeConstant(ov::element::f32, {2, 4}, {2, 2, 8, 8, 2, 2, 8, 8}); + const auto roisIdx = ngraph::builder::makeConstant(ov::element::i32, {2}, {0, 1}); + auto Node = std::make_shared(params.at(0), coords, roisIdx, 2, 2, 2, 1, "avg"); + ov::ResultVector results{std::make_shared(Node)}; + return std::make_shared(results, params, "ROIAlignGraph"); +} + +std::shared_ptr generate(const std::shared_ptr &node) { + const auto params = ngraph::builder::makeDynamicParams(ov::element::f32, {{1, 3, 8, 8}, + {1, 5}}); + auto Node = std::make_shared(params.at(0), params.at(1), Shape{1, 1}, 1); + ov::ResultVector results{std::make_shared(Node)}; + return std::make_shared(results, params, "ROIPoolingGraph"); +} + +std::shared_ptr generate(const std::shared_ptr &node) { + const auto out_shape_ = ngraph::builder::makeConstant(ov::element::i64, {4}, {1, 3, 3, 3}); + const auto min_value = ngraph::builder::makeConstant(ov::element::f32, {}, {0.f}); + const auto max_value = ngraph::builder::makeConstant(ov::element::f32, {}, {1.f}); + auto Node = std::make_shared(out_shape_, min_value, max_value, ov::element::f32, 10, 10); + ov::ResultVector results{std::make_shared(Node)}; + return std::make_shared(results, ov::ParameterVector{}, "RandomUniformGraph"); +} + +std::shared_ptr generate(const std::shared_ptr &node) { + const auto start = ngraph::builder::makeConstant(ov::element::f32, {}, {1.f}); + const auto stop = ngraph::builder::makeConstant(ov::element::f32, {}, {5.f}); + const auto step = ngraph::builder::makeConstant(ov::element::f32, {}, {1.f}); + auto Node = std::make_shared(start, stop, step); + ov::ResultVector 
results{std::make_shared(Node)}; + return std::make_shared(results, ov::ParameterVector{}, "RangeGraph"); +} + +std::shared_ptr generate(const std::shared_ptr &node) { + const auto start = ngraph::builder::makeConstant(ov::element::f32, {}, {1.f}); + const auto stop = ngraph::builder::makeConstant(ov::element::f32, {}, {5.f}); + const auto step = ngraph::builder::makeConstant(ov::element::f32, {}, {1.f}); + auto Node = std::make_shared(start, stop, step, ov::element::f32); + ov::ResultVector results{std::make_shared(Node)}; + return std::make_shared(results, ov::ParameterVector{}, "RangeGraph"); +} + +std::shared_ptr generate(const std::shared_ptr &node) { + const auto params = ngraph::builder::makeDynamicParams(ov::element::f32, {{1, 8, 2, 2}}); + auto Node = std::make_shared(params.at(0), 4, 1, 1, true, std::vector{0}, 1, 3); + ov::ResultVector results{std::make_shared(Node)}; + return std::make_shared(results, params, "RegionYoloGraph"); +} + +std::shared_ptr generate(const std::shared_ptr &node) { + const auto params = ngraph::builder::makeDynamicParams(ov::element::f32, {{1, 8, 4, 4}}); + auto Node = std::make_shared(params.at(0), ov::Strides{2}); + ov::ResultVector results{std::make_shared(Node)}; + return std::make_shared(results, params, "ReorgYoloGraph"); +} + +std::shared_ptr generate(const std::shared_ptr &node) { + const auto params = ngraph::builder::makeDynamicParams(ov::element::f32, {{2, 2, 3}}); + const auto shape = ngraph::builder::makeConstant(ov::element::i64, {1}, {12}); + auto Node = std::make_shared(params.at(0), shape, false); + ov::ResultVector results{std::make_shared(Node)}; + return std::make_shared(results, params, "ReshapeGraph"); +} + +std::shared_ptr generate(const std::shared_ptr &node) { + const auto params = ngraph::builder::makeParams(ov::element::f32, {{2, 2}}); + ov::ResultVector results{std::make_shared(params.at(0))}; + return std::make_shared(results, params, "ResultGraph"); +} + +std::shared_ptr generate(const std::shared_ptr &node) { + const auto params = ngraph::builder::makeDynamicParams(ov::element::f32, {{2, 4, 3}}); + const auto axis = ngraph::builder::makeConstant(ov::element::i64, {3}, {0, 1, 2}); + auto Node = std::make_shared(params.at(0), axis, op::v1::Reverse::Mode::INDEX); + ov::ResultVector results{std::make_shared(Node)}; + return std::make_shared(results, params, "ReverseGraph"); +} + +std::shared_ptr generate(const std::shared_ptr &node) { + const auto params = ngraph::builder::makeDynamicParams({ov::element::f32, ov::element::i32}, + {{3, 10}, {3}}); + auto Node = std::make_shared(params.at(0), params.at(1), 0, 1); + ov::ResultVector results{std::make_shared(Node)}; + return std::make_shared(results, params, "ReverseSequenceGraph"); +} + +std::shared_ptr generate(const std::shared_ptr &node) { + const auto params = ngraph::builder::makeDynamicParams(ov::element::f32, {{4, 2, 3}}); + const auto shift = ngraph::builder::makeConstant(ov::element::i64, {3}, {2, 1, 3}); + const auto axes = ngraph::builder::makeConstant(ov::element::i64, {3}, {0, 1, 2}); + auto Node = std::make_shared(params.at(0), shift, axes); + ov::ResultVector results{std::make_shared(Node)}; + return std::make_shared(results, params, "RollGraph"); +} + +std::shared_ptr generate(const std::shared_ptr &node) { + const auto params = ngraph::builder::makeDynamicParams(ov::element::f32, {{10}}); + auto Node = std::make_shared(params.at(0), op::v5::Round::RoundMode::HALF_TO_EVEN); + ov::ResultVector results{std::make_shared(Node)}; + return std::make_shared(results, 
params, "RoundGraph"); +} + +std::shared_ptr generate(const std::shared_ptr &node) { + const auto params = ngraph::builder::makeDynamicParams(ov::element::f32, {{2, 2}, {2, 2}}); + const auto indices = ngraph::builder::makeConstant(ov::element::i64, {2, 2}, {1, 1, 0, 0}); + const auto axis = ngraph::builder::makeConstant(ov::element::i64, {1}, {0}); + auto Node = std::make_shared(params.at(0), indices, params.at(1), axis); + ov::ResultVector results{std::make_shared(Node)}; + return std::make_shared(results, params, "ScatterElementsUpdateGraph"); +} + +std::shared_ptr generate(const std::shared_ptr &node) { + const auto params = ngraph::builder::makeDynamicParams({ov::element::boolean, ov::element::f32, ov::element::f32}, + {{2, 2, 2}, {2, 2, 2}, {2, 2, 2}}); + auto Node = std::make_shared(params.at(0), params.at(1), params.at(2), op::AutoBroadcastType::NONE); + ov::ResultVector results{std::make_shared(Node)}; + return std::make_shared(results, params, "SelectGraph"); +} + +std::shared_ptr generate(const std::shared_ptr &node) { + const auto params = ngraph::builder::makeDynamicParams(ov::element::f32, {{3}}); + const auto alpha = ngraph::builder::makeConstant(ov::element::f32, {1}, {1.67326324}); + const auto lambda = ngraph::builder::makeConstant(ov::element::f32, {1}, {1.05070098}); + auto Node = std::make_shared(params.at(0), alpha, lambda); + ov::ResultVector results{std::make_shared(Node)}; + return std::make_shared(results, params, "SeluGraph"); +} + +std::shared_ptr generate(const std::shared_ptr &node) { + const auto params = ngraph::builder::makeDynamicParams(ov::element::f32, {{2, 4, 8, 16, 64}}); + auto Node = std::make_shared(params.at(0)); + ov::ResultVector results{std::make_shared(Node)}; + return std::make_shared(results, params, "ShapeOfGraph"); +} + +std::shared_ptr generate(const std::shared_ptr &node) { + const auto params = ngraph::builder::makeDynamicParams(ov::element::f32, {{2, 4, 8, 16, 64}}); + auto Node = std::make_shared(params.at(0)); + ov::ResultVector results{std::make_shared(Node)}; + return std::make_shared(results, params, "ShapeOfGraph"); +} + +std::shared_ptr generate(const std::shared_ptr &node) { + const auto params = ngraph::builder::makeDynamicParams(ov::element::f32, {{1, 15, 2, 2}}); + auto Node = std::make_shared(params.at(0), 1, 5); + ov::ResultVector results{std::make_shared(Node)}; + return std::make_shared(results, params, "ShuffleChannelsGraph"); +} + +std::shared_ptr generate(const std::shared_ptr &node) { + const auto params = ngraph::builder::makeDynamicParams(ov::element::f32, {{2, 4, 3}}); + const auto start = ngraph::builder::makeConstant(ov::element::i64, {3}, {0, 0, 4}); + const auto stop = ngraph::builder::makeConstant(ov::element::i64, {3}, {2, 4, -5}); + const auto step = ngraph::builder::makeConstant(ov::element::i64, {3}, {3, 2, -2}); + const auto axes = ngraph::builder::makeConstant(ov::element::i64, {3}, {0, 1, 2}); + auto Node = std::make_shared(params.at(0), start, stop, step, axes); + ov::ResultVector results{std::make_shared(Node)}; + return std::make_shared(results, params, "SliceGraph"); +} + +std::shared_ptr generate(const std::shared_ptr &node) { + const auto params = ngraph::builder::makeDynamicParams(ov::element::f32, {{4, 4}}); + auto Node = std::make_shared(params.at(0)); + ov::ResultVector results{std::make_shared(Node)}; + return std::make_shared(results, params, "SoftPlusGraph"); +} + +std::shared_ptr generate(const std::shared_ptr &node) { + const auto params = 
ngraph::builder::makeDynamicParams(ov::element::f32, {{2, 2, 3}}); + auto Node = std::make_shared(params.at(0), 0); + ov::ResultVector results{std::make_shared(Node)}; + return std::make_shared(results, params, "SoftmaxGraph"); +} + +std::shared_ptr generate(const std::shared_ptr &node) { + const auto params = ngraph::builder::makeDynamicParams(ov::element::f32, {{2, 2, 3}}); + auto Node = std::make_shared(params.at(0), 0); + ov::ResultVector results{std::make_shared(Node)}; + return std::make_shared(results, params, "SoftmaxGraph"); +} + +std::shared_ptr generate(const std::shared_ptr &node) { + const auto params = ngraph::builder::makeDynamicParams(ov::element::f32, {{1, 1, 3, 2, 1}}); + const auto blockShape = ngraph::builder::makeConstant(ov::element::i64, {5}, {1, 1, 3, 2, 2}); + const auto padsBegin = ngraph::builder::makeConstant(ov::element::i64, {5}, {0, 0, 1, 0, 3}); + const auto padsEnd = ngraph::builder::makeConstant(ov::element::i64, {5}, {0, 0, 2, 0, 0}); + auto Node = std::make_shared(params.at(0), blockShape, padsBegin, padsEnd); + ov::ResultVector results{std::make_shared(Node)}; + return std::make_shared(results, params, "SpaceToBatchGraph"); +} + +std::shared_ptr generate(const std::shared_ptr &node) { + const auto params = ngraph::builder::makeDynamicParams(ov::element::f32, {{1, 2, 4, 4}}); + auto Node = std::make_shared(params.at(0), "BLOCKS_FIRST", 2); + ov::ResultVector results{std::make_shared(Node)}; + return std::make_shared(results, params, "SpaceToDepthGraph"); +} + +std::shared_ptr generate(const std::shared_ptr &node) { + const auto params = ngraph::builder::makeDynamicParams(ov::element::f32, {{2, 8, 2}}); + const auto axis = ngraph::builder::makeConstant(ov::element::i64, {}, {1}); + auto Node = std::make_shared(params.at(0), axis, 4); + ov::ResultVector results{std::make_shared(Node)}; + return std::make_shared(results, params, "SplitGraph"); +} + +std::shared_ptr generate(const std::shared_ptr &node) { + const auto params = ngraph::builder::makeDynamicParams(ov::element::f32, {{1, 4, 1, 1, 2}}); + const auto axes = ngraph::builder::makeConstant(ov::element::i64, {2}, {0, 2}); + auto Node = std::make_shared(params.at(0), axes); + ov::ResultVector results{std::make_shared(Node)}; + return std::make_shared(results, params, "SqueezeGraph"); +} + +std::shared_ptr generate(const std::shared_ptr &node) { + const auto params = ngraph::builder::makeDynamicParams(ov::element::f32, {{128, 1}}); + const auto begin = ngraph::builder::makeConstant(ov::element::i64, {3}, {0, 0, 0}); + const auto end = ngraph::builder::makeConstant(ov::element::i64, {3}, {0, 0, 0}); + const auto stride = ngraph::builder::makeConstant(ov::element::i64, {3}, {1, 1, 1}); + auto Node = std::make_shared(params.at(0), begin, end, stride, + std::vector{0, 1, 1}, + std::vector{0, 1, 1}, + std::vector{1, 0, 0}, + std::vector{1, 0, 0}, + std::vector{0, 0, 0}); + ov::ResultVector results{std::make_shared(Node)}; + return std::make_shared(results, params, "StridedSliceGraph"); +} + +std::shared_ptr generate(const std::shared_ptr &node) { + const auto params = ngraph::builder::makeDynamicParams(ov::element::f32, {{2, 4}}); + const auto beta = ngraph::builder::makeConstant(ov::element::f32, {}, {0.6f}); + auto Node = std::make_shared(params.at(0), beta); + ov::ResultVector results{std::make_shared(Node)}; + return std::make_shared(results, params, "SwishGraph"); +} + +std::shared_ptr generate(const std::shared_ptr &node) { + const auto params = ngraph::builder::makeDynamicParams(ov::element::f32, {{2, 
1, 3}}); + const auto repeats = ngraph::builder::makeConstant(ov::element::i64, {2}, {2, 1}); + auto Node = std::make_shared(params.at(0), repeats); + ov::ResultVector results{std::make_shared(Node)}; + return std::make_shared(results, params, "TileGraph"); +} + +std::shared_ptr generate(const std::shared_ptr &node) { + const auto params = ngraph::builder::makeDynamicParams(ov::element::f32, {{2, 3, 2}}); + const auto k = ngraph::builder::makeConstant(ov::element::i64, {}, {3}); + auto Node = std::make_shared(params.at(0), + k, + 1, + ov::op::v1::TopK::Mode::MAX, + ov::op::v1::TopK::SortType::SORT_VALUES); + ov::ResultVector results{std::make_shared(Node->output(0)), + std::make_shared(Node->output(1))}; + return std::make_shared(results, params, "TopKGraph"); +} + +std::shared_ptr generate(const std::shared_ptr &node) { + const auto params = ngraph::builder::makeDynamicParams(ov::element::f32, {{2, 3, 2}}); + const auto k = ngraph::builder::makeConstant(ov::element::i64, {}, {3}); + auto Node = std::make_shared(params.at(0), + k, + 1, + ov::op::v3::TopK::Mode::MAX, + ov::op::v3::TopK::SortType::SORT_VALUES); + ov::ResultVector results{std::make_shared(Node->output(0)), + std::make_shared(Node->output(1))}; + return std::make_shared(results, params, "TopKGraph"); +} + +std::shared_ptr generate(const std::shared_ptr &node) { + const auto params = ngraph::builder::makeDynamicParams(ov::element::f32, {{2, 2, 3}}); + const auto inputOrder = ngraph::builder::makeConstant(ov::element::i64, {3}, {2, 1, 0}); + auto Node = std::make_shared(params.at(0), inputOrder); + ov::ResultVector results{std::make_shared(Node)}; + return std::make_shared(results, params, "TransposeGraph"); +} + +std::shared_ptr generate(const std::shared_ptr &node) { + const auto params = ngraph::builder::makeDynamicParams(ov::element::f32, {{4, 2}}); + const auto axes = ngraph::builder::makeConstant(ov::element::i64, {2}, {1, -1}); + auto Node = std::make_shared(params.at(0), axes); + ov::ResultVector results{std::make_shared(Node)}; + return std::make_shared(results, params, "UnsqueezeGraph"); +} + +std::shared_ptr generate(const std::shared_ptr &node) { + const auto params = ngraph::builder::makeDynamicParams(ov::element::f32, {{2, 8, 2, 2}}); + const auto axis = ngraph::builder::makeConstant(ov::element::i64, {1}, {1}); + const auto splitLengths = ngraph::builder::makeConstant(ov::element::i64, {4}, {1, 3, 2, 2}); + auto Node = std::make_shared(params.at(0), axis, splitLengths); + ov::ResultVector results{std::make_shared(Node->output(0)), + std::make_shared(Node->output(1)), + std::make_shared(Node->output(2)), + std::make_shared(Node->output(3))}; + return std::make_shared(results, params, "VariadicSplitGraph"); +} + std::shared_ptr generateBinaryEltwise(const std::shared_ptr &node) { const auto params = ngraph::builder::makeDynamicParams(ov::element::f32, {{1, 2}, {1, 2}}); std::shared_ptr eltwiseNode; @@ -702,6 +1093,262 @@ std::shared_ptr generateReadValueBase(const std::shared_ptr generateDeformableConvolutionBase(const std::shared_ptr &node) { + const auto params = ngraph::builder::makeDynamicParams(ov::element::f32, {{1, 2, 4, 4}, + {1, 18, 2, 2}, + {1, 2, 3, 3}}); + std::shared_ptr deformableConvolutionNode; + if (ov::is_type(node)) { + deformableConvolutionNode = std::make_shared(params.at(0), params.at(1), params.at(2), + ov::Strides {1, 1}, + ov::CoordinateDiff {0, 0}, + ov::CoordinateDiff {0, 0}, + ov::Strides {1, 1}); + } else if (ov::is_type(node)) { + deformableConvolutionNode = 
std::make_shared(params.at(0), params.at(1), params.at(2), + ov::Strides {1, 1}, + ov::CoordinateDiff {0, 0}, + ov::CoordinateDiff {0, 0}, + ov::Strides {1, 1}); + } else { + return nullptr; + } + + ov::ResultVector results{std::make_shared(deformableConvolutionNode)}; + return std::make_shared(results, params, "DeformableConvolutionBaseGraph"); +} + +std::shared_ptr generateDetectionOutputBase(const std::shared_ptr &node) { + const auto params = ngraph::builder::makeDynamicParams(ov::element::f32, {{2, 8}, + {2, 6}, + {2, 1, 8}}); + ov::op::v0::DetectionOutput::Attributes attrs; + ov::op::v8::DetectionOutput::Attributes attrs_v8; + attrs.num_classes = 3; + attrs_v8.background_label_id = attrs.background_label_id = -1; + attrs_v8.top_k = attrs.top_k = -1; + attrs_v8.variance_encoded_in_target = attrs.variance_encoded_in_target = true; + attrs_v8.keep_top_k = attrs.keep_top_k = {2}; + attrs_v8.code_type = attrs.code_type = "caffe.PriorBoxParameter.CORNER"; + attrs_v8.share_location = attrs.share_location = true; + attrs_v8.nms_threshold = attrs.nms_threshold = 0.5; + attrs_v8.confidence_threshold = attrs.confidence_threshold = 0.3; + attrs_v8.clip_after_nms = attrs.clip_after_nms = false; + attrs_v8.clip_before_nms = attrs.clip_before_nms = true; + attrs_v8.decrease_label_id = attrs.decrease_label_id = false; + attrs_v8.normalized = attrs.normalized = true; + attrs_v8.input_height = attrs.input_height = 0; + attrs_v8.input_width = attrs.input_width = 0; + attrs_v8.objectness_score = attrs.objectness_score = 0; + + std::shared_ptr DetectionOutputNode; + if (ov::is_type(node)) { + DetectionOutputNode = std::make_shared(params.at(0), params.at(1), params.at(2), attrs); + } else if (ov::is_type(node)) { + DetectionOutputNode = std::make_shared(params.at(0), params.at(1), params.at(2), attrs_v8); + } else { + return nullptr; + } + + ov::ResultVector results{std::make_shared(DetectionOutputNode)}; + return std::make_shared(results, params, "DetectionOutputBaseGraph"); +} + +std::shared_ptr generateEmbeddingBagOffsetsBase(const std::shared_ptr &node) { + const auto params = ngraph::builder::makeDynamicParams(ov::element::f32, {{5, 2}}); + const auto indices = ngraph::builder::makeConstant(ov::element::i32, {4}, {}, true); + const auto offsets = ngraph::builder::makeConstant(ov::element::i32, {3}, {}, true); + const auto default_index = ngraph::builder::makeConstant(ov::element::i32, ov::Shape(), std::vector{0}); + + std::shared_ptr EmbeddingBagOffsetsSumNode; + if (ov::is_type(node)) { + EmbeddingBagOffsetsSumNode = std::make_shared(params.at(0), indices, offsets, default_index); + } else { + return nullptr; + } + + ov::ResultVector results{std::make_shared(EmbeddingBagOffsetsSumNode)}; + return std::make_shared(results, params, "EmbeddingBagOffsetsBaseGraph"); +} + +std::shared_ptr generateEmbeddingBagPackedBase(const std::shared_ptr &node) { + const auto params = ngraph::builder::makeDynamicParams(ov::element::f32, {{5, 2}}); + const auto indices = ngraph::builder::makeConstant(ov::element::i32, {2, 3}, {}, true); + + std::shared_ptr EmbeddingBagPackedSumNode; + if (ov::is_type(node)) { + EmbeddingBagPackedSumNode = std::make_shared(params.at(0), indices); + } else { + return nullptr; + } + + ov::ResultVector results{std::make_shared(EmbeddingBagPackedSumNode)}; + return std::make_shared(results, params, "EmbeddingBagPackedBaseGraph"); +} + +std::shared_ptr generateFFTBase(const std::shared_ptr &node) { + const auto params = ngraph::builder::makeDynamicParams(ov::element::f32, {{2, 10, 10, 2}}); 
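+    // The input's trailing dimension of size 2 packs the real and imaginary parts of each
+    // complex value, as expected by the (I)DFT operations exercised by this test graph.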
+ const auto axes = ngraph::builder::makeConstant(ov::element::i32, {1}, {2}); + + std::shared_ptr FFTBaseNode; + if (ov::is_type(node)) { + FFTBaseNode = std::make_shared(params.at(0), axes); + } else if (ov::is_type(node)) { + FFTBaseNode = std::make_shared(params.at(0), axes); + } else { + return nullptr; + } + + ov::ResultVector results{std::make_shared(FFTBaseNode)}; + return std::make_shared(results, params, "FFTBaseGraph"); +} + +std::shared_ptr generateGatherBase(const std::shared_ptr &node) { + const auto params = ngraph::builder::makeDynamicParams(ov::element::i32, {{2, 2, 3, 3}, {2}}); + const auto axis = ngraph::builder::makeConstant(ov::element::i64, ov::Shape(), std::vector{2}); + + std::shared_ptr GatherBaseNode; + if (ov::is_type(node)) { + GatherBaseNode = std::make_shared(params.at(0), params.at(1), axis); + } else if (ov::is_type(node)) { + GatherBaseNode = std::make_shared(params.at(0), params.at(1), axis); + } else if (ov::is_type(node)) { + GatherBaseNode = std::make_shared(params.at(0), params.at(1), axis); + } else { + return nullptr; + } + + ov::ResultVector results{std::make_shared(GatherBaseNode)}; + return std::make_shared(results, params, "GatherBaseGraph"); +} + +std::shared_ptr generateGatherNDBase(const std::shared_ptr &node) { + const auto params = ngraph::builder::makeDynamicParams(ov::element::i32, {{2, 3, 4, 2}, {2, 3, 3, 2}}); + + std::shared_ptr GatherNDBaseNode; + if (ov::is_type(node)) { + GatherNDBaseNode = std::make_shared(params.at(0), params.at(1)); + } else if (ov::is_type(node)) { + GatherNDBaseNode = std::make_shared(params.at(0), params.at(1)); + } else { + return nullptr; + } + + ov::ResultVector results{std::make_shared(GatherNDBaseNode)}; + return std::make_shared(results, params, "GatherNDBaseGraph"); +} + +std::shared_ptr generateRNNCellBase(const std::shared_ptr &node) { + std::shared_ptr RNNCellBaseNode; + if (ov::is_type(node)) { + const auto params = ngraph::builder::makeDynamicParams(ov::element::f32, {{2, 3}, {2, 3}}); + const auto W = ngraph::builder::makeConstant(ov::element::f32, {9, 3}, {}, true); + const auto R = ngraph::builder::makeConstant(ov::element::f32, {9, 3}, {}, true); + const auto B = ngraph::builder::makeConstant(ov::element::f32, {9}, {}, true); + RNNCellBaseNode = std::make_shared(params.at(0), params.at(1), + W, R, B, 3); + ov::ResultVector results{std::make_shared(RNNCellBaseNode)}; + return std::make_shared(results, params, "RNNCellBaseGraph"); + } else if (ov::is_type(node)) { + const auto params = ngraph::builder::makeDynamicParams(ov::element::f32, {{2, 3}, {2, 3}, {2, 3}}); + const auto W = ngraph::builder::makeConstant(ov::element::f32, {12, 3}, {}, true); + const auto R = ngraph::builder::makeConstant(ov::element::f32, {12, 3}, {}, true); + const auto B = ngraph::builder::makeConstant(ov::element::f32, {12}, {}, true); + const auto P = ngraph::builder::makeConstant(ov::element::f32, {9}, {}, true); + RNNCellBaseNode = std::make_shared(params.at(0), params.at(1), params.at(2), + W, R, B, P, 3); + ov::ResultVector results{std::make_shared(RNNCellBaseNode->output(0)), + std::make_shared(RNNCellBaseNode->output(1))}; + //return std::make_shared(results, params, "RNNCellBaseGraph"); + return nullptr; // Temporary nullptr return due to crash in mkldnn + } else if (ov::is_type(node)) { + const auto params = ngraph::builder::makeDynamicParams(ov::element::f32, {{2, 3}, {2, 3}, {2, 3}}); + const auto W = ngraph::builder::makeConstant(ov::element::f32, {12, 3}, {}, true); + const auto R = 
ngraph::builder::makeConstant(ov::element::f32, {12, 3}, {}, true); + const auto B = ngraph::builder::makeConstant(ov::element::f32, {12}, {}, true); + RNNCellBaseNode = std::make_shared(params.at(0), params.at(1), params.at(2), + W, R, B, 3); + ov::ResultVector results{std::make_shared(RNNCellBaseNode->output(0)), + std::make_shared(RNNCellBaseNode->output(1))};; + return std::make_shared(results, params, "RNNCellBaseGraph"); + } else if (ov::is_type(node)) { + const auto params = ngraph::builder::makeDynamicParams({ov::element::f32, ov::element::f32, ov::element::f32, ov::element::i64}, + {{5, 10, 10}, {5, 1, 10}, {5, 1, 10}, {5}}); + const auto W = ngraph::builder::makeConstant(ov::element::f32, {1, 40, 10}, {}, true); + const auto R = ngraph::builder::makeConstant(ov::element::f32, {1, 40, 10}, {}, true); + const auto B = ngraph::builder::makeConstant(ov::element::f32, {1, 40}, {}, true); + RNNCellBaseNode = std::make_shared(params.at(0), params.at(1), params.at(2), params.at(3), + W, R, B, 10, ov::op::RecurrentSequenceDirection::FORWARD); + ov::ResultVector results{std::make_shared(RNNCellBaseNode->output(0)), + std::make_shared(RNNCellBaseNode->output(1)), + std::make_shared(RNNCellBaseNode->output(2))}; + return std::make_shared(results, params, "RNNCellBaseGraph"); + } else if (ov::is_type(node)) { + const auto params = ngraph::builder::makeDynamicParams(ov::element::f32, {{2, 3}, {2, 3}}); + const auto W = ngraph::builder::makeConstant(ov::element::f32, {3, 3}, {}, true); + const auto R = ngraph::builder::makeConstant(ov::element::f32, {3, 3}, {}, true); + const auto B = ngraph::builder::makeConstant(ov::element::f32, {3}, {}, true); + RNNCellBaseNode = std::make_shared(params.at(0), params.at(1), + W, R, B, 3); + ov::ResultVector results{std::make_shared(RNNCellBaseNode)}; + return std::make_shared(results, params, "RNNCellBaseGraph"); + } else if (ov::is_type(node)) { + const auto params = ngraph::builder::makeDynamicParams({ov::element::f32, ov::element::f32, ov::element::i64}, + {{2, 5, 3}, {2, 1, 3}, {2}}); + const auto W = ngraph::builder::makeConstant(ov::element::f32, {1, 3, 3}, {}, true); + const auto R = ngraph::builder::makeConstant(ov::element::f32, {1, 3, 3}, {}, true); + const auto B = ngraph::builder::makeConstant(ov::element::f32, {1, 3}, {}, true); + RNNCellBaseNode = std::make_shared(params.at(0), params.at(1), params.at(2), + W, R, B, 3, ov::op::RecurrentSequenceDirection::FORWARD); + ov::ResultVector results{std::make_shared(RNNCellBaseNode->output(0)), + std::make_shared(RNNCellBaseNode->output(1))}; + return std::make_shared(results, params, "RNNCellBaseGraph"); + } else { + return nullptr; + } +} + +std::shared_ptr generateSubGraphOp(const std::shared_ptr &node) { + const auto params = ngraph::builder::makeDynamicParams(ov::element::f32, {{2, 2}, {2, 2}, {2, 2}}); + const auto params_body = ngraph::builder::makeDynamicParams(ov::element::f32, {{2, 2}, {2, 2}, {2, 2}}); + const auto body_condition = ngraph::builder::makeConstant(ov::element::boolean, ov::Shape{1}, {true}); + const auto trip_count = ngraph::builder::makeConstant(ngraph::element::i64, ov::Shape{1}, {3}); + const auto exec_condition = ngraph::builder::makeConstant(ov::element::boolean, ov::Shape{1}, {true}); + // Body + auto sum = std::make_shared(params_body.at(0), params_body.at(1)); + auto Zo = std::make_shared(sum, params_body.at(2)); + auto body = std::make_shared(ov::OutputVector{body_condition, Zo}, params_body); + + ov::Output SubGraphOpNode; + if (ov::is_type(node)) { + auto 
tensor_iterator = std::make_shared(); + tensor_iterator->set_function(body); + + tensor_iterator->set_sliced_input(params_body.at(0), params.at(0), 0, 1, 1, -1, 1); + tensor_iterator->set_sliced_input(params_body.at(1), params.at(1), 0, 1, 1, -1, 0); + tensor_iterator->set_merged_input(params_body.at(2), params.at(2), Zo); + + // Output 0 is last Zo + SubGraphOpNode = tensor_iterator->get_iter_value(Zo, -1); + } else if (ov::is_type(node)) { + auto loop = std::make_shared(trip_count, exec_condition); + loop->set_function(body); + + loop->set_invariant_input(params_body.at(0), params.at(0)); + loop->set_invariant_input(params_body.at(1), params.at(1)); + loop->set_merged_input(params_body.at(2), params.at(2), Zo); + + loop->set_special_body_ports(ov::op::v5::Loop::SpecialBodyPorts{-1, 0}); + SubGraphOpNode = loop->get_iter_value(Zo, -1); + } else { + return nullptr; + } + + ov::ResultVector results{std::make_shared(SubGraphOpNode)}; + return std::make_shared(results, params, "SubGraphOpGraph"); +} } // namespace template @@ -724,6 +1371,24 @@ std::shared_ptr generateGraph() { return generateNmsBase(node); } else if (ov::is_type(node)) { return generateReadValueBase(node); + } else if (ov::is_type(node)) { + return generateDeformableConvolutionBase(node); + } else if (ov::is_type(node)) { + return generateDetectionOutputBase(node); + } else if (ov::is_type(node)) { + return generateEmbeddingBagOffsetsBase(node); + } else if (ov::is_type(node)) { + return generateEmbeddingBagPackedBase(node); + } else if (ov::is_type(node)) { + return generateFFTBase(node); + } else if (ov::is_type(node)) { + return generateGatherBase(node); + } else if (ov::is_type(node)) { + return generateGatherNDBase(node); + } else if (ov::is_type(node)) { + return generateRNNCellBase(node); + } else if (ov::is_type(node)) { + return generateSubGraphOp(node); } return generate(node); @@ -747,4 +1412,4 @@ OpGenerator getOpGeneratorMap() { } // namespace subgraph } // namespace test -} // namespace ov +} // namespace ov \ No newline at end of file From c54926ecb858a3dcb56b5517d32aff48c47ded24 Mon Sep 17 00:00:00 2001 From: Victor Kuznetsov Date: Wed, 2 Mar 2022 13:03:28 +0300 Subject: [PATCH 147/310] Update nightly memcheck models scope (#10709) --- .../desktop_references_config.xml | 58 ++++++++++++------- .../nightly_configs/desktop_test_config.xml | 9 ++- tests/stress_tests/scripts/get_testdata.py | 10 +++- 3 files changed, 52 insertions(+), 25 deletions(-) diff --git a/tests/stress_tests/.automation/memcheck_tests/nightly_configs/desktop_references_config.xml b/tests/stress_tests/.automation/memcheck_tests/nightly_configs/desktop_references_config.xml index 9e68f060112..75a085a2642 100644 --- a/tests/stress_tests/.automation/memcheck_tests/nightly_configs/desktop_references_config.xml +++ b/tests/stress_tests/.automation/memcheck_tests/nightly_configs/desktop_references_config.xml @@ -2,30 +2,12 @@ # Values from {"commit_id": "a92a737ba0ce5afd6df2da8d80c8deec7e11b1d9", "commit_date": "2021-07-16 13:30"} and *= 1.3 - - - - - - - - - - - - - - - - - - @@ -414,7 +396,7 @@ - + # Values from {"commit_id": "8ca1abed0d309d4c4981f00786adbe6106ca46b3", "commit_date": "2022-03-01 14:04"} and *= 1.3 @@ -424,7 +406,7 @@ # Values from {"commit_id": "403339f8f470c90dee6f6d94ed58644b2787f66b", "commit_date": "2022-01-19 14:13"} and *= 1.3 - + # Values from {"commit_id": "8ca1abed0d309d4c4981f00786adbe6106ca46b3", "commit_date": "2022-03-01 14:04"} and *= 1.3 # Values from {"commit_id": "403339f8f470c90dee6f6d94ed58644b2787f66b", 
"commit_date": "2022-01-19 14:13"} and *= 1.3 @@ -582,5 +564,41 @@ + # Values from {"commit_id": "8ca1abed0d309d4c4981f00786adbe6106ca46b3", "commit_date": "2022-03-01 14:04"} and *= 1.3 + # Values from {"commit_id": "8ca1abed0d309d4c4981f00786adbe6106ca46b3", "commit_date": "2022-03-01 14:04"} and *= 1.3 + # Values from {"commit_id": "8ca1abed0d309d4c4981f00786adbe6106ca46b3", "commit_date": "2022-03-01 14:04"} and *= 1.3 + # Values from {"commit_id": "8ca1abed0d309d4c4981f00786adbe6106ca46b3", "commit_date": "2022-03-01 14:04"} and *= 1.3 + # Values from {"commit_id": "8ca1abed0d309d4c4981f00786adbe6106ca46b3", "commit_date": "2022-03-01 14:04"} and *= 1.3 + # Values from {"commit_id": "8ca1abed0d309d4c4981f00786adbe6106ca46b3", "commit_date": "2022-03-01 14:04"} and *= 1.3 + # Values from {"commit_id": "8ca1abed0d309d4c4981f00786adbe6106ca46b3", "commit_date": "2022-03-01 14:04"} and *= 1.3 + # Values from {"commit_id": "8ca1abed0d309d4c4981f00786adbe6106ca46b3", "commit_date": "2022-03-01 14:04"} and *= 1.3 + # Values from {"commit_id": "8ca1abed0d309d4c4981f00786adbe6106ca46b3", "commit_date": "2022-03-01 14:04"} and *= 1.3 + # Values from {"commit_id": "8ca1abed0d309d4c4981f00786adbe6106ca46b3", "commit_date": "2022-03-01 14:04"} and *= 1.3 + # Values from {"commit_id": "8ca1abed0d309d4c4981f00786adbe6106ca46b3", "commit_date": "2022-03-01 14:04"} and *= 1.3 + # Values from {"commit_id": "8ca1abed0d309d4c4981f00786adbe6106ca46b3", "commit_date": "2022-03-01 14:04"} and *= 1.3 + # Values from {"commit_id": "8ca1abed0d309d4c4981f00786adbe6106ca46b3", "commit_date": "2022-03-01 14:04"} and *= 1.3 + # Values from {"commit_id": "8ca1abed0d309d4c4981f00786adbe6106ca46b3", "commit_date": "2022-03-01 14:04"} and *= 1.3 + # Values from {"commit_id": "8ca1abed0d309d4c4981f00786adbe6106ca46b3", "commit_date": "2022-03-01 14:04"} and *= 1.3 + # Values from {"commit_id": "8ca1abed0d309d4c4981f00786adbe6106ca46b3", "commit_date": "2022-03-01 14:04"} and *= 1.3 + # Values from {"commit_id": "8ca1abed0d309d4c4981f00786adbe6106ca46b3", "commit_date": "2022-03-01 14:04"} and *= 1.3 + # Values from {"commit_id": "8ca1abed0d309d4c4981f00786adbe6106ca46b3", "commit_date": "2022-03-01 14:04"} and *= 1.3 + # Values from {"commit_id": "8ca1abed0d309d4c4981f00786adbe6106ca46b3", "commit_date": "2022-03-01 14:04"} and *= 1.3 + # Values from {"commit_id": "8ca1abed0d309d4c4981f00786adbe6106ca46b3", "commit_date": "2022-03-01 14:04"} and *= 1.3 + # Values from {"commit_id": "8ca1abed0d309d4c4981f00786adbe6106ca46b3", "commit_date": "2022-03-01 14:04"} and *= 1.3 + # Values from {"commit_id": "8ca1abed0d309d4c4981f00786adbe6106ca46b3", "commit_date": "2022-03-01 14:04"} and *= 1.3 + # Values from {"commit_id": "8ca1abed0d309d4c4981f00786adbe6106ca46b3", "commit_date": "2022-03-01 14:04"} and *= 1.3 + # Values from {"commit_id": "8ca1abed0d309d4c4981f00786adbe6106ca46b3", "commit_date": "2022-03-01 14:04"} and *= 1.3 + # Values from {"commit_id": "8ca1abed0d309d4c4981f00786adbe6106ca46b3", "commit_date": "2022-03-01 14:04"} and *= 1.3 + # Values from {"commit_id": "8ca1abed0d309d4c4981f00786adbe6106ca46b3", "commit_date": "2022-03-01 14:04"} and *= 1.3 + # Values from {"commit_id": "8ca1abed0d309d4c4981f00786adbe6106ca46b3", "commit_date": "2022-03-01 14:04"} and *= 1.3 + # Values from {"commit_id": "8ca1abed0d309d4c4981f00786adbe6106ca46b3", "commit_date": "2022-03-01 14:04"} and *= 1.3 + # Values from {"commit_id": "8ca1abed0d309d4c4981f00786adbe6106ca46b3", "commit_date": "2022-03-01 14:04"} and *= 1.3 + # 
Values from {"commit_id": "8ca1abed0d309d4c4981f00786adbe6106ca46b3", "commit_date": "2022-03-01 14:04"} and *= 1.3 + # Values from {"commit_id": "8ca1abed0d309d4c4981f00786adbe6106ca46b3", "commit_date": "2022-03-01 14:04"} and *= 1.3 + # Values from {"commit_id": "8ca1abed0d309d4c4981f00786adbe6106ca46b3", "commit_date": "2022-03-01 14:04"} and *= 1.3 + # Values from {"commit_id": "8ca1abed0d309d4c4981f00786adbe6106ca46b3", "commit_date": "2022-03-01 14:04"} and *= 1.3 + # Values from {"commit_id": "8ca1abed0d309d4c4981f00786adbe6106ca46b3", "commit_date": "2022-03-01 14:04"} and *= 1.3 + # Values from {"commit_id": "8ca1abed0d309d4c4981f00786adbe6106ca46b3", "commit_date": "2022-03-01 14:04"} and *= 1.3 + # Values from {"commit_id": "8ca1abed0d309d4c4981f00786adbe6106ca46b3", "commit_date": "2022-03-01 14:04"} and *= 1.3 diff --git a/tests/stress_tests/.automation/memcheck_tests/nightly_configs/desktop_test_config.xml b/tests/stress_tests/.automation/memcheck_tests/nightly_configs/desktop_test_config.xml index a4456881c73..3c91115a4c2 100644 --- a/tests/stress_tests/.automation/memcheck_tests/nightly_configs/desktop_test_config.xml +++ b/tests/stress_tests/.automation/memcheck_tests/nightly_configs/desktop_test_config.xml @@ -43,6 +43,9 @@ + + + @@ -78,6 +81,9 @@ + + + @@ -87,14 +93,12 @@ - - @@ -103,7 +107,6 @@ - diff --git a/tests/stress_tests/scripts/get_testdata.py b/tests/stress_tests/scripts/get_testdata.py index 176472649c1..3190f7b19e3 100755 --- a/tests/stress_tests/scripts/get_testdata.py +++ b/tests/stress_tests/scripts/get_testdata.py @@ -181,8 +181,8 @@ def main(): # check selected precision with model info from Open Model Zoo if precision not in model_info['precisions']: log.warning("Please specify precision for the model " - "{model_name} from the list: {model_info}".format(model_name=model_name, - model_info=model_info['precisions'])) + f"{model_name} from the list: {model_info['precisions']}") + model_recs.remove(model_rec) continue model_rec.attrib.update(info_to_add) model_rec.attrib["path"] = str( @@ -216,6 +216,12 @@ def main(): models_dir=args.omz_models_out_dir, mo_tool=args.mo_tool) run_in_subprocess(cmd, check_call=not args.skip_omz_errors) + for model_rec in model_recs: + if model_rec.attrib.get("full_path") is None: + log.warning(f"Model {model_rec.attrib['name']} does not have 'full_path' attribute! " + f"This model will not be verified in this run.") + model_recs.remove(model_rec) + # rewrite test config with updated records test_conf_obj.write(args.test_conf) From cd52cc6767678b0bbd9e6439cda40207d65b7360 Mon Sep 17 00:00:00 2001 From: Anastasia Kuporosova Date: Wed, 2 Mar 2022 15:36:31 +0300 Subject: [PATCH 148/310] [Python API][Docs] Remove excess info (#10672) * [Python API][Docs] Remove excess info * autodoc: add skip methods (#68) * remove utils from docs * undo changes Co-authored-by: Nikolay Tyukaev --- docs/api/ie_python_api/api.rst | 12 ------------ docs/conf.py | 23 ++++++++++++++++++++--- 2 files changed, 20 insertions(+), 15 deletions(-) diff --git a/docs/api/ie_python_api/api.rst b/docs/api/ie_python_api/api.rst index 3528148dd87..a2f69af4790 100644 --- a/docs/api/ie_python_api/api.rst +++ b/docs/api/ie_python_api/api.rst @@ -67,12 +67,6 @@ OpenVINO Python API openvino.runtime.passes -.. autosummary:: - :toctree: _autosummary - :template: custom-module-template.rst - - openvino.runtime.utils - .. autosummary:: :toctree: _autosummary :template: custom-module-template.rst @@ -85,12 +79,6 @@ OpenVINO Python API openvino.frontend -.. 
autosummary:: - :toctree: _autosummary - :template: custom-module-template.rst - - openvino.offline_transformations - .. toctree:: :maxdepth: 2 :hidden: diff --git a/docs/conf.py b/docs/conf.py index 54cd19265c4..92c9fc66f37 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -63,7 +63,8 @@ templates_path = ['_templates'] # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This pattern also affects html_static_path and html_extra_path. -exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store', 'openvino/inference-engine'] +exclude_patterns = ['_build', 'Thumbs.db', + '.DS_Store', 'openvino/inference-engine'] panels_add_bootstrap_css = False @@ -134,6 +135,7 @@ html_static_path = ['_static'] # monkeypatch sphinx api doc to prevent showing inheritance from object and enum.Enum add_line = ClassDocumenter.add_line + def add_line_no_base_object(self, line, *args, **kwargs): if line.strip() in ['Bases: :class:`object`', 'Bases: :class:`enum.Enum`']: return @@ -143,15 +145,30 @@ def add_line_no_base_object(self, line, *args, **kwargs): ClassDocumenter.add_line = add_line_no_base_object +# OpenVINO Python API Reference Configuration +exclude_pyapi_methods = ('__weakref__', + '__doc__', + '__module__', + '__dict__', + 'add_openvino_libs_to_path' + ) + + +def autodoc_skip_member(app, what, name, obj, skip, options): + return name in exclude_pyapi_methods + def setup(app): logger = logging.getLogger(__name__) - app.add_config_value('doxygen_mapping_file', doxygen_mapping_file, rebuild=True) + app.add_config_value('doxygen_mapping_file', + doxygen_mapping_file, rebuild=True) app.add_config_value('repositories', repositories, rebuild=True) + app.connect('autodoc-skip-member', autodoc_skip_member) app.add_js_file('js/custom.js') app.add_js_file('js/graphs.js') app.add_js_file('js/graphs_ov_tf.js') try: - shutil.copytree(os.path.join(app.srcdir, 'csv'), os.path.join(app.outdir, 'csv'), dirs_exist_ok=True) + shutil.copytree(os.path.join(app.srcdir, 'csv'), os.path.join( + app.outdir, 'csv'), dirs_exist_ok=True) except FileNotFoundError: logger.warning('csv directory not found.') From 40fc5334d89d29860f3d00b530a0abc24edc47a9 Mon Sep 17 00:00:00 2001 From: Gorokhov Dmitriy Date: Wed, 2 Mar 2022 15:44:34 +0300 Subject: [PATCH 149/310] [CPU] Fixed number of streams initialization for hint = throughput (#10728) --- src/plugins/intel_cpu/src/plugin.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/plugins/intel_cpu/src/plugin.cpp b/src/plugins/intel_cpu/src/plugin.cpp index 49372ce44af..cad41cce996 100644 --- a/src/plugins/intel_cpu/src/plugin.cpp +++ b/src/plugins/intel_cpu/src/plugin.cpp @@ -631,7 +631,7 @@ void Engine::ApplyPerformanceHints(std::map &config, c engConfig.perfHintsConfig.ovPerfHintNumRequests); } config[CONFIG_KEY(CPU_THROUGHPUT_STREAMS)] = std::to_string(num_streams); - config[ov::num_streams.name()] = ov::util::to_string(ov::streams::NUMA); + config[ov::num_streams.name()] = ov::util::to_string(num_streams); } } From d3ded2fc3619c487d5762dbe78303cb082d82123 Mon Sep 17 00:00:00 2001 From: Ilya Churaev Date: Wed, 2 Mar 2022 16:01:21 +0300 Subject: [PATCH 150/310] Fixed declaration of 'xxx' hides global declaration (#10733) --- .../include/openvino/runtime/properties.hpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/inference/include/openvino/runtime/properties.hpp b/src/inference/include/openvino/runtime/properties.hpp index 7a3e7e0136c..b63d7d04f6f 100644 --- 
a/src/inference/include/openvino/runtime/properties.hpp +++ b/src/inference/include/openvino/runtime/properties.hpp @@ -673,27 +673,27 @@ static constexpr Num NUMA{ -2}; //!< Creates as many streams as needed to accommodate NUMA and avoid associated penalties /** @cond INTERNAL */ -inline std::ostream& operator<<(std::ostream& os, const Num& num) { - switch (num) { +inline std::ostream& operator<<(std::ostream& os, const Num& num_val) { + switch (num_val) { case AUTO: return os << "AUTO"; case NUMA: return os << "NUMA"; default: - return os << num.num; + return os << num_val.num; } } -inline std::istream& operator>>(std::istream& is, Num& num) { +inline std::istream& operator>>(std::istream& is, Num& num_val) { std::string str; is >> str; if (str == "AUTO") { - num = AUTO; + num_val = AUTO; } else if (str == "NUMA") { - num = NUMA; + num_val = NUMA; } else { try { - num = {std::stoi(str)}; + num_val = {std::stoi(str)}; } catch (const std::exception& e) { throw ov::Exception{std::string{"Could not read number of streams from str: "} + str + "; " + e.what()}; } From 7cd3c8e86e9be1b09ba0adaca16c1b3647ade81d Mon Sep 17 00:00:00 2001 From: csy0225 <78470701+csy0225@users.noreply.github.com> Date: Wed, 2 Mar 2022 21:31:37 +0800 Subject: [PATCH 151/310] Fix compile problem when open -Wnon-virtual-dtor compile flag (#10705) * Fix compile problem when open -Wnon-virtual-dtor compile flag * update code style * fix the code style --- src/core/include/openvino/core/any.hpp | 2 ++ src/core/include/openvino/op/util/variable_extension.hpp | 3 +++ 2 files changed, 5 insertions(+) diff --git a/src/core/include/openvino/core/any.hpp b/src/core/include/openvino/core/any.hpp index f2c26c0e051..3d98462e8cc 100644 --- a/src/core/include/openvino/core/any.hpp +++ b/src/core/include/openvino/core/any.hpp @@ -541,6 +541,8 @@ class OPENVINO_API Any { template Impl(Args&&... args) : value(std::forward(args)...) {} + virtual ~Impl(){}; + const std::type_info& type_info() const override { return typeid(T); } diff --git a/src/core/include/openvino/op/util/variable_extension.hpp b/src/core/include/openvino/op/util/variable_extension.hpp index 25e8179fd36..53dc533652c 100644 --- a/src/core/include/openvino/op/util/variable_extension.hpp +++ b/src/core/include/openvino/op/util/variable_extension.hpp @@ -38,6 +38,9 @@ public: /// \brief Returns the identifier of corresponding variable. virtual std::string get_variable_id() const = 0; +protected: + virtual ~VariableExtension(){}; + protected: std::shared_ptr m_variable; }; From 42d3893833dae01deb95aaf3ddf5010950fe5eaf Mon Sep 17 00:00:00 2001 From: Nikolay Tyukaev Date: Wed, 2 Mar 2022 17:46:49 +0300 Subject: [PATCH 152/310] doc fixes (#10738) --- docs/doxygen-xfail.txt | 1 + docs/model_zoo.md | 7 +++++++ 2 files changed, 8 insertions(+) diff --git a/docs/doxygen-xfail.txt b/docs/doxygen-xfail.txt index d0be8692508..72d9cd0acdc 100644 --- a/docs/doxygen-xfail.txt +++ b/docs/doxygen-xfail.txt @@ -66,3 +66,4 @@ ovms_docs_shape_batch_layout.rst api/api_reference.rst workbench/docs/workbench_dg/key_concepts.md workbench/docs/workbench_dg/run_single_inference.md +omz_tools_downloader.rst diff --git a/docs/model_zoo.md b/docs/model_zoo.md index 2d94c62ef79..6122a054d9a 100644 --- a/docs/model_zoo.md +++ b/docs/model_zoo.md @@ -20,6 +20,13 @@ omz_demos +.. 
toctree:: + :maxdepth: 1 + :hidden: + :caption: Model API + + omz_model_api_ovms_adapter + @endsphinxdirective From 180f15e84c1bb30f4c6222034dc80453882dcb0f Mon Sep 17 00:00:00 2001 From: Maxim Shevtsov Date: Wed, 2 Mar 2022 17:48:01 +0300 Subject: [PATCH 153/310] auto-batching- bare min of the info (#10190) * auto-batching- bare min of the info * renaming BATCH.MD to the automatic_batching.md, also aligned the link to the new naming convention * more info and brushed * added openvino_docs_OV_UG_Automatic_Batching to the main TOC * Apply suggestions from code review Co-authored-by: Tatiana Savina * close on the comments, added the code examples * Apply suggestions from code review Co-authored-by: Tatiana Savina * Update example * Update format * Update docs format * added couple of more perf considerations * more code examples * Apply suggestions from code review * Apply the rest from code review * Update header Co-authored-by: Tatiana Savina --- docs/OV_Runtime_UG/automatic_batching.md | 107 ++++++++++++++++++ docs/OV_Runtime_UG/openvino_intro.md | 1 + .../supported_plugins/Device_Plugins.md | 3 +- docs/snippets/ov_auto_batching.cpp | 41 +++++++ docs/snippets/ov_auto_batching.py | 41 +++++++ 5 files changed, 192 insertions(+), 1 deletion(-) create mode 100644 docs/OV_Runtime_UG/automatic_batching.md create mode 100644 docs/snippets/ov_auto_batching.cpp create mode 100644 docs/snippets/ov_auto_batching.py diff --git a/docs/OV_Runtime_UG/automatic_batching.md b/docs/OV_Runtime_UG/automatic_batching.md new file mode 100644 index 00000000000..eb64330198e --- /dev/null +++ b/docs/OV_Runtime_UG/automatic_batching.md @@ -0,0 +1,107 @@ +# Automatic Batching {#openvino_docs_OV_UG_Automatic_Batching} + +## (Automatic) Batching Execution + +The Automatic-Batching is a preview of the new functionality in the OpenVINO™ toolkit. It performs on-the-fly automatic batching (i.e. grouping inference requests together) to improve device utilization, with no programming effort from the user. +Inputs gathering and outputs scattering from the individual inference requests required for the batch happen transparently, without affecting the application code. + +The feature primarily targets existing code written for inferencing many requests (each instance with the batch size 1). To obtain corresponding performance improvements, the application must be *running many inference requests simultaneously*. +As explained below, the auto-batching functionality can be also used via a special *virtual* device. + +Batching is a straightforward way of leveraging the GPU compute power and saving on communication overheads. The automatic batching is _implicitly_ triggered on the GPU when the `ov::hint::PerformanceMode::THROUGHPUT` is specified for the `ov::hint::performance_mode` property for the compile_model or set_property calls. +@sphinxdirective + +.. tab:: C++ + + .. doxygensnippet:: docs/snippets/ov_auto_batching.cpp + :language: cpp + :fragment: [compile_model] + +.. tab:: Python + + .. doxygensnippet:: docs/snippets/ov_auto_batching.py + :language: python + :fragment: [compile_model] + +@endsphinxdirective +> **NOTE**: You can disable the Auto-Batching (for example, for the GPU device) from being triggered by the `ov::hint::PerformanceMode::THROUGHPUT`. To do that, pass the `ov::hint::allow_auto_batching` set to **false** in addition to the `ov::hint::performance_mode`: +@sphinxdirective + +.. tab:: C++ + + .. 
doxygensnippet:: docs/snippets/ov_auto_batching.cpp + :language: cpp + :fragment: [compile_model_no_auto_batching] + +.. tab:: Python + + .. doxygensnippet:: docs/snippets/ov_auto_batching.py + :language: python + :fragment: [compile_model_no_auto_batching] + +@endsphinxdirective + + +Alternatively, to enable the Auto-Batching in legacy apps that are not written around the notion of performance hints, you may need to use the **explicit** device notion, such as 'BATCH:GPU'. In both cases (the *throughput* hint or explicit BATCH device), the optimal batch size selection happens automatically. The actual value depends on the model and device specifics, for example, on-device memory for the dGPUs. + +This _automatic batch size selection_ assumes that the application queries the `ov::optimal_number_of_infer_requests` to create and run the returned number of requests simultaneously: +@sphinxdirective + +.. tab:: C++ + + .. doxygensnippet:: docs/snippets/ov_auto_batching.cpp + :language: cpp + :fragment: [query_optimal_num_requests] + +.. tab:: Python + + .. doxygensnippet:: docs/snippets/ov_auto_batching.py + :language: python + :fragment: [query_optimal_num_requests] + +@endsphinxdirective +If not enough inputs were collected within the `timeout`, the transparent execution falls back to the execution of individual requests. Configuration-wise, this is the AUTO_BATCH_TIMEOUT property. +The timeout, which adds itself to the execution time of the requests, heavily penalizes the performance. To avoid this, in cases when your parallel slack is bounded, give OpenVINO an additional hint. + +For example, the application processes only 4 video streams, so there is no need to use a batch larger than 4. The most future-proof way to communicate the limitations on the parallelism is to equip the performance hint with the optional `ov::hint::num_requests` configuration key set to 4. For the GPU this will limit the batch size, for the CPU - the number of inference streams, so each device uses the `ov::hint::num_requests` while converting the hint to the actual device configuration options: +@sphinxdirective + +.. tab:: C++ + + .. doxygensnippet:: docs/snippets/ov_auto_batching.cpp + :language: cpp + :fragment: [hint_num_requests] + +.. tab:: Python + + .. doxygensnippet:: docs/snippets/ov_auto_batching.py + :language: python + :fragment: [hint_num_requests] + +@endsphinxdirective + +For the *explicit* usage, you can limit the batch size using "BATCH:GPU(4)", where 4 is the number of requests running in parallel. + +### Other Performance Considerations + +To achieve the best performance with the Automatic Batching, the application should: + - Operate a number of inference requests that is a multiple of the batch size. In the above example, for batch size 4, the application should operate 4, 8, 12, 16, etc. requests. + - Use the requests, grouped by the batch size, together. For example, the first 4 requests are inferred, while the second group of the requests is being populated. + +The following are limitations of the current implementation: + - Although less critical for the throughput-oriented scenarios, the load-time with auto-batching increases by almost 2x. + - Certain networks are not reshape-able by the "batching" dimension (specified as 'N' in the layout terms); for such networks, or when that dimension is not the zero-th one, the auto-batching is not triggered.
+ - Performance improvements happen at the cost of the memory footprint growth, yet the auto-batching queries the available memory (especially for the dGPUs) and limits the selected batch size accordingly. + + + +### Configuring the Automatic Batching +Following the OpenVINO convention for devices names, the *batching* device is named *BATCH*. The configuration options are as follows: + +| Parameter name | Parameter description | Default | Examples | +| :--- | :--- | :--- |:-----------------------------------------------------------------------------| +| "AUTO_BATCH_DEVICE" | Device name to apply the automatic batching and optional batch size in brackets | N/A | BATCH:GPU which triggers the automatic batch size selection or explicit batch size BATCH:GPU(4) | +| "AUTO_BATCH_TIMEOUT" | timeout value, in ms | 1000 | you can reduce the timeout value (to avoid performance penalty when the data arrives too non-evenly) e.g. pass the "100", or in contrast make it large enough e.g. to accommodate inputs preparation (e.g. when it is serial process) | + +### See Also +[Supported Devices](supported_plugins/Supported_Devices.md) \ No newline at end of file diff --git a/docs/OV_Runtime_UG/openvino_intro.md b/docs/OV_Runtime_UG/openvino_intro.md index fd8e88f8c15..10aeb252855 100644 --- a/docs/OV_Runtime_UG/openvino_intro.md +++ b/docs/OV_Runtime_UG/openvino_intro.md @@ -18,6 +18,7 @@ openvino_docs_IE_DG_supported_plugins_AUTO openvino_docs_OV_UG_Running_on_multiple_devices openvino_docs_OV_UG_Hetero_execution + openvino_docs_OV_UG_Automatic_Batching openvino_docs_IE_DG_network_state_intro openvino_2_0_transition_guide openvino_docs_OV_Should_be_in_performance diff --git a/docs/OV_Runtime_UG/supported_plugins/Device_Plugins.md b/docs/OV_Runtime_UG/supported_plugins/Device_Plugins.md index 7ad6fbed4b8..30ba9de15ba 100644 --- a/docs/OV_Runtime_UG/supported_plugins/Device_Plugins.md +++ b/docs/OV_Runtime_UG/supported_plugins/Device_Plugins.md @@ -29,7 +29,8 @@ OpenVINO runtime also has several execution capabilities which work on top of ot |------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------| |[Multi-Device execution](../multi_device.md) |Multi-Device enables simultaneous inference of the same model on several devices in parallel | |[Auto-Device selection](../auto_device_selection.md) |Auto-Device selection enables selecting Intel® device for inference automatically | -|[Heterogeneous execution](../hetero_execution.md) |Heterogeneous execution enables automatic inference splitting between several devices (for example if a device doesn't [support certain operation](#supported-layers)). | +|[Heterogeneous execution](../hetero_execution.md) |Heterogeneous execution enables automatic inference splitting between several devices (for example if a device doesn't [support certain operation](#supported-layers))| +|[Automatic Batching](../automatic_batching.md) | Auto-Batching plugin enables the batching (on top of the specified device) that is completely transparent to the application | Devices similar to the ones we have used for benchmarking can be accessed using [Intel® DevCloud for the Edge](https://devcloud.intel.com/edge/), a remote development environment with access to Intel® hardware and the latest versions of the Intel® Distribution of the OpenVINO™ Toolkit. 
[Learn more](https://devcloud.intel.com/edge/get_started/devcloud/) or [Register here](https://inteliot.force.com/DevcloudForEdge/s/). diff --git a/docs/snippets/ov_auto_batching.cpp b/docs/snippets/ov_auto_batching.cpp new file mode 100644 index 00000000000..1e943b3c8f5 --- /dev/null +++ b/docs/snippets/ov_auto_batching.cpp @@ -0,0 +1,41 @@ +#include + +int main() { + ov::Core core; + auto model = core.read_model("sample.xml"); + +//! [compile_model] +{ + auto compiled_model = core.compile_model(model, "GPU", ov::hint::performance_mode(ov::hint::PerformanceMode::THROUGHPUT)); +} +//! [compile_model] + +//! [compile_model_no_auto_batching] +{ + // disabling the automatic batching + // leaving intact other configurations options that the device selects for the 'throughput' hint + auto compiled_model = core.compile_model(model, "GPU", {ov::hint::performance_mode(ov::hint::PerformanceMode::THROUGHPUT), + ov::hint::allow_auto_batching(false)}); +} +//! [compile_model_no_auto_batching] + +//! [query_optimal_num_requests] +{ + // when the batch size is automatically selected by the implementation + // it is important to query/create and run the sufficient #requests + auto compiled_model = core.compile_model(model, "GPU", ov::hint::performance_mode(ov::hint::PerformanceMode::THROUGHPUT)); + auto num_requests = compiled_model.get_property(ov::optimal_number_of_infer_requests); +} +//! [query_optimal_num_requests] + +//! [hint_num_requests] +{ + // limiting the available parallel slack for the 'throughput' hint via the ov::hint::num_requests + // so that certain parameters (like selected batch size) are automatically accommodated accordingly + auto compiled_model = core.compile_model(model, "GPU", {ov::hint::performance_mode(ov::hint::PerformanceMode::THROUGHPUT), + ov::hint::num_requests(4)}); +} +//! [hint_num_requests] + + return 0; +} diff --git a/docs/snippets/ov_auto_batching.py b/docs/snippets/ov_auto_batching.py new file mode 100644 index 00000000000..1e943b3c8f5 --- /dev/null +++ b/docs/snippets/ov_auto_batching.py @@ -0,0 +1,41 @@ +#include + +int main() { + ov::Core core; + auto model = core.read_model("sample.xml"); + +//! [compile_model] +{ + auto compiled_model = core.compile_model(model, "GPU", ov::hint::performance_mode(ov::hint::PerformanceMode::THROUGHPUT)); +} +//! [compile_model] + +//! [compile_model_no_auto_batching] +{ + // disabling the automatic batching + // leaving intact other configurations options that the device selects for the 'throughput' hint + auto compiled_model = core.compile_model(model, "GPU", {ov::hint::performance_mode(ov::hint::PerformanceMode::THROUGHPUT), + ov::hint::allow_auto_batching(false)}); +} +//! [compile_model_no_auto_batching] + +//! [query_optimal_num_requests] +{ + // when the batch size is automatically selected by the implementation + // it is important to query/create and run the sufficient #requests + auto compiled_model = core.compile_model(model, "GPU", ov::hint::performance_mode(ov::hint::PerformanceMode::THROUGHPUT)); + auto num_requests = compiled_model.get_property(ov::optimal_number_of_infer_requests); +} +//! [query_optimal_num_requests] + +//! [hint_num_requests] +{ + // limiting the available parallel slack for the 'throughput' hint via the ov::hint::num_requests + // so that certain parameters (like selected batch size) are automatically accommodated accordingly + auto compiled_model = core.compile_model(model, "GPU", {ov::hint::performance_mode(ov::hint::PerformanceMode::THROUGHPUT), + ov::hint::num_requests(4)}); +} +//! 
[hint_num_requests] + + return 0; +} From bea352f2722191419027015dd768c58e141f7764 Mon Sep 17 00:00:00 2001 From: Nesterov Alexander Date: Wed, 2 Mar 2022 18:00:32 +0300 Subject: [PATCH 154/310] Update Linux Azure CI (#10739) --- .ci/azure/linux_arm64.yml | 200 ++++++++++++++++++++++++++++---------- 1 file changed, 146 insertions(+), 54 deletions(-) diff --git a/.ci/azure/linux_arm64.yml b/.ci/azure/linux_arm64.yml index 6d9b68f1045..8197cf1a8da 100644 --- a/.ci/azure/linux_arm64.yml +++ b/.ci/azure/linux_arm64.yml @@ -26,16 +26,28 @@ jobs: system.debug: true VSTS_HTTP_RETRY: 5 VSTS_HTTP_TIMEOUT: 200 + PYTHON_ARM_VERSION: "3.8.12" + PYTHON_EXEC: "python3.8" + OPENVINO_ARCH: 'aarch64' + NUM_PROC: 1 BUILD_TYPE: Release OPENVINO_REPO_DIR: $(Build.Repository.LocalPath) OPENVINO_CONTRIB_REPO_DIR: $(OPENVINO_REPO_DIR)/../openvino_contrib OPENCV_REPO_DIR: $(OPENVINO_REPO_DIR)/../opencv + BUILD_PYTHON: $(WORK_DIR)/build_python + BUILD_OPENCV: $(WORK_DIR)/build_opencv + BUILD_OPENVINO: $(WORK_DIR)/build + BUILD_OPENVINO_PYTHON: $(WORK_DIR)/build_python + BUILD_OPEN_MODEL_ZOO: $(WORK_DIR)/build_open_model_zoo + INSTALL_OPENVINO: $(WORK_DIR)/install_openvino + INSTALL_PYTHON: $(INSTALL_OPENVINO)/extras/python + INSTALL_OPENCV: $(INSTALL_OPENVINO)/extras/opencv + INSTALL_OPEN_MODEL_ZOO: $(INSTALL_OPENVINO)/extras/open_model_zoo WORK_DIR: $(Pipeline.Workspace)/_w - BUILD_DIR: $(WORK_DIR)/build - BUILD_DIR_OPENCV: $(WORK_DIR)/build_opencv - TMP_DIR: /mnt/tmp SHARE_DIR: /mount/cinfsshare/onnxtestdata - CCACHE_DIR: $(SHARE_DIR)/ccache/master/linux_arm64 + TMP_DIR: /mnt/tmp + OPENVINO_CCACHE_DIR: $(SHARE_DIR)/ccache/master/linux_arm64 + OPENCV_CCACHE_DIR: $(SHARE_DIR)/ccache/master/linux_arm64_opencv steps: - script: | @@ -56,17 +68,21 @@ jobs: df lsblk -o NAME,HCTL,SIZE,MOUNTPOINT | grep -i "sd" free -h + echo "##vso[task.setvariable variable=NUM_PROC]$(nproc --all)" + echo "NUM_PROC=$(NUM_PROC)" displayName: 'System information' - script: | rm -rf $(WORK_DIR) ; mkdir $(WORK_DIR) - rm -rf $(BUILD_DIR) ; mkdir $(BUILD_DIR) + mkdir -p $(BUILD_OPENCV) $(BUILD_OPENVINO) $(BUILD_OPENVINO_PYTHON) $(BUILD_PYTHON) $(BUILD_OPEN_MODEL_ZOO) + mkdir -p $(INSTALL_OPENVINO) $(INSTALL_PYTHON) $(INSTALL_OPENCV) $(INSTALL_OPEN_MODEL_ZOO) sudo rm -rf $(TMP_DIR) ; sudo mkdir $(TMP_DIR) ; sudo chmod 777 -R $(TMP_DIR) sudo mkdir -p $(SHARE_DIR) sudo apt --assume-yes update && sudo apt --assume-yes install nfs-common sudo mount -vvv -t nfs cinfsshare.file.core.windows.net:/cinfsshare/onnxtestdata $(SHARE_DIR) -o vers=4,minorversion=1,sec=sys - mkdir -p $(CCACHE_DIR) - displayName: 'Make directory' + mkdir -p $(OPENVINO_CCACHE_DIR) + mkdir -p $(OPENCV_CCACHE_DIR) + displayName: 'Make directories' - checkout: self clean: true @@ -83,16 +99,25 @@ jobs: - script: | set -e $(OPENVINO_REPO_DIR)/install_build_dependencies.sh - # Move into contrib install_build_dependencies.sh - sudo apt --assume-yes install scons crossbuild-essential-arm64 libprotoc-dev protobuf-compiler - # OpenCV should provide install_build_dependencies.sh as well - # Move into resources - git clone https://github.com/opencv/opencv.git --depth 1 $(OPENCV_REPO_DIR) - # Speed up build - wget https://github.com/ninja-build/ninja/releases/download/v1.10.2/ninja-linux.zip - unzip ninja-linux.zip - sudo cp -v ninja /usr/local/bin/ - workingDirectory: $(WORK_DIR) + export CCACHE_DIR=$(OPENCV_CCACHE_DIR) + export CCACHE_TEMPDIR=$(TMP_DIR)/ccache + export CCACHE_BASEDIR=$(Pipeline.Workspace) + export CCACHE_MAXSIZE=50G + export USE_CCACHE=1 + export 
PYTHON_ARM_VERSION=$(PYTHON_ARM_VERSION) + export NUM_PROC=$(NUM_PROC) + export BUILD_PYTHON=$(BUILD_PYTHON) + export WORK_DIR=$(WORK_DIR) + export INSTALL_PYTHON=$(INSTALL_PYTHON) + export BUILD_TYPE=$(BUILD_TYPE) + export OPENVINO_REPO_DIR=$(OPENVINO_REPO_DIR) + export INSTALL_OPENCV=$(INSTALL_OPENCV) + export PYTHON_EXEC=$(PYTHON_EXEC) + export OPENCV_REPO_DIR=$(OPENCV_REPO_DIR) + export BUILD_OPENCV=$(BUILD_OPENCV) + export INSTALL_OPENVINO=$(INSTALL_OPENVINO) + $(OPENVINO_CONTRIB_REPO_DIR)/modules/arm_plugin/scripts/install_build_dependencies.sh + workingDirectory: $(BUILD_OPENVINO) displayName: 'Install dependencies' - task: CMake@1 @@ -100,30 +125,21 @@ jobs: cmakeArgs: > -GNinja -DVERBOSE_BUILD=ON + -DOpenCV_DIR=$(INSTALL_OPENCV)/cmake + -DENABLE_OPENCV=OFF + -DPYTHON_INCLUDE_DIRS=$(INSTALL_PYTHON)/include/python3.8 + -DPYTHON_LIBRARY=$(INSTALL_PYTHON)/lib/libpython3.8.so + -DENABLE_PYTHON=ON + -DPYTHON_MODULE_EXTENSION=".so" + -DENABLE_TESTS=ON + -DENABLE_FUNCTIONAL_TESTS=ON + -DENABLE_GAPI_TESTS=OFF + -DENABLE_GAPI_PREPROCESSING=OFF + -DENABLE_DATA=OFF + -DCMAKE_EXE_LINKER_FLAGS=-Wl,-rpath-link,$(INSTALL_OPENCV)/lib + -DTHREADING=SEQ -DENABLE_LTO=ON + -DCMAKE_TOOLCHAIN_FILE=$(OPENVINO_REPO_DIR)/cmake/arm64.toolchain.cmake -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) - -DBUILD_LIST=imgcodecs,videoio,highgui - -DCMAKE_TOOLCHAIN_FILE=$(OPENCV_REPO_DIR)/platforms/linux/aarch64-gnu.toolchain.cmake - $(OPENCV_REPO_DIR) - workingDirectory: $(BUILD_DIR_OPENCV) - - - script: ninja - workingDirectory: $(BUILD_DIR_OPENCV) - displayName: 'Build OpenCV Linux ARM64' - - - script: ninja install - workingDirectory: $(BUILD_DIR_OPENCV) - displayName: 'Install OpenCV Linux ARM64' - - - task: CMake@1 - inputs: - cmakeArgs: > - -GNinja - -DVERBOSE_BUILD=ON - -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) - -DCMAKE_TOOLCHAIN_FILE=$(OPENVINO_REPO_DIR)/cmake/arm64.toolchain.cmake - -DOpenCV_DIR=$(BUILD_DIR_OPENCV)/install/lib/cmake/opencv4 - -DENABLE_OPENCV=OFF - -DENABLE_TESTS=ON -DENABLE_SAMPLES=ON -DBUILD_java_api=OFF -DENABLE_INTEL_MYRIAD=OFF @@ -131,26 +147,102 @@ jobs: -DIE_EXTRA_MODULES=$(OPENVINO_CONTRIB_REPO_DIR)/modules -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache + -DARM_COMPUTE_SCONS_JOBS=$(NUM_PROC) + -DOUTPUT_ROOT=$(INSTALL_OPENVINO) + -DCMAKE_INSTALL_PREFIX=$(INSTALL_OPENVINO) $(OPENVINO_REPO_DIR) - workingDirectory: $(BUILD_DIR) - - - script: ls -alR $(OPENVINO_REPO_DIR)/temp/ - displayName: 'List temp SDKs' - - - script: ccache --zero-stats --max-size=50G --show-config - displayName: 'Clean ccache stats' + workingDirectory: $(BUILD_OPENVINO) + displayName: 'CMake OpenVINO ARM plugin' - script: | - export CCACHE_DIR=$(CCACHE_DIR) + export CCACHE_DIR=$(OPENVINO_CCACHE_DIR) export CCACHE_TEMPDIR=$(TMP_DIR)/ccache export CCACHE_BASEDIR=$(Pipeline.Workspace) export CCACHE_MAXSIZE=50G + export USE_CCACHE=1 ninja - workingDirectory: $(BUILD_DIR) - displayName: 'Build Linux ARM64' + workingDirectory: $(BUILD_OPENVINO) + displayName: 'Build OpenVINO ARM plugin' - - script: ccache --show-stats - displayName: 'Show ccache stats' + - script: ninja install + workingDirectory: $(BUILD_OPENVINO) + displayName: 'Install OpenVINO ARM plugin' - - script: ls -alR $(OPENVINO_REPO_DIR)/bin/ - displayName: 'List binary files' + - task: CMake@1 + inputs: + cmakeArgs: > + -GNinja + -DInferenceEngineDeveloperPackage_DIR=$(BUILD_OPENVINO) + -DENABLE_PYTHON=ON + -DPYTHON_EXECUTABLE=$(INSTALL_PYTHON)/bin/python3.8 + -DPYTHON_INCLUDE_DIRS=$(INSTALL_PYTHON)/include/python3.8 + 
-DPYTHON_LIBRARIES=$(INSTALL_PYTHON)/lib + -DPYTHON3_NUMPY_INCLUDE_DIRS=/usr/local/lib/python3.8/site-packages/numpy/core/include + -DPYTHON_MODULE_EXTENSION=".so" + -DPYBIND11_FINDPYTHON=OFF + -DPYBIND11_NOPYTHON=OFF + -DPYTHONLIBS_FOUND=TRUE + -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) + -DENABLE_DATA=OFF + -DCMAKE_EXE_LINKER_FLAGS=-Wl,-rpath-link,$(INSTALL_OPENCV)/lib + -DCMAKE_TOOLCHAIN_FILE=$(OPENVINO_REPO_DIR)/cmake/arm64.toolchain.cmake + -DCMAKE_CXX_COMPILER_LAUNCHER=ccache + -DCMAKE_C_COMPILER_LAUNCHER=ccache + -DCMAKE_INSTALL_PREFIX=$(INSTALL_OPENVINO) + $(OPENVINO_REPO_DIR)/src/bindings/python + workingDirectory: $(BUILD_OPENVINO_PYTHON) + displayName: 'CMake OpenVINO python binding' + + - script: | + export CCACHE_DIR=$(OPENVINO_CCACHE_DIR) + export CCACHE_TEMPDIR=$(TMP_DIR)/ccache + export CCACHE_BASEDIR=$(Pipeline.Workspace) + export CCACHE_MAXSIZE=50G + export USE_CCACHE=1 + ninja + workingDirectory: $(BUILD_OPENVINO_PYTHON) + displayName: 'Build OpenVINO python binding' + + - script: ninja install + workingDirectory: $(BUILD_OPENVINO_PYTHON) + displayName: 'Install OpenVINO python binding' + + - task: CMake@1 + inputs: + cmakeArgs: > + -GNinja + -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) + -DENABLE_PYTHON=ON + -DPYTHON_EXECUTABLE=/usr/local/bin/python3.8 + -DPYTHON_INCLUDE_DIR=$(INSTALL_PYTHON)/include/python3.8 + -DPYTHON_LIBRARY=$(INSTALL_PYTHON)/lib + -DCMAKE_TOOLCHAIN_FILE=$(OPENVINO_REPO_DIR)/cmake/arm64.toolchain.cmake + -DOpenVINO_DIR=$(BUILD_OPENVINO) + -DInferenceEngine_DIR=$(BUILD_OPENVINO) + -DOpenCV_DIR=$(INSTALL_OPENCV)/cmake + -Dngraph_DIR=$(BUILD_OPENVINO) + -DIE_EXTRA_MODULES=$(OPENVINO_CONTRIB_REPO_DIR)/modules + -DCMAKE_INSTALL_PREFIX=$(INSTALL_OPEN_MODEL_ZOO) + $(OPENVINO_REPO_DIR)/thirdparty/open_model_zoo/demos + workingDirectory: $(BUILD_OPEN_MODEL_ZOO) + displayName: 'CMake Open Model Zoo demos' + + - script: ninja + workingDirectory: $(BUILD_OPEN_MODEL_ZOO) + displayName: 'Build Open Model Zoo demos' + + - script: ninja install + workingDirectory: $(BUILD_OPEN_MODEL_ZOO) + displayName: 'Install Open Model Zoo demos' + + - script: | + cp -r $(BUILD_OPEN_MODEL_ZOO)/$(OPENVINO_ARCH)/$(BUILD_TYPE)/* $(INSTALL_OPEN_MODEL_ZOO)/ + zip -9 -r $(Build.ArtifactStagingDirectory)/openvino_$(OPENVINO_ARCH)_linux.zip ./* + workingDirectory: $(INSTALL_OPENVINO) + displayName: 'Create OpenVINO ARM64 linux package' + + - task: PublishBuildArtifacts@1 + inputs: + pathToPublish: $(Build.ArtifactStagingDirectory) + artifactName: 'openvino_aarch64_linux' + displayName: 'Publish OpenVINO AArch64 linux package' From 4b55ef991166c9e41734b561bd3d986a4210c4ca Mon Sep 17 00:00:00 2001 From: Evgenya Stepyreva Date: Wed, 2 Mar 2022 19:16:34 +0300 Subject: [PATCH 155/310] Static Shape constraints removed from Interpolate 1->4 transformation (#10732) * Static Shape constraints removed from Interpolate 1->4 transformation * Dynamic tests added --- .../convert_interpolate1_to_interpolate4.cpp | 31 +++++++++---------- ...vert_interpolate1_to_interpolate4_test.cpp | 25 +++++++++++++++ 2 files changed, 39 insertions(+), 17 deletions(-) diff --git a/src/common/transformations/src/transformations/op_conversions/convert_interpolate1_to_interpolate4.cpp b/src/common/transformations/src/transformations/op_conversions/convert_interpolate1_to_interpolate4.cpp index 1b9202fdd3f..de0feeb8dcb 100644 --- a/src/common/transformations/src/transformations/op_conversions/convert_interpolate1_to_interpolate4.cpp +++ b/src/common/transformations/src/transformations/op_conversions/convert_interpolate1_to_interpolate4.cpp @@ 
-9,40 +9,37 @@ #include #include +#include #include #include #include +#include +#include NGRAPH_RTTI_DEFINITION(ngraph::pass::ConvertInterpolate1ToInterpolate4, "ConvertInterpolate1ToInterpolate4", 0); ngraph::pass::ConvertInterpolate1ToInterpolate4::ConvertInterpolate1ToInterpolate4() { MATCHER_SCOPE(ConvertInterpolate1ToInterpolate4); - auto interpolate1 = ngraph::pattern::wrap_type({pattern::any_input(pattern::has_static_shape()), pattern::any_input()}); + auto interpolate1 = ngraph::pattern::wrap_type({pattern::any_input(pattern::has_static_rank()), pattern::any_input()}); ngraph::matcher_pass_callback callback = [this](pattern::Matcher& m) { auto interpolationV0 = std::dynamic_pointer_cast(m.get_match_root()); if (!interpolationV0) { return false; } - auto& inp_partial_shape = interpolationV0->get_input_partial_shape(0); - auto& out_shape = interpolationV0->get_output_shape(0); auto attrsV0 = interpolationV0->get_attrs(); + std::vector axes{attrsV0.axes.begin(), attrsV0.axes.end()}; + const auto& out_dims = std::make_shared(interpolationV0->input_value(1), element::f32); + const auto& in_dims = std::make_shared(ngraph::op::util::node_to_get_shape_value_of_indices_from_shape_source( + interpolationV0->input_value(0), axes), element::f32); - std::vector scales(attrsV0.axes.size(), 1.0f); - auto inp_shape = inp_partial_shape.to_shape(); - size_t i = 0; - for (std::size_t axis : attrsV0.axes) { - scales[i] = static_cast(out_shape.at(axis))/inp_shape.at(axis); - i++; - } - - auto input_shape_rank = inp_partial_shape.rank().get_length(); - auto scalesConstant = ngraph::op::Constant::create(ngraph::element::f32, {scales.size()}, scales); - auto axisConstant = ngraph::op::Constant::create(ngraph::element::i64, {attrsV0.axes.size()}, - std::vector{attrsV0.axes.begin(), attrsV0.axes.end()}); + std::shared_ptr scales = std::make_shared(out_dims, in_dims); + if (const auto& constant = ov::get_constant_from_source(scales)) + scales = constant; + auto axisConstant = ngraph::op::Constant::create(ngraph::element::i64, {axes.size()}, axes); ngraph::opset4::Interpolate::InterpolateAttrs attrsV4; - + auto input_shape_rank = interpolationV0->get_input_partial_shape(0).rank().get_length(); if (attrsV0.mode == "nearest") { attrsV4.mode = ngraph::opset4::Interpolate::InterpolateMode::NEAREST; } else if (attrsV0.mode == "linear") { @@ -85,7 +82,7 @@ ngraph::pass::ConvertInterpolate1ToInterpolate4::ConvertInterpolate1ToInterpolat } auto interpolateV4 = std::make_shared(interpolationV0->input_value(0), interpolationV0->input_value(1), - scalesConstant, axisConstant, attrsV4); + scales, axisConstant, attrsV4); interpolateV4->set_friendly_name(interpolationV0->get_friendly_name()); ngraph::copy_runtime_info(interpolationV0, interpolateV4); diff --git a/src/tests/functional/inference_engine/transformations/op_conversions/convert_interpolate1_to_interpolate4_test.cpp b/src/tests/functional/inference_engine/transformations/op_conversions/convert_interpolate1_to_interpolate4_test.cpp index 6650baffea6..8894b4c631e 100644 --- a/src/tests/functional/inference_engine/transformations/op_conversions/convert_interpolate1_to_interpolate4_test.cpp +++ b/src/tests/functional/inference_engine/transformations/op_conversions/convert_interpolate1_to_interpolate4_test.cpp @@ -56,6 +56,7 @@ TEST_F(TransformationTestsF, ConvertInterpolate1ToInterpolate4) { function_ref = std::make_shared(NodeVector{interpolate4}, ParameterVector{data_node}); } + comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES); } 
TEST_F(TransformationTestsF, ConvertInterpolate1ToInterpolate4_1) { @@ -93,4 +94,28 @@ TEST_F(TransformationTestsF, ConvertInterpolate1ToInterpolate4_1) { function_ref = std::make_shared(NodeVector{interpolate4}, ParameterVector{data_node}); } + comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES); } + +TEST(TransformationTests, DynamiShapeInterpolate1To4) { + auto data_node = std::make_shared(element::f32, PartialShape{-1, 5, {1, 10}, -1}); + auto out_shape_node = std::make_shared(element::i32, Shape{2}); + + auto interpolate1_attr = op::v0::InterpolateAttrs(); + interpolate1_attr.axes = AxisSet(std::vector{2, 3}); + interpolate1_attr.mode = "linear"; + interpolate1_attr.align_corners = false; + interpolate1_attr.antialias = true; + interpolate1_attr.pads_begin = std::vector{0, 0, 0, 0}; + interpolate1_attr.pads_end = std::vector{0, 0, 0, 0}; + + auto interpolate1 = std::make_shared(data_node, out_shape_node, interpolate1_attr); + auto f = std::make_shared(NodeVector{interpolate1}, ParameterVector{data_node, out_shape_node}); + + auto manager = ov::pass::Manager(); + manager.register_pass(); + manager.register_pass(); + manager.run_passes(f); + + ASSERT_TRUE(ngraph::op::util::has_op_with_type(f)); +} \ No newline at end of file From 24a5aab5014a41db51c123e126f4209e8c45b897 Mon Sep 17 00:00:00 2001 From: Taylor Yeonbok Lee Date: Thu, 3 Mar 2022 01:27:32 +0900 Subject: [PATCH 156/310] Fixed bug: When external id of a loop is fused, the i/o map of a loop should be updated (#10726) --- src/plugins/intel_gpu/src/graph/program_node.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/plugins/intel_gpu/src/graph/program_node.cpp b/src/plugins/intel_gpu/src/graph/program_node.cpp index a73f0833d8a..71a8beb99c7 100644 --- a/src/plugins/intel_gpu/src/graph/program_node.cpp +++ b/src/plugins/intel_gpu/src/graph/program_node.cpp @@ -5,7 +5,7 @@ #include "program_node.h" #include "program_helpers.h" #include "primitive_inst.h" - +#include "loop_inst.h" #ifdef ENABLE_ONEDNN_FOR_GPU #include "convolution_inst.h" #include "quantize_inst.h" @@ -37,6 +37,11 @@ void program_node::replace_dependency(size_t idx, program_node& new_dep) { if (dependencies[idx] == &new_dep) return; + if (is_type()) { + loop_node& loop = *this; + loop.update_primitive_map(dependencies[idx]->id(), new_dep.id(), true); + } + auto it = std::find(dependencies[idx]->users.begin(), dependencies[idx]->users.end(), this); if (it != dependencies[idx]->users.end()) { dependencies[idx]->users.erase(it); From 21185189d82c92ca018d906058325737975ebd82 Mon Sep 17 00:00:00 2001 From: Maxim Shevtsov Date: Wed, 2 Mar 2022 19:45:42 +0300 Subject: [PATCH 157/310] adding 2.0 config param for auto_batch_timeout and the tests (#10719) --- .../ov_executable_network/properties.cpp | 8 +++++--- .../include/openvino/runtime/properties.hpp | 6 ++++++ .../ov_executable_network/properties.cpp | 9 ++++----- .../behavior/plugin/auto_batching_tests.cpp | 19 +++++++++++++++++++ .../ov_executable_network/properties.cpp | 4 ++-- 5 files changed, 36 insertions(+), 10 deletions(-) diff --git a/docs/template_plugin/tests/functional/shared_tests_instances/behavior/ov_executable_network/properties.cpp b/docs/template_plugin/tests/functional/shared_tests_instances/behavior/ov_executable_network/properties.cpp index 8a7e41278f8..10ba97613de 100644 --- a/docs/template_plugin/tests/functional/shared_tests_instances/behavior/ov_executable_network/properties.cpp +++ 
b/docs/template_plugin/tests/functional/shared_tests_instances/behavior/ov_executable_network/properties.cpp @@ -29,6 +29,8 @@ const std::vector auto_inproperties = { const std::vector auto_batch_inproperties = { {ov::device::id("UNSUPPORTED_DEVICE_ID_STRING")}, + {{CONFIG_KEY(AUTO_BATCH_DEVICE_CONFIG) , std::string(CommonTestUtils::DEVICE_TEMPLATE) + "(4)"}, + {ov::auto_batch_timeout(-1)}}, }; INSTANTIATE_TEST_SUITE_P(smoke_BehaviorTests, OVCompiledModelPropertiesIncorrectTests, @@ -89,9 +91,9 @@ const std::vector multi_properties = { }; const std::vector auto_batch_properties = { - {{CONFIG_KEY(AUTO_BATCH_DEVICE_CONFIG) , CommonTestUtils::DEVICE_TEMPLATE}}, - {{CONFIG_KEY(AUTO_BATCH_DEVICE_CONFIG) , CommonTestUtils::DEVICE_TEMPLATE}, - {CONFIG_KEY(AUTO_BATCH_TIMEOUT) , "1"}}, + {{CONFIG_KEY(AUTO_BATCH_DEVICE_CONFIG) , std::string(CommonTestUtils::DEVICE_TEMPLATE) + "(4)"}}, + {{CONFIG_KEY(AUTO_BATCH_DEVICE_CONFIG) , std::string(CommonTestUtils::DEVICE_TEMPLATE) + "(4)"}, {CONFIG_KEY(AUTO_BATCH_TIMEOUT) , "1"}}, + {{CONFIG_KEY(AUTO_BATCH_DEVICE_CONFIG) , std::string(CommonTestUtils::DEVICE_TEMPLATE) + "(4)"}, {ov::auto_batch_timeout(10)}}, }; INSTANTIATE_TEST_SUITE_P(smoke_BehaviorTests, OVCompiledModelPropertiesTests, diff --git a/src/inference/include/openvino/runtime/properties.hpp b/src/inference/include/openvino/runtime/properties.hpp index b63d7d04f6f..90a5a8e8e68 100644 --- a/src/inference/include/openvino/runtime/properties.hpp +++ b/src/inference/include/openvino/runtime/properties.hpp @@ -462,6 +462,12 @@ static constexpr Property optimal_batch_si */ static constexpr Property max_batch_size{"MAX_BATCH_SIZE"}; +/** + * @brief Read-write property to set the timeout used to collect the inputs for the auto-batching + * impact. + */ +static constexpr Property auto_batch_timeout{"AUTO_BATCH_TIMEOUT"}; + /** * @brief Read-only property to provide a hint for a range for number of async infer requests. If device supports * streams, the metric provides range for number of IRs per stream. 
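For illustration only, and not part of the patch itself: a minimal sketch of how the `ov::auto_batch_timeout` property added in the hunk above could be passed at compile time together with the explicit BATCH device notion described in the documentation commit earlier in this series. The model path, the batch size of 4, and the 100 ms value are placeholder assumptions.

```cpp
#include <openvino/openvino.hpp>

int main() {
    ov::Core core;
    // placeholder IR path, mirroring the "sample.xml" used in the docs snippets
    auto model = core.read_model("sample.xml");
    // compile on the explicit BATCH device with a batch size of 4 and shorten the
    // input-collection timeout (in milliseconds) via the new 2.0-style property
    auto compiled_model = core.compile_model(model, "BATCH:GPU(4)", ov::auto_batch_timeout(100));
    return 0;
}
```

A larger timeout increases the chance of filling the batch at the cost of extra latency, which is why the tests in this commit exercise small values, a large value, and an invalid negative one.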
diff --git a/src/tests/functional/plugin/cpu/shared_tests_instances/behavior/ov_executable_network/properties.cpp b/src/tests/functional/plugin/cpu/shared_tests_instances/behavior/ov_executable_network/properties.cpp index 4b6066fb8b5..22705a5d63f 100644 --- a/src/tests/functional/plugin/cpu/shared_tests_instances/behavior/ov_executable_network/properties.cpp +++ b/src/tests/functional/plugin/cpu/shared_tests_instances/behavior/ov_executable_network/properties.cpp @@ -30,6 +30,7 @@ const std::vector auto_inproperties = { const std::vector auto_batch_inproperties = { {ov::num_streams(-100)}, + {{CONFIG_KEY(AUTO_BATCH_DEVICE_CONFIG) , std::string(CommonTestUtils::DEVICE_CPU) + "(4)"}, {ov::auto_batch_timeout(-1)}}, }; INSTANTIATE_TEST_SUITE_P(smoke_BehaviorTests, OVCompiledModelPropertiesIncorrectTests, @@ -107,11 +108,9 @@ const std::vector multi_properties = { }; const std::vector auto_batch_properties = { - {{CONFIG_KEY(AUTO_BATCH_DEVICE_CONFIG) , CommonTestUtils::DEVICE_CPU}, ov::num_streams(ov::streams::AUTO)}, - {{CONFIG_KEY(AUTO_BATCH_DEVICE_CONFIG) , CommonTestUtils::DEVICE_CPU}, - {InferenceEngine::PluginConfigParams::KEY_CPU_THROUGHPUT_STREAMS, InferenceEngine::PluginConfigParams::CPU_THROUGHPUT_AUTO}}, - {{CONFIG_KEY(AUTO_BATCH_DEVICE_CONFIG) , CommonTestUtils::DEVICE_CPU}, - {CONFIG_KEY(AUTO_BATCH_TIMEOUT) , "1"}, ov::num_streams(ov::streams::AUTO)}, + {{CONFIG_KEY(AUTO_BATCH_DEVICE_CONFIG) , std::string(CommonTestUtils::DEVICE_CPU) + "(4)"}}, + {{CONFIG_KEY(AUTO_BATCH_DEVICE_CONFIG) , std::string(CommonTestUtils::DEVICE_CPU) + "(4)"}, {CONFIG_KEY(AUTO_BATCH_TIMEOUT) , "1"}}, + {{CONFIG_KEY(AUTO_BATCH_DEVICE_CONFIG) , std::string(CommonTestUtils::DEVICE_CPU) + "(4)"}, {ov::auto_batch_timeout(10)}}, }; INSTANTIATE_TEST_SUITE_P(smoke_BehaviorTests, OVCompiledModelPropertiesTests, diff --git a/src/tests/functional/plugin/gpu/shared_tests_instances/behavior/plugin/auto_batching_tests.cpp b/src/tests/functional/plugin/gpu/shared_tests_instances/behavior/plugin/auto_batching_tests.cpp index babdb0416ed..a67ef66b2ac 100644 --- a/src/tests/functional/plugin/gpu/shared_tests_instances/behavior/plugin/auto_batching_tests.cpp +++ b/src/tests/functional/plugin/gpu/shared_tests_instances/behavior/plugin/auto_batching_tests.cpp @@ -3,6 +3,7 @@ // #include "behavior/plugin/auto_batching_tests.hpp" #include "behavior/plugin/configuration_tests.hpp" +#include "openvino/runtime/properties.hpp" const std::vector num_streams{ 2 }; @@ -40,4 +41,22 @@ INSTANTIATE_TEST_SUITE_P( ::testing::Values(DefaultParameter{CONFIG_KEY(AUTO_BATCH_TIMEOUT), InferenceEngine::Parameter{"1000"}})), DefaultConfigurationTest::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P( + smoke_AutoBatching_GPU_2_0_string, + DefaultConfigurationTest, + ::testing::Combine( + ::testing::Values(std::string(CommonTestUtils::DEVICE_BATCH) + ":" + CommonTestUtils::DEVICE_GPU), + ::testing::Values(DefaultParameter{ov::auto_batch_timeout.name(), + InferenceEngine::Parameter{"1000"}})), + DefaultConfigurationTest::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P( + smoke_AutoBatching_GPU_2_0_uint, + DefaultConfigurationTest, + ::testing::Combine( + ::testing::Values(std::string(CommonTestUtils::DEVICE_BATCH) + ":" + CommonTestUtils::DEVICE_GPU), + ::testing::Values(DefaultParameter{ov::auto_batch_timeout.name(), + InferenceEngine::Parameter{1000}})), + DefaultConfigurationTest::getTestCaseName); } // namespace AutoBatchingTests \ No newline at end of file diff --git 
a/src/tests/functional/plugin/shared/src/behavior/ov_executable_network/properties.cpp b/src/tests/functional/plugin/shared/src/behavior/ov_executable_network/properties.cpp index 6263ea0ed1c..7068000f690 100644 --- a/src/tests/functional/plugin/shared/src/behavior/ov_executable_network/properties.cpp +++ b/src/tests/functional/plugin/shared/src/behavior/ov_executable_network/properties.cpp @@ -17,7 +17,7 @@ std::string OVCompiledModelEmptyPropertiesTests::getTestCaseName(testing::TestPa void OVCompiledModelEmptyPropertiesTests::SetUp() { SKIP_IF_CURRENT_TEST_IS_DISABLED() device_name = this->GetParam(); - model = ngraph::builder::subgraph::makeConvPoolRelu(); + model = ov::test::behavior::getDefaultNGraphFunctionForTheDevice(device_name); } std::string OVCompiledModelPropertiesTests::getTestCaseName(testing::TestParamInfo obj) { @@ -35,7 +35,7 @@ std::string OVCompiledModelPropertiesTests::getTestCaseName(testing::TestParamIn void OVCompiledModelPropertiesTests::SetUp() { SKIP_IF_CURRENT_TEST_IS_DISABLED(); std::tie(device_name, properties) = this->GetParam(); - model = ngraph::builder::subgraph::makeConvPoolRelu(); + model = ov::test::behavior::getDefaultNGraphFunctionForTheDevice(device_name); } void OVCompiledModelPropertiesTests::TearDown() { From d670e77d976a98d67b2d24ab4a004fc54751da22 Mon Sep 17 00:00:00 2001 From: Ilya Churaev Date: Wed, 2 Mar 2022 20:07:52 +0300 Subject: [PATCH 158/310] Docs: Changed OpenVINO Runtime User Guide integration (#10187) * Changed C++ OpenVINO Runtime User Guide integration * Remove IE from C++ guide * Fixed comments * Additional fix * Fixed some comments * Some new documents * Fixed some comments * Added Python snippets * Added sphinx tabs * Removed tabs * Removed group-tab * Added additional lines * Fixed typo * Fixed comments and build * Try to fix complex tabs * Fixed some typos * Added python code for model representation * Added more python code * Added serialize/visualize python examples * Simplify integration pipeline * Fixed typo * Try to fix tabs * Extend CompiledModel guide * Resolve merge conflict * Added separate infer request guide * Fixed build * Added cancel infer request method * Update docs/snippets/ov_model_snippets.py Co-authored-by: Jan Iwaszkiewicz * Fixed comments * Fixed typo * Extend visualize pass * Fixed comments * Fixed build * Fixed typo * Update docs/snippets/ov_infer_request.py Co-authored-by: Jan Iwaszkiewicz * Update docs/snippets/ov_infer_request.py Co-authored-by: Jan Iwaszkiewicz * Update docs/OV_Runtime_UG/integrate_with_your_application.md Co-authored-by: Andrey Zaytsev * Update docs/OV_Runtime_UG/integrate_with_your_application.md Co-authored-by: Andrey Zaytsev * Update docs/OV_Runtime_UG/integrate_with_your_application.md Co-authored-by: Andrey Zaytsev * Update docs/OV_Runtime_UG/integrate_with_your_application.md Co-authored-by: Andrey Zaytsev * Update docs/OV_Runtime_UG/integrate_with_your_application.md Co-authored-by: Andrey Zaytsev * Update docs/OV_Runtime_UG/integrate_with_your_application.md Co-authored-by: Andrey Zaytsev * Update docs/OV_Runtime_UG/integrate_with_your_application.md Co-authored-by: Andrey Zaytsev * Update docs/OV_Runtime_UG/integrate_with_your_application.md Co-authored-by: Andrey Zaytsev * Update docs/OV_Runtime_UG/integrate_with_your_application.md Co-authored-by: Andrey Zaytsev * Update docs/OV_Runtime_UG/integrate_with_your_application.md Co-authored-by: Andrey Zaytsev * Update docs/OV_Runtime_UG/integrate_with_your_application.md Co-authored-by: Andrey Zaytsev * Update 
docs/OV_Runtime_UG/integrate_with_your_application.md Co-authored-by: Andrey Zaytsev * Update docs/OV_Runtime_UG/integrate_with_your_application.md Co-authored-by: Andrey Zaytsev * Update docs/OV_Runtime_UG/integrate_with_your_application.md Co-authored-by: Andrey Zaytsev * Update docs/OV_Runtime_UG/integrate_with_your_application.md Co-authored-by: Andrey Zaytsev * Update docs/OV_Runtime_UG/model_representation.md Co-authored-by: Andrey Zaytsev * Update docs/OV_Runtime_UG/model_representation.md Co-authored-by: Andrey Zaytsev * Fixed comments * Fixed doc * Fixed merge Co-authored-by: Jan Iwaszkiewicz Co-authored-by: Andrey Zaytsev --- README.md | 8 +- docs/CMakeLists.txt | 4 +- ...grate_with_customer_application_new_API.md | 490 ------------------ docs/OV_Runtime_UG/PythonPackage_Overview.md | 14 + .../integrate_with_your_application.md | 265 ++++++++++ docs/OV_Runtime_UG/model_representation.md | 181 +++++-- docs/OV_Runtime_UG/network_state_intro.md | 4 +- docs/OV_Runtime_UG/openvino_intro.md | 3 +- docs/OV_Runtime_UG/ov_infer_request.md | 277 ++++++++++ .../supported_plugins/Supported_Devices.md | 2 +- docs/index.rst | 4 +- docs/snippets/CMakeLists.txt | 19 + ...rate_with_customer_application_new_API.cpp | 133 ----- docs/snippets/example_ngraph_utils.cpp | 60 --- docs/snippets/ov_infer_request.cpp | 112 ++++ docs/snippets/ov_infer_request.py | 97 ++++ docs/snippets/ov_model_snippets.cpp | 110 +++- docs/snippets/ov_model_snippets.py | 88 ++++ docs/snippets/src/main.cpp | 69 +++ docs/snippets/src/main.py | 58 +++ samples/c/hello_classification/README.md | 6 +- .../hello_nv12_input_classification/README.md | 6 +- .../cpp/classification_sample_async/README.md | 4 +- samples/cpp/hello_classification/README.md | 4 +- .../hello_nv12_input_classification/README.md | 4 +- samples/cpp/hello_query_device/README.md | 2 +- samples/cpp/hello_reshape_ssd/README.md | 4 +- samples/cpp/model_creation_sample/README.md | 4 +- samples/cpp/speech_sample/README.md | 4 +- .../classification_sample_async/README.md | 4 +- samples/python/hello_classification/README.md | 4 +- samples/python/hello_reshape_ssd/README.md | 4 +- .../python/model_creation_sample/README.md | 4 +- samples/python/speech_sample/README.md | 4 +- tools/pot/openvino/tools/pot/api/README.md | 2 +- 35 files changed, 1286 insertions(+), 772 deletions(-) delete mode 100644 docs/OV_Runtime_UG/Integrate_with_customer_application_new_API.md create mode 100644 docs/OV_Runtime_UG/PythonPackage_Overview.md create mode 100644 docs/OV_Runtime_UG/integrate_with_your_application.md create mode 100644 docs/OV_Runtime_UG/ov_infer_request.md delete mode 100644 docs/snippets/Integrate_with_customer_application_new_API.cpp create mode 100644 docs/snippets/ov_infer_request.cpp create mode 100644 docs/snippets/ov_infer_request.py create mode 100644 docs/snippets/ov_model_snippets.py create mode 100644 docs/snippets/src/main.cpp create mode 100644 docs/snippets/src/main.py diff --git a/README.md b/README.md index fc47dbe49d4..95431e98dce 100644 --- a/README.md +++ b/README.md @@ -16,12 +16,11 @@ source and public models in popular formats such as TensorFlow, ONNX, PaddlePadd * [OpenVINO™ Runtime] * [Model Optimizer] * [Post-Training Optimization Tool] - +* [Samples] ## License -Deep Learning Deployment Toolkit is licensed under [Apache License Version 2.0](LICENSE). -By contributing to the project, you agree to the license and copyright terms therein -and release your contribution under these terms. 
+OpenVINO™ Toolkit is licensed under [Apache License Version 2.0](LICENSE). +By contributing to the project, you agree to the license and copyright terms therein and release your contribution under these terms. ## Resources * Docs: https://docs.openvino.ai/ @@ -46,5 +45,6 @@ Please report questions, issues and suggestions using: [OpenVINO™ Runtime]:https://docs.openvino.ai/latest/openvino_docs_OV_Runtime_User_Guide.html [Model Optimizer]:https://docs.openvino.ai/latest/openvino_docs_MO_DG_Deep_Learning_Model_Optimizer_DevGuide.html [Post-Training Optimization Tool]:https://docs.openvino.ai/latest/pot_README.html +[Samples]:https://github.com/openvinotoolkit/openvino/tree/master/samples [tag on StackOverflow]:https://stackoverflow.com/search?q=%23openvino diff --git a/docs/CMakeLists.txt b/docs/CMakeLists.txt index 75421a3c972..75dbeb180c6 100644 --- a/docs/CMakeLists.txt +++ b/docs/CMakeLists.txt @@ -7,8 +7,6 @@ if(NOT ENABLE_DOCKER) ie_add_compiler_flags(-Wall) endif() - add_subdirectory(snippets) - # Detect OpenVINO find_package(OpenVINO QUIET PATHS "${CMAKE_BINARY_DIR}" @@ -17,6 +15,8 @@ if(NOT ENABLE_DOCKER) set(OpenVINO_DIR ${CMAKE_BINARY_DIR}) endif() + add_subdirectory(snippets) + add_subdirectory(template_extension) set(all_docs_targets diff --git a/docs/OV_Runtime_UG/Integrate_with_customer_application_new_API.md b/docs/OV_Runtime_UG/Integrate_with_customer_application_new_API.md deleted file mode 100644 index ebbec0e3c6e..00000000000 --- a/docs/OV_Runtime_UG/Integrate_with_customer_application_new_API.md +++ /dev/null @@ -1,490 +0,0 @@ -# Integrate Inference Engine {#openvino_docs_IE_DG_Integrate_with_customer_application_new_API} - -## Integrate Inference Engine with Your C++ Application - -@sphinxdirective -.. raw:: html - -
C++
-@endsphinxdirective - -The following diagram illustrates the typical Inference Engine С++ API workflow: - -![ie_api_flow_cpp] - -Read the sections below to learn about each item. - -> **NOTE**: Before start using Inference Engine, make sure you set all environment variables during the installation. If you did not, follow the instructions from the _Set the Environment Variables_ section in the installation guides: -> * [For Windows* 10](../install_guides/installing-openvino-windows.md) -> * [For Linux*](../install_guides/installing-openvino-linux.md) -> * [For macOS*](../install_guides/installing-openvino-macos.md) -> * To build an open source version, use the [Inference Engine Build Instructions](https://github.com/openvinotoolkit/openvino/wiki/BuildingCode). - -### Link with Inference Library - -1. **Create a structure** for the project: - ``` sh - project/ - ├── CMakeLists.txt - CMake file to build - ├── ... - Additional folders like includes/ - └── src/ - source folder - └── main.cpp - build/ - build directory - ... - ``` - -2. **Include Inference Engine, nGraph and OpenCV libraries** in `project/CMakeLists.txt` -[OpenCV](https://docs.opencv.org/master/db/df5/tutorial_linux_gcc_cmake.html) integration is needed mostly for pre-processing input data and model representation in OpenVINO™ Runtime for more complex applications using [OpenVINO Model API](../OV_Runtime_UG/model_representation.md). - ``` cmake - cmake_minimum_required(VERSION 3.0.0) - project(project_name) - find_package(OpenVINO REQUIRED) - add_executable(${PROJECT_NAME} src/main.cpp) - target_link_libraries(${PROJECT_NAME} PRIVATE openvino::runtime) - ``` - -### Use Inference Engine API to Implement Inference Pipeline - -This section provides step-by-step instructions to implement a typical inference pipeline with the Inference Engine C++ API: - -![ie_api_use_cpp] -#### Step 1. Create Inference Engine Core - -Use the following code to create Inference Engine Core to manage available devices and read network objects: - -@snippet snippets/Integrate_with_customer_application_new_API.cpp part0 - -#### Step 2 (Optional). Configure Input and Output of the Model - -@sphinxdirective -.. raw:: html - -
-@endsphinxdirective - - -Optionally, configure input and output of the model using the steps below: - -1. Load a model to a Core object: - @sphinxdirective - - .. tab:: IR - - .. code-block:: c - - auto network = core.ReadNetwork("model.xml"); - - .. tab:: ONNX - - .. code-block:: c - - auto network = core.ReadNetwork("model.onnx"); - - .. tab:: nGraph - - .. code-block:: c - - std::shared_ptr createNetwork() { - // To construct a network, please follow - // https://docs.openvino.ai/latest/openvino_docs_nGraph_DG_build_function.html - } - auto network = CNNNetwork(createNetwork()); - - @endsphinxdirective - -2. Request input and output information using `InferenceEngine::CNNNetwork::getInputsInfo()`, and `InferenceEngine::CNNNetwork::getOutputsInfo()` methods: - ```cpp - /** Take information about all topology inputs **/ - InferenceEngine::InputsDataMap input_info = network.getInputsInfo(); - /** Iterate over all input info**/ - for (auto &item : input_info) { - auto input_data = item.second; - // Add your input configuration steps here - } - - /** Take information about all topology outputs **/ - InferenceEngine::OutputsDataMap output_info = network.getOutputsInfo(); - /** Iterate over all output info**/ - for (auto &item : output_info) { - auto output_data = item.second; - // Add your output configuration steps here - } - ``` - Configuring options: - 1. **Set precision** (number format): FP16, FP32, INT8, etc. Refer to the Supported Configurations section on the [Supported Devices](supported_plugins/Supported_Devices.md) page to choose the relevant configuration.
- For input (*iterate over all input info*): - ```cpp - input_data->setPrecision(InferenceEngine::Precision::U8); - ``` - For output (*iterate over all output info*): - ```cpp - output_data->setPrecision(InferenceEngine::Precision::FP32); - ``` - **By default**, the input and output precision is set to `Precision::FP32`. - - 2. **Set layout** (NCHW, ).
- For input (*iterate over all input info*): - ```cpp - input_data->setLayout(InferenceEngine::Layout::NCHW); - ``` - **By default**, the input layout is set to `Layout::NCHW`.
- For output (*iterate over all output info*): - ```cpp - output_data->setLayout(InferenceEngine::Layout::NC); - ``` - **By default**, the output layout depends on a number of its dimensions:
- |Number of dimensions | 5 | 4 | 3 | 2 | 1 | - |:--------------------|-------|------|-----|----|----| - |Layout | NCDHW | NCHW | CHW | NC | C | - 3. **Set resize algorithm for inputs** (Bilinear). You can allow input of any size. To do this, mark each input as resizable by setting a desired resize algorithm (e.g. `BILINEAR`) inside of the appropriate input info (*Iterate over all input info*): - ```cpp - input_data->getPreProcess().setResizeAlgorithm(InferenceEngine::RESIZE_BILINEAR); - ``` - **By default**, no resize algorithm is set for inputs. - - 4. **Set color format** (BGR, RGB, NV12). Basic color format conversions are supported as well. **By default**, the Inference Engine assumes that the input color format is BGR and color format conversions are disabled. Set `ColorFormat::RAW` input color format if the input does not need color conversions. The Inference Engine supports the following color format conversions: - * RGB->BGR - * RGBX->BGR - * BGRX->BGR - * NV12->BGR - where X is a channel that will be ignored during inference. To enable the conversions, set a desired color format (for example, RGB) for each input inside of the appropriate input info (*iterate over all input info*): - ```cpp - input_data->getPreProcess().setColorFormat(InferenceEngine::ColorFormat::RGB); - ``` - > **NOTE**: NV12 input color format pre-processing differs from other color conversions. In case of NV12, Inference Engine expects two separate image planes (Y and UV). You must use a specific `InferenceEngine::NV12Blob` object instead of default blob object and set this blob to the Inference Engine Infer Request using `InferenceEngine::InferRequest::SetBlob()`. Refer to [Hello NV12 Input Classification C++ Sample](../../samples/cpp/hello_nv12_input_classification/README.md) for more details. - - 5. **Run on multiple images** with setting batch. If you want to run inference for multiple images at once, you can use the built-in batch pre-processing functionality. - - **NOTE** : Batch pre-processing is not supported if input color format is set to `ColorFormat::NV12`. - -@sphinxdirective -.. raw:: html - -
-@endsphinxdirective - -#### Step 3. Load the Model to the Device - -Load the model to the device using `InferenceEngine::Core::LoadNetwork()`: - - -@sphinxdirective - -.. tab:: IR - - .. code-block:: c - - executable_network = core.LoadNetwork("model.xml", "CPU"); - -.. tab:: ONNX - - .. code-block:: c - - executable_network = core.LoadNetwork("model.onnx", "CPU"); - -.. tab:: nGraph - - .. code-block:: c - - std::shared_ptr createNetwork() { - // To construct a network, please follow - // https://docs.openvino.ai/latest/openvino_docs_nGraph_DG_build_function.html - } - auto network = CNNNetwork(createNetwork()); - executable_network = core.LoadNetwork(network, "CPU"); - -.. tab:: Model From Step 2 - - Follow this step only if you went through optional "Step 2 (Optional). Configure Input and Output of the Model", otherwise use another tab for your model type: IR (OpenVINO Intermediate Representation), ONNX or nGraph. - - .. code-block:: c - - executable_network = core.LoadNetwork(network, "CPU"); - -@endsphinxdirective - - -It creates an executable network from a network object. The executable network is associated with single hardware device. -It is possible to create as many networks as needed and to use them simultaneously (up to the limitation of the hardware resources). - -Third parameter is a configuration for plugin. It is map of pairs: (parameter name, parameter value). Choose device from -[Supported devices](supported_plugins/Supported_Devices.md) page for more details about supported configuration parameters. - -@snippet snippets/Integrate_with_customer_application_new_API.cpp part6 - -#### Step 4. Create an Inference Request - -Create an infer request using the following code: - -@snippet snippets/Integrate_with_customer_application_new_API.cpp part7 - -#### Step 5. Prepare Input - -You can use one of the following options to prepare input: - -* **Optimal way for a single network.** Get blobs allocated by an infer request using `InferenceEngine::InferRequest::GetBlob()` and feed an image and the input data to the blobs. In this case, input data must be aligned (resized manually) with a given blob size and have a correct color format. - - @snippet snippets/Integrate_with_customer_application_new_API.cpp part8 - -* **Optimal way for a cascade of networks (output of one network is input for another).** Get output blob from the first request using `InferenceEngine::InferRequest::GetBlob()` and set it as input for the second request using `InferenceEngine::InferRequest::SetBlob()`. - - @snippet snippets/Integrate_with_customer_application_new_API.cpp part9 - -* **Optimal way to handle ROI (a ROI object located inside of input of one network is input for another).** It is possible to re-use shared input by several networks. You do not need to allocate separate input blob for a network if it processes a ROI object located inside of already allocated input of a previous network. For instance, when first network detects objects on a video frame (stored as input blob) and second network accepts detected bounding boxes (ROI inside of the frame) as input. In this case, it is allowed to re-use pre-allocated input blob (used by first network) by second network and just crop ROI without allocation of new memory using `InferenceEngine::make_shared_blob()` with passing of `InferenceEngine::Blob::Ptr` and `InferenceEngine::ROI` as parameters. 
- - @snippet snippets/Integrate_with_customer_application_new_API.cpp part10 - - Make sure that shared input is kept valid during execution of each network. Otherwise, ROI blob may be corrupted if the original input blob (that ROI is cropped from) has already been rewritten. - -* Allocate input blobs of the appropriate types and sizes, feed an image and the input data to the blobs, and call `InferenceEngine::InferRequest::SetBlob()` to set these blobs for an infer request: - - @snippet snippets/Integrate_with_customer_application_new_API.cpp part11 - -A blob can be filled before and after `SetBlob()`. - -> **NOTE**: -> -> * The `SetBlob()` method compares precision and layout of an input blob with the ones defined in step 3 and -> throws an exception if they do not match. It also compares a size of the input blob with input -> size of the read network. But if input was configured as resizable, you can set an input blob of -> any size (for example, any ROI blob). Input resize will be invoked automatically using resize -> algorithm configured on step 3. Similarly to the resize, color format conversions allow the color -> format of an input blob to differ from the color format of the read network. Color format -> conversion will be invoked automatically using color format configured on step 3. -> -> * `GetBlob()` logic is the same for pre-processable and not pre-processable input. Even if it is -> called with input configured as resizable or as having specific color format, a blob allocated by -> an infer request is returned. Its size and color format are already consistent with the -> corresponding values of the read network. No pre-processing will happen for this blob. If you -> call `GetBlob()` after `SetBlob()`, you will get the blob you set in `SetBlob()`. - -#### Step 6. Start Inference - -Start inference in asynchronous or synchronous mode. Async API usage can improve overall frame-rate of the application, because rather than wait for inference to complete, the app can continue doing things on the host, while accelerator is busy. - -* For synchronous inference request: - ```cpp - infer_request.Infer(); - ``` - -* For asynchronous inference request: - ```cpp - infer_request.StartAsync(); - infer_request.Wait(InferenceEngine::InferRequest::WaitMode::RESULT_READY); - ``` - `StartAsync` returns immediately and starts inference without blocking main thread, `Infer` blocks main thread and returns when inference is completed. Call `Wait` for waiting result to become available for asynchronous request. - - There are three ways to use it: - * specify maximum duration in milliseconds to block for. The method is blocked until the specified timeout has elapsed, or the result becomes available, whichever comes first. - * `InferenceEngine::InferRequest::WaitMode::RESULT_READY` - waits until inference result becomes available - * `InferenceEngine::InferRequest::WaitMode::STATUS_ONLY` - immediately returns request status.It does not - block or interrupts current thread. - - -Both requests are thread-safe: can be called from different threads without fearing corruption and failures. - -Multiple requests for single `ExecutableNetwork` are executed sequentially one by one in FIFO order. - -While request is ongoing, all its methods except `InferenceEngine::InferRequest::Wait` would throw an -exception. - -#### Step 7. Process the Inference Results - -Go over the output blobs and process the inference results. Note that casting `Blob` to `TBlob` via `std::dynamic_pointer_cast` is not the recommended way. 
It's better to access data via the `buffer()` and `as()` methods as follows: - -@snippet snippets/Integrate_with_customer_application_new_API.cpp part14 - -### Build Your Application - -For details about building your application, refer to the CMake files for the sample applications. -All samples source code is located in the `/samples` directory, where `INSTALL_DIR` is the OpenVINO™ installation directory. - -To build your project using CMake with the default build tools currently available on your machine, execute the following commands: - -> **NOTE**: Make sure you set environment variables first by running `/setupvars.sh` (or `setupvars.bat` for Windows). Otherwise the `InferenceEngine_DIR` and `OpenCV_DIR` variables won't be configured properly to pass `find_package` calls. - -```sh -cd build/ -cmake ../project -cmake --build . -``` -It's allowed to specify additional build options (e.g. to build CMake project on Windows with a specific build tools). Please refer to the [CMake page](https://cmake.org/cmake/help/latest/manual/cmake.1.html#manual:cmake(1)) for details. - -### Run Your Application - -> **NOTE**: Before running, make sure you completed **Set the Environment Variables** section in [OpenVINO Installation](../../samples/cpp/hello_nv12_input_classification/README.md) document so that the application can find the libraries. - -To run compiled applications on Microsoft* Windows* OS, make sure that Microsoft* Visual C++ 2017 -Redistributable and Intel® C++ Compiler 2017 Redistributable packages are installed and -`/bin/intel64/Release/*.dll` files are placed to the -application folder or accessible via `%PATH%` environment variable. - -## Integrate Inference Engine with Your Python Application - -@sphinxdirective -.. raw:: html - -
Python
-@endsphinxdirective - -This document explains how to integrate and use the Inference Engine API with your Python application. - -The following diagram illustrates the typical Inference Engine Python API workflow: -![ie_api_flow_python] - -Read the sections below to learn about each item. - -### Import Inference Module - -To make use of the Inference Engine functionality, import IECore to your application: - -```py -from openvino.inference_engine import IECore -``` - -### Use Inference Engine API - -This section provides step-by-step instructions to implement a typical inference pipeline with the Inference Engine API: - -![ie_api_use_python] - -#### Step 1. Create Inference Engine Core - -Use the following code to create Inference Engine Core to manage available devices and read network objects: -```py -ie = IECore() -``` -#### Step 2 (Optional). Read model. Configure Input and Output of the Model - -@sphinxdirective -.. raw:: html - -
-@endsphinxdirective - -Optionally, configure input and output of the model using the steps below: - -1. Read model - @sphinxdirective - - .. tab:: IR - - .. code-block:: python - - net = ie.read_network(model="model.xml") - - .. tab:: ONNX - - .. code-block:: python - - net = ie.read_network(model="model.onnx") - - .. tab:: nGraph - - .. code-block:: python - - #Basic example of nGraph model creation - param = Parameter(Type.f32, Shape([1, 3, 22, 22])) - relu = ng.relu(param) - func = Function([relu], [param], 'test') - caps = Function.to_capsule(func) - net = IENetwork(caps) - - @endsphinxdirective - -2. Request input and output information using input_info, outputs - ```py - inputs = net.input_info - input_name = next(iter(net.input_info)) - - outputs = net.outputs - output_name = next(iter(net.outputs)) - ``` - Information for this input layer is stored in input_info. The next cell prints the input layout, precision and shape. - ```py - print("Inputs:") - for name, info in net.input_info.items(): - print("\tname: {}".format(name)) - print("\tshape: {}".format(info.tensor_desc.dims)) - print("\tlayout: {}".format(info.layout)) - print("\tprecision: {}\n".format(info.precision)) - ``` - This cell output tells us that the model expects inputs with a shape of [1,3,224,224], and that this is in NCHW layout. This means that the model expects input data with a batch size (N) of 1, 3 channels (C), and images of a height (H) and width (W) of 224. The input data is expected to be of FP32 (floating point) precision. - - Getting the output layout, precision and shape is similar to getting the input layout, precision and shape. - ```py - print("Outputs:") - for name, info in net.outputs.items(): - print("\tname: {}".format(name)) - print("\tshape: {}".format(info.shape)) - print("\tlayout: {}".format(info.layout)) - print("\tprecision: {}\n".format(info.precision)) - ``` - This cell output shows that the model returns outputs with a shape of [1, 1001], where 1 is the batch size (N) and 1001 the number of classes (C). The output is returned as 32-bit floating point. - -@sphinxdirective -.. raw:: html - -
-@endsphinxdirective - -#### Step 3. Load model to the Device - -Load the model to the device using `load_network()`: - -@sphinxdirective - -.. tab:: IR - - .. code-block:: python - - exec_net = ie.load_network(network= "model.xml", device_name="CPU") -.. tab:: ONNX - - .. code-block:: python - - exec_net = ie.load_network(network= "model.onnx", device_name="CPU") - -.. tab:: Model from step 2 - - .. code-block:: python - - exec_net = ie.load_network(network=net, device_name="CPU") - -@endsphinxdirective - -This example is designed for CPU device, refer to the [Supported Devices](../OV_Runtime_UG/supported_plugins/Supported_Devices.md) page to read about more devices. - -#### Step 4. Prepare input -```py -import cv2 -import numpy as np - -image = cv2.imread("image.png") - -# Resize with OpenCV your image if needed to match with net input shape -# N, C, H, W = net.input_info[input_name].tensor_desc.dims -# image = cv2.resize(src=image, dsize=(W, H)) - -# Converting image to NCHW format with FP32 type -input_data = np.expand_dims(np.transpose(image, (2, 0, 1)), 0).astype(np.float32) -``` - -#### Step 5. Start Inference -```py -result = exec_net.infer({input_name: input_data}) -``` - -#### Step 6. Process the Inference Results -```py -output = result[output_name] -``` - -### Run Your Application - -Congratulations, you have made your first Python application with OpenVINO™ toolkit, now you may run it. - -[ie_api_flow_cpp]: img/BASIC_IE_API_workflow_Cpp.svg -[ie_api_use_cpp]: img/IMPLEMENT_PIPELINE_with_API_C.svg -[ie_api_flow_python]: img/BASIC_IE_API_workflow_Python.svg -[ie_api_use_python]: img/IMPLEMENT_PIPELINE_with_API_Python.svg diff --git a/docs/OV_Runtime_UG/PythonPackage_Overview.md b/docs/OV_Runtime_UG/PythonPackage_Overview.md new file mode 100644 index 00000000000..5e03eb3295c --- /dev/null +++ b/docs/OV_Runtime_UG/PythonPackage_Overview.md @@ -0,0 +1,14 @@ +# OpenVINO™ Python* Package + +OpenVINO™ Python\* package includes types to measure model and calibrate to low precision. + +The OpenVINO™ Python\* package available in the `/python/python3.X` directory. + +The OpenVINO™ Python\* package includes the following sub-packages: + + - [openvino.inference_engine](../../src/bindings/python/docs/api_overview.md) - Python\* wrapper on OpenVINO™ Inference Engine. + - `openvino.tools.accuracy_checker` - Measure accuracy. + - `openvino.tools.benchmark` - Measure latency and throughput. + +## See Also +* [Integrate with Customer Application New API](integrate_with_your_application.md) diff --git a/docs/OV_Runtime_UG/integrate_with_your_application.md b/docs/OV_Runtime_UG/integrate_with_your_application.md new file mode 100644 index 00000000000..6472e9ec8c5 --- /dev/null +++ b/docs/OV_Runtime_UG/integrate_with_your_application.md @@ -0,0 +1,265 @@ +# Integrate OpenVINO™ with Your Application {#openvino_docs_Integrate_OV_with_your_application} + +@sphinxdirective + +.. toctree:: + :maxdepth: 1 + :hidden: + + openvino_docs_OV_Runtime_UG_Model_Representation + openvino_docs_OV_Runtime_UG_Infer_request + +@endsphinxdirective + +> **NOTE**: Before start using OpenVINO™ Runtime, make sure you set all environment variables during the installation. 
If you did not, follow the instructions from the _Set the Environment Variables_ section in the installation guides: +> * [For Windows* 10](../install_guides/installing-openvino-windows.md) +> * [For Linux*](../install_guides/installing-openvino-linux.md) +> * [For macOS*](../install_guides/installing-openvino-macos.md) +> * To build an open source version, use the [OpenVINO™ Runtime Build Instructions](https://github.com/openvinotoolkit/openvino/wiki/BuildingCode). + +## Use OpenVINO™ Runtime API to Implement Inference Pipeline + +This section provides step-by-step instructions to implement a typical inference pipeline with the OpenVINO™ Runtime C++ API: + +![ie_api_use_cpp] + +### Step 1. Create OpenVINO™ Runtime Core + +Include next files to work with OpenVINO™ Runtime: + +@sphinxdirective + +.. tab:: C++ + + .. doxygensnippet:: docs/snippets/src/main.cpp + :language: cpp + :fragment: [include] + +.. tab:: Python + + .. doxygensnippet:: docs/snippets/src/main.py + :language: python + :fragment: [import] + +@endsphinxdirective + +Use the following code to create OpenVINO™ Core to manage available devices and read model objects: + +@sphinxdirective + +.. tab:: C++ + + .. doxygensnippet:: docs/snippets/src/main.cpp + :language: cpp + :fragment: [part1] + +.. tab:: Python + + .. doxygensnippet:: docs/snippets/src/main.py + :language: python + :fragment: [part1] + +@endsphinxdirective + +### Step 2. Compile the Model + +`ov::CompiledModel` class represents a device specific compiled model. `ov::CompiledModel` allows you to get information inputs or output ports by a tensor name or index. + +Compile the model for a specific device using `ov::Core::compile_model()`: + +@sphinxdirective + +.. tab:: C++ + + .. tab:: IR + + .. doxygensnippet:: docs/snippets/src/main.cpp + :language: cpp + :fragment: [part2_1] + + .. tab:: ONNX + + .. doxygensnippet:: docs/snippets/src/main.cpp + :language: cpp + :fragment: [part2_2] + + .. tab:: PaddlePaddle + + .. doxygensnippet:: docs/snippets/src/main.cpp + :language: cpp + :fragment: [part2_3] + + .. tab:: ov::Model + + .. doxygensnippet:: docs/snippets/src/main.cpp + :language: cpp + :fragment: [part2_4] + +.. tab:: Python + + .. tab:: IR + + .. doxygensnippet:: docs/snippets/src/main.py + :language: python + :fragment: [part2_1] + + .. tab:: ONNX + + .. doxygensnippet:: docs/snippets/src/main.py + :language: python + :fragment: [part2_2] + + .. tab:: PaddlePaddle + + .. doxygensnippet:: docs/snippets/src/main.py + :language: python + :fragment: [part2_3] + + .. tab:: ov::Model + + .. doxygensnippet:: docs/snippets/src/main.py + :language: python + :fragment: [part2_4] + +@endsphinxdirective + +The `ov::Model` object represents any models inside the OpenVINO™ Runtime. +For more details please read article about [OpenVINO™ Model representation](model_representation.md). + +The code above creates a compiled model associated with a single hardware device from the model object. +It is possible to create as many compiled models as needed and use them simultaneously (up to the limitation of the hardware resources). +To learn how to change the device configuration, read the [Query device properties](./supported_plugins/config_properties.md) article. + +### Step 3. Create an Inference Request + +`ov::InferRequest` class provides methods for model inference in the OpenVINO™ Runtime. +This section demonstrates a simple pipeline, to get more information about other use cases, read the [InferRequest documentation](./ov_infer_request.md) dedicated article. 
+Create an infer request using the following code: + +@sphinxdirective + +.. tab:: C++ + + .. doxygensnippet:: docs/snippets/src/main.cpp + :language: cpp + :fragment: [part3] + +.. tab:: Python + + .. doxygensnippet:: docs/snippets/src/main.py + :language: python + :fragment: [part3] + +@endsphinxdirective + +### Step 4. Set Inputs + +You can use external memory to create `ov::Tensor` and use the `ov::InferRequest::set_input_tensor` method to put this tensor on the device: + +@sphinxdirective + +.. tab:: C++ + + .. doxygensnippet:: docs/snippets/src/main.cpp + :language: cpp + :fragment: [part4] + +.. tab:: Python + + .. doxygensnippet:: docs/snippets/src/main.py + :language: python + :fragment: [part4] + +@endsphinxdirective + +### Step 5. Start Inference + +OpenVINO™ Runtime supports inference in asynchronous or synchronous mode. Async API usage can improve overall frame-rate of the application, because rather than wait for inference to complete, the app can continue doing things on the host, while the accelerator is busy. You can use `ov::InferRequest::start_async()` to start model inference in the asynchronous mode and call `ov::InferRequest::wait()` to wait for the inference results: + +@sphinxdirective + +.. tab:: C++ + + .. doxygensnippet:: docs/snippets/src/main.cpp + :language: cpp + :fragment: [part5] + +.. tab:: Python + + .. doxygensnippet:: docs/snippets/src/main.py + :language: python + :fragment: [part5] + +@endsphinxdirective + +The asynchronous mode supports two methods to get the inference results: + * `ov::InferRequest::wait_for()` - Waits until the specified timeout (in milliseconds) has elapsed or the inference result becomes available, whichever comes first. + * `ov::InferRequest::wait()` - Waits until the inference result becomes available. + +Both requests are thread-safe, which means they can be called from different threads without exposing erroneous behavior or producing unpredictable results. + +While the request is ongoing, all its methods except `ov::InferRequest::cancel`, `ov::InferRequest::wait` or `ov::InferRequest::wait_for` throw +the `ov::Busy` exception indicating the request is busy with computations. + +### Step 6. Process the Inference Results + +Go over the output tensors and process the inference results. + +@sphinxdirective + +.. tab:: C++ + + .. doxygensnippet:: docs/snippets/src/main.cpp + :language: cpp + :fragment: [part6] + +.. tab:: Python + + .. doxygensnippet:: docs/snippets/src/main.py + :language: python + :fragment: [part6] + +@endsphinxdirective + +## Link and Build Your C++ Application with OpenVINO™ Runtime + +The example uses CMake for project configuration. + +1. **Create a structure** for the project: + ``` sh + project/ + ├── CMakeLists.txt - CMake file to build + ├── ... - Additional folders like includes/ + └── src/ - source folder + └── main.cpp + build/ - build directory + ... + ``` + +2. **Include OpenVINO™ Runtime libraries** in `project/CMakeLists.txt` + + @snippet snippets/CMakeLists.txt cmake:integration_example + +To build your project using CMake with the default build tools currently available on your machine, execute the following commands: + +> **NOTE**: Make sure you set environment variables first by running `/setupvars.sh` (or `setupvars.bat` for Windows). Otherwise the `OpenVINO_DIR` variable won't be configured properly to pass `find_package` calls. + +```sh +cd build/ +cmake ../project +cmake --build . +``` +It's allowed to specify additional build options (e.g. 
to build CMake project on Windows with a specific build tools). Please refer to the [CMake page](https://cmake.org/cmake/help/latest/manual/cmake.1.html#manual:cmake(1)) for details. + +## Run Your Application + +Congratulations, you have made your first application with OpenVINO™ toolkit, now you may run it. + +## See also + + - [OpenVINO™ Runtime Preprocessing](./preprocessing_overview.md) + +[ie_api_flow_cpp]: img/BASIC_IE_API_workflow_Cpp.svg +[ie_api_use_cpp]: img/IMPLEMENT_PIPELINE_with_API_C.svg +[ie_api_flow_python]: img/BASIC_IE_API_workflow_Python.svg +[ie_api_use_python]: img/IMPLEMENT_PIPELINE_with_API_Python.svg diff --git a/docs/OV_Runtime_UG/model_representation.md b/docs/OV_Runtime_UG/model_representation.md index f3edf5e2f24..0e932278373 100644 --- a/docs/OV_Runtime_UG/model_representation.md +++ b/docs/OV_Runtime_UG/model_representation.md @@ -2,8 +2,6 @@ In OpenVINO™ Runtime a model is represented by the `ov::Model` class. -## Model Representation - The `ov::Model` object stores shared pointers to `ov::op::v0::Parameter`, `ov::op::v0::Result` and `ov::op::Sink` operations that are inputs, outputs and sinks of the graph. Sinks of the graph have no consumers and are not included in the results vector. All other operations hold each other via shared pointers: child operation holds its parent (hard link). If an operation has no consumers and it's not the `Result` or `Sink` operation (shared pointer counter is zero), then it will be destructed and won't be accessible anymore. @@ -12,24 +10,63 @@ Each operation in `ov::Model` has the `std::shared_ptr` type. For details on how to build a model in OpenVINO™ Runtime, see the [Build a Model in OpenVINO™ Runtime](@ref build_model) section. +OpenVINO™ Runtime allows using tensor names or indexes to work wit model inputs/outputs. To get model input/output ports, use the `ov::Model::inputs()` or `ov::Model::outputs()` methods respectively. + +@sphinxdirective + +.. tab:: C++ + + .. doxygensnippet:: docs/snippets/ov_model_snippets.cpp + :language: cpp + :fragment: [all_inputs_ouputs] + +.. tab:: Python + + .. doxygensnippet:: docs/snippets/ov_model_snippets.py + :language: python + :fragment: [all_inputs_ouputs] + +@endsphinxdirective + +OpenVINO™ Runtime model representation uses special classes to work with model data types and shapes. For data types the `ov::element::Type` is used. + +## Shapes Representation + +OpenVINO™ Runtime provides two types for shape representation: + +* `ov::Shape` - Represents static (fully defined) shapes. + +* `ov::PartialShape` - Represents dynamic shapes. That means that the rank or some of dimensions are dynamic (dimension defines an interval or undefined). `ov::PartialShape` can be converted to `ov::Shape` using the `get_shape()` method if all dimensions are static; otherwise the conversion raises an exception. + +@sphinxdirective + +.. tab:: C++ + + .. doxygensnippet:: docs/snippets/ov_model_snippets.cpp + :language: cpp + :fragment: [ov:partial_shape] + +.. tab:: Python + + .. doxygensnippet:: docs/snippets/ov_model_snippets.py + :language: python + :fragment: [ov:partial_shape] + +@endsphinxdirective + + But in most cases before getting static shape using `get_shape()` method, you need to check that shape is static. + ## Operations -The `ov::Op` class represents any abstract operation in the model representation. Use this class to create [custom operations](../Extensibility_UG/add_openvino_ops). +The `ov::Op` class represents any abstract operation in the model representation. 
Use this class to create [custom operations](../Extensibility_UG/add_openvino_ops.md). ## Operation Sets Operation set (opset) is a collection of operations that can be used to construct a model. The `ov::OpSet` class provides a functionality to work with operation sets. -For each operation set, OpenVINO™ Runtime provides a separate namespace, for example `opset8`. +For each operation set, OpenVINO™ Runtime provides a separate namespace, for example `opset8`. Each OpenVINO™ Release release introduces new operations and add these operations to a new operation set. New operation sets help to introduce a new version of operations that change behavior of previous operations. Using operation sets allows you to avoid changes in your application if new operations have been introduced. - -## Static and Partial Shapes - -There are two types used for shape representation: - -* `ov::Shape` - Represents static (fully defined) shapes. - -* `ov::PartialShape` - Represents dynamic shapes. That means that the rank or some of dimensions are dynamic (undefined). `ov::PartialShape` can be converted to `ov::Shape` using the `get_shape()` method if all dimensions are static; otherwise the conversion raises an exception. - +For a complete list of operation sets supported in OpenVINO™ toolkit, see [Available Operations Sets](../ops/opset.md). +To add support of custom operations, see the [Add Custom OpenVINO Operations](../Extensibility_UG/Intro.md) document. ## Build a Model in OpenVINO™ Runtime {#build_model} @@ -37,55 +74,109 @@ You can create a model from source. This section illustrates how to construct a Operation set `opsetX` integrates a list of pre-compiled operations that work for this purpose. In other words, `opsetX` defines a set of operations for building a graph. -For a complete list of operation sets supported in OpenVINO™ toolkit, see [Available Operations Sets](../ops/opset.md). - -To add support of custom operations, see the [Add Custom OpenVINO Operations](../Extensibility_UG/Intro.md) document. - To build an `ov::Model` instance from `opset8` operations, include the following files: - - C++ - @snippet example_ngraph_utils.cpp ov:include +@sphinxdirective - - Python - ```python - import openvino.runtime.opset8 as ov - from openvino.runtime import Model - ``` +.. tab:: C++ + + .. doxygensnippet:: docs/snippets/ov_model_snippets.cpp + :language: cpp + :fragment: [ov:include] + +.. tab:: Python + + .. doxygensnippet:: docs/snippets/ov_model_snippets.py + :language: python + :fragment: [import] + +@endsphinxdirective The following code demonstrates how to create a simple model: - - C++ - @snippet example_ngraph_utils.cpp ov:create_simple_model +@sphinxdirective - - Python - ```python - TBD - ``` +.. tab:: C++ + .. doxygensnippet:: docs/snippets/ov_model_snippets.cpp + :language: cpp + :fragment: [ov:create_simple_model] + +.. tab:: Python + + .. doxygensnippet:: docs/snippets/ov_model_snippets.py + :language: python + :fragment: [ov:create_simple_model] + +@endsphinxdirective The following code creates a model with several outputs: - - C++ - @snippet example_ngraph_utils.cpp ov:create_advanced_model +@sphinxdirective - - Python - ```python - TBD - ``` +.. tab:: C++ -## FAQ + .. doxygensnippet:: docs/snippets/ov_model_snippets.cpp + :language: cpp + :fragment: [ov:create_advanced_model] -### Does OpenVINO™ Runtime provide any capabilities to debug the model structure and model modification? 
- - To receive additional messages about applied graph modifications, rebuild the OpenVINO™ Runtime library with the `-DENABLE_OPENVINO_DEBUG=ON` option. - - A model can be visualized to image from the xDot format: - @snippet example_ngraph_utils.cpp ov:visualize - - A model can be serialized to IR: - @snippet example_ngraph_utils.cpp ov:serialize +.. tab:: Python -### How can I develop my own transformation pass? - See the [Transformations Developer Guide](./../Extensibility_UG/ov_transformations.md). + .. doxygensnippet:: docs/snippets/ov_model_snippets.py + :language: python + :fragment: [ov:create_advanced_model] + +@endsphinxdirective + +## Model debug capabilities + +OpenVINO™ provides several debug capabilities: + - To receive additional messages about applied model modifications, rebuild the OpenVINO™ Runtime library with the `-DENABLE_OPENVINO_DEBUG=ON` option. + - Model can be visualized to image from the xDot format: + @sphinxdirective + + .. tab:: C++ + + .. doxygensnippet:: docs/snippets/ov_model_snippets.cpp + :language: cpp + :fragment: [ov:visualize] + + .. tab:: Python + + .. doxygensnippet:: docs/snippets/ov_model_snippets.py + :language: python + :fragment: [ov:visualize] + + @endsphinxdirective + + `ov::pass::VisualizeTree` can be parametrized via environment variables: + + OV_VISUALIZE_TREE_OUTPUT_SHAPES=1 - visualize shapes + OV_VISUALIZE_TREE_OUTPUT_TYPES=1 - visualize types + OV_VISUALIZE_TREE_MIN_MAX_DENORMAL=1 - pretty denormal values + OV_VISUALIZE_TREE_RUNTIME_INFO=1 - print runtime information + OV_VISUALIZE_TREE_IO=1 - print I/O ports + OV_VISUALIZE_TREE_MEMBERS_NAME=1 - print member names + + - Also model can be serialized to IR: + @sphinxdirective + + .. tab:: C++ + + .. doxygensnippet:: docs/snippets/ov_model_snippets.cpp + :language: cpp + :fragment: [ov:serialize] + + .. tab:: Python + + .. doxygensnippet:: docs/snippets/ov_model_snippets.py + :language: python + :fragment: [ov:serialize] + + @endsphinxdirective ## See Also * [Available Operation Sets](../ops/opset.md) * [OpenVINO™ Runtime Extensibility Developer Guide](../Extensibility_UG/Intro.md) +* [Transformations Developer Guide](../Extensibility_UG/ov_transformations.md). diff --git a/docs/OV_Runtime_UG/network_state_intro.md b/docs/OV_Runtime_UG/network_state_intro.md index 5d39b56d32d..cf76e83ac41 100644 --- a/docs/OV_Runtime_UG/network_state_intro.md +++ b/docs/OV_Runtime_UG/network_state_intro.md @@ -243,7 +243,7 @@ After applying the transformation, ReadValue operations can receive other operat 1. Get CNNNetwork. Either way is acceptable: - * [from IR or ONNX model](./Integrate_with_customer_application_new_API.md) + * [from IR or ONNX model](./integrate_with_your_application.md) * [from ov::Model](../OV_Runtime_UG/model_representation.md) 2. Change the number of iterations inside TensorIterator/Loop nodes in the network using the [Reshape](ShapeInference.md) feature. @@ -347,7 +347,7 @@ After applying the transformation, ReadValue operations can receive other operat 1. Get CNNNetwork. Either way is acceptable: - * [from IR or ONNX model](./Integrate_with_customer_application_new_API.md) + * [from IR or ONNX model](./integrate_with_your_application.md) * [from ov::Model](../OV_Runtime_UG/model_representation.md) 2. [Reshape](ShapeInference.md) the CNNNetwork network if necessary. **Necessary case:** where the sequence_lengths dimension of input > 1, it means TensorIterator layer will have number_iterations > 1. We should reshape the inputs of the network to set sequence_dimension to exactly 1. 
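
The model representation changes above describe building an `ov::Model` from `opset8` operations and checking an `ov::PartialShape` for staticness before calling `get_shape()`. The doxygen snippets they reference live in `docs/snippets/ov_model_snippets.cpp`/`.py`, which are not shown in full here, so the following is only an illustrative, self-contained C++ sketch of those ideas; it is not part of the patch, and the file names passed to the serialization pass are placeholders.

```cpp
// Illustrative sketch only (not part of this patch): build a tiny ov::Model from
// opset8 operations, check that a shape is static before converting it, and
// serialize the model to IR. "sketch.xml"/"sketch.bin" are placeholder paths.
#include <openvino/core/model.hpp>
#include <openvino/opsets/opset8.hpp>
#include <openvino/pass/manager.hpp>
#include <openvino/pass/serialize.hpp>

#include <iostream>
#include <memory>

int main() {
    // Parameter -> Relu -> Result, the same pattern the guide builds with opset8
    auto data = std::make_shared<ov::opset8::Parameter>(ov::element::f32, ov::Shape{1, 3, 224, 224});
    auto relu = std::make_shared<ov::opset8::Relu>(data);
    auto result = std::make_shared<ov::opset8::Result>(relu);
    auto model = std::make_shared<ov::Model>(ov::ResultVector{result}, ov::ParameterVector{data}, "sketch");

    // ov::PartialShape may be dynamic; convert to ov::Shape only when it is static
    const ov::PartialShape& pshape = model->input(0).get_partial_shape();
    if (pshape.is_static()) {
        ov::Shape static_shape = pshape.get_shape();
        std::cout << "Input rank: " << static_shape.size() << std::endl;
    }

    // Serialize the model to IR, as mentioned in the model debug capabilities section
    ov::pass::Manager manager;
    manager.register_pass<ov::pass::Serialize>("sketch.xml", "sketch.bin");
    manager.run_passes(model);
    return 0;
}
```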
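
Similarly, the new integration guide added earlier in this commit walks through the Core → compile_model → InferRequest → tensor pipeline via doxygen snippets (`docs/snippets/src/main.cpp`/`main.py`, added by this patch but not shown here). As a hedged, stand-alone illustration of that flow, the sketch below strings the documented calls together; `"model.xml"`, `"CPU"`, and the single static f32 input are assumptions, not values taken from the patch.

```cpp
// Stand-alone sketch of the documented pipeline (not the patch's own main.cpp):
// create Core -> compile model -> create infer request -> set input -> infer -> read output.
// Assumes a single static f32 input; "model.xml" and "CPU" are placeholders.
#include <openvino/openvino.hpp>

#include <memory>
#include <vector>

int main() {
    ov::Core core;

    // Step 1-2: read the model and compile it for a device
    std::shared_ptr<ov::Model> model = core.read_model("model.xml");
    ov::CompiledModel compiled_model = core.compile_model(model, "CPU");

    // Step 3: create an inference request
    ov::InferRequest infer_request = compiled_model.create_infer_request();

    // Step 4: wrap externally allocated memory into a tensor and set it as input
    ov::Shape input_shape = compiled_model.input().get_shape();
    std::vector<float> input_data(ov::shape_size(input_shape), 0.0f);
    ov::Tensor input_tensor(ov::element::f32, input_shape, input_data.data());
    infer_request.set_input_tensor(input_tensor);

    // Step 5: run inference (synchronous here; start_async()/wait() is the async variant)
    infer_request.infer();

    // Step 6: read and process the output tensor
    ov::Tensor output = infer_request.get_output_tensor();
    float* out_data = output.data<float>();
    (void)out_data;  // postprocess results here
    return 0;
}
```

The build setup is the same as the `cmake:integration_example` added to `docs/snippets/CMakeLists.txt` later in this patch: `find_package(OpenVINO REQUIRED)` plus linking against `openvino::runtime`.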
diff --git a/docs/OV_Runtime_UG/openvino_intro.md b/docs/OV_Runtime_UG/openvino_intro.md index 10aeb252855..d5f79b4be5f 100644 --- a/docs/OV_Runtime_UG/openvino_intro.md +++ b/docs/OV_Runtime_UG/openvino_intro.md @@ -8,9 +8,8 @@ :maxdepth: 1 :hidden: - openvino_docs_IE_DG_Integrate_with_customer_application_new_API + openvino_docs_Integrate_OV_with_your_application - openvino_docs_OV_Runtime_UG_Model_Representation openvino_docs_IE_DG_ShapeInference openvino_docs_OV_UG_Working_with_devices openvino_docs_OV_Runtime_UG_Preprocessing_Overview diff --git a/docs/OV_Runtime_UG/ov_infer_request.md b/docs/OV_Runtime_UG/ov_infer_request.md new file mode 100644 index 00000000000..c984a4e6a92 --- /dev/null +++ b/docs/OV_Runtime_UG/ov_infer_request.md @@ -0,0 +1,277 @@ +# OpenVINO™ Inference Request {#openvino_docs_OV_Runtime_UG_Infer_request} + +OpenVINO™ Runtime uses Infer Request mechanism which allows to run models on different devices in asynchronous or synchronous manners. +`ov::InferRequest` class is used for this purpose inside the OpenVINO™ Runtime. +This class allows to set and get data for model inputs, outputs and run inference for the model. + +## Creating Infer Request + +`ov::InferRequest` can be created from the `ov::CompiledModel`: + +@sphinxdirective + +.. tab:: C++ + + .. doxygensnippet:: docs/snippets/ov_infer_request.cpp + :language: cpp + :fragment: [create_infer_request] + +.. tab:: Python + + .. doxygensnippet:: docs/snippets/ov_infer_request.py + :language: python + :fragment: [create_infer_request] + +@endsphinxdirective + +## Run inference + +`ov::InferRequest` supports synchronous and asynchronous modes for inference. + +### Synchronous mode + +You can use `ov::InferRequest::infer()`, which blocks the application execution, to infer model in synchronous mode: + +@sphinxdirective + +.. tab:: C++ + + .. doxygensnippet:: docs/snippets/ov_infer_request.cpp + :language: cpp + :fragment: [sync_infer] + +.. tab:: Python + + .. doxygensnippet:: docs/snippets/ov_infer_request.py + :language: python + :fragment: [sync_infer] + +@endsphinxdirective + +### Asynchronous mode + +Asynchronous mode can improve overall frame-rate of the application, because rather than wait for inference to complete, the app can continue doing things on the host, while accelerator is busy. You can use `ov::InferRequest::start_async()` to infer model in asynchronous mode: + +@sphinxdirective + +.. tab:: C++ + + .. doxygensnippet:: docs/snippets/ov_infer_request.cpp + :language: cpp + :fragment: [async_infer] + +.. tab:: Python + + .. doxygensnippet:: docs/snippets/ov_infer_request.py + :language: python + :fragment: [async_infer] + +@endsphinxdirective + +Asynchronous mode supports two ways to wait inference results: + * `ov::InferRequest::wait_for()` - specify maximum duration in milliseconds to block for. The method is blocked until the specified timeout has elapsed, or the result becomes available, whichever comes first. + @sphinxdirective + + .. tab:: C++ + + .. doxygensnippet:: docs/snippets/ov_infer_request.cpp + :language: cpp + :fragment: [wait_for] + + .. tab:: Python + + .. doxygensnippet:: docs/snippets/ov_infer_request.py + :language: python + :fragment: [wait_for] + + @endsphinxdirective + * `ov::InferRequest::wait()` - waits until inference result becomes available + @sphinxdirective + + .. tab:: C++ + + .. doxygensnippet:: docs/snippets/ov_infer_request.cpp + :language: cpp + :fragment: [wait] + + .. tab:: Python + + .. 
doxygensnippet:: docs/snippets/ov_infer_request.py + :language: python + :fragment: [wait] + + @endsphinxdirective + +Both requests are thread-safe: can be called from different threads without fearing corruption and failures. + +Also InferRequest provides an functionality which allows to avoid a call of `ov::InferRequest::wait()`, in order to do it, you can use `ov::InferRequest::set_callback()` method. This method allows to set callback which will be called after completing run of InferRequest, please use weak reference of infer_request (`ov::InferRequest*`, `ov::InferRequest&`, `std::weal_ptr` and etc) in the callback, it is needed to avoid cyclic references. +For more details please take a look too [Classification Sample Async](../../samples/cpp/classification_sample_async/README.md). + +@sphinxdirective + +.. tab:: C++ + + .. doxygensnippet:: docs/snippets/ov_infer_request.cpp + :language: cpp + :fragment: [set_callback] + +.. tab:: Python + + .. doxygensnippet:: docs/snippets/ov_infer_request.py + :language: python + :fragment: [set_callback] + +@endsphinxdirective + +You can use `ov::InferRequest::cancel()` method in case if you want to cancel the current inference request: + +@sphinxdirective + +.. tab:: C++ + + .. doxygensnippet:: docs/snippets/ov_infer_request.cpp + :language: cpp + :fragment: [cancel] + +.. tab:: Python + + .. doxygensnippet:: docs/snippets/ov_infer_request.py + :language: python + :fragment: [cancel] + +@endsphinxdirective + +## Working with Input and Output tensors + +`ov::InferRequest` allows to get input/output tensors by tensor name, index, port and without any arguments in case if model has only one input or output. + + * `ov::InferRequest::get_input_tensor()`, `ov::InferRequest::set_input_tensor()`, `ov::InferRequest::get_output_tensor()`, `ov::InferRequest::set_output_tensor()` methods without arguments can be used to get or set input/output tensor for model with only one input/output: + @sphinxdirective + + .. tab:: C++ + + .. doxygensnippet:: docs/snippets/ov_infer_request.cpp + :language: cpp + :fragment: [get_set_one_tensor] + + .. tab:: Python + + .. doxygensnippet:: docs/snippets/ov_infer_request.py + :language: python + :fragment: [get_set_one_tensor] + + @endsphinxdirective + + * `ov::InferRequest::get_input_tensor()`, `ov::InferRequest::set_input_tensor()`, `ov::InferRequest::get_output_tensor()`, `ov::InferRequest::set_output_tensor()` methods with argument can be used to get or set input/output tensor by input/output index: + @sphinxdirective + + .. tab:: C++ + + .. doxygensnippet:: docs/snippets/ov_infer_request.cpp + :language: cpp + :fragment: [get_set_index_tensor] + + .. tab:: Python + + .. doxygensnippet:: docs/snippets/ov_infer_request.py + :language: python + :fragment: [get_set_index_tensor] + + @endsphinxdirective + + * `ov::InferRequest::get_tensor()`, `ov::InferRequest::set_tensor()` methods can be used to get or set input/output tensor by tensor name: + @sphinxdirective + + .. tab:: C++ + + .. doxygensnippet:: docs/snippets/ov_infer_request.cpp + :language: cpp + :fragment: [get_set_tensor] + + .. tab:: Python + + .. doxygensnippet:: docs/snippets/ov_infer_request.py + :language: python + :fragment: [get_set_tensor] + + @endsphinxdirective + + * `ov::InferRequest::get_tensor()`, `ov::InferRequest::set_tensor()` methods can be used to get or set input/output tensor by port: + @sphinxdirective + + .. tab:: C++ + + .. doxygensnippet:: docs/snippets/ov_infer_request.cpp + :language: cpp + :fragment: [get_set_tensor_by_port] + + .. 
tab:: Python + + .. doxygensnippet:: docs/snippets/ov_infer_request.py + :language: python + :fragment: [get_set_tensor_by_port] + + @endsphinxdirective + +## Examples of InferRequest usages + +### Cascade of models + +`ov::InferRequest` can be used to organize cascade of models. You need to have infer requests for each model. +In this case you can get output tensor from the first request using `ov::InferRequest::get_tensor()` and set it as input for the second request using `ov::InferRequest::set_tensor()`. But be careful, shared tensors across compiled models can be rewritten by the first model if the first infer request is run once again, while the second model has not started yet. + +@sphinxdirective + +.. tab:: C++ + + .. doxygensnippet:: docs/snippets/ov_infer_request.cpp + :language: cpp + :fragment: [cascade_models] + +.. tab:: Python + + .. doxygensnippet:: docs/snippets/ov_infer_request.py + :language: python + :fragment: [cascade_models] + +@endsphinxdirective + +### Using of ROI tensors + +It is possible to re-use shared input by several models. You do not need to allocate separate input tensor for a model if it processes a ROI object located inside of already allocated input of a previous model. For instance, when first model detects objects on a video frame (stored as input tensor) and second model accepts detected bounding boxes (ROI inside of the frame) as input. In this case, it is allowed to re-use pre-allocated input tensor (used by first model) by second model and just crop ROI without allocation of new memory using `ov::Tensor()` with passing of `ov::Tensor` and `ov::Coordinate` as parameters. + +@sphinxdirective + +.. tab:: C++ + + .. doxygensnippet:: docs/snippets/ov_infer_request.cpp + :language: cpp + :fragment: [roi_tensor] + +.. tab:: Python + + .. doxygensnippet:: docs/snippets/ov_infer_request.py + :language: python + :fragment: [roi_tensor] + +@endsphinxdirective + +### Using of remote tensors + +You can create a remote tensor to work with remote device memory. `ov::RemoteContext` allows to create remote tensor. + +@sphinxdirective + +.. tab:: C++ + + .. doxygensnippet:: docs/snippets/ov_infer_request.cpp + :language: cpp + :fragment: [remote_tensor] + +.. tab:: Python + + .. doxygensnippet:: docs/snippets/ov_infer_request.py + :language: python + :fragment: [remote_tensor] + +@endsphinxdirective diff --git a/docs/OV_Runtime_UG/supported_plugins/Supported_Devices.md b/docs/OV_Runtime_UG/supported_plugins/Supported_Devices.md index c8914472035..0c200186708 100644 --- a/docs/OV_Runtime_UG/supported_plugins/Supported_Devices.md +++ b/docs/OV_Runtime_UG/supported_plugins/Supported_Devices.md @@ -116,7 +116,7 @@ the supported output precision depends on the actual underlying devices. _Gener |Layout |NCDHW|NCHW |CHW |NC |C | For setting relevant configuration, refer to the -[Integrate with Customer Application New Request API](../Integrate_with_customer_application_new_API.md) topic +[Integrate with Customer Application](../integrate_with_your_application.md) topic (step 3 "Configure input and output"). ### Supported Layers diff --git a/docs/index.rst b/docs/index.rst index 46b71ca485d..bd472b359b7 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -73,8 +73,8 @@ OpenVINO™ Documentation

Learn about the alternative, web-based version of OpenVINO. DL Workbench container installation Required.

-

Inference Engine

-

Learn about OpenVINO's inference mechanism which executes the IR and ONNX models on target devices.

+

OpenVINO™ Runtime

+

Learn about OpenVINO's inference mechanism which executes the IR, ONNX, Paddle models on target devices.

Tune & Optimize

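
The `ov_infer_request.md` guide added by this commit also documents the asynchronous flow (`start_async()`, `wait()`/`wait_for()`, `set_callback()`, `cancel()`), again through doxygen snippets. The short sketch below is an editorial illustration of the callback-based variant only, reusing the same placeholder `"model.xml"`/`"CPU"` values; it is not taken from the patch's snippet files.

```cpp
// Illustrative sketch (not from the patch): asynchronous inference with a
// completion callback, as described in ov_infer_request.md.
// "model.xml" and "CPU" are placeholder values.
#include <openvino/openvino.hpp>

#include <exception>
#include <iostream>

int main() {
    ov::Core core;
    ov::CompiledModel compiled_model = core.compile_model("model.xml", "CPU");
    ov::InferRequest infer_request = compiled_model.create_infer_request();

    // The callback receives an std::exception_ptr: empty on success, set on failure.
    infer_request.set_callback([&infer_request](std::exception_ptr ex_ptr) {
        if (!ex_ptr) {
            // Inference finished: outputs can be read, or the request restarted
            // with new input data, as the guide's own snippet does.
            ov::Tensor output = infer_request.get_output_tensor();
            std::cout << "Done, output bytes: " << output.get_byte_size() << std::endl;
        }
    });

    infer_request.start_async();
    // The host thread can do other work here; wait() blocks until completion.
    infer_request.wait();
    return 0;
}
```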
diff --git a/docs/snippets/CMakeLists.txt b/docs/snippets/CMakeLists.txt index ce32a90200e..889936e8579 100644 --- a/docs/snippets/CMakeLists.txt +++ b/docs/snippets/CMakeLists.txt @@ -58,3 +58,22 @@ if(NOT MSVC) endif() target_link_libraries(${TARGET_NAME} PRIVATE openvino::runtime openvino::runtime::dev) + +set(TARGET_NAME "ov_integration_snippet") +# [cmake:integration_example] +cmake_minimum_required(VERSION 3.10) +set(CMAKE_CXX_STANDARD 11) + + +find_package(OpenVINO REQUIRED) + +add_executable(${TARGET_NAME} src/main.cpp) + +target_link_libraries(${TARGET_NAME} PRIVATE openvino::runtime) +# [cmake:integration_example] +if(NOT MSVC) + target_compile_options(${TARGET_NAME} PRIVATE -Wno-unused-variable) + if(CMAKE_COMPILER_IS_GNUCXX) + target_compile_options(${TARGET_NAME} PRIVATE -Wno-unused-but-set-variable) + endif() +endif() diff --git a/docs/snippets/Integrate_with_customer_application_new_API.cpp b/docs/snippets/Integrate_with_customer_application_new_API.cpp deleted file mode 100644 index 3bee3d4df4c..00000000000 --- a/docs/snippets/Integrate_with_customer_application_new_API.cpp +++ /dev/null @@ -1,133 +0,0 @@ -#include - -int main() { -const std::string output_name = "output_name"; -const std::string input_name = "input_name"; -//! [part0] -InferenceEngine::Core core; -InferenceEngine::CNNNetwork network; -InferenceEngine::ExecutableNetwork executable_network; -//! [part0] - -//! [part1] -network = core.ReadNetwork("Model.xml"); -//! [part1] - -//! [part2] -network = core.ReadNetwork("model.onnx"); -//! [part2] - -//! [part3] -/** Take information about all topology inputs **/ -InferenceEngine::InputsDataMap input_info = network.getInputsInfo(); -/** Take information about all topology outputs **/ -InferenceEngine::OutputsDataMap output_info = network.getOutputsInfo(); -//! [part3] - -//! [part4] -/** Iterate over all input info**/ -for (auto &item : input_info) { - auto input_data = item.second; - input_data->setPrecision(InferenceEngine::Precision::U8); - input_data->setLayout(InferenceEngine::Layout::NCHW); - input_data->getPreProcess().setResizeAlgorithm(InferenceEngine::RESIZE_BILINEAR); - input_data->getPreProcess().setColorFormat(InferenceEngine::ColorFormat::RGB); -} -/** Iterate over all output info**/ -for (auto &item : output_info) { - auto output_data = item.second; - output_data->setPrecision(InferenceEngine::Precision::FP32); - output_data->setLayout(InferenceEngine::Layout::NC); -} -//! [part4] - -//! [part5] -executable_network = core.LoadNetwork(network, "CPU"); -//! [part5] - -//! [part6] -/** Optional config. E.g. this enables profiling of performance counters. **/ -std::map config = {{ InferenceEngine::PluginConfigParams::KEY_PERF_COUNT, InferenceEngine::PluginConfigParams::YES }}; -executable_network = core.LoadNetwork(network, "CPU", config); -//! [part6] - -//! [part7] -auto infer_request = executable_network.CreateInferRequest(); -//! [part7] - -auto infer_request1 = executable_network.CreateInferRequest(); -auto infer_request2 = executable_network.CreateInferRequest(); - -//! [part8] -/** Iterate over all input blobs **/ -for (auto & item : input_info) { - auto input_name = item.first; - /** Get input blob **/ - auto input = infer_request.GetBlob(input_name); - /** Fill input tensor with planes. First b channel, then g and r channels **/ -// ... -} -//! [part8] - -//! [part9] -auto output = infer_request1.GetBlob(output_name); -infer_request2.SetBlob(input_name, output); -//! [part9] - -//! 
[part10] -/** inputBlob points to input of a previous network and - cropROI contains coordinates of output bounding box **/ -InferenceEngine::Blob::Ptr inputBlob; -InferenceEngine::ROI cropRoi; -//... - -/** roiBlob uses shared memory of inputBlob and describes cropROI - according to its coordinates **/ -auto roiBlob = InferenceEngine::make_shared_blob(inputBlob, cropRoi); -infer_request2.SetBlob(input_name, roiBlob); -//! [part10] - -//! [part11] -/** Iterate over all input blobs **/ -for (auto & item : input_info) { - auto input_data = item.second; - /** Create input blob **/ - InferenceEngine::TBlob::Ptr input; - // assuming input precision was asked to be U8 in prev step - input = InferenceEngine::make_shared_blob( - InferenceEngine::TensorDesc(InferenceEngine::Precision::U8, input_data->getTensorDesc().getDims(), - input_data->getTensorDesc().getLayout())); - input->allocate(); - infer_request.SetBlob(item.first, input); - - /** Fill input tensor with planes. First b channel, then g and r channels **/ -// ... -} -//! [part11] - -//! [part12] -infer_request.StartAsync(); -infer_request.Wait(InferenceEngine::InferRequest::WaitMode::RESULT_READY); -//! [part12] - -auto sync_infer_request = executable_network.CreateInferRequest(); - -//! [part13] -sync_infer_request.Infer(); -//! [part13] - -//! [part14] - for (auto &item : output_info) { - auto output_name = item.first; - auto output = infer_request.GetBlob(output_name); - { - auto const memLocker = output->cbuffer(); // use const memory locker - // output_buffer is valid as long as the lifetime of memLocker - const float *output_buffer = memLocker.as(); - /** output_buffer[] - accessing output blob data **/ - } - } -//! [part14] - -return 0; -} diff --git a/docs/snippets/example_ngraph_utils.cpp b/docs/snippets/example_ngraph_utils.cpp index 185e9f425cf..6cf9faf21a4 100644 --- a/docs/snippets/example_ngraph_utils.cpp +++ b/docs/snippets/example_ngraph_utils.cpp @@ -20,56 +20,6 @@ #include // ! [ov:include] - -// ! [ov:create_simple_model] -std::shared_ptr create_simple_function() { - // This example shows how to create ov::Function - // - // Parameter--->Multiply--->Add--->Result - // Constant---' / - // Constant---' - - // Create opset8::Parameter operation with static shape - auto data = std::make_shared(ov::element::f32, ov::Shape{3, 1, 2}); - - auto mul_constant = ov::opset8::Constant::create(ov::element::f32, ov::Shape{1}, {1.5}); - auto mul = std::make_shared(data, mul_constant); - - auto add_constant = ov::opset8::Constant::create(ov::element::f32, ov::Shape{1}, {0.5}); - auto add = std::make_shared(mul, add_constant); - - // Create opset8::Result operation - auto res = std::make_shared(mul); - - // Create nGraph function - return std::make_shared(ov::ResultVector{res}, ov::ParameterVector{data}); -} -// ! [ov:create_simple_model] - -// ! 
[ov:create_advanced_model] -std::shared_ptr create_advanced_function() { - // Advanced example with multi output operation - // - // Parameter->Split---0-->Result - // | `--1-->Relu-->Result - // `----2-->Result - - auto data = std::make_shared(ov::element::f32, ov::Shape{1, 3, 64, 64}); - - // Create Constant for axis value - auto axis_const = ov::opset8::Constant::create(ov::element::i64, ov::Shape{}/*scalar shape*/, {1}); - - // Create opset8::Split operation that splits input to three slices across 1st dimension - auto split = std::make_shared(data, axis_const, 3); - - // Create opset8::Relu operation that takes 1st Split output as input - auto relu = std::make_shared(split->output(1)/*specify explicit output*/); - - // Results operations will be created automatically based on provided OutputVector - return std::make_shared(ov::OutputVector{split->output(0), relu, split->output(2)}, ov::ParameterVector{data}); -} -// ! [ov:create_advanced_model] - bool ngraph_api_examples(std::shared_ptr node) { { // ! [ngraph:ports_example] @@ -95,16 +45,6 @@ auto consumers = output.get_target_inputs(); // ! [ngraph:ports_example] } -{ -// ! [ngraph:shape] -auto partial_shape = node->input(0).get_partial_shape(); // get zero input partial shape -if (partial_shape.is_dynamic() /* or !partial_shape.is_static() */) { - return false; -} -auto static_shape = partial_shape.get_shape(); -// ! [ngraph:shape] -} - { // ! [ngraph:shape_check] auto partial_shape = node->input(0).get_partial_shape(); // get zero input partial shape diff --git a/docs/snippets/ov_infer_request.cpp b/docs/snippets/ov_infer_request.cpp new file mode 100644 index 00000000000..42be537252e --- /dev/null +++ b/docs/snippets/ov_infer_request.cpp @@ -0,0 +1,112 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +// ! [ov:include] +#include +// ! [ov:include] + +int main() { +ov::Core core; +std::shared_ptr model; +ov::CompiledModel compiled_model; + +//! [create_infer_request] +auto infer_request = compiled_model.create_infer_request(); +//! [create_infer_request] + +//! [sync_infer] +infer_request.infer(); +//! [sync_infer] + +//! [async_infer] +infer_request.start_async(); +//! [async_infer] + +//! [wait] +infer_request.wait(); +//! [wait] + +//! [wait_for] +infer_request.wait_for(std::chrono::milliseconds(10)); +//! [wait_for] + +//! [set_callback] +infer_request.set_callback([&](std::exception_ptr ex_ptr) { + if (!ex_ptr) { + // all done. Output data can be processed. + // You can fill the input data and run inference one more time: + infer_request.start_async(); + } else { + // Something wrong, you can analyze exception_ptr + } +}); +//! [set_callback] + +//! [cancel] +infer_request.cancel(); +//! [cancel] + +{ +//! [get_set_one_tensor] +auto input_tensor = infer_request.get_input_tensor(); +auto output_tensor = infer_request.get_output_tensor(); +//! [get_set_one_tensor] +} + +{ +//! [get_set_index_tensor] +auto input_tensor = infer_request.get_input_tensor(0); +auto output_tensor = infer_request.get_output_tensor(1); +//! [get_set_index_tensor] +} + +//! [get_set_tensor] +auto tensor1 = infer_request.get_tensor("tensor_name1"); +ov::Tensor tensor2; +infer_request.set_tensor("tensor_name2", tensor2); +//! [get_set_tensor] + +{ +//! [get_set_tensor_by_port] +auto input_port = model->input(0); +auto output_port = model->output("tensor_name"); +ov::Tensor input_tensor; +infer_request.set_tensor(input_port, input_tensor); +auto output_tensor = infer_request.get_tensor(output_port); +//! 
[get_set_tensor_by_port] +} + +auto infer_request1 = compiled_model.create_infer_request(); +auto infer_request2 = compiled_model.create_infer_request(); + +//! [cascade_models] +auto output = infer_request1.get_output_tensor(0); +infer_request2.set_input_tensor(0, output); +//! [cascade_models] + +//! [roi_tensor] +/** input_tensor points to input of a previous network and + cropROI contains coordinates of output bounding box **/ +ov::Tensor input_tensor(ov::element::f32, ov::Shape({1, 3, 20, 20})); +ov::Coordinate begin({0, 0, 0, 0}); +ov::Coordinate end({1, 2, 3, 3}); +//... + +/** roi_tensor uses shared memory of input_tensor and describes cropROI + according to its coordinates **/ +ov::Tensor roi_tensor(input_tensor, begin, end); +infer_request2.set_tensor("input_name", roi_tensor); +//! [roi_tensor] + +{ +//! [remote_tensor] +ov::RemoteContext context = core.get_default_context("GPU"); +auto input_port = compiled_model.input("tensor_name"); +ov::RemoteTensor remote_tensor = context.create_tensor(input_port.get_element_type(), input_port.get_shape()); +infer_request.set_tensor(input_port, remote_tensor); +//! [remote_tensor] +} + +return 0; +} diff --git a/docs/snippets/ov_infer_request.py b/docs/snippets/ov_infer_request.py new file mode 100644 index 00000000000..c18ea6316f2 --- /dev/null +++ b/docs/snippets/ov_infer_request.py @@ -0,0 +1,97 @@ +# Copyright (C) 2018-2022 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import numpy as np +#! [import] +import openvino.runtime as ov +#! [import] + +core = ov.Core() +model = core.read_model("model.xml") +compiled_model = core.compile_model(model, "AUTO") + +#! [create_infer_request] +infer_request = compiled_model.create_infer_request() +#! [create_infer_request] + +#! [sync_infer] +infer_request.infer() +#! [sync_infer] + +#! [async_infer] +infer_request.start_async() +#! [async_infer] + +#! [wait] +infer_request.wait() +#! [wait] + +#! [wait_for] +infer_request.wait_for(10) +#! [wait_for] + +#! [set_callback] +def callback(request, userdata): + request.start_async() + +infer_request.set_callback(callback) +#! [set_callback] + +#! [cancel] +infer_request.cancel() +#! [cancel] + +#! [get_set_one_tensor] +input_tensor = infer_request.get_input_tensor() +output_tensor = infer_request.get_output_tensor() +#! [get_set_one_tensor] + +#! [get_set_index_tensor] +input_tensor = infer_request.get_input_tensor(0) +output_tensor = infer_request.get_output_tensor(1) +#! [get_set_index_tensor] + +#! [get_set_name_tensor] +input_tensor = infer_request.get_tensor("input_name") +output_tensor = infer_request.get_tensor("output_name") +#! [get_set_name_tensor] + +#! [get_set_tensor] +tensor1 = infer_request.get_tensor("tensor_name1") +tensor2 = ov.Tensor() +infer_request.set_tensor("tensor_name2", tensor2) +#! [get_set_tensor] + +#! [get_set_tensor_by_port] +input_port = model.input(0) +output_port = model.input("tensor_name") +input_tensor = ov.Tensor() +infer_request.set_tensor(input_port, input_tensor) +output_tensor = infer_request.get_tensor(output_port) +#! [get_set_tensor_by_port] + +infer_request1 = compiled_model.create_infer_request() +infer_request2 = compiled_model.create_infer_request() + +#! [cascade_models] +output = infer_request1.get_output_tensor(0) +infer_request2.set_input_tensor(0, output) +#! [cascade_models] + +#! 
[roi_tensor] +# input_tensor points to input of a previous network and +# cropROI contains coordinates of output bounding box **/ +input_tensor = ov.Tensor(type=ov.Type.f32, shape=ov.Shape([1, 3, 20, 20])) +begin = [0, 0, 0, 0] +end = [1, 2, 3, 3] +# ... + +# roi_tensor uses shared memory of input_tensor and describes cropROI +# according to its coordinates **/ +roi_tensor = ov.Tensor(input_tensor, begin, end) +infer_request2.set_tensor("input_name", roi_tensor) +#! [roi_tensor] + +#! [remote_tensor] +# NOT SUPPORTED +#! [remote_tensor] diff --git a/docs/snippets/ov_model_snippets.cpp b/docs/snippets/ov_model_snippets.cpp index e5ccca530c6..12a8c732fc3 100644 --- a/docs/snippets/ov_model_snippets.cpp +++ b/docs/snippets/ov_model_snippets.cpp @@ -1,7 +1,14 @@ -// Copyright (C) 2018-2021 Intel Corporation +// Copyright (C) 2020 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // +// ! [ov:include] +#include +#include +// ! [ov:include] + +#include +#include #include #include #include @@ -9,6 +16,107 @@ #include #include + +// ! [ov:create_simple_model] +std::shared_ptr create_simple_model() { + // This example shows how to create ov::Model + // + // Parameter--->Multiply--->Add--->Result + // Constant---' / + // Constant---' + + // Create opset8::Parameter operation with static shape + auto data = std::make_shared(ov::element::f32, ov::Shape{3, 1, 2}); + + auto mul_constant = ov::opset8::Constant::create(ov::element::f32, ov::Shape{1}, {1.5}); + auto mul = std::make_shared(data, mul_constant); + + auto add_constant = ov::opset8::Constant::create(ov::element::f32, ov::Shape{1}, {0.5}); + auto add = std::make_shared(mul, add_constant); + + // Create opset8::Result operation + auto res = std::make_shared(mul); + + // Create nGraph function + return std::make_shared(ov::ResultVector{res}, ov::ParameterVector{data}); +} +// ! [ov:create_simple_model] + +// ! [ov:create_advanced_model] +std::shared_ptr create_advanced_model() { + // Advanced example with multi output operation + // + // Parameter->Split---0-->Result + // | `--1-->Relu-->Result + // `----2-->Result + + auto data = std::make_shared(ov::element::f32, ov::Shape{1, 3, 64, 64}); + + // Create Constant for axis value + auto axis_const = ov::opset8::Constant::create(ov::element::i64, ov::Shape{} /*scalar shape*/, {1}); + + // Create opset8::Split operation that splits input to three slices across 1st dimension + auto split = std::make_shared(data, axis_const, 3); + + // Create opset8::Relu operation that takes 1st Split output as input + auto relu = std::make_shared(split->output(1) /*specify explicit output*/); + + // Results operations will be created automatically based on provided OutputVector + return std::make_shared(ov::OutputVector{split->output(0), relu, split->output(2)}, + ov::ParameterVector{data}); +} +// ! [ov:create_advanced_model] + +void ov_api_examples() { + std::shared_ptr node = std::make_shared(ov::element::f32, ov::PartialShape{ov::Dimension::dynamic(), 3, 64, 64}); + + // ! [ov:partial_shape] + ov::Shape static_shape; + ov::PartialShape partial_shape = node->output(0).get_partial_shape(); // get zero output partial shape + if (!partial_shape.is_dynamic() /* or partial_shape.is_static() */) { + static_shape = partial_shape.get_shape(); + } + // ! [ov:partial_shape] +} + +// ! 
[ov:serialize] +void serialize_example(const std::shared_ptr& f) { + // Need include: + // * openvino/pass/manager.hpp + // * openvino/pass/serialize.hpp + ov::pass::Manager manager; + + // Serialize ov::Model to IR + manager.register_pass("/path/to/file/model.xml", "/path/to/file/model.bin"); + + manager.run_passes(f); +} +// ! [ov:serialize] + +// ! [ov:visualize] +void visualize_example(const std::shared_ptr& m) { + // Need include: + // * openvino/pass/manager.hpp + // * openvino/pass/visualize_tree.hpp + ov::pass::Manager manager; + + // Serialize ov::Model to before.svg file before transformation + manager.register_pass("image.svg"); + + manager.run_passes(m); +} +// ! [ov:visualize] + +void model_inputs() { +std::shared_ptr model; +//! [all_inputs_ouputs] +/* Take information about all topology inputs */ +auto inputs = model->inputs(); +/* Take information about all topology outputs */ +auto outputs = model->outputs(); +//! [all_inputs_ouputs] +} + void pattern_matcher_examples(std::shared_ptr node) { { // ! [pattern:simple_example] diff --git a/docs/snippets/ov_model_snippets.py b/docs/snippets/ov_model_snippets.py new file mode 100644 index 00000000000..575ab07e80e --- /dev/null +++ b/docs/snippets/ov_model_snippets.py @@ -0,0 +1,88 @@ +# Copyright (C) 2018-2022 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import numpy as np +#! [import] +import openvino.runtime as ov +#! [import] +import openvino.runtime.passes as passes + +# ! [ov:create_simple_model] +def create_simple_model(): + # This example shows how to create ov::Function + # + # Parameter--->Multiply--->Add--->Result + # Constant---' / + # Constant---' + data = ov.opset8.parameter([3, 1, 2], ov.Type.f32) + mul_constant = ov.opset8.constant([1.5], ov.Type.f32) + mul = ov.opset8.multiply(data, mul_constant) + add_constant = ov.opset8.constant([0.5], ov.Type.f32) + add = ov.opset8.add(mul, add_constant) + res = ov.opset8.result(add) + return ov.Model([res], [data], "model") +# ! [ov:create_simple_model] + +# ! [ov:create_advanced_model] +def create_advanced_model(): + # Advanced example with multi output operation + # + # Parameter->Split---0-->Result + # | `--1-->Relu-->Result + # `----2-->Result + data = ov.opset8.parameter(ov.Shape([1, 3, 64, 64]), ov.Type.f32) + # Create Constant for axis value + axis_const = ov.opset8.constant(ov.Type.i64, ov.Shape({}), [1]) + + # Create opset8::Split operation that splits input to three slices across 1st dimension + split = ov.opset8.split(data, axis_const, 3) + + # Create opset8::Relu operation that takes 1st Split output as input + relu = ov.opset8.relu(split.output(1)) + + # Results operations will be created automatically based on provided OutputVector + return ov.Model([split.output(0), relu, split.output[2]], [data], "model") +# ! [ov:create_advanced_model] + +def ov_api_examples(): + # Doesn't work + # node = ov.opset8.parameter(ov.PartialShape([ov.Dimension.dynamic(), 3, 64, 64]), np.float32) + node = ov.opset8.parameter(ov.PartialShape([ov.Dimension.dynamic(), ov.Dimension(3), ov.Dimension(64), ov.Dimension(64)]), np.float32) + + # it doesn't work: + # static_shape = ov.Shape() + # ! [ov:partial_shape] + partial_shape = node.output(0).get_partial_shape() # get zero output partial shape + if not partial_shape.is_dynamic: # or partial_shape.is_static + static_shape = partial_shape.get_shape() + # ! [ov:partial_shape] + +# ! 
[ov:serialize] +def serialize_example(m : ov.Model): + # Need import: + # * import openvino.runtime.passes as passes + pass_manager = passes.Manager() + pass_manager.register_pass(pass_name="Serialize", xml_path='model.xml', bin_path='model.bin') + pass_manager.run_passes(m) +# ! [ov:serialize] + +# ! [ov:visualize] +def visualize_example(m : ov.Model): + # Need import: + # * import openvino.runtime.passes as passes + pass_manager = passes.Manager() + pass_manager.register_pass(pass_name="VisualTree", file_name='image.svg') + pass_manager.run_passes(m) +# ! [ov:visualize] + +def model_inputs_outputs(model : ov.Model): + #! [all_inputs_ouputs] + inputs = model.inputs + outputs = model.outputs + #! [all_inputs_ouputs] + + +if __name__ == '__main__': + ov_api_examples() + create_simple_model() + create_advanced_model() diff --git a/docs/snippets/src/main.cpp b/docs/snippets/src/main.cpp new file mode 100644 index 00000000000..c600e063865 --- /dev/null +++ b/docs/snippets/src/main.cpp @@ -0,0 +1,69 @@ +// Copyright (C) 2018-2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +//! [include] +#include +//! [include] + +int main() { +//! [part1] +ov::Core core; +//! [part1] + +ov::CompiledModel compiled_model; +{ +//! [part2_1] +ov::CompiledModel compiled_model = core.compile_model("model.xml", "AUTO"); +//! [part2_1] +} +{ +//! [part2_2] +ov::CompiledModel compiled_model = core.compile_model("model.onnx", "AUTO"); +//! [part2_2] +} +{ +//! [part2_3] +ov::CompiledModel compiled_model = core.compile_model("model.pdmodel", "AUTO"); +//! [part2_3] +} +{ +//! [part2_4] +auto create_model = []() { + std::shared_ptr model; + // To construct a model, please follow + // https://docs.openvino.ai/latest/openvino_docs_OV_Runtime_UG_Model_Representation.html + return model; +}; +std::shared_ptr model = create_model(); +compiled_model = core.compile_model(model, "AUTO"); +//! [part2_4] +} + +//! [part3] +ov::InferRequest infer_request = compiled_model.create_infer_request(); +//! [part3] + +void * memory_ptr = nullptr; +//! [part4] +// Get input port for model with one input +auto input_port = compiled_model.input(); +// Create tensor from external memory +ov::Tensor input_tensor(input_port.get_element_type(), input_port.get_shape(), memory_ptr); +// Set input tensor for model with one input +infer_request.set_input_tensor(input_tensor); +//! [part4] + +//! [part5] +infer_request.start_async(); +infer_request.wait(); +//! [part5] + +//! [part6] +// Get output tensor by tensor name +auto output = infer_request.get_tensor("tensor_name"); +const float *output_buffer = output.data(); +/* output_buffer[] - accessing output tensor data */ +//! [part6] +return 0; +} diff --git a/docs/snippets/src/main.py b/docs/snippets/src/main.py new file mode 100644 index 00000000000..a86986d56be --- /dev/null +++ b/docs/snippets/src/main.py @@ -0,0 +1,58 @@ +# Copyright (C) 2018-2022 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import numpy as np +#! [import] +import openvino.runtime as ov +#! [import] + +#! [part1] +core = ov.Core() +#! [part1] + +#! [part2_1] +compiled_model = core.compile_model("model.xml", "AUTO") +#! [part2_1] +#! [part2_2] +compiled_model = core.compile_model("model.onnx", "AUTO") +#! [part2_2] +#! [part2_3] +compiled_model = core.compile_model("model.pdmodel", "AUTO") +#! [part2_3] +#! 
[part2_4] +def create_model(): + # This example shows how to create ov::Function + # + # To construct a model, please follow + # https://docs.openvino.ai/latest/openvino_docs_OV_Runtime_UG_Model_Representation.html + data = ov.opset8.parameter([3, 1, 2], ov.Type.f32) + res = ov.opset8.result(data) + return ov.Model([res], [data], "model") + +model = create_model() +compiled_model = core.compile_model(model, "AUTO") +#! [part2_4] + +#! [part3] +infer_request = compiled_model.create_infer_request() +#! [part3] + +memory = np.array([1, 2, 3, 4]) +#! [part4] +# Create tensor from external memory +input_tensor = ov.Tensor(array=memory, shared_memory=True) +# Set input tensor for model with one input +infer_request.set_input_tensor(input_tensor) +#! [part4] + +#! [part5] +infer_request.start_async() +infer_request.wait() +#! [part5] + +#! [part6] +# Get output tensor for model with one output +output = infer_request.get_output_tensor() +output_buffer = output.data +# output_buffer[] - accessing output tensor data +#! [part6] diff --git a/samples/c/hello_classification/README.md b/samples/c/hello_classification/README.md index f9daa132d69..a77c10f5f5f 100644 --- a/samples/c/hello_classification/README.md +++ b/samples/c/hello_classification/README.md @@ -26,7 +26,7 @@ Upon the start-up, the sample application reads command line parameters, loads s Then, the sample creates an synchronous inference request object. When inference is done, the application outputs data to the standard output stream. You can see the explicit description of -each sample step at [Integration Steps](../../../docs/OV_Runtime_UG/Integrate_with_customer_application_new_API.md) section of "Integrate the Inference Engine with Your Application" guide. +each sample step at [Integration Steps](../../../docs/OV_Runtime_UG/integrate_with_your_application.md) section of "Integrate OpenVINO™ Runtime with Your Application" guide. ## Building @@ -92,8 +92,8 @@ This sample is an API example, for any performance measurements please use the d ## See Also -- [Integrate the Inference Engine with Your Application](../../../docs/OV_Runtime_UG/Integrate_with_customer_application_new_API.md) -- [Using Inference Engine Samples](../../../docs/OV_Runtime_UG/Samples_Overview.md) +- [Integrate OpenVINO™ into Your Application](../../../docs/OV_Runtime_UG/integrate_with_your_application.md) +- [Using OpenVINO™ Samples](../../../docs/OV_Runtime_UG/Samples_Overview.md) - [Model Downloader](@ref omz_tools_downloader) - [Model Optimizer](../../../docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md) diff --git a/samples/c/hello_nv12_input_classification/README.md b/samples/c/hello_nv12_input_classification/README.md index a3879a129d0..da71ed74e7f 100644 --- a/samples/c/hello_nv12_input_classification/README.md +++ b/samples/c/hello_nv12_input_classification/README.md @@ -25,7 +25,7 @@ image in the NV12 color format to an Inference Engine plugin. Then, the sample c application outputs data to the standard output stream. You can see the explicit description of -each sample step at [Integration Steps](https://docs.openvino.ai/latest/openvino_docs_IE_DG_Integrate_with_customer_application_new_API.html) section of "Integrate the Inference Engine with Your Application" guide. +each sample step at [Integration Steps](../../../docs/OV_Runtime_UG/integrate_with_your_application.md) section of "Integrate OpenVINO™ Runtime with Your Application" guide. 
## Building @@ -107,8 +107,8 @@ This sample is an API example, for any performance measurements please use the d ## See Also -- [Integrate the Inference Engine with Your Application](../../../docs/OV_Runtime_UG/Integrate_with_customer_application_new_API.md) -- [Using Inference Engine Samples](../../../docs/OV_Runtime_UG/Samples_Overview.md) +- [Integrate the OpenVINO™ into Your Application](../../../docs/OV_Runtime_UG/integrate_with_your_application.md) +- [Using OpenVINO™ Samples](../../../docs/OV_Runtime_UG/Samples_Overview.md) - [Model Downloader](@ref omz_tools_downloader) - [Model Optimizer](../../../docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md) diff --git a/samples/cpp/classification_sample_async/README.md b/samples/cpp/classification_sample_async/README.md index c9f601cc4c8..fbddfc4df2b 100644 --- a/samples/cpp/classification_sample_async/README.md +++ b/samples/cpp/classification_sample_async/README.md @@ -36,7 +36,7 @@ After that, the application starts inference for the first infer request and wai When inference is done, the application outputs data to the standard output stream. You can place labels in .labels file near the model to get pretty output. You can see the explicit description of -each sample step at [Integration Steps](../../../docs/OV_Runtime_UG/Integrate_with_customer_application_new_API.md) section of "Integrate the OpenVINO™ Runtime with Your Application" guide. +each sample step at [Integration Steps](../../../docs/OV_Runtime_UG/integrate_with_your_application.md) section of "Integrate OpenVINO™ Runtime with Your Application" guide. ## Building @@ -172,7 +172,7 @@ classid probability ## See Also -- [Integrate the OpenVINO™ Runtime with Your Application](../../../docs/OV_Runtime_UG/Integrate_with_customer_application_new_API.md) +- [Integrate the OpenVINO™ Runtime with Your Application](../../../docs/OV_Runtime_UG/integrate_with_your_application.md) - [Using OpenVINO™ Toolkit Samples](../../../docs/OV_Runtime_UG/Samples_Overview.md) - [Model Downloader](@ref omz_tools_downloader) - [Model Optimizer](../../../docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md) diff --git a/samples/cpp/hello_classification/README.md b/samples/cpp/hello_classification/README.md index 27af315a355..f0ee0f343b0 100644 --- a/samples/cpp/hello_classification/README.md +++ b/samples/cpp/hello_classification/README.md @@ -26,7 +26,7 @@ The following C++ API is used in the application: At startup, the sample application reads command line parameters, prepares input data, loads a specified model and image to the OpenVINO™ Runtime plugin and performs synchronous inference. Then processes output data and write it to a standard output stream. You can see the explicit description of -each sample step at [Integration Steps](../../../docs/OV_Runtime_UG/Integrate_with_customer_application_new_API.md) section of "Integrate the OpenVINO™ Runtime with Your Application" guide. +each sample step at [Integration Steps](../../../docs/OV_Runtime_UG/integrate_with_your_application.md) section of "Integrate OpenVINO™ Runtime with Your Application" guide. 
## Building @@ -116,7 +116,7 @@ classid probability ## See Also -- [Integrate the OpenVINO™ Runtime with Your Application](../../../docs/OV_Runtime_UG/Integrate_with_customer_application_new_API.md) +- [Integrate the OpenVINO™ Runtime with Your Application](../../../docs/OV_Runtime_UG/integrate_with_your_application.md) - [Using OpenVINO™ Toolkit Samples](../../../docs/OV_Runtime_UG/Samples_Overview.md) - [Model Downloader](@ref omz_tools_downloader) - [Model Optimizer](../../../docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md) diff --git a/samples/cpp/hello_nv12_input_classification/README.md b/samples/cpp/hello_nv12_input_classification/README.md index 83fd42793a0..c1a78d56893 100644 --- a/samples/cpp/hello_nv12_input_classification/README.md +++ b/samples/cpp/hello_nv12_input_classification/README.md @@ -25,7 +25,7 @@ Basic OpenVINO™ Runtime API is covered by [Hello Classification C++ sample](.. At startup, the sample application reads command line parameters, loads the specified model and an image in the NV12 color format to an OpenVINO™ Runtime plugin. Then, the sample creates an synchronous inference request object. When inference is done, the application outputs data to the standard output stream. You can place labels in .labels file near the model to get pretty output. -You can see the explicit description of each sample step at [Integration Steps](../../../docs/OV_Runtime_UG/Integrate_with_customer_application_new_API.md) section of "Integrate the OpenVINO™ Runtime with Your Application" guide. +You can see the explicit description of each sample step at [Integration Steps](../../../docs/OV_Runtime_UG/integrate_with_your_application.md) section of "Integrate OpenVINO™ Runtime with Your Application" guide. ## Building @@ -130,7 +130,7 @@ classid probability ## See Also -- [Integrate the OpenVINO™ Runtime with Your Application](../../../docs/OV_Runtime_UG/Integrate_with_customer_application_new_API.md) +- [Integrate the OpenVINO™ Runtime with Your Application](../../../docs/OV_Runtime_UG/integrate_with_your_application.md) - [Using OpenVINO™ Toolkit Samples](../../../docs/OV_Runtime_UG/Samples_Overview.md) - [Model Downloader](@ref omz_tools_downloader) - [Model Optimizer](../../../docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md) diff --git a/samples/cpp/hello_query_device/README.md b/samples/cpp/hello_query_device/README.md index d783f234627..202f733542b 100644 --- a/samples/cpp/hello_query_device/README.md +++ b/samples/cpp/hello_query_device/README.md @@ -90,5 +90,5 @@ The application prints all available devices with their supported metrics and de ## See Also -- [Integrate the OpenVINO™ Runtime with Your Application](../../../docs/OV_Runtime_UG/Integrate_with_customer_application_new_API.md) +- [Integrate the OpenVINO™ Runtime with Your Application](../../../docs/OV_Runtime_UG/integrate_with_your_application.md) - [Using OpenVINO™ Toolkit Samples](../../../docs/OV_Runtime_UG/Samples_Overview.md) diff --git a/samples/cpp/hello_reshape_ssd/README.md b/samples/cpp/hello_reshape_ssd/README.md index ef9d21ba169..cdd0dcfe421 100644 --- a/samples/cpp/hello_reshape_ssd/README.md +++ b/samples/cpp/hello_reshape_ssd/README.md @@ -27,7 +27,7 @@ Upon the start-up the sample application reads command line parameters, loads sp Engine plugin. Then, the sample creates an synchronous inference request object. When inference is done, the application creates output image and output data to the standard output stream. 
You can see the explicit description of -each sample step at [Integration Steps](../../../docs/OV_Runtime_UG/Integrate_with_customer_application_new_API.md) section of "Integrate the OpenVINO™ Runtime with Your Application" guide. +each sample step at [Integration Steps](../../../docs/OV_Runtime_UG/integrate_with_your_application.md) section of "Integrate OpenVINO™ Runtime with Your Application" guide. ## Building @@ -116,7 +116,7 @@ This sample is an API example, for any performance measurements please use the d ## See Also -- [Integrate the OpenVINO™ Runtime with Your Application](../../../docs/OV_Runtime_UG/Integrate_with_customer_application_new_API.md) +- [Integrate the OpenVINO™ Runtime with Your Application](../../../docs/OV_Runtime_UG/integrate_with_your_application.md) - [Using OpenVINO™ Toolkit Samples](../../../docs/OV_Runtime_UG/Samples_Overview.md) - [Model Downloader](@ref omz_tools_downloader) - [Model Optimizer](../../../docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md) diff --git a/samples/cpp/model_creation_sample/README.md b/samples/cpp/model_creation_sample/README.md index e0edc2c3625..542d6d82ec0 100644 --- a/samples/cpp/model_creation_sample/README.md +++ b/samples/cpp/model_creation_sample/README.md @@ -33,7 +33,7 @@ At startup, the sample application does the following: - Loads the model and input data to the OpenVINO™ Runtime plugin - Performs synchronous inference and processes output data, logging each step in a standard output stream -You can see the explicit description of each sample step at [Integration Steps](../../../docs/OV_Runtime_UG/Integrate_with_customer_application_new_API.md) section of "Integrate the OpenVINO™ Runtime with Your Application" guide. +You can see the explicit description of each sample step at [Integration Steps](../../../docs/OV_Runtime_UG/integrate_with_your_application.md) section of "Integrate OpenVINO™ Runtime with Your Application" guide. ## Building @@ -178,6 +178,6 @@ classid probability label ## See Also -- [Integrate the OpenVINO™ Runtime with Your Application](../../../docs/OV_Runtime_UG/Integrate_with_customer_application_new_API.md) +- [Integrate the OpenVINO™ Runtime with Your Application](../../../docs/OV_Runtime_UG/integrate_with_your_application.md) - [Using OpenVINO™ Toolkit Samples](../../../docs/OV_Runtime_UG/Samples_Overview.md) - [Model Optimizer](../../../docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md) diff --git a/samples/cpp/speech_sample/README.md b/samples/cpp/speech_sample/README.md index dde7d168d04..0f440ba767d 100644 --- a/samples/cpp/speech_sample/README.md +++ b/samples/cpp/speech_sample/README.md @@ -31,7 +31,7 @@ At startup, the sample application reads command-line parameters, loads a specif If the `-r` option is given, error statistics are provided for each speech utterance as shown above. You can see the explicit description of -each sample step at [Integration Steps](../../../docs/OV_Runtime_UG/Integrate_with_customer_application_new_API.md) section of "Integrate the OpenVINO™ Runtime with Your Application" guide. +each sample step at [Integration Steps](../../../docs/OV_Runtime_UG/integrate_with_your_application.md) section of "Integrate OpenVINO™ Runtime with Your Application" guide. 
### GNA-specific details @@ -237,7 +237,7 @@ All of mentioned files can be downloaded from [https://storage.openvinotoolkit.o ## See Also -- [Integrate the OpenVINO™ Runtime with Your Application](../../../docs/OV_Runtime_UG/Integrate_with_customer_application_new_API.md) +- [Integrate the OpenVINO™ Runtime with Your Application](../../../docs/OV_Runtime_UG/integrate_with_your_application.md) - [Using OpenVINO™ Toolkit Samples](../../../docs/OV_Runtime_UG/Samples_Overview.md) - [Model Downloader](@ref omz_tools_downloader) - [Model Optimizer](../../../docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md) diff --git a/samples/python/classification_sample_async/README.md b/samples/python/classification_sample_async/README.md index 98c38a47166..24f4095001a 100644 --- a/samples/python/classification_sample_async/README.md +++ b/samples/python/classification_sample_async/README.md @@ -23,7 +23,7 @@ Basic OpenVINO™ Runtime API is covered by [Hello Classification Python* Sample At startup, the sample application reads command-line parameters, prepares input data, loads a specified model and image(s) to the OpenVINO™ Runtime plugin, performs synchronous inference, and processes output data, logging each step in a standard output stream. You can see the explicit description of -each sample step at [Integration Steps](../../../docs/OV_Runtime_UG/Integrate_with_customer_application_new_API.md) section of "Integrate the OpenVINO™ Runtime with Your Application" guide. +each sample step at [Integration Steps](../../../docs/OV_Runtime_UG/integrate_with_your_application.md) section of "Integrate OpenVINO™ Runtime with Your Application" guide. ## Running @@ -135,7 +135,7 @@ The sample application logs each step in a standard output stream and outputs to ## See Also -- [Integrate the OpenVINO™ Runtime with Your Application](../../../docs/OV_Runtime_UG/Integrate_with_customer_application_new_API.md) +- [Integrate the OpenVINO™ Runtime with Your Application](../../../docs/OV_Runtime_UG/integrate_with_your_application.md) - [Using OpenVINO™ Toolkit Samples](../../../docs/OV_Runtime_UG/Samples_Overview.md) - [Model Downloader](@ref omz_tools_downloader) - [Model Optimizer](../../../docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md) diff --git a/samples/python/hello_classification/README.md b/samples/python/hello_classification/README.md index 71f15ba8315..a5dd902fc1b 100644 --- a/samples/python/hello_classification/README.md +++ b/samples/python/hello_classification/README.md @@ -24,7 +24,7 @@ The following Python API is used in the application: At startup, the sample application reads command-line parameters, prepares input data, loads a specified model and image to the OpenVINO™ Runtime plugin, performs synchronous inference, and processes output data, logging each step in a standard output stream. You can see the explicit description of -each sample step at [Integration Steps](../../../docs/OV_Runtime_UG/Integrate_with_customer_application_new_API.md) section of "Integrate the OpenVINO™ Runtime with Your Application" guide. +each sample step at [Integration Steps](../../../docs/OV_Runtime_UG/integrate_with_your_application.md) section of "Integrate OpenVINO™ Runtime with Your Application" guide. 
## Running @@ -98,7 +98,7 @@ The sample application logs each step in a standard output stream and outputs to ## See Also -- [Integrate the OpenVINO™ Runtime with Your Application](../../../docs/OV_Runtime_UG/Integrate_with_customer_application_new_API.md) +- [Integrate the OpenVINO™ Runtime with Your Application](../../../docs/OV_Runtime_UG/integrate_with_your_application.md) - [Using OpenVINO™ Toolkit Samples](../../../docs/OV_Runtime_UG/Samples_Overview.md) - [Model Downloader](@ref omz_tools_downloader) - [Model Optimizer](../../../docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md) diff --git a/samples/python/hello_reshape_ssd/README.md b/samples/python/hello_reshape_ssd/README.md index ef16766af1f..f7bc8e6aba7 100644 --- a/samples/python/hello_reshape_ssd/README.md +++ b/samples/python/hello_reshape_ssd/README.md @@ -24,7 +24,7 @@ At startup, the sample application reads command-line parameters, prepares input As a result, the program creates an output image, logging each step in a standard output stream. You can see the explicit description of -each sample step at [Integration Steps](../../../docs/OV_Runtime_UG/Integrate_with_customer_application_new_API.md) section of "Integrate the OpenVINO™ Runtime with Your Application" guide. +each sample step at [Integration Steps](../../../docs/OV_Runtime_UG/integrate_with_your_application.md) section of "Integrate OpenVINO™ Runtime with Your Application" guide. ## Running @@ -86,7 +86,7 @@ The sample application logs each step in a standard output stream and creates an ## See Also -- [Integrate the OpenVINO™ Runtime with Your Application](../../../docs/OV_Runtime_UG/Integrate_with_customer_application_new_API.md) +- [Integrate the OpenVINO™ Runtime with Your Application](../../../docs/OV_Runtime_UG/integrate_with_your_application.md) - [Using OpenVINO™ Toolkit Samples](../../../docs/OV_Runtime_UG/Samples_Overview.md) - [Model Downloader](@ref omz_tools_downloader) - [Model Optimizer](../../../docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md) diff --git a/samples/python/model_creation_sample/README.md b/samples/python/model_creation_sample/README.md index d50cbf20a8e..068cb25894c 100644 --- a/samples/python/model_creation_sample/README.md +++ b/samples/python/model_creation_sample/README.md @@ -28,7 +28,7 @@ At startup, the sample application does the following: You can see the explicit description of -each sample step at [Integration Steps](../../../docs/OV_Runtime_UG/Integrate_with_customer_application_new_API.md) section of "Integrate the OpenVINO™ Runtime with Your Application" guide. +each sample step at [Integration Steps](../../../docs/OV_Runtime_UG/integrate_with_your_application.md) section of "Integrate OpenVINO™ Runtime with Your Application" guide. 
## Running @@ -127,7 +127,7 @@ The sample application logs each step in a standard output stream and outputs 10 ## See Also -- [Integrate the OpenVINO™ Runtime with Your Application](../../../docs/OV_Runtime_UG/Integrate_with_customer_application_new_API.md) +- [Integrate the OpenVINO™ Runtime with Your Application](../../../docs/OV_Runtime_UG/integrate_with_your_application.md) - [Using OpenVINO™ Toolkit Samples](../../../docs/OV_Runtime_UG/Samples_Overview.md) - [Model Downloader](@ref omz_tools_downloader) - [Model Optimizer](../../../docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md) diff --git a/samples/python/speech_sample/README.md b/samples/python/speech_sample/README.md index 1f460379557..3c8dfa99b4c 100644 --- a/samples/python/speech_sample/README.md +++ b/samples/python/speech_sample/README.md @@ -29,7 +29,7 @@ Basic OpenVINO™ Runtime API is covered by [Hello Classification Python* Sample At startup, the sample application reads command-line parameters, loads a specified model and input data to the OpenVINO™ Runtime plugin, performs synchronous inference on all speech utterances stored in the input file, logging each step in a standard output stream. You can see the explicit description of -each sample step at [Integration Steps](../../../docs/OV_Runtime_UG/Integrate_with_customer_application_new_API.md) section of "Integrate the OpenVINO™ Runtime with Your Application" guide. +each sample step at [Integration Steps](../../../docs/OV_Runtime_UG/integrate_with_your_application.md) section of "Integrate OpenVINO™ Runtime with Your Application" guide. ## GNA-specific details @@ -328,7 +328,7 @@ The sample application logs each step in a standard output stream. ## See Also -- [Integrate the OpenVINO™ Runtime with Your Application](../../../docs/OV_Runtime_UG/Integrate_with_customer_application_new_API.md) +- [Integrate the OpenVINO™ Runtime with Your Application](../../../docs/OV_Runtime_UG/integrate_with_your_application.md) - [Using OpenVINO™ Toolkit Samples](../../../docs/OV_Runtime_UG/Samples_Overview.md) - [Model Downloader](@ref omz_tools_downloader) - [Model Optimizer](../../../docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md) diff --git a/tools/pot/openvino/tools/pot/api/README.md b/tools/pot/openvino/tools/pot/api/README.md index 728711284f3..06b9af6a165 100644 --- a/tools/pot/openvino/tools/pot/api/README.md +++ b/tools/pot/openvino/tools/pot/api/README.md @@ -52,7 +52,7 @@ The POT Python* API for model optimization can be used in the following cases: - [Accuracy Checker](@ref omz_tools_accuracy_checker) tool does not support the model or dataset. - POT does not support the model in the [Simplified Mode](@ref pot_docs_BestPractices) or produces the optimized model with low accuracy in this mode. -- You already have the Python* script to validate the accuracy of the model using the [OpenVINO™ Runtime](@ref openvino_docs_OV_Runtime_User_Guide). +- You already have the Python* script to validate the accuracy of the model using the [OpenVINO™ Runtime](@ref openvino_docs_OV_Runtime_User_Guide). 
## Examples From 4f6ca1b85fa0cee82f8f5591dca07731a95e713c Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Wed, 2 Mar 2022 21:30:44 +0300 Subject: [PATCH 159/310] Docs: update some rendering stuff (#10742) * Fixed small rendering issues * Updated picture * Give better name for stateful models * Removed the document --- docs/OV_Runtime_UG/API_Changes.md | 9 --------- .../migration_ov_2_0/common_inference_pipeline.md | 2 +- docs/OV_Runtime_UG/migration_ov_2_0/configure_devices.md | 4 ++-- docs/OV_Runtime_UG/migration_ov_2_0/intro.md | 2 +- docs/OV_Runtime_UG/network_state_intro.md | 2 +- docs/OV_Runtime_UG/openvino_intro.md | 1 - docs/img/tf_openvino.png | 4 ++-- 7 files changed, 7 insertions(+), 17 deletions(-) delete mode 100644 docs/OV_Runtime_UG/API_Changes.md diff --git a/docs/OV_Runtime_UG/API_Changes.md b/docs/OV_Runtime_UG/API_Changes.md deleted file mode 100644 index 3f1c0bf42e2..00000000000 --- a/docs/OV_Runtime_UG/API_Changes.md +++ /dev/null @@ -1,9 +0,0 @@ -# OpenVINO™ Runtime API Changes History {#openvino_docs_OV_Runtime_API_Changes} - -The sections below contain detailed list of changes made to the OpenVINO™ Runtime API in recent releases. - -## 2022.1 - -### New API - -* The OpenVINO™ 2.0 API was introduced. diff --git a/docs/OV_Runtime_UG/migration_ov_2_0/common_inference_pipeline.md b/docs/OV_Runtime_UG/migration_ov_2_0/common_inference_pipeline.md index fe1b0d3541a..72970192bda 100644 --- a/docs/OV_Runtime_UG/migration_ov_2_0/common_inference_pipeline.md +++ b/docs/OV_Runtime_UG/migration_ov_2_0/common_inference_pipeline.md @@ -95,7 +95,7 @@ Inference Engine API fills inputs as `I32` precision (**not** aligned with the o @endsphinxdirective -OpenVINO™ Runtime API 2.0 fills inputs as `I64` precision (aligned with the original model):: +OpenVINO™ Runtime API 2.0 fills inputs as `I64` precision (aligned with the original model): @sphinxdirective diff --git a/docs/OV_Runtime_UG/migration_ov_2_0/configure_devices.md b/docs/OV_Runtime_UG/migration_ov_2_0/configure_devices.md index 1286d2d6746..2e19b825c4c 100644 --- a/docs/OV_Runtime_UG/migration_ov_2_0/configure_devices.md +++ b/docs/OV_Runtime_UG/migration_ov_2_0/configure_devices.md @@ -88,13 +88,13 @@ Inference Engine API: .. doxygensnippet:: docs/snippets/ov_properties_migration.cpp :language: cpp - :fragment: [executable_network_get_metric] + :fragment: [executable_network_get_config] .. tab:: Execution metrics .. doxygensnippet:: docs/snippets/ov_properties_migration.cpp :language: cpp - :fragment: [executable_network_get_config] + :fragment: [executable_network_get_metric] @endsphinxdirective diff --git a/docs/OV_Runtime_UG/migration_ov_2_0/intro.md b/docs/OV_Runtime_UG/migration_ov_2_0/intro.md index 74dfa850149..f42548e2c4a 100644 --- a/docs/OV_Runtime_UG/migration_ov_2_0/intro.md +++ b/docs/OV_Runtime_UG/migration_ov_2_0/intro.md @@ -18,7 +18,7 @@ Older versions of OpenVINO (prior to 2022.1) required to change the logic of applications when an user migrates from the frameworks like TensorFlow, ONNX Runtime, PyTorch, PaddlePaddle, etc. The change of application's logic is connected with: - Model Optimizer changed input precisions for some inputs. For example, neural langauge processing models with `I64` input are becoming to have `I32` input element type. -- Model Optimizer changed layouts for TensorFlow models ((see [Layouts in OpenVINO](../layout_overview.md))). 
It leads to unexpected user behavior that a user needs to use a different layout for its input data with compare to the framework: +- Model Optimizer changed layouts for TensorFlow models (see [Layouts in OpenVINO](../layout_overview.md)). It leads to unexpected user behavior that a user needs to use a different layout for its input data with compare to the framework: ![tf_openvino] - Inference Engine API (`InferenceEngine::CNNNetwork`) also applied some conversion rules for input and output precisions because of device plugins limitations. - Users need to specify input shapes during model conversions in Model Optimizer and work with static shapes in the application. diff --git a/docs/OV_Runtime_UG/network_state_intro.md b/docs/OV_Runtime_UG/network_state_intro.md index cf76e83ac41..1721f6145e6 100644 --- a/docs/OV_Runtime_UG/network_state_intro.md +++ b/docs/OV_Runtime_UG/network_state_intro.md @@ -1,4 +1,4 @@ -Introduction to OpenVINO state API {#openvino_docs_IE_DG_network_state_intro} +Stateful models {#openvino_docs_IE_DG_network_state_intro} ============================== This section describes how to work with stateful networks in OpenVINO toolkit, specifically: diff --git a/docs/OV_Runtime_UG/openvino_intro.md b/docs/OV_Runtime_UG/openvino_intro.md index d5f79b4be5f..349535a262b 100644 --- a/docs/OV_Runtime_UG/openvino_intro.md +++ b/docs/OV_Runtime_UG/openvino_intro.md @@ -21,7 +21,6 @@ openvino_docs_IE_DG_network_state_intro openvino_2_0_transition_guide openvino_docs_OV_Should_be_in_performance - openvino_docs_OV_Runtime_API_Changes @endsphinxdirective diff --git a/docs/img/tf_openvino.png b/docs/img/tf_openvino.png index 62e43e48907..21a62a5f22d 100644 --- a/docs/img/tf_openvino.png +++ b/docs/img/tf_openvino.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c369ce9a1b7f24929aa2f7d954ff577e0f439ea049296dd13741838b91615f38 -size 47087 +oid sha256:21e4d7d0c2de5de20bee14196641e8efacd62ef732a8bfa765e7928d78f611a0 +size 86782 From 3318dd6c687f5b353be808d0581b3dd6d8fd1ff8 Mon Sep 17 00:00:00 2001 From: Nico Galoppo Date: Wed, 2 Mar 2022 13:36:02 -0800 Subject: [PATCH 160/310] Fix MacOS DYLD_LIBRARY_PATH export (#10750) --- scripts/setupvars/setupvars.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/setupvars/setupvars.sh b/scripts/setupvars/setupvars.sh index bc46b5aac94..ab186881dc6 100755 --- a/scripts/setupvars/setupvars.sh +++ b/scripts/setupvars/setupvars.sh @@ -34,7 +34,7 @@ if [ -e "$INSTALLDIR/runtime" ]; then export HDDL_INSTALL_DIR=$INSTALLDIR/runtime/3rdparty/hddl if [[ "$OSTYPE" == "darwin"* ]]; then - export DYLD_LIBRARY_PATH=${IE_PLUGINS_PATH}/Release:${IE_PLUGINS_PATH}/Debug${DYLD_LIBRARY_PATH:+:DYLD_LIBRARY_PATH} + export DYLD_LIBRARY_PATH=${IE_PLUGINS_PATH}/Release:${IE_PLUGINS_PATH}/Debug${DYLD_LIBRARY_PATH:+:$DYLD_LIBRARY_PATH} export LD_LIBRARY_PATH=${IE_PLUGINS_PATH}/Release:${IE_PLUGINS_PATH}/Debug${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH} else export LD_LIBRARY_PATH=$HDDL_INSTALL_DIR/lib:${IE_PLUGINS_PATH}${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH} @@ -49,7 +49,7 @@ fi if [ -e "$INSTALLDIR/runtime/3rdparty/tbb" ]; then if [[ "$OSTYPE" == "darwin"* ]]; then - export DYLD_LIBRARY_PATH=$INSTALLDIR/runtime/3rdparty/tbb/lib:${DYLD_LIBRARY_PATH:+:DYLD_LIBRARY_PATH} + export DYLD_LIBRARY_PATH=$INSTALLDIR/runtime/3rdparty/tbb/lib:${DYLD_LIBRARY_PATH:+:$DYLD_LIBRARY_PATH} fi export LD_LIBRARY_PATH=$INSTALLDIR/runtime/3rdparty/tbb/lib:${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH} export TBB_DIR=$INSTALLDIR/runtime/3rdparty/tbb/cmake 
From 7ba71f9c201dcae068aa05933a984d76b218bdf2 Mon Sep 17 00:00:00 2001 From: FanJiangIntel Date: Thu, 3 Mar 2022 12:39:52 +0800 Subject: [PATCH 161/310] Enable apivalidator check when BUILD_SHARED_LIBS=OFF (#10461) * enable apivalidator for static build * add target _ie_plugins_hpp as dependency of inference_engine_obj --- cmake/developer_package/api_validator/api_validator.cmake | 6 ------ cmake/developer_package/plugins/plugins.cmake | 2 +- 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/cmake/developer_package/api_validator/api_validator.cmake b/cmake/developer_package/api_validator/api_validator.cmake index 6e073b53ad8..33a21696a9e 100644 --- a/cmake/developer_package/api_validator/api_validator.cmake +++ b/cmake/developer_package/api_validator/api_validator.cmake @@ -51,12 +51,6 @@ endfunction() set(VALIDATED_LIBRARIES "" CACHE INTERNAL "") function(_ov_add_api_validator_post_build_step) - if(NOT BUILD_SHARED_LIBS) - # since _ov_add_api_validator_post_build_step - # is currently run only on shared libraries, we have nothing to test - return() - endif() - set(UWP_API_VALIDATOR_APIS "${PROGRAMFILES}/Windows Kits/10/build/universalDDIs/x64/UniversalDDIs.xml") set(UWP_API_VALIDATOR_EXCLUSION "${UWP_SDK_PATH}/BinaryExclusionlist.xml") diff --git a/cmake/developer_package/plugins/plugins.cmake b/cmake/developer_package/plugins/plugins.cmake index 90a1b96dbbf..a630d6c354e 100644 --- a/cmake/developer_package/plugins/plugins.cmake +++ b/cmake/developer_package/plugins/plugins.cmake @@ -347,7 +347,7 @@ function(ie_generate_plugins_hpp) # for some reason dependency on source files does not work # so, we have to use explicit target and make it dependency for inference_engine add_custom_target(_ie_plugins_hpp DEPENDS ${ie_plugins_hpp}) - add_dependencies(inference_engine _ie_plugins_hpp) + add_dependencies(inference_engine_obj _ie_plugins_hpp) # add dependency for object files get_target_property(sources inference_engine_obj SOURCES) From 1c5e76c4db274003bd3f942b95b5a2e43a0f4a0c Mon Sep 17 00:00:00 2001 From: Sergey Lyalin Date: Thu, 3 Mar 2022 09:00:28 +0300 Subject: [PATCH 162/310] Dynamic Shapes Documentation (#10656) * Added draft of Dynamic Shapes Doc * Better wording Co-authored-by: Ilya Churaev * Apply suggestions from code review Better wording, grammar, technical fixes. No significant content rework. 
Co-authored-by: Andrey Zaytsev Co-authored-by: Evgenya Stepyreva * Removed indentation in dynamic shapes snippets * Split dynamic shapes doc to two separate files, added more examples, fixed code review comments, connected to TOC * Fix links * Added aux doc to toc to avoid crash in docs build in CI * Added dynamicbatching in temp section * Apply suggestions from code review * Removed old DynamicBatching document * Applied @myshevts changes * Update docs/OV_Runtime_UG/ov_without_dynamic_shapes.md * Update ov_dynamic_shapes.md * Fix links to dynamic shapes doc Co-authored-by: Ilya Churaev Co-authored-by: Andrey Zaytsev Co-authored-by: Evgenya Stepyreva --- docs/OV_Runtime_UG/DynamicBatching.md | 106 ------------ docs/OV_Runtime_UG/migration_ov_2_0/intro.md | 6 +- docs/OV_Runtime_UG/openvino_intro.md | 2 +- docs/OV_Runtime_UG/openvino_temporary.md | 1 + docs/OV_Runtime_UG/ov_dynamic_shapes.md | 122 ++++++++++++++ .../ov_without_dynamic_shapes.md | 44 +++++ docs/snippets/ov_dynamic_shapes.cpp | 157 ++++++++++++++++++ 7 files changed, 328 insertions(+), 110 deletions(-) delete mode 100644 docs/OV_Runtime_UG/DynamicBatching.md create mode 100644 docs/OV_Runtime_UG/ov_dynamic_shapes.md create mode 100644 docs/OV_Runtime_UG/ov_without_dynamic_shapes.md create mode 100644 docs/snippets/ov_dynamic_shapes.cpp diff --git a/docs/OV_Runtime_UG/DynamicBatching.md b/docs/OV_Runtime_UG/DynamicBatching.md deleted file mode 100644 index 5773af94128..00000000000 --- a/docs/OV_Runtime_UG/DynamicBatching.md +++ /dev/null @@ -1,106 +0,0 @@ -# Working with dynamic shapes {#openvino_docs_IE_DG_DynamicBatching} - -## Using Dynamic Batching (C++) - -@sphinxdirective -.. raw:: html - -
C++
-@endsphinxdirective - -The Dynamic Batching feature allows you to dynamically change batch size for inference calls -within a preset batch size limit. This feature might be useful when batch size is unknown beforehand and using an extra-large batch size is undesirable or impossible due to resource limitations. For example, applying face detection and then mood labeling to a video, you won't know in advance how many frames will contain a face when you pass inferencing results to a secondary model. - - -You can activate Dynamic Batching by setting `KEY_DYN_BATCH_ENABLED` flag to `YES` in a configuration map that is -passed to the plugin while loading a network. -This configuration creates an `ExecutableNetwork` object that will allow setting batch size -dynamically in all of its infer requests using `SetBatch()` method. -The batch size that was set in the passed `CNNNetwork` object will be used as a maximum batch size limit. - -Here is a code example: - -@snippet snippets/DynamicBatching.cpp part0 - - -### Limitations - -Currently, there are certain limitations for the use of Dynamic Batching exist: - -* Use Dynamic Batching with CPU and GPU plugins only. -* Use Dynamic Batching on topologies that consist of certain layers only: - * Convolution - * Deconvolution - * Activation - * LRN - * Pooling - * FullyConnected - * SoftMax - * Split - * Concatenation - * Power - * Eltwise - * Crop - * BatchNormalization - * Copy - -The following types of layers are not supported: - -* Layers that might arbitrary change tensor shape (such as Flatten, Permute, Reshape) -* Layers specific to object detection topologies (ROIPooling, ProirBox, DetectionOutput) -* Custom layers - -Topology analysis is performed during the process of loading a network into plugin, and if the topology is not supported, an exception is generated. - -## Using Dynamic Batching (Python) - -@sphinxdirective -.. raw:: html - -
Python
-@endsphinxdirective - -Dynamic Batching is a feature that allows you to dynamically change batch size for inference calls within a preset batch size limit. This feature might be useful when batch size is unknown beforehand, and using extra large batch size is not desired or impossible due to resource limitations. For example, face detection with person age, gender, or mood recognition is a typical usage scenario. - -You can activate Dynamic Batching by setting the "DYN_BATCH_ENABLED" flag to "YES" in a configuration map that is passed to the plugin while loading a network. This configuration creates an `ExecutableNetwork` object that will allow setting batch size dynamically in all of its infer requests using the [ie_api.batch_size](api/ie_python_api/_autosummary/openvino.inference_engine.IENetwork.html#openvino.inference_engine.IENetwork.batch_size) method. The batch size that was set in the passed CNNNetwork object will be used as a maximum batch size limit. - -```python -from openvino.inference_engine import IECore - -ie = IECore() -dyn_config = {"DYN_BATCH_ENABLED": "YES"} -ie.set_config(config=dyn_config, device_name=device) -# Read a network in IR or ONNX format -net = ie.read_network(path_to_model) -net.batch_size = 32 # set the maximum batch size to 32 -exec_net = ie.load_network(network=net, device_name=device) -``` - -### Limitations - -Currently, certain limitations for the use of Dynamic Batching exist: - -* Use Dynamic Batching with CPU and GPU plugins only. -* Use Dynamic Batching on topologies that consist of certain layers only: - * Convolution - * Deconvolution - * Activation - * LRN - * Pooling - * FullyConnected - * SoftMax - * Split - * Concatenation - * Power - * Eltwise - * Crop - * BatchNormalization - * Copy - -The following types of layers are not supported: - -* Layers that might arbitrary change tensor shape (such as Flatten, Permute, Reshape) -* Layers specific to object detection topologies (ROIPooling, ProirBox, DetectionOutput) -* Custom layers - -Topology analysis is performed during the process of loading a network into plugin, and if the topology is not supported, an exception is generated. \ No newline at end of file diff --git a/docs/OV_Runtime_UG/migration_ov_2_0/intro.md b/docs/OV_Runtime_UG/migration_ov_2_0/intro.md index f42548e2c4a..fa0010fbb51 100644 --- a/docs/OV_Runtime_UG/migration_ov_2_0/intro.md +++ b/docs/OV_Runtime_UG/migration_ov_2_0/intro.md @@ -25,11 +25,11 @@ Older versions of OpenVINO (prior to 2022.1) required to change the logic of app OpenVINO Runtime API 2.0 is introduced to align logic of working with model as it is done in the frameworks - no layout and precision changes, operates with tensor names and indeces to address inputs and outputs. OpenVINO Runtime is composed of Inference Engine API used for inference and ngraph API targeted to work with models, operations. The OpenVINO API 2.0 has common structure, naming convention styles, namespaces, removes duplicated structures. See [How to migrate to OpenVINO 2.0 API](./common_inference_pipeline.md) for details. -> **NOTE**: Most important is that your existing application can continue working with OpenVINO Runtime 2.0 as it used to be, but we recommend migration to new API to unlock additional features like [Preprocessing](../preprocessing_overview.md) and [Dynamic shapes support](../DynamicBatching.md). 
+> **NOTE**: Most important is that your existing application can continue working with OpenVINO Runtime 2.0 as it used to be, but we recommend migration to new API to unlock additional features like [Preprocessing](../preprocessing_overview.md) and [Dynamic shapes support](../ov_dynamic_shapes.md).

 ### Introduce IR v11

-To support these features, OpenVINO introduced IR v11 which is generated by Model Optimizer by default since 2022.1. The model represented in IR v11 fully matches the original model in the original framework format in terms of inputs and outputs. Also, a user does not have to specify input shapes during the conversion, so the resulting IR v11 contains `-1` to denote undefined dimensions (see [Working with dynamic shapes](../DynamicBatching.md) to fully utilize this feature; or [Changing input shapes](../ShapeInference.md) to reshape to static shapes in the application).
+To support these features, OpenVINO introduced IR v11 which is generated by Model Optimizer by default since 2022.1. The model represented in IR v11 fully matches the original model in the original framework format in terms of inputs and outputs. Also, a user does not have to specify input shapes during the conversion, so the resulting IR v11 contains `-1` to denote undefined dimensions (see [Working with dynamic shapes](../ov_dynamic_shapes.md) to fully utilize this feature; or [Changing input shapes](../ShapeInference.md) to reshape to static shapes in the application).

 What is also important to mention - the IR v11 is fully compatible with old applications written with Inference Engine API from older versions of OpenVINO. This is achieved by adding additional runtime information to the IR v11 which is responsible for backward-compatible behavior. So, once the IR v11 is read by the old Inference Engine based application, it's internally converted to IR v10 to provide backward-compatible behavior.

@@ -50,7 +50,7 @@ But the following OpenVINO tools don't support IR v10 as an input, they require

 ### Differences between Inference Engine and OpenVINO Runtime 2.0

 Inference Engine and ngraph APIs are not deprecated, they are fully functional and can be used in applications. But OpenVINO recommends users to migrate to the new OpenVINO Runtime API 2.0, because it already has additional features and this list will be extended later. The following list of additional features is supported by the new API:
-- [Working with dynamic shapes](../DynamicBatching.md). The feature is quite useful for best performance for NLP (Natural Language Processing) models, super-resolution models, and others which accept dynamic input shapes.
+- [Working with dynamic shapes](../ov_dynamic_shapes.md). The feature is quite useful for best performance for NLP (Natural Language Processing) models, super-resolution models, and others which accept dynamic input shapes.
 - [Preprocessing of the model](../preprocessing_overview.md) to add preprocessing operations to the inference models and fully occupy the accelerator and free CPU resources.
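As a brief, hedged illustration of the preprocessing feature mentioned in the list above, the sketch below shows roughly how `ov::preprocess::PrePostProcessor` can embed input conversions into a model so the device does that work instead of the application. It is an editorial example, not part of this patch; the `model.xml` path, the `NHWC`/`NCHW` layouts, and the `CPU` device are assumptions made only for illustration.

```cpp
#include <openvino/openvino.hpp>
#include <openvino/core/preprocess/pre_post_process.hpp>

int main() {
    ov::Core core;
    // Hypothetical model path; assumes the model has a single input.
    std::shared_ptr<ov::Model> model = core.read_model("model.xml");

    // Declare that the application supplies U8 NHWC data while the model works in NCHW,
    // and ask OpenVINO to insert the element type and layout conversions into the model.
    ov::preprocess::PrePostProcessor ppp(model);
    ppp.input().tensor().set_element_type(ov::element::u8).set_layout("NHWC");
    ppp.input().model().set_layout("NCHW");
    ppp.input().preprocess().convert_element_type(ov::element::f32).convert_layout("NCHW");
    model = ppp.build();

    ov::CompiledModel compiled_model = core.compile_model(model, "CPU");
    return 0;
}
```

After `ppp.build()`, the conversions execute as part of the compiled model, which is what keeps the accelerator occupied and frees CPU resources as described in the list above.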
To define a difference on the API level between Inference Engine and OpenVINO Runtime API 2.0, let's define two types of behaviors:
diff --git a/docs/OV_Runtime_UG/openvino_intro.md b/docs/OV_Runtime_UG/openvino_intro.md
index 349535a262b..a4a7ae8a716 100644
--- a/docs/OV_Runtime_UG/openvino_intro.md
+++ b/docs/OV_Runtime_UG/openvino_intro.md
@@ -13,7 +13,7 @@
    openvino_docs_IE_DG_ShapeInference
    openvino_docs_OV_UG_Working_with_devices
    openvino_docs_OV_Runtime_UG_Preprocessing_Overview
-   openvino_docs_IE_DG_DynamicBatching
+   openvino_docs_OV_UG_DynamicShapes
    openvino_docs_IE_DG_supported_plugins_AUTO
    openvino_docs_OV_UG_Running_on_multiple_devices
    openvino_docs_OV_UG_Hetero_execution
diff --git a/docs/OV_Runtime_UG/openvino_temporary.md b/docs/OV_Runtime_UG/openvino_temporary.md
index aa2c6adb66f..203f9dfd1e6 100644
--- a/docs/OV_Runtime_UG/openvino_temporary.md
+++ b/docs/OV_Runtime_UG/openvino_temporary.md
@@ -12,6 +12,7 @@
    openvino_docs_IE_DG_Model_caching_overview
    openvino_docs_IE_DG_Int8Inference
    openvino_docs_IE_DG_Bfloat16Inference
+   openvino_docs_OV_UG_NoDynamicShapes
 
 @endsphinxdirective
 
diff --git a/docs/OV_Runtime_UG/ov_dynamic_shapes.md b/docs/OV_Runtime_UG/ov_dynamic_shapes.md
new file mode 100644
index 00000000000..a2c046c961c
--- /dev/null
+++ b/docs/OV_Runtime_UG/ov_dynamic_shapes.md
@@ -0,0 +1,122 @@
+# Dynamic Shapes {#openvino_docs_OV_UG_DynamicShapes}
+
+As demonstrated in the [Changing Input Shapes](ShapeInference.md) article, there are models that support changing input shapes before model compilation in `Core::compile_model`.
+Reshaping models provides an ability to customize the model input shape for exactly the size required by the end application.
+This article explains how the ability of a model to reshape can further be leveraged in more dynamic scenarios.
+
+
+## When to Apply Dynamic Shapes
+
+Conventional "static" model reshaping works well when it can be done once per many model inference calls with the same shape.
+However, this approach doesn't perform efficiently if the input tensor shape changes on every inference call: calling `reshape()` and `compile_model()` each time a new size comes is extremely time-consuming.
+A popular example would be inference of natural language processing models (like BERT) with arbitrarily-sized input sequences that come from the user.
+In this case, the sequence length cannot be predicted and may change every time you need to call inference.
+Below, such dimensions that can be frequently changed are called *dynamic dimensions*.
+When the real shape of the input is not known at `compile_model` time, that's the case when dynamic shapes should be considered.
+
+Here are several examples of dimensions that can be naturally dynamic:
+ - Sequence length dimension for various sequence processing models, like BERT
+ - Spatial dimensions in segmentation and style transfer models
+ - Batch dimension
+ - Arbitrary number of detections in object detection models output
+
+There are various tricks to address input dynamic dimensions through combining multiple pre-reshaped models and input data padding.
+The tricks are sensitive to model internals, do not always give optimal performance and are cumbersome.
+A short overview of these methods can be found [here](ov_without_dynamic_shapes.md).
+Apply those methods only if the native dynamic shape API described in the following sections doesn't work for you or doesn't give the desired performance.
+
+The decision about using dynamic shapes should be based on proper benchmarking of the real application with real data.
+That's because unlike statically shaped models, inference of dynamically shaped ones takes a different amount of time depending on the input data shape or input tensor content.
+
+## Dynamic Shapes without Tricks
+
+This section describes how to handle dynamically shaped models natively with OpenVINO Runtime API version 2022.1 and higher.
+There are three main parts in the flow that differ from static shapes:
+ - configure the model
+ - prepare data for inference
+ - read resulting data after inference
+
+### Configure the Model
+
+To avoid the tricks mentioned in the previous section, there is a way to directly specify one or multiple dimensions in the model inputs to be dynamic.
+This is achieved with the same reshape method that is used for altering the static shape of inputs.
+Dynamic dimensions are specified as `-1` or `ov::Dimension()` instead of a positive number used for static dimensions:
+
+@snippet snippets/ov_dynamic_shapes.cpp ov_dynamic_shapes:reshape_undefined
+
+To simplify the code, the examples assume that the model has a single input and a single output.
+However, there are no limitations on the number of inputs and outputs when applying dynamic shapes.
+
+### Undefined Dimensions "Out Of the Box"
+
+Dynamic dimensions may appear in the input model without calling reshape.
+Many DL frameworks support undefined dimensions.
+If such a model is converted with Model Optimizer or read directly by Core::read_model, undefined dimensions are preserved.
+Such dimensions are automatically treated as dynamic ones.
+So you don't need to call reshape if undefined dimensions are already configured in the original model or in the IR file.
+
+If the input model has undefined dimensions that you are not going to change during the inference, you can set them to static values, using the same `reshape` method of the model.
+From the API perspective, any combination of dynamic and static dimensions can be configured.
+
+Model Optimizer provides the capability to reshape the model during the conversion, including specifying dynamic dimensions.
+Use this capability to save time on calling the `reshape` method in the end application.
+
+### Dimension Bounds
+
+Besides marking a dimension as just dynamic, you can also specify lower and/or upper bounds that define a range of allowed values for the dimension.
+Bounds are coded as arguments for `ov::Dimension`:
+
+@snippet snippets/ov_dynamic_shapes.cpp ov_dynamic_shapes:reshape_bounds
+
+Information about bounds gives the inference plugin an opportunity to apply additional optimizations.
+Using dynamic shapes assumes that the plugins apply looser optimization techniques during model compilation.
+It may require more time/memory for model compilation and inference.
+So providing any additional information like bounds can be beneficial.
+For the same reason, it is not recommended to leave dimensions undefined without a real need.
+
+When specifying bounds, the lower bound is not as important as the upper bound, because knowing the upper bound allows inference devices to allocate memory for intermediate tensors more precisely and to use a smaller number of tuned kernels for different sizes.
+Precisely speaking, the benefit of specifying a lower or upper bound is device dependent.
+Depending on the plugin, specifying upper bounds can be required.
+If you know the lower and upper bounds for a dimension, it is recommended to specify them even when a plugin can execute the model without the bounds.
+
+### Setting Input Tensors
+
+Preparing the model with the reshape method was the first step.
+The second step is passing a tensor with an appropriate shape to the infer request.
+This is similar to [regular steps](integrate_with_your_application.md), but now we can pass tensors with different shapes for the same executable model and even for the same inference request:
+
+@snippet snippets/ov_dynamic_shapes.cpp ov_dynamic_shapes:set_input_tensor
+
+In the example above `set_input_tensor` is used to specify input tensors.
+The real dimensions of the tensor are always static, because it is a concrete tensor and, in contrast to model inputs, it doesn't have any dimension variations.
+
+Similar to static shapes, `get_input_tensor` can be used instead of `set_input_tensor`.
+In contrast to static input shapes, when using `get_input_tensor` for dynamic inputs, the `set_shape` method for the returned tensor should be called to define the shape and allocate memory.
+Without doing that, the tensor returned by `get_input_tensor` is an empty tensor: its shape is not initialized and memory is not allocated, because the infer request doesn't have information about the real shape you are going to feed.
+Setting the shape for an input tensor is required when the corresponding input has at least one dynamic dimension, regardless of bounds information.
+The following example makes the same sequence of two infer requests as the previous example but uses `get_input_tensor` instead of `set_input_tensor`:
+
+@snippet snippets/ov_dynamic_shapes.cpp ov_dynamic_shapes:get_input_tensor
+
+### Dynamic Shapes in Outputs
+
+The examples above correctly handle the case when dynamic dimensions in the output are implied by propagating a dynamic dimension from the inputs.
+For example, the batch dimension in the input shape is usually propagated through the whole model and appears in the output shape.
+The same is true for other dimensions, like sequence length for NLP models or spatial dimensions for segmentation models, that are propagated through the entire network.
+
+Whether or not the output has dynamic dimensions can be examined by querying the output partial shape after the model is read or reshaped.
+The same is applicable for inputs. For example:
+
+@snippet snippets/ov_dynamic_shapes.cpp ov_dynamic_shapes:print_dynamic
+
+The appearance of `?` or ranges like `1..10` means there are dynamic dimensions in the corresponding inputs or outputs.
+
+Or more programmatically:
+
+@snippet snippets/ov_dynamic_shapes.cpp ov_dynamic_shapes:detect_dynamic
+
+If at least one dynamic dimension exists in an output of the model, the shape of the corresponding output tensor will be set as the result of the inference call.
+Before the first inference, memory for such a tensor is not allocated and its shape is `[0]`.
+If the user calls `set_output_tensor` with a pre-allocated tensor, the inference call will invoke `set_shape` internally, and the initial shape is replaced by the actually calculated shape.
+So setting the shape for output tensors in this case is useful only if you want to pre-allocate enough memory for the output tensor, because `Tensor`'s `set_shape` method will re-allocate memory only if the new shape requires more storage.
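+
+As an extra illustration, below is a rough Python sketch of the same flow. It is not part of the original C++ snippets: it assumes the `openvino.runtime` Python API from the same 2022.1 release and a hypothetical `model.xml` with a single f32 input whose second dimension is made dynamic; adapt the names and shapes to your model.
+
+```python
+import numpy as np
+from openvino.runtime import Core, Dimension, PartialShape
+
+core = Core()
+model = core.read_model("model.xml")
+
+# Keep the first dimension static (= 1), bound the second one to 8..512
+model.reshape({0: PartialShape([1, Dimension(8, 512)])})
+
+compiled = core.compile_model(model, "CPU")
+request = compiled.create_infer_request()
+
+# Two inference calls with different sequence lengths on the same request
+for length in (128, 200):
+    data = np.zeros((1, length), dtype=np.float32)
+    request.infer({0: data})
+    # The output tensor shape is defined only after the inference call
+    output = request.get_output_tensor()
+    print(output.shape)
+```
+
+Treat this only as a sketch of the API flow; the C++ snippets referenced above remain the authoritative examples for this article.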
diff --git a/docs/OV_Runtime_UG/ov_without_dynamic_shapes.md b/docs/OV_Runtime_UG/ov_without_dynamic_shapes.md
new file mode 100644
index 00000000000..8e07d1b7821
--- /dev/null
+++ b/docs/OV_Runtime_UG/ov_without_dynamic_shapes.md
@@ -0,0 +1,44 @@
+# When Dynamic Shapes API is Not Applicable {#openvino_docs_OV_UG_NoDynamicShapes}
+
+Several approaches to emulate dynamic shapes are considered in this chapter.
+Apply these methods only if the [native dynamic shape API](ov_dynamic_shapes.md) doesn't work for you or doesn't give the desired performance.
+
+## Padding
+
+The model can be designed in a way that supports partially filled tensors.
+For the BERT model, you can use a special input to the model to mask unused elements out.
+So, the model can be reshaped for some predefined big sequence length once and compiled once, and then the input tensors are used only partially, with a mask specifying the valid tokens.
+This approach is called *padding*.
+
+However, padding is not applicable to every model and every use case.
+You should be aware of model internals to apply padding. Otherwise, if the model is not designed to handle dummy elements in the padding area gracefully,
+then the results of inference may be totally scrambled,
+or accuracy may be significantly affected.
+The model can even crash during inference.
+
+Besides the bad developer experience,
+the main disadvantage of padding is bad performance due to the time spent processing dummy elements in the padding area,
+even if the model is properly designed to be used with padding.
+It turns out that such models are usually designed in a way where calculations in the padded area still happen, without affecting the end result.
+
+## Multiple Precompiled Models
+
+Another approach to handle arbitrarily sized inputs is to pre-compile several models reshaped for different input shapes.
+This method works well if the number of different shapes is small enough to afford the increased time for multiple reshapes and compilations
+as well as the increased amount of consumed memory.
+As this method does not scale well, it is used in combination with padding:
+the model with the most suitable input shape among the pre-reshaped models is chosen.
+It gives a smaller padded area in comparison to a single model.
+
+## Dimension Partitioning
+
+Another practical but still complicated approach applies when the input tensor can be divided into multiple chunks along the dynamic dimension,
+for example, if we have a batch of independent inputs as a single tensor.
+If arbitrary division along the batch dimension is possible - and for the batch dimension it should be possible by the purpose of the dimension -
+you can run multiple inferences using the approach with several pre-compiled models, choosing sizes that give the minimal number of inferences
+for a particular batch size in the input tensor.
+
+For example, if there are models pre-compiled for batch sizes 1, 2, 4 and 8,
+an input tensor with batch 5 can be processed with two inference calls with batch sizes 1 and 4.
+(Here it is assumed that batch processing is required for performance reasons; otherwise, you can just loop over the images in a batch
+and process them image by image with a single compiled model.)
diff --git a/docs/snippets/ov_dynamic_shapes.cpp b/docs/snippets/ov_dynamic_shapes.cpp
new file mode 100644
index 00000000000..6c259c972dc
--- /dev/null
+++ b/docs/snippets/ov_dynamic_shapes.cpp
@@ -0,0 +1,157 @@
+// Copyright (C) 2018-2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+#include 
+#include 
+
+void reshape_with_dynamics() {
+{
+//! 
[ov_dynamic_shapes:reshape_undefined] +ov::Core core; +auto model = core.read_model("model.xml"); + +// Set one static dimension (= 1) and another dynamic dimension (= Dimension()) +model->reshape({{1, ov::Dimension()}}); // {1,?} + +// The same as above +model->reshape({{1, -1}}); // {1,?} + +// Or set both dimensions as dynamic if both are going to be changed dynamically +model->reshape({{ov::Dimension(), ov::Dimension()}}); // {?,?} + +// The same as above +model->reshape({{-1, -1}}); // {?,?} +//! [ov_dynamic_shapes:reshape_undefined] +//! [ov_dynamic_shapes:reshape_bounds] +// Both dimensions are dynamic, first may have size within 1..10 and the second is withing 8..512 +model->reshape({{ov::Dimension(1, 10), ov::Dimension(8, 512)}}); // {1..10,8..512} + +// Both dimensions are dynamic, first doesn't have bounds, the second is in 8..512 +model->reshape({{-1, ov::Dimension(8, 512)}}); // {?,8..512} +//! [ov_dynamic_shapes:reshape_bounds] +} +{ +ov::Core core; +auto model = core.read_model("model.xml"); +//! [ov_dynamic_shapes:print_dynamic] +// Print output partial shape +std::cout << model->output().get_partial_shape() << "\n"; + +// Print input partial shape +std::cout << model->input().get_partial_shape() << "\n"; +//! [ov_dynamic_shapes:print_dynamic] +} +{ +ov::Core core; +//! [ov_dynamic_shapes:detect_dynamic] +auto model = core.read_model("model.xml"); + +if (model->input(0).get_partial_shape().is_dynamic()) { + // input is dynamic +} + +if (model->output(0).get_partial_shape().is_dynamic()) { + // output is dynamic +} + +if (model->output(0).get_partial_shape()[1].is_dynamic()) { + // 1-st dimension of output is dynamic +} +//! [ov_dynamic_shapes:detect_dynamic] +} +} + +void set_tensor() { +ov::Core core; +auto model = core.read_model("model.xml"); +auto executable = core.compile_model(model); +auto infer_request = executable.create_infer_request(); +//! [ov_dynamic_shapes:set_input_tensor] +// The first inference call + +// Create tensor compatible to the model input +// Shape {1, 128} is compatible to any reshape statements made in previous examples +auto input_tensor_1 = ov::Tensor(model->input().get_element_type(), {1, 128}); +// ... write values to input_tensor_1 + +// Set the tensor as a model input within infer request +infer_request.set_input_tensor(input_tensor_1); + +// Do the inference +infer_request.infer(); + +// Retrieve a tensor representing the output data +ov::Tensor output_tensor = infer_request.get_output_tensor(); + +// For dynamic models output shape usually depends on input shape, +// that means shape of output tensor is initialized after the first inference only +// and has to be queried after every infer request +auto output_shape_1 = output_tensor.get_shape(); + +// Take a pointer of an appropriate type to tensor data and read elements according to the shape +// Assuming model output is f32 data type +auto data_1 = output_tensor.data(); +// ... read values + +// The second inference call, repeat steps: + +// Create another tensor (if the previous one cannot be utilized) +// Notice, the shape is different from input_tensor_1 +auto input_tensor_2 = ov::Tensor(model->input().get_element_type(), {1, 200}); +// ... write values to input_tensor_2 + +infer_request.set_input_tensor(input_tensor_2); + +infer_request.infer(); + +// No need to call infer_request.get_output_tensor() again +// output_tensor queried after the first inference call above is valid here. 
+// But it may not be true for the memory underneath as shape changed, so re-take a pointer: +auto data_2 = output_tensor.data(); + +// and new shape as well +auto output_shape_2 = output_tensor.get_shape(); + +// ... read values in data_2 according to the shape output_shape_2 +//! [ov_dynamic_shapes:set_input_tensor] +} + +void get_tensor() { +ov::Core core; +auto model = core.read_model("model.xml"); +auto executable = core.compile_model(model); +auto infer_request = executable.create_infer_request(); +//! [ov_dynamic_shapes:get_input_tensor] +// The first inference call + +// Get the tensor; shape is not initialized +auto input_tensor = infer_request.get_input_tensor(); + +// Set shape is required +input_tensor.set_shape({1, 128}); +// ... write values to input_tensor + +infer_request.infer(); +ov::Tensor output_tensor = infer_request.get_output_tensor(); +auto output_shape_1 = output_tensor.get_shape(); +auto data_1 = output_tensor.data(); +// ... read values + +// The second inference call, repeat steps: + +// Set a new shape, may reallocate tensor memory +input_tensor.set_shape({1, 200}); +// ... write values to input_tensor memory + +infer_request.infer(); +auto data_2 = output_tensor.data(); +auto output_shape_2 = output_tensor.get_shape(); +// ... read values in data_2 according to the shape output_shape_2 +//! [ov_dynamic_shapes:get_input_tensor] +} + +int main() { +reshape_with_dynamics(); +get_tensor(); +set_tensor(); +} From 974ae136a6114dc516bcc25f14d8c3b7c622ffca Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Thu, 3 Mar 2022 09:36:26 +0300 Subject: [PATCH 163/310] Enabled old BA only under ENABLE_SAMPLES (#10746) --- tools/CMakeLists.txt | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt index f3c19240b57..c8638f7fd39 100644 --- a/tools/CMakeLists.txt +++ b/tools/CMakeLists.txt @@ -6,7 +6,10 @@ # add_subdirectory(compile_tool) -add_subdirectory(legacy/benchmark_app) + +if(ENABLE_SAMPLES) + add_subdirectory(legacy/benchmark_app) +endif() # # Python tools From 1fec99afa3e5f576859d8d7418c3cc796d239acd Mon Sep 17 00:00:00 2001 From: Ilya Churaev Date: Thu, 3 Mar 2022 09:50:54 +0300 Subject: [PATCH 164/310] Removed duplicated words (#10754) --- cmake/developer_package/cpplint/cpplint.py | 2 +- docs/Doxyfile.config | 2 +- docs/IE_PLUGIN_DG/Doxyfile | 2 +- docs/IE_PLUGIN_DG/InferRequest.md | 2 +- docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md | 2 +- docs/MO_DG/prepare_model/Getting_performance_numbers.md | 2 +- .../prepare_model/convert_model/Convert_Model_From_Caffe.md | 2 +- .../prepare_model/convert_model/Convert_Model_From_Kaldi.md | 2 +- .../prepare_model/convert_model/Convert_Model_From_MxNet.md | 2 +- .../prepare_model/convert_model/Convert_Model_From_ONNX.md | 2 +- .../convert_model/Convert_Model_From_TensorFlow.md | 2 +- docs/install_guides/installing-openvino-docker-linux.md | 4 ++-- docs/install_guides/installing-openvino-docker-windows.md | 2 +- docs/ops/sequence/RNNCell_3.md | 6 +++--- docs/suppress_warnings.txt | 2 +- licensing/onednn_third-party-programs.txt | 4 ++-- samples/cpp/benchmark_app/README.md | 2 +- src/core/include/openvino/core/descriptor/input.hpp | 4 ++-- src/core/include/openvino/op/softmax.hpp | 2 +- src/inference/include/ie/gna/gna_config.hpp | 2 +- src/tests/README.md | 4 ++-- tools/benchmark_tool/README.md | 2 +- 22 files changed, 28 insertions(+), 28 deletions(-) diff --git a/cmake/developer_package/cpplint/cpplint.py b/cmake/developer_package/cpplint/cpplint.py index 
efc12ba5c6b..2700bae29f8 100644 --- a/cmake/developer_package/cpplint/cpplint.py +++ b/cmake/developer_package/cpplint/cpplint.py @@ -3592,7 +3592,7 @@ def CheckOperatorSpacing(filename, clean_lines, linenum, error): elif not Match(r'#.*include', line): # Look for < that is not surrounded by spaces. This is only # triggered if both sides are missing spaces, even though - # technically should should flag if at least one side is missing a + # technically should flag if at least one side is missing a # space. This is done to avoid some false positives with shifts. match = Match(r'^(.*[^\s<])<[^\s=<,]', line) if match: diff --git a/docs/Doxyfile.config b/docs/Doxyfile.config index 3ee9ead3cb5..adffa442688 100644 --- a/docs/Doxyfile.config +++ b/docs/Doxyfile.config @@ -719,7 +719,7 @@ SHOW_NAMESPACES = YES # The FILE_VERSION_FILTER tag can be used to specify a program or script that # doxygen should invoke to get the current version for each file (typically from # the version control system). Doxygen will invoke the program by executing (via -# popen()) the command command input-file, where command is the value of the +# popen()) the command input-file, where command is the value of the # FILE_VERSION_FILTER tag, and input-file is the name of an input file provided # by doxygen. Whatever the program writes to standard output is used as the file # version. For an example see the documentation. diff --git a/docs/IE_PLUGIN_DG/Doxyfile b/docs/IE_PLUGIN_DG/Doxyfile index 7d7735b1fbc..84416e0483e 100644 --- a/docs/IE_PLUGIN_DG/Doxyfile +++ b/docs/IE_PLUGIN_DG/Doxyfile @@ -675,7 +675,7 @@ SHOW_NAMESPACES = YES # The FILE_VERSION_FILTER tag can be used to specify a program or script that # doxygen should invoke to get the current version for each file (typically from # the version control system). Doxygen will invoke the program by executing (via -# popen()) the command command input-file, where command is the value of the +# popen()) the command input-file, where command is the value of the # FILE_VERSION_FILTER tag, and input-file is the name of an input file provided # by doxygen. Whatever the program writes to standard output is used as the file # version. For an example see the documentation. diff --git a/docs/IE_PLUGIN_DG/InferRequest.md b/docs/IE_PLUGIN_DG/InferRequest.md index 5850c3fc625..5d50c8e36fc 100644 --- a/docs/IE_PLUGIN_DG/InferRequest.md +++ b/docs/IE_PLUGIN_DG/InferRequest.md @@ -54,7 +54,7 @@ Decrements a number of created inference requests: #### 1. `inferPreprocess` -Below is the code of the the `inferPreprocess` method to demonstrate Inference Engine common preprocessing step handling: +Below is the code of the `inferPreprocess` method to demonstrate Inference Engine common preprocessing step handling: @snippet src/template_infer_request.cpp infer_request:infer_preprocess diff --git a/docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md b/docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md index 67160bc86bb..61b8a623d21 100644 --- a/docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md +++ b/docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md @@ -637,7 +637,7 @@ To convert the model to the Intermediate Representation (IR), run Model Optimize mo --input_model INPUT_MODEL --output_dir ``` -You need to have have write permissions for an output directory. +You need to have write permissions for an output directory. 
> **NOTE**: Some models require using additional arguments to specify conversion parameters, such as `--input_shape`, `--scale`, `--scale_values`, `--mean_values`, `--mean_file`. To learn about when you need to use these parameters, refer to [Converting a Model to Intermediate Representation (IR)](prepare_model/convert_model/Converting_Model.md). diff --git a/docs/MO_DG/prepare_model/Getting_performance_numbers.md b/docs/MO_DG/prepare_model/Getting_performance_numbers.md index dc32b87e805..7e734432bd1 100644 --- a/docs/MO_DG/prepare_model/Getting_performance_numbers.md +++ b/docs/MO_DG/prepare_model/Getting_performance_numbers.md @@ -58,7 +58,7 @@ When comparing the OpenVINO Runtime performance with the framework or another re ## Using Tools
-Whether you are tuning for the first time or doing advanced performance optimization, you need a a tool that provides accurate insights. Intel® VTune™ Amplifier gives you the tool to mine it and interpret the profiling data. +Whether you are tuning for the first time or doing advanced performance optimization, you need a tool that provides accurate insights. Intel® VTune™ Amplifier gives you the tool to mine it and interpret the profiling data. Alternatively, you can gather the raw profiling data that samples report, the second chapter provides example of how to interpret these. diff --git a/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_Caffe.md b/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_Caffe.md index 365ccd2c781..91d808f4878 100644 --- a/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_Caffe.md +++ b/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_Caffe.md @@ -50,7 +50,7 @@ To convert a Caffe\* model, run Model Optimizer with the path to the input model Two groups of parameters are available to convert your model: -* Framework-agnostic parameters are used to convert a model trained with any supported framework. For details, see see the General Conversion Parameters section on the [Converting a Model to Intermediate Representation (IR)](Converting_Model.md) page. +* Framework-agnostic parameters are used to convert a model trained with any supported framework. For details, see the General Conversion Parameters section on the [Converting a Model to Intermediate Representation (IR)](Converting_Model.md) page. * [Caffe-specific parameters](#caffe_specific_conversion_params) are used to convert only Caffe\* models. ### Using Caffe\*-Specific Conversion Parameters diff --git a/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_Kaldi.md b/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_Kaldi.md index ed199d565b0..79276825513 100644 --- a/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_Kaldi.md +++ b/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_Kaldi.md @@ -50,7 +50,7 @@ mo --input_model .nnet --output_dir Two groups of parameters are available to convert your model: -* Framework-agnostic parameters are used to convert a model trained with any supported framework. For details, see see the General Conversion Parameters section on the [Converting a Model to Intermediate Representation (IR)](Converting_Model.md) page. +* Framework-agnostic parameters are used to convert a model trained with any supported framework. For details, see the General Conversion Parameters section on the [Converting a Model to Intermediate Representation (IR)](Converting_Model.md) page. * [Kaldi-specific parameters](#kaldi_specific_conversion_params) are used to convert only Kaldi\* models. ### Using Kaldi\*-Specific Conversion Parameters diff --git a/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_MxNet.md b/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_MxNet.md index 81f04fca9c5..575218d5e4b 100644 --- a/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_MxNet.md +++ b/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_MxNet.md @@ -64,7 +64,7 @@ To convert an MXNet\* model, run Model Optimizer with a path to the input model Two groups of parameters are available to convert your model: -* Framework-agnostic parameters are used to convert a model trained with any supported framework. 
For details, see see the General Conversion Parameters section on the [Converting a Model to Intermediate Representation (IR)](Converting_Model.md) page. +* Framework-agnostic parameters are used to convert a model trained with any supported framework. For details, see the General Conversion Parameters section on the [Converting a Model to Intermediate Representation (IR)](Converting_Model.md) page. * [MXNet-specific parameters](#mxnet_specific_conversion_params) are used to convert only MXNet models. diff --git a/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_ONNX.md b/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_ONNX.md index 7734c5902d7..7c4bfbf3f29 100644 --- a/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_ONNX.md +++ b/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_ONNX.md @@ -77,7 +77,7 @@ To convert an ONNX\* model, run Model Optimizer with the path to the input model ```sh mo --input_model .onnx --output_dir ``` -There are no ONNX\* specific parameters, so only framework-agnostic parameters are available to convert your model. For details, see see the General Conversion Parameters section on the [Converting a Model to Intermediate Representation (IR)](Converting_Model.md) page. +There are no ONNX\* specific parameters, so only framework-agnostic parameters are available to convert your model. For details, see the General Conversion Parameters section on the [Converting a Model to Intermediate Representation (IR)](Converting_Model.md) page. ## Supported ONNX\* Layers Refer to [Supported Framework Layers](../Supported_Frameworks_Layers.md) for the list of supported standard layers. diff --git a/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_TensorFlow.md b/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_TensorFlow.md index ba51959bb28..c3fa4528816 100644 --- a/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_TensorFlow.md +++ b/docs/MO_DG/prepare_model/convert_model/Convert_Model_From_TensorFlow.md @@ -285,7 +285,7 @@ To convert a TensorFlow model: Two groups of parameters are available to convert your model: -* Framework-agnostic parameters are used to convert a model trained with any supported framework. For details, see see the General Conversion Parameters section on the [Converting a Model to Intermediate Representation (IR)](Converting_Model.md) page. +* Framework-agnostic parameters are used to convert a model trained with any supported framework. For details, see the General Conversion Parameters section on the [Converting a Model to Intermediate Representation (IR)](Converting_Model.md) page. * [TensorFlow-specific parameters](#tensorflow_specific_conversion_params): Parameters used to convert only TensorFlow models. > **NOTE**: The color channel order (RGB or BGR) of an input data should match the channel order of the model training dataset. If they are different, perform the `RGB<->BGR` conversion specifying the command-line parameter: `--reverse_input_channels`. Otherwise, inference results may be incorrect. For more information about the parameter, refer to **When to Reverse Input Channels** section of [Converting a Model to Intermediate Representation (IR)](Converting_Model.md). 
diff --git a/docs/install_guides/installing-openvino-docker-linux.md b/docs/install_guides/installing-openvino-docker-linux.md index c40e36b8069..de9f5b02577 100644 --- a/docs/install_guides/installing-openvino-docker-linux.md +++ b/docs/install_guides/installing-openvino-docker-linux.md @@ -50,7 +50,7 @@ You can find prebuilt images on: ## Preparing a Dockerfile -You can use the [available Dockerfiles on GitHub](https://github.com/openvinotoolkit/docker_ci/tree/master/dockerfiles) or generate a Dockerfile with your settings via [DockerHub CI Framework](https://github.com/openvinotoolkit/docker_ci) which can generate a Dockerfile, build, test and deploy an image with the the Intel® Distribution of OpenVINO™ toolkit. +You can use the [available Dockerfiles on GitHub](https://github.com/openvinotoolkit/docker_ci/tree/master/dockerfiles) or generate a Dockerfile with your settings via [DockerHub CI Framework](https://github.com/openvinotoolkit/docker_ci) which can generate a Dockerfile, build, test and deploy an image with the Intel® Distribution of OpenVINO™ toolkit. You can also try our [Tutorials](https://github.com/openvinotoolkit/docker_ci/tree/master/docs/tutorials) which demonstrate the usage of Docker containers with OpenVINO. ## Configuring the Image for Different Devices @@ -59,7 +59,7 @@ If you want to run inferences on a CPU or Intel® Neural Compute Stick 2, no ext ### Configuring Docker Image for GPU -By default, the distributed Docker image for OpenVINO has the the recommended version of Intel® Graphics Compute Runtime for oneAPI Level Zero and OpenCL Driver for the operating system installed inside. If you want to build an image with a custom version of OpenCL Runtime included, you need to modify the Dockerfile using the lines below (the 19.41.14441 version is used as an example) and build the image manually: +By default, the distributed Docker image for OpenVINO has the recommended version of Intel® Graphics Compute Runtime for oneAPI Level Zero and OpenCL Driver for the operating system installed inside. If you want to build an image with a custom version of OpenCL Runtime included, you need to modify the Dockerfile using the lines below (the 19.41.14441 version is used as an example) and build the image manually: **Ubuntu 18.04/20.04**: diff --git a/docs/install_guides/installing-openvino-docker-windows.md b/docs/install_guides/installing-openvino-docker-windows.md index 3910eb09489..49b27081772 100644 --- a/docs/install_guides/installing-openvino-docker-windows.md +++ b/docs/install_guides/installing-openvino-docker-windows.md @@ -52,7 +52,7 @@ You can find prebuilt images on: ## Preparing a Dockerfile -You can use the [available Dockerfiles on GitHub](https://github.com/openvinotoolkit/docker_ci/tree/master/dockerfiles) or generate a Dockerfile with your settings via [DockerHub CI Framework](https://github.com/openvinotoolkit/docker_ci) which can generate a Dockerfile, build, test and deploy an image with the the Intel® Distribution of OpenVINO™ toolkit. +You can use the [available Dockerfiles on GitHub](https://github.com/openvinotoolkit/docker_ci/tree/master/dockerfiles) or generate a Dockerfile with your settings via [DockerHub CI Framework](https://github.com/openvinotoolkit/docker_ci) which can generate a Dockerfile, build, test and deploy an image with the Intel® Distribution of OpenVINO™ toolkit. 
## Configuring the Docker Image for Different Devices diff --git a/docs/ops/sequence/RNNCell_3.md b/docs/ops/sequence/RNNCell_3.md index 58f1b9ddd87..70ee0d9c8e0 100644 --- a/docs/ops/sequence/RNNCell_3.md +++ b/docs/ops/sequence/RNNCell_3.md @@ -57,11 +57,11 @@ Formula: * **2**: `H` - 2D tensor of type *T* `[batch_size, hidden_size]`, initial hidden state. **Required.** -* **3**: `W` - 2D tensor tensor of type *T* `[hidden_size, input_size]`, the weights for matrix multiplication. **Required.** +* **3**: `W` - 2D tensor of type *T* `[hidden_size, input_size]`, the weights for matrix multiplication. **Required.** -* **4**: `R` - 2D tensor tensor of type *T* `[hidden_size, hidden_size]`, the recurrence weights for matrix multiplication. **Required.** +* **4**: `R` - 2D tensor of type *T* `[hidden_size, hidden_size]`, the recurrence weights for matrix multiplication. **Required.** -* **5**: `B` 1D tensor tensor of type *T* `[hidden_size]`, the sum of biases (weights and recurrence weights). **Required.** +* **5**: `B` 1D tensor of type *T* `[hidden_size]`, the sum of biases (weights and recurrence weights). **Required.** **Outputs** diff --git a/docs/suppress_warnings.txt b/docs/suppress_warnings.txt index 572497bf642..af53128820e 100644 --- a/docs/suppress_warnings.txt +++ b/docs/suppress_warnings.txt @@ -13,7 +13,7 @@ the name \'.+?\' supplied as the argument warning: while setting up extension conf.py: csv directory not found argument \'.+?\' of command no uniquely matching class member found for -example example was already documented\. ignoring documentation found here\. +example was already documented\. ignoring documentation found here\. documentation for unknown define detected potential recursive class relation between class no matching file member found for diff --git a/licensing/onednn_third-party-programs.txt b/licensing/onednn_third-party-programs.txt index 0fed7990633..3897a99f34c 100644 --- a/licensing/onednn_third-party-programs.txt +++ b/licensing/onednn_third-party-programs.txt @@ -6,7 +6,7 @@ terms. This third party software, even if included with the distribution of the Intel software, may be governed by separate license terms, including without limitation, third party license terms, other Intel software license terms, and open source software license terms. These separate license terms -govern your use of the third party programs as set forth in in the +govern your use of the third party programs as set forth in the "THIRD-PARTY-PROGRAMS" file. Third party programs and their corresponding required notices and/or license @@ -554,4 +554,4 @@ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/samples/cpp/benchmark_app/README.md b/samples/cpp/benchmark_app/README.md index 3470480b968..11a3239a362 100644 --- a/samples/cpp/benchmark_app/README.md +++ b/samples/cpp/benchmark_app/README.md @@ -166,7 +166,7 @@ This section provides step-by-step instructions on how to run the Benchmark Tool > **NOTE**: The Internet access is required to execute the following steps successfully. 
If you have access to the Internet through the proxy server only, please make sure that it is configured in your OS environment. -1. Download the model. Go to the the Model Downloader directory and run the `downloader.py` script with specifying the model name and directory to download the model to: +1. Download the model. Go to the Model Downloader directory and run the `downloader.py` script with specifying the model name and directory to download the model to: ```sh cd /extras/open_model_zoo/tools/downloader ``` diff --git a/src/core/include/openvino/core/descriptor/input.hpp b/src/core/include/openvino/core/descriptor/input.hpp index 8302ed759b6..f357fb9581d 100644 --- a/src/core/include/openvino/core/descriptor/input.hpp +++ b/src/core/include/openvino/core/descriptor/input.hpp @@ -28,12 +28,12 @@ class OPENVINO_API Input { public: /// \param node The node that owns this input - /// \param index The position of this this tensor in all input tensors + /// \param index The position of this tensor in all input tensors /// \param output The output that supplies a value for this input Input(Node* node, size_t index, Output& output); /// \brief Create an Input that is not connected to an output /// \param node The node that owns this input - /// \param index The position of this this tensor in all input tensors + /// \param index The position of this tensor in all input tensors Input(Node* node, size_t index); ~Input(); diff --git a/src/core/include/openvino/op/softmax.hpp b/src/core/include/openvino/op/softmax.hpp index b24fb890183..e8e617c361f 100644 --- a/src/core/include/openvino/op/softmax.hpp +++ b/src/core/include/openvino/op/softmax.hpp @@ -47,7 +47,7 @@ private: } // namespace v1 namespace v8 { -/// \brief Softmax operation with with negative axis values +/// \brief Softmax operation with negative axis values class OPENVINO_API Softmax : public Op { public: OPENVINO_OP("Softmax", "opset8"); diff --git a/src/inference/include/ie/gna/gna_config.hpp b/src/inference/include/ie/gna/gna_config.hpp index 8f7a8bff166..1a9ff7af36b 100644 --- a/src/inference/include/ie/gna/gna_config.hpp +++ b/src/inference/include/ie/gna/gna_config.hpp @@ -38,7 +38,7 @@ namespace GNAConfigParams { * @brief Scale factor that is calculated by user, in order to use static quantisation feature * This option should be used with floating point value serialized to string with decimal separator equals to . (dot) * @details For multiple input case, individual scale factors can be passed, using - * KEY_GNA_SCALE_FACTOR[_input_layer_name] where input_layer can be obtained from from CNNNetwork::GetInputsInfo + * KEY_GNA_SCALE_FACTOR[_input_layer_name] where input_layer can be obtained from CNNNetwork::GetInputsInfo */ DECLARE_GNA_CONFIG_KEY(SCALE_FACTOR); diff --git a/src/tests/README.md b/src/tests/README.md index 4dd2f8af5dc..32e5dfb5e08 100644 --- a/src/tests/README.md +++ b/src/tests/README.md @@ -15,7 +15,7 @@ This is OpenVINO Inference Engine testing framework. OpenVINO Inference Engine t files. > **Example**: We have `ie_reshaper.cpp` within the `src/shape_infer` subfolder of the tested module. In this case - new `shape_infer` subfolder should be created within the the root of the Unit Test folder for this module. And new + new `shape_infer` subfolder should be created within the root of the Unit Test folder for this module. And new `ie_reshaper_test.cpp` file should be created within this newly created subfolder. This test file should cover all the classes and methods from the original file. 
@@ -66,4 +66,4 @@ This is OpenVINO Inference Engine testing framework. OpenVINO Inference Engine t Internal namespaces (for example, `CommonTestUtils::`, `FuncTestUtils::` or `UnitTestUtils::`) must be used to separate utilities by domains. > **NOTE**: All the utilities libraries are added to the developer package and available for closed source - development. \ No newline at end of file + development. diff --git a/tools/benchmark_tool/README.md b/tools/benchmark_tool/README.md index 9bacb159065..fd2d542fb6c 100644 --- a/tools/benchmark_tool/README.md +++ b/tools/benchmark_tool/README.md @@ -229,7 +229,7 @@ This section provides step-by-step instructions on how to run the Benchmark Tool > **NOTE**: The Internet access is required to execute the following steps successfully. If you have access to the Internet through the proxy server only, please make sure that it is configured in your OS environment. -1. Download the model. Go to the the Model Downloader directory and run the `downloader.py` script with the model name and directory to download the model to: +1. Download the model. Go to the Model Downloader directory and run the `downloader.py` script with the model name and directory to download the model to: ```sh cd /extras/open_model_zoo/tools/downloader ``` From 59cfdce73b3a97186956a7229a21b0d7aee010cc Mon Sep 17 00:00:00 2001 From: Nikolay Tyukaev Date: Thu, 3 Mar 2022 11:25:54 +0300 Subject: [PATCH 165/310] ignore doc python errors sphinx (#10756) * fixes * fixes * Update workbench.md Co-authored-by: Andrey Zaytsev --- docs/doxygen-xfail.txt | 2 ++ docs/security_guide/workbench.md | 18 +++++++----------- docs/suppress_warnings.txt | 1 + 3 files changed, 10 insertions(+), 11 deletions(-) diff --git a/docs/doxygen-xfail.txt b/docs/doxygen-xfail.txt index 72d9cd0acdc..d711ee20a37 100644 --- a/docs/doxygen-xfail.txt +++ b/docs/doxygen-xfail.txt @@ -67,3 +67,5 @@ api/api_reference.rst workbench/docs/workbench_dg/key_concepts.md workbench/docs/workbench_dg/run_single_inference.md omz_tools_downloader.rst +warning +attribute diff --git a/docs/security_guide/workbench.md b/docs/security_guide/workbench.md index cfcbdc56b99..3760277a958 100644 --- a/docs/security_guide/workbench.md +++ b/docs/security_guide/workbench.md @@ -5,18 +5,14 @@ Deep Learning Workbench (DL Workbench) is a web application running within a Doc ## Run DL Workbench Unless necessary, limit the connections to the DL Workbench to `localhost` (127.0.0.1), so that it -is only accessible from the machine the Docker container is built on: +is only accessible from the machine the Docker container is built on. -* The script [starting the DL Workbench from the - package](@ref workbench_docs_Workbench_DG_Install_from_Package) ensures that the container and the web - application are accessible only from the `localhost` by default. - -* When using `docker run` to [start the DL Workbench from Docker - Hub](@ref workbench_docs_Workbench_DG_Run_Locally), limit connections for the host IP 127.0.0.1. - For example, limit the connections for the host IP to the port `5665` with the `-p - 127.0.0.1:5665:5665` command . Refer to [Container - networking](https://docs.docker.com/config/containers/container-networking/#published-ports) for - details. +When using `docker run` to [start the DL Workbench from Docker +Hub](@ref workbench_docs_Workbench_DG_Run_Locally), limit connections for the host IP 127.0.0.1. +For example, limit the connections for the host IP to the port `5665` with the `-p +127.0.0.1:5665:5665` command . 
Refer to [Container +networking](https://docs.docker.com/config/containers/container-networking/#published-ports) for +details. ## Authentication Security diff --git a/docs/suppress_warnings.txt b/docs/suppress_warnings.txt index af53128820e..8e9d8db84b0 100644 --- a/docs/suppress_warnings.txt +++ b/docs/suppress_warnings.txt @@ -100,3 +100,4 @@ explicit markup ends without a blank line \* keyerror: \* modulenotfounderror unexpected unindent +failed to import object From 75f7bced65603f63d821f4fded2350920adb26f9 Mon Sep 17 00:00:00 2001 From: Dmitry Pigasin Date: Thu, 3 Mar 2022 12:12:22 +0300 Subject: [PATCH 166/310] Fix `-layout` option (#10648) --- samples/python/speech_sample/README.md | 46 ++++++++++++++------------ samples/python/speech_sample/utils.py | 7 ++-- 2 files changed, 28 insertions(+), 25 deletions(-) diff --git a/samples/python/speech_sample/README.md b/samples/python/speech_sample/README.md index 3c8dfa99b4c..48752c6a575 100644 --- a/samples/python/speech_sample/README.md +++ b/samples/python/speech_sample/README.md @@ -87,11 +87,11 @@ python speech_sample.py -h Usage message: ``` -usage: speech_sample.py [-h] (-m MODEL | -rg IMPORT_GNA_MODEL) -i INPUT [-o OUTPUT] [-r REFERENCE] [-d DEVICE] [-bs [1-8]] - [-qb [8, 16]] [-sf SCALE_FACTOR] [-wg EXPORT_GNA_MODEL] [-we EXPORT_EMBEDDED_GNA_MODEL] - [-we_gen [GNA1, GNA3]] [--exec_target [GNA_TARGET_2_0, GNA_TARGET_3_0]] [-pc] [-a [CORE, ATOM]] - [-iname INPUT_LAYERS] [-oname OUTPUT_LAYERS] [-cw_l CONTEXT_WINDOW_LEFT] [-cw_r CONTEXT_WINDOW_RIGHT] - [-pwl_me PWL_ME] +usage: speech_sample.py [-h] (-m MODEL | -rg IMPORT_GNA_MODEL) -i INPUT [-o OUTPUT] [-r REFERENCE] [-d DEVICE] [-bs [1-8]] + [-layout LAYOUT] [-qb [8, 16]] [-sf SCALE_FACTOR] [-wg EXPORT_GNA_MODEL] + [-we EXPORT_EMBEDDED_GNA_MODEL] [-we_gen [GNA1, GNA3]] + [--exec_target [GNA_TARGET_2_0, GNA_TARGET_3_0]] [-pc] [-a [CORE, ATOM]] [-iname INPUT_LAYERS] + [-oname OUTPUT_LAYERS] [-cw_l CONTEXT_WINDOW_LEFT] [-cw_r CONTEXT_WINDOW_RIGHT] [-pwl_me PWL_ME] optional arguments: -m MODEL, --model MODEL @@ -108,44 +108,46 @@ Options: -r REFERENCE, --reference REFERENCE Optional. Read reference score file and compare scores. -d DEVICE, --device DEVICE - Optional. Specify a target device to infer on. CPU, GPU, MYRIAD, GNA_AUTO, GNA_HW, GNA_SW_FP32, - GNA_SW_EXACT and HETERO with combination of GNA as the primary device and CPU as a secondary (e.g. - HETERO:GNA,CPU) are supported. The sample will look for a suitable plugin for device specified. Default - value is CPU. + Optional. Specify a target device to infer on. CPU, GPU, MYRIAD, GNA_AUTO, GNA_HW, GNA_SW_FP32, + GNA_SW_EXACT and HETERO with combination of GNA as the primary device and CPU as a secondary (e.g. + HETERO:GNA,CPU) are supported. The sample will look for a suitable plugin for device specified. + Default value is CPU. -bs [1-8], --batch_size [1-8] - Optional. Batch size 1-8 (default 1). + Optional. Batch size 1-8. + -layout LAYOUT Optional. Custom layout in format: "input0[value0],input1[value1]" or "[value]" (applied to all + inputs) -qb [8, 16], --quantization_bits [8, 16] Optional. Weight bits for quantization: 8 or 16 (default 16). -sf SCALE_FACTOR, --scale_factor SCALE_FACTOR - Optional. The user-specified input scale factor for quantization. If the model contains multiple + Optional. The user-specified input scale factor for quantization. If the model contains multiple inputs, provide scale factors by separating them with commas. -wg EXPORT_GNA_MODEL, --export_gna_model EXPORT_GNA_MODEL Optional. 
Write GNA model to file using path/filename provided. -we EXPORT_EMBEDDED_GNA_MODEL, --export_embedded_gna_model EXPORT_EMBEDDED_GNA_MODEL Optional. Write GNA embedded model to file using path/filename provided. -we_gen [GNA1, GNA3], --embedded_gna_configuration [GNA1, GNA3] - Optional. GNA generation configuration string for embedded export. Can be GNA1 (default) or GNA3. + Optional. GNA generation configuration string for embedded export. Can be GNA1 (default) or GNA3. --exec_target [GNA_TARGET_2_0, GNA_TARGET_3_0] - Optional. Specify GNA execution target generation. By default, generation corresponds to the GNA HW - available in the system or the latest fully supported generation by the software. See the GNA Plugin's - GNA_EXEC_TARGET config option description. + Optional. Specify GNA execution target generation. By default, generation corresponds to the GNA HW + available in the system or the latest fully supported generation by the software. See the GNA + Plugin's GNA_EXEC_TARGET config option description. -pc, --performance_counter Optional. Enables performance report (specify -a to ensure arch accurate results). -a [CORE, ATOM], --arch [CORE, ATOM] Optional. Specify architecture. CORE, ATOM with the combination of -pc. -iname INPUT_LAYERS, --input_layers INPUT_LAYERS - Optional. Layer names for input blobs. The names are separated with ",". Allows to change the order of - input layers for -i flag. Example: Input1,Input2 + Optional. Layer names for input blobs. The names are separated with ",". Allows to change the order + of input layers for -i flag. Example: Input1,Input2 -oname OUTPUT_LAYERS, --output_layers OUTPUT_LAYERS - Optional. Layer names for output blobs. The names are separated with ",". Allows to change the order of - output layers for -o flag. Example: Output1:port,Output2:port. + Optional. Layer names for output blobs. The names are separated with ",". Allows to change the + order of output layers for -o flag. Example: Output1:port,Output2:port. -cw_l CONTEXT_WINDOW_LEFT, --context_window_left CONTEXT_WINDOW_LEFT - Optional. Number of frames for left context windows (default is 0). Works only with context window + Optional. Number of frames for left context windows (default is 0). Works only with context window models. If you use the cw_l or cw_r flag, then batch size argument is ignored. -cw_r CONTEXT_WINDOW_RIGHT, --context_window_right CONTEXT_WINDOW_RIGHT - Optional. Number of frames for right context windows (default is 0). Works only with context window + Optional. Number of frames for right context windows (default is 0). Works only with context window models. If you use the cw_l or cw_r flag, then batch size argument is ignored. - -pwl_me PWL_ME Optional. The maximum percent of error for PWL function. The value must be in <0, 100> range. The + -pwl_me PWL_ME Optional. The maximum percent of error for PWL function. The value must be in <0, 100> range. The default value is 1.0. 
``` diff --git a/samples/python/speech_sample/utils.py b/samples/python/speech_sample/utils.py index ded1dd52a2c..e3a87635958 100644 --- a/samples/python/speech_sample/utils.py +++ b/samples/python/speech_sample/utils.py @@ -82,7 +82,8 @@ def parse_outputs_from_args(args: argparse.Namespace) -> Tuple[List[str], List[i def parse_input_layouts(args: argparse.Namespace, inputs: List[Output]) -> Dict[str, str]: - if ',' in args.layout: - return dict([_input.split('[') for _input in args.layout[:-1].split('],')]) - else: + if args.layout[0] == '[': return {_input.get_any_name(): args.layout[1:-1] for _input in inputs} + else: + sep = '],' if ',' in args.layout else ']' + return dict([_input.split('[') for _input in args.layout[:-1].split(sep)]) From d1630c9ac145b962760347b2565fb3274f63cddf Mon Sep 17 00:00:00 2001 From: Mateusz Bencer Date: Thu, 3 Mar 2022 11:22:42 +0100 Subject: [PATCH 167/310] Fix problem with segfault during using extension feature via Python (#10650) --- .../src/pyopenvino/frontend/frontend.cpp | 8 +- .../tests/test_frontend/test_frontend_onnx.py | 66 +++++++- .../tests/frontend/shared/src/json_config.cpp | 2 +- .../builtin_extensions.cpp | 4 +- .../test_extension.cpp | 7 + .../test_extension.hpp | 2 + .../mo/openvino/tools/mo/utils/cli_parser.py | 14 +- .../unit_tests/mo/extensions_test_actual.py | 148 ++++++++++++++++++ .../mo/unit_tests/mo/frontend_ngraph_test.py | 9 ++ 9 files changed, 248 insertions(+), 12 deletions(-) create mode 100644 tools/mo/unit_tests/mo/extensions_test_actual.py diff --git a/src/bindings/python/src/pyopenvino/frontend/frontend.cpp b/src/bindings/python/src/pyopenvino/frontend/frontend.cpp index d4f8fde9609..8997bb0697b 100644 --- a/src/bindings/python/src/pyopenvino/frontend/frontend.cpp +++ b/src/bindings/python/src/pyopenvino/frontend/frontend.cpp @@ -43,6 +43,7 @@ void regclass_frontend_FrontEnd(py::module m) { fem.def("convert", static_cast (FrontEnd::*)(const InputModel::Ptr&) const>(&FrontEnd::convert), py::arg("model"), + py::keep_alive<0, 1>(), R"( Completely convert and normalize entire function, throws if it is not possible. @@ -67,16 +68,12 @@ void regclass_frontend_FrontEnd(py::module m) { ---------- function : Model Partially converted nGraph function. - - Returns - ---------- - convert : Model - Fully converted nGraph function. )"); fem.def("convert_partially", &FrontEnd::convert_partially, py::arg("model"), + py::keep_alive<0, 1>(), R"( Convert only those parts of the model that can be converted leaving others as-is. Converted parts are not normalized by additional transformations; normalize function or @@ -96,6 +93,7 @@ void regclass_frontend_FrontEnd(py::module m) { fem.def("decode", &FrontEnd::decode, py::arg("model"), + py::keep_alive<0, 1>(), R"( Convert operations with one-to-one mapping with decoding nodes. 
Each decoding node is an nGraph node representing a single FW operation node with diff --git a/src/bindings/python/tests/test_frontend/test_frontend_onnx.py b/src/bindings/python/tests/test_frontend/test_frontend_onnx.py index 74914ee2aef..5026b9e2423 100644 --- a/src/bindings/python/tests/test_frontend/test_frontend_onnx.py +++ b/src/bindings/python/tests/test_frontend/test_frontend_onnx.py @@ -6,6 +6,8 @@ import onnx import numpy as np from onnx.helper import make_graph, make_model, make_tensor_value_info import pytest +from pathlib import Path +from itertools import chain from openvino.frontend import FrontEndManager from tests.runtime import get_runtime @@ -26,7 +28,19 @@ def create_onnx_model(): ] output_tensors = [make_tensor_value_info("out", onnx.TensorProto.FLOAT, (2, 2))] graph = make_graph([add, const_node, mul], "graph", input_tensors, output_tensors) - return make_model(graph, producer_name="ngraph ONNX Importer") + return make_model(graph, producer_name="ONNX Frontend") + + +def create_onnx_model_2(): + relu = onnx.helper.make_node("Relu", inputs=["in"], outputs=["out"]) + input_tensors = [ + make_tensor_value_info("in", onnx.TensorProto.FLOAT, (1, 2)), + ] + output_tensors = [ + make_tensor_value_info("out", onnx.TensorProto.FLOAT, (1, 2)), + ] + graph = make_graph([relu], "test_graph", input_tensors, output_tensors) + return make_model(graph, producer_name="ONNX Frontend") def create_onnx_model_with_subgraphs(): @@ -52,7 +66,7 @@ def create_onnx_model_with_subgraphs(): res = onnx.helper.make_tensor_value_info("res", onnx.TensorProto.FLOAT, [3]) graph = make_graph([if_node], "graph", [cond, A, B], [res]) - return make_model(graph, producer_name="ngraph ONNX Importer") + return make_model(graph, producer_name="ONNX Frontend") def create_onnx_model_with_custom_attributes(): @@ -88,7 +102,7 @@ def create_onnx_model_with_custom_attributes(): ] output_tensors = [make_tensor_value_info("out", onnx.TensorProto.FLOAT, (2, 2))] graph = make_graph([add, const_node, mul], "graph", input_tensors, output_tensors) - return make_model(graph, producer_name="ngraph ONNX Importer") + return make_model(graph, producer_name="ONNX Frontend") def create_onnx_model_for_op_extension(): @@ -124,7 +138,7 @@ def create_onnx_model_for_op_extension(): output_tensors = [make_tensor_value_info("out", onnx.TensorProto.FLOAT, (3, 3, 32, 32))] graph = make_graph([const_node, elu, avg_pool, floor, concat, mul, cast], "graph", input_tensors, output_tensors) - return make_model(graph, producer_name="ngraph ONNX Importer") + return make_model(graph, producer_name="ONNX Frontend") def run_function(function, *inputs, expected): @@ -140,6 +154,7 @@ def run_function(function, *inputs, expected): # This is because destroy of FrontEndManager will unload all plugins, no objects shall exist after this fem = FrontEndManager() onnx_model_filename = "model.onnx" +onnx_model_2_filename = "model2.onnx" onnx_model_with_custom_attributes_filename = "model_custom_attributes.onnx" onnx_model_with_subgraphs_filename = "model_subgraphs.onnx" onnx_model_for_op_extension_test = "model_op_extension.onnx" @@ -148,6 +163,7 @@ ONNX_FRONTEND_NAME = "onnx" def setup_module(): onnx.save_model(create_onnx_model(), onnx_model_filename) + onnx.save_model(create_onnx_model_2(), onnx_model_2_filename) onnx.save_model(create_onnx_model_with_custom_attributes(), onnx_model_with_custom_attributes_filename) onnx.save_model(create_onnx_model_with_subgraphs(), onnx_model_with_subgraphs_filename) @@ -156,6 +172,7 @@ def setup_module(): def 
teardown_module(): os.remove(onnx_model_filename) + os.remove(onnx_model_2_filename) os.remove(onnx_model_with_custom_attributes_filename) os.remove(onnx_model_with_subgraphs_filename) os.remove(onnx_model_for_op_extension_test) @@ -593,3 +610,44 @@ def test_op_extension_via_frontend_extension_map_attributes(): model = ie.read_model(onnx_model_for_op_extension_test) assert model + + +def get_builtin_extensions_path(): + win_folder_path = Path(__file__).parent.parent.parent.parent + linux_folder_path = win_folder_path.joinpath("lib") + for lib_path in chain(win_folder_path.glob("*.dll"), linux_folder_path.glob("*.so")): + if "libtest_builtin_extensions_1" in lib_path.name: + return str(lib_path) + return "" + + +@pytest.mark.skipif(len(get_builtin_extensions_path()) == 0, + reason="The extension library path was not found") +def test_so_extension_via_frontend_convert_input_model(): + skip_if_onnx_frontend_is_disabled() + + def load_model(): + fe = fem.load_by_framework(framework=ONNX_FRONTEND_NAME) + fe.add_extension(get_builtin_extensions_path()) + in_model = fe.load(onnx_model_2_filename) + return fe.convert(in_model) + + model = load_model() # model has longer lifetime than frontend + + assert any(op.get_type_name() == "Swish" for op in model.get_ops()) + assert all(op.get_type_name() != "Relu" for op in model.get_ops()) + + +@pytest.mark.skipif(len(get_builtin_extensions_path()) == 0, + reason="The extension library path was not found") +def test_so_extension_via_frontend_decode_input_model(): + skip_if_onnx_frontend_is_disabled() + + def load_decoded_model(): + fe = fem.load_by_framework(framework=ONNX_FRONTEND_NAME) + fe.add_extension(get_builtin_extensions_path()) + in_model = fe.load(onnx_model_2_filename) + return fe.decode(in_model) + + decoded_model = load_decoded_model() # decoded model has longer lifetime than frontend + assert decoded_model diff --git a/src/core/tests/frontend/shared/src/json_config.cpp b/src/core/tests/frontend/shared/src/json_config.cpp index 64f5844e781..6caff415aab 100644 --- a/src/core/tests/frontend/shared/src/json_config.cpp +++ b/src/core/tests/frontend/shared/src/json_config.cpp @@ -143,7 +143,7 @@ TEST_P(FrontEndJsonConfigTest, testAddJsonConfigExtension) { auto target_ext = json_config_ext->get_target_extensions(); // the number of Loaded extensions can be more than the number of actually used ones. 
- EXPECT_EQ(loaded_ext.size(), 8); + EXPECT_EQ(loaded_ext.size(), 9); EXPECT_EQ(target_ext.size(), 3); for (const auto& target : target_ext) { diff --git a/src/core/tests/frontend/shared/test_builtin_extensions_1/builtin_extensions.cpp b/src/core/tests/frontend/shared/test_builtin_extensions_1/builtin_extensions.cpp index 905cb7d18fe..6fc98b25e29 100644 --- a/src/core/tests/frontend/shared/test_builtin_extensions_1/builtin_extensions.cpp +++ b/src/core/tests/frontend/shared/test_builtin_extensions_1/builtin_extensions.cpp @@ -7,7 +7,9 @@ #ifdef ENABLE_OV_ONNX_FRONTEND # include -# define ONNX_EXT std::make_shared("NewCustomOp_3", CustomTranslatorONNX), +# define ONNX_EXT \ + std::make_shared("NewCustomOp_3", CustomTranslatorONNX), \ + std::make_shared("Relu", ReluToSwishTranslatorONNX), #else # define ONNX_EXT #endif diff --git a/src/core/tests/frontend/shared/test_builtin_extensions_1/test_extension.cpp b/src/core/tests/frontend/shared/test_builtin_extensions_1/test_extension.cpp index 363860a9245..9ff0a415097 100644 --- a/src/core/tests/frontend/shared/test_builtin_extensions_1/test_extension.cpp +++ b/src/core/tests/frontend/shared/test_builtin_extensions_1/test_extension.cpp @@ -6,6 +6,8 @@ #include +#include "openvino/opsets/opset8.hpp" + bool TestExtension1::transform(const std::shared_ptr& function, const std::string& config) const { function->set_friendly_name("TestFunction"); return true; @@ -29,6 +31,11 @@ ov::OutputVector CustomTranslatorONNX(const ov::frontend::NodeContext& node) { return ov::OutputVector(); } +ov::OutputVector ReluToSwishTranslatorONNX(const ov::frontend::NodeContext& node) { + auto swish = std::make_shared(node.get_input(0)); + return {swish}; +} + std::map CustomTranslatorPaddle(const ov::frontend::NodeContext& node) { return std::map(); } diff --git a/src/core/tests/frontend/shared/test_builtin_extensions_1/test_extension.hpp b/src/core/tests/frontend/shared/test_builtin_extensions_1/test_extension.hpp index 3bb978a7e36..7d8b79bbc4f 100644 --- a/src/core/tests/frontend/shared/test_builtin_extensions_1/test_extension.hpp +++ b/src/core/tests/frontend/shared/test_builtin_extensions_1/test_extension.hpp @@ -23,4 +23,6 @@ ov::OutputVector CustomTranslatorTensorflow(const ov::frontend::NodeContext& nod ov::OutputVector CustomTranslatorONNX(const ov::frontend::NodeContext& node); +ov::OutputVector ReluToSwishTranslatorONNX(const ov::frontend::NodeContext& node); + std::map CustomTranslatorPaddle(const ov::frontend::NodeContext& node); diff --git a/tools/mo/openvino/tools/mo/utils/cli_parser.py b/tools/mo/openvino/tools/mo/utils/cli_parser.py index 29b2ff096dc..a363e1298f3 100644 --- a/tools/mo/openvino/tools/mo/utils/cli_parser.py +++ b/tools/mo/openvino/tools/mo/utils/cli_parser.py @@ -191,6 +191,18 @@ def readable_dirs_or_empty(paths: str): return paths +def readable_dirs_or_files_or_empty(paths: str): + """ + Checks that comma separated list of paths are readable directories, files or a provided path is empty. + :param paths: comma separated list of paths. + :return: comma separated list of paths. + """ + if paths: + paths_list = [readable_file_or_dir(path) for path in paths.split(',')] + return ','.join(paths_list) + return paths + + def readable_dir(path: str): """ Check that specified path is a readable directory. 
@@ -395,7 +407,7 @@ def get_common_cli_parser(parser: argparse.ArgumentParser = None): "string.", default=import_extensions.default_path(), action=CanonicalizePathCheckExistenceAction, - type=readable_dirs_or_empty) + type=readable_dirs_or_files_or_empty) common_group.add_argument("--batch", "-b", type=check_positive, default=None, diff --git a/tools/mo/unit_tests/mo/extensions_test_actual.py b/tools/mo/unit_tests/mo/extensions_test_actual.py new file mode 100644 index 00000000000..c8f6bf12367 --- /dev/null +++ b/tools/mo/unit_tests/mo/extensions_test_actual.py @@ -0,0 +1,148 @@ +# Copyright (C) 2022 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import pytest +import unittest +from unittest.mock import Mock +import onnx +from onnx.helper import make_graph, make_model, make_tensor_value_info +import os +from os import path +import json +import argparse +from pathlib import Path +from itertools import chain +from openvino.tools.mo.main import prepare_ir +from openvino.frontend import ( + FrontEndManager, +) # pylint: disable=no-name-in-module,import-error + +try: + import openvino_telemetry as tm +except ImportError: + import openvino.tools.mo.utils.telemetry_stub as tm + + +def base_args_config(): + args = argparse.Namespace() + args.feManager = FrontEndManager() + args.extensions = None + args.use_legacy_frontend = False + args.use_new_frontend = True + args.framework = "onnx" + args.model_name = None + args.input_model = None + args.silent = True + args.transform = [] + args.legacy_ir_generation = False + args.scale = None + args.output = None + args.input = None + args.input_shape = None + args.batch = None + args.mean_values = None + args.scale_values = None + args.output_dir = os.getcwd() + args.freeze_placeholder_with_value = None + args.transformations_config = None + args.disable_fusing = None + args.finegrain_fusing = None + args.disable_gfusing = None + args.disable_resnet_optimization = None + args.enable_concat_optimization = None + args.static_shape = None + args.disable_weights_compression = None + args.reverse_input_channels = None + args.data_type = None + args.layout = None + args.source_layout = None + args.target_layout = None + return args + + +def get_builtin_extensions_path(): + win_folder_path = Path(__file__).parent.parent.parent.parent + linux_folder_path = win_folder_path.joinpath("lib") + for lib_path in chain( + win_folder_path.glob("*.dll"), linux_folder_path.glob("*.so") + ): + if "libtest_builtin_extensions_1" in lib_path.name: + return str(lib_path) + return "" + + +class TestMoFallback(unittest.TestCase): + def setUp(self): + tm.Telemetry.__init__ = Mock(return_value=None) + tm.Telemetry.send_event = Mock() + + self.models = {} + relu = onnx.helper.make_node("Relu", inputs=["in"], outputs=["out"]) + input_tensors = [ + make_tensor_value_info("in", onnx.TensorProto.FLOAT, (1, 2)), + ] + output_tensors = [ + make_tensor_value_info("out", onnx.TensorProto.FLOAT, (1, 2)), + ] + graph = make_graph([relu], "test_graph", input_tensors, output_tensors) + model = make_model( + graph, + producer_name="MO tests", + opset_imports=[onnx.helper.make_opsetid("", 13)], + ) + self.models["test_model.onnx"] = model + + self.test_config_files = {} + self.test_config_files[ + "test_config.json" + ] = """[ + { + "custom_attributes": { + "test_attribute": true + }, + "id": "buildin_extensions_1::TestExtension1", + "library": "library_path", + "match_kind": "scope" + } + ]""".replace( + "library_path", get_builtin_extensions_path() + ) + + for name, model in 
self.models.items(): + onnx.save(model, name) + for name, config in self.test_config_files.items(): + with open(name, "w") as f: + f.write(config) + + def tearDown(self): + for name in self.models.keys(): + os.remove(name) + for name in self.test_config_files.keys(): + os.remove(name) + + @pytest.mark.skipif( + len(get_builtin_extensions_path()) == 0, + reason="The extension library path was not found", + ) + def test_conersion_if_extensions_is_used(self): + args = base_args_config() + args.input_model = "test_model.onnx" + args.extensions = get_builtin_extensions_path() + + graph, model = prepare_ir(args) + + assert any(op.get_type_name() == "Swish" for op in model.get_ops()) + assert all(op.get_type_name() != "Relu" for op in model.get_ops()) + + @pytest.mark.skipif( + len(get_builtin_extensions_path()) == 0, + reason="The extension library path was not found", + ) + def test_conersion_if_transformations_config_is_used(self): + args = base_args_config() + args.input_model = "test_model.onnx" + args.transformations_config = "test_config.json" + + graph, model = prepare_ir(args) + + assert model.get_friendly_name() == "TestFunction" diff --git a/tools/mo/unit_tests/mo/frontend_ngraph_test.py b/tools/mo/unit_tests/mo/frontend_ngraph_test.py index fa99ed8c228..24cc8e215f9 100644 --- a/tools/mo/unit_tests/mo/frontend_ngraph_test.py +++ b/tools/mo/unit_tests/mo/frontend_ngraph_test.py @@ -61,6 +61,15 @@ def test_main_test(): assert not status.returncode +def test_mo_extensions_test(): + setup_env() + args = [sys.executable, '-m', 'pytest', + os.path.join(os.path.dirname(__file__), 'extensions_test_actual.py'), '-s'] + + status = subprocess.run(args, env=os.environ) + assert not status.returncode + + def test_mo_fallback_test(): setup_env() args = [sys.executable, '-m', 'pytest', From 8121de731c4fbe4cf73473cf1a4b55003843a9ca Mon Sep 17 00:00:00 2001 From: Steve Yoo Date: Thu, 3 Mar 2022 19:59:16 +0900 Subject: [PATCH 168/310] Add tests to OpImplCheckTest (#10413) * Add tests to OpImplCheckTest * Fix Gelu, Interpolate, LRN and related codes --- .../op_impl_check/single_op_graph.cpp | 445 +++++++++++++++++- 1 file changed, 435 insertions(+), 10 deletions(-) diff --git a/src/tests/functional/plugin/shared/src/single_layer_tests/op_impl_check/single_op_graph.cpp b/src/tests/functional/plugin/shared/src/single_layer_tests/op_impl_check/single_op_graph.cpp index c78823e79f6..a15ed01bb87 100644 --- a/src/tests/functional/plugin/shared/src/single_layer_tests/op_impl_check/single_op_graph.cpp +++ b/src/tests/functional/plugin/shared/src/single_layer_tests/op_impl_check/single_op_graph.cpp @@ -14,6 +14,258 @@ std::shared_ptr generate(const std::shared_ptr &node) { return nullptr; } +std::shared_ptr generate(const std::shared_ptr &node) { + const auto data = std::make_shared(ov::element::f32, ov::PartialShape{1, 6, 8, 9}); + const auto out_shape = ov::op::v0::Constant::create(element::i64, {2}, {5, 7}); + const auto adaptiveAvgPoolNode = std::make_shared(data, out_shape); + ov::ResultVector results{std::make_shared(adaptiveAvgPoolNode)}; + return std::make_shared(results, ov::ParameterVector{data}, "AdaptiveAvgPoolGraph"); +} + +std::shared_ptr generate(const std::shared_ptr &node) { + const auto data = std::make_shared(ov::element::f32, ov::PartialShape{1, 6, 8, 9}); + const auto out_shape = ov::op::v0::Constant::create(element::i32, {2}, {5, 7}); + const auto adaptiveMaxPoolNode = std::make_shared(data, out_shape, ov::element::i32); + ov::ResultVector results{std::make_shared(adaptiveMaxPoolNode)}; + 
return std::make_shared(results, ov::ParameterVector{data}, "AdaptiveMaxPoolGraph"); +} + +std::shared_ptr generate(const std::shared_ptr &node) { + const auto data = std::make_shared(ov::element::f32, ov::PartialShape{1, 3, 32}); + const ov::Strides strides{1}; + const ov::Shape pads_begin{0}; + const ov::Shape pads_end{0}; + const ov::Shape kernel{2}; + const auto exclude_pad = false; + const auto rounding_type = ov::op::RoundingType::FLOOR; + const auto auto_pad = ov::op::PadType::SAME_LOWER; + const auto avgPoolNode = std::make_shared(data, + strides, + pads_begin, + pads_end, + kernel, + exclude_pad, + rounding_type, + auto_pad); + ov::ResultVector results{std::make_shared(avgPoolNode)}; + return std::make_shared(results, ov::ParameterVector{data}, "AvgPoolGraph"); +} + +std::shared_ptr generate(const std::shared_ptr &node) { + const auto data = std::make_shared(ov::element::f32, ov::PartialShape{2, 3}); + const auto gamma = std::make_shared(ov::element::f32, ov::PartialShape{3}); + const auto beta = std::make_shared(ov::element::f32, ov::PartialShape{3}); + const auto mean = std::make_shared(ov::element::f32, ov::PartialShape{3}); + const auto variance = std::make_shared(ov::element::f32, ov::PartialShape{3}); + const auto epsilon = 0.25f; + const auto batchNormInterferenceNode = std::make_shared(data, + gamma, + beta, + mean, + variance, + epsilon); + ov::ResultVector results{std::make_shared(batchNormInterferenceNode)}; + return std::make_shared(results, + ov::ParameterVector{data, gamma, beta, mean, variance}, + "BatchNormInterferenceGraph"); +} + +std::shared_ptr generate(const std::shared_ptr &node) { + const auto data = std::make_shared(ov::element::f32, ov::PartialShape{2, 3}); + const auto gamma = std::make_shared(ov::element::f32, ov::PartialShape{3}); + const auto beta = std::make_shared(ov::element::f32, ov::PartialShape{3}); + const auto mean = std::make_shared(ov::element::f32, ov::PartialShape{3}); + const auto variance = std::make_shared(ov::element::f32, ov::PartialShape{3}); + const auto epsilon = 0.25f; + const auto batchNormInterferenceNode = std::make_shared(data, + gamma, + beta, + mean, + variance, + epsilon); + ov::ResultVector results{std::make_shared(batchNormInterferenceNode)}; + return std::make_shared(results, + ov::ParameterVector{data, gamma, beta, mean, variance}, + "BatchNormInterferenceGraph"); +} + +std::shared_ptr generate(const std::shared_ptr &node) { + const auto data = std::make_shared(ov::element::f32, ov::PartialShape{4, 1, 1, 3}); + const auto block_shape = ov::op::v0::Constant::create(ov::element::i64, {4}, {1, 1, 1, 2}); + const auto crops_begin = ov::op::v0::Constant::create(ov::element::i64, {4}, {0, 0, 0, 0}); + const auto crops_end = ov::op::v0::Constant::create(ov::element::i64, {4}, {0, 0, 0, 0}); + const auto batchToSpaceNode = std::make_shared(data, + block_shape, + crops_begin, + crops_end); + ov::ResultVector results{std::make_shared(batchToSpaceNode)}; + return std::make_shared(results, ov::ParameterVector{data}, "BatchToSpaceGraph"); +} + +std::shared_ptr generate(const std::shared_ptr &node) { + const auto data = std::make_shared(ov::element::f32, ov::PartialShape{1, 1, 5, 5}); + const auto kernel = std::make_shared(ov::element::f32, ov::PartialShape{1, 1, 3, 3}); + const ov::Strides strides{1, 1}; + const ov::CoordinateDiff pads_begin{0, 0}; + const ov::CoordinateDiff pads_end{0, 0}; + const ov::Strides dilations{1, 1}; + const auto mode = ov::op::v1::BinaryConvolution::BinaryConvolutionMode::XNOR_POPCOUNT; + const auto 
pad_value = 1.0f; + const auto auto_pad = ov::op::PadType::SAME_LOWER; + const auto binaryConvolutionNode = std::make_shared(data, + kernel, + strides, + pads_begin, + pads_end, + dilations, + mode, + pad_value, + auto_pad); + ov::ResultVector results{std::make_shared(binaryConvolutionNode)}; + return std::make_shared(results, ov::ParameterVector{data, kernel}, "BinaryConvolutionGraph"); +} + +std::shared_ptr generate(const std::shared_ptr &node) { + const auto data = std::make_shared(ov::element::f32, ov::PartialShape{2, 3, 2}); + const auto buckets = std::make_shared(ov::element::f32, ov::PartialShape{4}); + const auto bucketizeNode = std::make_shared(data, buckets); + ov::ResultVector results{std::make_shared(bucketizeNode)}; + return std::make_shared(results, ov::ParameterVector{data, buckets}, "BucketizeGraph"); +} + +std::shared_ptr generate(const std::shared_ptr &node) { + const auto data = std::make_shared(ov::element::f32, ov::PartialShape{100, 3, 1200}); + const auto sequence_mask = std::make_shared(ov::element::f32, ov::PartialShape{100, 3}); + const auto CTCGreedyDecoderNode = std::make_shared(data, sequence_mask, false); + ov::ResultVector results{std::make_shared(CTCGreedyDecoderNode)}; + return std::make_shared(results, ov::ParameterVector{data, sequence_mask}, "CTCGreedyDecoderGraph"); +} + +std::shared_ptr generate(const std::shared_ptr &node) { + const auto data = std::make_shared(ov::element::f32, ov::PartialShape{3, 100, 1200}); + const auto sequence_length = std::make_shared(ov::element::i32, ov::PartialShape{3}); + const auto CTCGreedyDecoderSeqLenNode = std::make_shared(data, sequence_length); + ov::ResultVector results{std::make_shared(CTCGreedyDecoderSeqLenNode)}; + return std::make_shared(results, ov::ParameterVector{data, sequence_length}, "CTCGreedyDecoderSeqLenGraph"); +} + +std::shared_ptr generate(const std::shared_ptr &node) { + const auto logits = std::make_shared(ov::element::f32, ov::PartialShape{10, 120, 28}); + const auto logit_length = std::make_shared(ov::element::i32, ov::PartialShape{10}); + const auto labels = std::make_shared(ov::element::i32, ov::PartialShape{10, 120}); + const auto label_length = std::make_shared(ov::element::i32, ov::PartialShape{10}); + const auto blank_index = std::make_shared(ov::element::i32, ov::PartialShape{}); + const auto CTCLossNode = std::make_shared(logits, logit_length, labels, label_length, blank_index); + ov::ResultVector results{std::make_shared(CTCLossNode)}; + return std::make_shared(results, + ov::ParameterVector{logits, logit_length, labels, label_length, blank_index}, + "CTCLossGraph"); +} + +std::shared_ptr generate(const std::shared_ptr &node) { + const auto data = std::make_shared(ov::element::f32, ov::PartialShape{10, 120, 28}); + const auto clampNode = std::make_shared(data, 0.0, 2.1); + ov::ResultVector results{std::make_shared(clampNode)}; + return std::make_shared(results, ov::ParameterVector{data}, "ClampGraph"); +} + +std::shared_ptr generate(const std::shared_ptr &node) { + const auto params = ngraph::builder::makeDynamicParams(ov::element::f32, {{2, 3, 4}, {2, 7, 4}, {2, 2, 4}}); + const auto concatNode = std::make_shared(ov::NodeVector{params[0], params[1], params[2]}, 1); + ov::ResultVector results{std::make_shared(concatNode)}; + return std::make_shared(results, params, "ConcatGraph"); +} + +std::shared_ptr generate(const std::shared_ptr &node) { + const auto A = ov::op::v0::Constant::create(ov::element::f32, {2, 2}, {1, 2, 3, 4}); + const auto B = 
ov::op::v0::Constant::create(ov::element::f32, {2, 2}, {1, 2, 3, 4}); + return std::make_shared(ov::NodeVector{A, B}, ov::ParameterVector{}, "ConstantGraph"); +} + +std::shared_ptr generate(const std::shared_ptr &node) { + const auto param = std::make_shared(ov::element::f32, ov::PartialShape{2, 3, 4}); + const auto convertNode = std::make_shared(param, ov::element::i32); + ov::ResultVector results{std::make_shared(convertNode)}; + return std::make_shared(results, ov::ParameterVector{param}, "ConvertGraph"); +} + +std::shared_ptr generate(const std::shared_ptr &node) { + const auto data = std::make_shared(ov::element::i32, ov::PartialShape{256, 56}); + const auto like = std::make_shared(ov::element::f32, ov::PartialShape{3}); + const auto convertNode = std::make_shared(data, like); + ov::ResultVector results{std::make_shared(convertNode)}; + return std::make_shared(results, ov::ParameterVector{data, like}, "ConvertLikeGraph"); +} + +std::shared_ptr generate(const std::shared_ptr &node) { + const auto data = std::make_shared(ov::element::f32, ov::PartialShape{1, 1, 5, 5}); + const auto kernel = std::make_shared(ov::element::f32, ov::PartialShape{1, 1, 3, 3}); + const ov::Strides strides{1, 1}; + const ov::CoordinateDiff pads_begin{0, 0}; + const ov::CoordinateDiff pads_end{0, 0}; + const ov::Strides dilations{1, 1}; + const auto auto_pad = ov::op::PadType::SAME_LOWER; + const auto convolutionNode = std::make_shared(data, + kernel, + strides, + pads_begin, + pads_end, + dilations, + auto_pad); + ov::ResultVector results{std::make_shared(convolutionNode)}; + return std::make_shared(results, ov::ParameterVector{data, kernel}, "ConvolutionGraph"); +} + +std::shared_ptr generate(const std::shared_ptr &node) { + const auto data = std::make_shared(ov::element::f32, ov::PartialShape{1, 512, 1, 37}); + const auto kernel = std::make_shared(ov::element::f32, ov::PartialShape{512, 256, 1, 1}); + const auto output_shape = ov::op::v0::Constant::create(ov::element::i64, {2}, {1, 74}); + const ov::Strides strides{1, 2}; + const ov::CoordinateDiff pads_begin{0, 0}; + const ov::CoordinateDiff pads_end{0, 0}; + const ov::Strides dilations{1, 1}; + const auto auto_pad = ov::op::PadType::SAME_LOWER; + const auto convolutionBackpropDataNode = std::make_shared(data, + kernel, + output_shape, + strides, + pads_begin, + pads_end, + dilations, + auto_pad); + ov::ResultVector results{std::make_shared(convolutionBackpropDataNode)}; + return std::make_shared(results, ov::ParameterVector{data, kernel}, "ConvolutionBackpropDataGraph"); +} + +std::shared_ptr generate(const std::shared_ptr &node) { + const auto data = std::make_shared(ov::element::f32, ov::PartialShape{1, 2}); + const auto cumSumNode = std::make_shared(data); + ov::ResultVector results{std::make_shared(cumSumNode)}; + return std::make_shared(results, ov::ParameterVector{data}, "CumSumGraph"); +} + +std::shared_ptr generate(const std::shared_ptr &node) { + const auto data = std::make_shared(ov::element::f32, ov::PartialShape{2, 7938, 63, 38}); + const auto coord = std::make_shared(ov::element::f32, ov::PartialShape{300, 5}); + const auto deformablePSROIPoolingNode = std::make_shared(data, coord, 882, 0.0625, 3); + ov::ResultVector results{std::make_shared(deformablePSROIPoolingNode)}; + return std::make_shared(results, ov::ParameterVector{data, coord}, "DeformablePSROIPoolingGraph"); +} + +std::shared_ptr generate(const std::shared_ptr &node) { + const auto data = std::make_shared(ov::element::f32, ov::PartialShape{1, 16, 3, 1080, 1616}); + const auto 
depthToSpaceNode = std::make_shared(data, ov::op::v0::DepthToSpace::DepthToSpaceMode::DEPTH_FIRST, 2); + ov::ResultVector results{std::make_shared(depthToSpaceNode)}; + return std::make_shared(results, ov::ParameterVector{data}, "DepthToSpaceGraph"); +} + +std::shared_ptr generate(const std::shared_ptr &node) { + const auto params = ngraph::builder::makeDynamicParams(ov::element::f32, {{3}, {3}}); + const auto einsumNode = std::make_shared(ov::OutputVector{params.front(), params.back()}, "i,i->"); + ov::ResultVector results{std::make_shared(einsumNode)}; + return std::make_shared(results, params, "EinsumGraph"); +} + std::shared_ptr generate(const std::shared_ptr &node) { const auto params = ngraph::builder::makeDynamicParams(ov::element::f32, {{3, 2}}); const auto elu = std::make_shared(params[0], 0.5f); @@ -234,6 +486,13 @@ std::shared_ptr generate(const std::shared_ptr &nod return std::make_shared(results, params, "Gelu"); } +std::shared_ptr generate(const std::shared_ptr &node) { + const auto params = ngraph::builder::makeDynamicParams(ov::element::f32, {{8}}); + const auto gelu = std::make_shared(params[0]); + ov::ResultVector results{std::make_shared(gelu)}; + return std::make_shared(results, params, "Gelu"); +} + std::shared_ptr generate(const std::shared_ptr &node) { const auto params = ngraph::builder::makeDynamicParams(ov::element::f32, {{1, 1, 6}, {1, 1, 1, 3}}); const auto group_convolution = std::make_shared(params[0], @@ -289,19 +548,18 @@ std::shared_ptr generate(const std::shared_ptr(ov::element::i64, {4}, {1, 1, 1, 2}); - const auto scales = ngraph::builder::makeConstant(ov::element::f32, {1}, {1.0}); + const auto params = ngraph::builder::makeDynamicParams({ov::element::f32, ov::element::i32}, {{2, 2, 30, 60}, {15, 30}}); + const auto scales = ngraph::builder::makeConstant(ov::element::f32, {2}, {0.5f, 0.5f}); + const auto axes = ngraph::builder::makeConstant(ov::element::i64, {2}, {2, 3}); const InterpolateAttrs attrs{InterpolateMode::NEAREST, - ShapeCalcMode::SIZES, + ShapeCalcMode::SCALES, std::vector{0, 0, 0, 0}, std::vector{0, 0, 0, 0}, TransformMode::HALF_PIXEL, NearestMode::ROUND_PREFER_FLOOR, false, -0.75}; - const auto interpolate = std::make_shared(params[0], out_shape_in, scales, attrs); + const auto interpolate = std::make_shared(params[0], params[1], scales, axes, attrs); ov::ResultVector results{std::make_shared(interpolate)}; return std::make_shared(results, params, "Interpolate-4"); } @@ -328,7 +586,7 @@ std::shared_ptr generate(const std::shared_ptr &n std::shared_ptr generate(const std::shared_ptr &node) { const auto params = ngraph::builder::makeDynamicParams(ov::element::f32, {{2, 3, 2, 1}}); - const auto axes = ngraph::builder::makeConstant(ov::element::i64, {1}, std::vector{2}); + const auto axes = ngraph::builder::makeConstant(ov::element::i64, {1}, std::vector{1}); const auto lrn = std::make_shared(params[0], axes, 3, 0.5, 1, 3); ov::ResultVector results{std::make_shared(lrn)}; return std::make_shared(results, params, "LRN"); @@ -927,6 +1185,162 @@ std::shared_ptr generate(const std::shared_ptr(results, params, "VariadicSplitGraph"); } +std::shared_ptr generateArithmeticReductionKeepDims(const std::shared_ptr &node) { + const auto data = std::make_shared(ov::element::f32, ov::PartialShape{3, 3}); + const auto axes = ov::op::v0::Constant::create(ov::element::i32, {1}, {1}); + std::shared_ptr reduceNode; + if (ov::is_type(node)) { + reduceNode = std::make_shared(data, axes, true); + } else if (ov::is_type(node)) { + reduceNode = 
std::make_shared(data, axes, true); + } else if (ov::is_type(node)) { + reduceNode = std::make_shared(data, axes, true); + } else if (ov::is_type(node)) { + reduceNode = std::make_shared(data, axes, true); + } else if (ov::is_type(node)) { + reduceNode = std::make_shared(data, axes, true); + } else if (ov::is_type(node)) { + reduceNode = std::make_shared(data, axes, true); + } else if (ov::is_type(node)) { + reduceNode = std::make_shared(data, axes, true); + } else { + return nullptr; + } + + ov::ResultVector results{std::make_shared(reduceNode)}; + return std::make_shared(results, ov::ParameterVector{data}, "ArithmeticReductionKeepDimsGraph"); +} + +std::shared_ptr generateLogicalReductionKeepDims(const std::shared_ptr &node) { + const auto data = std::make_shared(ov::element::boolean, ov::PartialShape{3, 3}); + const auto axes = ov::op::v0::Constant::create(ov::element::i32, {1}, {1}); + std::shared_ptr reduceNode; + if (ov::is_type(node)) { + reduceNode = std::make_shared(data, axes, false); + } else if (ov::is_type(node)) { + reduceNode = std::make_shared(data, axes, false); + } else { + return nullptr; + } + + ov::ResultVector results{std::make_shared(reduceNode)}; + return std::make_shared(results, ov::ParameterVector{data}, "LogicalReductionKeepDimsGraph"); +} + +std::shared_ptr generateMaxPoolBase(const std::shared_ptr &node) { + const auto data = std::make_shared(ov::element::f32, ov::PartialShape{1, 7, 3}); + const ov::Strides strides{1}; + const ov::Strides dilations{1}; + const ov::Shape pads_begin{0}; + const ov::Shape pads_end{0}; + const ov::Shape kernel_shape{3}; + const auto rounding_mode = ov::op::RoundingType::FLOOR; + const auto auto_pad = ov::op::PadType::VALID; + std::shared_ptr maxPoolNode; + if (ov::is_type(node)) { + maxPoolNode = std::make_shared(data, strides, pads_begin, pads_end, kernel_shape, rounding_mode, auto_pad); + } else if (ov::is_type(node)) { + maxPoolNode = std::make_shared(data, strides, dilations, pads_begin, pads_end, kernel_shape); + } else { + return nullptr; + } + + ov::ResultVector results{std::make_shared(maxPoolNode)}; + return std::make_shared(results, ov::ParameterVector{data}, "MaxPoolBaseGraph"); +} + +std::shared_ptr generateScatterBase(const std::shared_ptr &node) { + const auto data = std::make_shared(ov::element::f32, ov::PartialShape{2, 3, 4}); + const auto indices = std::make_shared(ov::element::i32, ov::PartialShape{2, 1}); + const auto updates = std::make_shared(ov::element::f32, ov::PartialShape{2, 2, 1, 4}); + const auto axis = ov::op::v0::Constant::create(ov::element::i32, {1}, {1}); + std::shared_ptr scatterNode; + if (ov::is_type(node)) { + scatterNode = std::make_shared(data, indices, updates, axis); + } else { + return nullptr; + } + + ov::ResultVector results{std::make_shared(scatterNode)}; + return std::make_shared(results, ov::ParameterVector{data, indices, updates}, "ScatterBaseGraph"); +} + +std::shared_ptr generateScatterNDBase(const std::shared_ptr &node) { + const auto data = std::make_shared(ov::element::f32, ov::PartialShape{2, 2}); + const auto indices = std::make_shared(ov::element::i32, ov::PartialShape{2, 1}); + const auto updates = std::make_shared(ov::element::f32, ov::PartialShape{2, 2}); + std::shared_ptr scatterNode; + if (ov::is_type(node)) { + scatterNode = std::make_shared(data, indices, updates); + } else { + return nullptr; + } + + ov::ResultVector results{std::make_shared(scatterNode)}; + return std::make_shared(results, ov::ParameterVector{data, indices, updates}, "ScatterNDBaseGraph"); +} + 
+std::shared_ptr generateUnaryEltwise(const std::shared_ptr &node) { + const auto param = std::make_shared(ov::element::f32, ov::PartialShape{1, 2}); + std::shared_ptr eltwiseNode; + if (ov::is_type(node)) { + eltwiseNode = std::make_shared(param); + } else if (ov::is_type(node)) { + eltwiseNode = std::make_shared(param); + } else if (ov::is_type(node)) { + eltwiseNode = std::make_shared(param); + } else if (ov::is_type(node)) { + eltwiseNode = std::make_shared(param); + } else if (ov::is_type(node)) { + eltwiseNode = std::make_shared(param); + } else if (ov::is_type(node)) { + eltwiseNode = std::make_shared(param); + } else if (ov::is_type(node)) { + eltwiseNode = std::make_shared(param); + } else if (ov::is_type(node)) { + eltwiseNode = std::make_shared(param); + } else if (ov::is_type(node)) { + eltwiseNode = std::make_shared(param); + } else if (ov::is_type(node)) { + eltwiseNode = std::make_shared(param); + } else if (ov::is_type(node)) { + eltwiseNode = std::make_shared(param); + } else if (ov::is_type(node)) { + eltwiseNode = std::make_shared(param); + } else if (ov::is_type(node)) { + eltwiseNode = std::make_shared(param); + } else if (ov::is_type(node)) { + eltwiseNode = std::make_shared(param); + } else if (ov::is_type(node)) { + eltwiseNode = std::make_shared(param); + } else if (ov::is_type(node)) { + eltwiseNode = std::make_shared(param); + } else if (ov::is_type(node)) { + eltwiseNode = std::make_shared(param); + } else if (ov::is_type(node)) { + eltwiseNode = std::make_shared(param); + } else if (ov::is_type(node)) { + eltwiseNode = std::make_shared(param); + } else if (ov::is_type(node)) { + eltwiseNode = std::make_shared(param); + } else if (ov::is_type(node)) { + eltwiseNode = std::make_shared(param); + } else if (ov::is_type(node)) { + eltwiseNode = std::make_shared(param); + } else if (ov::is_type(node)) { + eltwiseNode = std::make_shared(param); + } else if (ov::is_type(node)) { + eltwiseNode = std::make_shared(param); + } else if (ov::is_type(node)) { + eltwiseNode = std::make_shared(param); + } else { + return nullptr; + } + + ov::ResultVector results{std::make_shared(eltwiseNode)}; + return std::make_shared(results, ov::ParameterVector{param}, "UnaryEltwiseGraph"); +} + std::shared_ptr generateBinaryEltwise(const std::shared_ptr &node) { const auto params = ngraph::builder::makeDynamicParams(ov::element::f32, {{1, 2}, {1, 2}}); std::shared_ptr eltwiseNode; @@ -1094,7 +1508,6 @@ std::shared_ptr generateReadValueBase(const std::shared_ptr generateDeformableConvolutionBase(const std::shared_ptr &node) { const auto params = ngraph::builder::makeDynamicParams(ov::element::f32, {{1, 2, 4, 4}, {1, 18, 2, 2}, @@ -1356,6 +1769,18 @@ std::shared_ptr generateGraph() { std::shared_ptr node = std::shared_ptr(new T); if (ov::is_type(node)) { return generateBinaryEltwise(node); + } else if (ov::is_type(node)) { + return generateArithmeticReductionKeepDims(node); + } else if (ov::is_type(node)) { + return generateLogicalReductionKeepDims(node); + } else if (ov::is_type(node)) { + return generateMaxPoolBase(node); + } else if (ov::is_type(node)) { + return generateScatterBase(node); + } else if (ov::is_type(node)) { + return generateScatterNDBase(node); + } else if (ov::is_type(node)) { + return generateUnaryEltwise(node); } else if (ov::is_type(node)) { return generateBinaryEltwiseComp(node); } else if (ov::is_type(node)) { @@ -1365,8 +1790,6 @@ std::shared_ptr generateGraph() { } else if (ov::is_type(node) || ov::is_type(node)) { return generateConvertColor(node); - } else if 
(ov::is_type(node)) { - return generateMultiSubGraph(node); } else if (ov::is_type(node)) { return generateNmsBase(node); } else if (ov::is_type(node)) { @@ -1389,6 +1812,8 @@ std::shared_ptr generateGraph() { return generateRNNCellBase(node); } else if (ov::is_type(node)) { return generateSubGraphOp(node); + } else if (ov::is_type(node)) { + return generateMultiSubGraph(node); } return generate(node); From fdf12c95379b4075b070545d4a318751a39df5f9 Mon Sep 17 00:00:00 2001 From: Irina Efode Date: Thu, 3 Mar 2022 14:09:55 +0300 Subject: [PATCH 169/310] Update main.cpp (#10740) --- .../plugin/conformance/subgraphs_dumper/src/main.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/tests/functional/plugin/conformance/subgraphs_dumper/src/main.cpp b/src/tests/functional/plugin/conformance/subgraphs_dumper/src/main.cpp index 2f2702800e6..1a3634dfe53 100644 --- a/src/tests/functional/plugin/conformance/subgraphs_dumper/src/main.cpp +++ b/src/tests/functional/plugin/conformance/subgraphs_dumper/src/main.cpp @@ -93,15 +93,15 @@ int main(int argc, char *argv[]) { std::vector local_cache_dirs = CommonTestUtils::splitStringByDelimiter(FLAGS_local_cache); std::vector dirs = CommonTestUtils::splitStringByDelimiter(FLAGS_input_folders); - auto cachedOps = findModelsInDirs(local_cache_dirs); auto models = findModelsInDirs(dirs); auto cache = SubgraphsDumper::OPCache::make_cache(); if (!FLAGS_local_cache.empty()) { + auto cachedOps = findModelsInDirs(local_cache_dirs); cacheModels(cache, ret_code, cachedOps, FLAGS_extract_body); } cacheModels(cache, ret_code, models, FLAGS_extract_body); cache->serialize_cached_ops(FLAGS_output_folder); return ret_code; -} \ No newline at end of file +} From 28889c483354eb6e161693df22dafc417c81febd Mon Sep 17 00:00:00 2001 From: Irina Efode Date: Thu, 3 Mar 2022 14:10:07 +0300 Subject: [PATCH 170/310] [IE TESTS][CONFORMANCE] Fix Crashes in ReadIRTest::SetUp() (#10736) * [IE TESTS][CONFORMANCE] Fix Crashes in ReadIRTest::SetUp() * remove extra lines * Update read_ir.cpp --- .../functional/shared_test_classes/src/base/ov_subgraph.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/tests/functional/shared_test_classes/src/base/ov_subgraph.cpp b/src/tests/functional/shared_test_classes/src/base/ov_subgraph.cpp index 2700cf0eb45..b30b90cf71c 100644 --- a/src/tests/functional/shared_test_classes/src/base/ov_subgraph.cpp +++ b/src/tests/functional/shared_test_classes/src/base/ov_subgraph.cpp @@ -57,7 +57,7 @@ void SubgraphBaseTest::run() { if (isCurrentTestDisabled) GTEST_SKIP() << "Disabled test due to configuration" << std::endl; - ASSERT_FALSE(targetStaticShapes.empty()) << "Target Static Shape is empty!!!"; + ASSERT_FALSE(targetStaticShapes.empty() && !function->get_parameters().empty()) << "Target Static Shape is empty!!!"; std::string errorMessage; try { compile_model(); @@ -317,6 +317,10 @@ void SubgraphBaseTest::validate() { } void SubgraphBaseTest::init_input_shapes(const std::vector& shapes) { + if (shapes.empty()) { + targetStaticShapes = {{}}; + return; + } size_t targetStaticShapeSize = shapes.front().second.size(); for (size_t i = 1; i < shapes.size(); ++i) { if (targetStaticShapeSize < shapes[i].second.size()) { From f81f819ecda201892c1eb1a83f0f493672f715fc Mon Sep 17 00:00:00 2001 From: Maxim Gordeev Date: Thu, 3 Mar 2022 16:35:41 +0300 Subject: [PATCH 171/310] [IE Samples] Improved processing outputs for model with more than one output (#10737) * Improved processing outputs for model with more than one output * 
fixed condition * added checking count of output/reference files --- samples/cpp/speech_sample/main.cpp | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/samples/cpp/speech_sample/main.cpp b/samples/cpp/speech_sample/main.cpp index f62d9f009f0..431bddee3e6 100644 --- a/samples/cpp/speech_sample/main.cpp +++ b/samples/cpp/speech_sample/main.cpp @@ -342,11 +342,17 @@ int main(int argc, char* argv[]) { } count_file = reference_name_files.empty() ? 1 : reference_name_files.size(); } + if (count_file > executableNet.outputs().size()) { + throw std::logic_error( + "The number of output/reference files is not equal to the number of network outputs."); + } // ----------------------------------------------------------------------------------------------------- // --------------------------- Step 5. Do inference -------------------------------------------------------- std::vector> ptrUtterances; - std::vector> vectorPtrScores((outputs.size() == 0) ? 1 : outputs.size()); - std::vector numScoresPerOutput((outputs.size() == 0) ? 1 : outputs.size()); + std::vector> vectorPtrScores((outputs.size() == 0) ? executableNet.outputs().size() + : outputs.size()); + std::vector numScoresPerOutput((outputs.size() == 0) ? executableNet.outputs().size() + : outputs.size()); std::vector> vectorPtrReferenceScores(reference_name_files.size()); std::vector vectorFrameError(reference_name_files.size()), vectorTotalError(reference_name_files.size()); @@ -474,8 +480,9 @@ int main(int argc, char* argv[]) { inferRequest.inferRequest.wait(); if (inferRequest.frameIndex >= 0) for (size_t next_output = 0; next_output < count_file; next_output++) { - std::string outputName = (outputs.size() == 0) ? executableNet.output(0).get_any_name() - : output_names[next_output]; + std::string outputName = (outputs.size() == 0) + ? executableNet.output(next_output).get_any_name() + : output_names[next_output]; auto dims = executableNet.output(outputName).get_shape(); numScoresPerOutput[next_output] = std::accumulate(std::begin(dims), std::end(dims), @@ -493,10 +500,6 @@ int main(int argc, char* argv[]) { ov::Tensor outputBlob = inferRequest.inferRequest.get_tensor(executableNet.output(outputName)); - if (!outputs.empty()) { - outputBlob = - inferRequest.inferRequest.get_tensor(executableNet.output(outputName)); - } // locked memory holder should be alive all time while access to its buffer happens auto byteSize = numScoresPerOutput[next_output] * sizeof(float); std::memcpy(outputFrame, outputBlob.data(), byteSize); @@ -654,8 +657,8 @@ int main(int argc, char* argv[]) { } if (!FLAGS_r.empty()) { // print statistical score error - std::string outputName = - (outputs.size() == 0) ? executableNet.output(0).get_any_name() : output_names[next_output]; + std::string outputName = (outputs.size() == 0) ? executableNet.output(next_output).get_any_name() + : output_names[next_output]; std::cout << "Output name: " << outputName << std::endl; std::cout << "Number scores per frame: " << numScoresPerOutput[next_output] / batchSize << std::endl << std::endl; From f8ce57319be215938ea129a2653824adf2d0c4bc Mon Sep 17 00:00:00 2001 From: Vladimir Gavrilov Date: Thu, 3 Mar 2022 16:47:23 +0300 Subject: [PATCH 172/310] Specifications of operations RDFT and IRDFT (#10242) * Written the draft of the specification of the operation RFFT. * Started to write the specification of the operation IRFFT. * Small fix. * Renamed RFFT operation as RDFT. * Fix in Operations_specifications.md. 
* Written the specification of the operation IRDFT. * Fixes in examples. * Fixes in opset9.md and Operations_specifications.md. * Small fix. * Replaced opset8 by opset9 in opset9.md. * Deleted redundant sentences. * Small fix. * Replaced input_shape by data_shape. * Fixed mistypes. * Fixes of mistypes. * Fixed typo. * Fixed RDFT specification, in order to perform signal_size input as in TF and PyTorch. * Fixes in examples for RDFT. * Fixes in the output shape calculation of IRDFT. Now this calculation is as in TF and PyTorch. --- .../Operations_specifications.md | 4 +- docs/ops/opset.md | 25 +- docs/ops/opset9.md | 180 +++++++++++++++ docs/ops/signals/IRDFT_9.md | 218 ++++++++++++++++++ docs/ops/signals/RDFT_9.md | 210 +++++++++++++++++ 5 files changed, 624 insertions(+), 13 deletions(-) create mode 100644 docs/ops/opset9.md create mode 100644 docs/ops/signals/IRDFT_9.md create mode 100644 docs/ops/signals/RDFT_9.md diff --git a/docs/OV_Runtime_UG/Operations_specifications.md b/docs/OV_Runtime_UG/Operations_specifications.md index 937d5def19b..264b0e68982 100644 --- a/docs/OV_Runtime_UG/Operations_specifications.md +++ b/docs/OV_Runtime_UG/Operations_specifications.md @@ -4,7 +4,7 @@ .. toctree:: :maxdepth: 1 - + openvino_docs_ops_arithmetic_Abs_1 openvino_docs_ops_arithmetic_Acos_1 openvino_docs_ops_arithmetic_Acosh_3 @@ -85,6 +85,7 @@ openvino_docs_ops_image_I420toBGR_8 openvino_docs_ops_image_I420toRGB_8 openvino_docs_ops_signals_IDFT_7 + openvino_docs_ops_signals_IRDFT_9 openvino_docs_ops_infrastructure_If_8 openvino_docs_ops_image_Interpolate_1 openvino_docs_ops_image_Interpolate_4 @@ -136,6 +137,7 @@ openvino_docs_ops_generation_RandomUniform_8 openvino_docs_ops_generation_Range_1 openvino_docs_ops_generation_Range_4 + openvino_docs_ops_signals_RDFT_9 openvino_docs_ops_infrastructure_ReadValue_3 openvino_docs_ops_activation_ReLU_1 openvino_docs_ops_reduction_ReduceL1_4 diff --git a/docs/ops/opset.md b/docs/ops/opset.md index af166242766..48e4d1ef01e 100644 --- a/docs/ops/opset.md +++ b/docs/ops/opset.md @@ -5,7 +5,8 @@ .. toctree:: :maxdepth: 1 :hidden: - + + openvino_docs_ops_opset9 openvino_docs_ops_opset8 openvino_docs_ops_opset7 openvino_docs_ops_opset6 @@ -14,24 +15,24 @@ openvino_docs_ops_opset3 openvino_docs_ops_opset2 openvino_docs_ops_opset1 - + @endsphinxdirective -According to capabilities of supported deep learning frameworks and hardware capabilities of a target inference device, all operations are combined into operations sets each fully supported in a specific version of OpenVINO™ toolkit. +According to capabilities of supported deep learning frameworks and hardware capabilities of a target inference device, all operations are combined into operations sets each fully supported in a specific version of OpenVINO™ toolkit. -This topic provides a complete list of available sets of operations supported in different versions of OpenVINO™ toolkit. It's highly recommended to use the actual version of the operations set for a particular release. For a list of operations included into an operations set, click a link in the table. +This topic provides a complete list of available sets of operations supported in different versions of OpenVINO™ toolkit. Use the relevant version of the operations set for a particular release. For a list of operations included into an operations set, click a link in the table. 
-| OpenVINO™ Version | Actual Operations Set | -| :---------------- | :------------------------------- | +| OpenVINO™ Version | Actual Operations Set | +| :---------------- | :------------------------------- | | 2022.1 | [opset8](opset8.md) | -| 2021.4 | [opset7](opset7.md) | -| 2021.3 | [opset6](opset6.md) | -| 2021.2 | [opset5](opset5.md) | -| 2021.1 | [opset4](opset4.md) | +| 2021.4 | [opset7](opset7.md) | +| 2021.3 | [opset6](opset6.md) | +| 2021.2 | [opset5](opset5.md) | +| 2021.1 | [opset4](opset4.md) | | 2020.4 | [opset3](opset3.md) | | 2020.3 | [opset2](opset2.md) | -| 2020.2 | [opset2](opset2.md) | -| 2020.1 | [opset1](opset1.md) | +| 2020.2 | [opset2](opset2.md) | +| 2020.1 | [opset1](opset1.md) | ## See Also [Deep Learning Network Intermediate Representation and Operations Sets in OpenVINO™](../MO_DG/IR_and_opsets.md) diff --git a/docs/ops/opset9.md b/docs/ops/opset9.md new file mode 100644 index 00000000000..c56abd7f480 --- /dev/null +++ b/docs/ops/opset9.md @@ -0,0 +1,180 @@ +# opset9 {#openvino_docs_ops_opset9} + +This specification document describes the `opset9` operation set supported in OpenVINO™. +Support for each particular operation from the list below depends on the capabilities of an inference plugin +and may vary among different hardware platforms and devices. Examples of operation instances are provided as IR V10 xml +snippets. Such IR is generated by the Model Optimizer. The semantics match corresponding nGraph operation classes +declared in `namespace opset9`. + + +## Table of Contents + +* [Abs](arithmetic/Abs_1.md) +* [Acos](arithmetic/Acos_1.md) +* [Acosh](arithmetic/Acosh_3.md) +* [AdaptiveAvgPool](pooling/AdaptiveAvgPool_8.md) +* [AdaptiveMaxPool](pooling/AdaptiveMaxPool_8.md) +* [Add](arithmetic/Add_1.md) +* [Asin](arithmetic/Asin_1.md) +* [Asinh](arithmetic/Asinh_3.md) +* [Assign](infrastructure/Assign_3.md) +* [Atan](arithmetic/Atan_1.md) +* [Atanh](arithmetic/Atanh_3.md) +* [AvgPool](pooling/AvgPool_1.md) +* [BatchNormInference](normalization/BatchNormInference_5.md) +* [BatchToSpace](movement/BatchToSpace_2.md) +* [BinaryConvolution](convolution/BinaryConvolution_1.md) +* [Broadcast](movement/Broadcast_3.md) +* [Bucketize](condition/Bucketize_3.md) +* [CTCGreedyDecoder](sequence/CTCGreedyDecoder_1.md) +* [CTCGreedyDecoderSeqLen](sequence/CTCGreedyDecoderSeqLen_6.md) +* [CTCLoss](sequence/CTCLoss_4.md) +* [Ceiling](arithmetic/Ceiling_1.md) +* [Clamp](activation/Clamp_1.md) +* [Concat](movement/Concat_1.md) +* [Constant](infrastructure/Constant_1.md) +* [Convert](type/Convert_1.md) +* [ConvertLike](type/ConvertLike_1.md) +* [Convolution](convolution/Convolution_1.md) +* [ConvolutionBackpropData](convolution/ConvolutionBackpropData_1.md) +* [Cos](arithmetic/Cos_1.md) +* [Cosh](arithmetic/Cosh_1.md) +* [CumSum](arithmetic/CumSum_3.md) +* [DeformableConvolution](convolution/DeformableConvolution_8.md) +* [DeformablePSROIPooling](detection/DeformablePSROIPooling_1.md) +* [DepthToSpace](movement/DepthToSpace_1.md) +* [DetectionOutput](detection/DetectionOutput_8.md) +* [DFT](signals/DFT_7.md) +* [Divide](arithmetic/Divide_1.md) +* [Einsum](matrix/Einsum_7.md) +* [Elu](activation/Elu_1.md) +* [EmbeddingBagOffsetsSum](sparse/EmbeddingBagOffsetsSum_3.md) +* [EmbeddingBagPackedSum](sparse/EmbeddingBagPackedSum_3.md) +* [EmbeddingSegmentsSum](sparse/EmbeddingSegmentsSum_3.md) +* [Equal](comparison/Equal_1.md) +* [Erf](arithmetic/Erf_1.md) +* [Exp](activation/Exp_1.md) +* [ExperimentalDetectronDetectionOutput_6](detection/ExperimentalDetectronDetectionOutput_6.md) 
+* [ExperimentalDetectronGenerateProposalsSingleImage_6](detection/ExperimentalDetectronGenerateProposalsSingleImage_6.md) +* [ExperimentalDetectronPriorGridGenerator_6](detection/ExperimentalDetectronPriorGridGenerator_6.md) +* [ExperimentalDetectronROIFeatureExtractor_6](detection/ExperimentalDetectronROIFeatureExtractor_6.md) +* [ExperimentalDetectronTopKROIs_6](sort/ExperimentalDetectronTopKROIs_6.md) +* [ExtractImagePatches](movement/ExtractImagePatches_3.md) +* [FakeQuantize](quantization/FakeQuantize_1.md) +* [Floor](arithmetic/Floor_1.md) +* [FloorMod](arithmetic/FloorMod_1.md) +* [Gather](movement/Gather_8.md) +* [GatherElements](movement/GatherElements_6.md) +* [GatherND](movement/GatherND_8.md) +* [GatherTree](movement/GatherTree_1.md) +* [Gelu](activation/GELU_7.md) +* [Greater](comparison/Greater_1.md) +* [GreaterEqual](comparison/GreaterEqual_1.md) +* [GRN](normalization/GRN_1.md) +* [GroupConvolution](convolution/GroupConvolution_1.md) +* [GroupConvolutionBackpropData](convolution/GroupConvolutionBackpropData_1.md) +* [GRUCell](sequence/GRUCell_3.md) +* [GRUSequence](sequence/GRUSequence_5.md) +* [HardSigmoid](activation/HardSigmoid_1.md) +* [HSigmoid](activation/HSigmoid_5.md) +* [HSwish](activation/HSwish_4.md) +* [IDFT](signals/IDFT_7.md) +* [I420toBGR](image/I420toBGR_8.md) +* [I420toRGB](image/I420toRGB_8.md) +* [If](condition/If_8.md) +* [Interpolate](image/Interpolate_4.md) +* [IRDFT](signals/IRDFT_9.md) +* [Less](comparison/Less_1.md) +* [LessEqual](comparison/LessEqual_1.md) +* [Log](arithmetic/Log_1.md) +* [LogicalAnd](logical/LogicalAnd_1.md) +* [LogicalNot](logical/LogicalNot_1.md) +* [LogicalOr](logical/LogicalOr_1.md) +* [LogicalXor](logical/LogicalXor_1.md) +* [LogSoftmax](activation/LogSoftmax_5.md) +* [Loop](infrastructure/Loop_5.md) +* [LRN](normalization/LRN_1.md) +* [LSTMCell](sequence/LSTMCell_1.md) +* [LSTMSequence](sequence/LSTMSequence_1.md) +* [MatMul](matrix/MatMul_1.md) +* [MatrixNMS](sort/MatrixNMS_8.md) +* [MaxPool](pooling/MaxPool_8.md) +* [Maximum](arithmetic/Maximum_1.md) +* [Minimum](arithmetic/Minimum_1.md) +* [Mish](activation/Mish_4.md) +* [Mod](arithmetic/Mod_1.md) +* [MVN](normalization/MVN_6.md) +* [MulticlassNMS](sort/MulticlassNMS_8.md) +* [Multiply](arithmetic/Multiply_1.md) +* [Negative](arithmetic/Negative_1.md) +* [NonMaxSuppression](sort/NonMaxSuppression_5.md) +* [NonZero](condition/NonZero_3.md) +* [NormalizeL2](normalization/NormalizeL2_1.md) +* [NotEqual](comparison/NotEqual_1.md) +* [NV12toBGR](image/NV12toBGR_8.md) +* [NV12toRGB](image/NV12toRGB_8.md) +* [OneHot](sequence/OneHot_1.md) +* [Pad](movement/Pad_1.md) +* [Parameter](infrastructure/Parameter_1.md) +* [Power](arithmetic/Power_1.md) +* [PReLU](activation/PReLU_1.md) +* [PriorBoxClustered](detection/PriorBoxClustered_1.md) +* [PriorBox](detection/PriorBox_8.md) +* [Proposal](detection/Proposal_4.md) +* [PSROIPooling](detection/PSROIPooling_1.md) +* [RandomUniform](generation/RandomUniform_8.md) +* [Range](generation/Range_4.md) +* [RDFT](signals/RDFT_9.md) +* [ReLU](activation/ReLU_1.md) +* [ReadValue](infrastructure/ReadValue_3.md) +* [ReduceL1](reduction/ReduceL1_4.md) +* [ReduceL2](reduction/ReduceL2_4.md) +* [ReduceLogicalAnd](reduction/ReduceLogicalAnd_1.md) +* [ReduceLogicalOr](reduction/ReduceLogicalOr_1.md) +* [ReduceMax](reduction/ReduceMax_1.md) +* [ReduceMean](reduction/ReduceMean_1.md) +* [ReduceMin](reduction/ReduceMin_1.md) +* [ReduceProd](reduction/ReduceProd_1.md) +* [ReduceSum](reduction/ReduceSum_1.md) +* [RegionYolo](detection/RegionYolo_1.md) +* 
[ReorgYolo](detection/ReorgYolo_1.md) +* [Reshape](shape/Reshape_1.md) +* [Result](infrastructure/Result_1.md) +* [ReverseSequence](movement/ReverseSequence_1.md) +* [RNNCell](sequence/RNNCell_3.md) +* [RNNSequence](sequence/RNNSequence_5.md) +* [ROIAlign](detection/ROIAlign_3.md) +* [ROIPooling](detection/ROIPooling_1.md) +* [Roll](movement/Roll_7.md) +* [Round](arithmetic/Round_5.md) +* [ScatterElementsUpdate](movement/ScatterElementsUpdate_3.md) +* [ScatterNDUpdate](movement/ScatterNDUpdate_3.md) +* [ScatterUpdate](movement/ScatterUpdate_3.md) +* [Select](condition/Select_1.md) +* [Selu](activation/Selu_1.md) +* [ShapeOf](shape/ShapeOf_3.md) +* [ShuffleChannels](movement/ShuffleChannels_1.md) +* [Sigmoid](activation/Sigmoid_1.md) +* [Sign](arithmetic/Sign_1.md) +* [Sin](arithmetic/Sin_1.md) +* [Sinh](arithmetic/Sinh_1.md) +* [Slice](movement/Slice_8.md) +* [SoftMax](activation/SoftMax_8.md) +* [SoftPlus](activation/SoftPlus_4.md) +* [SpaceToBatch](movement/SpaceToBatch_2.md) +* [SpaceToDepth](movement/SpaceToDepth_1.md) +* [Split](movement/Split_1.md) +* [Sqrt](arithmetic/Sqrt_1.md) +* [SquaredDifference](arithmetic/SquaredDifference_1.md) +* [Squeeze](shape/Squeeze_1.md) +* [StridedSlice](movement/StridedSlice_1.md) +* [Subtract](arithmetic/Subtract_1.md) +* [Swish](activation/Swish_4.md) +* [Tan](arithmetic/Tan_1.md) +* [Tanh](arithmetic/Tanh_1.md) +* [TensorIterator](infrastructure/TensorIterator_1.md) +* [Tile](movement/Tile_1.md) +* [TopK](sort/TopK_3.md) +* [Transpose](movement/Transpose_1.md) +* [Unsqueeze](shape/Unsqueeze_1.md) +* [VariadicSplit](movement/VariadicSplit_1.md) diff --git a/docs/ops/signals/IRDFT_9.md b/docs/ops/signals/IRDFT_9.md new file mode 100644 index 00000000000..8e4e940d3e5 --- /dev/null +++ b/docs/ops/signals/IRDFT_9.md @@ -0,0 +1,218 @@ +# Inverse Discrete complex-to-real Fourier Transformation (IRDFT) {#openvino_docs_ops_signals_IRDFT_9} + +**Versioned name**: *IRDFT-9* + +**Category**: *Signal processing* + +**Short description**: *IRDFT* operation performs the inverse complex-to-real discrete Fourier transformation of the input tensor by specified dimensions. + +**Attributes**: + + No attributes available. + +**Inputs** + +* **1**: `data` - Input tensor of type *T* with data for the IRDFT transformation. The last dimension of the input tensor must be equal to 2, that is the input tensor shape must have the form `[D_0, D_1, ..., D_{N-1}, 2]`, representing the real and imaginary components of complex numbers in `[:, ..., :, 0]` and in `[:, ..., :, 1]` correspondingly. **Required.** +* **2**: `axes` - 1D tensor of type *T_IND* specifying dimension indices where IRDFT is applied, and `axes` is any unordered list of indices of different dimensions of the input tensor, for example, `[0, 4]`, `[4, 0]`, `[4, 2, 1]`, `[1, 2, 3]`, `[-3, 0, -2]`. These indices should be integers from `-(r - 1)` to `(r - 2)` inclusively, where `r = rank(data)`. A negative axis `a` is interpreted as an axis `r - 1 + a`. Other dimensions do not change. The order of elements in the `axes` attribute matters, and is mapped directly to elements in the third input `signal_size`. **Required.** +* **NOTE**: The following constraint must be satisfied: `rank(data) >= len(axes) + 1 and (rank(data) - 1) not in axes and (-1) not in axes`. +* **3**: `signal_size` - 1D tensor of type *T_SIZE* describing signal size with respect to axes from the input `axes`. If `signal_size[i] == -1`, then IRDFT is calculated for full size of the axis `axes[i]`. 
If `signal_size[i] > data_shape[: r - 1][axes[i]]`, then input data is zero-padded with respect to the axis `axes[i]` at the end. Finally, if `signal_size[i] < data_shape[: r - 1][axes[i]]`, then input data is trimmed with respect to the axis `axes[i]`. More precisely, if `signal_size[i] < data_shape[: r - 1][axes[i]]`, the slice `0: signal_size[i]` of the axis `axes[i]` is considered. Optional, with default value `[data_shape[: r - 1][a] for a in axes]`. +* **NOTE**: If the input `signal_size` is specified, then the size of `signal_size` must be the same as the size of `axes`. + +**Outputs** + +* **1**: Resulting tensor with elements of the same type as the input `data` tensor and with rank `r - 1`, where `r = rank(data)`. The shape of the output has the form `[S_0, S_1, ..., S_{r-2}]`, where all `S_a` are calculated as follows: + +1. Calculate `normalized_axes`, where each `normalized_axes[i] = axes[i]`, if `axes[i] >= 0`, and `normalized_axes[i] = axes[i] + r - 1` otherwise. + +2. If `a not in normalized_axes`, then `S_a = data_shape[a]`. + +3. If `a in normalized_axes`, then `a = normalized_axes[i]` for some `i`, and `S_a` is calculated as follows: + + When `i != len(normalized_axes) - 1`, `S_a = data_shape[a]` if the `signal_size` input is not specified or `signal_size[i] == -1`, and `S_a = signal_size[i]` otherwise. + + When `i == len(normalized_axes) - 1`, `S_a = 2 * (data_shape[a] - 1)` if the `signal_size` input is not specified or `signal_size[i] == -1`, and `S_a = signal_size[i]` otherwise. + +**Types** + +* *T*: any supported floating-point type. + +* *T_IND*: `int64` or `int32`. + +* *T_SIZE*: `int64` or `int32`. + +**Detailed description**: *IRDFT* performs the inverse discrete complex-to-real Fourier transformation of the input tensor, according to the following rules. + +For simplicity, assume that an input tensor `A` has the shape `[B_0, ..., B_{k-1}, M_0, ..., M_{q-1}, 2]`, `axes=[k,...,k + q - 1]`, and `signal_size=[S_0,...,S_{q-1}]`. + +Let `D` denote the values of the input tensor `A`. + +Next, put +\f[X[j_0,\dots,j_{k-1},j_k,\dots,j_{k+q-1}]=D[j_0,\dots,j_{k-1},j_k,\dots,j_{k+q-1},0]+iD[j_0,\dots,j_{k-1},j_k,\dots,j_{k+q-1},1]\f] +for all indices `j_0,...,j_{k+q-1}`, where `i` is the imaginary unit; that is, `X` is a complex tensor. + +Define the complex tensor `F` with the shape `[B_0, ..., B_{k-1}, 2 * (M_0 - 1), ..., 2 * (M_{q-1} - 1)]` using the formula +\f[F[j_0,\dots,j_{k-1},j_k,\dots,j_p,\dots,j_{k+q-1}] = \begin{cases}X[j_0,\dots,j_{k-1},j_k,\dots,j_p,\dots,j_{k+q-1}],\text{ when }j_p=0,\dots,M_p-1;\\ \overline{X[j_0,\dots,j_{k-1},j_k,\dots,2(M_p - 1) - j_p,\dots,j_{k+q-1}]},\text{ otherwise.}\end{cases}\f] + +Construct the complex tensor `G` with the shape `[B_0, ..., B_{k-1}, S_0, ..., S_{q-1}]` in the following way. If `S_a > 2 * (M_a - 1)`, then the axis `k + a` of `F` is padded with zeros; if `S_a < 2 * (M_a - 1)`, then the axis `k + a` of `F` is trimmed, that is, only the slice `0: S_a` of this axis is considered; finally, if `S_a = 2 * (M_a - 1)`, then the full axis `k + a` of `F` is considered.
+ +Let `Y` be a complex tensor with the shape `[B_0, ..., B_{k-1}, S_0, ..., S_{q-1}]` such that +\f[Y[n_0,\dots,n_{k-1},m_0,\dots,m_{q-1}]=\frac{1}{\prod\limits_{j=0}^{q-1}S_j}\sum\limits_{p_0=0}^{S_0}\cdots\sum\limits_{p_{q-1}=0}^{S_{q-1}}X[n_0,\dots,n_{k-1},j_0,\dots,j_{q-1}]\exp\left(2\pi i\sum\limits_{b=0}^{q-1}\frac{m_bj_b}{S_b}\right)\f] +for all indices `n_0,...,n_{k-1}`, `m_0,...,m_{q-1}`. + +Finally, the result of the inverse discrete complex-to-real Fourier transform is a real part of the tensor `Y`. + +Calculations for the generic case of axes and signal sizes are similar. + +**Example**: + +There is no `signal_size` input (4D input tensor): +```xml + + + + 1 + 161 + 161 + 2 + + + 2 + + + + 1 + 161 + 320 + + + +``` + +There is no `signal_size` input (3D input tensor): +```xml + + + + 161 + 161 + 2 + + + 2 + + + + 161 + 320 + + + +``` + + +There is `signal_size` input (4D input tensor): +```xml + + + + 1 + 161 + 161 + 2 + + + 2 + + + 2 + + + + 1 + 512 + 100 + + + +``` + + +There is `signal_size` input (3D input tensor): +```xml + + + + 161 + 161 + 2 + + + 2 + + + 2 + + + + 512 + 100 + + + +``` + + +There is `signal_size` input (5D input tensor, `-1` in `signal_size`, unsorted axes): +```xml + + + + 16 + 768 + 580 + 320 + 2 + + + 3 + + + 3 + + + + 16 + 768 + 1024 + 170 + + + +``` + + +There is `signal_size` input (5D input tensor, `-1` in `signal_size`, unsorted axes, the second example): +```xml + + + + 16 + 768 + 580 + 320 + 2 + + + 3 + + + 3 + + + + 16 + 768 + 2056 + 258 + + + +``` diff --git a/docs/ops/signals/RDFT_9.md b/docs/ops/signals/RDFT_9.md new file mode 100644 index 00000000000..75989ef85dd --- /dev/null +++ b/docs/ops/signals/RDFT_9.md @@ -0,0 +1,210 @@ +# Discrete Fourier Transformation for real-valued input (RDFT) {#openvino_docs_ops_signals_RDFT_9} + +**Versioned name**: *RDFT-9* + +**Category**: *Signal processing* + +**Short description**: *RDFT* operation performs the discrete real-to-complex Fourier transformation of the input tensor by specified dimensions. + +**Attributes**: + + No attributes available. + +**Inputs** + +* **1**: `data` - Input tensor of type *T* with data for the RDFT transformation. **Required.** +* **2**: `axes` - 1D tensor of type *T_IND* specifying dimension indices where RDFT is applied, and `axes` is any unordered list of indices of different dimensions of input tensor, for example, `[0, 4]`, `[4, 0]`, `[4, 2, 1]`, `[1, 2, 3]`, `[-3, 0, -2]`. These indices should be integers from `-r` to `r - 1` inclusively, where `r = rank(data)`. A negative axis `a` is interpreted as an axis `r + a`. Other dimensions do not change. The order of elements in `axes` attribute matters, and is mapped directly to elements in the third input `signal_size`. **Required.** +* **3**: `signal_size` - 1D tensor of type *T_SIZE* describing signal size with respect to axes from the input `axes`. If `signal_size[i] == -1`, then RDFT is calculated for full size of the axis `axes[i]`. If `signal_size[i] > data_shape[axes[i]]`, then input data is zero-padded with respect to the axis `axes[i]` at the end. Finally, `signal_size[i] < data_shape[axes[i]]`, then input data is trimmed with respect to the axis `axes[i]`. More precisely, if `signal_size[i] < data_shape[axes[i]]`, the slice `0: signal_size[i]` of the axis `axes[i]` is considered. Optionally, with default value `[data_shape[a] for a in axes]`. +* **NOTE**: If the input `signal_size` is specified, the size of `signal_size` must be the same as the size of `axes`. 
+
+**Outputs**
+
+* **1**: Resulting tensor with elements of the same type as the input `data` tensor and with rank `r + 1`, where `r = rank(data)`. The shape of the output has the form `[S_0, S_1, ..., S_{r-1}, 2]`, where all `S_a` are calculated as follows:
+
+1. Calculate `normalized_axes`, where each `normalized_axes[i] = axes[i]`, if `axes[i] >= 0`, and `normalized_axes[i] = axes[i] + r` otherwise.
+
+2. If `a not in normalized_axes`, then `S_a = data_shape[a]`.
+
+3. If `a in normalized_axes`, then `a = normalized_axes[i]` for some `i`. In this case, `S_a` depends on whether `a` corresponds to the last element of `axes`:
+
+    When `i != len(normalized_axes) - 1`, `S_a` is calculated as `S_a = data_shape[a]` if the `signal_size` input is not specified or `signal_size[i] == -1`, and as `S_a = signal_size[i]` otherwise.
+
+    When `i = len(normalized_axes) - 1`, `S_a` is calculated as `S_a = data_shape[a] // 2 + 1` if the `signal_size` input is not specified or `signal_size[i] == -1`, and as `S_a = signal_size[i] // 2 + 1` otherwise.
+
+**Types**
+
+* *T*: any supported floating-point type.
+
+* *T_IND*: `int64` or `int32`.
+
+* *T_SIZE*: `int64` or `int32`.
+
+**Detailed description**: *RDFT* performs the discrete Fourier transformation of a real-valued input tensor with respect to the specified axes. Calculations are performed according to the following rules.
+
+For simplicity, assume that an input tensor `A` has the shape `[B_0, ..., B_{k-1}, M_0, ..., M_{q-1}]`, `axes=[k,...,k+q-1]`, and `signal_size=[S_0,...,S_{q-1}]`.
+
+Let `D` be the input tensor `A`, taking the `signal_size` into account; hence, `D` has the shape `[B_0, ..., B_{k-1}, S_0, ..., S_{q-1}]`.
+
+Next, let
+\f[X[j_0,\dots,j_{k-1},j_k,\dots,j_{k+q-1}]\f]
+denote the elements of this real-valued tensor, for all indices `j_0,...,j_{k+q-1}`.
+
+Then the RDFT transformation of the tensor `X` is the tensor `Y` of the shape `[B_0, ..., B_{k-1}, S_0, ..., S_{q-2}, S_{q-1} // 2 + 1]`, such that
+\f[Y[n_0,\dots,n_{k-1},m_0,\dots,m_{q-1}]=\sum\limits_{j_0=0}^{S_0-1}\cdots\sum\limits_{j_{q-1}=0}^{S_{q-1}-1}X[n_0,\dots,n_{k-1},j_0,\dots,j_{q-1}]\exp\left(-2\pi i\sum\limits_{b=0}^{q-1}\frac{m_bj_b}{S_b}\right)\f]
+for all indices `n_0,...,n_{k-1}`, `m_0,...,m_{q-1}`.
+
+Calculations for the generic case of axes and signal sizes are similar.
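+
+For intuition, this shape rule matches the usual real-to-complex FFT convention. The short NumPy sketch below (using `np.fft.rfftn` purely for illustration, with the default `signal_size`) reproduces the output shape of the first example below:
+
+```python
+import numpy as np
+
+x = np.random.rand(1, 320, 320).astype(np.float32)
+
+# axes = [1, 2], no signal_size: only the last transformed axis is halved,
+# 320 // 2 + 1 = 161, so the complex result has shape (1, 320, 161).
+y = np.fft.rfftn(x, axes=[1, 2])
+
+# RDFT returns the same values packed as real/imaginary pairs: (1, 320, 161, 2).
+packed = np.stack([y.real, y.imag], axis=-1)
+print(packed.shape)                           # (1, 320, 161, 2)
+```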
+ +**Example**: + +There is no `signal_size` input (3D input tensor): +```xml + + + + 1 + 320 + 320 + + + 2 + + + + 1 + 320 + 161 + 2 + + + +``` + +There is no `signal_size` input (2D input tensor): +```xml + + + + 320 + 320 + + + 2 + + + + 320 + 161 + 2 + + + +``` + + +There is `signal_size` input (3D input tensor): +```xml + + + + 1 + 320 + 320 + + + 2 + + + 2 + + + + 1 + 512 + 51 + 2 + + + +``` + + +There is `signal_size` input (2D input tensor): +```xml + + + + 320 + 320 + + + 2 + + + 2 + + + + 512 + 51 + 2 + + + +``` + + +There is `signal_size` input (4D input tensor, `-1` in `signal_size`, unsorted axes): +```xml + + + + 16 + 768 + 580 + 320 + + + 3 + + + 3 + + + + 16 + 768 + 513 + 170 + 2 + + + +``` + + +There is `signal_size` input (4D input tensor, `-1` in `signal_size`, unsorted axes, the second example): +```xml + + + + 16 + 768 + 580 + 320 + + + 3 + + + 3 + + + + 16 + 768 + 1029 + 258 + 2 + + + +``` From 554b50eb850f04e50eba6a3537243a2fe0a14207 Mon Sep 17 00:00:00 2001 From: Ilya Churaev Date: Thu, 3 Mar 2022 18:01:59 +0300 Subject: [PATCH 173/310] Remove redundant calls from set_argument (#10701) * Remove redundant calls from set_argument * Fixed tests --- src/core/src/node.cpp | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/src/core/src/node.cpp b/src/core/src/node.cpp index 349ff0cbf13..5c856729dfb 100644 --- a/src/core/src/node.cpp +++ b/src/core/src/node.cpp @@ -174,9 +174,7 @@ void ov::Node::set_arguments(const OutputVector& arguments) { // Add this node as a user of each argument. size_t i = 0; for (auto& output : arguments) { - auto output_node = output.get_node(); - auto& output_descriptor = output_node->m_outputs.at(output.get_index()); - m_inputs.emplace_back(this, i++, output_descriptor); + set_argument(i++, output); } // set_arguments doesn't use replace_output method, so we have to reset cache manually here @@ -203,8 +201,17 @@ ov::descriptor::Output& ov::Node::get_output_descriptor(size_t position) { void ov::Node::set_argument(size_t position, const Output& argument) { auto output_node = argument.get_node(); - auto& output_descriptor = output_node->get_output_descriptor(argument.get_index()); - get_input_descriptor(position).replace_output(output_descriptor); + auto& output_descriptor = output_node->m_outputs.size() > argument.get_index() + ? 
output_node->m_outputs.at(argument.get_index()) + : output_node->get_output_descriptor(argument.get_index()); + if (position < m_inputs.size()) { + get_input_descriptor(position).replace_output(output_descriptor); + } else { + while (m_inputs.size() < position) { + m_inputs.emplace_back(this, m_inputs.size()); + } + m_inputs.emplace_back(this, position, output_descriptor); + } } void ov::Node::constructor_validate_and_infer_types() { From 1bbd92a8f816c3befde78dc1d5aa41645fd0db80 Mon Sep 17 00:00:00 2001 From: Alexander Kozlov Date: Thu, 3 Mar 2022 18:58:58 +0300 Subject: [PATCH 174/310] Revised Tuning For Performance and Model optimization docs (#10276) * Revised Tuning for performance and Model optimization docs * Fixed links * Fixed link * Applied comments * Fixed one more comment --- docs/documentation.md | 4 +- docs/img/nncf_workflow.png | 3 + .../dldt_optimization_guide.md | 30 +++------ .../model_optimization_guide.md | 34 ++++++++++ docs/optimization_guide/nncf_introduction.md | 63 +++++++++++++++++++ 5 files changed, 111 insertions(+), 23 deletions(-) create mode 100644 docs/img/nncf_workflow.png create mode 100644 docs/optimization_guide/model_optimization_guide.md create mode 100644 docs/optimization_guide/nncf_introduction.md diff --git a/docs/documentation.md b/docs/documentation.md index ea26b3f22ff..9d63bbc168b 100644 --- a/docs/documentation.md +++ b/docs/documentation.md @@ -26,11 +26,11 @@ :caption: Tuning for Performance :hidden: - openvino_docs_performance_benchmarks openvino_docs_optimization_guide_dldt_optimization_guide openvino_docs_MO_DG_Getting_Performance_Numbers - pot_README + openvino_docs_model_optimization_guide openvino_docs_tuning_utilities + openvino_docs_performance_benchmarks .. toctree:: diff --git a/docs/img/nncf_workflow.png b/docs/img/nncf_workflow.png new file mode 100644 index 00000000000..53f3cc334e0 --- /dev/null +++ b/docs/img/nncf_workflow.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7a58f31b2043fe9d92892b1f40ed8a7c596c36ef9d1cd1c71adb981009161bf +size 45665 diff --git a/docs/optimization_guide/dldt_optimization_guide.md b/docs/optimization_guide/dldt_optimization_guide.md index 33b39bc1da8..85b899faeea 100644 --- a/docs/optimization_guide/dldt_optimization_guide.md +++ b/docs/optimization_guide/dldt_optimization_guide.md @@ -9,32 +9,20 @@ Performance means how fast the model is in deployment. Two key metrics are used ![](../img/LATENCY_VS_THROUGHPUT.svg) -Latency measures inference time (ms) required to process a single input. When it comes to batch input need to measure throughput (images per second or frames per second, FPS). To calculate throughput, divide number of frames that were processed by the processing time. +Latency measures inference time (ms) required to process a single input. When it comes to batch input need to measure throughput (images per second or frames per second, FPS). To calculate throughput, divide the number of frames that were processed by the processing time. -> **NOTE**: To get performance numbers for OpenVINO, as well as tips how to measure it and compare with native framework, check [Getting performance numbers](../MO_DG/prepare_model/Getting_performance_numbers.md) page. +## How to measure performance +To get performance numbers for OpenVINO, as well as tips how to measure it and compare with native framework, go to [Getting performance numbers](../MO_DG/prepare_model/Getting_performance_numbers.md) page. 
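+
+As a rough sketch of what such a measurement looks like in code (the `benchmark_app` tool automates this; `model.xml` below is just a placeholder and the model is assumed to have a single static input):
+
+```python
+from time import perf_counter
+
+import numpy as np
+from openvino.runtime import Core
+
+core = Core()
+compiled = core.compile_model(core.read_model("model.xml"), "CPU")
+request = compiled.create_infer_request()
+data = np.random.rand(*compiled.input(0).shape).astype(np.float32)
+
+n_iter = 100
+start = perf_counter()
+for _ in range(n_iter):
+    request.infer([data])                  # one input processed per inference call
+elapsed = perf_counter() - start
+
+print(f"average latency: {1000 * elapsed / n_iter:.2f} ms")
+print(f"throughput:      {n_iter / elapsed:.1f} FPS")
+```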
 ## How to Improve Performance
-> **NOTE**: Make sure that your model can be successfully inferred with OpenVINO Runtime.
+> **NOTE**: Make sure that your model can be successfully inferred with OpenVINO Inference Engine before proceeding with the optimization topics.
-Inside OpenVINO there are two ways how to get better performance number: during developing and deployment your model. **It is possible to combine both developing and deployment optimizations**.
+Inside OpenVINO there are two ways to get better performance numbers: optimize the model, which is called **model optimization**, or tune the parameters of execution, which is called **deployment optimization**. Note that it is possible to combine both types of optimizations.
-- **Developing step** includes model modification. Inside developing optimization there are three ways to optimize your model:
+- **Model optimization** includes model modification, such as quantization, pruning, optimization of preprocessing, etc. For more details, refer to this [document](./model_optimization_guide.md).
- - **Post-training Optimization tool** (POT) is designed to optimize the inference of deep learning models by applying special methods without model retraining or fine-tuning, like post-training quantization.
+- **Deployment optimization** includes tuning inference parameters and optimizing model execution. To read more, visit the [Deployment Optimization Guide](../optimization_guide/dldt_deployment_optimization_guide.md).
- - **Neural Network Compression Framework (NNCF)** provides a suite of advanced algorithms for Neural Networks inference optimization with minimal accuracy drop, available quantization, pruning and sparsity optimization algorithms.
-
- - **Model Optimizer** implement some optimization to a model, most of them added by default, but you can configure mean/scale values, batch size RGB vs BGR input channels and other parameters to speed-up preprocess of a model ([Additional Optimization Use Cases](../MO_DG/prepare_model/Additional_Optimizations.md))
-
-- **Deployment step** includes tuning inference parameters and optimizing model execution, to read more visit [Deployment Optimization Guide](../optimization_guide/dldt_deployment_optimization_guide.md).
-
-More detailed workflow:
-
-![](../img/DEVELOPMENT_FLOW_V3_crunch.svg)
-
-To understand when to use each development optimization tool, follow this diagram:
-
-POT is the easiest way to get optimized models and it is also really fast and usually takes several minutes depending on the model size and used HW. NNCF can be considered as an alternative or an addition when the first does not give accurate results.
-
-![](../img/WHAT_TO_USE.svg)
+## Performance benchmarks
+Performance numbers measured on various supported devices for a wide range of public models are available in the [Performance benchmarks](../benchmarks/performance_benchmarks.md) section.
\ No newline at end of file
diff --git a/docs/optimization_guide/model_optimization_guide.md b/docs/optimization_guide/model_optimization_guide.md
new file mode 100644
index 00000000000..3edcc62917a
--- /dev/null
+++ b/docs/optimization_guide/model_optimization_guide.md
@@ -0,0 +1,34 @@
+# Model Optimization Guide {#openvino_docs_model_optimization_guide}
+
+@sphinxdirective
+
+.. toctree::
+   :maxdepth: 1
+   :hidden:
+
+   pot_README
+   docs_nncf_introduction
+
+@endsphinxdirective
+
+Model optimization involves applying transformations to the model and the relevant data flow to improve the inference performance.
These transformations are basically offline and can require the availability of training and validation data. It includes such methods as quantization, pruning, preprocessing optimization, etc. OpenVINO provides several tools to optimize models at different steps of model development: + + - **Post-training Optimization tool [(POT)](../../tools/pot/README.md)** is designed to optimize the inference of deep learning models by applying post-training methods that do not require model retraining or fine-tuning, like post-training quantization. + +- **Neural Network Compression Framework [(NNCF)](./nncf_introduction.md)** provides a suite of advanced algorithms for Neural Networks inference optimization with minimal accuracy drop, for example, quantization, pruning algorithms. + +- **Model Optimizer** implements optimization to a model, most of them added by default, but you can configure mean/scale values, batch size RGB vs BGR input channels, and other parameters to speed-up preprocess of a model ([Additional Optimization Use Cases](../MO_DG/prepare_model/Additional_Optimizations.md)) + + +## Detailed workflow: + +![](../img/DEVELOPMENT_FLOW_V3_crunch.svg) + +To understand which development optimization tool you need, refer to the diagram: + +POT is the easiest way to get optimized models, and usually takes several minutes depending on the model size and used HW. NNCF can be considered as an alternative or addition when the first one does not give accurate results. + +![](../img/WHAT_TO_USE.svg) + +## See also +- [Deployment optimization](./dldt_deployment_optimization_guide.md) \ No newline at end of file diff --git a/docs/optimization_guide/nncf_introduction.md b/docs/optimization_guide/nncf_introduction.md new file mode 100644 index 00000000000..6ce2234771b --- /dev/null +++ b/docs/optimization_guide/nncf_introduction.md @@ -0,0 +1,63 @@ +# Neural Network Compression Framework {#docs_nncf_introduction} +This document describes the Neural Network Compression Framework (NNCF) which is being developed as a separate project outside of OpenVINO™ but it is highly aligned with OpenVINO™ in terms of the supported optimization features and models. It is open-sourced and available on [GitHub](https://github.com/openvinotoolkit/nncf). + +## Introduction + Neural Network Compression Framework (NNCF) is aimed at optimizing Deep Neural Network (DNN) by applying optimization methods, such as quantization, pruning, etc., to the original framework model. It mostly provides in-training optimization capabilities which means that optimization methods require model fine-tuning during and after optimization. The diagram below shows the model optimization workflow using NNCF. + ![](../img/nncf_workflow.png) + + ### Features + - Support optimization of PyTorch and TensorFlow 2.x models. 
+ - Support of various optimization algorithms, applied during a model fine-tuning process to achieve a better performance-accuracy trade-off: + + |Compression algorithm|PyTorch|TensorFlow 2.x| + | :--- | :---: | :---: | + |[8- bit quantization](https://github.com/openvinotoolkit/nncf/blob/develop/docs/compression_algorithms/Quantization.md) | Supported | Supported | + |[Filter pruning](https://github.com/openvinotoolkit/nncf/blob/develop/docs/compression_algorithms/Pruning.md) | Supported | Supported | + |[Sparsity](https://github.com/openvinotoolkit/nncf/blob/develop/docs/compression_algorithms/Sparsity.md) | Supported | Supported | + |[Mixed-precision quantization](https://github.com/openvinotoolkit/nncf/blob/develop/docs/compression_algorithms/Quantization.md#mixed_precision_quantization) | Supported | Not supported | + |[Binarization](https://github.com/openvinotoolkit/nncf/blob/develop/docs/compression_algorithms/Binarization.md) | Supported | Not supported | + + + +- Stacking of optimization methods. For example: 8-bit quaNtization + Filter Pruning. +- Support for [Accuracy-Aware model training](https://github.com/openvinotoolkit/nncf/blob/develop/docs/Usage.md#accuracy-aware-model-training) pipelines via the [Adaptive Compression Level Training](https://github.com/openvinotoolkit/nncf/tree/develop/docs/accuracy_aware_model_training/AdaptiveCompressionLevelTraining.md) and [Early Exit Training](https://github.com/openvinotoolkit/nncf/tree/develop/docs/accuracy_aware_model_training/EarlyExitTrainig.md). +- Automatic, configurable model graph transformation to obtain the compressed model. + > **NOTE**: Limited support for TensorFlow models. Only the models created, using Sequential or Keras Functional API, are supported. +- GPU-accelerated layers for the faster compressed model fine-tuning. +- Distributed training support. +- Configuration file examples for each supported compression algorithm. +- Exporting PyTorch compressed models to ONNX\* checkpoints and TensorFlow compressed models to SavedModel or Frozen Graph format, ready to use with [OpenVINO™ toolkit](https://github.com/openvinotoolkit/). +- Git patches for prominent third-party repositories ([huggingface-transformers](https://github.com/huggingface/transformers)) demonstrating the process of integrating NNCF into custom training pipelines + +## Get started +### Installation +NNCF provides the packages available for installation through the PyPI repository. To install the latest version via pip manager run the following command: +``` +pip install nncf +``` + +### Usage examples +NNCF provides various examples and tutorials that demonstrate usage of optimization methods. 
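+
+As a minimal illustration, the sketch below wraps a toy PyTorch model with NNCF 8-bit quantization-aware training (the model, the data, and the exact config values are placeholders; real pipelines substitute their own):
+
+```python
+import torch
+from nncf import NNCFConfig
+from nncf.torch import create_compressed_model, register_default_init_args
+
+# Toy model and data loader, only to keep the sketch self-contained.
+model = torch.nn.Sequential(torch.nn.Conv2d(3, 8, 3), torch.nn.ReLU())
+dataset = torch.utils.data.TensorDataset(torch.randn(8, 3, 32, 32), torch.zeros(8, dtype=torch.long))
+train_loader = torch.utils.data.DataLoader(dataset, batch_size=4)
+
+nncf_config = NNCFConfig.from_dict({
+    "input_info": {"sample_size": [1, 3, 32, 32]},
+    "compression": {"algorithm": "quantization"},      # 8-bit quantization-aware training
+})
+nncf_config = register_default_init_args(nncf_config, train_loader)  # data for range initialization
+
+compression_ctrl, compressed_model = create_compressed_model(model, nncf_config)
+# ... fine-tune `compressed_model` with the usual training loop, then export:
+compression_ctrl.export_model("compressed_model.onnx")
+```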
+ +### Tutorials +- [Quantization-aware training of PyTorch model](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/302-pytorch-quantization-aware-training) +- [Quantization-aware training of TensorFlow model](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/305-tensorflow-quantization-aware-training) +- (Experimental) [Post-training quantization of PyTorch model](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/112-pytorch-post-training-quantization-nncf) + +### Samples +- PyTorch: + - [Image Classification sample](https://github.com/openvinotoolkit/nncf/blob/develop/examples/torch/classification/README.md) + - [Object Detection sample](https://github.com/openvinotoolkit/nncf/blob/develop/examples/torch/object_detection/README.md) + - [Semantic segmentation sample](https://github.com/openvinotoolkit/nncf/blob/develop/examples/torch/semantic_segmentation/README.md) + +- TensorFlow samples: + - [Image Classification sample](https://github.com/openvinotoolkit/nncf/blob/develop/examples/tensorflow/classification/README.md) + - [Object Detection sample](https://github.com/openvinotoolkit/nncf/blob/develop/examples/tensorflow/object_detection/README.md) + - [Instance Segmentation sample](https://github.com/openvinotoolkit/nncf/blob/develop/examples/tensorflow/segmentation/README.md) + + +## See also +- [Compressed Model Zoo](https://github.com/openvinotoolkit/nncf#nncf-compressed-model-zoo) +- [NNCF in HuggingFace Optimum](https://github.com/dkurt/optimum-openvino) +- [OpenVINO™ Post-training Optimization tool](../../tools/pot/README.md) + From 78c9f5b0a21ddfcf63ca6b5561e14b439f3cde41 Mon Sep 17 00:00:00 2001 From: "Wang, Yang" Date: Fri, 4 Mar 2022 10:04:48 +0800 Subject: [PATCH 175/310] Add coommon test of the key PERFORMANCE_HINT for AUTO plugin API 2.0. (#10505) * Add coommont test of the key PERFORMANCE_HINT for AUTO plugin API 2.0. Signed-off-by: Wang, Yang * Add common test case for config check. Signed-off-by: Wang, Yang * Update. Signed-off-by: Wang, Yang * Update. Signed-off-by: Wang, Yang * Use the implemented property test case. 
Signed-off-by: Wang, Yang --- .../behavior/ov_plugin/core_integration.cpp | 3 +- .../behavior/ov_plugin/properties_tests.cpp | 36 +++++++++++++++++++ .../behavior/ov_plugin/core_integration.cpp | 2 +- .../behavior/ov_plugin/properties_tests.cpp | 36 +++++++++++++++++++ .../behavior/ov_plugin/core_integration.hpp | 2 +- 5 files changed, 76 insertions(+), 3 deletions(-) create mode 100644 src/tests/functional/plugin/cpu/shared_tests_instances/behavior/ov_plugin/properties_tests.cpp create mode 100644 src/tests/functional/plugin/gpu/shared_tests_instances/behavior/ov_plugin/properties_tests.cpp diff --git a/src/tests/functional/plugin/cpu/shared_tests_instances/behavior/ov_plugin/core_integration.cpp b/src/tests/functional/plugin/cpu/shared_tests_instances/behavior/ov_plugin/core_integration.cpp index d972ab1c3a2..45d1fa4b1e5 100644 --- a/src/tests/functional/plugin/cpu/shared_tests_instances/behavior/ov_plugin/core_integration.cpp +++ b/src/tests/functional/plugin/cpu/shared_tests_instances/behavior/ov_plugin/core_integration.cpp @@ -81,7 +81,8 @@ INSTANTIATE_TEST_SUITE_P( ::testing::Values("MULTI", "AUTO")); const std::vector multiConfigs = { - {ov::device::priorities(CommonTestUtils::DEVICE_CPU)}}; + {ov::device::priorities(CommonTestUtils::DEVICE_CPU)} +}; INSTANTIATE_TEST_SUITE_P( smoke_OVClassSetDevicePriorityConfigTest, OVClassSetDevicePriorityConfigTest, diff --git a/src/tests/functional/plugin/cpu/shared_tests_instances/behavior/ov_plugin/properties_tests.cpp b/src/tests/functional/plugin/cpu/shared_tests_instances/behavior/ov_plugin/properties_tests.cpp new file mode 100644 index 00000000000..ec08b0f2872 --- /dev/null +++ b/src/tests/functional/plugin/cpu/shared_tests_instances/behavior/ov_plugin/properties_tests.cpp @@ -0,0 +1,36 @@ +// Copyright (C) 2018-2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "behavior/ov_plugin/properties_tests.hpp" +#include + +using namespace ov::test::behavior; +using namespace InferenceEngine::PluginConfigParams; + +namespace { + +const std::vector cpu_properties = { + {ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY)}, + {ov::hint::performance_mode(ov::hint::PerformanceMode::THROUGHPUT)}, + {ov::hint::performance_mode(ov::hint::PerformanceMode::UNDEFINED)}, +}; + +INSTANTIATE_TEST_SUITE_P(smoke_BehaviorTests, OVPropertiesTests, + ::testing::Combine( + ::testing::Values(CommonTestUtils::DEVICE_CPU), + ::testing::ValuesIn(cpu_properties)), + OVPropertiesDefaultTests::getTestCaseName); + +const std::vector multi_Auto_properties = { + {ov::device::priorities(CommonTestUtils::DEVICE_CPU), ov::hint::performance_mode(ov::hint::PerformanceMode::UNDEFINED)}, + {ov::device::priorities(CommonTestUtils::DEVICE_CPU), ov::hint::performance_mode(ov::hint::PerformanceMode::THROUGHPUT)}, + {ov::device::priorities(CommonTestUtils::DEVICE_CPU), ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY)} +}; + +INSTANTIATE_TEST_SUITE_P(smoke_AutoMultiBehaviorTests, OVPropertiesTests, + ::testing::Combine( + ::testing::Values(CommonTestUtils::DEVICE_AUTO, CommonTestUtils::DEVICE_MULTI), + ::testing::ValuesIn(multi_Auto_properties)), + OVPropertiesTests::getTestCaseName); +} // namespace diff --git a/src/tests/functional/plugin/gpu/shared_tests_instances/behavior/ov_plugin/core_integration.cpp b/src/tests/functional/plugin/gpu/shared_tests_instances/behavior/ov_plugin/core_integration.cpp index 1b84fc7dd89..690651c3e51 100644 --- 
a/src/tests/functional/plugin/gpu/shared_tests_instances/behavior/ov_plugin/core_integration.cpp +++ b/src/tests/functional/plugin/gpu/shared_tests_instances/behavior/ov_plugin/core_integration.cpp @@ -88,7 +88,7 @@ INSTANTIATE_TEST_SUITE_P( const std::vector multiConfigs = { {ov::device::priorities(CommonTestUtils::DEVICE_CPU)}, {ov::device::priorities(CommonTestUtils::DEVICE_GPU)}, - {ov::device::priorities(CommonTestUtils::DEVICE_CPU, CommonTestUtils::DEVICE_GPU)}}; +}; INSTANTIATE_TEST_SUITE_P( smoke_OVClassSetDevicePriorityConfigTest, OVClassSetDevicePriorityConfigTest, diff --git a/src/tests/functional/plugin/gpu/shared_tests_instances/behavior/ov_plugin/properties_tests.cpp b/src/tests/functional/plugin/gpu/shared_tests_instances/behavior/ov_plugin/properties_tests.cpp new file mode 100644 index 00000000000..15f283f1a92 --- /dev/null +++ b/src/tests/functional/plugin/gpu/shared_tests_instances/behavior/ov_plugin/properties_tests.cpp @@ -0,0 +1,36 @@ +// Copyright (C) 2018-2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "behavior/ov_plugin/properties_tests.hpp" +#include + +using namespace ov::test::behavior; +using namespace InferenceEngine::PluginConfigParams; + +namespace { + +const std::vector gpu_properties = { + {ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY)}, + {ov::hint::performance_mode(ov::hint::PerformanceMode::THROUGHPUT)}, + {ov::hint::performance_mode(ov::hint::PerformanceMode::UNDEFINED)}, +}; + +INSTANTIATE_TEST_SUITE_P(smoke_BehaviorTests, OVPropertiesTests, + ::testing::Combine( + ::testing::Values(CommonTestUtils::DEVICE_GPU), + ::testing::ValuesIn(gpu_properties)), + OVPropertiesDefaultTests::getTestCaseName); + +const std::vector auto_multi_properties = { + {ov::device::priorities(CommonTestUtils::DEVICE_GPU), ov::hint::performance_mode(ov::hint::PerformanceMode::UNDEFINED)}, + {ov::device::priorities(CommonTestUtils::DEVICE_GPU), ov::hint::performance_mode(ov::hint::PerformanceMode::THROUGHPUT)}, + {ov::device::priorities(CommonTestUtils::DEVICE_GPU), ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY)} +}; + +INSTANTIATE_TEST_SUITE_P(smoke_AutoMultiBehaviorTests, OVPropertiesTests, + ::testing::Combine( + ::testing::Values(CommonTestUtils::DEVICE_AUTO, CommonTestUtils::DEVICE_MULTI), + ::testing::ValuesIn(auto_multi_properties)), + OVPropertiesTests::getTestCaseName); +} // namespace diff --git a/src/tests/functional/plugin/shared/include/behavior/ov_plugin/core_integration.hpp b/src/tests/functional/plugin/shared/include/behavior/ov_plugin/core_integration.hpp index cfcafa86646..67124a62797 100644 --- a/src/tests/functional/plugin/shared/include/behavior/ov_plugin/core_integration.hpp +++ b/src/tests/functional/plugin/shared/include/behavior/ov_plugin/core_integration.hpp @@ -373,7 +373,7 @@ TEST_P(OVClassSetModelPriorityConfigTest, SetConfigNoThrow) { EXPECT_EQ(value, ov::hint::Priority::HIGH); } -TEST_P(OVClassSetDevicePriorityConfigTest, SetConfigNoThrow) { +TEST_P(OVClassSetDevicePriorityConfigTest, SetConfigAndCheckGetConfigNoThrow) { ov::Core ie = createCoreWithTemplate(); std::string devicePriority; OV_ASSERT_NO_THROW(ie.set_property(deviceName, configuration)); From fd18632d89b3d38382241f9da9e12d4025725144 Mon Sep 17 00:00:00 2001 From: Mateusz Bencer Date: Fri, 4 Mar 2022 05:24:52 +0100 Subject: [PATCH 176/310] Update --extenions MO doc (#10763) --- tools/mo/openvino/tools/mo/utils/cli_parser.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git 
a/tools/mo/openvino/tools/mo/utils/cli_parser.py b/tools/mo/openvino/tools/mo/utils/cli_parser.py index a363e1298f3..df41ef4f3a9 100644 --- a/tools/mo/openvino/tools/mo/utils/cli_parser.py +++ b/tools/mo/openvino/tools/mo/utils/cli_parser.py @@ -402,9 +402,11 @@ def get_common_cli_parser(parser: argparse.ArgumentParser = None): action=DeprecatedStoreTrue, default=False) # we use CanonicalizeDirCheckExistenceAction instead of readable_dirs to handle empty strings common_group.add_argument("--extensions", - help="Directory or a comma separated list of directories with extensions. To disable all " - "extensions including those that are placed at the default location, pass an empty " - "string.", + help="Paths or a comma-separated list of paths to libraries (.so or .dll) " + "with extensions. For the legacy MO path (if `--use_legacy_frontend` is used), " + "a directory or a comma-separated list of directories with extensions are supported. " + "To disable all extensions including those that are placed at the default location, " + "pass an empty string.", default=import_extensions.default_path(), action=CanonicalizePathCheckExistenceAction, type=readable_dirs_or_files_or_empty) From 8a2252b77424f30674ecb6f595eab5787aaa1589 Mon Sep 17 00:00:00 2001 From: yanlan song Date: Fri, 4 Mar 2022 13:13:12 +0800 Subject: [PATCH 177/310] fix multi infer result corrupt issue (#10704) * do not share blob Signed-off-by: fishbell * build error Signed-off-by: fishbell * remove comment codes Signed-off-by: fishbell --- src/plugins/auto/executable_network.cpp | 22 ---------------------- 1 file changed, 22 deletions(-) diff --git a/src/plugins/auto/executable_network.cpp b/src/plugins/auto/executable_network.cpp index e351d052ce6..61139a3ae5c 100644 --- a/src/plugins/auto/executable_network.cpp +++ b/src/plugins/auto/executable_network.cpp @@ -593,7 +593,6 @@ InferenceEngine::IInferRequestInternal::Ptr MultiDeviceExecutableNetwork::Create const std::vector>& inputs, const std::vector>& outputs) { auto num = _numRequestsCreated++; - size_t sum = 0; InferenceEngine::SoIInferRequestInternal request_to_share_blobs_with; InferenceEngine::RemoteContext::Ptr ctx = nullptr; @@ -615,23 +614,12 @@ InferenceEngine::IInferRequestInternal::Ptr MultiDeviceExecutableNetwork::Create return std::make_shared(inputs, outputs, request_to_share_blobs_with, ctx); } - // borrowing device-specific blobs from the underlying requests for the device-agnostic, user-facing requests - // this allows to potentially save on the data-copy later (if the requests are scheduled in the same order) - for (const auto& device : _devicePrioritiesInitial) { - auto& dev_requests = _workerRequests[device.deviceName]; - if ((num - sum) < dev_requests.size()) { - request_to_share_blobs_with = dev_requests.at(num - sum)._inferRequest; - break; - } - sum += dev_requests.size(); - } return std::make_shared(inputs, outputs, request_to_share_blobs_with); } InferenceEngine::IInferRequestInternal::Ptr MultiDeviceExecutableNetwork::CreateInferRequestImpl(InferenceEngine::InputsDataMap networkInputs, InferenceEngine::OutputsDataMap networkOutputs) { auto num = _numRequestsCreated++; - size_t sum = 0; InferenceEngine::SoIInferRequestInternal request_to_share_blobs_with; InferenceEngine::RemoteContext::Ptr ctx = nullptr; @@ -652,16 +640,6 @@ InferenceEngine::IInferRequestInternal::Ptr MultiDeviceExecutableNetwork::Create return std::make_shared(networkInputs, networkOutputs, request_to_share_blobs_with, ctx); } - // borrowing device-specific blobs from the underlying 
requests for the device-agnostic, user-facing requests - // this allows to potentially save on the data-copy later (if the requests are scheduled in the same order) - for (const auto& device : _devicePrioritiesInitial) { - auto& dev_requests = _workerRequests[device.deviceName]; - if ((num - sum) < dev_requests.size()) { - request_to_share_blobs_with = dev_requests.at(num - sum)._inferRequest; - break; - } - sum += dev_requests.size(); - } return std::make_shared(networkInputs, networkOutputs, request_to_share_blobs_with); } From 5cee51e9c43a3872fde17b27beecf14340901328 Mon Sep 17 00:00:00 2001 From: hyunback kim Date: Fri, 4 Mar 2022 14:30:07 +0900 Subject: [PATCH 178/310] [GPU] update to check quantize fusing condition in oneDNN (#10680) * [GPU] update the condition for minimize_local_reorders * Update to check needs reorder condition in quantize. Signed-off-by: hyunback --- .../intel_gpu/src/graph/graph_optimizer/reorder_inputs.cpp | 2 +- src/plugins/intel_gpu/src/graph/layout_optimizer.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_inputs.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_inputs.cpp index 13421c69130..5fe21f21682 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_inputs.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_inputs.cpp @@ -359,7 +359,7 @@ void minimize_local_reorders(program& p, std::map& auto reorders_cnt = count_reorders(fmt_map, lo, node); if (reorders_cnt.number < best_reorder_cnt.number || - (reorders_cnt.number == best_reorder_cnt.number && reorders_cnt.total_sizes < best_reorder_cnt.total_sizes) ) { + (reorders_cnt.number == best_reorder_cnt.number && reorders_cnt.total_sizes < best_reorder_cnt.total_sizes)) { best_reorder_cnt = reorders_cnt; best_format = new_fmt; } diff --git a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp index 12278270388..8646c2a4921 100644 --- a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp +++ b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp @@ -371,7 +371,7 @@ bool layout_optimizer::can_fuse_reorder(program_node& prev, program_node& next, } } - if (next.is_type()) + if (next.is_type() && prev.get_users().size() == 1) return true; if (next.is_type()) { From 043a773f61ad31a124fdcfa6e2bc876afb06481e Mon Sep 17 00:00:00 2001 From: Fedor Zharinov Date: Fri, 4 Mar 2022 09:49:03 +0300 Subject: [PATCH 179/310] [Benchmark_app]Check all I/O names (#10745) * Check all I/O names * stylefix --- samples/cpp/benchmark_app/utils.cpp | 4 +- .../utils/include/samples/args_helper.hpp | 66 ++++++++++++++++++- samples/cpp/common/utils/src/args_helper.cpp | 28 -------- 3 files changed, 66 insertions(+), 32 deletions(-) diff --git a/samples/cpp/benchmark_app/utils.cpp b/samples/cpp/benchmark_app/utils.cpp index f943d637afa..c3493642fc5 100644 --- a/samples/cpp/benchmark_app/utils.cpp +++ b/samples/cpp/benchmark_app/utils.cpp @@ -821,7 +821,7 @@ std::string parameter_name_to_tensor_name(const std::string& name, const std::vector>& outputs_info) { if (std::any_of(inputs_info.begin(), inputs_info.end(), [name](const ov::Output& port) { try { - return name == port.get_any_name(); + return port.get_names().count(name) > 0; } catch (const ov::Exception&) { return false; // Some ports might have no names - so this is workaround } @@ -829,7 +829,7 @@ std::string parameter_name_to_tensor_name(const std::string& name, return name; } else if 
(std::any_of(outputs_info.begin(), outputs_info.end(), [name](const ov::Output& port) { try { - return name == port.get_any_name(); + return port.get_names().count(name) > 0; } catch (const ov::Exception&) { return false; // Some ports might have no names - so this is workaround } diff --git a/samples/cpp/common/utils/include/samples/args_helper.hpp b/samples/cpp/common/utils/include/samples/args_helper.hpp index a3749cbf065..04188e88a5c 100644 --- a/samples/cpp/common/utils/include/samples/args_helper.hpp +++ b/samples/cpp/common/utils/include/samples/args_helper.hpp @@ -45,5 +45,67 @@ void configurePrePostProcessing(std::shared_ptr& function, const std::string& ioml); void printInputAndOutputsInfo(const ov::Model& network); -void printInputAndOutputsInfoShort(const ov::Model& network); -ov::element::Type getPrecision2(const std::string& value); \ No newline at end of file +ov::element::Type getPrecision2(const std::string& value); + +template +void printInputAndOutputsInfoShort(const T& network) { + std::cout << "Network inputs:" << std::endl; + for (auto&& input : network.inputs()) { + std::string in_name; + std::string node_name; + + // Workaround for "tensor has no name" issue + try { + for (const auto& name : input.get_names()) { + in_name += name + " , "; + } + in_name = in_name.substr(0, in_name.size() - 3); + + } catch (const ov::Exception&) { + } + try { + node_name = input.get_node()->get_friendly_name(); + } catch (const ov::Exception&) { + } + + if (in_name == "") { + in_name = "***NO_NAME***"; + } + if (node_name == "") { + node_name = "***NO_NAME***"; + } + + std::cout << " " << in_name << " (node: " << node_name << ") : " << input.get_element_type() << " / " + << ov::layout::get_layout(input).to_string() << std::endl; + } + + std::cout << "Network outputs:" << std::endl; + for (auto&& output : network.outputs()) { + std::string out_name; + std::string node_name; + + // Workaround for "tensor has no name" issue + try { + for (const auto& name : output.get_names()) { + out_name += name + " , "; + } + out_name = out_name.substr(0, out_name.size() - 3); + + } catch (const ov::Exception&) { + } + try { + node_name = output.get_node()->get_input_node_ptr(0)->get_friendly_name(); + } catch (const ov::Exception&) { + } + + if (out_name == "") { + out_name = "***NO_NAME***"; + } + if (node_name == "") { + node_name = "***NO_NAME***"; + } + + std::cout << " " << out_name << " (node: " << node_name << ") : " << output.get_element_type() << " / " + << ov::layout::get_layout(output).to_string() << std::endl; + } +} diff --git a/samples/cpp/common/utils/src/args_helper.cpp b/samples/cpp/common/utils/src/args_helper.cpp index 16e6e72060b..4ff1e14128b 100644 --- a/samples/cpp/common/utils/src/args_helper.cpp +++ b/samples/cpp/common/utils/src/args_helper.cpp @@ -248,34 +248,6 @@ bool isMatchLayoutToDims(InferenceEngine::Layout layout, size_t dimension) { } // namespace -void printInputAndOutputsInfoShort(const ov::Model& network) { - std::cout << "Network inputs:" << std::endl; - for (auto&& input : network.inputs()) { - std::cout << " " << input.get_any_name() << " (node: " << input.get_node()->get_friendly_name() - << ") : " << input.get_element_type() << " / " << ov::layout::get_layout(input).to_string() - << std::endl; - } - - std::cout << "Network outputs:" << std::endl; - for (auto&& output : network.outputs()) { - std::string out_name = "***NO_NAME***"; - std::string node_name = "***NO_NAME***"; - - // Workaround for "tensor has no name" issue - try { - out_name = 
output.get_any_name(); - } catch (const ov::Exception&) { - } - try { - node_name = output.get_node()->get_input_node_ptr(0)->get_friendly_name(); - } catch (const ov::Exception&) { - } - - std::cout << " " << out_name << " (node: " << node_name << ") : " << output.get_element_type() << " / " - << ov::layout::get_layout(output).to_string() << std::endl; - } -} - void printInputAndOutputsInfo(const ov::Model& network) { slog::info << "model name: " << network.get_friendly_name() << slog::endl; From 082ebbcbf81e5d03b7ce4193d422ed7a2ad18c13 Mon Sep 17 00:00:00 2001 From: Irina Efode Date: Fri, 4 Mar 2022 12:52:58 +0300 Subject: [PATCH 180/310] [IE TESTS] Remove NgraphConversionTests (#10770) --- .../conv_bias_fusion.cpp | 14 ---- .../plugin_specific_ngraph_conversion.cpp | 14 ---- .../conv_bias_fusion.cpp | 14 ---- .../plugin_specific_ngraph_conversion.cpp | 14 ---- .../conv_bias_fusion.hpp | 29 ------- .../plugin_specific_ngraph_conversion.hpp | 31 -------- .../conv_bias_fusion.cpp | 60 -------------- .../plugin_specific_ngraph_conversion.cpp | 78 ------------------- 8 files changed, 254 deletions(-) delete mode 100644 src/tests/functional/plugin/cpu/shared_tests_instances/ngraph_conversion_tests/conv_bias_fusion.cpp delete mode 100644 src/tests/functional/plugin/cpu/shared_tests_instances/ngraph_conversion_tests/plugin_specific_ngraph_conversion.cpp delete mode 100644 src/tests/functional/plugin/gpu/shared_tests_instances/ngraph_conversion_tests/conv_bias_fusion.cpp delete mode 100644 src/tests/functional/plugin/gpu/shared_tests_instances/ngraph_conversion_tests/plugin_specific_ngraph_conversion.cpp delete mode 100644 src/tests/functional/plugin/shared/include/ngraph_conversion_tests/conv_bias_fusion.hpp delete mode 100644 src/tests/functional/plugin/shared/include/ngraph_conversion_tests/plugin_specific_ngraph_conversion.hpp delete mode 100644 src/tests/functional/plugin/shared/src/ngraph_conversion_tests/conv_bias_fusion.cpp delete mode 100644 src/tests/functional/plugin/shared/src/ngraph_conversion_tests/plugin_specific_ngraph_conversion.cpp diff --git a/src/tests/functional/plugin/cpu/shared_tests_instances/ngraph_conversion_tests/conv_bias_fusion.cpp b/src/tests/functional/plugin/cpu/shared_tests_instances/ngraph_conversion_tests/conv_bias_fusion.cpp deleted file mode 100644 index da3152a7ccd..00000000000 --- a/src/tests/functional/plugin/cpu/shared_tests_instances/ngraph_conversion_tests/conv_bias_fusion.cpp +++ /dev/null @@ -1,14 +0,0 @@ -// Copyright (C) 2018-2022 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include -#include "ngraph_conversion_tests/conv_bias_fusion.hpp" - -using namespace NGraphConversionTestsDefinitions; - -namespace { - -INSTANTIATE_TEST_SUITE_P(smoke_Basic, ConvBiasFusion, ::testing::Values("CPU"), ConvBiasFusion::getTestCaseName); - -} // namespace diff --git a/src/tests/functional/plugin/cpu/shared_tests_instances/ngraph_conversion_tests/plugin_specific_ngraph_conversion.cpp b/src/tests/functional/plugin/cpu/shared_tests_instances/ngraph_conversion_tests/plugin_specific_ngraph_conversion.cpp deleted file mode 100644 index 396e5fec43c..00000000000 --- a/src/tests/functional/plugin/cpu/shared_tests_instances/ngraph_conversion_tests/plugin_specific_ngraph_conversion.cpp +++ /dev/null @@ -1,14 +0,0 @@ -// Copyright (C) 2018-2022 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include -#include "ngraph_conversion_tests/plugin_specific_ngraph_conversion.hpp" - -using namespace NGraphConversionTestsDefinitions; - -namespace { - 
-INSTANTIATE_TEST_SUITE_P(smoke_Basic, PluginSpecificConversion, ::testing::Values("CPU"), PluginSpecificConversion::getTestCaseName); - -} // namespace diff --git a/src/tests/functional/plugin/gpu/shared_tests_instances/ngraph_conversion_tests/conv_bias_fusion.cpp b/src/tests/functional/plugin/gpu/shared_tests_instances/ngraph_conversion_tests/conv_bias_fusion.cpp deleted file mode 100644 index 93b59dacd76..00000000000 --- a/src/tests/functional/plugin/gpu/shared_tests_instances/ngraph_conversion_tests/conv_bias_fusion.cpp +++ /dev/null @@ -1,14 +0,0 @@ -// Copyright (C) 2018-2022 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include -#include "ngraph_conversion_tests/conv_bias_fusion.hpp" - -using namespace NGraphConversionTestsDefinitions; - -namespace { - -INSTANTIATE_TEST_SUITE_P(smoke_Basic, ConvBiasFusion, ::testing::Values(CommonTestUtils::DEVICE_GPU), ConvBiasFusion::getTestCaseName); - -} // namespace diff --git a/src/tests/functional/plugin/gpu/shared_tests_instances/ngraph_conversion_tests/plugin_specific_ngraph_conversion.cpp b/src/tests/functional/plugin/gpu/shared_tests_instances/ngraph_conversion_tests/plugin_specific_ngraph_conversion.cpp deleted file mode 100644 index 1d0ec1ece54..00000000000 --- a/src/tests/functional/plugin/gpu/shared_tests_instances/ngraph_conversion_tests/plugin_specific_ngraph_conversion.cpp +++ /dev/null @@ -1,14 +0,0 @@ -// Copyright (C) 2018-2022 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include -#include "ngraph_conversion_tests/plugin_specific_ngraph_conversion.hpp" - -using namespace NGraphConversionTestsDefinitions; - -namespace { - -INSTANTIATE_TEST_SUITE_P(smoke_Basic, PluginSpecificConversion, ::testing::Values("GPU"), PluginSpecificConversion::getTestCaseName); - -} // namespace diff --git a/src/tests/functional/plugin/shared/include/ngraph_conversion_tests/conv_bias_fusion.hpp b/src/tests/functional/plugin/shared/include/ngraph_conversion_tests/conv_bias_fusion.hpp deleted file mode 100644 index 4c3708b9077..00000000000 --- a/src/tests/functional/plugin/shared/include/ngraph_conversion_tests/conv_bias_fusion.hpp +++ /dev/null @@ -1,29 +0,0 @@ -// Copyright (C) 2018-2022 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include -#include -#include -#include -#include - -#include "ie_core.hpp" -#include "ngraph/opsets/opset1.hpp" - -#include "functional_test_utils/blob_utils.hpp" -#include "common_test_utils/common_utils.hpp" -#include "shared_test_classes/base/layer_test_utils.hpp" - -namespace NGraphConversionTestsDefinitions { - -class ConvBiasFusion : public CommonTestUtils::TestsCommon, public testing::WithParamInterface { -public: - static std::string getTestCaseName(const testing::TestParamInfo & obj); - -protected: - std::string getOutputName() const; -}; -} // namespace NGraphConversionTestsDefinitions diff --git a/src/tests/functional/plugin/shared/include/ngraph_conversion_tests/plugin_specific_ngraph_conversion.hpp b/src/tests/functional/plugin/shared/include/ngraph_conversion_tests/plugin_specific_ngraph_conversion.hpp deleted file mode 100644 index c72864b7b16..00000000000 --- a/src/tests/functional/plugin/shared/include/ngraph_conversion_tests/plugin_specific_ngraph_conversion.hpp +++ /dev/null @@ -1,31 +0,0 @@ -// Copyright (C) 2018-2022 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include -#include -#include -#include -#include - -#include "ie_core.hpp" -#include "ngraph/opsets/opset1.hpp" - -#include 
"functional_test_utils/blob_utils.hpp" -#include "common_test_utils/common_utils.hpp" -#include "shared_test_classes/base/layer_test_utils.hpp" - -namespace NGraphConversionTestsDefinitions { - -class PluginSpecificConversion : public CommonTestUtils::TestsCommon, public testing::WithParamInterface { -public: - std::string device; - - static std::string getTestCaseName(const testing::TestParamInfo & obj); - -protected: - void SetUp() override; -}; -} // namespace NGraphConversionTestsDefinitions diff --git a/src/tests/functional/plugin/shared/src/ngraph_conversion_tests/conv_bias_fusion.cpp b/src/tests/functional/plugin/shared/src/ngraph_conversion_tests/conv_bias_fusion.cpp deleted file mode 100644 index 1a76df9475b..00000000000 --- a/src/tests/functional/plugin/shared/src/ngraph_conversion_tests/conv_bias_fusion.cpp +++ /dev/null @@ -1,60 +0,0 @@ -// Copyright (C) 2018-2022 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "common_test_utils/common_utils.hpp" -#include "ngraph_conversion_tests/conv_bias_fusion.hpp" -#include - -namespace NGraphConversionTestsDefinitions { - - -std::string ConvBiasFusion::getTestCaseName(const testing::TestParamInfo & obj) { - return "Device=" + obj.param; -} - -std::string ConvBiasFusion::getOutputName() const { - if (this->GetParam() == CommonTestUtils::DEVICE_GPU) - return "add_cldnn_output_postprocess"; - else - return "add"; -} - -TEST_P(ConvBiasFusion, ConvBiasFusion) { - std::string device = this->GetParam(); - std::shared_ptr f(nullptr); - { - auto input = std::make_shared(ngraph::element::f32, ngraph::Shape{1, 3, 64, 64}); - auto weights = ngraph::opset1::Constant::create(ngraph::element::f32, ngraph::Shape{6, 3, 1, 1}, {1}); - auto biases = ngraph::opset1::Constant::create(ngraph::element::f32, ngraph::Shape{6, 1, 1}, {1}); - auto conv = std::make_shared(input, weights, ngraph::Strides{1, 1}, - ngraph::CoordinateDiff{0, 0}, ngraph::CoordinateDiff{0, 0}, ngraph::Strides{1, 1}); - auto add = std::make_shared(conv, biases); - - input->set_friendly_name("parameter"); - conv->set_friendly_name("conv"); - add->set_friendly_name("add"); - - f = std::make_shared(ngraph::NodeVector{add}, ngraph::ParameterVector{input}); - } - - auto network = InferenceEngine::CNNNetwork(f); - - InferenceEngine::Core ie; - InferenceEngine::ExecutableNetwork exeNetwork = ie.LoadNetwork(network, device); - auto net = exeNetwork.GetExecGraphInfo(); - auto function = net.getFunction(); - ASSERT_NE(nullptr, function); - - for (const auto & op : function->get_ops()) { - if (op->get_friendly_name() == getOutputName()) { - auto rtInfo = op->get_rt_info(); - auto it = rtInfo.find("originalLayersNames"); - ASSERT_NE(rtInfo.end(), it); - ASSERT_EQ(it->second.as(), "add,conv"); - break; - } - } -} - -} // namespace NGraphConversionTestsDefinitions \ No newline at end of file diff --git a/src/tests/functional/plugin/shared/src/ngraph_conversion_tests/plugin_specific_ngraph_conversion.cpp b/src/tests/functional/plugin/shared/src/ngraph_conversion_tests/plugin_specific_ngraph_conversion.cpp deleted file mode 100644 index ef257834ac1..00000000000 --- a/src/tests/functional/plugin/shared/src/ngraph_conversion_tests/plugin_specific_ngraph_conversion.cpp +++ /dev/null @@ -1,78 +0,0 @@ -// Copyright (C) 2018-2022 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "ngraph_conversion_tests/plugin_specific_ngraph_conversion.hpp" - -namespace NGraphConversionTestsDefinitions { - -void PluginSpecificConversion::SetUp() { - device = this->GetParam(); 
-} - -std::string PluginSpecificConversion::getTestCaseName(const testing::TestParamInfo & obj) { - return "Device=" + obj.param; -} - -TEST_P(PluginSpecificConversion, addOutputAfterLoadNetwork) { - std::shared_ptr f(nullptr); - - { - auto input = std::make_shared(ngraph::element::f32, ngraph::Shape{1, 3, 300, 300}); - auto const1 = ngraph::opset1::Constant::create(ngraph::element::f32, ngraph::Shape{1, 1, 1, 1}, {1}); - auto add1 = std::make_shared(input, const1); - add1->set_friendly_name("add1"); - auto const2 = ngraph::opset1::Constant::create(ngraph::element::f32, ngraph::Shape{1, 1, 1, 1}, {1}); - auto add2 = std::make_shared(add1, const2); - f = std::make_shared(ngraph::NodeVector{add2}, ngraph::ParameterVector{input}); - } - - auto network = InferenceEngine::CNNNetwork(f); - - try { - InferenceEngine::Core ie; - InferenceEngine::ExecutableNetwork exeNetwork = ie.LoadNetwork(network, device); - network.addOutput("add1"); - InferenceEngine::ExecutableNetwork exeNetwork2 = ie.LoadNetwork(network, device); - } catch (InferenceEngine::Exception& ex) { - FAIL() << ex.what(); - } -} - -TEST_P(PluginSpecificConversion, GeluConversionTest) { - std::shared_ptr f(nullptr); - - { - auto input = std::make_shared(ngraph::element::f32, ngraph::Shape{1, 3, 64, 64}); - auto gelu = std::make_shared(input); - f = std::make_shared(ngraph::NodeVector{gelu}, ngraph::ParameterVector{input}); - } - - auto network = InferenceEngine::CNNNetwork(f); - - InferenceEngine::Core ie; - InferenceEngine::ExecutableNetwork exeNetwork = ie.LoadNetwork(network, device); - auto net = exeNetwork.GetExecGraphInfo(); - - ASSERT_EQ(net.layerCount(), 3); -} - -TEST_P(PluginSpecificConversion, MatMulConversionTest) { - std::shared_ptr f(nullptr); - - { - auto input = std::make_shared(ngraph::element::f32, ngraph::Shape{1, 3, 64}); - auto weights = ngraph::opset1::Constant::create(ngraph::element::f32, ngraph::Shape{64, 3}, {1}); - auto matmul = std::make_shared(input, weights); - f = std::make_shared(ngraph::NodeVector{matmul}, ngraph::ParameterVector{input}); - } - - auto network = InferenceEngine::CNNNetwork(f); - - InferenceEngine::Core ie; - InferenceEngine::ExecutableNetwork exeNetwork = ie.LoadNetwork(network, device); - auto net = exeNetwork.GetExecGraphInfo(); - - // TODO: this test is in progress and will be finished when 3D FC will be supported -} -} // namespace NGraphConversionTestsDefinitions From ed702910bdb22a5c2fa4fda962a2675adbbccbbe Mon Sep 17 00:00:00 2001 From: Ilya Churaev Date: Fri, 4 Mar 2022 13:38:42 +0300 Subject: [PATCH 181/310] Enable clang for transformations (#10778) * Enable clang for transformations * Fixed code style * Fixed build * Fixed macOS --- src/common/transformations/CMakeLists.txt | 2 +- .../include/ngraph_ops/nms_ie_internal.hpp | 3 +- .../ngraph_ops/nms_static_shape_ie.hpp | 37 +- .../include/ngraph_ops/type_relaxed.hpp | 90 +-- .../add_fake_quantize_fusion.hpp | 8 +- .../align_eltwise_input_ranks.hpp | 4 +- .../batch_to_space_fusion.hpp | 8 +- .../common_optimizations/binarize_weights.hpp | 8 +- .../broadcast_elementwise_fusion.hpp | 6 +- .../common_optimizations/clamp_fusion.hpp | 8 +- .../common_optimizations.hpp | 9 +- .../compress_float_constants.hpp | 2 +- .../concat_reduce_fusion.hpp | 6 +- .../common_optimizations/conv_mul_fusion.hpp | 14 +- .../conv_to_binary_conv.hpp | 6 +- .../convert_nms_gather_path_to_unsigned.hpp | 4 +- .../convert_quantize_dequantize.hpp | 8 +- .../depth_to_space_fusion.hpp | 10 +- .../dilated_convolution_converter.hpp | 8 +- 
.../dimension_tracking.hpp | 40 +- ...isable_random_uniform_constant_folding.hpp | 4 +- .../disable_shapeof_constant_folding.hpp | 7 +- .../division_by_zero_fp16_resolver.hpp | 14 +- .../eliminate_unsqueeze_gather.hpp | 4 +- .../common_optimizations/fq_mul_fusion.hpp | 14 +- .../fq_reshape_fusion.hpp | 10 +- .../common_optimizations/hsigmoid_fusion.hpp | 17 +- .../common_optimizations/hswish_fusion.hpp | 17 +- .../interpolate_sequence_fusion.hpp | 7 +- .../leaky_relu_fusion.hpp | 8 +- .../lin_op_sequence_fusion.hpp | 13 +- .../matmul_const_transposes_extraction.hpp | 7 +- .../matmul_multiply_fusion.hpp | 5 +- .../common_optimizations/mish_fusion.hpp | 9 +- .../moc_transformations.hpp | 9 +- .../common_optimizations/mul_conv_fusion.hpp | 22 +- .../mul_fake_quantize_fusion.hpp | 8 +- .../common_optimizations/mvn_fusion.hpp | 12 +- .../nearest_neighbor_upsampling_fusion.hpp | 6 +- .../common_optimizations/nop_elimination.hpp | 22 +- .../normalize_l2_fusion.hpp | 8 +- .../optimize_strided_slice.hpp | 15 +- .../common_optimizations/pad_fusion.hpp | 15 +- .../pull_transpose_through_fq.hpp | 8 +- .../relu_fake_quantize_fusion.hpp | 5 +- .../remove_concat_zero_dim_input.hpp | 7 +- .../remove_filtering_boxes_by_size.hpp | 10 +- ...move_multi_subgraph_op_dangling_params.hpp | 7 +- .../reshape_sequence_fusion.hpp | 8 +- .../common_optimizations/ric_fusion.hpp | 11 +- .../shuffle_channels_fusion.hpp | 8 +- .../simplify_shape_of_sub_graph.hpp | 17 +- ...ip_gather_before_transpose_and_reshape.hpp | 2 +- .../common_optimizations/softmax_fusion.hpp | 5 +- .../common_optimizations/softplus_fusion.hpp | 7 +- .../softplus_to_mish_fusion.hpp | 9 +- .../space_to_batch_fusion.hpp | 8 +- ...plit_concat_pair_to_interpolate_fusion.hpp | 7 +- .../split_squeeze_concat_fusion.hpp | 7 +- .../strides_optimization.hpp | 22 +- .../common_optimizations/swish_fusion.hpp | 15 +- ...anspose_reshape_elimination_for_matmul.hpp | 7 +- .../transpose_sinking.hpp | 16 +- .../transpose_to_reshape.hpp | 7 +- .../weights_dequantize_to_fake_quantize.hpp | 7 +- .../wrap_interpolate_into_transposes.hpp | 7 +- .../control_flow/unroll_if.hpp | 2 + .../control_flow/unroll_tensor_iterator.hpp | 8 +- .../transformations/convert_precision.hpp | 36 +- ...decompression_convert_constant_folding.hpp | 2 +- .../include/transformations/fix_rt_info.hpp | 5 +- .../transformations/init_node_info.hpp | 7 +- ...convert_constant_folding_on_const_path.hpp | 8 +- .../batch_norm_decomposition.hpp | 9 +- .../bidirectional_sequences_decomposition.hpp | 8 +- .../op_conversions/convert_batch_to_space.hpp | 8 +- .../op_conversions/convert_broadcast3.hpp | 8 +- .../convert_broadcast_to_tiles.hpp | 8 +- .../convert_deformable_conv_v8_to_v1.hpp | 3 +- .../op_conversions/convert_depth_to_space.hpp | 8 +- .../op_conversions/convert_divide.hpp | 10 +- .../op_conversions/convert_gather_0d.hpp | 10 +- .../convert_gather_downgrade.hpp | 3 +- .../op_conversions/convert_gather_upgrade.hpp | 3 +- .../op_conversions/convert_gelu.hpp | 8 +- .../convert_interpolate1_to_interpolate4.hpp | 10 +- .../convert_matrix_nms_to_matrix_nms_ie.hpp | 9 +- .../convert_maxpool_downgrade.hpp | 2 +- .../convert_maxpool_upgrade.hpp | 2 +- .../convert_minimum_to_power_and_max.hpp | 8 +- .../op_conversions/convert_mod.hpp | 8 +- ...rt_multiclass_nms_to_multiclass_nms_ie.hpp | 9 +- .../op_conversions/convert_mvn1_to_mvn6.hpp | 3 +- .../op_conversions/convert_negative.hpp | 8 +- .../convert_nms_to_nms_ie_internal.hpp | 9 +- .../convert_pad_to_group_conv.hpp | 8 +- 
.../convert_previous_nms_to_nms_5.hpp | 12 +- .../convert_reduce_to_pooling.hpp | 72 ++- .../convert_scatter_elements_to_scatter.hpp | 8 +- .../convert_sequences_to_tensor_iterator.hpp | 12 +- .../op_conversions/convert_shapeof3.hpp | 8 +- .../convert_shuffle_channels3.hpp | 8 +- .../convert_slice_to_strided_slice.hpp | 5 +- .../convert_softmax_downgrade.hpp | 3 +- .../convert_softmax_upgrade.hpp | 3 +- .../op_conversions/convert_space_to_batch.hpp | 8 +- .../op_conversions/convert_space_to_depth.hpp | 8 +- .../op_conversions/convert_subtract.hpp | 8 +- .../convert_ti_to_sequences.hpp | 15 +- .../op_conversions/convert_topk3.hpp | 8 +- .../op_conversions/fq_decomposition.hpp | 10 +- .../gather_normalize_negative_indices.hpp | 4 +- .../op_conversions/gelu7_downgrade.hpp | 4 +- .../op_conversions/gru_cell_decomposition.hpp | 8 +- .../op_conversions/hsigmoid_decomposition.hpp | 4 +- .../op_conversions/hswish_decomposition.hpp | 4 +- .../log_softmax_decomposition.hpp | 4 +- .../lstm_cell_decomposition.hpp | 8 +- .../op_conversions/mvn6_decomposition.hpp | 7 +- .../normalize_l2_decomposition.hpp | 9 +- .../reduce_l1_decomposition.hpp | 9 +- .../reduce_l2_decomposition.hpp | 9 +- .../op_conversions/rnn_cell_decomposition.hpp | 8 +- .../simplify_ctc_greedy_decoder_seq_len.hpp | 6 +- .../op_conversions/softmax_decomposition.hpp | 6 +- .../op_conversions/softplus_decomposition.hpp | 7 +- .../convert_opset2_to_opset1.hpp | 4 +- .../convert_opset3_to_opset2.hpp | 4 +- .../resolve_names_collisions.hpp | 3 +- .../transformations/rt_info/attributes.hpp | 8 +- .../transformations/rt_info/decompression.hpp | 12 +- .../rt_info/disable_constant_folding.hpp | 11 +- .../rt_info/disable_fp16_compression.hpp | 5 +- .../rt_info/fused_names_attribute.hpp | 18 +- .../rt_info/nms_selected_indices.hpp | 15 +- .../rt_info/nonconvertible_divide.hpp | 5 +- .../rt_info/preprocessing_attribute.hpp | 6 +- .../rt_info/primitives_priority_attribute.hpp | 14 +- .../rt_info/strides_property.hpp | 5 +- .../broadcast_const_range_replacement.hpp | 8 +- .../smart_reshape/matmul_sr.hpp | 9 +- .../smart_reshape/mimic_set_batch_size.hpp | 3 +- .../proposal_scales_stridedslice.hpp | 3 +- .../smart_reshape/reshape_to_1D.hpp | 3 +- .../smart_reshape/set_batch_size.hpp | 5 +- .../smart_reshape/smart_reshape.hpp | 5 +- .../smart_reshape/strided_slice_squeeze.hpp | 12 +- .../include/transformations/utils/utils.hpp | 94 +-- .../src/ngraph_ops/nms_ie_internal.cpp | 52 +- .../src/ngraph_ops/nms_static_shape_ie.cpp | 6 +- .../add_fake_quantize_fusion.cpp | 58 +- .../align_eltwise_input_ranks.cpp | 5 +- .../batch_to_space_fusion.cpp | 78 ++- .../common_optimizations/binarize_weights.cpp | 125 ++-- .../broadcast_elementwise_fusion.cpp | 43 +- .../common_optimizations/clamp_fusion.cpp | 22 +- .../common_optimizations.cpp | 185 +++--- .../compress_float_constants.cpp | 11 +- .../concat_reduce_fusion.cpp | 27 +- .../common_optimizations/conv_mul_fusion.cpp | 145 +++-- .../conv_to_binary_conv.cpp | 101 +-- .../convert_nms_gather_path_to_unsigned.cpp | 158 ++--- .../convert_quantize_dequantize.cpp | 52 +- .../depth_to_space_fusion.cpp | 56 +- .../dilated_convolution_converter.cpp | 58 +- .../dimension_tracking.cpp | 7 +- .../disable_shapeof_constant_folding.cpp | 10 +- .../common_optimizations/divide_fusion.cpp | 14 +- .../division_by_zero_fp16_resolver.cpp | 10 +- .../eliminate_unsqueeze_gather.cpp | 23 +- .../common_optimizations/fq_mul_fusion.cpp | 57 +- .../fq_reshape_fusion.cpp | 68 +- .../common_optimizations/gelu_fusion.cpp | 123 ++-- 
.../common_optimizations/hsigmoid_fusion.cpp | 150 +++-- .../common_optimizations/hswish_fusion.cpp | 108 ++-- .../interpolate_sequence_fusion.cpp | 85 ++- .../leaky_relu_fusion.cpp | 24 +- .../lin_op_sequence_fusion.cpp | 35 +- .../mark_precision_sensitive_divides.cpp | 7 +- .../mark_precision_sensitive_subgraphs.cpp | 8 +- .../matmul_const_transposes_extraction.cpp | 23 +- .../matmul_multiply_fusion.cpp | 35 +- .../common_optimizations/mish_fusion.cpp | 13 +- .../moc_transformations.cpp | 104 ++-- .../common_optimizations/mul_conv_fusion.cpp | 76 ++- .../mul_fake_quantize_fusion.cpp | 43 +- .../common_optimizations/mvn_fusion.cpp | 130 ++-- .../nearest_neighbor_upsampling_fusion.cpp | 229 ++++--- .../common_optimizations/nop_elimination.cpp | 92 ++- .../normalize_l2_fusion.cpp | 26 +- .../optimize_strided_slice.cpp | 78 ++- .../common_optimizations/pad_fusion.cpp | 203 +++--- .../pull_transpose_through_fq.cpp | 27 +- .../random_uniform_fusion.cpp | 10 +- .../relu_fake_quantize_fusion.cpp | 18 +- .../remove_concat_zero_dim_input.cpp | 40 +- .../remove_filtering_boxes_by_size.cpp | 50 +- ...move_multi_subgraph_op_dangling_params.cpp | 45 +- .../reshape_sequence_fusion.cpp | 64 +- .../common_optimizations/ric_fusion.cpp | 264 ++++---- .../shuffle_channels_fusion.cpp | 78 ++- .../simplify_shape_of_sub_graph.cpp | 51 +- ...ip_gather_before_transpose_and_reshape.cpp | 17 +- .../common_optimizations/softmax_fusion.cpp | 36 +- .../common_optimizations/softplus_fusion.cpp | 25 +- .../softplus_to_mish_fusion.cpp | 13 +- .../space_to_batch_fusion.cpp | 86 +-- ...plit_concat_pair_to_interpolate_fusion.cpp | 91 ++- .../split_squeeze_concat_fusion.cpp | 56 +- .../strides_optimization.cpp | 83 ++- .../common_optimizations/subtract_fusion.cpp | 13 +- .../common_optimizations/swish_fusion.cpp | 46 +- ...anspose_reshape_elimination_for_matmul.cpp | 82 ++- .../transpose_sinking.cpp | 127 ++-- .../transpose_to_reshape.cpp | 32 +- .../weights_dequantize_to_fake_quantize.cpp | 45 +- .../wrap_interpolate_into_transposes.cpp | 46 +- .../control_flow/unroll_if.cpp | 9 +- .../control_flow/unroll_tensor_iterator.cpp | 86 +-- ...decompression_convert_constant_folding.cpp | 4 +- ...convert_constant_folding_on_const_path.cpp | 17 +- .../batch_norm_decomposition.cpp | 57 +- .../bidirectional_sequences_decomposition.cpp | 238 +++---- .../op_conversions/convert_batch_to_space.cpp | 57 +- .../op_conversions/convert_broadcast3.cpp | 50 +- .../convert_broadcast_to_tiles.cpp | 21 +- .../convert_deformable_conv_v8_to_v1.cpp | 29 +- .../op_conversions/convert_depth_to_space.cpp | 55 +- .../op_conversions/convert_divide.cpp | 30 +- .../op_conversions/convert_gather_0d.cpp | 20 +- .../convert_gather_downgrade.cpp | 5 +- .../op_conversions/convert_gather_upgrade.cpp | 5 +- .../op_conversions/convert_gelu.cpp | 17 +- .../convert_interpolate1_to_interpolate4.cpp | 43 +- .../convert_matrix_nms_to_matrix_nms_ie.cpp | 20 +- .../convert_maxpool_downgrade.cpp | 17 +- .../convert_maxpool_upgrade.cpp | 4 +- .../convert_minimum_to_power_and_max.cpp | 26 +- .../op_conversions/convert_mod.cpp | 14 +- ...rt_multiclass_nms_to_multiclass_nms_ie.cpp | 20 +- .../op_conversions/convert_mvn1_to_mvn6.cpp | 16 +- .../op_conversions/convert_negative.cpp | 15 +- .../convert_nms_to_nms_ie_internal.cpp | 82 +-- .../convert_pad_to_group_conv.cpp | 38 +- .../convert_previous_nms_to_nms_5.cpp | 232 ++++--- .../convert_prior_box_v8_to_v0.cpp | 4 +- .../convert_reduce_to_pooling.cpp | 25 +- .../convert_scatter_elements_to_scatter.cpp | 62 +- 
.../convert_sequences_to_tensor_iterator.cpp | 588 +++++++++--------- .../op_conversions/convert_shapeof3.cpp | 10 +- .../convert_shuffle_channels3.cpp | 43 +- .../convert_slice_to_strided_slice.cpp | 51 +- .../convert_softmax_downgrade.cpp | 4 +- .../convert_softmax_upgrade.cpp | 4 +- .../op_conversions/convert_space_to_batch.cpp | 41 +- .../op_conversions/convert_space_to_depth.cpp | 27 +- .../op_conversions/convert_subtract.cpp | 26 +- .../convert_ti_to_sequences.cpp | 225 +++---- .../op_conversions/convert_topk3.cpp | 19 +- .../detection_output_upgrade.cpp | 22 +- .../op_conversions/einsum_decomposition.cpp | 192 ++++-- .../op_conversions/fq_decomposition.cpp | 39 +- .../gather_normalize_negative_indices.cpp | 18 +- .../op_conversions/gelu7_downgrade.cpp | 7 +- .../op_conversions/gru_cell_decomposition.cpp | 34 +- .../op_conversions/hsigmoid_decomposition.cpp | 15 +- .../op_conversions/hswish_decomposition.cpp | 12 +- .../log_softmax_decomposition.cpp | 17 +- .../lstm_cell_decomposition.cpp | 21 +- .../op_conversions/mvn6_decomposition.cpp | 12 +- .../normalize_l2_decomposition.cpp | 33 +- .../reduce_l1_decomposition.cpp | 21 +- .../reduce_l2_decomposition.cpp | 24 +- .../op_conversions/rnn_cell_decomposition.cpp | 12 +- .../simplify_ctc_greedy_decoder_seq_len.cpp | 78 ++- .../op_conversions/softmax_decomposition.cpp | 15 +- .../op_conversions/softplus_decomposition.cpp | 13 +- .../convert_opset2_to_opset1.cpp | 9 +- .../convert_opset3_to_opset2.cpp | 11 +- .../resolve_names_collisions.cpp | 9 +- .../rt_info/nms_selected_indices.cpp | 5 +- .../broadcast_const_range_replacement.cpp | 46 +- .../proposal_scales_stridedslice.cpp | 8 +- .../smart_reshape/strided_slice_squeeze.cpp | 9 +- .../src/transformations/utils/utils.cpp | 137 ++-- 285 files changed, 4899 insertions(+), 4192 deletions(-) diff --git a/src/common/transformations/CMakeLists.txt b/src/common/transformations/CMakeLists.txt index b676441e892..5e2c2b339d1 100644 --- a/src/common/transformations/CMakeLists.txt +++ b/src/common/transformations/CMakeLists.txt @@ -30,7 +30,7 @@ target_link_libraries(${TARGET_NAME}_obj PRIVATE ngraph_reference openvino::itt target_include_directories(${TARGET_NAME}_obj PRIVATE $ "${CMAKE_CURRENT_SOURCE_DIR}/src") -add_cpplint_target(${TARGET_NAME}_cpplint FOR_TARGETS ${TARGET_NAME}_obj) +add_clang_format_target(${TARGET_NAME}_clang FOR_TARGETS ${TARGET_NAME}_obj) ie_mark_target_as_cc(${TARGET_NAME}_obj) diff --git a/src/common/transformations/include/ngraph_ops/nms_ie_internal.hpp b/src/common/transformations/include/ngraph_ops/nms_ie_internal.hpp index 34a5b6ec7ab..edd3b0f0e03 100644 --- a/src/common/transformations/include/ngraph_ops/nms_ie_internal.hpp +++ b/src/common/transformations/include/ngraph_ops/nms_ie_internal.hpp @@ -6,7 +6,6 @@ #include #include - #include #include "ngraph/coordinate_diff.hpp" @@ -48,7 +47,7 @@ public: bool visit_attributes(AttributeVisitor& visitor) override; - std::shared_ptr clone_with_new_inputs(const OutputVector & new_args) const override; + std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; int m_center_point_box; bool m_sort_result_descending = true; diff --git a/src/common/transformations/include/ngraph_ops/nms_static_shape_ie.hpp b/src/common/transformations/include/ngraph_ops/nms_static_shape_ie.hpp index cb7ee61fdb5..573cba61849 100644 --- a/src/common/transformations/include/ngraph_ops/nms_static_shape_ie.hpp +++ b/src/common/transformations/include/ngraph_ops/nms_static_shape_ie.hpp @@ -4,15 +4,25 @@ #pragma once 
-#include -#include #include +#include #include - #include +#include #include "ngraph/op/op.hpp" +namespace ov { +namespace op { +namespace v8 { + +class MulticlassNms; +class MatrixNms; + +} // namespace v8 +} // namespace op +} // namespace ov + namespace ngraph { namespace op { namespace internal { @@ -31,9 +41,8 @@ public: /// \param boxes Node producing the box coordinates /// \param scores Node producing the box scores /// \param attrs Attributes of the operation - NmsStaticShapeIE(const Output& boxes, - const Output& scores, - const Attributes& attrs) : BaseNmsOp(boxes, scores, attrs) { + NmsStaticShapeIE(const Output& boxes, const Output& scores, const Attributes& attrs) + : BaseNmsOp(boxes, scores, attrs) { this->constructor_validate_and_infer_types(); } void validate_and_infer_types() override; @@ -42,7 +51,8 @@ public: } private: - typedef struct {} init_rt_result; + typedef struct { + } init_rt_result; init_rt_result init_rt_info() { BaseNmsOp::get_rt_info()["opset"] = "ie_internal_opset"; @@ -98,7 +108,9 @@ void NmsStaticShapeIE::validate_and_infer_types() { } template -const ::ngraph::Node::type_info_t& NmsStaticShapeIE::get_type_info() const { return get_type_info_static(); } +const ::ngraph::Node::type_info_t& NmsStaticShapeIE::get_type_info() const { + return get_type_info_static(); +} template const ::ngraph::Node::type_info_t& NmsStaticShapeIE::get_type_info_static() { @@ -108,14 +120,17 @@ const ::ngraph::Node::type_info_t& NmsStaticShapeIE::get_type_info_st // but currently it will not pass conversion ot Legacy Opset correctly static const std::string name = BaseNmsOpTypeInfoPtr->name; - static const ::ngraph::Node::type_info_t type_info_static{ - name.c_str(), BaseNmsOpTypeInfoPtr->version, "ie_internal_opset", BaseNmsOpTypeInfoPtr}; + static const ::ngraph::Node::type_info_t type_info_static{name.c_str(), + BaseNmsOpTypeInfoPtr->version, + "ie_internal_opset", + BaseNmsOpTypeInfoPtr}; return type_info_static; } #ifndef OPENVINO_STATIC_LIBRARY template -const ::ngraph::Node::type_info_t NmsStaticShapeIE::type_info = NmsStaticShapeIE::get_type_info_static(); +const ::ngraph::Node::type_info_t NmsStaticShapeIE::type_info = + NmsStaticShapeIE::get_type_info_static(); #endif #ifdef __clang__ diff --git a/src/common/transformations/include/ngraph_ops/type_relaxed.hpp b/src/common/transformations/include/ngraph_ops/type_relaxed.hpp index 0b0c6735643..d7de519a8fc 100644 --- a/src/common/transformations/include/ngraph_ops/type_relaxed.hpp +++ b/src/common/transformations/include/ngraph_ops/type_relaxed.hpp @@ -4,15 +4,14 @@ #pragma once -#include -#include #include -#include +#include #include - -#include - #include +#include +#include +#include + #include "ngraph/op/op.hpp" #include "ngraph/variant.hpp" @@ -24,12 +23,10 @@ class NGRAPH_API TypeRelaxedBase { public: virtual ~TypeRelaxedBase(); - explicit TypeRelaxedBase( - const element::TypeVector& _input_data_types = {}, - const element::TypeVector& _output_data_types = {}) : - m_input_data_types(_input_data_types), - m_output_data_types(_output_data_types) { - } + explicit TypeRelaxedBase(const element::TypeVector& _input_data_types = {}, + const element::TypeVector& _output_data_types = {}) + : m_input_data_types(_input_data_types), + m_output_data_types(_output_data_types) {} /// \return Data type that will be set for output with a given index outputIndex. /// If output with a specified index outputIndex hasn't been set before, element::undefined will returned. 
@@ -83,7 +80,7 @@ public: } protected: - void remember_input_data_types(Node &node, element::TypeVector &old_input_types) { + void remember_input_data_types(Node& node, element::TypeVector& old_input_types) { // Remember all input data types for (size_t i = 0; i < node.get_input_size(); ++i) { old_input_types.push_back(node.get_input_element_type(i)); @@ -100,7 +97,7 @@ protected: } } - void restore_input_data_types(Node &node, const element::TypeVector &old_input_types) { + void restore_input_data_types(Node& node, const element::TypeVector& old_input_types) { // Restore original input data types for (size_t i = 0; i < node.get_input_size(); ++i) { OPENVINO_SUPPRESS_DEPRECATED_START @@ -133,9 +130,10 @@ protected: visitor.on_attribute("output_data_types", m_output_data_types); } - typedef struct {} init_rt_result; + typedef struct { + } init_rt_result; - init_rt_result init_rt_info(Node &node) const { + init_rt_result init_rt_info(Node& node) const { node.get_rt_info()["opset"] = "type_relaxed_opset"; return {}; } @@ -200,29 +198,26 @@ public: TypeRelaxed() = default; - TypeRelaxed( - const BaseOp& base_op, - element::Type overridden_type) : - TypeRelaxed(base_op, - element::TypeVector(base_op.get_input_size(), overridden_type), - element::TypeVector(base_op.get_output_size(), overridden_type)) { - } + TypeRelaxed(const BaseOp& base_op, element::Type overridden_type) + : TypeRelaxed(base_op, + element::TypeVector(base_op.get_input_size(), overridden_type), + element::TypeVector(base_op.get_output_size(), overridden_type)) {} - explicit TypeRelaxed( - const BaseOp& base_op, - const element::TypeVector& _input_data_types = {}, - const element::TypeVector& _output_data_types = {}) : - BaseOp(base_op), TypeRelaxedBase(_input_data_types, _output_data_types) { + explicit TypeRelaxed(const BaseOp& base_op, + const element::TypeVector& _input_data_types = {}, + const element::TypeVector& _output_data_types = {}) + : BaseOp(base_op), + TypeRelaxedBase(_input_data_types, _output_data_types) { init(); } /// Creating a new TypeRelaxed operation by calling one of the original op ctors forwarding arguments directly. - template - TypeRelaxed( - const element::TypeVector& _input_data_types, - const element::TypeVector& _output_data_types, - Args&& ... args) : - BaseOp(std::forward(args)...), TypeRelaxedBase(_input_data_types, _output_data_types) { + template + TypeRelaxed(const element::TypeVector& _input_data_types, + const element::TypeVector& _output_data_types, + Args&&... 
args) + : BaseOp(std::forward(args)...), + TypeRelaxedBase(_input_data_types, _output_data_types) { init(); } @@ -261,7 +256,7 @@ bool TypeRelaxed::evaluate(const HostTensorVector& outputs, const HostTe convert->set_destination_type(expected_input_type); casted_inputs[i] = std::make_shared(expected_input_type, inputs[i]->get_shape()); - if (!convert->evaluate({ casted_inputs[i] }, { inputs[i] })) { + if (!convert->evaluate({casted_inputs[i]}, {inputs[i]})) { return false; } } @@ -273,7 +268,8 @@ bool TypeRelaxed::evaluate(const HostTensorVector& outputs, const HostTe if (expected_output_type == element::undefined || expected_output_type == m_original_output_data_types[i]) { original_outputs[i] = outputs[i]; } else { - original_outputs[i] = std::make_shared(m_original_output_data_types[i], BaseOp::get_output_partial_shape(i)); + original_outputs[i] = + std::make_shared(m_original_output_data_types[i], BaseOp::get_output_partial_shape(i)); } } @@ -284,14 +280,16 @@ bool TypeRelaxed::evaluate(const HostTensorVector& outputs, const HostTe for (size_t i = 0; i < BaseOp::get_output_size(); ++i) { const auto expected_output_type = get_overridden_output_type(i); - if (expected_output_type != element::undefined && original_outputs[i]->get_element_type() != expected_output_type) { + if (expected_output_type != element::undefined && + original_outputs[i]->get_element_type() != expected_output_type) { if (convert == nullptr) { convert = std::make_shared(); } convert->set_destination_type(expected_output_type); - const auto casted_output = std::make_shared(expected_output_type, original_outputs[i]->get_shape()); - if (!convert->evaluate({ outputs[i] }, { original_outputs[i] })) { + const auto casted_output = + std::make_shared(expected_output_type, original_outputs[i]->get_shape()); + if (!convert->evaluate({outputs[i]}, {original_outputs[i]})) { return false; } } @@ -314,12 +312,12 @@ void TypeRelaxed::validate_and_infer_types() { restore_input_data_types(*this, old_input_types); } - template std::shared_ptr TypeRelaxed::clone_with_new_inputs(const OutputVector& new_args) const { std::lock_guard lock(type_relax_mutex); // copy then modify inputs - std::shared_ptr new_node = std::make_shared>((BaseOp&)(*this), m_input_data_types, m_output_data_types); + std::shared_ptr new_node = + std::make_shared>((BaseOp&)(*this), m_input_data_types, m_output_data_types); for (size_t i = 0; i < new_node->get_input_size(); ++i) { new_node->input(i).replace_source_output(new_args[i]); } @@ -336,13 +334,17 @@ bool TypeRelaxed::visit_attributes(AttributeVisitor& visitor) { } template -const ::ngraph::Node::type_info_t& TypeRelaxed::get_type_info() const { return get_type_info_static(); } +const ::ngraph::Node::type_info_t& TypeRelaxed::get_type_info() const { + return get_type_info_static(); +} template const ::ngraph::Node::type_info_t& TypeRelaxed::get_type_info_static() { auto baseOpTypeInfoPtr = &BaseOp::get_type_info_static(); - static const ::ngraph::Node::type_info_t type_info_static{ - baseOpTypeInfoPtr->name, baseOpTypeInfoPtr->version, baseOpTypeInfoPtr->version_id, baseOpTypeInfoPtr}; + static const ::ngraph::Node::type_info_t type_info_static{baseOpTypeInfoPtr->name, + baseOpTypeInfoPtr->version, + baseOpTypeInfoPtr->version_id, + baseOpTypeInfoPtr}; return type_info_static; } diff --git a/src/common/transformations/include/transformations/common_optimizations/add_fake_quantize_fusion.hpp b/src/common/transformations/include/transformations/common_optimizations/add_fake_quantize_fusion.hpp index 
8aac9a9c497..5453aba6676 100644 --- a/src/common/transformations/include/transformations/common_optimizations/add_fake_quantize_fusion.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/add_fake_quantize_fusion.hpp @@ -4,12 +4,10 @@ #pragma once -#include #include - -#include - #include +#include +#include namespace ngraph { namespace pass { @@ -26,7 +24,7 @@ class TRANSFORMATIONS_API AddFakeQuantizeFusion; * Restrictions: * - second input to Add is a Constant */ -class ngraph::pass::AddFakeQuantizeFusion: public ngraph::pass::MatcherPass { +class ngraph::pass::AddFakeQuantizeFusion : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; AddFakeQuantizeFusion(); diff --git a/src/common/transformations/include/transformations/common_optimizations/align_eltwise_input_ranks.hpp b/src/common/transformations/include/transformations/common_optimizations/align_eltwise_input_ranks.hpp index 16469f1fb95..ba93c0ca3b8 100644 --- a/src/common/transformations/include/transformations/common_optimizations/align_eltwise_input_ranks.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/align_eltwise_input_ranks.hpp @@ -4,8 +4,8 @@ #pragma once -#include #include +#include /** * @ingroup ie_transformation_common_api @@ -15,7 +15,7 @@ namespace ngraph { namespace pass { -class TRANSFORMATIONS_API AlignEltwiseInputRanks: public MatcherPass { +class TRANSFORMATIONS_API AlignEltwiseInputRanks : public MatcherPass { public: NGRAPH_RTTI_DECLARATION; AlignEltwiseInputRanks(); diff --git a/src/common/transformations/include/transformations/common_optimizations/batch_to_space_fusion.hpp b/src/common/transformations/include/transformations/common_optimizations/batch_to_space_fusion.hpp index c9b03a3e6a6..fae7826198b 100644 --- a/src/common/transformations/include/transformations/common_optimizations/batch_to_space_fusion.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/batch_to_space_fusion.hpp @@ -4,12 +4,10 @@ #pragma once -#include #include - -#include - #include +#include +#include namespace ngraph { namespace pass { @@ -30,7 +28,7 @@ class TRANSFORMATIONS_API BatchToSpaceFusion; * - DepthToSpaceMode must be BLOCKS_FIRST */ -class ngraph::pass::BatchToSpaceFusion: public ngraph::pass::MatcherPass { +class ngraph::pass::BatchToSpaceFusion : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; BatchToSpaceFusion(); diff --git a/src/common/transformations/include/transformations/common_optimizations/binarize_weights.hpp b/src/common/transformations/include/transformations/common_optimizations/binarize_weights.hpp index cff18174b78..6a1e857f2ef 100644 --- a/src/common/transformations/include/transformations/common_optimizations/binarize_weights.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/binarize_weights.hpp @@ -4,17 +4,18 @@ #pragma once -#include #include +#include namespace ngraph { namespace pass { class TRANSFORMATIONS_API BinarizeWeights; -} // namespace pass -} // namespace ngraph +} // namespace pass +} // namespace ngraph +// clang-format off /** * @ingroup ie_transformation_common_api * @brief This transformation converts weights to -1/+1 form @@ -71,6 +72,7 @@ class TRANSFORMATIONS_API BinarizeWeights; * Normalization factors are chosen based output_high value. 
* If it's zero - norm factor is equal to output_low and output_high otherwise */ +// clang-format on class ngraph::pass::BinarizeWeights : public ngraph::pass::MatcherPass { public: diff --git a/src/common/transformations/include/transformations/common_optimizations/broadcast_elementwise_fusion.hpp b/src/common/transformations/include/transformations/common_optimizations/broadcast_elementwise_fusion.hpp index 20d2784686c..a7f87a97b63 100644 --- a/src/common/transformations/include/transformations/common_optimizations/broadcast_elementwise_fusion.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/broadcast_elementwise_fusion.hpp @@ -4,10 +4,10 @@ #pragma once -#include - #include #include +#include + #include "ngraph/pattern/matcher.hpp" namespace ngraph { @@ -24,7 +24,7 @@ class TRANSFORMATIONS_API BroadcastElementwiseFusion; * are equal neighboring input shape of ElementWise. */ -class ngraph::pass::BroadcastElementwiseFusion: public ngraph::pass::MatcherPass { +class ngraph::pass::BroadcastElementwiseFusion : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; BroadcastElementwiseFusion(); diff --git a/src/common/transformations/include/transformations/common_optimizations/clamp_fusion.hpp b/src/common/transformations/include/transformations/common_optimizations/clamp_fusion.hpp index 00e19979d3d..5e555279a2d 100644 --- a/src/common/transformations/include/transformations/common_optimizations/clamp_fusion.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/clamp_fusion.hpp @@ -4,12 +4,10 @@ #pragma once -#include #include - -#include - #include +#include +#include namespace ngraph { namespace pass { @@ -28,7 +26,7 @@ class TRANSFORMATIONS_API ClampFusion; * - one of the parameters to Minimum is a scalar constant */ -class ngraph::pass::ClampFusion: public ngraph::pass::MatcherPass { +class ngraph::pass::ClampFusion : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; ClampFusion(); diff --git a/src/common/transformations/include/transformations/common_optimizations/common_optimizations.hpp b/src/common/transformations/include/transformations/common_optimizations/common_optimizations.hpp index c7d23ef2682..a7b6cd8f092 100644 --- a/src/common/transformations/include/transformations/common_optimizations/common_optimizations.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/common_optimizations.hpp @@ -4,13 +4,10 @@ #pragma once -#include #include - -#include - #include - +#include +#include namespace ngraph { namespace pass { @@ -20,7 +17,7 @@ class TRANSFORMATIONS_API CommonOptimizations; } // namespace pass } // namespace ngraph -class ngraph::pass::CommonOptimizations: public ngraph::pass::FunctionPass { +class ngraph::pass::CommonOptimizations : public ngraph::pass::FunctionPass { public: NGRAPH_RTTI_DECLARATION; bool run_on_model(const std::shared_ptr& f) override; diff --git a/src/common/transformations/include/transformations/common_optimizations/compress_float_constants.hpp b/src/common/transformations/include/transformations/common_optimizations/compress_float_constants.hpp index 7fcfcd12509..0ece6dc9e35 100644 --- a/src/common/transformations/include/transformations/common_optimizations/compress_float_constants.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/compress_float_constants.hpp @@ -4,8 +4,8 @@ #pragma once -#include "transformations_visibility.hpp" #include "openvino/pass/graph_rewrite.hpp" +#include 
"transformations_visibility.hpp" namespace ov { namespace pass { diff --git a/src/common/transformations/include/transformations/common_optimizations/concat_reduce_fusion.hpp b/src/common/transformations/include/transformations/common_optimizations/concat_reduce_fusion.hpp index e6f4f7b3716..da8c7a225de 100644 --- a/src/common/transformations/include/transformations/common_optimizations/concat_reduce_fusion.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/concat_reduce_fusion.hpp @@ -4,8 +4,8 @@ #pragma once -#include #include +#include namespace ngraph { namespace pass { @@ -32,7 +32,7 @@ public: * @ingroup ie_transformation_common_api * @brief PullSqueezeThroughEltwise transformation propagates Squeeze up through binary elementwise operations: */ -class ngraph::pass::PullSqueezeThroughEltwise: public ngraph::pass::MatcherPass { +class ngraph::pass::PullSqueezeThroughEltwise : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; PullSqueezeThroughEltwise(); @@ -74,7 +74,7 @@ public: * by a single Minimum/Maximum with 2 inputs and tries to eliminate Squeeze/Unsqueeze layers before and after Min/Max. */ -class ngraph::pass::ConcatReduceFusion: public ngraph::pass::GraphRewrite { +class ngraph::pass::ConcatReduceFusion : public ngraph::pass::GraphRewrite { public: NGRAPH_RTTI_DECLARATION; ConcatReduceFusion(); diff --git a/src/common/transformations/include/transformations/common_optimizations/conv_mul_fusion.hpp b/src/common/transformations/include/transformations/common_optimizations/conv_mul_fusion.hpp index 38095b08b33..bc56afa8d59 100644 --- a/src/common/transformations/include/transformations/common_optimizations/conv_mul_fusion.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/conv_mul_fusion.hpp @@ -4,12 +4,10 @@ #pragma once -#include #include - -#include - +#include #include +#include namespace ngraph { namespace pass { @@ -22,25 +20,25 @@ class TRANSFORMATIONS_API GroupConvolutionBackpropDataMultiplyFusion; } // namespace pass } // namespace ngraph -class ngraph::pass::ConvolutionMultiplyFusion: public ngraph::pass::MatcherPass { +class ngraph::pass::ConvolutionMultiplyFusion : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; ConvolutionMultiplyFusion(); }; -class ngraph::pass::GroupConvolutionMultiplyFusion: public ngraph::pass::MatcherPass { +class ngraph::pass::GroupConvolutionMultiplyFusion : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; GroupConvolutionMultiplyFusion(); }; -class ngraph::pass::ConvolutionBackpropDataMultiplyFusion: public ngraph::pass::MatcherPass { +class ngraph::pass::ConvolutionBackpropDataMultiplyFusion : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; ConvolutionBackpropDataMultiplyFusion(); }; -class ngraph::pass::GroupConvolutionBackpropDataMultiplyFusion: public ngraph::pass::MatcherPass { +class ngraph::pass::GroupConvolutionBackpropDataMultiplyFusion : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; GroupConvolutionBackpropDataMultiplyFusion(); diff --git a/src/common/transformations/include/transformations/common_optimizations/conv_to_binary_conv.hpp b/src/common/transformations/include/transformations/common_optimizations/conv_to_binary_conv.hpp index fa58a06a4a5..f995f09ce1b 100644 --- a/src/common/transformations/include/transformations/common_optimizations/conv_to_binary_conv.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/conv_to_binary_conv.hpp @@ 
-4,16 +4,16 @@ #pragma once -#include #include +#include namespace ngraph { namespace pass { class TRANSFORMATIONS_API ConvToBinaryConv; -} // namespace pass -} // namespace ngraph +} // namespace pass +} // namespace ngraph /** * @ingroup ie_transformation_common_api diff --git a/src/common/transformations/include/transformations/common_optimizations/convert_nms_gather_path_to_unsigned.hpp b/src/common/transformations/include/transformations/common_optimizations/convert_nms_gather_path_to_unsigned.hpp index 2e48c5a81e0..6bae6beee9e 100644 --- a/src/common/transformations/include/transformations/common_optimizations/convert_nms_gather_path_to_unsigned.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/convert_nms_gather_path_to_unsigned.hpp @@ -4,8 +4,8 @@ #pragma once -#include #include +#include namespace ngraph { namespace pass { @@ -25,7 +25,7 @@ class TRANSFORMATIONS_API ConvertNmsGatherPathToUnsigned; * Gather-8 will accept UINT32_MAX which is always outside of the bounds * and corresponding output for such indices in gather always will be filled with zeros. */ -class ngraph::pass::ConvertNmsGatherPathToUnsigned: public ngraph::pass::GraphRewrite { +class ngraph::pass::ConvertNmsGatherPathToUnsigned : public ngraph::pass::GraphRewrite { public: NGRAPH_RTTI_DECLARATION; ConvertNmsGatherPathToUnsigned(); diff --git a/src/common/transformations/include/transformations/common_optimizations/convert_quantize_dequantize.hpp b/src/common/transformations/include/transformations/common_optimizations/convert_quantize_dequantize.hpp index 18f976f8b56..8f268e190f6 100644 --- a/src/common/transformations/include/transformations/common_optimizations/convert_quantize_dequantize.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/convert_quantize_dequantize.hpp @@ -4,12 +4,10 @@ #pragma once -#include #include - -#include - #include +#include +#include namespace ngraph { namespace pass { @@ -30,7 +28,7 @@ class TRANSFORMATIONS_API ConvertQuantizeDequantize; * - 'zero_point' and 'scale' must be broadcastable to FakeQuantize's output */ -class ngraph::pass::ConvertQuantizeDequantize: public ngraph::pass::MatcherPass { +class ngraph::pass::ConvertQuantizeDequantize : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; ConvertQuantizeDequantize(); diff --git a/src/common/transformations/include/transformations/common_optimizations/depth_to_space_fusion.hpp b/src/common/transformations/include/transformations/common_optimizations/depth_to_space_fusion.hpp index eaaf76dd733..0bc433a65f6 100644 --- a/src/common/transformations/include/transformations/common_optimizations/depth_to_space_fusion.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/depth_to_space_fusion.hpp @@ -4,17 +4,15 @@ #pragma once -#include #include - -#include - #include +#include +#include namespace ngraph { namespace pass { - class TRANSFORMATIONS_API DepthToSpaceFusion; +class TRANSFORMATIONS_API DepthToSpaceFusion; } // namespace pass } // namespace ngraph @@ -41,7 +39,7 @@ namespace pass { * */ -class ngraph::pass::DepthToSpaceFusion: public ngraph::pass::MatcherPass { +class ngraph::pass::DepthToSpaceFusion : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; DepthToSpaceFusion(); diff --git a/src/common/transformations/include/transformations/common_optimizations/dilated_convolution_converter.hpp b/src/common/transformations/include/transformations/common_optimizations/dilated_convolution_converter.hpp index 
480aee9cd4b..c9994063674 100644 --- a/src/common/transformations/include/transformations/common_optimizations/dilated_convolution_converter.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/dilated_convolution_converter.hpp @@ -4,12 +4,10 @@ #pragma once -#include #include - -#include - #include +#include +#include namespace ngraph { namespace pass { @@ -28,7 +26,7 @@ class TRANSFORMATIONS_API DilatedConvolutionConverter; * - pads in SpaceToBatch must have 0 on first and second position */ -class ngraph::pass::DilatedConvolutionConverter: public ngraph::pass::MatcherPass { +class ngraph::pass::DilatedConvolutionConverter : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; DilatedConvolutionConverter(); diff --git a/src/common/transformations/include/transformations/common_optimizations/dimension_tracking.hpp b/src/common/transformations/include/transformations/common_optimizations/dimension_tracking.hpp index a1137a70233..f6ad7c8656b 100644 --- a/src/common/transformations/include/transformations/common_optimizations/dimension_tracking.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/dimension_tracking.hpp @@ -5,11 +5,9 @@ #pragma once #include - -#include - -#include #include +#include +#include using P2Btype = std::unordered_map, std::unordered_set>; @@ -21,11 +19,14 @@ class TRANSFORMATIONS_API FindBatch; } // namespace pass } // namespace ov -class ov::pass::FindBatch: public ov::pass::ModelPass { +class ov::pass::FindBatch : public ov::pass::ModelPass { public: OPENVINO_RTTI("FindBatch"); - FindBatch(bool detach_detection_output = false, bool track = true) : track(track), detach_do(detach_detection_output) {} + FindBatch(bool detach_detection_output = false, bool track = true) + : track(track), + detach_do(detach_detection_output) {} bool run_on_model(const std::shared_ptr& m) override; + protected: bool track = true, detach_do = false; }; @@ -34,14 +35,19 @@ namespace ov { class DimensionTracker; namespace batch_util { - void mark_batch(const std::shared_ptr ¶meter, P2Btype &map, const std::unordered_set &batches); - void mark_no_batch(const std::shared_ptr ¶meter, P2Btype &map); - void mark_layout_independent_batch(const std::shared_ptr ¶meter, const std::shared_ptr & result, P2Btype &map); - void mark_with_unique_dimension_labels(const std::shared_ptr &m, const ov::DimensionTracker &dt); - void restore_original_dimensions( - const std::map, ov::PartialShape>& parameter_to_shape, bool leave_batch_dynamic = true); - bool check_batch_tracks_through_all_the_nodes(const std::shared_ptr& m); - P2Btype find_batch(const std::shared_ptr &m); - bool detach_detection_output(const std::shared_ptr& f); -} // namespace batch_util -} // namespace ov +void mark_batch(const std::shared_ptr& parameter, + P2Btype& map, + const std::unordered_set& batches); +void mark_no_batch(const std::shared_ptr& parameter, P2Btype& map); +void mark_layout_independent_batch(const std::shared_ptr& parameter, + const std::shared_ptr& result, + P2Btype& map); +void mark_with_unique_dimension_labels(const std::shared_ptr& m, const ov::DimensionTracker& dt); +void restore_original_dimensions( + const std::map, ov::PartialShape>& parameter_to_shape, + bool leave_batch_dynamic = true); +bool check_batch_tracks_through_all_the_nodes(const std::shared_ptr& m); +P2Btype find_batch(const std::shared_ptr& m); +bool detach_detection_output(const std::shared_ptr& f); +} // namespace batch_util +} // namespace ov diff --git 
a/src/common/transformations/include/transformations/common_optimizations/disable_random_uniform_constant_folding.hpp b/src/common/transformations/include/transformations/common_optimizations/disable_random_uniform_constant_folding.hpp index 32b9792098c..2da8555d6c9 100644 --- a/src/common/transformations/include/transformations/common_optimizations/disable_random_uniform_constant_folding.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/disable_random_uniform_constant_folding.hpp @@ -5,10 +5,8 @@ #pragma once #include - -#include - #include +#include namespace ngraph { namespace pass { diff --git a/src/common/transformations/include/transformations/common_optimizations/disable_shapeof_constant_folding.hpp b/src/common/transformations/include/transformations/common_optimizations/disable_shapeof_constant_folding.hpp index 14b4909e259..1e729ecff1a 100644 --- a/src/common/transformations/include/transformations/common_optimizations/disable_shapeof_constant_folding.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/disable_shapeof_constant_folding.hpp @@ -5,10 +5,8 @@ #pragma once #include - -#include - #include +#include namespace ngraph { namespace pass { @@ -18,8 +16,7 @@ class TRANSFORMATIONS_API DisableShapeOfConstantFolding; } // namespace pass } // namespace ngraph - -class ngraph::pass::DisableShapeOfConstantFolding: public ngraph::pass::MatcherPass { +class ngraph::pass::DisableShapeOfConstantFolding : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; DisableShapeOfConstantFolding(); diff --git a/src/common/transformations/include/transformations/common_optimizations/division_by_zero_fp16_resolver.hpp b/src/common/transformations/include/transformations/common_optimizations/division_by_zero_fp16_resolver.hpp index 45b0fba0061..ee85b1e2f15 100644 --- a/src/common/transformations/include/transformations/common_optimizations/division_by_zero_fp16_resolver.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/division_by_zero_fp16_resolver.hpp @@ -4,11 +4,11 @@ #pragma once -#include #include - -#include #include +#include +#include + #include "ngraph/pattern/matcher.hpp" namespace ov { @@ -24,11 +24,11 @@ class TRANSFORMATIONS_API DivisionByZeroFP16Resolver; * @brief: clamps eps into fp16 minimal normalized value in input_1/Maximum(input_2, eps); input_1/Add(input_2, eps); * and input_1*Pow(Maximum[Add](input_2, eps), -z) patterns to prevent division by zero. * - * eps must be always nonzero to prevent from NaNs in such expressions if input_1 and input_2 simultaneously happened to be zero. - * We should keep in such patterns eps >= fp16 minimal normalized value so that - * CompressFloatConstants should not cast them into zero during compression into f16. + * eps must be always nonzero to prevent from NaNs in such expressions if input_1 and input_2 simultaneously happened to + * be zero. We should keep in such patterns eps >= fp16 minimal normalized value so that CompressFloatConstants should + * not cast them into zero during compression into f16. 
*/ -class ov::pass::DivisionByZeroFP16Resolver: public ngraph::pass::MatcherPass { +class ov::pass::DivisionByZeroFP16Resolver : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; DivisionByZeroFP16Resolver(); diff --git a/src/common/transformations/include/transformations/common_optimizations/eliminate_unsqueeze_gather.hpp b/src/common/transformations/include/transformations/common_optimizations/eliminate_unsqueeze_gather.hpp index 45824181755..45467edf817 100644 --- a/src/common/transformations/include/transformations/common_optimizations/eliminate_unsqueeze_gather.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/eliminate_unsqueeze_gather.hpp @@ -4,10 +4,10 @@ #pragma once -#include - #include #include +#include + #include "ngraph/pattern/matcher.hpp" namespace ngraph { diff --git a/src/common/transformations/include/transformations/common_optimizations/fq_mul_fusion.hpp b/src/common/transformations/include/transformations/common_optimizations/fq_mul_fusion.hpp index b2ee0c823dd..afc8b7c1278 100644 --- a/src/common/transformations/include/transformations/common_optimizations/fq_mul_fusion.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/fq_mul_fusion.hpp @@ -5,19 +5,17 @@ #pragma once #include -#include - -#include - #include +#include +#include namespace ngraph { namespace pass { class TRANSFORMATIONS_API FakeQuantizeMulFusion; -} // namespace pass -} // namespace ngraph +} // namespace pass +} // namespace ngraph /** * @ingroup ie_transformation_common_api @@ -28,6 +26,6 @@ class TRANSFORMATIONS_API FakeQuantizeMulFusion; class ngraph::pass::FakeQuantizeMulFusion : public ngraph::pass::MatcherPass { public: - NGRAPH_RTTI_DECLARATION; - FakeQuantizeMulFusion(); + NGRAPH_RTTI_DECLARATION; + FakeQuantizeMulFusion(); }; diff --git a/src/common/transformations/include/transformations/common_optimizations/fq_reshape_fusion.hpp b/src/common/transformations/include/transformations/common_optimizations/fq_reshape_fusion.hpp index b96a185a4ca..05bb7733e0e 100644 --- a/src/common/transformations/include/transformations/common_optimizations/fq_reshape_fusion.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/fq_reshape_fusion.hpp @@ -5,19 +5,17 @@ #pragma once #include -#include - -#include - #include +#include +#include namespace ngraph { namespace pass { class TRANSFORMATIONS_API FakeQuantizeReshapeFusion; -} // namespace pass -} // namespace ngraph +} // namespace pass +} // namespace ngraph /** * @ingroup ie_transformation_common_api diff --git a/src/common/transformations/include/transformations/common_optimizations/hsigmoid_fusion.hpp b/src/common/transformations/include/transformations/common_optimizations/hsigmoid_fusion.hpp index 7a200d36160..a6d97f6b01f 100644 --- a/src/common/transformations/include/transformations/common_optimizations/hsigmoid_fusion.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/hsigmoid_fusion.hpp @@ -5,10 +5,9 @@ #pragma once #include -#include - -#include #include +#include +#include namespace ngraph { namespace pass { @@ -27,7 +26,7 @@ class TRANSFORMATIONS_API HSigmoidFusionWithClampDiv; * @ingroup ie_transformation_common_api * @brief HSigmoidFusion transformation replaces a sub-graph ((min(Relu(x + 3), 6)) / 6) with a HSigmoid op. 
*/ -class ngraph::pass::HSigmoidFusionWithReluDiv: public ngraph::pass::MatcherPass { +class ngraph::pass::HSigmoidFusionWithReluDiv : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; HSigmoidFusionWithReluDiv(); @@ -37,7 +36,7 @@ public: * @ingroup ie_transformation_common_api * @brief HSigmoidFusion transformation replaces a sub-graph ((min(Relu(x + 3), 6)) * const(1/6)) with a HSigmoid op. */ -class ngraph::pass::HSigmoidFusionWithReluMul: public ngraph::pass::MatcherPass { +class ngraph::pass::HSigmoidFusionWithReluMul : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; HSigmoidFusionWithReluMul(); @@ -47,7 +46,7 @@ public: * @ingroup ie_transformation_common_api * @brief HSigmoidFusion transformation replaces a sub-graph (min(max(x + 3, 0), 6) / 6) with a HSigmoid op. */ -class ngraph::pass::HSigmoidFusionWithoutRelu: public ngraph::pass::MatcherPass { +class ngraph::pass::HSigmoidFusionWithoutRelu : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; HSigmoidFusionWithoutRelu(); @@ -57,7 +56,7 @@ public: * @ingroup ie_transformation_common_api * @brief HSigmoidFusion transformation replaces a sub-graph (Clamp(x + 3, 0, 6) * const(1/6)) with a HSigmoid op. */ -class ngraph::pass::HSigmoidFusionWithClampMul: public ngraph::pass::MatcherPass { +class ngraph::pass::HSigmoidFusionWithClampMul : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; HSigmoidFusionWithClampMul(); @@ -67,7 +66,7 @@ public: * @ingroup ie_transformation_common_api * @brief HSigmoidFusion transformation replaces a sub-graph (Clamp(x + 3, 0, 6) * / 6) with a HSigmoid op. */ -class ngraph::pass::HSigmoidFusionWithClampDiv: public ngraph::pass::MatcherPass { +class ngraph::pass::HSigmoidFusionWithClampDiv : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; HSigmoidFusionWithClampDiv(); @@ -77,7 +76,7 @@ public: * @ingroup ie_transformation_common_api * @brief HSigmoidFusion transformation replaces various sub-graphs with a HSigmoid op. */ -class ngraph::pass::HSigmoidFusion: public ngraph::pass::GraphRewrite { +class ngraph::pass::HSigmoidFusion : public ngraph::pass::GraphRewrite { public: NGRAPH_RTTI_DECLARATION; HSigmoidFusion() { diff --git a/src/common/transformations/include/transformations/common_optimizations/hswish_fusion.hpp b/src/common/transformations/include/transformations/common_optimizations/hswish_fusion.hpp index 769725abca1..68427f19bb0 100644 --- a/src/common/transformations/include/transformations/common_optimizations/hswish_fusion.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/hswish_fusion.hpp @@ -5,10 +5,9 @@ #pragma once #include -#include - -#include #include +#include +#include namespace ngraph { namespace pass { @@ -22,12 +21,11 @@ class TRANSFORMATIONS_API HSwishFusionWithClamp; } // namespace pass } // namespace ngraph - /** * @ingroup ie_transformation_common_api * @brief HSwishFusion transformation replaces a sub-graph (x * (min(Relu(x + 3), 6))) / 6 with a HSwish op. */ -class ngraph::pass::HSwishFusionWithReluDiv: public ngraph::pass::MatcherPass { +class ngraph::pass::HSwishFusionWithReluDiv : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; HSwishFusionWithReluDiv(); @@ -37,7 +35,7 @@ public: * @ingroup ie_transformation_common_api * @brief HSwishFusion transformation replaces a sub-graph (x * (min(Relu(x + 3), 6)) * const(1/6) with a HSwish op. 
*/ -class ngraph::pass::HSwishFusionWithReluMul: public ngraph::pass::MatcherPass { +class ngraph::pass::HSwishFusionWithReluMul : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; HSwishFusionWithReluMul(); @@ -47,18 +45,17 @@ public: * @ingroup ie_transformation_common_api * @brief HSwishFusion transformation replaces a sub-graph x * HSigmoid(x) with a HSwish op. */ -class ngraph::pass::HSwishFusionWithHSigmoid: public ngraph::pass::MatcherPass { +class ngraph::pass::HSwishFusionWithHSigmoid : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; HSwishFusionWithHSigmoid(); }; - /** * @ingroup ie_transformation_common_api * @brief HSwishFusion transformation replaces a sub-graph (Clamp(x + 3, 0, 6) * x) with a HSwish * 6. */ -class ngraph::pass::HSwishFusionWithClamp: public ngraph::pass::MatcherPass { +class ngraph::pass::HSwishFusionWithClamp : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; HSwishFusionWithClamp(); @@ -68,7 +65,7 @@ public: * @ingroup ie_transformation_common_api * @brief HSwishFusion transformation replaces various sub-graphs with a HSwish op. */ -class ngraph::pass::HSwishFusion: public ngraph::pass::GraphRewrite { +class ngraph::pass::HSwishFusion : public ngraph::pass::GraphRewrite { public: NGRAPH_RTTI_DECLARATION; HSwishFusion() { diff --git a/src/common/transformations/include/transformations/common_optimizations/interpolate_sequence_fusion.hpp b/src/common/transformations/include/transformations/common_optimizations/interpolate_sequence_fusion.hpp index 83268a143d8..7440b4bc2c8 100644 --- a/src/common/transformations/include/transformations/common_optimizations/interpolate_sequence_fusion.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/interpolate_sequence_fusion.hpp @@ -4,13 +4,12 @@ #pragma once -#include #include - -#include - #include #include +#include +#include + #include "ngraph/pattern/matcher.hpp" namespace ngraph { diff --git a/src/common/transformations/include/transformations/common_optimizations/leaky_relu_fusion.hpp b/src/common/transformations/include/transformations/common_optimizations/leaky_relu_fusion.hpp index 3988166826a..9808cfac046 100644 --- a/src/common/transformations/include/transformations/common_optimizations/leaky_relu_fusion.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/leaky_relu_fusion.hpp @@ -4,12 +4,10 @@ #pragma once -#include #include - -#include - #include +#include +#include namespace ngraph { namespace pass { @@ -25,7 +23,7 @@ class TRANSFORMATIONS_API LeakyReluFusion; * Multiply->Maximum to LeakyRelu */ -class ngraph::pass::LeakyReluFusion: public ngraph::pass::MatcherPass { +class ngraph::pass::LeakyReluFusion : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; LeakyReluFusion(); diff --git a/src/common/transformations/include/transformations/common_optimizations/lin_op_sequence_fusion.hpp b/src/common/transformations/include/transformations/common_optimizations/lin_op_sequence_fusion.hpp index 341d84d138f..a7a09a05894 100644 --- a/src/common/transformations/include/transformations/common_optimizations/lin_op_sequence_fusion.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/lin_op_sequence_fusion.hpp @@ -5,10 +5,9 @@ #pragma once #include -#include - -#include #include +#include +#include namespace ngraph { namespace pass { @@ -21,19 +20,19 @@ class TRANSFORMATIONS_API MultiplyMultiplyFusion; } // namespace pass } // namespace ngraph -class 
ngraph::pass::AddMultiplyFusion: public ngraph::pass::MatcherPass { +class ngraph::pass::AddMultiplyFusion : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; AddMultiplyFusion(); }; -class ngraph::pass::AddAddFusion: public ngraph::pass::MatcherPass { +class ngraph::pass::AddAddFusion : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; AddAddFusion(); }; -class ngraph::pass::MultiplyMultiplyFusion: public ngraph::pass::MatcherPass { +class ngraph::pass::MultiplyMultiplyFusion : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; MultiplyMultiplyFusion(); @@ -43,7 +42,7 @@ public: * @ingroup ie_transformation_common_api * @brief LinOpSequenceFusion transformation fuses linear operation sequence. */ -class ngraph::pass::LinOpSequenceFusion: public ngraph::pass::GraphRewrite { +class ngraph::pass::LinOpSequenceFusion : public ngraph::pass::GraphRewrite { public: NGRAPH_RTTI_DECLARATION; LinOpSequenceFusion() { diff --git a/src/common/transformations/include/transformations/common_optimizations/matmul_const_transposes_extraction.hpp b/src/common/transformations/include/transformations/common_optimizations/matmul_const_transposes_extraction.hpp index f528c666e3f..2a451f18a5a 100644 --- a/src/common/transformations/include/transformations/common_optimizations/matmul_const_transposes_extraction.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/matmul_const_transposes_extraction.hpp @@ -4,18 +4,19 @@ #pragma once -#include #include +#include /** * @ingroup ie_transformation_common_api - * @brief Resolves transpose_b key from MatMul operation if corresponding input is constant or FakeQuantize by inserting Transpose + * @brief Resolves transpose_b key from MatMul operation if corresponding input is constant or FakeQuantize by inserting + * Transpose */ namespace ngraph { namespace pass { -class TRANSFORMATIONS_API MatMulConstTransposesExtraction: public MatcherPass { +class TRANSFORMATIONS_API MatMulConstTransposesExtraction : public MatcherPass { public: NGRAPH_RTTI_DECLARATION; MatMulConstTransposesExtraction(); diff --git a/src/common/transformations/include/transformations/common_optimizations/matmul_multiply_fusion.hpp b/src/common/transformations/include/transformations/common_optimizations/matmul_multiply_fusion.hpp index 120d7805ba9..921fe8ffbb4 100644 --- a/src/common/transformations/include/transformations/common_optimizations/matmul_multiply_fusion.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/matmul_multiply_fusion.hpp @@ -4,9 +4,8 @@ #pragma once -#include - #include +#include namespace ngraph { namespace pass { @@ -57,7 +56,7 @@ class TRANSFORMATIONS_API MatMulMultiplyFusion; * | MatMul | * +--------+ */ -class ngraph::pass::MatMulMultiplyFusion: public ngraph::pass::MatcherPass { +class ngraph::pass::MatMulMultiplyFusion : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; MatMulMultiplyFusion(); diff --git a/src/common/transformations/include/transformations/common_optimizations/mish_fusion.hpp b/src/common/transformations/include/transformations/common_optimizations/mish_fusion.hpp index f91581d7714..546cde5a366 100644 --- a/src/common/transformations/include/transformations/common_optimizations/mish_fusion.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/mish_fusion.hpp @@ -4,13 +4,12 @@ #pragma once -#include #include - -#include - #include #include +#include +#include + #include "ngraph/pattern/matcher.hpp" 
namespace ngraph { @@ -26,7 +25,7 @@ class TRANSFORMATIONS_API MishFusion; * @brief MishFusion transformation replaces group of * operations: x * tanh(log(exp(x) + 1)) to Mish op. */ -class ngraph::pass::MishFusion: public ngraph::pass::MatcherPass { +class ngraph::pass::MishFusion : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; MishFusion(); diff --git a/src/common/transformations/include/transformations/common_optimizations/moc_transformations.hpp b/src/common/transformations/include/transformations/common_optimizations/moc_transformations.hpp index 19504b77988..57c3dbcdbb7 100644 --- a/src/common/transformations/include/transformations/common_optimizations/moc_transformations.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/moc_transformations.hpp @@ -5,10 +5,8 @@ #pragma once #include - -#include - #include +#include namespace ngraph { namespace pass { @@ -24,7 +22,7 @@ class TRANSFORMATIONS_API MOCTransformations; * with transformations pipeline but now it remains empty. */ -class ngraph::pass::MOCTransformations: public ngraph::pass::FunctionPass { +class ngraph::pass::MOCTransformations : public ngraph::pass::FunctionPass { bool m_use_shapes; bool m_low_precision_enabled; @@ -38,7 +36,8 @@ public: * low_precision sub-graphs as is. */ explicit MOCTransformations(bool use_shapes, bool low_precision_enabled = true) - : m_use_shapes(use_shapes), m_low_precision_enabled(low_precision_enabled) {} + : m_use_shapes(use_shapes), + m_low_precision_enabled(low_precision_enabled) {} bool run_on_model(const std::shared_ptr& m) override; }; diff --git a/src/common/transformations/include/transformations/common_optimizations/mul_conv_fusion.hpp b/src/common/transformations/include/transformations/common_optimizations/mul_conv_fusion.hpp index 14ddb53a782..ca48dd5665a 100644 --- a/src/common/transformations/include/transformations/common_optimizations/mul_conv_fusion.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/mul_conv_fusion.hpp @@ -4,12 +4,10 @@ #pragma once -#include #include - -#include - +#include #include +#include namespace ngraph { namespace pass { @@ -73,31 +71,31 @@ class TRANSFORMATIONS_API MultiplyGroupConvolutionBackpropDataFusion; * Restrictions: * - weights' shape is static * - if the constant input to Multiply has the same rank as 'input', the constant first dimension has to be 1 - * - constant input to Multiply has to be broadcastable to weights when 'Convolution Op' is either Convolution or GroupConvolution - * - shape of a constant input to Multiply has to be in one of following forms: (1), (1, 1, ..., 1), (C, 1, ..., 1), (1, C, 1, ..., 1) - * when 'Convolution Op' is either ConvolutionBackpropData or GroupConvolutionBackpropData + * - constant input to Multiply has to be broadcastable to weights when 'Convolution Op' is either Convolution or + * GroupConvolution + * - shape of a constant input to Multiply has to be in one of following forms: (1), (1, 1, ..., 1), (C, 1, ..., 1), (1, + * C, 1, ..., 1) when 'Convolution Op' is either ConvolutionBackpropData or GroupConvolutionBackpropData */ - -class ngraph::pass::MultiplyConvolutionFusion: public ngraph::pass::MatcherPass { +class ngraph::pass::MultiplyConvolutionFusion : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; MultiplyConvolutionFusion(); }; -class ngraph::pass::MultiplyGroupConvolutionFusion: public ngraph::pass::MatcherPass { +class ngraph::pass::MultiplyGroupConvolutionFusion : public 
ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; MultiplyGroupConvolutionFusion(); }; -class ngraph::pass::MultiplyConvolutionBackpropDataFusion: public ngraph::pass::MatcherPass { +class ngraph::pass::MultiplyConvolutionBackpropDataFusion : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; MultiplyConvolutionBackpropDataFusion(); }; -class ngraph::pass::MultiplyGroupConvolutionBackpropDataFusion: public ngraph::pass::MatcherPass { +class ngraph::pass::MultiplyGroupConvolutionBackpropDataFusion : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; MultiplyGroupConvolutionBackpropDataFusion(); diff --git a/src/common/transformations/include/transformations/common_optimizations/mul_fake_quantize_fusion.hpp b/src/common/transformations/include/transformations/common_optimizations/mul_fake_quantize_fusion.hpp index d1325633720..e4c5b4fb02c 100644 --- a/src/common/transformations/include/transformations/common_optimizations/mul_fake_quantize_fusion.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/mul_fake_quantize_fusion.hpp @@ -4,12 +4,10 @@ #pragma once -#include #include - -#include - #include +#include +#include namespace ngraph { namespace pass { @@ -26,7 +24,7 @@ class TRANSFORMATIONS_API MulFakeQuantizeFusion; * Restrictions: * - second input to Mul is a Constant */ -class ngraph::pass::MulFakeQuantizeFusion: public ngraph::pass::MatcherPass { +class ngraph::pass::MulFakeQuantizeFusion : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; MulFakeQuantizeFusion(); diff --git a/src/common/transformations/include/transformations/common_optimizations/mvn_fusion.hpp b/src/common/transformations/include/transformations/common_optimizations/mvn_fusion.hpp index 82d4c81b43e..07a96f4c1dc 100644 --- a/src/common/transformations/include/transformations/common_optimizations/mvn_fusion.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/mvn_fusion.hpp @@ -4,13 +4,12 @@ #pragma once -#include #include - -#include - #include #include +#include +#include + #include "ngraph/pattern/matcher.hpp" namespace ngraph { @@ -37,7 +36,8 @@ public: /** * @ingroup ie_transformation_common_api * @brief MVNFusion transformation replaces group of - * operations: gamma * (x - ReduceMean(x, axes)) / (Sqrt(ReduceMean((x - ReduceMean(x, axes)) ^ 2)) + eps) + beta to MVN op. + * operations: gamma * (x - ReduceMean(x, axes)) / (Sqrt(ReduceMean((x - ReduceMean(x, axes)) ^ 2)) + eps) + beta to MVN + * op. */ class ngraph::pass::MVNFusionWithConstantsInside : public ngraph::pass::MatcherPass { public: @@ -49,7 +49,7 @@ public: * @ingroup ie_transformation_common_api * @brief MVNFusion transformation replaces various sub-graphs with a MVN op. 
*/ -class ngraph::pass::MVNFusion: public ngraph::pass::GraphRewrite { +class ngraph::pass::MVNFusion : public ngraph::pass::GraphRewrite { public: NGRAPH_RTTI_DECLARATION; MVNFusion() { diff --git a/src/common/transformations/include/transformations/common_optimizations/nearest_neighbor_upsampling_fusion.hpp b/src/common/transformations/include/transformations/common_optimizations/nearest_neighbor_upsampling_fusion.hpp index c7b5e2ca5b9..a3aa43cec8a 100644 --- a/src/common/transformations/include/transformations/common_optimizations/nearest_neighbor_upsampling_fusion.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/nearest_neighbor_upsampling_fusion.hpp @@ -4,13 +4,11 @@ #pragma once -#include #include - -#include - #include #include +#include +#include namespace ngraph { namespace pass { diff --git a/src/common/transformations/include/transformations/common_optimizations/nop_elimination.hpp b/src/common/transformations/include/transformations/common_optimizations/nop_elimination.hpp index fc6350e21e2..280c71d6c75 100644 --- a/src/common/transformations/include/transformations/common_optimizations/nop_elimination.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/nop_elimination.hpp @@ -4,13 +4,11 @@ #pragma once -#include #include - -#include - #include #include +#include +#include namespace ngraph { namespace pass { @@ -32,7 +30,7 @@ class TRANSFORMATIONS_API NopElimination; * @ingroup ie_transformation_common_api * @brief EliminatePad eliminates pad that does nothing */ -class ngraph::pass::EliminatePad: public ngraph::pass::MatcherPass { +class ngraph::pass::EliminatePad : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; EliminatePad(); @@ -42,7 +40,7 @@ public: * @ingroup ie_transformation_common_api * @brief EliminateConvert eliminates convert that does nothing */ -class ngraph::pass::EliminateConvert: public ngraph::pass::MatcherPass { +class ngraph::pass::EliminateConvert : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; EliminateConvert(); @@ -52,7 +50,7 @@ public: * @ingroup ie_transformation_common_api * @brief EliminateConvertNonZero eliminates convert before NonZero */ -class ngraph::pass::EliminateConvertNonZero: public ngraph::pass::MatcherPass { +class ngraph::pass::EliminateConvertNonZero : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; EliminateConvertNonZero(); @@ -62,7 +60,7 @@ public: * @ingroup ie_transformation_common_api * @brief EliminateConcat eliminates concat that does nothing */ -class ngraph::pass::EliminateConcat: public ngraph::pass::MatcherPass { +class ngraph::pass::EliminateConcat : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; EliminateConcat(); @@ -72,7 +70,7 @@ public: * @ingroup ie_transformation_common_api * @brief EliminateSplit eliminates split that does nothing */ -class ngraph::pass::EliminateSplit: public ngraph::pass::MatcherPass { +class ngraph::pass::EliminateSplit : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; EliminateSplit(); @@ -92,7 +90,7 @@ public: * @ingroup ie_transformation_common_api * @brief EliminateTranspose eliminates transpose that does nothing */ -class ngraph::pass::EliminateTranspose: public ngraph::pass::MatcherPass { +class ngraph::pass::EliminateTranspose : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; EliminateTranspose(); @@ -102,13 +100,13 @@ public: * @ingroup ie_transformation_common_api * @brief EliminateEltwise eliminates 
eltwise ops that do nothing */ -class ngraph::pass::EliminateEltwise: public ngraph::pass::MatcherPass { +class ngraph::pass::EliminateEltwise : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; EliminateEltwise(); }; -class ngraph::pass::NopElimination: public GraphRewrite { +class ngraph::pass::NopElimination : public GraphRewrite { public: NGRAPH_RTTI_DECLARATION; NopElimination(bool use_shape_for_elimination = true); diff --git a/src/common/transformations/include/transformations/common_optimizations/normalize_l2_fusion.hpp b/src/common/transformations/include/transformations/common_optimizations/normalize_l2_fusion.hpp index 981cbff71bf..1c1ae3bca11 100644 --- a/src/common/transformations/include/transformations/common_optimizations/normalize_l2_fusion.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/normalize_l2_fusion.hpp @@ -4,11 +4,11 @@ #pragma once -#include #include - -#include #include +#include +#include + #include "ngraph/pattern/matcher.hpp" namespace ngraph { @@ -25,7 +25,7 @@ class TRANSFORMATIONS_API NormalizeL2Fusion; * x/(max(sqrt(sum(x[j0, ..., jN]**2), eps)) with a NormalizeL2 op. * x/(add(sqrt(sum(x[j0, ..., jN]**2), eps)) with a NormalizeL2 op. */ -class ngraph::pass::NormalizeL2Fusion: public ngraph::pass::MatcherPass { +class ngraph::pass::NormalizeL2Fusion : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; NormalizeL2Fusion(); diff --git a/src/common/transformations/include/transformations/common_optimizations/optimize_strided_slice.hpp b/src/common/transformations/include/transformations/common_optimizations/optimize_strided_slice.hpp index 5c8c075c407..2d249da957a 100644 --- a/src/common/transformations/include/transformations/common_optimizations/optimize_strided_slice.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/optimize_strided_slice.hpp @@ -4,15 +4,13 @@ #pragma once -#include #include - -#include - #include #include #include #include +#include +#include namespace ngraph { namespace pass { @@ -25,13 +23,12 @@ class TRANSFORMATIONS_API GroupedStridedSliceOptimizer; } // namespace pass } // namespace ngraph - /** * @ingroup ie_transformation_common_api * @brief UselessStridedSliceEraser transformation removes StridedSlice operations * with equal input and output shapes. */ -class ngraph::pass::UselessStridedSliceEraser: public ngraph::pass::FunctionPass { +class ngraph::pass::UselessStridedSliceEraser : public ngraph::pass::FunctionPass { public: NGRAPH_RTTI_DECLARATION; bool run_on_model(const std::shared_ptr& m) override; @@ -43,7 +40,7 @@ public: * operations with first StridedSlice in this group. All SrtideSlices in this group * must be equal and consume the same output port. */ -class ngraph::pass::SharedStridedSliceEraser: public ngraph::pass::FunctionPass { +class ngraph::pass::SharedStridedSliceEraser : public ngraph::pass::FunctionPass { public: NGRAPH_RTTI_DECLARATION; bool run_on_model(const std::shared_ptr& m) override; @@ -55,7 +52,7 @@ public: * operations with VariadicSplit. All StridedSlice operations must slice data * with the same axis and stride = 1. 
*/ -class ngraph::pass::GroupedStridedSliceOptimizer: public ngraph::pass::FunctionPass { +class ngraph::pass::GroupedStridedSliceOptimizer : public ngraph::pass::FunctionPass { public: NGRAPH_RTTI_DECLARATION; bool run_on_model(const std::shared_ptr& m) override; @@ -66,7 +63,7 @@ public: * @brief StridedSliceOptimization transformation executes all transformations * related to StridedSlice optimizations. */ -class ngraph::pass::StridedSliceOptimization: public ngraph::pass::FunctionPass { +class ngraph::pass::StridedSliceOptimization : public ngraph::pass::FunctionPass { public: StridedSliceOptimization(bool use_shapes = true); diff --git a/src/common/transformations/include/transformations/common_optimizations/pad_fusion.hpp b/src/common/transformations/include/transformations/common_optimizations/pad_fusion.hpp index ac93ae23cfe..82c1a09ec4e 100644 --- a/src/common/transformations/include/transformations/common_optimizations/pad_fusion.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/pad_fusion.hpp @@ -4,9 +4,8 @@ #pragma once -#include - #include +#include namespace ngraph { namespace pass { @@ -29,7 +28,7 @@ class TRANSFORMATIONS_API PadFusionGroupConvolutionBackpropData; * - pad value is 0 * - exclude_pad in AvgPool is set to false or pads_begin, pads_end are set to zero */ -class ngraph::pass::PadFusionAvgPool: public ngraph::pass::MatcherPass { +class ngraph::pass::PadFusionAvgPool : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; PadFusionAvgPool(); @@ -42,7 +41,7 @@ public: * - pad mode is op::PadMode::CONSTANT * - pad value is 0 */ -class ngraph::pass::PadFusionConvolution: public ngraph::pass::MatcherPass { +class ngraph::pass::PadFusionConvolution : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; PadFusionConvolution(); @@ -56,7 +55,7 @@ public: * - pad value is 0 * - pads in ConvolutionBackpropData are greater than pads in Pad node */ -class ngraph::pass::PadFusionConvolutionBackpropData: public ngraph::pass::MatcherPass { +class ngraph::pass::PadFusionConvolutionBackpropData : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; PadFusionConvolutionBackpropData(); @@ -69,7 +68,7 @@ public: * - pad mode is op::PadMode::CONSTANT * - pad value is 0 */ -class ngraph::pass::PadFusionGroupConvolution: public ngraph::pass::MatcherPass { +class ngraph::pass::PadFusionGroupConvolution : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; PadFusionGroupConvolution(); @@ -83,13 +82,13 @@ public: * - pad value is 0 * - pads in GroupConvolutionBackpropData are greater than pads in Pad node */ -class ngraph::pass::PadFusionGroupConvolutionBackpropData: public ngraph::pass::MatcherPass { +class ngraph::pass::PadFusionGroupConvolutionBackpropData : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; PadFusionGroupConvolutionBackpropData(); }; -class ngraph::pass::PadFusion: public ngraph::pass::GraphRewrite { +class ngraph::pass::PadFusion : public ngraph::pass::GraphRewrite { public: NGRAPH_RTTI_DECLARATION; PadFusion() { diff --git a/src/common/transformations/include/transformations/common_optimizations/pull_transpose_through_fq.hpp b/src/common/transformations/include/transformations/common_optimizations/pull_transpose_through_fq.hpp index ca06de1c1d3..529cdeeaebd 100644 --- a/src/common/transformations/include/transformations/common_optimizations/pull_transpose_through_fq.hpp +++ 
b/src/common/transformations/include/transformations/common_optimizations/pull_transpose_through_fq.hpp @@ -4,12 +4,10 @@ #pragma once -#include #include - -#include - #include +#include +#include namespace ngraph { namespace pass { @@ -19,7 +17,7 @@ class TRANSFORMATIONS_API PullTransposeThroughFQUp; } // namespace pass } // namespace ngraph -class ngraph::pass::PullTransposeThroughFQUp: public ngraph::pass::MatcherPass { +class ngraph::pass::PullTransposeThroughFQUp : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; PullTransposeThroughFQUp(); diff --git a/src/common/transformations/include/transformations/common_optimizations/relu_fake_quantize_fusion.hpp b/src/common/transformations/include/transformations/common_optimizations/relu_fake_quantize_fusion.hpp index 5d7261ce00c..64044be125b 100644 --- a/src/common/transformations/include/transformations/common_optimizations/relu_fake_quantize_fusion.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/relu_fake_quantize_fusion.hpp @@ -4,9 +4,8 @@ #pragma once -#include - #include +#include namespace ngraph { namespace pass { @@ -24,7 +23,7 @@ class TRANSFORMATIONS_API ReluFakeQuantizeFusion; * - 'input_low' has non negative values */ -class ngraph::pass::ReluFakeQuantizeFusion: public ngraph::pass::MatcherPass { +class ngraph::pass::ReluFakeQuantizeFusion : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; ReluFakeQuantizeFusion(); diff --git a/src/common/transformations/include/transformations/common_optimizations/remove_concat_zero_dim_input.hpp b/src/common/transformations/include/transformations/common_optimizations/remove_concat_zero_dim_input.hpp index c323dad169b..8c66d608034 100644 --- a/src/common/transformations/include/transformations/common_optimizations/remove_concat_zero_dim_input.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/remove_concat_zero_dim_input.hpp @@ -4,11 +4,10 @@ #pragma once -#include #include - -#include #include +#include +#include namespace ov { namespace pass { @@ -24,7 +23,7 @@ class TRANSFORMATIONS_API RemoveConcatZeroDimInput; * removes input of Concat if the tensor size is equal to 0 */ -class ov::pass::RemoveConcatZeroDimInput: public ov::pass::MatcherPass { +class ov::pass::RemoveConcatZeroDimInput : public ov::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; RemoveConcatZeroDimInput(); diff --git a/src/common/transformations/include/transformations/common_optimizations/remove_filtering_boxes_by_size.hpp b/src/common/transformations/include/transformations/common_optimizations/remove_filtering_boxes_by_size.hpp index 0de47638899..7372684ca73 100644 --- a/src/common/transformations/include/transformations/common_optimizations/remove_filtering_boxes_by_size.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/remove_filtering_boxes_by_size.hpp @@ -4,12 +4,10 @@ #pragma once -#include #include - -#include - #include +#include +#include namespace ngraph { namespace pass { @@ -20,13 +18,13 @@ class TRANSFORMATIONS_API RemoveFilteringBoxesBySize; } // namespace pass } // namespace ngraph -class ngraph::pass::FuseFilteringBoxesBySize: public ngraph::pass::GraphRewrite { +class ngraph::pass::FuseFilteringBoxesBySize : public ngraph::pass::GraphRewrite { public: NGRAPH_RTTI_DECLARATION; FuseFilteringBoxesBySize(); }; -class ngraph::pass::RemoveFilteringBoxesBySize: public ngraph::pass::MatcherPass { +class ngraph::pass::RemoveFilteringBoxesBySize : public 
ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; RemoveFilteringBoxesBySize(); diff --git a/src/common/transformations/include/transformations/common_optimizations/remove_multi_subgraph_op_dangling_params.hpp b/src/common/transformations/include/transformations/common_optimizations/remove_multi_subgraph_op_dangling_params.hpp index 0be7ef92b16..a0154cad5ee 100644 --- a/src/common/transformations/include/transformations/common_optimizations/remove_multi_subgraph_op_dangling_params.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/remove_multi_subgraph_op_dangling_params.hpp @@ -4,11 +4,10 @@ #pragma once -#include #include - -#include #include +#include +#include namespace ov { namespace pass { @@ -25,7 +24,7 @@ class TRANSFORMATIONS_API RemoveMultiSubGraphOpDanglingParams; * in the bodies of a MultiSubGraphOp */ -class ov::pass::RemoveMultiSubGraphOpDanglingParams: public ov::pass::MatcherPass { +class ov::pass::RemoveMultiSubGraphOpDanglingParams : public ov::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; RemoveMultiSubGraphOpDanglingParams(); diff --git a/src/common/transformations/include/transformations/common_optimizations/reshape_sequence_fusion.hpp b/src/common/transformations/include/transformations/common_optimizations/reshape_sequence_fusion.hpp index 4d54950fed4..cc9aaf7d096 100644 --- a/src/common/transformations/include/transformations/common_optimizations/reshape_sequence_fusion.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/reshape_sequence_fusion.hpp @@ -4,9 +4,8 @@ #pragma once -#include - #include +#include namespace ngraph { namespace pass { @@ -18,10 +17,11 @@ class TRANSFORMATIONS_API ReshapeSequenceFusion; /** * @ingroup ie_transformation_common_api - * @brief ReshapeSequenceFusion fuses sequence of Reshape operation into single Reshape or eliminates full redundant sequence + * @brief ReshapeSequenceFusion fuses sequence of Reshape operation into single Reshape or eliminates full redundant + * sequence */ -class ngraph::pass::ReshapeSequenceFusion: public ngraph::pass::MatcherPass { +class ngraph::pass::ReshapeSequenceFusion : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; ReshapeSequenceFusion(bool use_shape_for_elimination = true); diff --git a/src/common/transformations/include/transformations/common_optimizations/ric_fusion.hpp b/src/common/transformations/include/transformations/common_optimizations/ric_fusion.hpp index 3f30fde5301..2d5f38c6a0f 100644 --- a/src/common/transformations/include/transformations/common_optimizations/ric_fusion.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/ric_fusion.hpp @@ -4,11 +4,10 @@ #pragma once -#include - -#include -#include #include +#include +#include +#include namespace ngraph { namespace pass { @@ -23,8 +22,8 @@ class TRANSFORMATIONS_API ReverseInputChannelsFusion; * @brief ReverseInputChannelsFusion */ -class ngraph::pass::ReverseInputChannelsFusion: public ov::pass::ModelPass { +class ngraph::pass::ReverseInputChannelsFusion : public ov::pass::ModelPass { public: NGRAPH_RTTI_DECLARATION; - bool run_on_model(const std::shared_ptr &) override; + bool run_on_model(const std::shared_ptr&) override; }; diff --git a/src/common/transformations/include/transformations/common_optimizations/shuffle_channels_fusion.hpp b/src/common/transformations/include/transformations/common_optimizations/shuffle_channels_fusion.hpp index 153a2a48c5d..ae89107a676 100644 --- 
a/src/common/transformations/include/transformations/common_optimizations/shuffle_channels_fusion.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/shuffle_channels_fusion.hpp @@ -4,17 +4,15 @@ #pragma once -#include #include - -#include - #include +#include +#include namespace ngraph { namespace pass { - class TRANSFORMATIONS_API ShuffleChannelsFusion; +class TRANSFORMATIONS_API ShuffleChannelsFusion; } // namespace pass } // namespace ngraph diff --git a/src/common/transformations/include/transformations/common_optimizations/simplify_shape_of_sub_graph.hpp b/src/common/transformations/include/transformations/common_optimizations/simplify_shape_of_sub_graph.hpp index 72c2925fffb..8ee4432499d 100644 --- a/src/common/transformations/include/transformations/common_optimizations/simplify_shape_of_sub_graph.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/simplify_shape_of_sub_graph.hpp @@ -4,15 +4,13 @@ #pragma once -#include #include - -#include - #include #include #include #include +#include +#include namespace ngraph { namespace pass { @@ -27,14 +25,13 @@ class TRANSFORMATIONS_API SimplifySecondInputOfReshape; } // namespace pass } // namespace ngraph - /** * @ingroup ie_transformation_common_api * @brief SharedShapeOf transformation replaces group of ShapeOf * operations with the first ShapeOf in this group. All ShapeOfs in this group * must be equal and consume the same output port. */ -class ngraph::pass::SharedShapeOf: public ngraph::pass::FunctionPass { +class ngraph::pass::SharedShapeOf : public ngraph::pass::FunctionPass { public: NGRAPH_RTTI_DECLARATION; bool run_on_model(const std::shared_ptr& m) override; @@ -46,7 +43,7 @@ public: * operations with the first Gather in this group and updated indices input * in case all Gathers in the group are consumed by the same Concat in incremental order. 
*/ -class ngraph::pass::GroupedGatherElimination: public ngraph::pass::MatcherPass { +class ngraph::pass::GroupedGatherElimination : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; GroupedGatherElimination(); @@ -56,7 +53,7 @@ public: * @ingroup ie_transformation_common_api * @brief SimplifyShapeOfSubGraph transformation runs specific optimizations of shape sub-graphs */ -class ngraph::pass::SimplifyShapeOfSubGraph: public ngraph::pass::FunctionPass { +class ngraph::pass::SimplifyShapeOfSubGraph : public ngraph::pass::FunctionPass { public: NGRAPH_RTTI_DECLARATION; bool run_on_model(const std::shared_ptr& m) override; @@ -66,7 +63,7 @@ public: * @ingroup ie_transformation_common_api * @brief GatherNopElimination transformation optimizes out useless Gather operations */ -class ngraph::pass::GatherNopElimination: public ngraph::pass::MatcherPass { +class ngraph::pass::GatherNopElimination : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; GatherNopElimination(); @@ -78,7 +75,7 @@ public: * Other cases into Concat of shapeof/gather(data) + shapeof(indices) transformation optimizes out * useless Gather operations */ -class ngraph::pass::SimplifyGatherShapeOf: public ngraph::pass::MatcherPass { +class ngraph::pass::SimplifyGatherShapeOf : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; SimplifyGatherShapeOf(); diff --git a/src/common/transformations/include/transformations/common_optimizations/skip_gather_before_transpose_and_reshape.hpp b/src/common/transformations/include/transformations/common_optimizations/skip_gather_before_transpose_and_reshape.hpp index 1259f96def6..5de5c88fba7 100644 --- a/src/common/transformations/include/transformations/common_optimizations/skip_gather_before_transpose_and_reshape.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/skip_gather_before_transpose_and_reshape.hpp @@ -19,7 +19,7 @@ class TRANSFORMATIONS_API SkipGatherBeforeTransposeAndReshape; /** * @ingroup ie_transformation_common_api - * @brief SkipGatherBeforeTransposeAndReshape transformation removes Gather from the Gather->Transpose->Reshape sequence + * @brief SkipGatherBeforeTransposeAndReshape transformation removes Gather from the Gather->Transpose->Reshape sequence * in case when input has batch=1 and gather has axis=0 and indices={0}. * Also, this transformation corrects a transpose constant to save semantic. 
*/ diff --git a/src/common/transformations/include/transformations/common_optimizations/softmax_fusion.hpp b/src/common/transformations/include/transformations/common_optimizations/softmax_fusion.hpp index a9b9bcddc96..01e2741b598 100644 --- a/src/common/transformations/include/transformations/common_optimizations/softmax_fusion.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/softmax_fusion.hpp @@ -4,9 +4,8 @@ #pragma once -#include - #include +#include namespace ngraph { namespace pass { @@ -70,7 +69,7 @@ class TRANSFORMATIONS_API SoftmaxFusion; * - ReduceMax and ReduceSum axes must be scalar constants and they have to point to the same axis */ -class ngraph::pass::SoftmaxFusion: public ngraph::pass::MatcherPass { +class ngraph::pass::SoftmaxFusion : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; SoftmaxFusion(); diff --git a/src/common/transformations/include/transformations/common_optimizations/softplus_fusion.hpp b/src/common/transformations/include/transformations/common_optimizations/softplus_fusion.hpp index 8a2e42815eb..bd3fe1ed1e5 100644 --- a/src/common/transformations/include/transformations/common_optimizations/softplus_fusion.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/softplus_fusion.hpp @@ -4,11 +4,10 @@ #pragma once -#include #include - -#include #include +#include +#include namespace ngraph { namespace pass { @@ -23,7 +22,7 @@ class TRANSFORMATIONS_API SoftPlusFusion; * @brief SoftPlusFusion transformation replaces group of * operations: log(exp(x) + 1) to SoftPlus op. */ -class ngraph::pass::SoftPlusFusion: public ngraph::pass::MatcherPass { +class ngraph::pass::SoftPlusFusion : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; SoftPlusFusion(); diff --git a/src/common/transformations/include/transformations/common_optimizations/softplus_to_mish_fusion.hpp b/src/common/transformations/include/transformations/common_optimizations/softplus_to_mish_fusion.hpp index 400ab8d3335..2884df168e1 100644 --- a/src/common/transformations/include/transformations/common_optimizations/softplus_to_mish_fusion.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/softplus_to_mish_fusion.hpp @@ -4,13 +4,12 @@ #pragma once -#include #include - -#include - #include #include +#include +#include + #include "ngraph/pattern/matcher.hpp" namespace ngraph { @@ -26,7 +25,7 @@ class TRANSFORMATIONS_API SoftPlusToMishFusion; * @brief SoftPlusToMishFusion transformation replaces group of * operations: x * tanh(softplus(x)) to Mish op. 
*/ -class ngraph::pass::SoftPlusToMishFusion: public ngraph::pass::MatcherPass { +class ngraph::pass::SoftPlusToMishFusion : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; SoftPlusToMishFusion(); diff --git a/src/common/transformations/include/transformations/common_optimizations/space_to_batch_fusion.hpp b/src/common/transformations/include/transformations/common_optimizations/space_to_batch_fusion.hpp index eff278cd737..a784f68cc50 100644 --- a/src/common/transformations/include/transformations/common_optimizations/space_to_batch_fusion.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/space_to_batch_fusion.hpp @@ -4,12 +4,10 @@ #pragma once -#include #include - -#include - #include +#include +#include namespace ngraph { namespace pass { @@ -31,7 +29,7 @@ class TRANSFORMATIONS_API SpaceToBatchFusion; * - SpaceToDepthMode must be BLOCKS_FIRST */ -class ngraph::pass::SpaceToBatchFusion: public ngraph::pass::MatcherPass { +class ngraph::pass::SpaceToBatchFusion : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; SpaceToBatchFusion(); diff --git a/src/common/transformations/include/transformations/common_optimizations/split_concat_pair_to_interpolate_fusion.hpp b/src/common/transformations/include/transformations/common_optimizations/split_concat_pair_to_interpolate_fusion.hpp index 09b15ceb2fc..b49e1e5485c 100644 --- a/src/common/transformations/include/transformations/common_optimizations/split_concat_pair_to_interpolate_fusion.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/split_concat_pair_to_interpolate_fusion.hpp @@ -4,13 +4,12 @@ #pragma once -#include #include - -#include - #include #include +#include +#include + #include "ngraph/pattern/matcher.hpp" namespace ngraph { diff --git a/src/common/transformations/include/transformations/common_optimizations/split_squeeze_concat_fusion.hpp b/src/common/transformations/include/transformations/common_optimizations/split_squeeze_concat_fusion.hpp index a140df729a2..91ade68c8d7 100644 --- a/src/common/transformations/include/transformations/common_optimizations/split_squeeze_concat_fusion.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/split_squeeze_concat_fusion.hpp @@ -4,13 +4,12 @@ #pragma once -#include #include - -#include - #include #include +#include +#include + #include "ngraph/pattern/matcher.hpp" namespace ngraph { diff --git a/src/common/transformations/include/transformations/common_optimizations/strides_optimization.hpp b/src/common/transformations/include/transformations/common_optimizations/strides_optimization.hpp index 6ccdaff3510..9aa7ec677f6 100644 --- a/src/common/transformations/include/transformations/common_optimizations/strides_optimization.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/strides_optimization.hpp @@ -5,10 +5,10 @@ #pragma once #include -#include -#include -#include #include +#include +#include +#include namespace ngraph { namespace pass { @@ -27,7 +27,7 @@ class TRANSFORMATIONS_API StridesOptimization; * or inserts pooling between current node and its consumers if the consumers have different StridesProp attributes. 
* Strides can be propagated if Convolution kernel is {1, 1, ...} */ -class ngraph::pass::ConvStridesPropagation: public ngraph::pass::MatcherPass { +class ngraph::pass::ConvStridesPropagation : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; ConvStridesPropagation(); @@ -35,10 +35,11 @@ public: /** * @ingroup ie_transformation_common_api - * @brief SupportedNodesStridesPropagation either propagates stride (greater than 1) from current node up through the graph - * or inserts pooling between current node and its consumers if the consumers have different StridesProp attributes. + * @brief SupportedNodesStridesPropagation either propagates stride (greater than 1) from current node up through the + * graph or inserts pooling between current node and its consumers if the consumers have different StridesProp + * attributes. */ -class ngraph::pass::SupportedNodesStridesPropagation: public ngraph::pass::MatcherPass { +class ngraph::pass::SupportedNodesStridesPropagation : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; SupportedNodesStridesPropagation(); @@ -49,7 +50,7 @@ public: * @brief UnsupportedNodesStridesPropagation inserts pooling between current node and its consumers * if the consumers have different StridesProp attributes. */ -class ngraph::pass::UnsupportedNodesStridesPropagation: public ngraph::pass::MatcherPass { +class ngraph::pass::UnsupportedNodesStridesPropagation : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; UnsupportedNodesStridesPropagation(); @@ -57,9 +58,10 @@ public: /** * @ingroup ie_transformation_common_api - * @brief StridesOptimization transformation works backward on function and propagates strides up through the graph if possible + * @brief StridesOptimization transformation works backward on function and propagates strides up through the graph if + * possible */ -class ngraph::pass::StridesOptimization: public ngraph::pass::BackwardGraphRewrite { +class ngraph::pass::StridesOptimization : public ngraph::pass::BackwardGraphRewrite { public: NGRAPH_RTTI_DECLARATION; StridesOptimization() { diff --git a/src/common/transformations/include/transformations/common_optimizations/swish_fusion.hpp b/src/common/transformations/include/transformations/common_optimizations/swish_fusion.hpp index 8506ce9ed84..5feaff7e376 100644 --- a/src/common/transformations/include/transformations/common_optimizations/swish_fusion.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/swish_fusion.hpp @@ -5,10 +5,9 @@ #pragma once #include -#include - -#include #include +#include +#include namespace ngraph { namespace pass { @@ -26,7 +25,7 @@ class TRANSFORMATIONS_API SwishFusionWithoutBeta; * @ingroup ie_transformation_common_api * @brief SwishFusionWithSigmoid replaces a sub-graphs x * Sigmoid(x) with a Swish op. */ -class ngraph::pass::SwishFusionWithSigmoid: public ngraph::pass::MatcherPass { +class ngraph::pass::SwishFusionWithSigmoid : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; SwishFusionWithSigmoid(); @@ -36,7 +35,7 @@ public: * @ingroup ie_transformation_common_api * @brief SwishFusionWithSigmoid replaces a sub-graphs x * Sigmoid(x * beta) with a Swish op. 
*/ -class ngraph::pass::SwishFusionWithSigmoidWithBeta: public ngraph::pass::MatcherPass { +class ngraph::pass::SwishFusionWithSigmoidWithBeta : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; SwishFusionWithSigmoidWithBeta(); @@ -46,7 +45,7 @@ public: * @ingroup ie_transformation_common_api * @brief SwishFusionWithSigmoid replaces a sub-graphs x / (1.0 + exp(-x * beta)) with a Swish op. */ -class ngraph::pass::SwishFusionWithBeta: public ngraph::pass::MatcherPass { +class ngraph::pass::SwishFusionWithBeta : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; SwishFusionWithBeta(); @@ -56,7 +55,7 @@ public: * @ingroup ie_transformation_common_api * @brief SwishFusionWithSigmoid replaces a sub-graphs x / (1.0 + exp(-x)) with a Swish op. */ -class ngraph::pass::SwishFusionWithoutBeta: public ngraph::pass::MatcherPass { +class ngraph::pass::SwishFusionWithoutBeta : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; SwishFusionWithoutBeta(); @@ -66,7 +65,7 @@ public: * @ingroup ie_transformation_common_api * @brief SwishFusion transformation replaces various sub-graphs with a Swish op. */ -class ngraph::pass::SwishFusion: public ngraph::pass::GraphRewrite { +class ngraph::pass::SwishFusion : public ngraph::pass::GraphRewrite { public: NGRAPH_RTTI_DECLARATION; SwishFusion() { diff --git a/src/common/transformations/include/transformations/common_optimizations/transpose_reshape_elimination_for_matmul.hpp b/src/common/transformations/include/transformations/common_optimizations/transpose_reshape_elimination_for_matmul.hpp index abf0fb89ac9..5f63b7f1fa3 100644 --- a/src/common/transformations/include/transformations/common_optimizations/transpose_reshape_elimination_for_matmul.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/transpose_reshape_elimination_for_matmul.hpp @@ -4,12 +4,11 @@ #pragma once -#include #include - -#include "transformations_visibility.hpp" +#include #include "ngraph/pass/graph_rewrite.hpp" +#include "transformations_visibility.hpp" namespace ngraph { namespace pass { @@ -25,7 +24,7 @@ class TRANSFORMATIONS_API TransposeReshapeEliminationForMatmul; * align input and output dimension ranks before second MatMul input and after MatMul output * (for example, after Einsum Decomposition inside TensorFlow 1 and nGraph EinsumDecomposition transformation) */ -class ngraph::pass::TransposeReshapeEliminationForMatmul: public ngraph::pass::MatcherPass { +class ngraph::pass::TransposeReshapeEliminationForMatmul : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; TransposeReshapeEliminationForMatmul(); diff --git a/src/common/transformations/include/transformations/common_optimizations/transpose_sinking.hpp b/src/common/transformations/include/transformations/common_optimizations/transpose_sinking.hpp index f0aeabbc01f..2ccc8a06ddd 100644 --- a/src/common/transformations/include/transformations/common_optimizations/transpose_sinking.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/transpose_sinking.hpp @@ -4,13 +4,12 @@ #pragma once -#include #include - -#include - #include #include +#include +#include + #include "ngraph/pattern/matcher.hpp" namespace ngraph { @@ -38,7 +37,8 @@ public: /** * @ingroup ie_transformation_common_api - * @brief TransposeFQReduction transformation sinks Transpose through FakeQuantize in case it is followed by reduction or squeeze + * @brief TransposeFQReduction transformation sinks Transpose through FakeQuantize in case it is 
followed by reduction + * or squeeze */ class ngraph::pass::TransposeFQReduction : public ngraph::pass::MatcherPass { public: @@ -68,8 +68,8 @@ public: /** * @ingroup ie_transformation_common_api - * @brief TransposeFuse transformation eliminates 2 consequtive Transposes if they result in no changes to input or fuses them - * to single Transpose if input gets changed + * @brief TransposeFuse transformation eliminates 2 consequtive Transposes if they result in no changes to input or + * fuses them to single Transpose if input gets changed */ class ngraph::pass::TransposeFuse : public ngraph::pass::MatcherPass { public: @@ -81,7 +81,7 @@ public: * @ingroup ie_transformation_common_api * @brief TransposeSinking transformation sinks Transposes through known operations */ -class ngraph::pass::TransposeSinking: public ngraph::pass::GraphRewrite { +class ngraph::pass::TransposeSinking : public ngraph::pass::GraphRewrite { public: NGRAPH_RTTI_DECLARATION; TransposeSinking() { diff --git a/src/common/transformations/include/transformations/common_optimizations/transpose_to_reshape.hpp b/src/common/transformations/include/transformations/common_optimizations/transpose_to_reshape.hpp index c71b8269321..db2fc8bda6d 100644 --- a/src/common/transformations/include/transformations/common_optimizations/transpose_to_reshape.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/transpose_to_reshape.hpp @@ -4,13 +4,12 @@ #pragma once -#include #include - -#include - #include #include +#include +#include + #include "ngraph/pattern/matcher.hpp" namespace ngraph { diff --git a/src/common/transformations/include/transformations/common_optimizations/weights_dequantize_to_fake_quantize.hpp b/src/common/transformations/include/transformations/common_optimizations/weights_dequantize_to_fake_quantize.hpp index 68913c46355..37970157474 100644 --- a/src/common/transformations/include/transformations/common_optimizations/weights_dequantize_to_fake_quantize.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/weights_dequantize_to_fake_quantize.hpp @@ -4,11 +4,10 @@ #pragma once -#include #include - -#include #include +#include +#include namespace ngraph { namespace pass { @@ -26,7 +25,7 @@ class TRANSFORMATIONS_API WeightsDequantizeToFakeQuantize; * Constant (i8) -> Convert (to fp) -> FakeQuantize -> * deducing levels and FakeQuantize limits according to actual values in the weights Constant */ -class ngraph::pass::WeightsDequantizeToFakeQuantize: public ngraph::pass::MatcherPass { +class ngraph::pass::WeightsDequantizeToFakeQuantize : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; WeightsDequantizeToFakeQuantize(); diff --git a/src/common/transformations/include/transformations/common_optimizations/wrap_interpolate_into_transposes.hpp b/src/common/transformations/include/transformations/common_optimizations/wrap_interpolate_into_transposes.hpp index 5b6e5da2555..445aa745baa 100644 --- a/src/common/transformations/include/transformations/common_optimizations/wrap_interpolate_into_transposes.hpp +++ b/src/common/transformations/include/transformations/common_optimizations/wrap_interpolate_into_transposes.hpp @@ -4,11 +4,10 @@ #pragma once -#include #include - -#include #include +#include +#include namespace ngraph { namespace pass { @@ -33,7 +32,7 @@ class TRANSFORMATIONS_API WrapInterpolateIntoTransposes; * with respect to spatial dimensions, but TensorFlow frontend gives Interpolate with * axes {1, 2} for 4D tensors. 
*/ -class ngraph::pass::WrapInterpolateIntoTransposes: public ngraph::pass::MatcherPass { +class ngraph::pass::WrapInterpolateIntoTransposes : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; WrapInterpolateIntoTransposes(); diff --git a/src/common/transformations/include/transformations/control_flow/unroll_if.hpp b/src/common/transformations/include/transformations/control_flow/unroll_if.hpp index 7fb5840dc29..fbc5ddbe405 100644 --- a/src/common/transformations/include/transformations/control_flow/unroll_if.hpp +++ b/src/common/transformations/include/transformations/control_flow/unroll_if.hpp @@ -15,12 +15,14 @@ class TRANSFORMATIONS_API UnrollIf; } // namespace pass } // namespace ngraph +// clang-format off /** * @ingroup ie_transformation_common_api * @brief The transformation replaces 'If' operations with one of the internal functions (bodies) if the provided condition is constant. * The condition is true: 'If' op is replaced with then_body * The condition is false 'If' op is replaced with else_body */ +// clang-format on class ngraph::pass::UnrollIf : public ngraph::pass::FunctionPass { public: diff --git a/src/common/transformations/include/transformations/control_flow/unroll_tensor_iterator.hpp b/src/common/transformations/include/transformations/control_flow/unroll_tensor_iterator.hpp index 7b80cb145f2..055c41cf805 100644 --- a/src/common/transformations/include/transformations/control_flow/unroll_tensor_iterator.hpp +++ b/src/common/transformations/include/transformations/control_flow/unroll_tensor_iterator.hpp @@ -4,12 +4,10 @@ #pragma once -#include #include - -#include - #include +#include +#include namespace ngraph { namespace pass { @@ -27,7 +25,7 @@ class TRANSFORMATIONS_API UnrollTensorIterator; * are added to the network. 
*/ -class ngraph::pass::UnrollTensorIterator: public ngraph::pass::FunctionPass { +class ngraph::pass::UnrollTensorIterator : public ngraph::pass::FunctionPass { public: NGRAPH_RTTI_DECLARATION; bool run_on_model(const std::shared_ptr& m) override; diff --git a/src/common/transformations/include/transformations/convert_precision.hpp b/src/common/transformations/include/transformations/convert_precision.hpp index 595b5eb6850..f4634e0143c 100644 --- a/src/common/transformations/include/transformations/convert_precision.hpp +++ b/src/common/transformations/include/transformations/convert_precision.hpp @@ -4,19 +4,16 @@ #pragma once -#include -#include #include -#include - -#include - -#include +#include #include -#include -#include #include - +#include +#include +#include +#include +#include +#include namespace ngraph { namespace pass { @@ -71,23 +68,28 @@ class NGRAPH_API ConvertPrecision; * LessEqual */ -using type_to_fuse_map = std::unordered_map&, ngraph::element::Type, size_t idx)>>; +using type_to_fuse_map = + std::unordered_map&, ngraph::element::Type, size_t idx)>>; using precisions_array = std::vector>; class ngraph::pass::ConvertPrecision : public ngraph::pass::FunctionPass { public: NGRAPH_RTTI_DECLARATION; - ConvertPrecision(ngraph::element::Type_t from, ngraph::element::Type_t to, type_to_fuse_map additional_type_to_fuse_map = {}) + ConvertPrecision(ngraph::element::Type_t from, + ngraph::element::Type_t to, + type_to_fuse_map additional_type_to_fuse_map = {}) : FunctionPass(), - m_precisions(precisions_array {{ from, to }}), - m_additional_type_to_fuse_map(additional_type_to_fuse_map) {} + m_precisions(precisions_array{{from, to}}), + m_additional_type_to_fuse_map(additional_type_to_fuse_map) {} - ConvertPrecision(const precisions_array& precisions, const type_to_fuse_map & additional_type_to_fuse_map = {}) + ConvertPrecision(const precisions_array& precisions, const type_to_fuse_map& additional_type_to_fuse_map = {}) : FunctionPass(), - m_precisions(precisions), - m_additional_type_to_fuse_map(additional_type_to_fuse_map) {} + m_precisions(precisions), + m_additional_type_to_fuse_map(additional_type_to_fuse_map) {} bool run_on_model(const std::shared_ptr& m) override; + private: precisions_array m_precisions; type_to_fuse_map m_additional_type_to_fuse_map; diff --git a/src/common/transformations/include/transformations/disable_decompression_convert_constant_folding.hpp b/src/common/transformations/include/transformations/disable_decompression_convert_constant_folding.hpp index e30e2859421..46698e60ab7 100644 --- a/src/common/transformations/include/transformations/disable_decompression_convert_constant_folding.hpp +++ b/src/common/transformations/include/transformations/disable_decompression_convert_constant_folding.hpp @@ -4,8 +4,8 @@ #pragma once -#include "transformations_visibility.hpp" #include "openvino/pass/graph_rewrite.hpp" +#include "transformations_visibility.hpp" namespace ov { namespace pass { diff --git a/src/common/transformations/include/transformations/fix_rt_info.hpp b/src/common/transformations/include/transformations/fix_rt_info.hpp index 4b5a2a4d1b5..fcc1a82ae83 100644 --- a/src/common/transformations/include/transformations/fix_rt_info.hpp +++ b/src/common/transformations/include/transformations/fix_rt_info.hpp @@ -9,10 +9,9 @@ * @file init_node_info.hpp */ -#include #include - #include +#include /** * @brief ngraph namespace @@ -37,7 +36,7 @@ class NGRAPH_API FixRtInfo; * * Used to extract runtime attributes from shared pointer to 
`ov::RuntimeAttributeWrapper` to standard or trivial types */ -class ngraph::pass::FixRtInfo: public ngraph::pass::FunctionPass { +class ngraph::pass::FixRtInfo : public ngraph::pass::FunctionPass { public: NGRAPH_RTTI_DECLARATION; bool run_on_model(const std::shared_ptr& m) override; diff --git a/src/common/transformations/include/transformations/init_node_info.hpp b/src/common/transformations/include/transformations/init_node_info.hpp index 26ad8ef93e0..0ed26da0b49 100644 --- a/src/common/transformations/include/transformations/init_node_info.hpp +++ b/src/common/transformations/include/transformations/init_node_info.hpp @@ -9,10 +9,9 @@ * @file init_node_info.hpp */ -#include #include - #include +#include /** * @brief ngraph namespace @@ -32,14 +31,14 @@ class NGRAPH_API InitNodeInfo; /** * @ingroup ie_transformation_common_api * @brief InitNodeInfo transformation helps to set runtime info attributes in a single place. - * + * * Every runtime info attribute that needs to be initialized should be registered * in run_on_function method. Also do not forget to override init methods for registered * attribute. * This transformations should be called first in transformation pipeline. If attribute was * already set initialization will be skipped for this node. */ -class ngraph::pass::InitNodeInfo: public ngraph::pass::FunctionPass { +class ngraph::pass::InitNodeInfo : public ngraph::pass::FunctionPass { public: NGRAPH_RTTI_DECLARATION; bool run_on_model(const std::shared_ptr& m) override; diff --git a/src/common/transformations/include/transformations/low_precision/disable_convert_constant_folding_on_const_path.hpp b/src/common/transformations/include/transformations/low_precision/disable_convert_constant_folding_on_const_path.hpp index e81152ee1a3..9cf348e605c 100644 --- a/src/common/transformations/include/transformations/low_precision/disable_convert_constant_folding_on_const_path.hpp +++ b/src/common/transformations/include/transformations/low_precision/disable_convert_constant_folding_on_const_path.hpp @@ -5,10 +5,9 @@ #pragma once #include -#include - -#include #include +#include +#include namespace ngraph { namespace pass { @@ -21,6 +20,5 @@ class TRANSFORMATIONS_API DisableConvertConstantFoldingOnConstPath; class ngraph::pass::DisableConvertConstantFoldingOnConstPath : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; - DisableConvertConstantFoldingOnConstPath( - const element::TypeVector & inputPrecisions = {}); + DisableConvertConstantFoldingOnConstPath(const element::TypeVector& inputPrecisions = {}); }; diff --git a/src/common/transformations/include/transformations/op_conversions/batch_norm_decomposition.hpp b/src/common/transformations/include/transformations/op_conversions/batch_norm_decomposition.hpp index 4b8514136db..4eb17023cdd 100644 --- a/src/common/transformations/include/transformations/op_conversions/batch_norm_decomposition.hpp +++ b/src/common/transformations/include/transformations/op_conversions/batch_norm_decomposition.hpp @@ -5,13 +5,10 @@ #pragma once #include - -#include - #include - -#include #include +#include +#include using namespace std; @@ -23,7 +20,7 @@ class TRANSFORMATIONS_API BatchNormDecomposition; } // namespace pass } // namespace ngraph -class ngraph::pass::BatchNormDecomposition: public ngraph::pass::MatcherPass { +class ngraph::pass::BatchNormDecomposition : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; BatchNormDecomposition(); diff --git 
a/src/common/transformations/include/transformations/op_conversions/bidirectional_sequences_decomposition.hpp b/src/common/transformations/include/transformations/op_conversions/bidirectional_sequences_decomposition.hpp index b1c453901e1..711589da3e8 100644 --- a/src/common/transformations/include/transformations/op_conversions/bidirectional_sequences_decomposition.hpp +++ b/src/common/transformations/include/transformations/op_conversions/bidirectional_sequences_decomposition.hpp @@ -4,13 +4,11 @@ #pragma once -#include #include -#include - -#include - #include +#include +#include +#include namespace ngraph { namespace pass { diff --git a/src/common/transformations/include/transformations/op_conversions/convert_batch_to_space.hpp b/src/common/transformations/include/transformations/op_conversions/convert_batch_to_space.hpp index 94fc6fb2ff4..087ba8004f2 100644 --- a/src/common/transformations/include/transformations/op_conversions/convert_batch_to_space.hpp +++ b/src/common/transformations/include/transformations/op_conversions/convert_batch_to_space.hpp @@ -4,13 +4,11 @@ #pragma once -#include #include - -#include - #include #include +#include +#include namespace ngraph { namespace pass { @@ -33,7 +31,7 @@ class TRANSFORMATIONS_API ConvertBatchToSpace; * */ -class ngraph::pass::ConvertBatchToSpace: public ngraph::pass::MatcherPass { +class ngraph::pass::ConvertBatchToSpace : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; explicit ConvertBatchToSpace(bool convert_by_elements = true) : MatcherPass() { diff --git a/src/common/transformations/include/transformations/op_conversions/convert_broadcast3.hpp b/src/common/transformations/include/transformations/op_conversions/convert_broadcast3.hpp index 76be1fa6278..6f42c62924e 100644 --- a/src/common/transformations/include/transformations/op_conversions/convert_broadcast3.hpp +++ b/src/common/transformations/include/transformations/op_conversions/convert_broadcast3.hpp @@ -4,12 +4,10 @@ #pragma once -#include #include - -#include - #include +#include +#include namespace ngraph { namespace pass { @@ -19,7 +17,7 @@ class TRANSFORMATIONS_API ConvertBroadcast3; } // namespace pass } // namespace ngraph -class ngraph::pass::ConvertBroadcast3: public ngraph::pass::MatcherPass { +class ngraph::pass::ConvertBroadcast3 : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; ConvertBroadcast3(); diff --git a/src/common/transformations/include/transformations/op_conversions/convert_broadcast_to_tiles.hpp b/src/common/transformations/include/transformations/op_conversions/convert_broadcast_to_tiles.hpp index 8b0524c53f6..56a9462302b 100644 --- a/src/common/transformations/include/transformations/op_conversions/convert_broadcast_to_tiles.hpp +++ b/src/common/transformations/include/transformations/op_conversions/convert_broadcast_to_tiles.hpp @@ -4,12 +4,10 @@ #pragma once -#include #include - -#include - #include +#include +#include namespace ngraph { namespace pass { @@ -19,7 +17,7 @@ class TRANSFORMATIONS_API ConvertBroadcastToTiles; } // namespace pass } // namespace ngraph -class ngraph::pass::ConvertBroadcastToTiles: public ngraph::pass::MatcherPass { +class ngraph::pass::ConvertBroadcastToTiles : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; ConvertBroadcastToTiles(); diff --git a/src/common/transformations/include/transformations/op_conversions/convert_deformable_conv_v8_to_v1.hpp b/src/common/transformations/include/transformations/op_conversions/convert_deformable_conv_v8_to_v1.hpp index 
1d6be40e04e..4b8bb451ec2 100644 --- a/src/common/transformations/include/transformations/op_conversions/convert_deformable_conv_v8_to_v1.hpp +++ b/src/common/transformations/include/transformations/op_conversions/convert_deformable_conv_v8_to_v1.hpp @@ -4,9 +4,8 @@ #pragma once -#include - #include +#include namespace ngraph { namespace pass { diff --git a/src/common/transformations/include/transformations/op_conversions/convert_depth_to_space.hpp b/src/common/transformations/include/transformations/op_conversions/convert_depth_to_space.hpp index 92c53785002..1f8953cb257 100644 --- a/src/common/transformations/include/transformations/op_conversions/convert_depth_to_space.hpp +++ b/src/common/transformations/include/transformations/op_conversions/convert_depth_to_space.hpp @@ -4,12 +4,10 @@ #pragma once -#include #include - -#include - #include +#include +#include namespace ngraph { namespace pass { @@ -19,7 +17,7 @@ class TRANSFORMATIONS_API ConvertDepthToSpace; } // namespace pass } // namespace ngraph -class ngraph::pass::ConvertDepthToSpace: public ngraph::pass::MatcherPass { +class ngraph::pass::ConvertDepthToSpace : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; ConvertDepthToSpace(); diff --git a/src/common/transformations/include/transformations/op_conversions/convert_divide.hpp b/src/common/transformations/include/transformations/op_conversions/convert_divide.hpp index 9052e0ac4e6..e8a59d95a18 100644 --- a/src/common/transformations/include/transformations/op_conversions/convert_divide.hpp +++ b/src/common/transformations/include/transformations/op_conversions/convert_divide.hpp @@ -4,12 +4,10 @@ #pragma once -#include #include - -#include - #include +#include +#include namespace ngraph { namespace pass { @@ -20,13 +18,13 @@ class TRANSFORMATIONS_API ConvertDivideWithConstant; } // namespace pass } // namespace ngraph -class ngraph::pass::ConvertDivide: public ngraph::pass::MatcherPass { +class ngraph::pass::ConvertDivide : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; ConvertDivide(); }; -class ngraph::pass::ConvertDivideWithConstant: public ngraph::pass::MatcherPass { +class ngraph::pass::ConvertDivideWithConstant : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; ConvertDivideWithConstant(); diff --git a/src/common/transformations/include/transformations/op_conversions/convert_gather_0d.hpp b/src/common/transformations/include/transformations/op_conversions/convert_gather_0d.hpp index d28613d6313..ca74418e731 100644 --- a/src/common/transformations/include/transformations/op_conversions/convert_gather_0d.hpp +++ b/src/common/transformations/include/transformations/op_conversions/convert_gather_0d.hpp @@ -4,13 +4,10 @@ #pragma once -#include #include - -#include - #include - +#include +#include namespace ngraph { namespace pass { @@ -22,7 +19,8 @@ class TRANSFORMATIONS_API ConvertGather0D; /** * @ingroup ie_transformation_common_api - * @brief ConvertGather0D decomposes v1::Gather operation into v0::Unsqueeze + v1::Gather + v0::Squeeze pattern when gather indices is scalar + * @brief ConvertGather0D decomposes v1::Gather operation into v0::Unsqueeze + v1::Gather + v0::Squeeze pattern when + * gather indices is scalar */ class ngraph::pass::ConvertGather0D : public ngraph::pass::MatcherPass { public: diff --git a/src/common/transformations/include/transformations/op_conversions/convert_gather_downgrade.hpp b/src/common/transformations/include/transformations/op_conversions/convert_gather_downgrade.hpp index 
9688d88ff27..1b44f809711 100644 --- a/src/common/transformations/include/transformations/op_conversions/convert_gather_downgrade.hpp +++ b/src/common/transformations/include/transformations/op_conversions/convert_gather_downgrade.hpp @@ -4,9 +4,8 @@ #pragma once -#include - #include +#include namespace ngraph { namespace pass { diff --git a/src/common/transformations/include/transformations/op_conversions/convert_gather_upgrade.hpp b/src/common/transformations/include/transformations/op_conversions/convert_gather_upgrade.hpp index d96dc7cd217..effb5649a7f 100644 --- a/src/common/transformations/include/transformations/op_conversions/convert_gather_upgrade.hpp +++ b/src/common/transformations/include/transformations/op_conversions/convert_gather_upgrade.hpp @@ -4,9 +4,8 @@ #pragma once -#include - #include +#include namespace ngraph { namespace pass { diff --git a/src/common/transformations/include/transformations/op_conversions/convert_gelu.hpp b/src/common/transformations/include/transformations/op_conversions/convert_gelu.hpp index 9357aa06c23..8d247aa595e 100644 --- a/src/common/transformations/include/transformations/op_conversions/convert_gelu.hpp +++ b/src/common/transformations/include/transformations/op_conversions/convert_gelu.hpp @@ -4,12 +4,10 @@ #pragma once -#include #include - -#include - #include +#include +#include #include "ngraph/op/gelu.hpp" @@ -21,7 +19,7 @@ class TRANSFORMATIONS_API ConvertGELU; } // namespace pass } // namespace ngraph -class ngraph::pass::ConvertGELU: public ngraph::pass::MatcherPass { +class ngraph::pass::ConvertGELU : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; ConvertGELU(); diff --git a/src/common/transformations/include/transformations/op_conversions/convert_interpolate1_to_interpolate4.hpp b/src/common/transformations/include/transformations/op_conversions/convert_interpolate1_to_interpolate4.hpp index b4ae689e96b..6ad05c2e9f7 100644 --- a/src/common/transformations/include/transformations/op_conversions/convert_interpolate1_to_interpolate4.hpp +++ b/src/common/transformations/include/transformations/op_conversions/convert_interpolate1_to_interpolate4.hpp @@ -4,13 +4,11 @@ #pragma once -#include -#include #include - -#include - #include +#include +#include +#include namespace ngraph { namespace pass { @@ -24,7 +22,7 @@ class TRANSFORMATIONS_API ConvertInterpolate1ToInterpolate4; * @ingroup ie_transformation_common_api * @brief ConvertInterpolate1ToInterpolate4 covert v0:interpolate into v4::Interpolate. 
*/ -class ngraph::pass::ConvertInterpolate1ToInterpolate4: public ngraph::pass::MatcherPass { +class ngraph::pass::ConvertInterpolate1ToInterpolate4 : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; ConvertInterpolate1ToInterpolate4(); diff --git a/src/common/transformations/include/transformations/op_conversions/convert_matrix_nms_to_matrix_nms_ie.hpp b/src/common/transformations/include/transformations/op_conversions/convert_matrix_nms_to_matrix_nms_ie.hpp index e1970e17a1b..21a04ef16f1 100644 --- a/src/common/transformations/include/transformations/op_conversions/convert_matrix_nms_to_matrix_nms_ie.hpp +++ b/src/common/transformations/include/transformations/op_conversions/convert_matrix_nms_to_matrix_nms_ie.hpp @@ -4,12 +4,11 @@ #pragma once -#include -#include #include - -#include #include +#include +#include +#include namespace ngraph { namespace pass { @@ -19,7 +18,7 @@ class TRANSFORMATIONS_API ConvertMatrixNmsToMatrixNmsIE; } // namespace pass } // namespace ngraph -class ngraph::pass::ConvertMatrixNmsToMatrixNmsIE: public ngraph::pass::MatcherPass { +class ngraph::pass::ConvertMatrixNmsToMatrixNmsIE : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; ConvertMatrixNmsToMatrixNmsIE(bool force_i32_output_type = true); diff --git a/src/common/transformations/include/transformations/op_conversions/convert_maxpool_downgrade.hpp b/src/common/transformations/include/transformations/op_conversions/convert_maxpool_downgrade.hpp index 6a5dad82169..7d7e10aca2c 100644 --- a/src/common/transformations/include/transformations/op_conversions/convert_maxpool_downgrade.hpp +++ b/src/common/transformations/include/transformations/op_conversions/convert_maxpool_downgrade.hpp @@ -4,8 +4,8 @@ #pragma once -#include #include +#include namespace ngraph { namespace pass { diff --git a/src/common/transformations/include/transformations/op_conversions/convert_maxpool_upgrade.hpp b/src/common/transformations/include/transformations/op_conversions/convert_maxpool_upgrade.hpp index 9a38a8189f1..919bf917423 100644 --- a/src/common/transformations/include/transformations/op_conversions/convert_maxpool_upgrade.hpp +++ b/src/common/transformations/include/transformations/op_conversions/convert_maxpool_upgrade.hpp @@ -4,8 +4,8 @@ #pragma once -#include #include +#include namespace ngraph { namespace pass { diff --git a/src/common/transformations/include/transformations/op_conversions/convert_minimum_to_power_and_max.hpp b/src/common/transformations/include/transformations/op_conversions/convert_minimum_to_power_and_max.hpp index 79cc81d9648..e1025e1ef79 100644 --- a/src/common/transformations/include/transformations/op_conversions/convert_minimum_to_power_and_max.hpp +++ b/src/common/transformations/include/transformations/op_conversions/convert_minimum_to_power_and_max.hpp @@ -4,12 +4,10 @@ #pragma once -#include #include - -#include - #include +#include +#include namespace ngraph { namespace pass { @@ -19,7 +17,7 @@ class TRANSFORMATIONS_API ConvertMinimum; } // namespace pass } // namespace ngraph -class ngraph::pass::ConvertMinimum: public ngraph::pass::MatcherPass { +class ngraph::pass::ConvertMinimum : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; ConvertMinimum(); diff --git a/src/common/transformations/include/transformations/op_conversions/convert_mod.hpp b/src/common/transformations/include/transformations/op_conversions/convert_mod.hpp index 78c3ac9bf5c..69b18636f42 100644 --- 
a/src/common/transformations/include/transformations/op_conversions/convert_mod.hpp +++ b/src/common/transformations/include/transformations/op_conversions/convert_mod.hpp @@ -4,12 +4,10 @@ #pragma once -#include #include - -#include - #include +#include +#include namespace ngraph { namespace pass { @@ -19,7 +17,7 @@ class TRANSFORMATIONS_API ConvertMod; } // namespace pass } // namespace ngraph -class ngraph::pass::ConvertMod: public ngraph::pass::MatcherPass { +class ngraph::pass::ConvertMod : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; ConvertMod(); diff --git a/src/common/transformations/include/transformations/op_conversions/convert_multiclass_nms_to_multiclass_nms_ie.hpp b/src/common/transformations/include/transformations/op_conversions/convert_multiclass_nms_to_multiclass_nms_ie.hpp index 14fce022787..90c7da24daf 100644 --- a/src/common/transformations/include/transformations/op_conversions/convert_multiclass_nms_to_multiclass_nms_ie.hpp +++ b/src/common/transformations/include/transformations/op_conversions/convert_multiclass_nms_to_multiclass_nms_ie.hpp @@ -4,12 +4,11 @@ #pragma once -#include -#include #include - -#include #include +#include +#include +#include namespace ngraph { namespace pass { @@ -19,7 +18,7 @@ class TRANSFORMATIONS_API ConvertMulticlassNmsToMulticlassNmsIE; } // namespace pass } // namespace ngraph -class ngraph::pass::ConvertMulticlassNmsToMulticlassNmsIE: public ngraph::pass::MatcherPass { +class ngraph::pass::ConvertMulticlassNmsToMulticlassNmsIE : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; ConvertMulticlassNmsToMulticlassNmsIE(bool force_i32_output_type = true); diff --git a/src/common/transformations/include/transformations/op_conversions/convert_mvn1_to_mvn6.hpp b/src/common/transformations/include/transformations/op_conversions/convert_mvn1_to_mvn6.hpp index cdb03a83173..aaece983f6b 100644 --- a/src/common/transformations/include/transformations/op_conversions/convert_mvn1_to_mvn6.hpp +++ b/src/common/transformations/include/transformations/op_conversions/convert_mvn1_to_mvn6.hpp @@ -4,9 +4,8 @@ #pragma once -#include - #include +#include namespace ngraph { namespace pass { diff --git a/src/common/transformations/include/transformations/op_conversions/convert_negative.hpp b/src/common/transformations/include/transformations/op_conversions/convert_negative.hpp index 6e762fc2b71..9a542bca8bf 100644 --- a/src/common/transformations/include/transformations/op_conversions/convert_negative.hpp +++ b/src/common/transformations/include/transformations/op_conversions/convert_negative.hpp @@ -4,12 +4,10 @@ #pragma once -#include #include - -#include - #include +#include +#include namespace ngraph { namespace pass { @@ -19,7 +17,7 @@ class TRANSFORMATIONS_API ConvertNegative; } // namespace pass } // namespace ngraph -class ngraph::pass::ConvertNegative: public ngraph::pass::MatcherPass { +class ngraph::pass::ConvertNegative : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; ConvertNegative(); diff --git a/src/common/transformations/include/transformations/op_conversions/convert_nms_to_nms_ie_internal.hpp b/src/common/transformations/include/transformations/op_conversions/convert_nms_to_nms_ie_internal.hpp index 1e22735bab4..4f40940c82e 100644 --- a/src/common/transformations/include/transformations/op_conversions/convert_nms_to_nms_ie_internal.hpp +++ b/src/common/transformations/include/transformations/op_conversions/convert_nms_to_nms_ie_internal.hpp @@ -4,12 +4,11 @@ #pragma once -#include 
-#include #include - -#include #include +#include +#include +#include namespace ngraph { namespace pass { @@ -19,7 +18,7 @@ class TRANSFORMATIONS_API ConvertNMSToNMSIEInternal; } // namespace pass } // namespace ngraph -class ngraph::pass::ConvertNMSToNMSIEInternal: public ngraph::pass::MatcherPass { +class ngraph::pass::ConvertNMSToNMSIEInternal : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; ConvertNMSToNMSIEInternal(); diff --git a/src/common/transformations/include/transformations/op_conversions/convert_pad_to_group_conv.hpp b/src/common/transformations/include/transformations/op_conversions/convert_pad_to_group_conv.hpp index 140f820d2ec..583c875b1b9 100644 --- a/src/common/transformations/include/transformations/op_conversions/convert_pad_to_group_conv.hpp +++ b/src/common/transformations/include/transformations/op_conversions/convert_pad_to_group_conv.hpp @@ -4,12 +4,10 @@ #pragma once -#include #include - -#include - #include +#include +#include namespace ngraph { namespace pass { @@ -28,7 +26,7 @@ class TRANSFORMATIONS_API ConvertPadToGroupConvolution; * 3. Input shape rank must be static and greater than 3 */ -class ngraph::pass::ConvertPadToGroupConvolution: public ngraph::pass::MatcherPass { +class ngraph::pass::ConvertPadToGroupConvolution : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; ConvertPadToGroupConvolution(); diff --git a/src/common/transformations/include/transformations/op_conversions/convert_previous_nms_to_nms_5.hpp b/src/common/transformations/include/transformations/op_conversions/convert_previous_nms_to_nms_5.hpp index c72efba58f2..1a98a5b1c62 100644 --- a/src/common/transformations/include/transformations/op_conversions/convert_previous_nms_to_nms_5.hpp +++ b/src/common/transformations/include/transformations/op_conversions/convert_previous_nms_to_nms_5.hpp @@ -5,10 +5,9 @@ #pragma once #include -#include - -#include #include +#include +#include namespace ngraph { namespace pass { @@ -20,21 +19,20 @@ class TRANSFORMATIONS_API ConvertNMS4ToNMS5; } // namespace pass } // namespace ngraph -class ngraph::pass::ConvertNMS1ToNMS5: public ngraph::pass::MatcherPass { +class ngraph::pass::ConvertNMS1ToNMS5 : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; ConvertNMS1ToNMS5(); }; -class ngraph::pass::ConvertNMS3ToNMS5: public ngraph::pass::MatcherPass { +class ngraph::pass::ConvertNMS3ToNMS5 : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; ConvertNMS3ToNMS5(); }; -class ngraph::pass::ConvertNMS4ToNMS5: public ngraph::pass::MatcherPass { +class ngraph::pass::ConvertNMS4ToNMS5 : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; ConvertNMS4ToNMS5(); }; - diff --git a/src/common/transformations/include/transformations/op_conversions/convert_reduce_to_pooling.hpp b/src/common/transformations/include/transformations/op_conversions/convert_reduce_to_pooling.hpp index 525ced51d88..cca24e57133 100644 --- a/src/common/transformations/include/transformations/op_conversions/convert_reduce_to_pooling.hpp +++ b/src/common/transformations/include/transformations/op_conversions/convert_reduce_to_pooling.hpp @@ -4,19 +4,16 @@ #pragma once -#include -#include #include - -#include - +#include #include -#include #include -#include -#include +#include #include - +#include +#include +#include +#include namespace ngraph { namespace pass { @@ -35,25 +32,25 @@ public: ngraph::matcher_pass_callback convert_reduce_to_pooling(); }; -class ngraph::pass::ConvertReduceMeanToPooling: public 
ConvertReduceBase { +class ngraph::pass::ConvertReduceMeanToPooling : public ConvertReduceBase { public: NGRAPH_RTTI_DECLARATION; ConvertReduceMeanToPooling(); }; -class ngraph::pass::ConvertReduceMaxToPooling: public ConvertReduceBase { +class ngraph::pass::ConvertReduceMaxToPooling : public ConvertReduceBase { public: NGRAPH_RTTI_DECLARATION; ConvertReduceMaxToPooling(); }; -class ngraph::pass::ConvertReduceSumToPooling: public ConvertReduceBase { +class ngraph::pass::ConvertReduceSumToPooling : public ConvertReduceBase { public: NGRAPH_RTTI_DECLARATION; ConvertReduceSumToPooling(); }; -class ngraph::pass::ConvertReduceToPooling: public ngraph::pass::GraphRewrite { +class ngraph::pass::ConvertReduceToPooling : public ngraph::pass::GraphRewrite { public: NGRAPH_RTTI_DECLARATION; ConvertReduceToPooling() { @@ -74,7 +71,8 @@ ngraph::matcher_pass_callback ConvertReduceBase::convert_reduce_to_pooling() { auto input = reduce->input_value(0); - auto axes_node = std::dynamic_pointer_cast(reduce->input_value(1).get_node_shared_ptr()); + auto axes_node = + std::dynamic_pointer_cast(reduce->input_value(1).get_node_shared_ptr()); if (!axes_node) { return false; } @@ -97,11 +95,16 @@ ngraph::matcher_pass_callback ConvertReduceBase::convert_reduce_to_pooling() { auto input_shape = input.get_shape(); // If Reduce op reduces only 1 dims we replace it with Reshape - if (std::all_of(axes_vector.begin(), axes_vector.end(), - [&input_shape](const int64_t & axis) { return input_shape[axis] == 1; })) { + if (std::all_of(axes_vector.begin(), axes_vector.end(), [&input_shape](const int64_t& axis) { + return input_shape[axis] == 1; + })) { const auto reshape_shape = reduce->output(0).get_shape(); - auto reshape = std::make_shared(input, - ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{reshape_shape.size()}, reshape_shape), true); + auto reshape = std::make_shared( + input, + ngraph::opset1::Constant::create(ngraph::element::i64, + ngraph::Shape{reshape_shape.size()}, + reshape_shape), + true); reshape->set_friendly_name(reduce->get_friendly_name()); copy_runtime_info(reduce, reshape); @@ -111,7 +114,7 @@ ngraph::matcher_pass_callback ConvertReduceBase::convert_reduce_to_pooling() { // Check that axes are consecutive otherwise this transformation is not applicable for (size_t i = 1; i < axes_vector.size(); ++i) { - if (axes_vector[i] - axes_vector[i-1] != 1) { + if (axes_vector[i] - axes_vector[i - 1] != 1) { return false; } } @@ -169,7 +172,7 @@ ngraph::matcher_pass_callback ConvertReduceBase::convert_reduce_to_pooling() { kernel.push_back(1); } for (auto& axis : axes_vector) { - kernel[axis-2] = input_shape[axis]; + kernel[axis - 2] = input_shape[axis]; } shape_end = reduce->output(0).get_shape(); } @@ -192,8 +195,10 @@ ngraph::matcher_pass_callback ConvertReduceBase::convert_reduce_to_pooling() { ngraph::NodeVector new_ops; if (!shape_begin.empty() && shape_begin != input.get_shape()) { - input = std::make_shared(input, - ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{shape_begin.size()}, shape_begin), true); + input = std::make_shared( + input, + ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{shape_begin.size()}, shape_begin), + true); input.get_node_shared_ptr()->set_friendly_name(reduce->get_friendly_name() + "/reshape_begin"); new_ops.push_back(input.get_node_shared_ptr()); } @@ -229,18 +234,19 @@ ngraph::matcher_pass_callback ConvertReduceBase::convert_reduce_to_pooling() { } input = std::make_shared(input, - strides, - pads_begin, - 
pads_end, - kernel, - true, - ngraph::op::RoundingType::FLOOR); + strides, + pads_begin, + pads_end, + kernel, + true, + ngraph::op::RoundingType::FLOOR); input.get_node_shared_ptr()->set_friendly_name(reduce->get_friendly_name() + "/pool"); new_ops.push_back(input.get_node_shared_ptr()); - input = std::make_shared(input, - ngraph::opset1::Constant::create(input.get_element_type(), ngraph::Shape{1}, {reduction_dims_count})); + input = std::make_shared( + input, + ngraph::opset1::Constant::create(input.get_element_type(), ngraph::Shape{1}, {reduction_dims_count})); input.get_node_shared_ptr()->set_friendly_name(reduce->get_friendly_name() + "/mul"); new_ops.push_back(input.get_node_shared_ptr()); @@ -253,8 +259,10 @@ ngraph::matcher_pass_callback ConvertReduceBase::convert_reduce_to_pooling() { } if (shape_end != input.get_shape()) { - input = std::make_shared(input, - ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{shape_end.size()}, shape_end), true); + input = std::make_shared( + input, + ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{shape_end.size()}, shape_end), + true); new_ops.push_back(input.get_node_shared_ptr()); } input.get_node_shared_ptr()->set_friendly_name(reduce->get_friendly_name()); diff --git a/src/common/transformations/include/transformations/op_conversions/convert_scatter_elements_to_scatter.hpp b/src/common/transformations/include/transformations/op_conversions/convert_scatter_elements_to_scatter.hpp index 5d4a819fd6f..7e4a5e76ec5 100644 --- a/src/common/transformations/include/transformations/op_conversions/convert_scatter_elements_to_scatter.hpp +++ b/src/common/transformations/include/transformations/op_conversions/convert_scatter_elements_to_scatter.hpp @@ -4,12 +4,10 @@ #pragma once -#include #include - -#include - #include +#include +#include namespace ngraph { namespace pass { @@ -23,7 +21,7 @@ class TRANSFORMATIONS_API ConvertScatterElementsToScatter; * @ingroup ie_transformation_common_api * @brief ConvertScatterElementsToScatter convert opset3::ScatterElementsUpdate to opset3::ScatterUpdate. 
*/ -class ngraph::pass::ConvertScatterElementsToScatter: public ngraph::pass::MatcherPass { +class ngraph::pass::ConvertScatterElementsToScatter : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; ConvertScatterElementsToScatter(); diff --git a/src/common/transformations/include/transformations/op_conversions/convert_sequences_to_tensor_iterator.hpp b/src/common/transformations/include/transformations/op_conversions/convert_sequences_to_tensor_iterator.hpp index 13cfaad76ee..e3cc75b5f57 100644 --- a/src/common/transformations/include/transformations/op_conversions/convert_sequences_to_tensor_iterator.hpp +++ b/src/common/transformations/include/transformations/op_conversions/convert_sequences_to_tensor_iterator.hpp @@ -4,12 +4,10 @@ #pragma once -#include #include - -#include - #include +#include +#include namespace ngraph { namespace pass { @@ -28,7 +26,7 @@ class TRANSFORMATIONS_API ConvertSequenceToTensorIterator; * * */ -class ngraph::pass::ConvertRNNSequenceToTensorIterator: public ngraph::pass::MatcherPass { +class ngraph::pass::ConvertRNNSequenceToTensorIterator : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; ConvertRNNSequenceToTensorIterator(); @@ -40,7 +38,7 @@ public: * * */ -class ngraph::pass::ConvertGRUSequenceToTensorIterator: public ngraph::pass::MatcherPass { +class ngraph::pass::ConvertGRUSequenceToTensorIterator : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; ConvertGRUSequenceToTensorIterator(); @@ -52,7 +50,7 @@ public: * * */ -class ngraph::pass::ConvertLSTMSequenceToTensorIterator: public ngraph::pass::MatcherPass { +class ngraph::pass::ConvertLSTMSequenceToTensorIterator : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; ConvertLSTMSequenceToTensorIterator(); diff --git a/src/common/transformations/include/transformations/op_conversions/convert_shapeof3.hpp b/src/common/transformations/include/transformations/op_conversions/convert_shapeof3.hpp index 108d2a8cb8d..bd46611853a 100644 --- a/src/common/transformations/include/transformations/op_conversions/convert_shapeof3.hpp +++ b/src/common/transformations/include/transformations/op_conversions/convert_shapeof3.hpp @@ -4,12 +4,10 @@ #pragma once -#include #include - -#include - #include +#include +#include namespace ngraph { namespace pass { @@ -19,7 +17,7 @@ class TRANSFORMATIONS_API ConvertShapeOf3; } // namespace pass } // namespace ngraph -class ngraph::pass::ConvertShapeOf3: public ngraph::pass::MatcherPass { +class ngraph::pass::ConvertShapeOf3 : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; ConvertShapeOf3(); diff --git a/src/common/transformations/include/transformations/op_conversions/convert_shuffle_channels3.hpp b/src/common/transformations/include/transformations/op_conversions/convert_shuffle_channels3.hpp index 8080c20cf37..fe76ccc3ffc 100644 --- a/src/common/transformations/include/transformations/op_conversions/convert_shuffle_channels3.hpp +++ b/src/common/transformations/include/transformations/op_conversions/convert_shuffle_channels3.hpp @@ -4,12 +4,10 @@ #pragma once -#include #include - -#include - #include +#include +#include namespace ngraph { namespace pass { @@ -19,7 +17,7 @@ class TRANSFORMATIONS_API ConvertShuffleChannels3; } // namespace pass } // namespace ngraph -class ngraph::pass::ConvertShuffleChannels3: public ngraph::pass::MatcherPass { +class ngraph::pass::ConvertShuffleChannels3 : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; ConvertShuffleChannels3(); diff 
--git a/src/common/transformations/include/transformations/op_conversions/convert_slice_to_strided_slice.hpp b/src/common/transformations/include/transformations/op_conversions/convert_slice_to_strided_slice.hpp index 0e6e00d42dd..1cc2521e66b 100644 --- a/src/common/transformations/include/transformations/op_conversions/convert_slice_to_strided_slice.hpp +++ b/src/common/transformations/include/transformations/op_conversions/convert_slice_to_strided_slice.hpp @@ -4,8 +4,8 @@ #pragma once -#include #include +#include namespace ngraph { namespace pass { @@ -15,12 +15,11 @@ class TRANSFORMATIONS_API SliceToStridedSlice; } // namespace pass } // namespace ngraph - /** * @ingroup ie_transformation_common_api * @brief SliceToStridedSlice transformation convert v8::Slice to v1::StridedSlice */ -class ngraph::pass::SliceToStridedSlice: public ngraph::pass::MatcherPass { +class ngraph::pass::SliceToStridedSlice : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; SliceToStridedSlice(bool use_shapes); diff --git a/src/common/transformations/include/transformations/op_conversions/convert_softmax_downgrade.hpp b/src/common/transformations/include/transformations/op_conversions/convert_softmax_downgrade.hpp index 380e44a3cfd..907576d7b92 100644 --- a/src/common/transformations/include/transformations/op_conversions/convert_softmax_downgrade.hpp +++ b/src/common/transformations/include/transformations/op_conversions/convert_softmax_downgrade.hpp @@ -4,9 +4,8 @@ #pragma once -#include - #include +#include namespace ngraph { namespace pass { diff --git a/src/common/transformations/include/transformations/op_conversions/convert_softmax_upgrade.hpp b/src/common/transformations/include/transformations/op_conversions/convert_softmax_upgrade.hpp index 00c42ffeee6..603a036d3e1 100644 --- a/src/common/transformations/include/transformations/op_conversions/convert_softmax_upgrade.hpp +++ b/src/common/transformations/include/transformations/op_conversions/convert_softmax_upgrade.hpp @@ -4,9 +4,8 @@ #pragma once -#include - #include +#include namespace ngraph { namespace pass { diff --git a/src/common/transformations/include/transformations/op_conversions/convert_space_to_batch.hpp b/src/common/transformations/include/transformations/op_conversions/convert_space_to_batch.hpp index 7a1109526e4..574e93baa88 100644 --- a/src/common/transformations/include/transformations/op_conversions/convert_space_to_batch.hpp +++ b/src/common/transformations/include/transformations/op_conversions/convert_space_to_batch.hpp @@ -4,12 +4,10 @@ #pragma once -#include #include - -#include - #include +#include +#include namespace ngraph { namespace pass { @@ -32,7 +30,7 @@ class TRANSFORMATIONS_API ConvertSpaceToBatch; * */ -class ngraph::pass::ConvertSpaceToBatch: public ngraph::pass::MatcherPass { +class ngraph::pass::ConvertSpaceToBatch : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; explicit ConvertSpaceToBatch(bool convert_by_elements = true) : MatcherPass() { diff --git a/src/common/transformations/include/transformations/op_conversions/convert_space_to_depth.hpp b/src/common/transformations/include/transformations/op_conversions/convert_space_to_depth.hpp index 2ab8be9350e..572f49ba92e 100644 --- a/src/common/transformations/include/transformations/op_conversions/convert_space_to_depth.hpp +++ b/src/common/transformations/include/transformations/op_conversions/convert_space_to_depth.hpp @@ -4,12 +4,10 @@ #pragma once -#include #include - -#include - #include +#include +#include namespace 
ngraph { namespace pass { @@ -19,7 +17,7 @@ class TRANSFORMATIONS_API ConvertSpaceToDepth; } // namespace pass } // namespace ngraph -class ngraph::pass::ConvertSpaceToDepth: public ngraph::pass::MatcherPass { +class ngraph::pass::ConvertSpaceToDepth : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; ConvertSpaceToDepth(); diff --git a/src/common/transformations/include/transformations/op_conversions/convert_subtract.hpp b/src/common/transformations/include/transformations/op_conversions/convert_subtract.hpp index cb2cd33bc6a..cd18c4c279f 100644 --- a/src/common/transformations/include/transformations/op_conversions/convert_subtract.hpp +++ b/src/common/transformations/include/transformations/op_conversions/convert_subtract.hpp @@ -4,12 +4,10 @@ #pragma once -#include #include - -#include - #include +#include +#include namespace ngraph { namespace pass { @@ -19,7 +17,7 @@ class TRANSFORMATIONS_API ConvertSubtract; } // namespace pass } // namespace ngraph -class ngraph::pass::ConvertSubtract: public ngraph::pass::MatcherPass { +class ngraph::pass::ConvertSubtract : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; ConvertSubtract(); diff --git a/src/common/transformations/include/transformations/op_conversions/convert_ti_to_sequences.hpp b/src/common/transformations/include/transformations/op_conversions/convert_ti_to_sequences.hpp index 5aff6ed4276..8e13bc21997 100644 --- a/src/common/transformations/include/transformations/op_conversions/convert_ti_to_sequences.hpp +++ b/src/common/transformations/include/transformations/op_conversions/convert_ti_to_sequences.hpp @@ -4,15 +4,10 @@ #pragma once -#include #include - -#include -#include - -#include - #include +#include +#include namespace ngraph { namespace pass { @@ -31,7 +26,7 @@ class TRANSFORMATIONS_API ConvertTensorIteratorToSequence; * converts this pattern to LSTMSequence layer and replaces them TensorIterator. */ -class ngraph::pass::ConvertTensorIteratorToLSTMSequence: public ngraph::pass::MatcherPass { +class ngraph::pass::ConvertTensorIteratorToLSTMSequence : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; ConvertTensorIteratorToLSTMSequence(); @@ -43,7 +38,7 @@ public: * converts this pattern to RNNSequence layer and replaces them TensorIterator. */ -class ngraph::pass::ConvertTensorIteratorToRNNSequence: public ngraph::pass::MatcherPass { +class ngraph::pass::ConvertTensorIteratorToRNNSequence : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; ConvertTensorIteratorToRNNSequence(); @@ -55,7 +50,7 @@ public: * converts this pattern to GRUSequence layer and replaces them TensorIterator. 
*/ -class ngraph::pass::ConvertTensorIteratorToGRUSequence: public ngraph::pass::MatcherPass { +class ngraph::pass::ConvertTensorIteratorToGRUSequence : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; ConvertTensorIteratorToGRUSequence(); diff --git a/src/common/transformations/include/transformations/op_conversions/convert_topk3.hpp b/src/common/transformations/include/transformations/op_conversions/convert_topk3.hpp index 0de713b0456..c2575583f35 100644 --- a/src/common/transformations/include/transformations/op_conversions/convert_topk3.hpp +++ b/src/common/transformations/include/transformations/op_conversions/convert_topk3.hpp @@ -4,12 +4,10 @@ #pragma once -#include #include - -#include - #include +#include +#include namespace ngraph { namespace pass { @@ -19,7 +17,7 @@ class TRANSFORMATIONS_API ConvertTopK3; } // namespace pass } // namespace ngraph -class ngraph::pass::ConvertTopK3: public ngraph::pass::MatcherPass { +class ngraph::pass::ConvertTopK3 : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; ConvertTopK3(); diff --git a/src/common/transformations/include/transformations/op_conversions/fq_decomposition.hpp b/src/common/transformations/include/transformations/op_conversions/fq_decomposition.hpp index 1f827a7de3f..ae0df4c61ec 100644 --- a/src/common/transformations/include/transformations/op_conversions/fq_decomposition.hpp +++ b/src/common/transformations/include/transformations/op_conversions/fq_decomposition.hpp @@ -4,8 +4,8 @@ #pragma once -#include #include +#include namespace ngraph { namespace pass { @@ -25,14 +25,16 @@ class TRANSFORMATIONS_API FakeQuantizeDecomposition; * elif x > max(input_low, input_high): * output = output_high * else: - * output = round((x - input_low) / (input_high - input_low) * (levels-1)) / (levels-1) * (output_high - output_low) + output_low + * output = round((x - input_low) / (input_high - input_low) * (levels-1)) / (levels-1) * (output_high - output_low) + + * output_low * * expand brackets into round: * round(x * (levels-1) / (input_high - input_low) - input_low * (levels-1) / (input_high - input_low)) * div on (levels-1) and mult on (output_high - output_low) => mult on (output_high - output_low) / (levels-1) * * => - * round(x * (levels-1) / (input_high - input_low) - input_low * (levels-1) / (input_high - input_low)) * (output_high - output_low) / (levels-1) + output_low + * round(x * (levels-1) / (input_high - input_low) - input_low * (levels-1) / (input_high - input_low)) * (output_high - + * output_low) / (levels-1) + output_low * * This transformation doesn't support following cases: * 1. 
At least one 'range' input is not Constant @@ -40,7 +42,7 @@ class TRANSFORMATIONS_API FakeQuantizeDecomposition; * */ -class ngraph::pass::FakeQuantizeDecomposition: public ngraph::pass::MatcherPass { +class ngraph::pass::FakeQuantizeDecomposition : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; FakeQuantizeDecomposition(); diff --git a/src/common/transformations/include/transformations/op_conversions/gather_normalize_negative_indices.hpp b/src/common/transformations/include/transformations/op_conversions/gather_normalize_negative_indices.hpp index 7b14086df32..cb63d082103 100644 --- a/src/common/transformations/include/transformations/op_conversions/gather_normalize_negative_indices.hpp +++ b/src/common/transformations/include/transformations/op_conversions/gather_normalize_negative_indices.hpp @@ -4,13 +4,13 @@ #pragma once -#include #include +#include namespace ngraph { namespace pass { - class TRANSFORMATIONS_API GatherNegativeConstIndicesNormalize; +class TRANSFORMATIONS_API GatherNegativeConstIndicesNormalize; } // namespace pass } // namespace ngraph diff --git a/src/common/transformations/include/transformations/op_conversions/gelu7_downgrade.hpp b/src/common/transformations/include/transformations/op_conversions/gelu7_downgrade.hpp index 7d122f0d937..9380426e505 100644 --- a/src/common/transformations/include/transformations/op_conversions/gelu7_downgrade.hpp +++ b/src/common/transformations/include/transformations/op_conversions/gelu7_downgrade.hpp @@ -4,13 +4,13 @@ #pragma once -#include #include +#include namespace ngraph { namespace pass { - class TRANSFORMATIONS_API Gelu7Downgrade; +class TRANSFORMATIONS_API Gelu7Downgrade; } // namespace pass } // namespace ngraph diff --git a/src/common/transformations/include/transformations/op_conversions/gru_cell_decomposition.hpp b/src/common/transformations/include/transformations/op_conversions/gru_cell_decomposition.hpp index f52b4de7e15..58508d22eb5 100644 --- a/src/common/transformations/include/transformations/op_conversions/gru_cell_decomposition.hpp +++ b/src/common/transformations/include/transformations/op_conversions/gru_cell_decomposition.hpp @@ -4,12 +4,10 @@ #pragma once -#include #include - -#include - #include +#include +#include namespace ngraph { namespace pass { @@ -35,7 +33,7 @@ class TRANSFORMATIONS_API GRUCellDecomposition; * * */ -class ngraph::pass::GRUCellDecomposition: public ngraph::pass::MatcherPass { +class ngraph::pass::GRUCellDecomposition : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; GRUCellDecomposition(); diff --git a/src/common/transformations/include/transformations/op_conversions/hsigmoid_decomposition.hpp b/src/common/transformations/include/transformations/op_conversions/hsigmoid_decomposition.hpp index b4f559f1e86..194d224ca58 100644 --- a/src/common/transformations/include/transformations/op_conversions/hsigmoid_decomposition.hpp +++ b/src/common/transformations/include/transformations/op_conversions/hsigmoid_decomposition.hpp @@ -4,8 +4,8 @@ #pragma once -#include #include +#include namespace ngraph { namespace pass { @@ -19,7 +19,7 @@ class TRANSFORMATIONS_API HSigmoidDecomposition; * @ingroup ie_transformation_common_api * @brief HSigmoidDecomposition transformation into sub-graph (min(Relu(x + 3), 6) * const(1/6). 
*/ -class ngraph::pass::HSigmoidDecomposition: public ngraph::pass::MatcherPass { +class ngraph::pass::HSigmoidDecomposition : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; HSigmoidDecomposition(); diff --git a/src/common/transformations/include/transformations/op_conversions/hswish_decomposition.hpp b/src/common/transformations/include/transformations/op_conversions/hswish_decomposition.hpp index 7019ab371f5..f3a37120b07 100644 --- a/src/common/transformations/include/transformations/op_conversions/hswish_decomposition.hpp +++ b/src/common/transformations/include/transformations/op_conversions/hswish_decomposition.hpp @@ -4,8 +4,8 @@ #pragma once -#include #include +#include namespace ngraph { namespace pass { @@ -19,7 +19,7 @@ class TRANSFORMATIONS_API HSwishDecomposition; * @ingroup ie_transformation_common_api * @brief HSwishDecomposition transformation into sub-graph x * (min(Relu(x + 3), 6) * const(1/6). */ -class ngraph::pass::HSwishDecomposition: public ngraph::pass::MatcherPass { +class ngraph::pass::HSwishDecomposition : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; HSwishDecomposition(); diff --git a/src/common/transformations/include/transformations/op_conversions/log_softmax_decomposition.hpp b/src/common/transformations/include/transformations/op_conversions/log_softmax_decomposition.hpp index 6963bc70830..b71278227a2 100644 --- a/src/common/transformations/include/transformations/op_conversions/log_softmax_decomposition.hpp +++ b/src/common/transformations/include/transformations/op_conversions/log_softmax_decomposition.hpp @@ -4,13 +4,13 @@ #pragma once -#include #include +#include namespace ngraph { namespace pass { - class TRANSFORMATIONS_API LogSoftmaxDecomposition; +class TRANSFORMATIONS_API LogSoftmaxDecomposition; } // namespace pass } // namespace ngraph diff --git a/src/common/transformations/include/transformations/op_conversions/lstm_cell_decomposition.hpp b/src/common/transformations/include/transformations/op_conversions/lstm_cell_decomposition.hpp index 1ae74025b31..78d103abf8c 100644 --- a/src/common/transformations/include/transformations/op_conversions/lstm_cell_decomposition.hpp +++ b/src/common/transformations/include/transformations/op_conversions/lstm_cell_decomposition.hpp @@ -4,12 +4,10 @@ #pragma once -#include #include - -#include - #include +#include +#include namespace ngraph { namespace pass { @@ -36,7 +34,7 @@ class TRANSFORMATIONS_API LSTMCellDecomposition; * * */ -class ngraph::pass::LSTMCellDecomposition: public ngraph::pass::MatcherPass { +class ngraph::pass::LSTMCellDecomposition : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; LSTMCellDecomposition(); diff --git a/src/common/transformations/include/transformations/op_conversions/mvn6_decomposition.hpp b/src/common/transformations/include/transformations/op_conversions/mvn6_decomposition.hpp index 26a63b11399..287856f90a1 100644 --- a/src/common/transformations/include/transformations/op_conversions/mvn6_decomposition.hpp +++ b/src/common/transformations/include/transformations/op_conversions/mvn6_decomposition.hpp @@ -4,13 +4,13 @@ #pragma once -#include #include +#include namespace ngraph { namespace pass { - class TRANSFORMATIONS_API MVN6Decomposition; +class TRANSFORMATIONS_API MVN6Decomposition; } // namespace pass } // namespace ngraph @@ -18,7 +18,8 @@ namespace pass { /** * @ingroup ie_transformation_common_api * @brief MVN6Decomposition transformation into sub-graph x - ReduceMean(x, axes) if normalize_variance is 
false and - * into sub-graph (x - ReduceMean(x, axes)) / Sqrt(ReduceMean((x - ReduceMean(x, axes)) ^ 2)) if normalize_variance is true. + * into sub-graph (x - ReduceMean(x, axes)) / Sqrt(ReduceMean((x - ReduceMean(x, axes)) ^ 2)) if normalize_variance is + * true. */ class ngraph::pass::MVN6Decomposition : public ngraph::pass::MatcherPass { public: diff --git a/src/common/transformations/include/transformations/op_conversions/normalize_l2_decomposition.hpp b/src/common/transformations/include/transformations/op_conversions/normalize_l2_decomposition.hpp index b62b4844f49..39dcf457e61 100644 --- a/src/common/transformations/include/transformations/op_conversions/normalize_l2_decomposition.hpp +++ b/src/common/transformations/include/transformations/op_conversions/normalize_l2_decomposition.hpp @@ -4,13 +4,12 @@ #pragma once -#include #include - -#include - #include #include +#include +#include + #include "ngraph/pattern/matcher.hpp" namespace ngraph { @@ -25,7 +24,7 @@ class TRANSFORMATIONS_API NormalizeL2Decomposition; * @ingroup ie_transformation_common_api * @brief Decomposes NormalizeL2 into subgraph */ -class ngraph::pass::NormalizeL2Decomposition: public ngraph::pass::MatcherPass { +class ngraph::pass::NormalizeL2Decomposition : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; NormalizeL2Decomposition(); diff --git a/src/common/transformations/include/transformations/op_conversions/reduce_l1_decomposition.hpp b/src/common/transformations/include/transformations/op_conversions/reduce_l1_decomposition.hpp index cde3b793c4f..79d151c2830 100644 --- a/src/common/transformations/include/transformations/op_conversions/reduce_l1_decomposition.hpp +++ b/src/common/transformations/include/transformations/op_conversions/reduce_l1_decomposition.hpp @@ -4,13 +4,12 @@ #pragma once -#include #include - -#include - #include #include +#include +#include + #include "ngraph/pattern/matcher.hpp" namespace ngraph { @@ -25,7 +24,7 @@ class TRANSFORMATIONS_API ReduceL1Decomposition; * @ingroup ie_transformation_common_api * @brief Decomposes ReduceL1 into ReduceSum(abs(x)). */ -class ngraph::pass::ReduceL1Decomposition: public ngraph::pass::MatcherPass { +class ngraph::pass::ReduceL1Decomposition : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; ReduceL1Decomposition(); diff --git a/src/common/transformations/include/transformations/op_conversions/reduce_l2_decomposition.hpp b/src/common/transformations/include/transformations/op_conversions/reduce_l2_decomposition.hpp index a8abc08e88a..d1a5d784583 100644 --- a/src/common/transformations/include/transformations/op_conversions/reduce_l2_decomposition.hpp +++ b/src/common/transformations/include/transformations/op_conversions/reduce_l2_decomposition.hpp @@ -4,13 +4,12 @@ #pragma once -#include #include - -#include - #include #include +#include +#include + #include "ngraph/pattern/matcher.hpp" namespace ngraph { @@ -25,7 +24,7 @@ class TRANSFORMATIONS_API ReduceL2Decomposition; * @ingroup ie_transformation_common_api * @brief Decomposes ReduceL2 into sqrt(ReduceSum(x * x)). 
*/ -class ngraph::pass::ReduceL2Decomposition: public ngraph::pass::MatcherPass { +class ngraph::pass::ReduceL2Decomposition : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; ReduceL2Decomposition(); diff --git a/src/common/transformations/include/transformations/op_conversions/rnn_cell_decomposition.hpp b/src/common/transformations/include/transformations/op_conversions/rnn_cell_decomposition.hpp index 1c01942b5a7..ecda79e3e4a 100644 --- a/src/common/transformations/include/transformations/op_conversions/rnn_cell_decomposition.hpp +++ b/src/common/transformations/include/transformations/op_conversions/rnn_cell_decomposition.hpp @@ -4,12 +4,10 @@ #pragma once -#include #include - -#include - #include +#include +#include namespace ngraph { namespace pass { @@ -30,7 +28,7 @@ class TRANSFORMATIONS_API RNNCellDecomposition; * * */ -class ngraph::pass::RNNCellDecomposition: public ngraph::pass::MatcherPass { +class ngraph::pass::RNNCellDecomposition : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; RNNCellDecomposition(); diff --git a/src/common/transformations/include/transformations/op_conversions/simplify_ctc_greedy_decoder_seq_len.hpp b/src/common/transformations/include/transformations/op_conversions/simplify_ctc_greedy_decoder_seq_len.hpp index 00d95cbc85f..17d91ab33c2 100644 --- a/src/common/transformations/include/transformations/op_conversions/simplify_ctc_greedy_decoder_seq_len.hpp +++ b/src/common/transformations/include/transformations/op_conversions/simplify_ctc_greedy_decoder_seq_len.hpp @@ -5,10 +5,8 @@ #pragma once #include - -#include - #include +#include namespace ngraph { namespace pass { @@ -36,7 +34,7 @@ class TRANSFORMATIONS_API SimplifyCTCGreedyDecoderSeqLen; * * The transformation works only for case when the blank_index input == C-1, where C is the number of classes. 
*/ -class ngraph::pass::SimplifyCTCGreedyDecoderSeqLen: public ngraph::pass::MatcherPass { +class ngraph::pass::SimplifyCTCGreedyDecoderSeqLen : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; SimplifyCTCGreedyDecoderSeqLen(); diff --git a/src/common/transformations/include/transformations/op_conversions/softmax_decomposition.hpp b/src/common/transformations/include/transformations/op_conversions/softmax_decomposition.hpp index cce4b3f5a1b..c097a19d29c 100644 --- a/src/common/transformations/include/transformations/op_conversions/softmax_decomposition.hpp +++ b/src/common/transformations/include/transformations/op_conversions/softmax_decomposition.hpp @@ -5,10 +5,8 @@ #pragma once #include - -#include - #include +#include namespace ngraph { namespace pass { @@ -68,7 +66,7 @@ class TRANSFORMATIONS_API SoftmaxDecomposition; * */ -class ngraph::pass::SoftmaxDecomposition: public ngraph::pass::MatcherPass { +class ngraph::pass::SoftmaxDecomposition : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; SoftmaxDecomposition(); diff --git a/src/common/transformations/include/transformations/op_conversions/softplus_decomposition.hpp b/src/common/transformations/include/transformations/op_conversions/softplus_decomposition.hpp index 1f8a70ec3ca..8274a7da19c 100644 --- a/src/common/transformations/include/transformations/op_conversions/softplus_decomposition.hpp +++ b/src/common/transformations/include/transformations/op_conversions/softplus_decomposition.hpp @@ -4,11 +4,10 @@ #pragma once -#include #include - -#include #include +#include +#include namespace ngraph { namespace pass { @@ -23,7 +22,7 @@ class TRANSFORMATIONS_API SoftPlusDecomposition; * @brief SoftPlusDecomposition transformation replaces SoftPlus op to * group of operations: log(exp(x) + 1). 
*/ -class ngraph::pass::SoftPlusDecomposition: public ngraph::pass::MatcherPass { +class ngraph::pass::SoftPlusDecomposition : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; SoftPlusDecomposition(); diff --git a/src/common/transformations/include/transformations/opset_conversions/convert_opset2_to_opset1.hpp b/src/common/transformations/include/transformations/opset_conversions/convert_opset2_to_opset1.hpp index d20b1364b98..7852fdc106c 100644 --- a/src/common/transformations/include/transformations/opset_conversions/convert_opset2_to_opset1.hpp +++ b/src/common/transformations/include/transformations/opset_conversions/convert_opset2_to_opset1.hpp @@ -5,8 +5,8 @@ #pragma once #include -#include #include +#include namespace ngraph { namespace pass { @@ -16,7 +16,7 @@ class TRANSFORMATIONS_API ConvertOpSet2ToOpSet1; } // namespace pass } // namespace ngraph -class ngraph::pass::ConvertOpSet2ToOpSet1: public ngraph::pass::FunctionPass { +class ngraph::pass::ConvertOpSet2ToOpSet1 : public ngraph::pass::FunctionPass { public: NGRAPH_RTTI_DECLARATION; bool run_on_model(const std::shared_ptr& m) override; diff --git a/src/common/transformations/include/transformations/opset_conversions/convert_opset3_to_opset2.hpp b/src/common/transformations/include/transformations/opset_conversions/convert_opset3_to_opset2.hpp index 7cbc7c1db91..80313a4ee9d 100644 --- a/src/common/transformations/include/transformations/opset_conversions/convert_opset3_to_opset2.hpp +++ b/src/common/transformations/include/transformations/opset_conversions/convert_opset3_to_opset2.hpp @@ -5,8 +5,8 @@ #pragma once #include -#include #include +#include namespace ngraph { namespace pass { @@ -16,7 +16,7 @@ class TRANSFORMATIONS_API ConvertOpSet3ToOpSet2; } // namespace pass } // namespace ngraph -class ngraph::pass::ConvertOpSet3ToOpSet2: public ngraph::pass::FunctionPass { +class ngraph::pass::ConvertOpSet3ToOpSet2 : public ngraph::pass::FunctionPass { public: NGRAPH_RTTI_DECLARATION; bool run_on_model(const std::shared_ptr& m) override; diff --git a/src/common/transformations/include/transformations/resolve_names_collisions.hpp b/src/common/transformations/include/transformations/resolve_names_collisions.hpp index 795efb59ff6..f87f74b7a5b 100644 --- a/src/common/transformations/include/transformations/resolve_names_collisions.hpp +++ b/src/common/transformations/include/transformations/resolve_names_collisions.hpp @@ -14,7 +14,7 @@ namespace pass { * @ingroup ie_transformation_common_api * @brief ResolveNameCollisions transformation helps to fix names collisions * if some internal nodes or nodes with autogenerated names have conflicts with other nodes from the original graph - * + * * Every transformation call can change the graph structure and create some additional operations, * autogenerated name is used if new operation doesn't have friendly name. * This transformations should be called after the transformation pipeline in order to fix names collisions. 
@@ -27,4 +27,3 @@ public: } // namespace pass } // namespace ov - diff --git a/src/common/transformations/include/transformations/rt_info/attributes.hpp b/src/common/transformations/include/transformations/rt_info/attributes.hpp index b52038f4138..8f500d956a8 100644 --- a/src/common/transformations/include/transformations/rt_info/attributes.hpp +++ b/src/common/transformations/include/transformations/rt_info/attributes.hpp @@ -9,19 +9,19 @@ #include #include #include -#include #include +#include #include +#include #include #include #include #include #include #include +#include #include #include -#include -#include #include #include @@ -36,7 +36,7 @@ public: private: template void register_factory() { - m_factory_registry.emplace(T::get_type_info_static(), [] () -> Any { + m_factory_registry.emplace(T::get_type_info_static(), []() -> Any { return T{}; }); } diff --git a/src/common/transformations/include/transformations/rt_info/decompression.hpp b/src/common/transformations/include/transformations/rt_info/decompression.hpp index 57753e81736..a7d9c0d106d 100644 --- a/src/common/transformations/include/transformations/rt_info/decompression.hpp +++ b/src/common/transformations/include/transformations/rt_info/decompression.hpp @@ -5,16 +5,16 @@ #pragma once #include + #include #include -#include #include +#include #include "openvino/core/node.hpp" #include "openvino/core/runtime_attribute.hpp" #include "transformations_visibility.hpp" - namespace ov { TRANSFORMATIONS_API void mark_as_decompression(const std::shared_ptr& node); @@ -34,9 +34,13 @@ public: Decompression() = default; - bool visit_attributes(AttributeVisitor& visitor) override { return true; } + bool visit_attributes(AttributeVisitor& visitor) override { + return true; + } - bool is_copyable() const override { return false; } + bool is_copyable() const override { + return false; + } }; } // namespace ov diff --git a/src/common/transformations/include/transformations/rt_info/disable_constant_folding.hpp b/src/common/transformations/include/transformations/rt_info/disable_constant_folding.hpp index e4e4bb8b526..50017e69048 100644 --- a/src/common/transformations/include/transformations/rt_info/disable_constant_folding.hpp +++ b/src/common/transformations/include/transformations/rt_info/disable_constant_folding.hpp @@ -5,21 +5,22 @@ #pragma once #include + #include #include -#include -#include - #include #include +#include +#include #include + #include "openvino/pass/constant_folding.hpp" namespace ov { -using pass::disable_constant_folding; -using pass::enable_constant_folding; using pass::constant_folding_is_disabled; +using pass::disable_constant_folding; using pass::DisableConstantFolding; +using pass::enable_constant_folding; } // namespace ov diff --git a/src/common/transformations/include/transformations/rt_info/disable_fp16_compression.hpp b/src/common/transformations/include/transformations/rt_info/disable_fp16_compression.hpp index cfcfda69363..d245ce23d0c 100644 --- a/src/common/transformations/include/transformations/rt_info/disable_fp16_compression.hpp +++ b/src/common/transformations/include/transformations/rt_info/disable_fp16_compression.hpp @@ -8,7 +8,6 @@ #include "openvino/core/runtime_attribute.hpp" #include "transformations_visibility.hpp" - namespace ov { TRANSFORMATIONS_API void disable_fp16_compression(const std::shared_ptr& node); @@ -28,7 +27,9 @@ public: DisableFP16Compression() = default; - bool is_copyable() const override { return false; } + bool is_copyable() const override { + return false; + } }; } // 
namespace ov diff --git a/src/common/transformations/include/transformations/rt_info/fused_names_attribute.hpp b/src/common/transformations/include/transformations/rt_info/fused_names_attribute.hpp index 8e7d9a54629..38d17031e83 100644 --- a/src/common/transformations/include/transformations/rt_info/fused_names_attribute.hpp +++ b/src/common/transformations/include/transformations/rt_info/fused_names_attribute.hpp @@ -10,17 +10,17 @@ #pragma once #include + #include #include -#include -#include - +#include #include #include #include -#include -#include "openvino/core/runtime_attribute.hpp" +#include +#include +#include "openvino/core/runtime_attribute.hpp" namespace ngraph { @@ -44,7 +44,7 @@ public: * @brief Constructs a new object consisting of a single name * * @param[in] name The name */ - explicit FusedNames(const std::string &name) { + explicit FusedNames(const std::string& name) { fused_names.insert(name); } @@ -52,7 +52,7 @@ public: * @brief Unites current set of already fused names with another FusedNames object * @param[in] names Another object to fuse with */ - void fuseWith(const FusedNames &names); + void fuseWith(const FusedNames& names); /** * @brief return string with operation names separated by coma in alphabetical order @@ -79,7 +79,7 @@ public: * @brief getFusedNames return string with operation names separated by coma in alphabetical order * @param[in] node The node will be used to get FusedNames attribute */ -NGRAPH_API std::string getFusedNames(const std::shared_ptr & node); +NGRAPH_API std::string getFusedNames(const std::shared_ptr& node); /** * @ingroup ie_runtime_attr_api @@ -87,6 +87,6 @@ NGRAPH_API std::string getFusedNames(const std::shared_ptr & node) * @param[in] node The node will be used to get FusedNames attribute * @return vector of strings */ -NGRAPH_API std::vector getFusedNamesVector(const std::shared_ptr & node); +NGRAPH_API std::vector getFusedNamesVector(const std::shared_ptr& node); } // namespace ngraph diff --git a/src/common/transformations/include/transformations/rt_info/nms_selected_indices.hpp b/src/common/transformations/include/transformations/rt_info/nms_selected_indices.hpp index 9928eaad0dd..2cad2935b5e 100644 --- a/src/common/transformations/include/transformations/rt_info/nms_selected_indices.hpp +++ b/src/common/transformations/include/transformations/rt_info/nms_selected_indices.hpp @@ -5,27 +5,30 @@ #pragma once #include + #include #include -#include -#include - #include #include +#include +#include #include + #include "openvino/core/runtime_attribute.hpp" namespace ov { -TRANSFORMATIONS_API bool has_nms_selected_indices(const Node * node); +TRANSFORMATIONS_API bool has_nms_selected_indices(const Node* node); -TRANSFORMATIONS_API void set_nms_selected_indices(Node * node); +TRANSFORMATIONS_API void set_nms_selected_indices(Node* node); class TRANSFORMATIONS_API NmsSelectedIndices : ov::RuntimeAttribute { public: OPENVINO_RTTI("nms_selected_indices", "0"); NmsSelectedIndices() = default; - bool is_copyable() const override { return false; } + bool is_copyable() const override { + return false; + } }; } // namespace ov diff --git a/src/common/transformations/include/transformations/rt_info/nonconvertible_divide.hpp b/src/common/transformations/include/transformations/rt_info/nonconvertible_divide.hpp index 4c29ee2ccc4..b1f78f7e0e7 100644 --- a/src/common/transformations/include/transformations/rt_info/nonconvertible_divide.hpp +++ b/src/common/transformations/include/transformations/rt_info/nonconvertible_divide.hpp @@ -8,7 +8,6 @@ 
#include "openvino/core/runtime_attribute.hpp" #include "transformations_visibility.hpp" - namespace ov { TRANSFORMATIONS_API void disable_divide_conversion(const std::shared_ptr& node); @@ -28,7 +27,9 @@ public: NonconvertibleDivide() = default; - bool is_copyable() const override { return false; } + bool is_copyable() const override { + return false; + } }; } // namespace ov diff --git a/src/common/transformations/include/transformations/rt_info/preprocessing_attribute.hpp b/src/common/transformations/include/transformations/rt_info/preprocessing_attribute.hpp index 9d45c93bd76..91752f275cd 100644 --- a/src/common/transformations/include/transformations/rt_info/preprocessing_attribute.hpp +++ b/src/common/transformations/include/transformations/rt_info/preprocessing_attribute.hpp @@ -23,6 +23,8 @@ class NGRAPH_API PreprocessingAttribute : public ov::RuntimeAttribute { public: OPENVINO_RTTI("preprocessing", "0"); PreprocessingAttribute() = default; - bool visit_attributes(AttributeVisitor& visitor) override { return true; }; + bool visit_attributes(AttributeVisitor& visitor) override { + return true; + }; }; -} // namespace ov +} // namespace ov diff --git a/src/common/transformations/include/transformations/rt_info/primitives_priority_attribute.hpp b/src/common/transformations/include/transformations/rt_info/primitives_priority_attribute.hpp index d5b4f984849..9042ad540e6 100644 --- a/src/common/transformations/include/transformations/rt_info/primitives_priority_attribute.hpp +++ b/src/common/transformations/include/transformations/rt_info/primitives_priority_attribute.hpp @@ -10,13 +10,13 @@ #pragma once #include + #include #include -#include -#include - #include #include +#include +#include #include "openvino/core/runtime_attribute.hpp" @@ -26,7 +26,7 @@ namespace ov { * @brief getPrimitivesPriority return string with primitive priorities value * @param[in] node The node will be used to get PrimitivesPriority attribute */ -NGRAPH_API std::string getPrimitivesPriority(const std::shared_ptr & node); +NGRAPH_API std::string getPrimitivesPriority(const std::shared_ptr& node); class NGRAPH_API PrimitivesPriority : public ov::RuntimeAttribute { public: @@ -34,11 +34,11 @@ public: PrimitivesPriority() = default; - PrimitivesPriority(const std::string &value) : value(value) {} + PrimitivesPriority(const std::string& value) : value(value) {} - Any merge(const ngraph::NodeVector & nodes) const override; + Any merge(const ngraph::NodeVector& nodes) const override; - bool visit_attributes(AttributeVisitor & visitor) override; + bool visit_attributes(AttributeVisitor& visitor) override; std::string to_string() const override; diff --git a/src/common/transformations/include/transformations/rt_info/strides_property.hpp b/src/common/transformations/include/transformations/rt_info/strides_property.hpp index 682d1150a11..5204543687a 100644 --- a/src/common/transformations/include/transformations/rt_info/strides_property.hpp +++ b/src/common/transformations/include/transformations/rt_info/strides_property.hpp @@ -4,10 +4,11 @@ #pragma once -#include #include +#include #include #include + #include "openvino/core/runtime_attribute.hpp" namespace ov { @@ -21,4 +22,4 @@ public: StridesPropagation(const ngraph::Strides& value) : value{value} {} ngraph::Strides value; }; -} // namespace ov +} // namespace ov diff --git a/src/common/transformations/include/transformations/smart_reshape/broadcast_const_range_replacement.hpp 
b/src/common/transformations/include/transformations/smart_reshape/broadcast_const_range_replacement.hpp index 6c0f449f9ea..8826269fc05 100644 --- a/src/common/transformations/include/transformations/smart_reshape/broadcast_const_range_replacement.hpp +++ b/src/common/transformations/include/transformations/smart_reshape/broadcast_const_range_replacement.hpp @@ -4,9 +4,8 @@ #pragma once -#include - #include +#include namespace ngraph { namespace pass { @@ -18,10 +17,11 @@ class TRANSFORMATIONS_API BroadcastConstRangeReplacement; /** * @ingroup ie_transformation_common_api - * @brief BroadcastConstRangeReplacement replaces Constant filled with range values starting from 0 and replaces it with Range op + * @brief BroadcastConstRangeReplacement replaces Constant filled with range values starting from 0 and replaces it with + * Range op */ -class ngraph::pass::BroadcastConstRangeReplacement: public ngraph::pass::MatcherPass { +class ngraph::pass::BroadcastConstRangeReplacement : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; BroadcastConstRangeReplacement(); diff --git a/src/common/transformations/include/transformations/smart_reshape/matmul_sr.hpp b/src/common/transformations/include/transformations/smart_reshape/matmul_sr.hpp index f949e8aa52c..6491f1a7f98 100644 --- a/src/common/transformations/include/transformations/smart_reshape/matmul_sr.hpp +++ b/src/common/transformations/include/transformations/smart_reshape/matmul_sr.hpp @@ -4,9 +4,8 @@ #pragma once -#include #include - +#include #include namespace ngraph { @@ -27,17 +26,17 @@ class NGRAPH_API TransposeMatMul; * - MatMul(any_input, Reshape(any_input, any_input)) */ -class ngraph::pass::ReshapeAMatMul: public ngraph::pass::MatcherPass { +class ngraph::pass::ReshapeAMatMul : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; ReshapeAMatMul(); }; -class ngraph::pass::ReshapeBMatMul: public ngraph::pass::MatcherPass { +class ngraph::pass::ReshapeBMatMul : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; ReshapeBMatMul(); }; -class ngraph::pass::TransposeMatMul: public ngraph::pass::MatcherPass { +class ngraph::pass::TransposeMatMul : public ngraph::pass::MatcherPass { public: NGRAPH_RTTI_DECLARATION; TransposeMatMul(); diff --git a/src/common/transformations/include/transformations/smart_reshape/mimic_set_batch_size.hpp b/src/common/transformations/include/transformations/smart_reshape/mimic_set_batch_size.hpp index 050016f8880..6c326a5b39d 100644 --- a/src/common/transformations/include/transformations/smart_reshape/mimic_set_batch_size.hpp +++ b/src/common/transformations/include/transformations/smart_reshape/mimic_set_batch_size.hpp @@ -6,9 +6,8 @@ #include #include -#include - #include +#include namespace ngraph { namespace pass { diff --git a/src/common/transformations/include/transformations/smart_reshape/proposal_scales_stridedslice.hpp b/src/common/transformations/include/transformations/smart_reshape/proposal_scales_stridedslice.hpp index df9d2c467fa..876312acf64 100644 --- a/src/common/transformations/include/transformations/smart_reshape/proposal_scales_stridedslice.hpp +++ b/src/common/transformations/include/transformations/smart_reshape/proposal_scales_stridedslice.hpp @@ -4,10 +4,9 @@ #pragma once -#include #include - #include +#include namespace ngraph { namespace pass { diff --git a/src/common/transformations/include/transformations/smart_reshape/reshape_to_1D.hpp b/src/common/transformations/include/transformations/smart_reshape/reshape_to_1D.hpp index 
index e7608b55a33..dda1f18d654 100644
--- a/src/common/transformations/include/transformations/smart_reshape/reshape_to_1D.hpp
+++ b/src/common/transformations/include/transformations/smart_reshape/reshape_to_1D.hpp
@@ -4,10 +4,9 @@
 #pragma once

-#include
 #include
-
 #include
+#include

 namespace ngraph {
 namespace pass {
diff --git a/src/common/transformations/include/transformations/smart_reshape/set_batch_size.hpp b/src/common/transformations/include/transformations/smart_reshape/set_batch_size.hpp
index b306ce479c4..c4f7fb79fce 100644
--- a/src/common/transformations/include/transformations/smart_reshape/set_batch_size.hpp
+++ b/src/common/transformations/include/transformations/smart_reshape/set_batch_size.hpp
@@ -4,10 +4,9 @@
 #pragma once

-#include
 #include
-
 #include
+#include

 namespace ngraph {
 namespace pass {
@@ -22,7 +21,7 @@ class NGRAPH_API SetBatchSize;
  * @brief Generic caller for all the transformations responsible to make model reshape-able by batch dimension
  */

-class ngraph::pass::SetBatchSize: public ngraph::pass::FunctionPass {
+class ngraph::pass::SetBatchSize : public ngraph::pass::FunctionPass {
 public:
     NGRAPH_RTTI_DECLARATION;
     bool run_on_model(const std::shared_ptr& m) override;
diff --git a/src/common/transformations/include/transformations/smart_reshape/smart_reshape.hpp b/src/common/transformations/include/transformations/smart_reshape/smart_reshape.hpp
index 45c1ff1bcc7..7893fe065f8 100644
--- a/src/common/transformations/include/transformations/smart_reshape/smart_reshape.hpp
+++ b/src/common/transformations/include/transformations/smart_reshape/smart_reshape.hpp
@@ -4,10 +4,9 @@
 #pragma once

-#include
 #include
-
 #include
+#include

 namespace ngraph {
 namespace pass {
@@ -17,7 +16,7 @@ class NGRAPH_API SmartReshape;
 } // namespace pass
 } // namespace ngraph

-class ngraph::pass::SmartReshape: public ngraph::pass::FunctionPass {
+class ngraph::pass::SmartReshape : public ngraph::pass::FunctionPass {
 public:
     NGRAPH_RTTI_DECLARATION;
     bool run_on_model(const std::shared_ptr& m) override;
diff --git a/src/common/transformations/include/transformations/smart_reshape/strided_slice_squeeze.hpp b/src/common/transformations/include/transformations/smart_reshape/strided_slice_squeeze.hpp
index b8985cad652..b6d3fa907a5 100644
--- a/src/common/transformations/include/transformations/smart_reshape/strided_slice_squeeze.hpp
+++ b/src/common/transformations/include/transformations/smart_reshape/strided_slice_squeeze.hpp
@@ -4,10 +4,9 @@
 #pragma once

-#include
 #include
-
 #include
+#include

 namespace ngraph {
 namespace pass {
@@ -21,7 +20,8 @@ class NGRAPH_API SharedSqueeze;

 /**
  * @ingroup ie_transformation_common_api
- * @brief StridedSliceSqueeze transformation looks for SS -> Squeeze and corrects SS inputs and attributes for SS output to be squeeze-able
+ * @brief StridedSliceSqueeze transformation looks for SS -> Squeeze and corrects SS inputs and attributes for SS output
+ * to be squeeze-able
  */

 class ngraph::pass::StridedSliceSqueeze : public ngraph::pass::MatcherPass {
@@ -32,7 +32,8 @@ public:

 /**
  * @ingroup ie_transformation_common_api
- * @brief StridedSliceSqueeze transformation looks for Squeeze -> SSe and corrects SS inputs and attributes for SS
+ * @brief StridedSliceSqueeze transformation looks for Squeeze -> SSe and corrects SS inputs and attributes for SS
+ * output to be squeeze-able
  */

 class ngraph::pass::SqueezeStridedSlice : public ngraph::pass::MatcherPass {
@@ -43,7 +44,8 @@ public:

 /**
  * @ingroup ie_transformation_common_api
- * @brief
SharedSqueeze transformation looks for shared Squeezes and leaves only one Squeeze reconnecting all the outputs to it + * @brief SharedSqueeze transformation looks for shared Squeezes and leaves only one Squeeze reconnecting all the + * outputs to it */ class ngraph::pass::SharedSqueeze : public ngraph::pass::FunctionPass { diff --git a/src/common/transformations/include/transformations/utils/utils.hpp b/src/common/transformations/include/transformations/utils/utils.hpp index a6c017e1e30..f62b7e51f43 100644 --- a/src/common/transformations/include/transformations/utils/utils.hpp +++ b/src/common/transformations/include/transformations/utils/utils.hpp @@ -4,32 +4,32 @@ #pragma once -#include -#include #include -#include -#include -#include -#include +#include +#include +#include #include +#include #include #include #include - -#include -#include #include +#include +#include #include +#include +#include namespace ngraph { namespace op { namespace util { template -bool normalize_single_value(std::vector vec, float & value) { - for (const auto & val : vec) { - if (val != *vec.begin()) return false; +bool normalize_single_value(std::vector vec, float& value) { + for (const auto& val : vec) { + if (val != *vec.begin()) + return false; } float ref_val = static_cast(*vec.begin()); @@ -43,8 +43,8 @@ bool normalize_single_value(std::vector vec, float & value) { } template -bool has_op_with_type(const std::shared_ptr &function) { - for (const auto & op : function->get_ops()) { +bool has_op_with_type(const std::shared_ptr& function) { + for (const auto& op : function->get_ops()) { if (std::dynamic_pointer_cast(op)) { return true; } @@ -104,21 +104,18 @@ bool has_constant_value(const std::shared_ptr& node, return false; } - const bool is_scalar_or_single_elem = is_scalar(constant->get_shape()) || - shape_size(constant->get_shape()) == 1; + const bool is_scalar_or_single_elem = is_scalar(constant->get_shape()) || shape_size(constant->get_shape()) == 1; if (!is_scalar_or_single_elem) { return false; } - if (constant->get_element_type() == ngraph::element::f16 || - constant->get_element_type() == ngraph::element::f32 || - constant->get_element_type() == ngraph::element::f64 || - constant->get_element_type() == ngraph::element::bf16) { - const auto data = constant->cast_vector(); - if (std::fabs(data[0] - value) > epsilon) { - return false; - } - } else { + if (constant->get_element_type() == ngraph::element::f16 || constant->get_element_type() == ngraph::element::f32 || + constant->get_element_type() == ngraph::element::f64 || constant->get_element_type() == ngraph::element::bf16) { + const auto data = constant->cast_vector(); + if (std::fabs(data[0] - value) > epsilon) { + return false; + } + } else { const auto data = constant->cast_vector(); if (data[0] != value) { return false; @@ -143,44 +140,47 @@ bool has_constant_value(const std::shared_ptr& node, const auto const_values = constant->cast_vector(); - if (constant->get_element_type() == ngraph::element::f16 || - constant->get_element_type() == ngraph::element::f32 || - constant->get_element_type() == ngraph::element::f64 || - constant->get_element_type() == ngraph::element::bf16) { - return std::equal(const_values.cbegin(), const_values.cend(), values.cbegin(), - [&] (T lhs, T rhs) { return std::fabs(lhs - rhs) < epsilon; }); + if (constant->get_element_type() == ngraph::element::f16 || constant->get_element_type() == ngraph::element::f32 || + constant->get_element_type() == ngraph::element::f64 || constant->get_element_type() == 
ngraph::element::bf16) { + return std::equal(const_values.cbegin(), const_values.cend(), values.cbegin(), [&](T lhs, T rhs) { + return std::fabs(lhs - rhs) < epsilon; + }); } return const_values == values; } -TRANSFORMATIONS_API bool get_single_value(const std::shared_ptr & const_node, float & value); +TRANSFORMATIONS_API bool get_single_value(const std::shared_ptr& const_node, float& value); -TRANSFORMATIONS_API std::shared_ptr normalize_constant(const std::shared_ptr & constant, - const PartialShape & shape); +TRANSFORMATIONS_API std::shared_ptr normalize_constant(const std::shared_ptr& constant, + const PartialShape& shape); TRANSFORMATIONS_API std::shared_ptr broadcastTo(const Output& input, const Shape& shape); -TRANSFORMATIONS_API std::shared_ptr reshapeTo(const Output & input, const Shape& shape); +TRANSFORMATIONS_API std::shared_ptr reshapeTo(const Output& input, const Shape& shape); -TRANSFORMATIONS_API bool constantIsEqualTo(const std::shared_ptr& const_node, float value, float eps = 1e-5); +TRANSFORMATIONS_API bool constantIsEqualTo(const std::shared_ptr& const_node, + float value, + float eps = 1e-5); -TRANSFORMATIONS_API bool has_f16_constants(const std::shared_ptr &function); +TRANSFORMATIONS_API bool has_f16_constants(const std::shared_ptr& function); -TRANSFORMATIONS_API bool check_for_broadcast(const ngraph::PartialShape &ref_shape, const ngraph::PartialShape &other_shape); +TRANSFORMATIONS_API bool check_for_broadcast(const ngraph::PartialShape& ref_shape, + const ngraph::PartialShape& other_shape); TRANSFORMATIONS_API std::shared_ptr activation(const std::string& activation_name, const ngraph::Output& apply_to); -TRANSFORMATIONS_API bool is_seq_len_provided(const std::shared_ptr &seq_len_input, int64_t max_seq_len); +TRANSFORMATIONS_API bool is_seq_len_provided(const std::shared_ptr& seq_len_input, int64_t max_seq_len); TRANSFORMATIONS_API std::shared_ptr try_fold_unary_output(const std::shared_ptr& node); TRANSFORMATIONS_API std::shared_ptr clone_try_fold(const std::shared_ptr& node, const OutputVector& inputs); -TRANSFORMATIONS_API bool shapes_equal_except_dynamic_expected_batch(const ngraph::PartialShape& expected, const ngraph::PartialShape& actual); +TRANSFORMATIONS_API bool shapes_equal_except_dynamic_expected_batch(const ngraph::PartialShape& expected, + const ngraph::PartialShape& actual); -TRANSFORMATIONS_API void visit_shape_path(ov::Node * node, +TRANSFORMATIONS_API void visit_shape_path(ov::Node* node, std::unordered_set& visited, std::function func); @@ -191,7 +191,7 @@ std::shared_ptr make_try_fold(Args&&... 
args) { } template -Output eltwise_fold(const Output & input0, const Output & input1) { +Output eltwise_fold(const Output& input0, const Output& input1) { auto eltwise = std::make_shared(input0, input1); OutputVector output(eltwise->get_output_size()); if (!eltwise->constant_fold(output, {input0, input1})) { @@ -206,14 +206,18 @@ Output eltwise_fold(const Output & input0, const Output & inpu TRANSFORMATIONS_API std::vector> get_node_target_inputs(const std::shared_ptr& node); TRANSFORMATIONS_API std::shared_ptr node_to_get_shape_value_of_indices_from_shape_node( - const std::shared_ptr& shape_node, const std::vector& indices); + const std::shared_ptr& shape_node, + const std::vector& indices); TRANSFORMATIONS_API std::shared_ptr node_to_get_shape_value_of_indices_from_shape_source( - const ngraph::Output& shape_source, const std::vector& indices); + const ngraph::Output& shape_source, + const std::vector& indices); TRANSFORMATIONS_API bool is_dequantization_subgraph(const Output& node); -TRANSFORMATIONS_API bool can_eliminate_eltwise_node(const std::shared_ptr& eltwise, const Output& constant, const Output& non_constant_input); +TRANSFORMATIONS_API bool can_eliminate_eltwise_node(const std::shared_ptr& eltwise, + const Output& constant, + const Output& non_constant_input); } // namespace util } // namespace op } // namespace ngraph diff --git a/src/common/transformations/src/ngraph_ops/nms_ie_internal.cpp b/src/common/transformations/src/ngraph_ops/nms_ie_internal.cpp index 12dcbea44c5..8af20ba8a86 100644 --- a/src/common/transformations/src/ngraph_ops/nms_ie_internal.cpp +++ b/src/common/transformations/src/ngraph_ops/nms_ie_internal.cpp @@ -2,10 +2,11 @@ // SPDX-License-Identifier: Apache-2.0 // -#include - -#include #include "ngraph_ops/nms_ie_internal.hpp" + +#include +#include + #include "itt.hpp" using namespace std; @@ -22,9 +23,11 @@ op::internal::NonMaxSuppressionIEInternal::NonMaxSuppressionIEInternal(const Out bool sort_result_descending, const ngraph::element::Type& output_type, const ngraph::element::Type& score_output_type) - : Op({boxes, scores, max_output_boxes_per_class, iou_threshold, score_threshold}), - m_center_point_box(center_point_box), m_sort_result_descending(sort_result_descending), m_output_type(output_type), - m_scores_output_type(score_output_type) { + : Op({boxes, scores, max_output_boxes_per_class, iou_threshold, score_threshold}), + m_center_point_box(center_point_box), + m_sort_result_descending(sort_result_descending), + m_output_type(output_type), + m_scores_output_type(score_output_type) { constructor_validate_and_infer_types(); } @@ -38,22 +41,36 @@ op::internal::NonMaxSuppressionIEInternal::NonMaxSuppressionIEInternal(const Out bool sort_result_descending, const ngraph::element::Type& output_type, const ngraph::element::Type& score_output_type) - : Op({boxes, scores, max_output_boxes_per_class, iou_threshold, score_threshold, soft_nms_sigma}), - m_center_point_box(center_point_box), m_sort_result_descending(sort_result_descending), m_output_type(output_type), - m_scores_output_type(score_output_type) { + : Op({boxes, scores, max_output_boxes_per_class, iou_threshold, score_threshold, soft_nms_sigma}), + m_center_point_box(center_point_box), + m_sort_result_descending(sort_result_descending), + m_output_type(output_type), + m_scores_output_type(score_output_type) { constructor_validate_and_infer_types(); } -std::shared_ptr op::internal::NonMaxSuppressionIEInternal::clone_with_new_inputs(const ngraph::OutputVector &new_args) const { +std::shared_ptr 
op::internal::NonMaxSuppressionIEInternal::clone_with_new_inputs( + const ngraph::OutputVector& new_args) const { INTERNAL_OP_SCOPE(internal_NonMaxSuppressionIEInternal_clone_with_new_inputs); if (new_args.size() == 6) { - return make_shared(new_args.at(0), new_args.at(1), new_args.at(2), new_args.at(3), - new_args.at(4), new_args.at(5), m_center_point_box, m_sort_result_descending, - m_output_type); + return make_shared(new_args.at(0), + new_args.at(1), + new_args.at(2), + new_args.at(3), + new_args.at(4), + new_args.at(5), + m_center_point_box, + m_sort_result_descending, + m_output_type); } else if (new_args.size() == 5) { - return make_shared(new_args.at(0), new_args.at(1), new_args.at(2), new_args.at(3), - new_args.at(4), m_center_point_box, m_sort_result_descending, - m_output_type); + return make_shared(new_args.at(0), + new_args.at(1), + new_args.at(2), + new_args.at(3), + new_args.at(4), + m_center_point_box, + m_sort_result_descending, + m_output_type); } throw ngraph::ngraph_error("Unsupported number of inputs: " + std::to_string(new_args.size())); } @@ -104,8 +121,7 @@ void op::internal::NonMaxSuppressionIEInternal::validate_and_infer_types() { const auto num_classes = scores_ps[1].get_length(); const auto max_output_boxes_per_class = max_boxes_output_from_input(); - out_shape[0] = std::min(num_boxes, max_output_boxes_per_class) * num_classes * - scores_ps[0].get_length(); + out_shape[0] = std::min(num_boxes, max_output_boxes_per_class) * num_classes * scores_ps[0].get_length(); } } diff --git a/src/common/transformations/src/ngraph_ops/nms_static_shape_ie.cpp b/src/common/transformations/src/ngraph_ops/nms_static_shape_ie.cpp index de4b2609eeb..db1669e186d 100644 --- a/src/common/transformations/src/ngraph_ops/nms_static_shape_ie.cpp +++ b/src/common/transformations/src/ngraph_ops/nms_static_shape_ie.cpp @@ -2,10 +2,12 @@ // SPDX-License-Identifier: Apache-2.0 // -#include - +// clang-format off #include "ngraph/ops.hpp" #include "ngraph_ops/nms_static_shape_ie.hpp" +// clang-format on + +#include namespace ngraph { namespace op { diff --git a/src/common/transformations/src/transformations/common_optimizations/add_fake_quantize_fusion.cpp b/src/common/transformations/src/transformations/common_optimizations/add_fake_quantize_fusion.cpp index 427993670d1..9811947e555 100644 --- a/src/common/transformations/src/transformations/common_optimizations/add_fake_quantize_fusion.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/add_fake_quantize_fusion.cpp @@ -3,17 +3,16 @@ // #include "transformations/common_optimizations/add_fake_quantize_fusion.hpp" -#include "transformations/utils/utils.hpp" #include +#include +#include +#include +#include #include -#include -#include -#include -#include #include "itt.hpp" - +#include "transformations/utils/utils.hpp" NGRAPH_RTTI_DEFINITION(ngraph::pass::AddFakeQuantizeFusion, "AddFakeQuantizeFusion", 0); @@ -21,8 +20,8 @@ ngraph::pass::AddFakeQuantizeFusion::AddFakeQuantizeFusion() { MATCHER_SCOPE(AddFakeQuantizeFusion); auto input_pattern = ngraph::pattern::any_input(); auto const_pattern = ngraph::pattern::wrap_type(); - auto add_pattern = ngraph::pattern::wrap_type({input_pattern, const_pattern}, - pattern::consumers_count(1)); + auto add_pattern = + ngraph::pattern::wrap_type({input_pattern, const_pattern}, pattern::consumers_count(1)); auto fq_pattern = ngraph::pattern::wrap_type({add_pattern, ngraph::pattern::any_input(), ngraph::pattern::any_input(), @@ -34,11 +33,13 @@ 
ngraph::pass::AddFakeQuantizeFusion::AddFakeQuantizeFusion() { const auto& type = input.get_element_type(); if (type.bitwidth() < element::f32.bitwidth()) return false; - auto fq = std::dynamic_pointer_cast(pattern_value_map.at(fq_pattern).get_node_shared_ptr()); + auto fq = + std::dynamic_pointer_cast(pattern_value_map.at(fq_pattern).get_node_shared_ptr()); if (!fq) return false; const auto& add_node = pattern_value_map.at(add_pattern).get_node_shared_ptr(); - auto add_const = std::dynamic_pointer_cast(pattern_value_map.at(const_pattern).get_node_shared_ptr()); + auto add_const = + std::dynamic_pointer_cast(pattern_value_map.at(const_pattern).get_node_shared_ptr()); if (!add_const) return false; @@ -71,8 +72,10 @@ ngraph::pass::AddFakeQuantizeFusion::AddFakeQuantizeFusion() { if (diff > 0) { // Reshape constants like (C, 1, 1) to (1, C, 1, 1) const_shape.insert(const_shape.begin(), diff, 1); - new_const = std::make_shared(new_const, - op::Constant::create(element::u64, Shape{const_shape.size()}, const_shape), false); + new_const = std::make_shared( + new_const, + op::Constant::create(element::u64, Shape{const_shape.size()}, const_shape), + false); } // disallow constant shapes other than (N, 1, 1, ..., 1) or (1, C, 1, ..., 1) @@ -84,23 +87,21 @@ ngraph::pass::AddFakeQuantizeFusion::AddFakeQuantizeFusion() { // Convolution+Add or MatMul+Add can be fused later // so don't fuse Add+FQ in that situation const auto& add_inputs = add_node->input_values(); - bool add_parent_is_conv_or_mm = std::any_of(add_inputs.begin(), add_inputs.end(), - [] (const Output& node) -> bool { - auto node_ptr = node.get_node(); - return is_type(node_ptr) || - is_type(node_ptr) || - is_type(node_ptr) || - is_type(node_ptr) || - is_type(node_ptr); - }); + bool add_parent_is_conv_or_mm = + std::any_of(add_inputs.begin(), add_inputs.end(), [](const Output& node) -> bool { + auto node_ptr = node.get_node(); + return is_type(node_ptr) || is_type(node_ptr) || + is_type(node_ptr) || + is_type(node_ptr) || is_type(node_ptr); + }); if (add_parent_is_conv_or_mm) return false; auto fq_users = fq->get_users(); // Concat LPT transformation supports per tensor quantization only - bool fq_user_is_concat = std::any_of(fq_users.begin(), fq_users.end(), - [] (const std::shared_ptr node_ptr) -> bool { - return is_type(node_ptr); - }); + bool fq_user_is_concat = + std::any_of(fq_users.begin(), fq_users.end(), [](const std::shared_ptr node_ptr) -> bool { + return is_type(node_ptr); + }); if (fq_user_is_concat) return false; } @@ -113,11 +114,8 @@ ngraph::pass::AddFakeQuantizeFusion::AddFakeQuantizeFusion() { std::shared_ptr new_input_high = get_constant_from_source(input_high_sub); if (!new_input_high) new_input_high = input_high_sub; - auto new_fq = fq->clone_with_new_inputs({input, - new_input_low, - new_input_high, - fq->input_value(3), - fq->input_value(4)}); + auto new_fq = + fq->clone_with_new_inputs({input, new_input_low, new_input_high, fq->input_value(3), fq->input_value(4)}); if (transformation_callback(new_fq)) return false; register_new_node(new_fq); diff --git a/src/common/transformations/src/transformations/common_optimizations/align_eltwise_input_ranks.cpp b/src/common/transformations/src/transformations/common_optimizations/align_eltwise_input_ranks.cpp index cc3ad56fc6c..7dc91d29333 100644 --- a/src/common/transformations/src/transformations/common_optimizations/align_eltwise_input_ranks.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/align_eltwise_input_ranks.cpp @@ -34,8 +34,9 @@ 
ngraph::pass::AlignEltwiseInputRanks::AlignEltwiseInputRanks() { // So we skip extending Multiply's constant input rank here. if (ov::is_type(node)) { auto inputs = node->input_values(); - if (std::any_of(inputs.begin(), inputs.end(), - [] (const Output& input) -> bool { return ov::is_type(input.get_node()); })) + if (std::any_of(inputs.begin(), inputs.end(), [](const Output& input) -> bool { + return ov::is_type(input.get_node()); + })) return false; } diff --git a/src/common/transformations/src/transformations/common_optimizations/batch_to_space_fusion.cpp b/src/common/transformations/src/transformations/common_optimizations/batch_to_space_fusion.cpp index f1ed56be0f8..ae1e21371e2 100644 --- a/src/common/transformations/src/transformations/common_optimizations/batch_to_space_fusion.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/batch_to_space_fusion.cpp @@ -3,68 +3,79 @@ // #include "transformations/common_optimizations/batch_to_space_fusion.hpp" -#include "transformations/utils/utils.hpp" #include +#include +#include +#include +#include #include -#include -#include -#include -#include #include "itt.hpp" - +#include "transformations/utils/utils.hpp" NGRAPH_RTTI_DEFINITION(ngraph::pass::BatchToSpaceFusion, "BatchToSpaceFusion", 0); ngraph::pass::BatchToSpaceFusion::BatchToSpaceFusion() { MATCHER_SCOPE(BatchToSpaceFusion); auto data_pattern = pattern::any_input(pattern::has_static_shape()); - auto reshape_before_pattern = pattern::wrap_type({data_pattern, pattern::wrap_type()}, pattern::rank_equals(4)); - auto trans_before_pattern = pattern::wrap_type({data_pattern, pattern::wrap_type()}, pattern::rank_equals(4)); - auto reshape_or_transpose_before_pattern = std::make_shared(OutputVector{reshape_before_pattern, trans_before_pattern}); + auto reshape_before_pattern = + pattern::wrap_type({data_pattern, pattern::wrap_type()}, + pattern::rank_equals(4)); + auto trans_before_pattern = + pattern::wrap_type({data_pattern, pattern::wrap_type()}, + pattern::rank_equals(4)); + auto reshape_or_transpose_before_pattern = + std::make_shared(OutputVector{reshape_before_pattern, trans_before_pattern}); auto depth_to_space_pattern = pattern::wrap_type({reshape_or_transpose_before_pattern}); auto starts_pattern = pattern::wrap_type(); auto ends_pattern = pattern::wrap_type(); - auto slice_pattern = pattern::wrap_type({depth_to_space_pattern, starts_pattern, ends_pattern, - pattern::wrap_type()}); - auto reshape_after_pattern = pattern::wrap_type({slice_pattern, pattern::wrap_type()}, pattern::rank_equals(4)); - auto trans_after_pattern = pattern::wrap_type({slice_pattern, pattern::wrap_type()}, pattern::rank_equals(4)); - auto reshape_or_transpose_after_pattern = std::make_shared(OutputVector{reshape_after_pattern, trans_after_pattern}); + auto slice_pattern = pattern::wrap_type( + {depth_to_space_pattern, starts_pattern, ends_pattern, pattern::wrap_type()}); + auto reshape_after_pattern = + pattern::wrap_type({slice_pattern, pattern::wrap_type()}, + pattern::rank_equals(4)); + auto trans_after_pattern = + pattern::wrap_type({slice_pattern, pattern::wrap_type()}, + pattern::rank_equals(4)); + auto reshape_or_transpose_after_pattern = + std::make_shared(OutputVector{reshape_after_pattern, trans_after_pattern}); ngraph::matcher_pass_callback callback = [=](pattern::Matcher& m) { const auto& pattern_map = m.get_pattern_value_map(); - auto get_reshape_or_transpose = [&pattern_map] (const std::shared_ptr& reshape_pattern, - const std::shared_ptr& trans_pattern) -> std::shared_ptr 
{ + auto get_reshape_or_transpose = [&pattern_map]( + const std::shared_ptr& reshape_pattern, + const std::shared_ptr& trans_pattern) -> std::shared_ptr { if (pattern_map.count(reshape_pattern)) return pattern_map.at(reshape_pattern).get_node_shared_ptr(); if (pattern_map.count(trans_pattern)) return pattern_map.at(trans_pattern).get_node_shared_ptr(); return nullptr; }; - auto check_input_output_shape = [] (const std::shared_ptr& node) -> bool { + auto check_input_output_shape = [](const std::shared_ptr& node) -> bool { const auto& input_shape = node->get_input_shape(0); const auto& output_shape = node->get_output_shape(0); // Transpose permutation has to be [1, 0, 2, 3] - return input_shape[0] == output_shape[1] && - input_shape[1] == output_shape[0] && - input_shape[2] == output_shape[2] && - input_shape[3] == output_shape[3]; + return input_shape[0] == output_shape[1] && input_shape[1] == output_shape[0] && + input_shape[2] == output_shape[2] && input_shape[3] == output_shape[3]; }; - std::shared_ptr reshape_or_trans_before = get_reshape_or_transpose(reshape_before_pattern, trans_before_pattern); + std::shared_ptr reshape_or_trans_before = + get_reshape_or_transpose(reshape_before_pattern, trans_before_pattern); if (!reshape_or_trans_before) return false; if (!check_input_output_shape(reshape_or_trans_before)) return false; - std::shared_ptr reshape_or_trans_after = get_reshape_or_transpose(reshape_after_pattern, trans_after_pattern); + std::shared_ptr reshape_or_trans_after = + get_reshape_or_transpose(reshape_after_pattern, trans_after_pattern); if (!reshape_or_trans_after) return false; if (!check_input_output_shape(reshape_or_trans_after)) return false; - auto depth_to_space = std::dynamic_pointer_cast(pattern_map.at(depth_to_space_pattern).get_node_shared_ptr()); + auto depth_to_space = std::dynamic_pointer_cast( + pattern_map.at(depth_to_space_pattern).get_node_shared_ptr()); if (!depth_to_space) return false; if (depth_to_space->get_mode() != opset6::DepthToSpace::DepthToSpaceMode::BLOCKS_FIRST) @@ -73,8 +84,8 @@ ngraph::pass::BatchToSpaceFusion::BatchToSpaceFusion() { if (dts_shape.size() != 4) return false; auto block_size = static_cast(depth_to_space->get_block_size()); - auto block_shape = op::Constant::create(element::i64, Shape{4}, - std::vector{1, 1, block_size, block_size}); + auto block_shape = + op::Constant::create(element::i64, Shape{4}, std::vector{1, 1, block_size, block_size}); auto starts = std::dynamic_pointer_cast(pattern_map.at(starts_pattern).get_node_shared_ptr()); if (!starts) return false; @@ -99,15 +110,16 @@ ngraph::pass::BatchToSpaceFusion::BatchToSpaceFusion() { } auto crops_begin = op::Constant::create(element::i64, Shape{4}, starts_value); auto crops_end = op::Constant::create(element::i64, Shape{4}, ends_value); - auto batch_to_space = register_new_node(pattern_map.at(data_pattern), block_shape, crops_begin, crops_end); + auto batch_to_space = register_new_node(pattern_map.at(data_pattern), + block_shape, + crops_begin, + crops_end); batch_to_space->set_friendly_name(reshape_or_trans_after->get_friendly_name()); - copy_runtime_info({ - reshape_or_trans_before, - depth_to_space, - pattern_map.at(slice_pattern).get_node_shared_ptr(), - reshape_or_trans_after - }, + copy_runtime_info({reshape_or_trans_before, + depth_to_space, + pattern_map.at(slice_pattern).get_node_shared_ptr(), + reshape_or_trans_after}, batch_to_space); replace_node(reshape_or_trans_after, batch_to_space); diff --git 
a/src/common/transformations/src/transformations/common_optimizations/binarize_weights.cpp b/src/common/transformations/src/transformations/common_optimizations/binarize_weights.cpp index 3a5d88b7368..69c0fd0802e 100644 --- a/src/common/transformations/src/transformations/common_optimizations/binarize_weights.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/binarize_weights.cpp @@ -3,20 +3,19 @@ // #include "transformations/common_optimizations/binarize_weights.hpp" -#include "itt.hpp" #include -#include - #include #include #include +#include + +#include "itt.hpp" using namespace ngraph; NGRAPH_RTTI_DEFINITION(pass::BinarizeWeights, "BinarizeWeights", 0); - static float quantize(float f, float input_low, float input_high, float output_low, float output_high) { if (f <= input_low) return output_low; @@ -25,10 +24,14 @@ static float quantize(float f, float input_low, float input_high, float output_l return std::round((f - input_low) / (input_high - input_low)) * (output_high - output_low) + output_low; } - -static std::vector quantize_weights(const Shape& weights_shape, std::vector& weights, - Shape input_low_high_shape, const std::vector& input_low, const std::vector& input_high, - Shape output_low_high_shape, const std::vector& output_low, const std::vector& output_high) { +static std::vector quantize_weights(const Shape& weights_shape, + std::vector& weights, + Shape input_low_high_shape, + const std::vector& input_low, + const std::vector& input_high, + Shape output_low_high_shape, + const std::vector& output_low, + const std::vector& output_high) { NGRAPH_CHECK(shape_size(input_low_high_shape) == 1 || shape_size(input_low_high_shape) == weights_shape[0]); NGRAPH_CHECK(shape_size(output_low_high_shape) == 1 || shape_size(output_low_high_shape) == weights_shape[0]); size_t out_feat_off = 1; @@ -38,54 +41,54 @@ static std::vector quantize_weights(const Shape& weights_shape, std::vect std::vector out; out.reserve(shape_size(weights_shape)); - auto get_idx = [out_feat_off] (size_t i, const Shape& shape) -> size_t { + auto get_idx = [out_feat_off](size_t i, const Shape& shape) -> size_t { return (i / out_feat_off) % shape[0]; }; for (size_t i = 0; i < shape_size(weights_shape); i++) { size_t in_idx = get_idx(i, input_low_high_shape); size_t out_idx = get_idx(i, output_low_high_shape); - out.push_back(quantize(weights[i], input_low[in_idx], input_high[in_idx], output_low[out_idx], output_high[out_idx])); + out.push_back( + quantize(weights[i], input_low[in_idx], input_high[in_idx], output_low[out_idx], output_high[out_idx])); } return out; } - pass::BinarizeWeights::BinarizeWeights() { MATCHER_SCOPE(BinarizeWeights); - auto activations_fq_pattern = pattern::wrap_type( - {pattern::any_input(), - pattern::wrap_type(), - pattern::wrap_type(), - pattern::wrap_type(), - pattern::wrap_type()}, - pattern::consumers_count(1)); - auto weights_fq_pattern = pattern::wrap_type( - {pattern::wrap_type(), - pattern::wrap_type(), - pattern::wrap_type(), - pattern::wrap_type(), - pattern::wrap_type()}, - pattern::consumers_count(1)); + auto activations_fq_pattern = pattern::wrap_type({pattern::any_input(), + pattern::wrap_type(), + pattern::wrap_type(), + pattern::wrap_type(), + pattern::wrap_type()}, + pattern::consumers_count(1)); + auto weights_fq_pattern = pattern::wrap_type({pattern::wrap_type(), + pattern::wrap_type(), + pattern::wrap_type(), + pattern::wrap_type(), + pattern::wrap_type()}, + pattern::consumers_count(1)); auto conv_pattern = 
pattern::wrap_type({activations_fq_pattern, weights_fq_pattern}); - matcher_pass_callback callback = [=](pattern::Matcher &m) { + matcher_pass_callback callback = [=](pattern::Matcher& m) { auto conv = std::dynamic_pointer_cast(m.get_match_root()); if (!conv) return false; - auto activations_fq = std::dynamic_pointer_cast(conv->input_value(0).get_node_shared_ptr()); + auto activations_fq = + std::dynamic_pointer_cast(conv->input_value(0).get_node_shared_ptr()); if (!activations_fq || activations_fq->get_levels() != 2) return false; auto weights_fq = std::dynamic_pointer_cast(conv->input_value(1).get_node_shared_ptr()); if (!weights_fq || weights_fq->get_levels() != 2) return false; - auto weights_const = std::dynamic_pointer_cast(weights_fq->input_value(0).get_node_shared_ptr()); + auto weights_const = + std::dynamic_pointer_cast(weights_fq->input_value(0).get_node_shared_ptr()); if (!weights_const) return false; - auto check_output_low_high = [] (const std::vector& output_low, - const std::vector& output_high) -> std::tuple { + auto check_output_low_high = [](const std::vector& output_low, + const std::vector& output_high) -> std::tuple { bool output_low_is_zero = true; bool output_low_high_are_opposite = true; for (size_t i = 0; i < output_low.size(); i++) { @@ -95,8 +98,10 @@ pass::BinarizeWeights::BinarizeWeights() { return std::tuple{output_low_is_zero, output_low_high_are_opposite}; }; - auto activations_output_low_const = std::dynamic_pointer_cast(activations_fq->input_value(3).get_node_shared_ptr()); - auto activations_output_high_const = std::dynamic_pointer_cast(activations_fq->input_value(4).get_node_shared_ptr()); + auto activations_output_low_const = + std::dynamic_pointer_cast(activations_fq->input_value(3).get_node_shared_ptr()); + auto activations_output_high_const = + std::dynamic_pointer_cast(activations_fq->input_value(4).get_node_shared_ptr()); if (!activations_output_low_const || !activations_output_high_const) return false; @@ -105,17 +110,21 @@ pass::BinarizeWeights::BinarizeWeights() { bool act_out_low_high_are_opposite = false; auto activations_output_low = activations_output_low_const->cast_vector(); auto activations_output_high = activations_output_high_const->cast_vector(); - std::tie(act_out_low_is_zero, act_out_low_high_are_opposite) = check_output_low_high(activations_output_low, - activations_output_high); + std::tie(act_out_low_is_zero, act_out_low_high_are_opposite) = + check_output_low_high(activations_output_low, activations_output_high); if (!(act_out_low_high_are_opposite || act_out_low_is_zero)) return false; - auto weights_input_low_const = std::dynamic_pointer_cast(weights_fq->input_value(1).get_node_shared_ptr()); - auto weights_input_high_const = std::dynamic_pointer_cast(weights_fq->input_value(2).get_node_shared_ptr()); + auto weights_input_low_const = + std::dynamic_pointer_cast(weights_fq->input_value(1).get_node_shared_ptr()); + auto weights_input_high_const = + std::dynamic_pointer_cast(weights_fq->input_value(2).get_node_shared_ptr()); if (!weights_input_low_const || !weights_input_high_const) return false; - auto weights_output_low_const = std::dynamic_pointer_cast(weights_fq->input_value(3).get_node_shared_ptr()); - auto weights_output_high_const = std::dynamic_pointer_cast(weights_fq->input_value(4).get_node_shared_ptr()); + auto weights_output_low_const = + std::dynamic_pointer_cast(weights_fq->input_value(3).get_node_shared_ptr()); + auto weights_output_high_const = + 
std::dynamic_pointer_cast(weights_fq->input_value(4).get_node_shared_ptr()); if (!weights_output_low_const || !weights_output_high_const) return false; @@ -123,13 +132,13 @@ pass::BinarizeWeights::BinarizeWeights() { bool weights_out_low_high_are_opposite = false; auto weights_output_low = weights_output_low_const->cast_vector(); auto weights_output_high = weights_output_high_const->cast_vector(); - std::tie(std::ignore, weights_out_low_high_are_opposite) = check_output_low_high(weights_output_low, - weights_output_high); + std::tie(std::ignore, weights_out_low_high_are_opposite) = + check_output_low_high(weights_output_low, weights_output_high); if (!weights_out_low_high_are_opposite) return false; // Normalize output low and high to either (0, 1) or (-1, 1) - auto normalize_output_low_high = [] (std::vector& output_low, std::vector& output_high) { + auto normalize_output_low_high = [](std::vector& output_low, std::vector& output_high) { for (size_t i = 0; i < output_low.size(); i++) { output_low[i] /= output_high[i]; output_high[i] = 1.0f; @@ -144,9 +153,11 @@ pass::BinarizeWeights::BinarizeWeights() { const std::shared_ptr& weights_norm_factor = weights_output_high_const; // Create new FQ on activations with new output low/high - auto output_low_normalized = op::Constant::create(element::f32, activations_output_low_const->get_shape(), activations_output_low); + auto output_low_normalized = + op::Constant::create(element::f32, activations_output_low_const->get_shape(), activations_output_low); output_low_normalized->set_friendly_name(activations_output_low_const->get_friendly_name()); - auto output_high_normalized = op::Constant::create(element::f32, activations_output_high_const->get_shape(), activations_output_high); + auto output_high_normalized = + op::Constant::create(element::f32, activations_output_high_const->get_shape(), activations_output_high); output_high_normalized->set_friendly_name(activations_output_high_const->get_friendly_name()); auto new_activations_fq = activations_fq->clone_with_new_inputs({activations_fq->input_value(0), activations_fq->input_value(1), @@ -159,25 +170,35 @@ pass::BinarizeWeights::BinarizeWeights() { auto weights = weights_const->cast_vector(); auto weights_input_low = weights_input_low_const->cast_vector(); auto weights_input_high = weights_input_high_const->cast_vector(); - auto quantized_weights = quantize_weights(weights_const->get_shape(), weights, - weights_input_low_const->get_shape(), weights_input_low, weights_input_high, - weights_output_low_const->get_shape(), weights_output_low, weights_output_high); - auto quantized_weights_const = op::Constant::create(element::f32, weights_const->get_shape(), quantized_weights); + auto quantized_weights = quantize_weights(weights_const->get_shape(), + weights, + weights_input_low_const->get_shape(), + weights_input_low, + weights_input_high, + weights_output_low_const->get_shape(), + weights_output_low, + weights_output_high); + auto quantized_weights_const = + op::Constant::create(element::f32, weights_const->get_shape(), quantized_weights); quantized_weights_const->set_friendly_name(weights_const->get_friendly_name()); auto new_conv = conv->clone_with_new_inputs({new_activations_fq, quantized_weights_const}); std::vector norm_factor_shape = {-1}; for (size_t i = 2; i < weights_const->get_shape().size(); i++) norm_factor_shape.push_back(1); - auto norm_factor_shape_const = opset5::Constant::create(element::i64, Shape{norm_factor_shape.size()}, norm_factor_shape); + auto norm_factor_shape_const = + 
opset5::Constant::create(element::i64, Shape{norm_factor_shape.size()}, norm_factor_shape); - auto activations_norm_factor_reshaped = std::make_shared(activations_norm_factor, norm_factor_shape_const, false); + auto activations_norm_factor_reshaped = + std::make_shared(activations_norm_factor, norm_factor_shape_const, false); auto mul = std::make_shared(new_conv, activations_norm_factor_reshaped); - auto weights_norm_factor_reshaped = std::make_shared(weights_norm_factor, norm_factor_shape_const, false); + auto weights_norm_factor_reshaped = + std::make_shared(weights_norm_factor, norm_factor_shape_const, false); auto mul2 = std::make_shared(mul, weights_norm_factor_reshaped); - copy_runtime_info({activations_fq, weights_fq, conv}, - {new_activations_fq, new_conv, activations_norm_factor_reshaped, mul, weights_norm_factor_reshaped, mul2}); + copy_runtime_info( + {activations_fq, weights_fq, conv}, + {new_activations_fq, new_conv, activations_norm_factor_reshaped, mul, weights_norm_factor_reshaped, mul2}); mul2->set_friendly_name(conv->get_friendly_name()); replace_node(conv, mul2); return true; diff --git a/src/common/transformations/src/transformations/common_optimizations/broadcast_elementwise_fusion.cpp b/src/common/transformations/src/transformations/common_optimizations/broadcast_elementwise_fusion.cpp index 4107dfa8eb8..e6fcba2ab90 100644 --- a/src/common/transformations/src/transformations/common_optimizations/broadcast_elementwise_fusion.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/broadcast_elementwise_fusion.cpp @@ -2,13 +2,14 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "itt.hpp" #include "transformations/common_optimizations/broadcast_elementwise_fusion.hpp" #include #include #include +#include "itt.hpp" + NGRAPH_RTTI_DEFINITION(ngraph::pass::BroadcastElementwiseFusion, "BroadcastElementwiseFusion", 0); namespace { @@ -26,32 +27,33 @@ bool can_eliminate_broadcast(const ngraph::Output& eltwise, // Broadcast since eltwise_input will broadcast another eltwise input automatically. 
auto broadcast_input = broadcast.get_node()->get_input_node_shared_ptr(1); if ((ov::is_type(broadcast_input) || - ov::is_type(broadcast_input)) && + ov::is_type(broadcast_input)) && broadcast_input->input_value(0) == eltwise_input) { return true; } - const auto & input_shape = eltwise_input.get_partial_shape(); - const auto & broadcast_shape = broadcast.get_partial_shape(); + const auto& input_shape = eltwise_input.get_partial_shape(); + const auto& broadcast_shape = broadcast.get_partial_shape(); if (input_shape.rank().is_dynamic() || broadcast_shape.rank().is_dynamic()) { return false; } - const int64_t & input_shape_rank = input_shape.rank().get_length(); - const int64_t & broadcast_shape_rank = broadcast_shape.rank().get_length(); + const int64_t& input_shape_rank = input_shape.rank().get_length(); + const int64_t& broadcast_shape_rank = broadcast_shape.rank().get_length(); if (broadcast_shape_rank > input_shape_rank) { - //We can not eliminate broadcast op because - //in the case input_shape will be broadcasted + // We can not eliminate broadcast op because + // in the case input_shape will be broadcasted return false; } - for (int64_t i_dim = input_shape_rank - 1, b_dim = broadcast_shape_rank - 1; i_dim >= 0 && b_dim >=0; --i_dim, --b_dim) { + for (int64_t i_dim = input_shape_rank - 1, b_dim = broadcast_shape_rank - 1; i_dim >= 0 && b_dim >= 0; + --i_dim, --b_dim) { if (input_shape[i_dim].is_static() && broadcast_shape[b_dim].is_static()) { - const auto &input_shape_dim = input_shape[i_dim].get_length(); - const auto &broadcast_shape_dim = broadcast_shape[b_dim].get_length(); + const auto& input_shape_dim = input_shape[i_dim].get_length(); + const auto& broadcast_shape_dim = broadcast_shape[b_dim].get_length(); if (input_shape_dim != broadcast_shape_dim && broadcast_shape_dim != 1) { - //We can not eliminate broadcast op because - //input_shape will be broadcast + // We can not eliminate broadcast op because + // input_shape will be broadcast return false; } } else if (input_shape[i_dim].is_dynamic() && broadcast_shape[i_dim].is_static() && @@ -67,23 +69,24 @@ bool can_eliminate_broadcast(const ngraph::Output& eltwise, return true; } -} // namespace +} // namespace ngraph::pass::BroadcastElementwiseFusion::BroadcastElementwiseFusion() { MATCHER_SCOPE(BroadcastElementwiseFusion); auto broadcast_input = pattern::any_input(); - auto broadcast = pattern::wrap_type({broadcast_input, pattern::any_input()}, pattern::consumers_count(1)); + auto broadcast = pattern::wrap_type({broadcast_input, pattern::any_input()}, + pattern::consumers_count(1)); auto eltwise_input = pattern::any_input(); auto eltwise = pattern::wrap_type({eltwise_input, broadcast}); ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher& m) { - auto & pattern_value = m.get_pattern_value_map(); + auto& pattern_value = m.get_pattern_value_map(); - const auto & m_eltwise_input = pattern_value.at(eltwise_input); - const auto & m_eltwise = pattern_value.at(eltwise); + const auto& m_eltwise_input = pattern_value.at(eltwise_input); + const auto& m_eltwise = pattern_value.at(eltwise); - const auto & m_broadcast_input = pattern_value.at(broadcast_input); - auto & m_broadcast = pattern_value.at(broadcast); + const auto& m_broadcast_input = pattern_value.at(broadcast_input); + auto& m_broadcast = pattern_value.at(broadcast); if (!can_eliminate_broadcast(m_eltwise, m_eltwise_input, m_broadcast)) { return false; diff --git a/src/common/transformations/src/transformations/common_optimizations/clamp_fusion.cpp 
b/src/common/transformations/src/transformations/common_optimizations/clamp_fusion.cpp index 6521150788b..af19f56319e 100644 --- a/src/common/transformations/src/transformations/common_optimizations/clamp_fusion.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/clamp_fusion.cpp @@ -3,16 +3,16 @@ // #include "transformations/common_optimizations/clamp_fusion.hpp" -#include "transformations/utils/utils.hpp" #include +#include +#include +#include +#include #include -#include -#include -#include -#include #include "itt.hpp" +#include "transformations/utils/utils.hpp" NGRAPH_RTTI_DEFINITION(ngraph::pass::ClampFusion, "ClampFusion", 0); @@ -21,21 +21,25 @@ ngraph::pass::ClampFusion::ClampFusion() { auto data_pattern = ngraph::pattern::any_input(); auto min_const_pattern = ngraph::pattern::wrap_type(); auto max_const_pattern = ngraph::pattern::wrap_type(); - auto max_pattern1 = ngraph::pattern::wrap_type({data_pattern, min_const_pattern}, pattern::consumers_count(1)); + auto max_pattern1 = + ngraph::pattern::wrap_type({data_pattern, min_const_pattern}, pattern::consumers_count(1)); auto min_pattern1 = ngraph::pattern::wrap_type({max_pattern1, max_const_pattern}); auto min_pattern2 = ngraph::pattern::wrap_type({data_pattern, max_const_pattern}); - auto max_pattern2 = ngraph::pattern::wrap_type({min_pattern2, min_const_pattern}, pattern::consumers_count(1)); + auto max_pattern2 = + ngraph::pattern::wrap_type({min_pattern2, min_const_pattern}, pattern::consumers_count(1)); auto root = std::make_shared(ngraph::OutputVector{min_pattern1, max_pattern2}); ngraph::matcher_pass_callback callback = [=](pattern::Matcher& m) { auto pattern_map = m.get_pattern_value_map(); auto data = pattern_map.at(data_pattern); - auto min_const = std::dynamic_pointer_cast(pattern_map.at(min_const_pattern).get_node_shared_ptr()); + auto min_const = + std::dynamic_pointer_cast(pattern_map.at(min_const_pattern).get_node_shared_ptr()); if (!min_const) return false; if (shape_size(min_const->get_shape()) != 1) return false; - auto max_const = std::dynamic_pointer_cast(pattern_map.at(max_const_pattern).get_node_shared_ptr()); + auto max_const = + std::dynamic_pointer_cast(pattern_map.at(max_const_pattern).get_node_shared_ptr()); if (!max_const) return false; if (shape_size(max_const->get_shape()) != 1) diff --git a/src/common/transformations/src/transformations/common_optimizations/common_optimizations.cpp b/src/common/transformations/src/transformations/common_optimizations/common_optimizations.cpp index 2e4d2edca4c..69147c85af5 100644 --- a/src/common/transformations/src/transformations/common_optimizations/common_optimizations.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/common_optimizations.cpp @@ -2,106 +2,105 @@ // SPDX-License-Identifier: Apache-2.0 // -#include - -#include "transformations/init_node_info.hpp" -#include "itt.hpp" -#include "transformations/common_optimizations/broadcast_elementwise_fusion.hpp" -#include "transformations/common_optimizations/nop_elimination.hpp" #include "transformations/common_optimizations/common_optimizations.hpp" -#include "transformations/common_optimizations/conv_mul_fusion.hpp" -#include "transformations/common_optimizations/fq_mul_fusion.hpp" -#include "transformations/common_optimizations/fq_reshape_fusion.hpp" -#include "transformations/common_optimizations/gelu_fusion.hpp" -#include "transformations/common_optimizations/depth_to_space_fusion.hpp" -#include 
"transformations/common_optimizations/dropout_with_random_uniform_replacer.hpp" -#include "transformations/common_optimizations/optimize_strided_slice.hpp" -#include "transformations/common_optimizations/softplus_fusion.hpp" -#include "transformations/common_optimizations/softplus_to_mish_fusion.hpp" -#include "transformations/common_optimizations/swish_fusion.hpp" -#include "transformations/common_optimizations/normalize_l2_fusion.hpp" -#include "transformations/common_optimizations/pull_transpose_through_fq.hpp" -#include "transformations/common_optimizations/leaky_relu_fusion.hpp" -#include "transformations/common_optimizations/lin_op_sequence_fusion.hpp" -#include "transformations/common_optimizations/remove_filtering_boxes_by_size.hpp" -#include "transformations/common_optimizations/hsigmoid_fusion.hpp" -#include "transformations/common_optimizations/hswish_fusion.hpp" -#include "transformations/common_optimizations/convert_quantize_dequantize.hpp" -#include "transformations/common_optimizations/relu_fake_quantize_fusion.hpp" -#include "transformations/common_optimizations/disable_random_uniform_constant_folding.hpp" -#include "transformations/common_optimizations/random_uniform_fusion.hpp" -#include "transformations/common_optimizations/add_fake_quantize_fusion.hpp" -#include "transformations/common_optimizations/mul_fake_quantize_fusion.hpp" -#include "transformations/common_optimizations/clamp_fusion.hpp" -#include "transformations/common_optimizations/pad_fusion.hpp" -#include "transformations/common_optimizations/eliminate_unsqueeze_gather.hpp" -#include "transformations/common_optimizations/shuffle_channels_fusion.hpp" -#include "transformations/common_optimizations/softmax_fusion.hpp" -#include "transformations/common_optimizations/mvn_fusion.hpp" -#include "transformations/common_optimizations/binarize_weights.hpp" -#include "transformations/common_optimizations/conv_to_binary_conv.hpp" -#include "transformations/common_optimizations/space_to_batch_fusion.hpp" -#include "transformations/common_optimizations/batch_to_space_fusion.hpp" -#include "transformations/common_optimizations/dilated_convolution_converter.hpp" -#include "transformations/common_optimizations/transpose_sinking.hpp" -#include "transformations/common_optimizations/split_squeeze_concat_fusion.hpp" -#include "transformations/common_optimizations/skip_gather_before_transpose_and_reshape.hpp" -#include "transformations/common_optimizations/transpose_to_reshape.hpp" -#include "transformations/common_optimizations/strides_optimization.hpp" -#include "transformations/common_optimizations/convert_nms_gather_path_to_unsigned.hpp" -#include "transformations/common_optimizations/mul_conv_fusion.hpp" -#include "transformations/common_optimizations/interpolate_sequence_fusion.hpp" -#include "transformations/common_optimizations/convert_compression_only_to_legacy.hpp" -#include -#include "transformations/common_optimizations/matmul_multiply_fusion.hpp" -#include "transformations/common_optimizations/mark_precision_sensitive_divides.hpp" -#include "transformations/op_conversions/bidirectional_sequences_decomposition.hpp" -#include "transformations/common_optimizations/concat_reduce_fusion.hpp" -#include "transformations/op_conversions/convert_pad_to_group_conv.hpp" -#include "transformations/op_conversions/convert_divide.hpp" -#include "transformations/op_conversions/convert_gather_downgrade.hpp" -#include "transformations/op_conversions/convert_gather_upgrade.hpp" -#include "transformations/op_conversions/convert_mod.hpp" 
-#include "transformations/op_conversions/convert_minimum_to_power_and_max.hpp" -#include "transformations/op_conversions/convert_negative.hpp" -#include "transformations/op_conversions/convert_scatter_elements_to_scatter.hpp" -#include "transformations/op_conversions/convert_reduce_to_pooling.hpp" -#include "transformations/op_conversions/convert_subtract.hpp" -#include "transformations/op_conversions/convert_softmax_downgrade.hpp" -#include "transformations/op_conversions/convert_softmax_upgrade.hpp" -#include "transformations/op_conversions/convert_depth_to_space.hpp" -#include "transformations/op_conversions/convert_space_to_depth.hpp" -#include "transformations/op_conversions/convert_broadcast_to_tiles.hpp" -#include "transformations/op_conversions/convert_gelu.hpp" -#include "transformations/op_conversions/convert_interpolate1_to_interpolate4.hpp" -#include "transformations/op_conversions/detection_output_downgrade.hpp" -#include "transformations/op_conversions/detection_output_upgrade.hpp" -#include "transformations/op_conversions/batch_norm_decomposition.hpp" -#include "transformations/op_conversions/einsum_decomposition.hpp" -#include "transformations/op_conversions/gelu7_downgrade.hpp" -#include "transformations/op_conversions/reduce_l1_decomposition.hpp" -#include "transformations/op_conversions/reduce_l2_decomposition.hpp" -#include "transformations/op_conversions/hswish_decomposition.hpp" -#include "transformations/op_conversions/hsigmoid_decomposition.hpp" -#include "transformations/op_conversions/log_softmax_decomposition.hpp" -#include "transformations/op_conversions/mvn6_decomposition.hpp" -#include "transformations/op_conversions/simplify_ctc_greedy_decoder_seq_len.hpp" -#include "transformations/op_conversions/gather_normalize_negative_indices.hpp" -#include "transformations/op_conversions/convert_deformable_conv_v8_to_v1.hpp" -#include "transformations/op_conversions/convert_maxpool_downgrade.hpp" -#include "transformations/op_conversions/convert_maxpool_upgrade.hpp" -#include "transformations/disable_decompression_convert_constant_folding.hpp" -#include "transformations/op_conversions/convert_prior_box_v8_to_v0.hpp" -#include +#include #include -#include +#include +#include #include - +#include +#include #include #include #include -#include + +#include "itt.hpp" +#include "transformations/common_optimizations/add_fake_quantize_fusion.hpp" +#include "transformations/common_optimizations/batch_to_space_fusion.hpp" +#include "transformations/common_optimizations/binarize_weights.hpp" +#include "transformations/common_optimizations/broadcast_elementwise_fusion.hpp" +#include "transformations/common_optimizations/clamp_fusion.hpp" +#include "transformations/common_optimizations/concat_reduce_fusion.hpp" +#include "transformations/common_optimizations/conv_mul_fusion.hpp" +#include "transformations/common_optimizations/conv_to_binary_conv.hpp" +#include "transformations/common_optimizations/convert_compression_only_to_legacy.hpp" +#include "transformations/common_optimizations/convert_nms_gather_path_to_unsigned.hpp" +#include "transformations/common_optimizations/convert_quantize_dequantize.hpp" +#include "transformations/common_optimizations/depth_to_space_fusion.hpp" +#include "transformations/common_optimizations/dilated_convolution_converter.hpp" +#include "transformations/common_optimizations/disable_random_uniform_constant_folding.hpp" +#include "transformations/common_optimizations/dropout_with_random_uniform_replacer.hpp" +#include 
"transformations/common_optimizations/eliminate_unsqueeze_gather.hpp" +#include "transformations/common_optimizations/fq_mul_fusion.hpp" +#include "transformations/common_optimizations/fq_reshape_fusion.hpp" +#include "transformations/common_optimizations/gelu_fusion.hpp" +#include "transformations/common_optimizations/hsigmoid_fusion.hpp" +#include "transformations/common_optimizations/hswish_fusion.hpp" +#include "transformations/common_optimizations/interpolate_sequence_fusion.hpp" +#include "transformations/common_optimizations/leaky_relu_fusion.hpp" +#include "transformations/common_optimizations/lin_op_sequence_fusion.hpp" +#include "transformations/common_optimizations/mark_precision_sensitive_divides.hpp" +#include "transformations/common_optimizations/matmul_multiply_fusion.hpp" +#include "transformations/common_optimizations/mul_conv_fusion.hpp" +#include "transformations/common_optimizations/mul_fake_quantize_fusion.hpp" +#include "transformations/common_optimizations/mvn_fusion.hpp" +#include "transformations/common_optimizations/nop_elimination.hpp" +#include "transformations/common_optimizations/normalize_l2_fusion.hpp" +#include "transformations/common_optimizations/optimize_strided_slice.hpp" +#include "transformations/common_optimizations/pad_fusion.hpp" +#include "transformations/common_optimizations/pull_transpose_through_fq.hpp" +#include "transformations/common_optimizations/random_uniform_fusion.hpp" +#include "transformations/common_optimizations/relu_fake_quantize_fusion.hpp" +#include "transformations/common_optimizations/remove_filtering_boxes_by_size.hpp" +#include "transformations/common_optimizations/shuffle_channels_fusion.hpp" +#include "transformations/common_optimizations/skip_gather_before_transpose_and_reshape.hpp" +#include "transformations/common_optimizations/softmax_fusion.hpp" +#include "transformations/common_optimizations/softplus_fusion.hpp" +#include "transformations/common_optimizations/softplus_to_mish_fusion.hpp" +#include "transformations/common_optimizations/space_to_batch_fusion.hpp" +#include "transformations/common_optimizations/split_squeeze_concat_fusion.hpp" +#include "transformations/common_optimizations/strides_optimization.hpp" +#include "transformations/common_optimizations/swish_fusion.hpp" +#include "transformations/common_optimizations/transpose_sinking.hpp" +#include "transformations/common_optimizations/transpose_to_reshape.hpp" +#include "transformations/disable_decompression_convert_constant_folding.hpp" +#include "transformations/init_node_info.hpp" +#include "transformations/op_conversions/batch_norm_decomposition.hpp" +#include "transformations/op_conversions/bidirectional_sequences_decomposition.hpp" +#include "transformations/op_conversions/convert_broadcast_to_tiles.hpp" +#include "transformations/op_conversions/convert_deformable_conv_v8_to_v1.hpp" +#include "transformations/op_conversions/convert_depth_to_space.hpp" +#include "transformations/op_conversions/convert_divide.hpp" +#include "transformations/op_conversions/convert_gather_downgrade.hpp" +#include "transformations/op_conversions/convert_gather_upgrade.hpp" +#include "transformations/op_conversions/convert_gelu.hpp" +#include "transformations/op_conversions/convert_interpolate1_to_interpolate4.hpp" +#include "transformations/op_conversions/convert_maxpool_downgrade.hpp" +#include "transformations/op_conversions/convert_maxpool_upgrade.hpp" +#include "transformations/op_conversions/convert_minimum_to_power_and_max.hpp" +#include 
"transformations/op_conversions/convert_mod.hpp" +#include "transformations/op_conversions/convert_negative.hpp" +#include "transformations/op_conversions/convert_pad_to_group_conv.hpp" +#include "transformations/op_conversions/convert_prior_box_v8_to_v0.hpp" +#include "transformations/op_conversions/convert_reduce_to_pooling.hpp" +#include "transformations/op_conversions/convert_scatter_elements_to_scatter.hpp" +#include "transformations/op_conversions/convert_softmax_downgrade.hpp" +#include "transformations/op_conversions/convert_softmax_upgrade.hpp" +#include "transformations/op_conversions/convert_space_to_depth.hpp" +#include "transformations/op_conversions/convert_subtract.hpp" +#include "transformations/op_conversions/detection_output_downgrade.hpp" +#include "transformations/op_conversions/detection_output_upgrade.hpp" +#include "transformations/op_conversions/einsum_decomposition.hpp" +#include "transformations/op_conversions/gather_normalize_negative_indices.hpp" +#include "transformations/op_conversions/gelu7_downgrade.hpp" +#include "transformations/op_conversions/hsigmoid_decomposition.hpp" +#include "transformations/op_conversions/hswish_decomposition.hpp" +#include "transformations/op_conversions/log_softmax_decomposition.hpp" +#include "transformations/op_conversions/mvn6_decomposition.hpp" +#include "transformations/op_conversions/reduce_l1_decomposition.hpp" +#include "transformations/op_conversions/reduce_l2_decomposition.hpp" +#include "transformations/op_conversions/simplify_ctc_greedy_decoder_seq_len.hpp" NGRAPH_RTTI_DEFINITION(ngraph::pass::CommonOptimizations, "CommonOptimizations", 0); diff --git a/src/common/transformations/src/transformations/common_optimizations/compress_float_constants.cpp b/src/common/transformations/src/transformations/common_optimizations/compress_float_constants.cpp index 4df6241a92c..f576115556f 100644 --- a/src/common/transformations/src/transformations/common_optimizations/compress_float_constants.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/compress_float_constants.cpp @@ -4,14 +4,13 @@ #include "transformations/common_optimizations/compress_float_constants.hpp" -#include "openvino/opsets/opset8.hpp" +#include "itt.hpp" #include "ngraph/rt_info.hpp" +#include "openvino/opsets/opset8.hpp" #include "openvino/pass/pattern/op/wrap_type.hpp" #include "transformations/rt_info/decompression.hpp" #include "transformations/rt_info/disable_fp16_compression.hpp" #include "transformations/rt_info/old_api_map_element_type_attribute.hpp" -#include "itt.hpp" - namespace { template @@ -40,7 +39,8 @@ std::shared_ptr change_constant_precision_to_fp16(std::shared_ptr( - const_node_pattern.get_node_shared_ptr()); + auto const_node = std::dynamic_pointer_cast(const_node_pattern.get_node_shared_ptr()); if (!const_node) return false; diff --git a/src/common/transformations/src/transformations/common_optimizations/concat_reduce_fusion.cpp b/src/common/transformations/src/transformations/common_optimizations/concat_reduce_fusion.cpp index d4b6ff35387..a3b3c18ef60 100644 --- a/src/common/transformations/src/transformations/common_optimizations/concat_reduce_fusion.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/concat_reduce_fusion.cpp @@ -3,27 +3,25 @@ // #include "transformations/common_optimizations/concat_reduce_fusion.hpp" -#include "transformations/common_optimizations/nop_elimination.hpp" -#include "transformations/utils/utils.hpp" #include -#include - #include -#include -#include #include 
#include -#include "itt.hpp" +#include +#include +#include +#include "itt.hpp" +#include "transformations/common_optimizations/nop_elimination.hpp" +#include "transformations/utils/utils.hpp" NGRAPH_RTTI_DEFINITION(ngraph::pass::ReplaceConcatReduceByMinOrMax, "ReplaceConcatReduceByMinOrMax", 0); NGRAPH_RTTI_DEFINITION(ngraph::pass::PullSqueezeThroughEltwise, "PullSqueezeThroughEltwise", 0); NGRAPH_RTTI_DEFINITION(ngraph::pass::ConcatReduceFusion, "ConcatReduceFusion", 0); - namespace { -enum class ReduceType {NONE, MAX, MIN}; +enum class ReduceType { NONE, MAX, MIN }; ReduceType get_reduce_type(const std::shared_ptr& reduce_node) { if (ov::is_type(reduce_node)) { @@ -34,7 +32,7 @@ ReduceType get_reduce_type(const std::shared_ptr& reduce_node) { return ReduceType::NONE; } } -} // namespace +} // namespace ngraph::pass::PullSqueezeThroughEltwise::PullSqueezeThroughEltwise() { MATCHER_SCOPE(PullSqueezeThroughEltwise); @@ -87,13 +85,15 @@ ngraph::pass::ReplaceConcatReduceByMinOrMax::ReplaceConcatReduceByMinOrMax() { auto concat_pattern = ngraph::pattern::wrap_type({pattern::any_input(), pattern::any_input()}); auto reduce_axes_pattern = ngraph::pattern::wrap_type(); - auto reduce_pattern = ngraph::pattern::wrap_type({concat_pattern, reduce_axes_pattern}); + auto reduce_pattern = + ngraph::pattern::wrap_type({concat_pattern, reduce_axes_pattern}); ngraph::matcher_pass_callback callback = [=](pattern::Matcher& m) { const auto& pattern_map = m.get_pattern_value_map(); auto concat = as_type_ptr(pattern_map.at(concat_pattern).get_node_shared_ptr()); - auto reduce = as_type_ptr(pattern_map.at(reduce_pattern).get_node_shared_ptr()); + auto reduce = + as_type_ptr(pattern_map.at(reduce_pattern).get_node_shared_ptr()); if (!reduce || !concat) return false; @@ -118,7 +118,8 @@ ngraph::pass::ReplaceConcatReduceByMinOrMax::ReplaceConcatReduceByMinOrMax() { copy_runtime_info({concat, reduce}, result_node); if (!reduce->get_keep_dims()) { - const auto squeeze_axis_node = ngraph::opset8::Constant::create(ngraph::element::i64, {}, {*reduction_axes.begin()}); + const auto squeeze_axis_node = + ngraph::opset8::Constant::create(ngraph::element::i64, {}, {*reduction_axes.begin()}); result_node = register_new_node(result_node, squeeze_axis_node); copy_runtime_info({concat, reduce}, result_node); } diff --git a/src/common/transformations/src/transformations/common_optimizations/conv_mul_fusion.cpp b/src/common/transformations/src/transformations/common_optimizations/conv_mul_fusion.cpp index b737d34d06b..bbe836325de 100644 --- a/src/common/transformations/src/transformations/common_optimizations/conv_mul_fusion.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/conv_mul_fusion.cpp @@ -3,18 +3,17 @@ // #include "transformations/common_optimizations/conv_mul_fusion.hpp" -#include "itt.hpp" #include +#include +#include +#include +#include +#include +#include #include -#include -#include -#include -#include -#include - -#include +#include "itt.hpp" NGRAPH_RTTI_DEFINITION(ngraph::pass::ConvolutionMultiplyFusion, "ConvolutionMultiplyFusion", 0); @@ -26,18 +25,18 @@ ngraph::pass::ConvolutionMultiplyFusion::ConvolutionMultiplyFusion() { auto mul_const = ngraph::pattern::wrap_type(pattern::has_static_shape()); auto mul = ngraph::pattern::wrap_type({conv, mul_const}); - matcher_pass_callback callback = [conv, input, weights, mul, mul_const](pattern::Matcher & m) -> bool { - const auto & pattern_to_output = m.get_pattern_value_map(); + matcher_pass_callback callback = [conv, input, weights, mul, 
mul_const](pattern::Matcher& m) -> bool { + const auto& pattern_to_output = m.get_pattern_value_map(); - const auto & m_weights = pattern_to_output.at(weights); - const auto & m_const = pattern_to_output.at(mul_const); - const auto & m_input = pattern_to_output.at(input); - const auto & m_conv = pattern_to_output.at(conv).get_node_shared_ptr(); - const auto & m_mul = pattern_to_output.at(mul).get_node_shared_ptr(); + const auto& m_weights = pattern_to_output.at(weights); + const auto& m_const = pattern_to_output.at(mul_const); + const auto& m_input = pattern_to_output.at(input); + const auto& m_conv = pattern_to_output.at(conv).get_node_shared_ptr(); + const auto& m_mul = pattern_to_output.at(mul).get_node_shared_ptr(); - const auto & channel_dim = m_weights.get_partial_shape()[0].get_length(); - const auto & weights_rank = m_weights.get_partial_shape().rank().get_length(); - const auto & const_shape = m_const.get_shape(); + const auto& channel_dim = m_weights.get_partial_shape()[0].get_length(); + const auto& weights_rank = m_weights.get_partial_shape().rank().get_length(); + const auto& const_shape = m_const.get_shape(); bool is_scalar_multiplier(shape_size(const_shape) == 1); @@ -60,9 +59,12 @@ ngraph::pass::ConvolutionMultiplyFusion::ConvolutionMultiplyFusion() { if (!is_scalar_multiplier) { auto final_const_shape = Shape(weights_rank, 1); final_const_shape[0] = channel_dim; - final_const = std::make_shared(m_const, - opset4::Constant::create(ngraph::element::i64, ngraph::Shape{final_const_shape.size()}, - final_const_shape), true); + final_const = + std::make_shared(m_const, + opset4::Constant::create(ngraph::element::i64, + ngraph::Shape{final_const_shape.size()}, + final_const_shape), + true); } // Multiply convolution weights with aligned Constant values @@ -87,22 +89,22 @@ ngraph::pass::GroupConvolutionMultiplyFusion::GroupConvolutionMultiplyFusion() { auto input = pattern::any_input(); auto weights = ngraph::pattern::any_input(pattern::has_static_dims({0, 1}) /* has GOIYX layout */); auto conv = ngraph::pattern::wrap_type({input, weights}, pattern::consumers_count(1)); - auto mul_const = ngraph::pattern::wrap_type();//pattern::has_static_shape()); + auto mul_const = ngraph::pattern::wrap_type(); // pattern::has_static_shape()); auto mul = ngraph::pattern::wrap_type({conv, mul_const}); - matcher_pass_callback callback = [conv, input, weights, mul, mul_const](pattern::Matcher & m) -> bool { - const auto & pattern_to_output = m.get_pattern_value_map(); + matcher_pass_callback callback = [conv, input, weights, mul, mul_const](pattern::Matcher& m) -> bool { + const auto& pattern_to_output = m.get_pattern_value_map(); auto m_weights = pattern_to_output.at(weights); - const auto & m_const = pattern_to_output.at(mul_const); - const auto & m_conv = pattern_to_output.at(conv).get_node_shared_ptr(); - const auto & m_mul = pattern_to_output.at(mul).get_node_shared_ptr(); + const auto& m_const = pattern_to_output.at(mul_const); + const auto& m_conv = pattern_to_output.at(conv).get_node_shared_ptr(); + const auto& m_mul = pattern_to_output.at(mul).get_node_shared_ptr(); - const auto & weights_shape = m_weights.get_partial_shape(); - const auto & G = weights_shape[0].get_length(); - const auto & O = weights_shape[1].get_length(); - const auto & weights_rank = weights_shape.rank().get_length(); - const auto & const_shape = m_const.get_shape(); + const auto& weights_shape = m_weights.get_partial_shape(); + const auto& G = weights_shape[0].get_length(); + const auto& O = 
weights_shape[1].get_length(); + const auto& weights_rank = weights_shape.rank().get_length(); + const auto& const_shape = m_const.get_shape(); bool is_scalar_multiplier(shape_size(const_shape) == 1); @@ -144,9 +146,12 @@ ngraph::pass::GroupConvolutionMultiplyFusion::GroupConvolutionMultiplyFusion() { final_const_shape[0] = G; final_const_shape[1] = O; } - final_const = std::make_shared(m_const, - opset4::Constant::create(ngraph::element::i64, ngraph::Shape{final_const_shape.size()}, - final_const_shape), true); + final_const = + std::make_shared(m_const, + opset4::Constant::create(ngraph::element::i64, + ngraph::Shape{final_const_shape.size()}, + final_const_shape), + true); } // Multiply convolution weights with aligned Constant values @@ -173,22 +178,23 @@ ngraph::pass::ConvolutionBackpropDataMultiplyFusion::ConvolutionBackpropDataMult MATCHER_SCOPE(ConvolutionBackpropDataMultiplyFusion); auto input = pattern::any_input(); auto weights = ngraph::pattern::any_input(pattern::has_static_dim(1) /* has IOYX layout */); - auto conv = ngraph::pattern::wrap_type({input, weights}, pattern::consumers_count(1)); + auto conv = + ngraph::pattern::wrap_type({input, weights}, pattern::consumers_count(1)); auto mul_const = ngraph::pattern::wrap_type(pattern::has_static_shape()); auto mul = ngraph::pattern::wrap_type({conv, mul_const}); - matcher_pass_callback callback = [conv, input, weights, mul, mul_const](pattern::Matcher & m) -> bool { - const auto & pattern_to_output = m.get_pattern_value_map(); + matcher_pass_callback callback = [conv, input, weights, mul, mul_const](pattern::Matcher& m) -> bool { + const auto& pattern_to_output = m.get_pattern_value_map(); - const auto & m_weights = pattern_to_output.at(weights); - const auto & m_const = pattern_to_output.at(mul_const); - const auto & m_input = pattern_to_output.at(input); - const auto & m_conv = pattern_to_output.at(conv).get_node_shared_ptr(); - const auto & m_mul = pattern_to_output.at(mul).get_node_shared_ptr(); + const auto& m_weights = pattern_to_output.at(weights); + const auto& m_const = pattern_to_output.at(mul_const); + const auto& m_input = pattern_to_output.at(input); + const auto& m_conv = pattern_to_output.at(conv).get_node_shared_ptr(); + const auto& m_mul = pattern_to_output.at(mul).get_node_shared_ptr(); - const auto & channel_dim = m_weights.get_partial_shape()[1].get_length(); - const auto & weights_rank = m_weights.get_partial_shape().rank().get_length(); - const auto & const_shape = m_const.get_shape(); + const auto& channel_dim = m_weights.get_partial_shape()[1].get_length(); + const auto& weights_rank = m_weights.get_partial_shape().rank().get_length(); + const auto& const_shape = m_const.get_shape(); bool is_scalar_multiplier(shape_size(const_shape) == 1); @@ -211,9 +217,12 @@ ngraph::pass::ConvolutionBackpropDataMultiplyFusion::ConvolutionBackpropDataMult if (!is_scalar_multiplier) { auto final_const_shape = Shape(weights_rank - 1, 1); final_const_shape[0] = channel_dim; - final_const = std::make_shared(m_const, - opset4::Constant::create(ngraph::element::i64, ngraph::Shape{final_const_shape.size()}, - final_const_shape), true); + final_const = + std::make_shared(m_const, + opset4::Constant::create(ngraph::element::i64, + ngraph::Shape{final_const_shape.size()}, + final_const_shape), + true); } // Multiply convolution weights with aligned Constant values @@ -231,29 +240,32 @@ ngraph::pass::ConvolutionBackpropDataMultiplyFusion::ConvolutionBackpropDataMult register_matcher(m, callback); } 
-NGRAPH_RTTI_DEFINITION(ngraph::pass::GroupConvolutionBackpropDataMultiplyFusion, "GroupConvolutionBackpropDataMultiplyFusion", 0); +NGRAPH_RTTI_DEFINITION(ngraph::pass::GroupConvolutionBackpropDataMultiplyFusion, + "GroupConvolutionBackpropDataMultiplyFusion", + 0); ngraph::pass::GroupConvolutionBackpropDataMultiplyFusion::GroupConvolutionBackpropDataMultiplyFusion() { MATCHER_SCOPE(GroupConvolutionBackpropDataMultiplyFusion); auto input = pattern::any_input(); auto weights = ngraph::pattern::any_input(pattern::has_static_dims({0, 2}) /* has GIOYX layout */); - auto conv = ngraph::pattern::wrap_type({input, weights}, pattern::consumers_count(1)); + auto conv = + ngraph::pattern::wrap_type({input, weights}, pattern::consumers_count(1)); auto mul_const = ngraph::pattern::wrap_type(pattern::has_static_shape()); auto mul = ngraph::pattern::wrap_type({conv, mul_const}); - matcher_pass_callback callback = [conv, input, weights, mul, mul_const](pattern::Matcher & m) -> bool { - const auto & pattern_to_output = m.get_pattern_value_map(); + matcher_pass_callback callback = [conv, input, weights, mul, mul_const](pattern::Matcher& m) -> bool { + const auto& pattern_to_output = m.get_pattern_value_map(); - const auto & m_weights = pattern_to_output.at(weights); - const auto & m_const = pattern_to_output.at(mul_const); - const auto & m_input = pattern_to_output.at(input); - const auto & m_conv = pattern_to_output.at(conv).get_node_shared_ptr(); - const auto & m_mul = pattern_to_output.at(mul).get_node_shared_ptr(); + const auto& m_weights = pattern_to_output.at(weights); + const auto& m_const = pattern_to_output.at(mul_const); + const auto& m_input = pattern_to_output.at(input); + const auto& m_conv = pattern_to_output.at(conv).get_node_shared_ptr(); + const auto& m_mul = pattern_to_output.at(mul).get_node_shared_ptr(); - const auto & G = m_weights.get_partial_shape()[0].get_length(); - const auto & O = m_weights.get_partial_shape()[2].get_length(); - const auto & weights_rank = m_weights.get_partial_shape().rank().get_length(); - const auto & const_shape = m_const.get_shape(); + const auto& G = m_weights.get_partial_shape()[0].get_length(); + const auto& O = m_weights.get_partial_shape()[2].get_length(); + const auto& weights_rank = m_weights.get_partial_shape().rank().get_length(); + const auto& const_shape = m_const.get_shape(); bool is_scalar_multiplier(shape_size(const_shape) == 1); @@ -277,9 +289,12 @@ ngraph::pass::GroupConvolutionBackpropDataMultiplyFusion::GroupConvolutionBackpr auto final_const_shape = Shape(weights_rank, 1); final_const_shape[0] = G; final_const_shape[2] = O; - final_const = std::make_shared(m_const, - opset4::Constant::create(ngraph::element::i64, ngraph::Shape{final_const_shape.size()}, - final_const_shape), true); + final_const = + std::make_shared(m_const, + opset4::Constant::create(ngraph::element::i64, + ngraph::Shape{final_const_shape.size()}, + final_const_shape), + true); } // Multiply convolution weights with aligned Constant values diff --git a/src/common/transformations/src/transformations/common_optimizations/conv_to_binary_conv.cpp b/src/common/transformations/src/transformations/common_optimizations/conv_to_binary_conv.cpp index 965d245779f..a31062bcee1 100644 --- a/src/common/transformations/src/transformations/common_optimizations/conv_to_binary_conv.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/conv_to_binary_conv.cpp @@ -3,15 +3,15 @@ // #include "transformations/common_optimizations/conv_to_binary_conv.hpp" -#include 
"itt.hpp" #include -#include - #include #include #include #include +#include + +#include "itt.hpp" NGRAPH_RTTI_DEFINITION(ngraph::pass::ConvToBinaryConv, "ConvToBinaryConv", 0); @@ -32,16 +32,16 @@ static std::vector binarize_weights(const std::vector& weights) ngraph::pass::ConvToBinaryConv::ConvToBinaryConv() { MATCHER_SCOPE(ConvToBinaryConv); - auto fq_pattern = ngraph::pattern::wrap_type( - {ngraph::pattern::any_input(), - ngraph::pattern::any_input(), - ngraph::pattern::any_input(), - ngraph::pattern::wrap_type(), - ngraph::pattern::wrap_type()}, - pattern::consumers_count(1)); - auto conv_pattern = ngraph::pattern::wrap_type({fq_pattern, ngraph::pattern::wrap_type()}); + auto fq_pattern = ngraph::pattern::wrap_type({ngraph::pattern::any_input(), + ngraph::pattern::any_input(), + ngraph::pattern::any_input(), + ngraph::pattern::wrap_type(), + ngraph::pattern::wrap_type()}, + pattern::consumers_count(1)); + auto conv_pattern = + ngraph::pattern::wrap_type({fq_pattern, ngraph::pattern::wrap_type()}); - ngraph::matcher_pass_callback callback = [=](pattern::Matcher &m) { + ngraph::matcher_pass_callback callback = [=](pattern::Matcher& m) { auto conv = std::dynamic_pointer_cast(m.get_match_root()); if (!conv) return false; @@ -49,41 +49,54 @@ ngraph::pass::ConvToBinaryConv::ConvToBinaryConv() { if (!fq || fq->get_levels() != 2) return false; - auto output_low_constant = std::dynamic_pointer_cast(fq->input_value(3).get_node_shared_ptr()); + auto output_low_constant = + std::dynamic_pointer_cast(fq->input_value(3).get_node_shared_ptr()); if (!output_low_constant) return false; auto output_low = output_low_constant->cast_vector(); - bool output_low_is_zero = std::all_of(output_low.begin(), output_low.end(), [] (float f) -> bool { return f == 0.0f; }); - bool output_low_is_minus_one = std::all_of(output_low.begin(), output_low.end(), [] (float f) -> bool { return f == -1.0f; }); - auto output_high_constant = std::dynamic_pointer_cast(fq->input_value(4).get_node_shared_ptr()); + bool output_low_is_zero = std::all_of(output_low.begin(), output_low.end(), [](float f) -> bool { + return f == 0.0f; + }); + bool output_low_is_minus_one = std::all_of(output_low.begin(), output_low.end(), [](float f) -> bool { + return f == -1.0f; + }); + auto output_high_constant = + std::dynamic_pointer_cast(fq->input_value(4).get_node_shared_ptr()); if (!output_high_constant) return false; auto output_high = output_high_constant->cast_vector(); - bool output_high_is_one = std::all_of(output_high.begin(), output_high.end(), [] (float f) -> bool { return f == 1.0f; }); + bool output_high_is_one = std::all_of(output_high.begin(), output_high.end(), [](float f) -> bool { + return f == 1.0f; + }); if (!(output_high_is_one && (output_low_is_zero || output_low_is_minus_one))) - return false; + return false; auto weights_constant = std::dynamic_pointer_cast(conv->input_value(1).get_node_shared_ptr()); if (!weights_constant) return false; auto weights = weights_constant->cast_vector(); - if (!std::all_of(weights.begin(), weights.end(), [] (float f) -> bool { return f == -1.0f || f == 1.0f; })) + if (!std::all_of(weights.begin(), weights.end(), [](float f) -> bool { + return f == -1.0f || f == 1.0f; + })) return false; auto bin_weights = binarize_weights(weights); - auto bin_weights_constant = std::make_shared(element::u1, weights_constant->get_shape(), bin_weights.data()); + auto bin_weights_constant = + std::make_shared(element::u1, weights_constant->get_shape(), bin_weights.data()); if (output_low_is_zero && 
output_high_is_one) { - auto new_conv = std::make_shared(conv->input_value(0), bin_weights_constant, - conv->get_strides(), - conv->get_pads_begin(), - conv->get_pads_end(), - conv->get_dilations(), - opset5::BinaryConvolution::BinaryConvolutionMode::XNOR_POPCOUNT, - -1, - conv->get_auto_pad()); + auto new_conv = std::make_shared( + conv->input_value(0), + bin_weights_constant, + conv->get_strides(), + conv->get_pads_begin(), + conv->get_pads_end(), + conv->get_dilations(), + opset5::BinaryConvolution::BinaryConvolutionMode::XNOR_POPCOUNT, + -1, + conv->get_auto_pad()); new_conv->set_friendly_name(conv->get_friendly_name()); std::vector axes; std::vector weights_reduced_shape = {-1}; @@ -94,13 +107,13 @@ ngraph::pass::ConvToBinaryConv::ConvToBinaryConv() { weights_reduced_shape.push_back(1); } auto weights_reduced = std::make_shared( - op::Constant::create(element::f32, weights_constant->get_shape(), weights), - op::Constant::create(element::i64, Shape{axes.size()}, axes), false); - std::shared_ptr weights_reduced_reshaped = std::make_shared(weights_reduced, - op::Constant::create(element::i64, - Shape{weights_reduced_shape.size()}, - weights_reduced_shape), - false); + op::Constant::create(element::f32, weights_constant->get_shape(), weights), + op::Constant::create(element::i64, Shape{axes.size()}, axes), + false); + std::shared_ptr weights_reduced_reshaped = std::make_shared( + weights_reduced, + op::Constant::create(element::i64, Shape{weights_reduced_shape.size()}, weights_reduced_shape), + false); weights_reduced_reshaped = ngraph::get_constant_from_source(weights_reduced_reshaped); auto add = std::make_shared(new_conv, weights_reduced_reshaped); auto mul = std::make_shared(add, op::Constant::create(element::f32, Shape{}, {0.5})); @@ -110,14 +123,16 @@ ngraph::pass::ConvToBinaryConv::ConvToBinaryConv() { return true; } - auto new_conv = std::make_shared(conv->input_value(0), bin_weights_constant, - conv->get_strides(), - conv->get_pads_begin(), - conv->get_pads_end(), - conv->get_dilations(), - opset5::BinaryConvolution::BinaryConvolutionMode::XNOR_POPCOUNT, - 0, - conv->get_auto_pad()); + auto new_conv = + std::make_shared(conv->input_value(0), + bin_weights_constant, + conv->get_strides(), + conv->get_pads_begin(), + conv->get_pads_end(), + conv->get_dilations(), + opset5::BinaryConvolution::BinaryConvolutionMode::XNOR_POPCOUNT, + 0, + conv->get_auto_pad()); new_conv->set_friendly_name(conv->get_friendly_name()); copy_runtime_info(conv, new_conv); replace_node(conv, new_conv); diff --git a/src/common/transformations/src/transformations/common_optimizations/convert_nms_gather_path_to_unsigned.cpp b/src/common/transformations/src/transformations/common_optimizations/convert_nms_gather_path_to_unsigned.cpp index 33809fe27c0..55caec13138 100644 --- a/src/common/transformations/src/transformations/common_optimizations/convert_nms_gather_path_to_unsigned.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/convert_nms_gather_path_to_unsigned.cpp @@ -2,16 +2,18 @@ // SPDX-License-Identifier: Apache-2.0 // #include "transformations/common_optimizations/convert_nms_gather_path_to_unsigned.hpp" -#include + +#include +#include +#include #include #include #include #include -#include -#include +#include #include -#include #include + #include "itt.hpp" #include "ngraph/node.hpp" @@ -20,84 +22,82 @@ using namespace std; namespace ngraph { namespace pass { class InitNMSPath : public pass::MatcherPass { - public: - NGRAPH_RTTI_DECLARATION; - InitNMSPath() { - 
MATCHER_SCOPE(InitNMSPath); - auto nms_pattern = pattern::wrap_type(); - matcher_pass_callback callback = [=](pattern::Matcher &m) { - const auto &out_nodes = m.get_match_root()->output(0).get_target_inputs(); - for (const auto & out_node : out_nodes) { - ov::set_nms_selected_indices(out_node.get_node()); - } - return true; - }; - auto m = make_shared(nms_pattern, matcher_name); - register_matcher(m, callback); - } - }; -class PropagateNMSPath: public pass::MatcherPass { - public: - NGRAPH_RTTI_DECLARATION; - PropagateNMSPath(){ - MATCHER_SCOPE(PropagateNMSPath); - auto node_pattern = pattern::wrap_type< - opset8::Squeeze, - opset8::Unsqueeze, - opset8::Reshape, - op::util::BroadcastBase, - opset8::StridedSlice, - opset8::Slice, - opset8::VariadicSplit, - op::util::GatherBase, - opset8::Concat, - opset8::Convert>(); - matcher_pass_callback callback = [=](pattern::Matcher &m) { - auto node = m.get_match_root(); - const auto & inputs = node->input_values(); - if (any_of(inputs.begin(), inputs.end(), [](const Output & output) { +public: + NGRAPH_RTTI_DECLARATION; + InitNMSPath() { + MATCHER_SCOPE(InitNMSPath); + auto nms_pattern = + pattern::wrap_type(); + matcher_pass_callback callback = [=](pattern::Matcher& m) { + const auto& out_nodes = m.get_match_root()->output(0).get_target_inputs(); + for (const auto& out_node : out_nodes) { + ov::set_nms_selected_indices(out_node.get_node()); + } + return true; + }; + auto m = make_shared(nms_pattern, matcher_name); + register_matcher(m, callback); + } +}; +class PropagateNMSPath : public pass::MatcherPass { +public: + NGRAPH_RTTI_DECLARATION; + PropagateNMSPath() { + MATCHER_SCOPE(PropagateNMSPath); + auto node_pattern = pattern::wrap_type(); + matcher_pass_callback callback = [=](pattern::Matcher& m) { + auto node = m.get_match_root(); + const auto& inputs = node->input_values(); + if (any_of(inputs.begin(), inputs.end(), [](const Output& output) { return ov::has_nms_selected_indices(output.get_node()); })) { - ov::set_nms_selected_indices(node.get()); - } + ov::set_nms_selected_indices(node.get()); + } + return false; + }; + auto m = make_shared(node_pattern, matcher_name); + register_matcher(m, callback); + } +}; +class UpdateConvertGather : public pass::MatcherPass { +public: + NGRAPH_RTTI_DECLARATION; + UpdateConvertGather() { + MATCHER_SCOPE(UpdateConvertGather); + auto node_pattern = pattern::wrap_type(); + matcher_pass_callback callback = [=](pattern::Matcher& m) { + auto gather = m.get_match_root(); + auto indices = gather->input_value(1); + if (!ov::has_nms_selected_indices(indices.get_node())) return false; - }; - auto m = make_shared(node_pattern, matcher_name); - register_matcher(m, callback); - } - }; -class UpdateConvertGather: public pass::MatcherPass { - public: - NGRAPH_RTTI_DECLARATION; - UpdateConvertGather(){ - MATCHER_SCOPE(UpdateConvertGather); - auto node_pattern = pattern::wrap_type(); - matcher_pass_callback callback = [=](pattern::Matcher &m) { - auto gather = m.get_match_root(); - auto indices = gather->input_value(1); - if (!ov::has_nms_selected_indices(indices.get_node())) - return false; - gather->get_rt_info()["dontReverseIndices"] = true; - auto out_type = (indices.get_element_type() == element::i64 ? 
element::u64 : element::u32); - auto existing_convert = dynamic_pointer_cast(indices.get_node_shared_ptr()); - if (existing_convert && indices.get_target_inputs().size() == 1) { - existing_convert->set_convert_element_type(out_type); - existing_convert->validate_and_infer_types(); - } else { - auto new_convert_to_unsigned = make_shared(indices, out_type); - gather->input(1).replace_source_output(new_convert_to_unsigned); - copy_runtime_info(gather, new_convert_to_unsigned); - } - return true; - }; - auto m = make_shared(node_pattern, matcher_name); - register_matcher(m, callback); - } - }; -}// namespace pass -}// namespace ngraph + gather->get_rt_info()["dontReverseIndices"] = true; + auto out_type = (indices.get_element_type() == element::i64 ? element::u64 : element::u32); + auto existing_convert = dynamic_pointer_cast(indices.get_node_shared_ptr()); + if (existing_convert && indices.get_target_inputs().size() == 1) { + existing_convert->set_convert_element_type(out_type); + existing_convert->validate_and_infer_types(); + } else { + auto new_convert_to_unsigned = make_shared(indices, out_type); + gather->input(1).replace_source_output(new_convert_to_unsigned); + copy_runtime_info(gather, new_convert_to_unsigned); + } + return true; + }; + auto m = make_shared(node_pattern, matcher_name); + register_matcher(m, callback); + } +}; +} // namespace pass +} // namespace ngraph NGRAPH_RTTI_DEFINITION(ngraph::pass::InitNMSPath, "InitNMSPath", 0); NGRAPH_RTTI_DEFINITION(ngraph::pass::PropagateNMSPath, "PropagateNMSPath", 0); diff --git a/src/common/transformations/src/transformations/common_optimizations/convert_quantize_dequantize.cpp b/src/common/transformations/src/transformations/common_optimizations/convert_quantize_dequantize.cpp index ae4d10eea4a..ee15d1458cd 100644 --- a/src/common/transformations/src/transformations/common_optimizations/convert_quantize_dequantize.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/convert_quantize_dequantize.cpp @@ -3,25 +3,25 @@ // #include "transformations/common_optimizations/convert_quantize_dequantize.hpp" -#include "transformations/utils/utils.hpp" -#include "itt.hpp" #include +#include +#include +#include +#include #include -#include -#include -#include -#include - +#include "itt.hpp" +#include "transformations/utils/utils.hpp" // ConvertQuantizeDequantize converts Quantize/Dequantize pair to a single FakeQuantize. // Since Quantize is decomposed to FakeQuantize and Dequantize is decomposed to Subtract->Multiply, // the full pattern to match is presented on the left hand side of the graph below. // On the right hand side is the graph after transformation. // Currently transformation supports only i8 and u8 quantized data type. -// That implies 'levels' attribute to be 256, as well as (output_low, output_high) be (-128, 127) or (0, 255) (depends on sign of the quantized data type). -// Another limitation is that 'zero_point' and 'scale' have to be broadcastable to the output of FakeQuantize. +// That implies 'levels' attribute to be 256, as well as (output_low, output_high) be (-128, 127) or (0, 255) (depends +// on sign of the quantized data type). Another limitation is that 'zero_point' and 'scale' have to be broadcastable to +// the output of FakeQuantize. 
// // // | | | | | @@ -65,13 +65,16 @@ ngraph::pass::ConvertQuantizeDequantize::ConvertQuantizeDequantize() { auto input_high_pattern = ngraph::pattern::any_input(); auto output_low_pattern = ngraph::pattern::wrap_type(); auto output_high_pattern = ngraph::pattern::wrap_type(); - auto fq_pattern = ngraph::pattern::wrap_type({data_pattern, input_low_pattern, - input_high_pattern, output_low_pattern, - output_high_pattern}); - auto convert1_pattern = ngraph::pattern::wrap_type({fq_pattern}, pattern::type_matches_any({element::i8, element::u8})); - auto convert2_pattern = ngraph::pattern::wrap_type({convert1_pattern}, pattern::type_matches(element::f32)); + auto fq_pattern = ngraph::pattern::wrap_type( + {data_pattern, input_low_pattern, input_high_pattern, output_low_pattern, output_high_pattern}); + auto convert1_pattern = + ngraph::pattern::wrap_type({fq_pattern}, + pattern::type_matches_any({element::i8, element::u8})); + auto convert2_pattern = + ngraph::pattern::wrap_type({convert1_pattern}, pattern::type_matches(element::f32)); auto zero_point_pattern = ngraph::pattern::any_input(); - auto sub_pattern = ngraph::pattern::wrap_type({convert2_pattern, zero_point_pattern}, pattern::consumers_count(1)); + auto sub_pattern = ngraph::pattern::wrap_type({convert2_pattern, zero_point_pattern}, + pattern::consumers_count(1)); auto scale_pattern = ngraph::pattern::any_input(); auto mul_pattern = ngraph::pattern::wrap_type({sub_pattern, scale_pattern}); @@ -85,10 +88,12 @@ ngraph::pass::ConvertQuantizeDequantize::ConvertQuantizeDequantize() { auto data = pattern_map[data_pattern]; auto input_low = pattern_map[input_low_pattern]; auto input_high = pattern_map[input_high_pattern]; - auto output_low = std::dynamic_pointer_cast(pattern_map[output_low_pattern].get_node_shared_ptr()); + auto output_low = + std::dynamic_pointer_cast(pattern_map[output_low_pattern].get_node_shared_ptr()); if (!output_low) return false; - auto output_high = std::dynamic_pointer_cast(pattern_map[output_high_pattern].get_node_shared_ptr()); + auto output_high = + std::dynamic_pointer_cast(pattern_map[output_high_pattern].get_node_shared_ptr()); if (!output_high) return false; auto fq = std::dynamic_pointer_cast(pattern_map[fq_pattern].get_node_shared_ptr()); @@ -133,9 +138,11 @@ ngraph::pass::ConvertQuantizeDequantize::ConvertQuantizeDequantize() { } std::shared_ptr new_out_low = std::make_shared( - std::make_shared(output_low, zero_point), scale); + std::make_shared(output_low, zero_point), + scale); std::shared_ptr new_out_high = std::make_shared( - std::make_shared(output_high, zero_point), scale); + std::make_shared(output_high, zero_point), + scale); // check if new_out_low/high shapes are broadcastable to FQ's input auto data_shape = data.get_partial_shape(); @@ -155,7 +162,12 @@ ngraph::pass::ConvertQuantizeDequantize::ConvertQuantizeDequantize() { if (const_out_high) new_out_high = const_out_high; - auto new_fq = std::make_shared(data, input_low, input_high, new_out_low, new_out_high, levels); + auto new_fq = std::make_shared(data, + input_low, + input_high, + new_out_low, + new_out_high, + levels); new_fq->set_friendly_name(mul->get_friendly_name()); copy_runtime_info({fq, convert1.get_node_shared_ptr(), convert2.get_node_shared_ptr()}, new_fq); diff --git a/src/common/transformations/src/transformations/common_optimizations/depth_to_space_fusion.cpp b/src/common/transformations/src/transformations/common_optimizations/depth_to_space_fusion.cpp index 3ca49a88926..b95c4428949 100644 --- 
a/src/common/transformations/src/transformations/common_optimizations/depth_to_space_fusion.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/depth_to_space_fusion.cpp @@ -3,20 +3,21 @@ // #include "transformations/common_optimizations/depth_to_space_fusion.hpp" -#include "itt.hpp" #include +#include +#include +#include #include -#include -#include -#include - +#include "itt.hpp" #include "transformations/utils/utils.hpp" namespace { -bool check_block_first(const ngraph::PartialShape& shape_input, const ngraph::PartialShape& shape_reshape_before, - const ngraph::AxisVector& permutation, const ngraph::PartialShape& shape_reshape_after, +bool check_block_first(const ngraph::PartialShape& shape_input, + const ngraph::PartialShape& shape_reshape_before, + const ngraph::AxisVector& permutation, + const ngraph::PartialShape& shape_reshape_after, size_t& possible_block_size) { const auto input_rank = shape_input.rank(); auto spatial_dims = input_rank.get_length() - 2; @@ -50,7 +51,8 @@ bool check_block_first(const ngraph::PartialShape& shape_input, const ngraph::Pa return false; } - // y = reshape(x'', [N, C / (block_size ^ K), D1 * block_size, D2 * block_size, D3 * block_size, ..., DK * block_size]) + // y = reshape(x'', [N, C / (block_size ^ K), D1 * block_size, D2 * block_size, D3 * block_size, ..., DK * + // block_size]) expected_shape = {shape_input[0], c_dim}; for (int i = 2; i < input_rank.get_length(); ++i) expected_shape.push_back(shape_input[i] * possible_block_size); @@ -62,8 +64,10 @@ bool check_block_first(const ngraph::PartialShape& shape_input, const ngraph::Pa return true; } -bool check_depth_first(const ngraph::PartialShape& shape_input, const ngraph::PartialShape& shape_reshape_before, - const ngraph::AxisVector& permutation, const ngraph::PartialShape& shape_reshape_after, +bool check_depth_first(const ngraph::PartialShape& shape_input, + const ngraph::PartialShape& shape_reshape_before, + const ngraph::AxisVector& permutation, + const ngraph::PartialShape& shape_reshape_after, size_t& possible_block_size) { const auto input_rank = shape_input.rank(); auto spatial_dims = input_rank.get_length() - 2; @@ -95,7 +99,8 @@ bool check_depth_first(const ngraph::PartialShape& shape_input, const ngraph::Pa return false; } - // y = reshape(x'', [N, C / (block_size ^ K), D1 * block_size, D2 * block_size, D3 * block_size, ..., DK * block_size]) + // y = reshape(x'', [N, C / (block_size ^ K), D1 * block_size, D2 * block_size, D3 * block_size, ..., DK * + // block_size]) expected_shape = {shape_input[0], c_dim}; for (int i = 2; i < input_rank.get_length(); ++i) expected_shape.push_back(shape_input[i] * possible_block_size); @@ -107,7 +112,7 @@ bool check_depth_first(const ngraph::PartialShape& shape_input, const ngraph::Pa return true; } -} // namespace +} // namespace NGRAPH_RTTI_DEFINITION(ngraph::pass::DepthToSpaceFusion, "DepthToSpaceFusion", 0); @@ -117,9 +122,11 @@ ngraph::pass::DepthToSpaceFusion::DepthToSpaceFusion() { auto input1 = ngraph::pattern::any_input(); auto input2 = ngraph::pattern::any_input(); auto input3 = ngraph::pattern::any_input(); - auto reshape_before = ngraph::pattern::wrap_type({ input0, input1 }, pattern::consumers_count(1)); - auto permute = ngraph::pattern::wrap_type({ reshape_before, input2 }, pattern::consumers_count(1)); - auto reshape_after = ngraph::pattern::wrap_type({ permute, input3 }); + auto reshape_before = + ngraph::pattern::wrap_type({input0, input1}, pattern::consumers_count(1)); + auto permute = + 
ngraph::pattern::wrap_type({reshape_before, input2}, pattern::consumers_count(1)); + auto reshape_after = ngraph::pattern::wrap_type({permute, input3}); ngraph::matcher_pass_callback callback = [this](pattern::Matcher& m) { auto reshape_after = std::dynamic_pointer_cast(m.get_match_root()); @@ -127,7 +134,8 @@ ngraph::pass::DepthToSpaceFusion::DepthToSpaceFusion() { return false; } - auto permute = std::dynamic_pointer_cast(reshape_after->get_input_node_shared_ptr(0)); + auto permute = + std::dynamic_pointer_cast(reshape_after->get_input_node_shared_ptr(0)); if (!permute) { return false; } @@ -148,13 +156,16 @@ ngraph::pass::DepthToSpaceFusion::DepthToSpaceFusion() { } // check that all dimensions except batch are static - if (std::any_of(p_shape_input.begin() + 1, p_shape_input.end(), [](const ngraph::Dimension& x) { return x.is_dynamic(); })) { + if (std::any_of(p_shape_input.begin() + 1, p_shape_input.end(), [](const ngraph::Dimension& x) { + return x.is_dynamic(); + })) { return false; } // input shape: [ batch, C, spatial_dims], expected_shape = spatial_dims.size() * 2 + 2 auto expected_shape_size = (input_rank.get_length() - 2) * 2 + 2; - if (input_rank != p_shape_reshape_after.rank().get_length() || p_shape_reshape_before.rank().get_length() != expected_shape_size || + if (input_rank != p_shape_reshape_after.rank().get_length() || + p_shape_reshape_before.rank().get_length() != expected_shape_size || p_shape_permute.rank().get_length() != expected_shape_size) { return false; } @@ -170,13 +181,18 @@ ngraph::pass::DepthToSpaceFusion::DepthToSpaceFusion() { size_t block_size; if (check_depth_first(p_shape_input, p_shape_reshape_before, permutation, p_shape_reshape_after, block_size)) { mode = ngraph::opset3::DepthToSpace::DepthToSpaceMode::DEPTH_FIRST; - } else if (check_block_first(p_shape_input, p_shape_reshape_before, permutation, p_shape_reshape_after, block_size)) { + } else if (check_block_first(p_shape_input, + p_shape_reshape_before, + permutation, + p_shape_reshape_after, + block_size)) { mode = ngraph::opset3::DepthToSpace::DepthToSpaceMode::BLOCKS_FIRST; } else { return false; } - auto depth_to_space = std::make_shared(reshape_before->input_value(0), mode, block_size); + auto depth_to_space = + std::make_shared(reshape_before->input_value(0), mode, block_size); depth_to_space->set_friendly_name(reshape_after->get_friendly_name()); ngraph::copy_runtime_info({reshape_before, permute, reshape_after}, depth_to_space); ngraph::replace_node(reshape_after, depth_to_space); diff --git a/src/common/transformations/src/transformations/common_optimizations/dilated_convolution_converter.cpp b/src/common/transformations/src/transformations/common_optimizations/dilated_convolution_converter.cpp index 846d007a953..f23cfa6d4a4 100644 --- a/src/common/transformations/src/transformations/common_optimizations/dilated_convolution_converter.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/dilated_convolution_converter.cpp @@ -3,16 +3,15 @@ // #include "transformations/common_optimizations/dilated_convolution_converter.hpp" -#include "transformations/utils/utils.hpp" #include +#include +#include +#include #include -#include -#include -#include #include "itt.hpp" - +#include "transformations/utils/utils.hpp" NGRAPH_RTTI_DEFINITION(ngraph::pass::DilatedConvolutionConverter, "DilatedConvolutionConverter", 0); @@ -22,28 +21,34 @@ ngraph::pass::DilatedConvolutionConverter::DilatedConvolutionConverter() { auto block_shape_pattern = pattern::wrap_type(); auto 
pads_begin_pattern = pattern::wrap_type(); auto pads_end_pattern = pattern::wrap_type(); - auto space_to_batch_pattern = pattern::wrap_type({data_pattern, block_shape_pattern, pads_begin_pattern, pads_end_pattern}); + auto space_to_batch_pattern = pattern::wrap_type( + {data_pattern, block_shape_pattern, pads_begin_pattern, pads_end_pattern}); auto conv_pattern = pattern::wrap_type({space_to_batch_pattern, pattern::any_input()}); auto crops_begin_pattern = pattern::wrap_type(); auto crops_end_pattern = pattern::wrap_type(); - auto batch_to_space_pattern = pattern::wrap_type({conv_pattern, pattern::any_input(), - crops_begin_pattern, crops_end_pattern}); + auto batch_to_space_pattern = pattern::wrap_type( + {conv_pattern, pattern::any_input(), crops_begin_pattern, crops_end_pattern}); matcher_pass_callback callback = [=](pattern::Matcher& m) { const auto& pattern_map = m.get_pattern_value_map(); - auto block_shape = std::dynamic_pointer_cast(pattern_map.at(block_shape_pattern).get_node_shared_ptr()); + auto block_shape = + std::dynamic_pointer_cast(pattern_map.at(block_shape_pattern).get_node_shared_ptr()); if (!block_shape) return false; - auto pads_begin = std::dynamic_pointer_cast(pattern_map.at(pads_begin_pattern).get_node_shared_ptr()); + auto pads_begin = + std::dynamic_pointer_cast(pattern_map.at(pads_begin_pattern).get_node_shared_ptr()); if (!pads_begin) return false; - auto pads_end = std::dynamic_pointer_cast(pattern_map.at(pads_end_pattern).get_node_shared_ptr()); + auto pads_end = + std::dynamic_pointer_cast(pattern_map.at(pads_end_pattern).get_node_shared_ptr()); if (!pads_end) return false; - auto crops_begin = std::dynamic_pointer_cast(pattern_map.at(crops_begin_pattern).get_node_shared_ptr()); + auto crops_begin = + std::dynamic_pointer_cast(pattern_map.at(crops_begin_pattern).get_node_shared_ptr()); if (!crops_begin) return false; - auto crops_end = std::dynamic_pointer_cast(pattern_map.at(crops_end_pattern).get_node_shared_ptr()); + auto crops_end = + std::dynamic_pointer_cast(pattern_map.at(crops_end_pattern).get_node_shared_ptr()); if (!crops_end) return false; auto conv = std::dynamic_pointer_cast(pattern_map.at(conv_pattern).get_node_shared_ptr()); @@ -57,10 +62,7 @@ ngraph::pass::DilatedConvolutionConverter::DilatedConvolutionConverter() { dilations[i] = block_shape_val[i + 2]; auto pads_begin_val = pads_begin->cast_vector(); auto pads_end_val = pads_end->cast_vector(); - if (!(pads_begin_val[0] == 0 && - pads_begin_val[1] == 0 && - pads_end_val[0] == 0 && - pads_end_val[1] == 0)) + if (!(pads_begin_val[0] == 0 && pads_begin_val[1] == 0 && pads_end_val[0] == 0 && pads_end_val[1] == 0)) return false; auto crops_begin_val = crops_begin->cast_vector(); auto crops_end_val = crops_end->cast_vector(); @@ -70,18 +72,24 @@ ngraph::pass::DilatedConvolutionConverter::DilatedConvolutionConverter() { std::vector new_pads_end; for (size_t i = 2; i < pads_end_val.size(); i++) new_pads_end.push_back(pads_end_val[i] - crops_end_val[i]); - auto new_conv = register_new_node(pattern_map.at(data_pattern), conv->input_value(1), - conv->get_strides(), new_pads_begin, new_pads_end, dilations, op::PadType::EXPLICIT); + auto new_conv = register_new_node(pattern_map.at(data_pattern), + conv->input_value(1), + conv->get_strides(), + new_pads_begin, + new_pads_end, + dilations, + op::PadType::EXPLICIT); auto batch_to_space = pattern_map.at(batch_to_space_pattern).get_node_shared_ptr(); new_conv->set_friendly_name(batch_to_space->get_friendly_name()); - copy_runtime_info({ - 
pattern_map.at(space_to_batch_pattern).get_node_shared_ptr(), - pattern_map.at(conv_pattern).get_node_shared_ptr(), - batch_to_space, - }, - new_conv); + copy_runtime_info( + { + pattern_map.at(space_to_batch_pattern).get_node_shared_ptr(), + pattern_map.at(conv_pattern).get_node_shared_ptr(), + batch_to_space, + }, + new_conv); replace_node(batch_to_space, new_conv); return true; diff --git a/src/common/transformations/src/transformations/common_optimizations/dimension_tracking.cpp b/src/common/transformations/src/transformations/common_optimizations/dimension_tracking.cpp index 547ea7e5b42..baa521905ea 100644 --- a/src/common/transformations/src/transformations/common_optimizations/dimension_tracking.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/dimension_tracking.cpp @@ -161,7 +161,8 @@ P2Btype ov::batch_util::find_batch(const std::shared_ptr& f) { } void ov::batch_util::restore_original_dimensions( - const std::map, ov::PartialShape>& parameter_to_shape, bool leave_batch_dynamic) { + const std::map, ov::PartialShape>& parameter_to_shape, + bool leave_batch_dynamic) { for (const auto& item : parameter_to_shape) { const auto& batch_marked_shape = item.first->get_partial_shape(); auto original_shape = item.second; @@ -227,7 +228,7 @@ bool ov::batch_util::detach_detection_output(const std::shared_ptr& f ResultVector new_outputs, outputs_to_delete; for (auto& result_node : f->get_results()) { auto do_node = result_node->input_value(0).get_node_shared_ptr(); - if (ov::is_type(do_node)) // cases with do->convert->result + if (ov::is_type(do_node)) // cases with do->convert->result do_node = do_node->get_input_node_shared_ptr(0); if (ov::is_type(do_node) || ov::is_type(do_node)) { for (auto& new_result_src : do_node->input_values()) { @@ -278,7 +279,7 @@ bool ov::pass::FindBatch::run_on_model(const std::shared_ptr& m) { bool failed_to_propagate_batch = ov::batch_util::check_batch_tracks_through_all_the_nodes(m); - if (failed_to_propagate_batch) { // restore original input shape with labels + if (failed_to_propagate_batch) { // restore original input shape with labels for (const auto& item : parameter_to_shape) item.first->set_partial_shape(item.second); } else { // restore original input shape with batch labels diff --git a/src/common/transformations/src/transformations/common_optimizations/disable_shapeof_constant_folding.cpp b/src/common/transformations/src/transformations/common_optimizations/disable_shapeof_constant_folding.cpp index 7f766c0daf4..74cef221073 100644 --- a/src/common/transformations/src/transformations/common_optimizations/disable_shapeof_constant_folding.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/disable_shapeof_constant_folding.cpp @@ -3,19 +3,17 @@ // #include - -#include #include #include -#include - +#include #include +#include NGRAPH_RTTI_DEFINITION(ngraph::pass::DisableShapeOfConstantFolding, "DisableShapeOfConstantFolding", 0); ngraph::pass::DisableShapeOfConstantFolding::DisableShapeOfConstantFolding() { - auto shape_of = pattern::wrap_type([=](const Output & output) { - const auto & shape = output.get_partial_shape(); + auto shape_of = pattern::wrap_type([=](const Output& output) { + const auto& shape = output.get_partial_shape(); return shape.is_dynamic() || shape_size(shape.get_shape()) != 1; }); diff --git a/src/common/transformations/src/transformations/common_optimizations/divide_fusion.cpp b/src/common/transformations/src/transformations/common_optimizations/divide_fusion.cpp index 
0f5738b1f81..40135a5dd3e 100644 --- a/src/common/transformations/src/transformations/common_optimizations/divide_fusion.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/divide_fusion.cpp @@ -22,13 +22,13 @@ ngraph::pass::DivideFusion::DivideFusion() { auto p_mul_input = pattern::any_input(); auto p_mul = ngraph::pattern::wrap_type({p_mul_input, p_pow}); - matcher_pass_callback callback = [=](pattern::Matcher &m) { - const auto & pattern_to_output = m.get_pattern_value_map(); - const auto & minuend_input = pattern_to_output.at(p_mul_input); - const auto & subtrahend_input = pattern_to_output.at(p_pow_input); - const auto & mul = pattern_to_output.at(p_mul).get_node_shared_ptr(); - const auto & pow = pattern_to_output.at(p_pow).get_node_shared_ptr(); - const auto & minus_one = pattern_to_output.at(p_pow_const).get_node_shared_ptr(); + matcher_pass_callback callback = [=](pattern::Matcher& m) { + const auto& pattern_to_output = m.get_pattern_value_map(); + const auto& minuend_input = pattern_to_output.at(p_mul_input); + const auto& subtrahend_input = pattern_to_output.at(p_pow_input); + const auto& mul = pattern_to_output.at(p_mul).get_node_shared_ptr(); + const auto& pow = pattern_to_output.at(p_pow).get_node_shared_ptr(); + const auto& minus_one = pattern_to_output.at(p_pow_const).get_node_shared_ptr(); auto minus_one_const = std::dynamic_pointer_cast(minus_one); if (!minus_one_const || !op::util::has_constant_value(minus_one_const, -1.)) { diff --git a/src/common/transformations/src/transformations/common_optimizations/division_by_zero_fp16_resolver.cpp b/src/common/transformations/src/transformations/common_optimizations/division_by_zero_fp16_resolver.cpp index 3d970ee5f1e..c7a327d3b07 100644 --- a/src/common/transformations/src/transformations/common_optimizations/division_by_zero_fp16_resolver.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/division_by_zero_fp16_resolver.cpp @@ -2,17 +2,17 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "itt.hpp" #include "transformations/common_optimizations/division_by_zero_fp16_resolver.hpp" -#include "transformations/utils/utils.hpp" #include +#include +#include +#include #include -#include +#include "itt.hpp" #include "ngraph/rt_info.hpp" -#include -#include +#include "transformations/utils/utils.hpp" NGRAPH_RTTI_DEFINITION(ov::pass::DivisionByZeroFP16Resolver, "DivisionByZeroFP16Resolver", 0); diff --git a/src/common/transformations/src/transformations/common_optimizations/eliminate_unsqueeze_gather.cpp b/src/common/transformations/src/transformations/common_optimizations/eliminate_unsqueeze_gather.cpp index 0e03abe7272..833e6b99d0b 100644 --- a/src/common/transformations/src/transformations/common_optimizations/eliminate_unsqueeze_gather.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/eliminate_unsqueeze_gather.cpp @@ -8,6 +8,7 @@ #include #include #include + #include "itt.hpp" NGRAPH_RTTI_DEFINITION(ngraph::pass::EliminateUnsqueezeGather, "EliminateUnsqueezeGather", 0); @@ -17,10 +18,12 @@ ngraph::pass::EliminateUnsqueezeGather::EliminateUnsqueezeGather() { // Remove Unsqueeze + Gather pair, if Gather gathers data by `1` dimension that was previously added by Unsqueeze const auto unsqueezeAxis = ngraph::pattern::any_input(); const auto unsqueezeInput = ngraph::pattern::any_input(); - const auto unsqueeze = ngraph::pattern::wrap_type({unsqueezeInput, unsqueezeAxis}, pattern::consumers_count(1)); + const auto unsqueeze = 
ngraph::pattern::wrap_type({unsqueezeInput, unsqueezeAxis}, + pattern::consumers_count(1)); const auto gatherIndices = ngraph::opset6::Constant::create(ngraph::element::i64, ngraph::Shape{}, {0}); const auto gatherAxis = ngraph::pattern::any_input(); - const auto gather = ngraph::pattern::wrap_type({unsqueeze, gatherIndices, gatherAxis}); + const auto gather = + ngraph::pattern::wrap_type({unsqueeze, gatherIndices, gatherAxis}); ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher& m) { auto& patternValue = m.get_pattern_value_map(); @@ -28,7 +31,8 @@ ngraph::pass::EliminateUnsqueezeGather::EliminateUnsqueezeGather() { const auto& m_unsqueezeAxis = patternValue.at(unsqueezeAxis); const auto& m_gatherAxis = patternValue.at(gatherAxis); - const auto& unsqueezeAxisNode = ngraph::as_type_ptr(m_unsqueezeAxis.get_node_shared_ptr()); + const auto& unsqueezeAxisNode = + ngraph::as_type_ptr(m_unsqueezeAxis.get_node_shared_ptr()); const auto& gatherAxisNode = ngraph::as_type_ptr(m_gatherAxis.get_node_shared_ptr()); if (!unsqueezeAxisNode || !gatherAxisNode) { @@ -68,10 +72,12 @@ ngraph::pass::EliminateGatherUnsqueeze::EliminateGatherUnsqueeze() { const auto gather_indices_label = ngraph::pattern::wrap_type(pattern::rank_equals(0)); const auto gather_axis_label = ngraph::pattern::wrap_type(); const auto gather_label = ngraph::pattern::wrap_type( - {ngraph::pattern::any_input(), gather_indices_label, gather_axis_label}, pattern::rank_equals(0)); + {ngraph::pattern::any_input(), gather_indices_label, gather_axis_label}, + pattern::rank_equals(0)); - const auto unsqueeze_label = ngraph::pattern::wrap_type( - {gather_label, ngraph::pattern::any_input()}, pattern::rank_equals(1)); + const auto unsqueeze_label = + ngraph::pattern::wrap_type({gather_label, ngraph::pattern::any_input()}, + pattern::rank_equals(1)); ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher& m) { auto pattern_nodes = m.get_pattern_map(); @@ -80,7 +86,10 @@ ngraph::pass::EliminateGatherUnsqueeze::EliminateGatherUnsqueeze() { auto& gather = pattern_nodes.at(gather_label); auto& unsqueeze = pattern_nodes.at(unsqueeze_label); - auto new_indices = ngraph::op::util::make_try_fold(gather_indices, opset6::Constant::create(element::i32, {1}, {1}), false); + auto new_indices = + ngraph::op::util::make_try_fold(gather_indices, + opset6::Constant::create(element::i32, {1}, {1}), + false); auto new_gather = gather->clone_with_new_inputs({gather->input_value(0), new_indices, gather->input_value(2)}); new_gather->set_friendly_name(gather->get_friendly_name()); diff --git a/src/common/transformations/src/transformations/common_optimizations/fq_mul_fusion.cpp b/src/common/transformations/src/transformations/common_optimizations/fq_mul_fusion.cpp index 2bfaf08f42c..2b8af6c28e8 100644 --- a/src/common/transformations/src/transformations/common_optimizations/fq_mul_fusion.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/fq_mul_fusion.cpp @@ -2,17 +2,17 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "itt.hpp" #include "transformations/common_optimizations/fq_mul_fusion.hpp" -#include "transformations/utils/utils.hpp" #include -#include - #include #include #include #include +#include + +#include "itt.hpp" +#include "transformations/utils/utils.hpp" NGRAPH_RTTI_DEFINITION(ngraph::pass::FakeQuantizeMulFusion, "FakeQuantizeMulFusion", 0); @@ -43,25 +43,22 @@ ngraph::pass::FakeQuantizeMulFusion::FakeQuantizeMulFusion() { const auto fq_output_low_p = ngraph::pattern::any_input(); const 
auto fq_output_high_p = ngraph::pattern::any_input(); - const auto fq_node_p = ngraph::pattern::wrap_type({data_p, - ngraph::pattern::any_input(), - ngraph::pattern::any_input(), - fq_output_low_p, - fq_output_high_p}, - pattern::consumers_count(1)); + const auto fq_node_p = ngraph::pattern::wrap_type( + {data_p, ngraph::pattern::any_input(), ngraph::pattern::any_input(), fq_output_low_p, fq_output_high_p}, + pattern::consumers_count(1)); const auto mul_constant_p = ngraph::pattern::wrap_type(); - const auto mul_node_p = ngraph::pattern::wrap_type( - {fq_node_p, mul_constant_p}, pattern::consumers_count(1)); + const auto mul_node_p = + ngraph::pattern::wrap_type({fq_node_p, mul_constant_p}, pattern::consumers_count(1)); - ngraph::matcher_pass_callback callback = [=](pattern::Matcher &m) { + ngraph::matcher_pass_callback callback = [=](pattern::Matcher& m) { const auto& pattern_map = m.get_pattern_value_map(); const auto& data = pattern_map.at(data_p); const auto fq_node = pattern_map.at(fq_node_p).get_node_shared_ptr(); - const auto & original_output_low = pattern_map.at(fq_output_low_p); - const auto & original_output_high = pattern_map.at(fq_output_high_p); + const auto& original_output_low = pattern_map.at(fq_output_low_p); + const auto& original_output_high = pattern_map.at(fq_output_high_p); auto mul_constant = pattern_map.at(mul_constant_p).get_node_shared_ptr(); auto mul_constant_shape = mul_constant->get_shape(); bool is_single_value = shape_size(mul_constant_shape) == 1; @@ -73,7 +70,8 @@ ngraph::pass::FakeQuantizeMulFusion::FakeQuantizeMulFusion() { is_single_value = op::util::get_single_value(constant, v); if (is_single_value) { mul_constant_shape = Shape{1}; - mul_constant = std::make_shared(mul_constant->get_element_type(), mul_constant_shape, v); + mul_constant = + std::make_shared(mul_constant->get_element_type(), mul_constant_shape, v); } } } @@ -81,15 +79,14 @@ ngraph::pass::FakeQuantizeMulFusion::FakeQuantizeMulFusion() { if (!is_single_value) { auto fq_outputs = fq_node->get_users(); // Convolution and GroupConvolution LP transformations require output low/high to have the same values - bool fq_output_is_conv = std::any_of(fq_outputs.begin(), fq_outputs.end(), - [] (const std::shared_ptr& node) -> bool { - return is_type(node) || - is_type(node); - }); + bool fq_output_is_conv = + std::any_of(fq_outputs.begin(), fq_outputs.end(), [](const std::shared_ptr& node) -> bool { + return is_type(node) || is_type(node); + }); if (fq_output_is_conv) { return false; } - const auto & data_rank = data.get_partial_shape().rank(); + const auto& data_rank = data.get_partial_shape().rank(); if (data_rank.is_dynamic()) { return false; } @@ -97,12 +94,14 @@ ngraph::pass::FakeQuantizeMulFusion::FakeQuantizeMulFusion() { auto diff = rank - mul_constant_shape.size(); if (diff > 0) { mul_constant_shape.insert(mul_constant_shape.begin(), diff, 1); - mul_constant = std::make_shared(mul_constant, - op::Constant::create(element::i64, Shape{mul_constant_shape.size()}, mul_constant_shape), false); + mul_constant = std::make_shared( + mul_constant, + op::Constant::create(element::i64, Shape{mul_constant_shape.size()}, mul_constant_shape), + false); } } - auto get_adjusted_output_range = [&] (const Output& node) -> std::shared_ptr { + auto get_adjusted_output_range = [&](const Output& node) -> std::shared_ptr { auto ret = std::make_shared(node, mul_constant); copy_runtime_info(node.get_node_shared_ptr(), ret); auto constant = get_constant_from_source(ret); @@ -112,10 +111,10 @@ 
ngraph::pass::FakeQuantizeMulFusion::FakeQuantizeMulFusion() { }; const auto new_fq_node = fq_node->clone_with_new_inputs({fq_node->input_value(0), - fq_node->input_value(1), - fq_node->input_value(2), - get_adjusted_output_range(original_output_low), - get_adjusted_output_range(original_output_high)}); + fq_node->input_value(1), + fq_node->input_value(2), + get_adjusted_output_range(original_output_low), + get_adjusted_output_range(original_output_high)}); bool fq_on_weights = is_type(data.get_node()); if (!fq_on_weights && transformation_callback(new_fq_node)) return false; diff --git a/src/common/transformations/src/transformations/common_optimizations/fq_reshape_fusion.cpp b/src/common/transformations/src/transformations/common_optimizations/fq_reshape_fusion.cpp index aed87c1627e..0194639bfff 100644 --- a/src/common/transformations/src/transformations/common_optimizations/fq_reshape_fusion.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/fq_reshape_fusion.cpp @@ -2,72 +2,78 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "itt.hpp" #include "transformations/common_optimizations/fq_reshape_fusion.hpp" #include -#include - #include #include #include +#include + +#include "itt.hpp" NGRAPH_RTTI_DEFINITION(ngraph::pass::FakeQuantizeReshapeFusion, "FakeQuantizeReshapeFusion", 0); ngraph::pass::FakeQuantizeReshapeFusion::FakeQuantizeReshapeFusion() { MATCHER_SCOPE(FakeQuantizeReshapeFusion); const auto fq_node_p = ngraph::pattern::wrap_type( - {ngraph::pattern::wrap_type(), // for weights only - ngraph::pattern::any_input(), - ngraph::pattern::any_input(), - ngraph::pattern::any_input(), - ngraph::pattern::any_input()}, - pattern::consumers_count(1)); + {ngraph::pattern::wrap_type(), // for weights only + ngraph::pattern::any_input(), + ngraph::pattern::any_input(), + ngraph::pattern::any_input(), + ngraph::pattern::any_input()}, + pattern::consumers_count(1)); const auto reshape_node_p = ngraph::pattern::wrap_type( - {fq_node_p, ngraph::pattern::any_input()}, [](const Output & output) { - // WA: check that all Reshape node consumers are not GroupConvolution operations - const auto & target_inputs = output.get_target_inputs(); - return std::all_of(target_inputs.begin(), target_inputs.end(), - [](const Input & input){ - return input.get_node()->get_type_info() != opset4::GroupConvolution::get_type_info_static(); - }); + {fq_node_p, ngraph::pattern::any_input()}, + [](const Output& output) { + // WA: check that all Reshape node consumers are not GroupConvolution operations + const auto& target_inputs = output.get_target_inputs(); + return std::all_of(target_inputs.begin(), target_inputs.end(), [](const Input& input) { + return input.get_node()->get_type_info() != opset4::GroupConvolution::get_type_info_static(); }); + }); - ngraph::matcher_pass_callback callback = [=](pattern::Matcher &m) { - const auto &pattern_map = m.get_pattern_value_map(); + ngraph::matcher_pass_callback callback = [=](pattern::Matcher& m) { + const auto& pattern_map = m.get_pattern_value_map(); const auto fq_node = pattern_map.at(fq_node_p).get_node_shared_ptr(); if (fq_node->is_dynamic()) return false; - const auto &reshape_node = pattern_map.at(reshape_node_p).get_node_shared_ptr(); - const auto &original_data_rank = fq_node->get_input_shape(0).size(); - OutputVector renewed_inputs = {reshape_node->clone_with_new_inputs({fq_node->input_value(0), reshape_node->input_value(1)})}; + const auto& reshape_node = pattern_map.at(reshape_node_p).get_node_shared_ptr(); + const auto& 
original_data_rank = fq_node->get_input_shape(0).size(); + OutputVector renewed_inputs = { + reshape_node->clone_with_new_inputs({fq_node->input_value(0), reshape_node->input_value(1)})}; for (auto i = 1; i < 5; ++i) { Output limit_input = fq_node->input_value(i); auto limit_shape = limit_input.get_shape(); NGRAPH_CHECK(limit_shape.size() <= original_data_rank, "FakeQuantize limit input has unexpected rank"); - if (limit_shape.size() < original_data_rank) // aligning limit rank with data rank + if (limit_shape.size() < original_data_rank) // aligning limit rank with data rank limit_shape.insert(limit_shape.begin(), original_data_rank - limit_shape.size(), uint64_t(1)); NGRAPH_CHECK(limit_shape.size() == original_data_rank, "FakeQuantize limit input has unexpected rank"); - const auto &limit_size = shape_size(limit_shape); - const auto &max_element = *std::max_element(limit_shape.begin(), limit_shape.end()); - if (max_element == limit_size) { // per-tensor / per-channel limit + const auto& limit_size = shape_size(limit_shape); + const auto& max_element = *std::max_element(limit_shape.begin(), limit_shape.end()); + if (max_element == limit_size) { // per-tensor / per-channel limit auto new_limit_shape = reshape_node->get_output_shape(0); - std::transform(new_limit_shape.begin(), new_limit_shape.end(), new_limit_shape.begin(), - [max_element](size_t &dim) { return dim == max_element ? max_element : 1; }); - const auto &new_limit_size = shape_size(new_limit_shape); - if (new_limit_size == limit_size) { // we tracked future channel placement + std::transform(new_limit_shape.begin(), + new_limit_shape.end(), + new_limit_shape.begin(), + [max_element](size_t& dim) { + return dim == max_element ? max_element : 1; + }); + const auto& new_limit_size = shape_size(new_limit_shape); + if (new_limit_size == limit_size) { // we tracked future channel placement if (new_limit_shape == limit_input.get_shape()) renewed_inputs.push_back(limit_input); else renewed_inputs.push_back(reshape_node->clone_with_new_inputs( - {limit_input, opset4::Constant::create(element::i64, {new_limit_shape.size()}, new_limit_shape)})); + {limit_input, + opset4::Constant::create(element::i64, {new_limit_shape.size()}, new_limit_shape)})); continue; } } // resulting FQ will become or already is more than per-tensor / per-channel return false; } - for (auto &new_input : renewed_inputs) + for (auto& new_input : renewed_inputs) copy_runtime_info({reshape_node, fq_node}, new_input.get_node_shared_ptr()); const auto new_fq_node = fq_node->clone_with_new_inputs(renewed_inputs); replace_node(reshape_node, new_fq_node); diff --git a/src/common/transformations/src/transformations/common_optimizations/gelu_fusion.cpp b/src/common/transformations/src/transformations/common_optimizations/gelu_fusion.cpp index 7b92e3dafeb..14adcb372f2 100644 --- a/src/common/transformations/src/transformations/common_optimizations/gelu_fusion.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/gelu_fusion.cpp @@ -18,8 +18,7 @@ NGRAPH_RTTI_DEFINITION(ngraph::pass::GeluFusion, "GeluFusion", 0); -NGRAPH_RTTI_DEFINITION(ngraph::pass::GeluFusionWithErfOne, - "GeluFusionWithErfOne", 0); +NGRAPH_RTTI_DEFINITION(ngraph::pass::GeluFusionWithErfOne, "GeluFusionWithErfOne", 0); ngraph::pass::GeluFusionWithErfOne::GeluFusionWithErfOne() { MATCHER_SCOPE(GeluFusionWithErfOne); @@ -27,42 +26,34 @@ ngraph::pass::GeluFusionWithErfOne::GeluFusionWithErfOne() { // Shared by every pattern: (1 + erf(x / sqrt(2))) auto input = ngraph::pattern::any_input(); 
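// The three GeluFusionWithErf* matchers in this file recognize the same erf-based GELU
// expression with the multiplications associated differently:
//   GeluFusionWithErfOne:   (0.5 * x) * (1 + erf(x / sqrt(2)))
//   GeluFusionWithErfTwo:   0.5 * (x * (1 + erf(x / sqrt(2))))
//   GeluFusionWithErfThree: x * (0.5 * (1 + erf(x / sqrt(2))))
// and replace the whole subgraph with a single Gelu op. Scalar sketch of the function
// being matched (hypothetical helper, float only):
#include <cmath>
static float gelu_reference(float x) {
    return 0.5f * x * (1.0f + std::erf(x / std::sqrt(2.0f)));
}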
auto div_constant = ngraph::pattern::wrap_type(); - auto div = ngraph::pattern::wrap_type( - {input, div_constant}); + auto div = ngraph::pattern::wrap_type({input, div_constant}); auto erf = ngraph::pattern::wrap_type({div}); auto add_constant = ngraph::pattern::wrap_type(); - auto add = - ngraph::pattern::wrap_type({add_constant, erf}); + auto add = ngraph::pattern::wrap_type({add_constant, erf}); auto mul_constant = ngraph::pattern::wrap_type(); // (0.5 * x) * (1 + erf(x / sqrt(2)) - auto mul_first = ngraph::pattern::wrap_type( - {input, mul_constant}); - auto mul = - ngraph::pattern::wrap_type({mul_first, add}); + auto mul_first = ngraph::pattern::wrap_type({input, mul_constant}); + auto mul = ngraph::pattern::wrap_type({mul_first, add}); - ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher &m) { - auto &pattern_to_output = m.get_pattern_value_map(); + ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher& m) { + auto& pattern_to_output = m.get_pattern_value_map(); auto x_output = pattern_to_output.at(input); - auto div_const_value = - std::dynamic_pointer_cast( - pattern_to_output.at(div_constant).get_node_shared_ptr()); - auto add_const_value = - std::dynamic_pointer_cast( - pattern_to_output.at(add_constant).get_node_shared_ptr()); - auto mul_const_value = - std::dynamic_pointer_cast( - pattern_to_output.at(mul_constant).get_node_shared_ptr()); + auto div_const_value = std::dynamic_pointer_cast( + pattern_to_output.at(div_constant).get_node_shared_ptr()); + auto add_const_value = std::dynamic_pointer_cast( + pattern_to_output.at(add_constant).get_node_shared_ptr()); + auto mul_const_value = std::dynamic_pointer_cast( + pattern_to_output.at(mul_constant).get_node_shared_ptr()); if (!div_const_value || !add_const_value || !mul_const_value) { return false; } - bool valid_constant_values = - op::util::has_constant_value(div_const_value, M_SQRT2) && - op::util::has_constant_value(add_const_value, 1.0f) && - op::util::has_constant_value(mul_const_value, 0.5f); + bool valid_constant_values = op::util::has_constant_value(div_const_value, M_SQRT2) && + op::util::has_constant_value(add_const_value, 1.0f) && + op::util::has_constant_value(mul_const_value, 0.5f); if (!valid_constant_values) { return false; @@ -88,8 +79,7 @@ ngraph::pass::GeluFusionWithErfOne::GeluFusionWithErfOne() { register_matcher(m, callback); } -NGRAPH_RTTI_DEFINITION(ngraph::pass::GeluFusionWithErfTwo, - "GeluFusionWithErfTwo", 0); +NGRAPH_RTTI_DEFINITION(ngraph::pass::GeluFusionWithErfTwo, "GeluFusionWithErfTwo", 0); ngraph::pass::GeluFusionWithErfTwo::GeluFusionWithErfTwo() { MATCHER_SCOPE(GeluFusionWithErfTwo); @@ -97,42 +87,34 @@ ngraph::pass::GeluFusionWithErfTwo::GeluFusionWithErfTwo() { // Shared by every pattern: (1 + erf(x / sqrt(2))) auto input = ngraph::pattern::any_input(); auto div_constant = ngraph::pattern::wrap_type(); - auto div = ngraph::pattern::wrap_type( - {input, div_constant}); + auto div = ngraph::pattern::wrap_type({input, div_constant}); auto erf = ngraph::pattern::wrap_type({div}); auto add_constant = ngraph::pattern::wrap_type(); - auto add = - ngraph::pattern::wrap_type({add_constant, erf}); + auto add = ngraph::pattern::wrap_type({add_constant, erf}); auto mul_constant = ngraph::pattern::wrap_type(); // 0.5 * (x * (1 + erf(x / sqrt(2))) - auto mul_first = - ngraph::pattern::wrap_type({input, add}); - auto mul = ngraph::pattern::wrap_type( - {mul_constant, mul_first}); + auto mul_first = ngraph::pattern::wrap_type({input, add}); + auto mul = 
ngraph::pattern::wrap_type({mul_constant, mul_first}); - ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher &m) { - auto &pattern_to_output = m.get_pattern_value_map(); + ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher& m) { + auto& pattern_to_output = m.get_pattern_value_map(); auto x_output = pattern_to_output.at(input); - auto div_const_value = - std::dynamic_pointer_cast( - pattern_to_output.at(div_constant).get_node_shared_ptr()); - auto add_const_value = - std::dynamic_pointer_cast( - pattern_to_output.at(add_constant).get_node_shared_ptr()); - auto mul_const_value = - std::dynamic_pointer_cast( - pattern_to_output.at(mul_constant).get_node_shared_ptr()); + auto div_const_value = std::dynamic_pointer_cast( + pattern_to_output.at(div_constant).get_node_shared_ptr()); + auto add_const_value = std::dynamic_pointer_cast( + pattern_to_output.at(add_constant).get_node_shared_ptr()); + auto mul_const_value = std::dynamic_pointer_cast( + pattern_to_output.at(mul_constant).get_node_shared_ptr()); if (!div_const_value || !add_const_value || !mul_const_value) { return false; } - bool valid_constant_values = - op::util::has_constant_value(div_const_value, M_SQRT2) && - op::util::has_constant_value(add_const_value, 1.0f) && - op::util::has_constant_value(mul_const_value, 0.5f); + bool valid_constant_values = op::util::has_constant_value(div_const_value, M_SQRT2) && + op::util::has_constant_value(add_const_value, 1.0f) && + op::util::has_constant_value(mul_const_value, 0.5f); if (!valid_constant_values) { return false; @@ -158,8 +140,7 @@ ngraph::pass::GeluFusionWithErfTwo::GeluFusionWithErfTwo() { register_matcher(m, callback); } -NGRAPH_RTTI_DEFINITION(ngraph::pass::GeluFusionWithErfThree, - "GeluFusionWithErfThree", 0); +NGRAPH_RTTI_DEFINITION(ngraph::pass::GeluFusionWithErfThree, "GeluFusionWithErfThree", 0); ngraph::pass::GeluFusionWithErfThree::GeluFusionWithErfThree() { MATCHER_SCOPE(GeluFusionWithErfThree); @@ -167,42 +148,34 @@ ngraph::pass::GeluFusionWithErfThree::GeluFusionWithErfThree() { // Shared by every pattern: (1 + erf(x / sqrt(2))) auto input = ngraph::pattern::any_input(); auto div_constant = ngraph::pattern::wrap_type(); - auto div = ngraph::pattern::wrap_type( - {input, div_constant}); + auto div = ngraph::pattern::wrap_type({input, div_constant}); auto erf = ngraph::pattern::wrap_type({div}); auto add_constant = ngraph::pattern::wrap_type(); - auto add = - ngraph::pattern::wrap_type({add_constant, erf}); + auto add = ngraph::pattern::wrap_type({add_constant, erf}); auto mul_constant = ngraph::pattern::wrap_type(); // x * (0.5 * (1 + erf(x / sqrt(2))) - auto mul_first = ngraph::pattern::wrap_type( - {add, mul_constant}); - auto mul = ngraph::pattern::wrap_type( - {input, mul_first}); + auto mul_first = ngraph::pattern::wrap_type({add, mul_constant}); + auto mul = ngraph::pattern::wrap_type({input, mul_first}); - ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher &m) { - auto &pattern_to_output = m.get_pattern_value_map(); + ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher& m) { + auto& pattern_to_output = m.get_pattern_value_map(); auto x_output = pattern_to_output.at(input); - auto div_const_value = - std::dynamic_pointer_cast( - pattern_to_output.at(div_constant).get_node_shared_ptr()); - auto add_const_value = - std::dynamic_pointer_cast( - pattern_to_output.at(add_constant).get_node_shared_ptr()); - auto mul_const_value = - std::dynamic_pointer_cast( - 
pattern_to_output.at(mul_constant).get_node_shared_ptr()); + auto div_const_value = std::dynamic_pointer_cast( + pattern_to_output.at(div_constant).get_node_shared_ptr()); + auto add_const_value = std::dynamic_pointer_cast( + pattern_to_output.at(add_constant).get_node_shared_ptr()); + auto mul_const_value = std::dynamic_pointer_cast( + pattern_to_output.at(mul_constant).get_node_shared_ptr()); if (!div_const_value || !add_const_value || !mul_const_value) { return false; } - bool valid_constant_values = - op::util::has_constant_value(div_const_value, M_SQRT2) && - op::util::has_constant_value(add_const_value, 1.0f) && - op::util::has_constant_value(mul_const_value, 0.5f); + bool valid_constant_values = op::util::has_constant_value(div_const_value, M_SQRT2) && + op::util::has_constant_value(add_const_value, 1.0f) && + op::util::has_constant_value(mul_const_value, 0.5f); if (!valid_constant_values) { return false; diff --git a/src/common/transformations/src/transformations/common_optimizations/hsigmoid_fusion.cpp b/src/common/transformations/src/transformations/common_optimizations/hsigmoid_fusion.cpp index cca1c8ecda0..81025a071b0 100644 --- a/src/common/transformations/src/transformations/common_optimizations/hsigmoid_fusion.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/hsigmoid_fusion.cpp @@ -2,15 +2,15 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "itt.hpp" #include "transformations/common_optimizations/hsigmoid_fusion.hpp" -#include "transformations/utils/utils.hpp" #include - #include -#include #include +#include + +#include "itt.hpp" +#include "transformations/utils/utils.hpp" NGRAPH_RTTI_DEFINITION(ngraph::pass::HSigmoidFusion, "HSigmoidFusion", 0); @@ -28,17 +28,20 @@ ngraph::pass::HSigmoidFusionWithReluDiv::HSigmoidFusionWithReluDiv() { auto div_constant = ngraph::pattern::wrap_type(); auto div = ngraph::pattern::wrap_type({min, div_constant}); - ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher &m) { - auto &pattern_to_output = m.get_pattern_value_map(); + ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher& m) { + auto& pattern_to_output = m.get_pattern_value_map(); auto x_output = pattern_to_output.at(input); - auto add_const_value = std::dynamic_pointer_cast(pattern_to_output.at(add_constant).get_node_shared_ptr()); - auto min_const_value = std::dynamic_pointer_cast(pattern_to_output.at(min_constant).get_node_shared_ptr()); - auto div_const_value = std::dynamic_pointer_cast(pattern_to_output.at(div_constant).get_node_shared_ptr()); + auto add_const_value = std::dynamic_pointer_cast( + pattern_to_output.at(add_constant).get_node_shared_ptr()); + auto min_const_value = std::dynamic_pointer_cast( + pattern_to_output.at(min_constant).get_node_shared_ptr()); + auto div_const_value = std::dynamic_pointer_cast( + pattern_to_output.at(div_constant).get_node_shared_ptr()); - bool valid_constant_values = op::util::has_constant_value(add_const_value, 3.0) - && op::util::has_constant_value(min_const_value, 6.0) - && op::util::has_constant_value(div_const_value, 6.0); + bool valid_constant_values = op::util::has_constant_value(add_const_value, 3.0) && + op::util::has_constant_value(min_const_value, 6.0) && + op::util::has_constant_value(div_const_value, 6.0); if (!valid_constant_values) { return false; @@ -47,12 +50,14 @@ ngraph::pass::HSigmoidFusionWithReluDiv::HSigmoidFusionWithReluDiv() { auto hsigmoid = register_new_node(x_output); hsigmoid->set_friendly_name(m.get_match_root()->get_friendly_name()); - 
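// The HSigmoidFusion* matchers in this file all target the same function,
//   HSigmoid(x) = min(max(x + 3, 0), 6) / 6,
// written either with ReLU + Minimum, Maximum + Minimum, or Clamp(0, 6), and finished
// with a division by 6 or a multiplication by 1/6, hence the checks for the constants
// 3, 6 and 1/6 in the callbacks. Scalar sketch (hypothetical helper, float only):
#include <algorithm>
static float hsigmoid_reference(float x) {
    return std::min(std::max(x + 3.0f, 0.0f), 6.0f) / 6.0f;  // collapsed into a single HSigmoid op
}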
ngraph::copy_runtime_info({ pattern_to_output.at(add).get_node_shared_ptr(), - pattern_to_output.at(relu).get_node_shared_ptr(), - pattern_to_output.at(min).get_node_shared_ptr(), - pattern_to_output.at(div).get_node_shared_ptr(), - }, - hsigmoid); + ngraph::copy_runtime_info( + { + pattern_to_output.at(add).get_node_shared_ptr(), + pattern_to_output.at(relu).get_node_shared_ptr(), + pattern_to_output.at(min).get_node_shared_ptr(), + pattern_to_output.at(div).get_node_shared_ptr(), + }, + hsigmoid); ngraph::replace_node(m.get_match_root(), hsigmoid); return true; }; @@ -75,17 +80,20 @@ ngraph::pass::HSigmoidFusionWithReluMul::HSigmoidFusionWithReluMul() { auto mul_constant = ngraph::pattern::wrap_type(); auto mul_second = ngraph::pattern::wrap_type({min, mul_constant}); - ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher &m) { - auto &pattern_to_output = m.get_pattern_value_map(); + ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher& m) { + auto& pattern_to_output = m.get_pattern_value_map(); auto x_output = pattern_to_output.at(input); - auto add_const_value = std::dynamic_pointer_cast(pattern_to_output.at(add_constant).get_node_shared_ptr()); - auto min_const_value = std::dynamic_pointer_cast(pattern_to_output.at(min_constant).get_node_shared_ptr()); - auto mul_const_value = std::dynamic_pointer_cast(pattern_to_output.at(mul_constant).get_node_shared_ptr()); + auto add_const_value = std::dynamic_pointer_cast( + pattern_to_output.at(add_constant).get_node_shared_ptr()); + auto min_const_value = std::dynamic_pointer_cast( + pattern_to_output.at(min_constant).get_node_shared_ptr()); + auto mul_const_value = std::dynamic_pointer_cast( + pattern_to_output.at(mul_constant).get_node_shared_ptr()); - bool valid_constant_values = op::util::has_constant_value(add_const_value, 3.0f) - && op::util::has_constant_value(min_const_value, 6.0f) - && op::util::has_constant_value(mul_const_value, (1.0f/6.0f), 0.0001f); + bool valid_constant_values = op::util::has_constant_value(add_const_value, 3.0f) && + op::util::has_constant_value(min_const_value, 6.0f) && + op::util::has_constant_value(mul_const_value, (1.0f / 6.0f), 0.0001f); if (!valid_constant_values) { return false; @@ -94,11 +102,10 @@ ngraph::pass::HSigmoidFusionWithReluMul::HSigmoidFusionWithReluMul() { auto hsigmoid = register_new_node(x_output); hsigmoid->set_friendly_name(m.get_match_root()->get_friendly_name()); - ngraph::copy_runtime_info({ pattern_to_output.at(add).get_node_shared_ptr(), - pattern_to_output.at(relu).get_node_shared_ptr(), - pattern_to_output.at(min).get_node_shared_ptr(), - pattern_to_output.at(mul_second).get_node_shared_ptr() - }, + ngraph::copy_runtime_info({pattern_to_output.at(add).get_node_shared_ptr(), + pattern_to_output.at(relu).get_node_shared_ptr(), + pattern_to_output.at(min).get_node_shared_ptr(), + pattern_to_output.at(mul_second).get_node_shared_ptr()}, hsigmoid); ngraph::replace_node(m.get_match_root(), hsigmoid); return true; @@ -124,19 +131,23 @@ ngraph::pass::HSigmoidFusionWithoutRelu::HSigmoidFusionWithoutRelu() { auto div = ngraph::pattern::wrap_type({min, div_constant}); auto mul = ngraph::pattern::wrap_type({input, div}); - ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher &m) { - auto &pattern_to_output = m.get_pattern_value_map(); + ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher& m) { + auto& pattern_to_output = m.get_pattern_value_map(); auto x_output = pattern_to_output.at(input); - auto add_const_value = 
std::dynamic_pointer_cast(pattern_to_output.at(add_constant).get_node_shared_ptr()); - auto max_const_value = std::dynamic_pointer_cast(pattern_to_output.at(max_constant).get_node_shared_ptr()); - auto min_const_value = std::dynamic_pointer_cast(pattern_to_output.at(min_constant).get_node_shared_ptr()); - auto div_const_value = std::dynamic_pointer_cast(pattern_to_output.at(div_constant).get_node_shared_ptr()); + auto add_const_value = std::dynamic_pointer_cast( + pattern_to_output.at(add_constant).get_node_shared_ptr()); + auto max_const_value = std::dynamic_pointer_cast( + pattern_to_output.at(max_constant).get_node_shared_ptr()); + auto min_const_value = std::dynamic_pointer_cast( + pattern_to_output.at(min_constant).get_node_shared_ptr()); + auto div_const_value = std::dynamic_pointer_cast( + pattern_to_output.at(div_constant).get_node_shared_ptr()); - bool valid_constant_values = op::util::has_constant_value(add_const_value, 3.0f) - && op::util::has_constant_value(max_const_value, 0.0f) - && op::util::has_constant_value(min_const_value, 6.0f) - && op::util::has_constant_value(div_const_value, 6.0f); + bool valid_constant_values = op::util::has_constant_value(add_const_value, 3.0f) && + op::util::has_constant_value(max_const_value, 0.0f) && + op::util::has_constant_value(min_const_value, 6.0f) && + op::util::has_constant_value(div_const_value, 6.0f); if (!valid_constant_values) { return false; @@ -145,11 +156,10 @@ ngraph::pass::HSigmoidFusionWithoutRelu::HSigmoidFusionWithoutRelu() { auto hsigmoid = register_new_node(x_output); hsigmoid->set_friendly_name(m.get_match_root()->get_friendly_name()); - ngraph::copy_runtime_info({ pattern_to_output.at(add).get_node_shared_ptr(), - pattern_to_output.at(max).get_node_shared_ptr(), - pattern_to_output.at(min).get_node_shared_ptr(), - pattern_to_output.at(div).get_node_shared_ptr() - }, + ngraph::copy_runtime_info({pattern_to_output.at(add).get_node_shared_ptr(), + pattern_to_output.at(max).get_node_shared_ptr(), + pattern_to_output.at(min).get_node_shared_ptr(), + pattern_to_output.at(div).get_node_shared_ptr()}, hsigmoid); ngraph::replace_node(m.get_match_root(), hsigmoid); return true; @@ -171,31 +181,33 @@ ngraph::pass::HSigmoidFusionWithClampMul::HSigmoidFusionWithClampMul() { auto mul_constant = ngraph::pattern::wrap_type(); auto mul_first = ngraph::pattern::wrap_type({clamp, mul_constant}); - ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher &m) { - auto &pattern_to_output = m.get_pattern_value_map(); + ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher& m) { + auto& pattern_to_output = m.get_pattern_value_map(); auto x_output = pattern_to_output.at(input); - auto add_const_value = std::dynamic_pointer_cast(pattern_to_output.at(add_constant).get_node_shared_ptr()); - auto mul_const_value = std::dynamic_pointer_cast(pattern_to_output.at(mul_constant).get_node_shared_ptr()); + auto add_const_value = std::dynamic_pointer_cast( + pattern_to_output.at(add_constant).get_node_shared_ptr()); + auto mul_const_value = std::dynamic_pointer_cast( + pattern_to_output.at(mul_constant).get_node_shared_ptr()); - bool valid_constant_values = op::util::has_constant_value(add_const_value, 3.0) - && op::util::has_constant_value(mul_const_value, (1.0/6.0), 0.0001); + bool valid_constant_values = op::util::has_constant_value(add_const_value, 3.0) && + op::util::has_constant_value(mul_const_value, (1.0 / 6.0), 0.0001); if (!valid_constant_values) { return false; } - auto clamp_node = 
std::dynamic_pointer_cast(pattern_to_output.at(clamp).get_node_shared_ptr()); + auto clamp_node = + std::dynamic_pointer_cast(pattern_to_output.at(clamp).get_node_shared_ptr()); if (!clamp_node || clamp_node->get_min() != 0 || clamp_node->get_max() != 6) return false; auto hsigmoid = register_new_node(x_output); hsigmoid->set_friendly_name(m.get_match_root()->get_friendly_name()); - ngraph::copy_runtime_info({ pattern_to_output.at(add).get_node_shared_ptr(), - pattern_to_output.at(clamp).get_node_shared_ptr(), - pattern_to_output.at(mul_first).get_node_shared_ptr() - }, + ngraph::copy_runtime_info({pattern_to_output.at(add).get_node_shared_ptr(), + pattern_to_output.at(clamp).get_node_shared_ptr(), + pattern_to_output.at(mul_first).get_node_shared_ptr()}, hsigmoid); ngraph::replace_node(m.get_match_root(), hsigmoid); return true; @@ -217,31 +229,33 @@ ngraph::pass::HSigmoidFusionWithClampDiv::HSigmoidFusionWithClampDiv() { auto div_constant = ngraph::pattern::wrap_type(); auto div = ngraph::pattern::wrap_type({clamp, div_constant}); - ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher &m) { - auto &pattern_to_output = m.get_pattern_value_map(); + ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher& m) { + auto& pattern_to_output = m.get_pattern_value_map(); auto x_output = pattern_to_output.at(input); - auto add_const_value = std::dynamic_pointer_cast(pattern_to_output.at(add_constant).get_node_shared_ptr()); - auto div_const_value = std::dynamic_pointer_cast(pattern_to_output.at(div_constant).get_node_shared_ptr()); + auto add_const_value = std::dynamic_pointer_cast( + pattern_to_output.at(add_constant).get_node_shared_ptr()); + auto div_const_value = std::dynamic_pointer_cast( + pattern_to_output.at(div_constant).get_node_shared_ptr()); - bool valid_constant_values = op::util::has_constant_value(add_const_value, 3.0) - && op::util::has_constant_value(div_const_value, 6.0); + bool valid_constant_values = + op::util::has_constant_value(add_const_value, 3.0) && op::util::has_constant_value(div_const_value, 6.0); if (!valid_constant_values) { return false; } - auto clamp_node = std::dynamic_pointer_cast(pattern_to_output.at(clamp).get_node_shared_ptr()); + auto clamp_node = + std::dynamic_pointer_cast(pattern_to_output.at(clamp).get_node_shared_ptr()); if (!clamp_node || clamp_node->get_min() != 0 || clamp_node->get_max() != 6) return false; auto hsigmoid = register_new_node(x_output); hsigmoid->set_friendly_name(m.get_match_root()->get_friendly_name()); - ngraph::copy_runtime_info({ pattern_to_output.at(add).get_node_shared_ptr(), - pattern_to_output.at(clamp).get_node_shared_ptr(), - pattern_to_output.at(div).get_node_shared_ptr() - }, + ngraph::copy_runtime_info({pattern_to_output.at(add).get_node_shared_ptr(), + pattern_to_output.at(clamp).get_node_shared_ptr(), + pattern_to_output.at(div).get_node_shared_ptr()}, hsigmoid); ngraph::replace_node(m.get_match_root(), hsigmoid); return true; diff --git a/src/common/transformations/src/transformations/common_optimizations/hswish_fusion.cpp b/src/common/transformations/src/transformations/common_optimizations/hswish_fusion.cpp index bedaf649c44..e233f10a334 100644 --- a/src/common/transformations/src/transformations/common_optimizations/hswish_fusion.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/hswish_fusion.cpp @@ -2,15 +2,15 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "itt.hpp" #include "transformations/common_optimizations/hswish_fusion.hpp" -#include 
"transformations/utils/utils.hpp" #include - #include -#include #include +#include + +#include "itt.hpp" +#include "transformations/utils/utils.hpp" NGRAPH_RTTI_DEFINITION(ngraph::pass::HSwishFusion, "HSwishFusion", 0); @@ -29,17 +29,20 @@ ngraph::pass::HSwishFusionWithReluDiv::HSwishFusionWithReluDiv() { auto div_constant = ngraph::pattern::wrap_type(); auto div = std::make_shared(mul, div_constant); - ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher &m) { - auto &pattern_to_output = m.get_pattern_value_map(); + ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher& m) { + auto& pattern_to_output = m.get_pattern_value_map(); auto x_output = pattern_to_output.at(input); - auto add_const_value = std::dynamic_pointer_cast(pattern_to_output.at(add_constant).get_node_shared_ptr()); - auto min_const_value = std::dynamic_pointer_cast(pattern_to_output.at(min_constant).get_node_shared_ptr()); - auto div_const_value = std::dynamic_pointer_cast(pattern_to_output.at(div_constant).get_node_shared_ptr()); + auto add_const_value = std::dynamic_pointer_cast( + pattern_to_output.at(add_constant).get_node_shared_ptr()); + auto min_const_value = std::dynamic_pointer_cast( + pattern_to_output.at(min_constant).get_node_shared_ptr()); + auto div_const_value = std::dynamic_pointer_cast( + pattern_to_output.at(div_constant).get_node_shared_ptr()); - bool valid_constant_values = op::util::has_constant_value(add_const_value, 3.0) - && op::util::has_constant_value(min_const_value, 6.0) - && op::util::has_constant_value(div_const_value, 6.0); + bool valid_constant_values = op::util::has_constant_value(add_const_value, 3.0) && + op::util::has_constant_value(min_const_value, 6.0) && + op::util::has_constant_value(div_const_value, 6.0); if (!valid_constant_values) { return false; @@ -48,16 +51,18 @@ ngraph::pass::HSwishFusionWithReluDiv::HSwishFusionWithReluDiv() { auto hswish = std::make_shared(x_output); hswish->set_friendly_name(m.get_match_root()->get_friendly_name()); - ngraph::copy_runtime_info({ pattern_to_output.at(add_constant).get_node_shared_ptr(), - pattern_to_output.at(add).get_node_shared_ptr(), - pattern_to_output.at(relu).get_node_shared_ptr(), - pattern_to_output.at(min_constant).get_node_shared_ptr(), - pattern_to_output.at(min).get_node_shared_ptr(), - pattern_to_output.at(mul).get_node_shared_ptr(), - pattern_to_output.at(div_constant).get_node_shared_ptr(), - pattern_to_output.at(div).get_node_shared_ptr(), - }, - hswish); + ngraph::copy_runtime_info( + { + pattern_to_output.at(add_constant).get_node_shared_ptr(), + pattern_to_output.at(add).get_node_shared_ptr(), + pattern_to_output.at(relu).get_node_shared_ptr(), + pattern_to_output.at(min_constant).get_node_shared_ptr(), + pattern_to_output.at(min).get_node_shared_ptr(), + pattern_to_output.at(mul).get_node_shared_ptr(), + pattern_to_output.at(div_constant).get_node_shared_ptr(), + pattern_to_output.at(div).get_node_shared_ptr(), + }, + hswish); ngraph::replace_node(m.get_match_root(), hswish); return true; }; @@ -81,17 +86,20 @@ ngraph::pass::HSwishFusionWithReluMul::HSwishFusionWithReluMul() { auto mul_constant = ngraph::pattern::wrap_type(); auto mul_second = std::make_shared(mul_first, mul_constant); - ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher &m) { - auto &pattern_to_output = m.get_pattern_value_map(); + ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher& m) { + auto& pattern_to_output = m.get_pattern_value_map(); auto x_output = 
pattern_to_output.at(input); - auto add_const_value = std::dynamic_pointer_cast(pattern_to_output.at(add_constant).get_node_shared_ptr()); - auto min_const_value = std::dynamic_pointer_cast(pattern_to_output.at(min_constant).get_node_shared_ptr()); - auto mul_const_value = std::dynamic_pointer_cast(pattern_to_output.at(mul_constant).get_node_shared_ptr()); + auto add_const_value = std::dynamic_pointer_cast( + pattern_to_output.at(add_constant).get_node_shared_ptr()); + auto min_const_value = std::dynamic_pointer_cast( + pattern_to_output.at(min_constant).get_node_shared_ptr()); + auto mul_const_value = std::dynamic_pointer_cast( + pattern_to_output.at(mul_constant).get_node_shared_ptr()); - bool valid_constant_values = op::util::has_constant_value(add_const_value, 3.0f) - && op::util::has_constant_value(min_const_value, 6.0f) - && op::util::has_constant_value(mul_const_value, (1.0f/6.0f), 0.0001f); + bool valid_constant_values = op::util::has_constant_value(add_const_value, 3.0f) && + op::util::has_constant_value(min_const_value, 6.0f) && + op::util::has_constant_value(mul_const_value, (1.0f / 6.0f), 0.0001f); if (!valid_constant_values) { return false; @@ -100,15 +108,14 @@ ngraph::pass::HSwishFusionWithReluMul::HSwishFusionWithReluMul() { auto hswish = std::make_shared(x_output); hswish->set_friendly_name(m.get_match_root()->get_friendly_name()); - ngraph::copy_runtime_info({ pattern_to_output.at(add_constant).get_node_shared_ptr(), - pattern_to_output.at(add).get_node_shared_ptr(), - pattern_to_output.at(relu).get_node_shared_ptr(), - pattern_to_output.at(min_constant).get_node_shared_ptr(), - pattern_to_output.at(min).get_node_shared_ptr(), - pattern_to_output.at(mul_first).get_node_shared_ptr(), - pattern_to_output.at(mul_constant).get_node_shared_ptr(), - pattern_to_output.at(mul_second).get_node_shared_ptr() - }, + ngraph::copy_runtime_info({pattern_to_output.at(add_constant).get_node_shared_ptr(), + pattern_to_output.at(add).get_node_shared_ptr(), + pattern_to_output.at(relu).get_node_shared_ptr(), + pattern_to_output.at(min_constant).get_node_shared_ptr(), + pattern_to_output.at(min).get_node_shared_ptr(), + pattern_to_output.at(mul_first).get_node_shared_ptr(), + pattern_to_output.at(mul_constant).get_node_shared_ptr(), + pattern_to_output.at(mul_second).get_node_shared_ptr()}, hswish); ngraph::replace_node(m.get_match_root(), hswish); return true; @@ -127,7 +134,7 @@ ngraph::pass::HSwishFusionWithHSigmoid::HSwishFusionWithHSigmoid() { auto hsigmoid_pattern = pattern::wrap_type({input}, pattern::consumers_count(1)); auto mul_pattern = pattern::wrap_type({input, hsigmoid_pattern}); - ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher &m) { + ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher& m) { const auto& pattern_to_output = m.get_pattern_value_map(); auto hsigmoid = pattern_to_output.at(hsigmoid_pattern).get_node_shared_ptr(); auto mul = pattern_to_output.at(mul_pattern).get_node_shared_ptr(); @@ -154,27 +161,30 @@ ngraph::pass::HSwishFusionWithClamp::HSwishFusionWithClamp() { const auto clamp = ngraph::pattern::wrap_type({add}); const auto mul = ngraph::pattern::wrap_type({clamp, input}); - ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher &m) { - const auto &pattern_to_output = m.get_pattern_value_map(); + ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher& m) { + const auto& pattern_to_output = m.get_pattern_value_map(); const auto x_output = pattern_to_output.at(input); - const auto 
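// The HSwishFusion* matchers collapse the analogous hard-swish subgraphs,
//   HSwish(x) = x * HSigmoid(x) = x * min(max(x + 3, 0), 6) / 6,
// whether they are written with ReLU/Minimum plus a division by 6, a multiplication
// by 1/6, an explicit HSigmoid, or a Clamp(0, 6); in the Clamp variant the missing
// 1/6 factor is compensated by the extra Multiply-by-6 the callback creates below.
// Scalar sketch (hypothetical helper, float only):
#include <algorithm>
static float hswish_reference(float x) {
    return x * std::min(std::max(x + 3.0f, 0.0f), 6.0f) / 6.0f;
}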
add_const_value = std::dynamic_pointer_cast(pattern_to_output.at(add_constant).get_node_shared_ptr()); + const auto add_const_value = std::dynamic_pointer_cast( + pattern_to_output.at(add_constant).get_node_shared_ptr()); if (!op::util::has_constant_value(add_const_value, 3.0)) { return false; } - const auto clamp_node = std::dynamic_pointer_cast(pattern_to_output.at(clamp).get_node_shared_ptr()); + const auto clamp_node = + std::dynamic_pointer_cast(pattern_to_output.at(clamp).get_node_shared_ptr()); if (!clamp_node || clamp_node->get_min() != 0 || clamp_node->get_max() != 6) return false; auto hswish = std::make_shared(x_output); - auto new_mul_const = std::make_shared(add_const_value->get_element_type(), Shape{}, std::vector{6.0}); + auto new_mul_const = std::make_shared(add_const_value->get_element_type(), + Shape{}, + std::vector{6.0}); auto new_mul = std::make_shared(hswish, new_mul_const); new_mul->set_friendly_name(m.get_match_root()->get_friendly_name()); - ngraph::copy_runtime_info({ pattern_to_output.at(add).get_node_shared_ptr(), - pattern_to_output.at(clamp).get_node_shared_ptr(), - pattern_to_output.at(mul).get_node_shared_ptr() - }, + ngraph::copy_runtime_info({pattern_to_output.at(add).get_node_shared_ptr(), + pattern_to_output.at(clamp).get_node_shared_ptr(), + pattern_to_output.at(mul).get_node_shared_ptr()}, {hswish, new_mul_const, new_mul}); ngraph::replace_node(m.get_match_root(), new_mul); return true; diff --git a/src/common/transformations/src/transformations/common_optimizations/interpolate_sequence_fusion.cpp b/src/common/transformations/src/transformations/common_optimizations/interpolate_sequence_fusion.cpp index 5157d56aa5d..ba4bd4674b1 100644 --- a/src/common/transformations/src/transformations/common_optimizations/interpolate_sequence_fusion.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/interpolate_sequence_fusion.cpp @@ -2,20 +2,20 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "itt.hpp" #include "transformations/common_optimizations/interpolate_sequence_fusion.hpp" #include #include +#include #include #include #include #include -#include +#include "itt.hpp" // #include -#include #include +#include namespace { using namespace ngraph; @@ -23,7 +23,8 @@ using namespace ngraph; bool compatible_axes(const std::vector& fst_axes_vector, const std::vector& snd_axes_vector) { std::set fst_axes_set(fst_axes_vector.begin(), fst_axes_vector.end()); for (const auto& a : snd_axes_vector) { - if (fst_axes_set.count(a) != 0) return false; + if (fst_axes_set.count(a) != 0) + return false; } return true; } @@ -31,14 +32,16 @@ bool compatible_axes(const std::vector& fst_axes_vector, const std::vec bool shape_calculation_mode_can_use_constant_inputs(const std::shared_ptr& interpolate) { const auto& attrs = interpolate->get_attrs(); if (attrs.shape_calculation_mode == ngraph::opset8::Interpolate::ShapeCalcMode::SIZES) { - return std::dynamic_pointer_cast(interpolate->input_value(1).get_node_shared_ptr()) != nullptr; + return std::dynamic_pointer_cast(interpolate->input_value(1).get_node_shared_ptr()) != + nullptr; } return std::dynamic_pointer_cast(interpolate->input_value(2).get_node_shared_ptr()) != nullptr; } bool is_candidate_for_fusion(const std::shared_ptr& interpolate) { return (interpolate->get_input_partial_shape(0).rank().is_static()) && - (interpolate->inputs().size() != 4 || std::dynamic_pointer_cast(interpolate->input_value(3).get_node_shared_ptr())) && + (interpolate->inputs().size() != 4 || + 
std::dynamic_pointer_cast(interpolate->input_value(3).get_node_shared_ptr())) && shape_calculation_mode_can_use_constant_inputs(interpolate); } @@ -51,31 +54,36 @@ std::vector get_interpolated_axes(const std::shared_ptr(interpolate->input_value(3).get_node_shared_ptr())->cast_vector(); + return std::dynamic_pointer_cast(interpolate->input_value(3).get_node_shared_ptr()) + ->cast_vector(); } bool can_be_fused(const std::shared_ptr& fst, const std::shared_ptr& snd) { // The first Interpolate (fst) must have only one consumer. for (const auto& output : fst->outputs()) { for (const auto& consumer : output.get_target_inputs()) { - if (consumer.get_node() != snd.get()) return false; + if (consumer.get_node() != snd.get()) + return false; } } - if (fst->get_attrs() != snd->get_attrs() || !is_candidate_for_fusion(fst) || !is_candidate_for_fusion(snd)) return false; + if (fst->get_attrs() != snd->get_attrs() || !is_candidate_for_fusion(fst) || !is_candidate_for_fusion(snd)) + return false; const auto fst_axes = get_interpolated_axes(fst); const auto snd_axes = get_interpolated_axes(snd); return compatible_axes(fst_axes, snd_axes); } -ngraph::NodeVector subgraph_for_sizes_calculation_mode(const std::shared_ptr& fst, const std::shared_ptr& snd, +ngraph::NodeVector subgraph_for_sizes_calculation_mode(const std::shared_ptr& fst, + const std::shared_ptr& snd, pass::MatcherPass* matcherPass) { const auto fst_axes = get_interpolated_axes(fst); const auto snd_axes = get_interpolated_axes(snd); const auto fst_sizes_node = std::dynamic_pointer_cast(fst->input_value(1).get_node_shared_ptr()); const auto snd_sizes_node = std::dynamic_pointer_cast(snd->input_value(1).get_node_shared_ptr()); - if (!fst_sizes_node || !snd_sizes_node) return {}; + if (!fst_sizes_node || !snd_sizes_node) + return {}; const auto fst_sizes = fst_sizes_node->cast_vector(); const auto snd_sizes = snd_sizes_node->cast_vector(); @@ -109,20 +117,30 @@ ngraph::NodeVector subgraph_for_sizes_calculation_mode(const std::shared_ptr(new_sizes_cast, cast_shape_to_float); - const auto new_interpolate = ov::as_type_ptr(fst->clone_with_new_inputs({fst->input_value(0), new_sizes_node, div_node, - new_axes_node})); + const auto new_interpolate = ov::as_type_ptr( + fst->clone_with_new_inputs({fst->input_value(0), new_sizes_node, div_node, new_axes_node})); matcherPass->register_new_node(new_interpolate); - return {new_sizes_node, new_axes_node, new_sizes_cast, shape_node, gather_axis_node, gather_node, cast_shape_to_float, div_node, new_interpolate}; + return {new_sizes_node, + new_axes_node, + new_sizes_cast, + shape_node, + gather_axis_node, + gather_node, + cast_shape_to_float, + div_node, + new_interpolate}; } -ngraph::NodeVector subgraph_for_scales_calculation_mode(const std::shared_ptr& fst, const std::shared_ptr& snd, +ngraph::NodeVector subgraph_for_scales_calculation_mode(const std::shared_ptr& fst, + const std::shared_ptr& snd, pass::MatcherPass* matcherPass) { const auto fst_axes = get_interpolated_axes(fst); const auto snd_axes = get_interpolated_axes(snd); const auto fst_scales_node = std::dynamic_pointer_cast(fst->input_value(2).get_node_shared_ptr()); const auto snd_scales_node = std::dynamic_pointer_cast(snd->input_value(2).get_node_shared_ptr()); - if (!fst_scales_node || !snd_scales_node) return {}; + if (!fst_scales_node || !snd_scales_node) + return {}; const auto fst_scales = fst_scales_node->cast_vector(); const auto snd_scales = snd_scales_node->cast_vector(); @@ -159,14 +177,24 @@ ngraph::NodeVector 
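// Two back-to-back Interpolate ops can be merged when their attributes are equal and
// they resize disjoint sets of axes (see compatible_axes/can_be_fused above): the fused
// op keeps the first op's data input and takes the union of the axes with the combined
// target sizes (SIZES mode, built above) or scales (SCALES mode, built below). For
// example, resizing axes {2, 3} to sizes {20, 30} and then axis {1} to size {5} on a
// 4-D input is one Interpolate over axes {1, 2, 3} with sizes {5, 20, 30}. A minimal
// sketch of the merge step (hypothetical helper; the real pass also re-sorts by axis
// and rebuilds the Constant inputs):
#include <cstdint>
#include <vector>
static void merge_interpolate_requests(std::vector<int64_t>& axes, std::vector<int64_t>& sizes,
                                       const std::vector<int64_t>& more_axes,
                                       const std::vector<int64_t>& more_sizes) {
    axes.insert(axes.end(), more_axes.begin(), more_axes.end());      // disjoint axes: no conflicts
    sizes.insert(sizes.end(), more_sizes.begin(), more_sizes.end());
}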
subgraph_for_scales_calculation_mode(const std::shared_ptr(add_node); auto cast_mul_result_to_int = std::make_shared(floor_node, element::i64); - const auto new_interpolate = ov::as_type_ptr(fst->clone_with_new_inputs({fst->input_value(0), cast_mul_result_to_int, - new_scales_node, new_axes_node})); + const auto new_interpolate = ov::as_type_ptr( + fst->clone_with_new_inputs({fst->input_value(0), cast_mul_result_to_int, new_scales_node, new_axes_node})); matcherPass->register_new_node(new_interpolate); - return {new_scales_node, new_axes_node, shape_node, gather_axis_node, gather_node, cast_shape_to_float, mul_node, eps_node, - add_node, floor_node, cast_mul_result_to_int, new_interpolate}; + return {new_scales_node, + new_axes_node, + shape_node, + gather_axis_node, + gather_node, + cast_shape_to_float, + mul_node, + eps_node, + add_node, + floor_node, + cast_mul_result_to_int, + new_interpolate}; } -} // namespace +} // namespace NGRAPH_RTTI_DEFINITION(ngraph::pass::InterpolateSequenceFusion, "InterpolateSequenceFusion", 0); @@ -175,12 +203,16 @@ ngraph::pass::InterpolateSequenceFusion::InterpolateSequenceFusion() { auto interpolate_pattern = ngraph::pattern::wrap_type(); ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher& m) { auto snd_interpolate = std::dynamic_pointer_cast(m.get_match_root()); - if (!snd_interpolate) return false; + if (!snd_interpolate) + return false; - auto fst_interpolate = std::dynamic_pointer_cast(snd_interpolate->input_value(0).get_node_shared_ptr()); - if (!fst_interpolate) return false; + auto fst_interpolate = + std::dynamic_pointer_cast(snd_interpolate->input_value(0).get_node_shared_ptr()); + if (!fst_interpolate) + return false; - if (!can_be_fused(fst_interpolate, snd_interpolate)) return false; + if (!can_be_fused(fst_interpolate, snd_interpolate)) + return false; NodeVector new_subgraph; if (fst_interpolate->get_attrs().shape_calculation_mode == ngraph::opset8::Interpolate::ShapeCalcMode::SIZES) { @@ -188,7 +220,8 @@ ngraph::pass::InterpolateSequenceFusion::InterpolateSequenceFusion() { } else { new_subgraph = subgraph_for_scales_calculation_mode(fst_interpolate, snd_interpolate, this); } - if (new_subgraph.empty()) return false; + if (new_subgraph.empty()) + return false; auto& new_interpolate = new_subgraph.back(); new_interpolate->set_friendly_name(snd_interpolate->get_friendly_name()); diff --git a/src/common/transformations/src/transformations/common_optimizations/leaky_relu_fusion.cpp b/src/common/transformations/src/transformations/common_optimizations/leaky_relu_fusion.cpp index 9b98c8e7124..74af8fe6b7c 100644 --- a/src/common/transformations/src/transformations/common_optimizations/leaky_relu_fusion.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/leaky_relu_fusion.cpp @@ -3,16 +3,15 @@ // #include "transformations/common_optimizations/leaky_relu_fusion.hpp" -#include "transformations/utils/utils.hpp" #include +#include +#include +#include #include -#include -#include -#include #include "itt.hpp" - +#include "transformations/utils/utils.hpp" NGRAPH_RTTI_DEFINITION(ngraph::pass::LeakyReluFusion, "LeakyReluFusion", 0); @@ -20,24 +19,23 @@ ngraph::pass::LeakyReluFusion::LeakyReluFusion() { MATCHER_SCOPE(LeakyReluFusion); auto data_pattern = ngraph::pattern::any_input(); auto alpha_pattern = ngraph::pattern::any_input(pattern::has_static_shape()); - auto multiply_pattern = ngraph::pattern::wrap_type({data_pattern, alpha_pattern}, pattern::consumers_count(1)); + auto multiply_pattern = + 
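// LeakyReluFusion (this file) rewrites Maximum(x, alpha * x) with a single-element
// alpha into one LeakyRelu(x, alpha); the two expressions agree whenever
// 0 < alpha <= 1, since alpha * x >= x holds exactly for x <= 0 in that range.
// Scalar sketch (hypothetical helper, float only):
#include <algorithm>
static float leaky_relu_via_max(float x, float alpha) {
    return std::max(x, alpha * x);  // == (x >= 0 ? x : alpha * x) when 0 < alpha <= 1
}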
ngraph::pattern::wrap_type({data_pattern, alpha_pattern}, pattern::consumers_count(1)); auto max_pattern = ngraph::pattern::wrap_type({data_pattern, multiply_pattern}); ngraph::matcher_pass_callback callback = [=](pattern::Matcher& m) { - const auto & pattern_map = m.get_pattern_value_map(); - const auto & original_alpha_pattern = pattern_map.at(alpha_pattern); + const auto& pattern_map = m.get_pattern_value_map(); + const auto& original_alpha_pattern = pattern_map.at(alpha_pattern); if (shape_size(original_alpha_pattern.get_shape()) != 1) return false; - auto leaky_relu = register_new_node(pattern_map.at(data_pattern), original_alpha_pattern); + auto leaky_relu = + register_new_node(pattern_map.at(data_pattern), original_alpha_pattern); auto maximum = pattern_map.at(max_pattern); leaky_relu->set_friendly_name(maximum.get_node()->get_friendly_name()); - copy_runtime_info({ - pattern_map.at(multiply_pattern).get_node_shared_ptr(), - maximum.get_node_shared_ptr() - }, + copy_runtime_info({pattern_map.at(multiply_pattern).get_node_shared_ptr(), maximum.get_node_shared_ptr()}, leaky_relu); replace_node(maximum.get_node_shared_ptr(), leaky_relu); diff --git a/src/common/transformations/src/transformations/common_optimizations/lin_op_sequence_fusion.cpp b/src/common/transformations/src/transformations/common_optimizations/lin_op_sequence_fusion.cpp index 2d4bce6b417..7d1ff2c7b28 100644 --- a/src/common/transformations/src/transformations/common_optimizations/lin_op_sequence_fusion.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/lin_op_sequence_fusion.cpp @@ -2,16 +2,16 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "itt.hpp" #include "transformations/common_optimizations/lin_op_sequence_fusion.hpp" #include +#include +#include +#include +#include #include -#include -#include -#include -#include +#include "itt.hpp" using namespace ngraph; @@ -28,8 +28,8 @@ ngraph::pass::AddMultiplyFusion::AddMultiplyFusion() { auto m_mul_constant = ngraph::pattern::wrap_type(); auto m_mul = ngraph::pattern::wrap_type({m_add, m_mul_constant}); - ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher & m) -> bool { - auto & label_to_output = m.get_pattern_value_map(); + ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher& m) -> bool { + auto& label_to_output = m.get_pattern_value_map(); auto mul = label_to_output[m_mul].get_node_shared_ptr(); auto add = label_to_output[m_add].get_node_shared_ptr(); @@ -42,7 +42,8 @@ ngraph::pass::AddMultiplyFusion::AddMultiplyFusion() { Output mul_const = label_to_output[m_mul_constant]; Output add_const = label_to_output[m_add_constant]; - if ((input.get_element_type() != mul_const.get_element_type()) || (add_const.get_element_type() != mul_const.get_element_type())) { + if ((input.get_element_type() != mul_const.get_element_type()) || + (add_const.get_element_type() != mul_const.get_element_type())) { return false; } @@ -52,7 +53,8 @@ ngraph::pass::AddMultiplyFusion::AddMultiplyFusion() { auto new_mul = register_new_node(input, mul_const); // Add two constants using opset3::Add constant folding and create new Add operation - auto new_add = std::make_shared(new_mul, op::util::eltwise_fold(add_const, mul_const)); + auto new_add = + std::make_shared(new_mul, op::util::eltwise_fold(add_const, mul_const)); copy_runtime_info({add, mul}, {new_mul, new_add}); new_add->set_friendly_name(mul->get_friendly_name()); @@ -75,8 +77,8 @@ ngraph::pass::AddAddFusion::AddAddFusion() { auto m_add2_constant = 
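// The LinOpSequenceFusion passes in this file rely on constant folding of simple
// affine identities (the folded constants come from op::util::eltwise_fold):
//   Multiply(Add(x, b), c)      -> Add(Multiply(x, c), b * c)   (AddMultiplyFusion: Multiply first)
//   Add(Add(x, a), b)           -> Add(x, a + b)                (AddAddFusion)
//   Multiply(Multiply(x, a), b) -> Multiply(x, a * b)           (MultiplyMultiplyFusion)
// Scalar sketch of the first identity (hypothetical helpers, float only):
static float add_then_multiply(float x, float b, float c) {
    return (x + b) * c;    // original sequence
}
static float multiply_then_add(float x, float b, float c) {
    return x * c + b * c;  // rewritten sequence, same value for all x
}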
ngraph::pattern::wrap_type(); auto m_add2 = ngraph::pattern::wrap_type({m_add1, m_add2_constant}); - ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher & m) -> bool { - auto & label_to_output = m.get_pattern_value_map(); + ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher& m) -> bool { + auto& label_to_output = m.get_pattern_value_map(); auto add1 = label_to_output[m_add1].get_node_shared_ptr(); auto add2 = label_to_output[m_add2].get_node_shared_ptr(); @@ -87,7 +89,8 @@ ngraph::pass::AddAddFusion::AddAddFusion() { // Replace Add->Add with single Add // Add operation will be added to the list of ops requested for pattern matching - auto new_add = register_new_node(input, op::util::eltwise_fold(add1_const, add2_const)); + auto new_add = + register_new_node(input, op::util::eltwise_fold(add1_const, add2_const)); copy_runtime_info({add1, add2}, new_add); new_add->set_friendly_name(add2->get_friendly_name()); @@ -110,8 +113,8 @@ ngraph::pass::MultiplyMultiplyFusion::MultiplyMultiplyFusion() { auto m_mul2_constant = ngraph::pattern::wrap_type(); auto m_mul2 = ngraph::pattern::wrap_type({m_mul1, m_mul2_constant}); - ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher & m) -> bool { - auto & label_to_output = m.get_pattern_value_map(); + ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher& m) -> bool { + auto& label_to_output = m.get_pattern_value_map(); auto mul1 = label_to_output[m_mul1].get_node_shared_ptr(); auto mul2 = label_to_output[m_mul2].get_node_shared_ptr(); @@ -122,7 +125,9 @@ ngraph::pass::MultiplyMultiplyFusion::MultiplyMultiplyFusion() { // Replace Multiply->Multiply with single Multiply // Multiply operation will be added to the list of ops requested for pattern matching - auto new_mul = register_new_node(input, op::util::eltwise_fold(mul1_const, mul2_const)); + auto new_mul = + register_new_node(input, + op::util::eltwise_fold(mul1_const, mul2_const)); copy_runtime_info({mul1, mul2}, new_mul); new_mul->set_friendly_name(mul2->get_friendly_name()); diff --git a/src/common/transformations/src/transformations/common_optimizations/mark_precision_sensitive_divides.cpp b/src/common/transformations/src/transformations/common_optimizations/mark_precision_sensitive_divides.cpp index afddeeec062..c57fd4db283 100644 --- a/src/common/transformations/src/transformations/common_optimizations/mark_precision_sensitive_divides.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/mark_precision_sensitive_divides.cpp @@ -13,8 +13,8 @@ #include "transformations/utils/utils.hpp" bool ov::pass::MarkPrecisionSensitiveDivides::run_on_model(const std::shared_ptr& m) { - std::deque nodes; - std::unordered_set visited, precision_sensitive_visited; + std::deque nodes; + std::unordered_set visited, precision_sensitive_visited; for (auto& r : m->get_results()) { nodes.push_back(r.get()); visited.insert(r.get()); @@ -48,7 +48,8 @@ bool ov::pass::MarkPrecisionSensitiveDivides::run_on_model(const std::shared_ptr for (auto& input_value : curr_node->input_values()) { // continue searching const auto& input_node = input_value.get_node(); - if (visited.count(input_node)) continue; + if (visited.count(input_node)) + continue; nodes.push_front(input_node); visited.insert(input_node); } diff --git a/src/common/transformations/src/transformations/common_optimizations/mark_precision_sensitive_subgraphs.cpp b/src/common/transformations/src/transformations/common_optimizations/mark_precision_sensitive_subgraphs.cpp index 
dd1775bd951..4c218c9c75d 100644 --- a/src/common/transformations/src/transformations/common_optimizations/mark_precision_sensitive_subgraphs.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/mark_precision_sensitive_subgraphs.cpp @@ -29,7 +29,7 @@ bool ov::pass::MarkPrecisionSensitiveSubgraphs::run_on_model(const std::shared_p visited.insert(r.get()); } - auto markup_func = [](Node * node) { + auto markup_func = [](Node* node) { if (ov::is_type(node)) { ov::disable_fp16_compression(node->shared_from_this()); } @@ -42,14 +42,16 @@ bool ov::pass::MarkPrecisionSensitiveSubgraphs::run_on_model(const std::shared_p if (ov::is_precision_sensitive(input)) { visited.insert(input.get_source_output().get_node()); ngraph::op::util::visit_shape_path(input.get_source_output().get_node(), - precision_sensitive_visited, markup_func); + precision_sensitive_visited, + markup_func); } } for (auto& input_value : curr_node->input_values()) { // continue searching const auto& input_node = input_value.get_node(); - if (visited.count(input_node)) continue; + if (visited.count(input_node)) + continue; nodes.push_front(input_node); visited.insert(input_node); } diff --git a/src/common/transformations/src/transformations/common_optimizations/matmul_const_transposes_extraction.cpp b/src/common/transformations/src/transformations/common_optimizations/matmul_const_transposes_extraction.cpp index 07cb1bf115f..3558be6499f 100644 --- a/src/common/transformations/src/transformations/common_optimizations/matmul_const_transposes_extraction.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/matmul_const_transposes_extraction.cpp @@ -13,13 +13,12 @@ NGRAPH_RTTI_DEFINITION(ngraph::pass::MatMulConstTransposesExtraction, "MatMulCon ngraph::pass::MatMulConstTransposesExtraction::MatMulConstTransposesExtraction() { auto data_pattern = pattern::any_input(); - auto weights_pattern = pattern::wrap_type([](Output node) -> bool { - const auto& pshape = node.get_partial_shape(); - const auto& rank = pshape.rank(); - return rank.is_static() && rank.get_length() >= 2 && - std::count(pshape.begin(), pshape.end(), 1) >= rank.get_length() - 2; - }); + auto weights_pattern = pattern::wrap_type([](Output node) -> bool { + const auto& pshape = node.get_partial_shape(); + const auto& rank = pshape.rank(); + return rank.is_static() && rank.get_length() >= 2 && + std::count(pshape.begin(), pshape.end(), 1) >= rank.get_length() - 2; + }); auto matmul_pattern = pattern::wrap_type({data_pattern, weights_pattern}); matcher_pass_callback callback = [=](pattern::Matcher& m) { auto node = m.get_match_root(); @@ -33,13 +32,17 @@ ngraph::pass::MatMulConstTransposesExtraction::MatMulConstTransposesExtraction() std::vector transpose_order(weights.get_partial_shape().size()); std::iota(transpose_order.begin(), transpose_order.end(), 0); std::reverse(transpose_order.end() - 2, transpose_order.end()); - std::shared_ptr transpose = std::make_shared(weights, - op::Constant::create(element::i32, {transpose_order.size()}, transpose_order)); + std::shared_ptr transpose = std::make_shared( + weights, + op::Constant::create(element::i32, {transpose_order.size()}, transpose_order)); if (ov::is_type(weights.get_node())) { if (auto constant = get_constant_from_source(transpose)) transpose = constant; } - auto new_matmul = std::make_shared(pattern_value_map.at(data_pattern), transpose, matmul->get_transpose_a(), true); + auto new_matmul = std::make_shared(pattern_value_map.at(data_pattern), + transpose, + 
matmul->get_transpose_a(), + true); new_matmul->set_friendly_name(matmul->get_friendly_name()); copy_runtime_info(node, {transpose, new_matmul}); replace_node(node, new_matmul); diff --git a/src/common/transformations/src/transformations/common_optimizations/matmul_multiply_fusion.cpp b/src/common/transformations/src/transformations/common_optimizations/matmul_multiply_fusion.cpp index 0dc61b235a4..c86bfad7a80 100644 --- a/src/common/transformations/src/transformations/common_optimizations/matmul_multiply_fusion.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/matmul_multiply_fusion.cpp @@ -3,12 +3,13 @@ // #include "transformations/common_optimizations/matmul_multiply_fusion.hpp" -#include "transformations/utils/utils.hpp" #include #include #include + #include "itt.hpp" +#include "transformations/utils/utils.hpp" using namespace ngraph; @@ -61,7 +62,8 @@ static std::shared_ptr fuse_const_to_weights(const std::shared_ptr& // Check if const's last dimension matches last weights dimension if (matmul_casted->get_transpose_b()) { if (weights_shape[weights_rank - 2].is_dynamic() || - (weights_rank > 1 && const_shape.back() != static_cast(weights_shape[weights_rank - 2].get_length()))) { + (weights_rank > 1 && + const_shape.back() != static_cast(weights_shape[weights_rank - 2].get_length()))) { return nullptr; } } else if (weights_shape[weights_rank - 1].is_dynamic() || @@ -83,8 +85,9 @@ static std::shared_ptr fuse_const_to_weights(const std::shared_ptr& bool const_broadcasts_weights = weights_rank < const_rank; for (int64_t i = 3; i <= const_rank; i++) { if (const_shape[const_rank - i] != 1) { - const_broadcasts_weights = const_broadcasts_weights || - ((weights_rank - i >= 0) && (weights_shape[weights_rank - i] != const_shape[const_rank - i])); + const_broadcasts_weights = + const_broadcasts_weights || + ((weights_rank - i >= 0) && (weights_shape[weights_rank - i] != const_shape[const_rank - i])); } } bool const_broadcasts_input = true; @@ -94,8 +97,9 @@ static std::shared_ptr fuse_const_to_weights(const std::shared_ptr& const_broadcasts_input = input_rank < const_rank; for (int64_t i = 3; i <= const_rank; i++) { if (const_shape[const_rank - i] != 1) { - const_broadcasts_input = const_broadcasts_input || - ((input_rank - i >= 0) && (input_shape[input_rank - i] != const_shape[const_rank - i])); + const_broadcasts_input = + const_broadcasts_input || + ((input_rank - i >= 0) && (input_shape[input_rank - i] != const_shape[const_rank - i])); } } } @@ -105,10 +109,11 @@ static std::shared_ptr fuse_const_to_weights(const std::shared_ptr& } } - auto transpose_const = [] (const std::shared_ptr& mul_const) -> std::shared_ptr { + auto transpose_const = [](const std::shared_ptr& mul_const) -> std::shared_ptr { auto const_shape = mul_const->get_shape(); auto const_rank = const_shape.size(); - if (shape_size(const_shape) == 1 || (const_rank > 1 && const_shape[const_rank - 2] == 1 && const_shape[const_rank - 1] == 1)) { + if (shape_size(const_shape) == 1 || + (const_rank > 1 && const_shape[const_rank - 2] == 1 && const_shape[const_rank - 1] == 1)) { // Nothing to transpose - constant has shape (..., 1, 1) return mul_const; } @@ -116,14 +121,17 @@ static std::shared_ptr fuse_const_to_weights(const std::shared_ptr& // Scalars were fused before, it suffices to check for 1D shape here if (const_rank == 1) { const_shape.insert(const_shape.begin(), 1); - new_const = std::make_shared(mul_const, - opset8::Constant::create(element::u64, Shape{const_shape.size()}, const_shape), false); + 
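// MatMulMultiplyFusion folds a Multiply-by-constant that follows a MatMul into the
// MatMul weights, using (A x W) * c == A x (W * c) when c only scales the output
// channels; the shape checks above guarantee this, and transpose_const keeps the
// constant aligned with transposed weight layouts. Sketch for one output element
// (hypothetical helpers; the real op is batched and type generic):
static float matmul_then_scale(const float* a_row, const float* w_col, int k, float c) {
    float acc = 0.0f;
    for (int i = 0; i < k; ++i)
        acc += a_row[i] * w_col[i];        // one element of A x W
    return acc * c;                        // scaling the result afterwards ...
}
static float scale_weights_then_matmul(const float* a_row, const float* w_col, int k, float c) {
    float acc = 0.0f;
    for (int i = 0; i < k; ++i)
        acc += a_row[i] * (w_col[i] * c);  // ... equals scaling the weight column first
    return acc;
}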
new_const = std::make_shared( + mul_const, + opset8::Constant::create(element::u64, Shape{const_shape.size()}, const_shape), + false); } std::vector perm(const_shape.size()); std::iota(perm.begin(), perm.end(), 0); std::swap(*(perm.end() - 1), *(perm.end() - 2)); - auto transpose = std::make_shared(new_const, - opset8::Constant::create(element::i64, Shape{perm.size()}, perm)); + auto transpose = + std::make_shared(new_const, + opset8::Constant::create(element::i64, Shape{perm.size()}, perm)); return get_constant_from_source(transpose); }; @@ -149,7 +157,8 @@ pass::MatMulMultiplyFusion::MatMulMultiplyFusion() { const auto& pattern_map = m.get_pattern_value_map(); const auto& weights = pattern_map.at(weights_pattern); auto mul = pattern_map.at(mul_pattern).get_node_shared_ptr(); - auto mul_const = std::dynamic_pointer_cast(pattern_map.at(mul_const_pattern).get_node_shared_ptr()); + auto mul_const = + std::dynamic_pointer_cast(pattern_map.at(mul_const_pattern).get_node_shared_ptr()); if (!mul_const) return false; auto matmul = pattern_map.at(matmul_pattern).get_node_shared_ptr(); diff --git a/src/common/transformations/src/transformations/common_optimizations/mish_fusion.cpp b/src/common/transformations/src/transformations/common_optimizations/mish_fusion.cpp index 5037c16fc5d..0567bce6cdb 100644 --- a/src/common/transformations/src/transformations/common_optimizations/mish_fusion.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/mish_fusion.cpp @@ -2,15 +2,15 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "itt.hpp" #include "transformations/common_optimizations/mish_fusion.hpp" #include +#include +#include +#include #include -#include -#include -#include +#include "itt.hpp" NGRAPH_RTTI_DEFINITION(ngraph::pass::MishFusion, "MishFusion", 0); @@ -24,7 +24,7 @@ ngraph::pass::MishFusion::MishFusion() { auto mul = std::make_shared(input, tanh); ngraph::matcher_pass_callback matcher_pass_callback = [=](ngraph::pattern::Matcher& m) { - auto & pattern_to_output = m.get_pattern_value_map(); + auto& pattern_to_output = m.get_pattern_value_map(); auto exp_input = pattern_to_output.at(input); auto mish = std::make_shared(exp_input); @@ -34,7 +34,8 @@ ngraph::pass::MishFusion::MishFusion() { pattern_to_output.at(tanh).get_node_shared_ptr(), pattern_to_output.at(log).get_node_shared_ptr(), pattern_to_output.at(add).get_node_shared_ptr(), - pattern_to_output.at(exp).get_node_shared_ptr()}, mish); + pattern_to_output.at(exp).get_node_shared_ptr()}, + mish); ngraph::replace_node(m.get_match_root(), mish); return true; }; diff --git a/src/common/transformations/src/transformations/common_optimizations/moc_transformations.cpp b/src/common/transformations/src/transformations/common_optimizations/moc_transformations.cpp index e75372e89d4..a77601e3821 100644 --- a/src/common/transformations/src/transformations/common_optimizations/moc_transformations.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/moc_transformations.cpp @@ -3,63 +3,61 @@ // #include - -#include -#include - -#include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include #include +#include +#include +#include #include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include #include -#include -#include -#include -#include -#include -#include 
-#include -#include -#include +#include +#include +#include #include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include #include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include NGRAPH_RTTI_DEFINITION(ngraph::pass::MOCTransformations, "MOCTransformations", 0); @@ -68,7 +66,7 @@ bool ngraph::pass::MOCTransformations::run_on_model(const std::shared_ptr input_shapes; if (!m_use_shapes) { - for (auto &¶m : f->get_parameters()) { + for (auto&& param : f->get_parameters()) { input_shapes[param.get()] = param->get_partial_shape(); param->set_partial_shape(PartialShape::dynamic(param->get_partial_shape().rank())); } @@ -81,7 +79,7 @@ bool ngraph::pass::MOCTransformations::run_on_model(const std::shared_ptr(); if (m_low_precision_enabled) { manager.register_pass( - element::TypeVector{ ngraph::element::i8, ngraph::element::u8, ngraph::element::i4, ngraph::element::u4 }); + element::TypeVector{ngraph::element::i8, ngraph::element::u8, ngraph::element::i4, ngraph::element::u4}); } if (!m_use_shapes) { manager.register_pass(); @@ -191,7 +189,7 @@ bool ngraph::pass::MOCTransformations::run_on_model(const std::shared_ptrget_parameters()) { + for (auto&& param : f->get_parameters()) { param->set_partial_shape(input_shapes.at(param.get())); } f->validate_nodes_and_infer_types(); diff --git a/src/common/transformations/src/transformations/common_optimizations/mul_conv_fusion.cpp b/src/common/transformations/src/transformations/common_optimizations/mul_conv_fusion.cpp index e5a74bdca70..28a0196dba2 100644 --- a/src/common/transformations/src/transformations/common_optimizations/mul_conv_fusion.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/mul_conv_fusion.cpp @@ -3,18 +3,17 @@ // #include "transformations/common_optimizations/mul_conv_fusion.hpp" -#include "itt.hpp" #include +#include +#include +#include +#include +#include +#include #include -#include -#include -#include -#include -#include - -#include +#include "itt.hpp" NGRAPH_RTTI_DEFINITION(ngraph::pass::MultiplyConvolutionFusion, "MultiplyConvolutionFusion", 0); @@ -22,11 +21,12 @@ ngraph::pass::MultiplyConvolutionFusion::MultiplyConvolutionFusion() { MATCHER_SCOPE(MultiplyConvolutionFusion); auto input_pattern = pattern::any_input(); auto mul_const_pattern = ngraph::pattern::wrap_type(); - auto mul_pattern = ngraph::pattern::wrap_type({input_pattern, mul_const_pattern}, pattern::consumers_count(1)); + auto mul_pattern = + ngraph::pattern::wrap_type({input_pattern, mul_const_pattern}, pattern::consumers_count(1)); auto weights_pattern = ngraph::pattern::any_input(pattern::has_static_shape()); auto conv_pattern = ngraph::pattern::wrap_type({mul_pattern, weights_pattern}); - matcher_pass_callback callback = [=](pattern::Matcher & m) -> bool { + matcher_pass_callback callback = [=](pattern::Matcher& m) -> bool { const auto& pattern_to_output = m.get_pattern_value_map(); // Can't fuse Multiply to Convolution if that Multiply is part of dequantization subgraph @@ -58,8 +58,7 @@ ngraph::pass::MultiplyConvolutionFusion::MultiplyConvolutionFusion() { auto new_conv = conv->clone_with_new_inputs({input, new_weights}); new_conv->set_friendly_name(conv->get_friendly_name()); - 
copy_runtime_info({conv, pattern_to_output.at(mul_pattern).get_node_shared_ptr()}, - {new_weights, new_conv}); + copy_runtime_info({conv, pattern_to_output.at(mul_pattern).get_node_shared_ptr()}, {new_weights, new_conv}); replace_node(conv, new_conv); return true; @@ -75,11 +74,12 @@ ngraph::pass::MultiplyGroupConvolutionFusion::MultiplyGroupConvolutionFusion() { MATCHER_SCOPE(MultiplyGroupConvolutionFusion); auto input_pattern = pattern::any_input(); auto mul_const_pattern = ngraph::pattern::wrap_type(); - auto mul_pattern = ngraph::pattern::wrap_type({input_pattern, mul_const_pattern}, pattern::consumers_count(1)); + auto mul_pattern = + ngraph::pattern::wrap_type({input_pattern, mul_const_pattern}, pattern::consumers_count(1)); auto weights_pattern = ngraph::pattern::any_input(pattern::has_static_shape()); auto conv_pattern = ngraph::pattern::wrap_type({mul_pattern, weights_pattern}); - matcher_pass_callback callback = [=](pattern::Matcher & m) -> bool { + matcher_pass_callback callback = [=](pattern::Matcher& m) -> bool { const auto& pattern_to_output = m.get_pattern_value_map(); // Can't fuse Multiply to Convolution if that Multiply is part of dequantization subgraph @@ -109,7 +109,10 @@ ngraph::pass::MultiplyGroupConvolutionFusion::MultiplyGroupConvolutionFusion() { if (op::util::check_for_broadcast(weights_shape, new_shape)) { return false; } - mul_const = std::make_shared(mul_const, op::Constant::create(element::u64, Shape{new_shape.size()}, new_shape), false); + mul_const = std::make_shared( + mul_const, + op::Constant::create(element::u64, Shape{new_shape.size()}, new_shape), + false); } auto weights_multiply = std::make_shared(weights, mul_const); @@ -122,8 +125,7 @@ ngraph::pass::MultiplyGroupConvolutionFusion::MultiplyGroupConvolutionFusion() { auto new_conv = conv->clone_with_new_inputs({input, new_weights}); new_conv->set_friendly_name(conv->get_friendly_name()); - copy_runtime_info({conv, pattern_to_output.at(mul_pattern).get_node_shared_ptr()}, - {new_weights, new_conv}); + copy_runtime_info({conv, pattern_to_output.at(mul_pattern).get_node_shared_ptr()}, {new_weights, new_conv}); replace_node(conv, new_conv); return true; @@ -139,11 +141,12 @@ ngraph::pass::MultiplyConvolutionBackpropDataFusion::MultiplyConvolutionBackprop MATCHER_SCOPE(MultiplyConvolutionBackpropDataFusion); auto input_pattern = pattern::any_input(); auto mul_const_pattern = ngraph::pattern::wrap_type(); - auto mul_pattern = ngraph::pattern::wrap_type({input_pattern, mul_const_pattern}, pattern::consumers_count(1)); + auto mul_pattern = + ngraph::pattern::wrap_type({input_pattern, mul_const_pattern}, pattern::consumers_count(1)); auto weights_pattern = ngraph::pattern::any_input(pattern::has_static_shape()); auto conv_pattern = ngraph::pattern::wrap_type({mul_pattern, weights_pattern}); - matcher_pass_callback callback = [=](pattern::Matcher & m) -> bool { + matcher_pass_callback callback = [=](pattern::Matcher& m) -> bool { const auto& pattern_to_output = m.get_pattern_value_map(); // Can't fuse Multiply to Convolution if that Multiply is part of dequantization subgraph @@ -165,7 +168,7 @@ ngraph::pass::MultiplyConvolutionBackpropDataFusion::MultiplyConvolutionBackprop // In backprop, weights pixels are applied to input differently than in fprop convolution for (size_t i = 0; i < mul_const_shape.size(); i++) { if (i == 1) - continue; + continue; if (mul_const_shape[i] != 1) return false; } @@ -175,7 +178,10 @@ ngraph::pass::MultiplyConvolutionBackpropDataFusion::MultiplyConvolutionBackprop if 
(op::util::check_for_broadcast(weights_shape, new_shape)) { return false; } - mul_const = std::make_shared(mul_const, op::Constant::create(element::u64, Shape{new_shape.size()}, new_shape), false); + mul_const = std::make_shared( + mul_const, + op::Constant::create(element::u64, Shape{new_shape.size()}, new_shape), + false); } auto weights_multiply = std::make_shared(weights, mul_const); @@ -188,8 +194,7 @@ ngraph::pass::MultiplyConvolutionBackpropDataFusion::MultiplyConvolutionBackprop auto new_conv = conv->clone_with_new_inputs({input, new_weights}); new_conv->set_friendly_name(conv->get_friendly_name()); - copy_runtime_info({conv, pattern_to_output.at(mul_pattern).get_node_shared_ptr()}, - {new_weights, new_conv}); + copy_runtime_info({conv, pattern_to_output.at(mul_pattern).get_node_shared_ptr()}, {new_weights, new_conv}); replace_node(conv, new_conv); return true; @@ -199,17 +204,21 @@ ngraph::pass::MultiplyConvolutionBackpropDataFusion::MultiplyConvolutionBackprop register_matcher(m, callback); } -NGRAPH_RTTI_DEFINITION(ngraph::pass::MultiplyGroupConvolutionBackpropDataFusion, "MultiplyGroupConvolutionBackpropDataFusion", 0); +NGRAPH_RTTI_DEFINITION(ngraph::pass::MultiplyGroupConvolutionBackpropDataFusion, + "MultiplyGroupConvolutionBackpropDataFusion", + 0); ngraph::pass::MultiplyGroupConvolutionBackpropDataFusion::MultiplyGroupConvolutionBackpropDataFusion() { MATCHER_SCOPE(MultiplyGroupConvolutionBackpropDataFusion); auto input_pattern = pattern::any_input(); auto mul_const_pattern = ngraph::pattern::wrap_type(); - auto mul_pattern = ngraph::pattern::wrap_type({input_pattern, mul_const_pattern}, pattern::consumers_count(1)); + auto mul_pattern = + ngraph::pattern::wrap_type({input_pattern, mul_const_pattern}, pattern::consumers_count(1)); auto weights_pattern = ngraph::pattern::any_input(pattern::has_static_shape()); - auto conv_pattern = ngraph::pattern::wrap_type({mul_pattern, weights_pattern}); + auto conv_pattern = + ngraph::pattern::wrap_type({mul_pattern, weights_pattern}); - matcher_pass_callback callback = [=](pattern::Matcher & m) -> bool { + matcher_pass_callback callback = [=](pattern::Matcher& m) -> bool { const auto& pattern_to_output = m.get_pattern_value_map(); // Can't fuse Multiply to Convolution if that Multiply is part of dequantization subgraph @@ -230,11 +239,12 @@ ngraph::pass::MultiplyGroupConvolutionBackpropDataFusion::MultiplyGroupConvoluti // In backprop, weights pixels are applied to input differently than in fprop convolution for (size_t i = 0; i < mul_const_shape.size(); i++) { if (i == 1) - continue; + continue; if (mul_const_shape[i] != 1) return false; } - // Reshape mul_const from shape (1, C, 1, 1) to (G, C / G, 1, 1, 1) to match GroupConvolutionBackpropData weights format + // Reshape mul_const from shape (1, C, 1, 1) to (G, C / G, 1, 1, 1) to match GroupConvolutionBackpropData + // weights format auto G = mul_const_shape[1] > 1 ? 
weights_shape[0] : 1; auto C = mul_const_shape[1] / G; Shape new_shape{G, C, 1}; @@ -242,7 +252,10 @@ ngraph::pass::MultiplyGroupConvolutionBackpropDataFusion::MultiplyGroupConvoluti if (op::util::check_for_broadcast(weights_shape, new_shape)) { return false; } - mul_const = std::make_shared(mul_const, op::Constant::create(element::u64, Shape{new_shape.size()}, new_shape), false); + mul_const = std::make_shared( + mul_const, + op::Constant::create(element::u64, Shape{new_shape.size()}, new_shape), + false); } auto weights_multiply = std::make_shared(weights, mul_const); @@ -255,8 +268,7 @@ ngraph::pass::MultiplyGroupConvolutionBackpropDataFusion::MultiplyGroupConvoluti auto new_conv = conv->clone_with_new_inputs({input, new_weights}); new_conv->set_friendly_name(conv->get_friendly_name()); - copy_runtime_info({conv, pattern_to_output.at(mul_pattern).get_node_shared_ptr()}, - {new_weights, new_conv}); + copy_runtime_info({conv, pattern_to_output.at(mul_pattern).get_node_shared_ptr()}, {new_weights, new_conv}); replace_node(conv, new_conv); return true; diff --git a/src/common/transformations/src/transformations/common_optimizations/mul_fake_quantize_fusion.cpp b/src/common/transformations/src/transformations/common_optimizations/mul_fake_quantize_fusion.cpp index f2702cc3649..9dc9a72c76f 100644 --- a/src/common/transformations/src/transformations/common_optimizations/mul_fake_quantize_fusion.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/mul_fake_quantize_fusion.cpp @@ -3,17 +3,16 @@ // #include "transformations/common_optimizations/mul_fake_quantize_fusion.hpp" -#include "transformations/utils/utils.hpp" #include +#include +#include +#include +#include #include -#include -#include -#include -#include #include "itt.hpp" - +#include "transformations/utils/utils.hpp" NGRAPH_RTTI_DEFINITION(ngraph::pass::MulFakeQuantizeFusion, "MulFakeQuantizeFusion", 0); @@ -21,8 +20,8 @@ ngraph::pass::MulFakeQuantizeFusion::MulFakeQuantizeFusion() { MATCHER_SCOPE(MulFakeQuantizeFusion); auto input_pattern = ngraph::pattern::any_input(); auto const_pattern = ngraph::pattern::wrap_type(); - auto mul_pattern = ngraph::pattern::wrap_type({input_pattern, const_pattern}, - pattern::consumers_count(1)); + auto mul_pattern = + ngraph::pattern::wrap_type({input_pattern, const_pattern}, pattern::consumers_count(1)); auto fq_pattern = ngraph::pattern::wrap_type({mul_pattern, ngraph::pattern::any_input(), ngraph::pattern::any_input(), @@ -34,10 +33,12 @@ ngraph::pass::MulFakeQuantizeFusion::MulFakeQuantizeFusion() { const auto& type = input.get_element_type(); if (type.bitwidth() < element::f32.bitwidth()) return false; - auto fq = std::dynamic_pointer_cast(pattern_value_map.at(fq_pattern).get_node_shared_ptr()); + auto fq = + std::dynamic_pointer_cast(pattern_value_map.at(fq_pattern).get_node_shared_ptr()); if (!fq) return false; - auto mul_const = std::dynamic_pointer_cast(pattern_value_map.at(const_pattern).get_node_shared_ptr()); + auto mul_const = + std::dynamic_pointer_cast(pattern_value_map.at(const_pattern).get_node_shared_ptr()); if (!mul_const) return false; @@ -50,7 +51,9 @@ ngraph::pass::MulFakeQuantizeFusion::MulFakeQuantizeFusion() { } auto mul_const_value = mul_const->cast_vector(); - if (std::any_of(mul_const_value.begin(), mul_const_value.end(), [] (float f) -> bool { return f <= 0.0f; })) + if (std::any_of(mul_const_value.begin(), mul_const_value.end(), [](float f) -> bool { + return f <= 0.0f; + })) return false; std::shared_ptr new_const = mul_const; @@ -75,8 +78,10 @@ 
ngraph::pass::MulFakeQuantizeFusion::MulFakeQuantizeFusion() { if (diff > 0) { // Reshape constants like (C, 1, 1) to (1, C, 1, 1) const_shape.insert(const_shape.begin(), diff, 1); - new_const = std::make_shared(new_const, - op::Constant::create(element::u64, Shape{const_shape.size()}, const_shape), false); + new_const = std::make_shared( + new_const, + op::Constant::create(element::u64, Shape{const_shape.size()}, const_shape), + false); } // disallow constant shapes other than (N, 1, 1, ..., 1) or (1, C, 1, ..., 1) @@ -87,10 +92,10 @@ ngraph::pass::MulFakeQuantizeFusion::MulFakeQuantizeFusion() { auto fq_users = fq->get_users(); // Concat LPT transformation supports per tensor quantization only - bool fq_user_is_concat = std::any_of(fq_users.begin(), fq_users.end(), - [] (const std::shared_ptr node_ptr) -> bool { - return is_type(node_ptr); - }); + bool fq_user_is_concat = + std::any_of(fq_users.begin(), fq_users.end(), [](const std::shared_ptr node_ptr) -> bool { + return is_type(node_ptr); + }); if (fq_user_is_concat) return false; } @@ -104,8 +109,8 @@ ngraph::pass::MulFakeQuantizeFusion::MulFakeQuantizeFusion() { if (!new_input_high) new_input_high = input_high_div; - auto new_fq = fq->clone_with_new_inputs({input, new_input_low, new_input_high, - fq->input_value(3), fq->input_value(4)}); + auto new_fq = + fq->clone_with_new_inputs({input, new_input_low, new_input_high, fq->input_value(3), fq->input_value(4)}); if (transformation_callback(new_fq)) return false; register_new_node(new_fq); diff --git a/src/common/transformations/src/transformations/common_optimizations/mvn_fusion.cpp b/src/common/transformations/src/transformations/common_optimizations/mvn_fusion.cpp index 3cac385c0aa..55f39d5f46c 100644 --- a/src/common/transformations/src/transformations/common_optimizations/mvn_fusion.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/mvn_fusion.cpp @@ -2,17 +2,17 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "itt.hpp" #include "transformations/common_optimizations/mvn_fusion.hpp" -#include "transformations/utils/utils.hpp" #include +#include +#include +#include +#include #include -#include -#include -#include -#include +#include "itt.hpp" +#include "transformations/utils/utils.hpp" NGRAPH_RTTI_DEFINITION(ngraph::pass::MVNFusion, "MVNFusion", 0); @@ -38,35 +38,35 @@ ngraph::pass::MVNFusionWithoutConstants::MVNFusionWithoutConstants() { // (x - ReduceMean(x, axes)) // `------mean1-------' auto mean1_axes = pattern::wrap_type(); - auto mean1 = pattern::wrap_type({ x, mean1_axes }); + auto mean1 = pattern::wrap_type({x, mean1_axes}); // (x - ReduceMean(x, axes)) // `-sub1------------------' - auto sub1 = pattern::wrap_type({ x, mean1 }); + auto sub1 = pattern::wrap_type({x, mean1}); // Sqrt(ReduceMean((x - ReduceMean(x, axes)) ^ 2)) // `---mean2----------' auto mean2_axes = pattern::wrap_type(); - auto mean2 = pattern::wrap_type({ x, mean2_axes }); + auto mean2 = pattern::wrap_type({x, mean2_axes}); // Sqrt(ReduceMean((x - ReduceMean(x, axes)) ^ 2)) // `-sub2------------------' - auto sub2 = pattern::wrap_type({ x, mean2 }); + auto sub2 = pattern::wrap_type({x, mean2}); - const auto reuseSub1OrNot = std::make_shared(OutputVector{ sub1, sub2 }); + const auto reuseSub1OrNot = std::make_shared(OutputVector{sub1, sub2}); - auto cast = pattern::wrap_type({ reuseSub1OrNot }); - const auto hasConvertOrNot = std::make_shared(OutputVector{ cast, reuseSub1OrNot }); + auto cast = pattern::wrap_type({reuseSub1OrNot}); + const auto hasConvertOrNot = 
std::make_shared(OutputVector{cast, reuseSub1OrNot}); // Sqrt(ReduceMean((x - ReduceMean(x, axes)) ^ 2)) // `---------------------power--' - auto const_2 = pattern::wrap_type(value_is_equal_to({ 2.0 })); - auto power = pattern::wrap_type({ hasConvertOrNot, const_2 }); + auto const_2 = pattern::wrap_type(value_is_equal_to({2.0})); + auto power = pattern::wrap_type({hasConvertOrNot, const_2}); // Sqrt(ReduceMean((x - ReduceMean(x, axes)) ^ 2)) // `---mean3--------------------------------' auto mean3_axes = pattern::wrap_type(); - auto mean3 = pattern::wrap_type({ power, mean3_axes }); + auto mean3 = pattern::wrap_type({power, mean3_axes}); auto const_0_5 = pattern::wrap_type(value_is_equal_to({0.5})); auto eps = pattern::wrap_type(); @@ -74,48 +74,51 @@ ngraph::pass::MVNFusionWithoutConstants::MVNFusionWithoutConstants() { // Sqrt(ReduceMean((x - ReduceMean(x, axes)) ^ 2)) // `--Power--------------------------------------' - auto power_sqrt_os = pattern::wrap_type({ mean3, const_0_5 }); - auto sqrt_os = pattern::wrap_type({ mean3 }); - const auto powerOrSqrt_os = std::make_shared(OutputVector{ power_sqrt_os, sqrt_os }); + auto power_sqrt_os = pattern::wrap_type({mean3, const_0_5}); + auto sqrt_os = pattern::wrap_type({mean3}); + const auto powerOrSqrt_os = std::make_shared(OutputVector{power_sqrt_os, sqrt_os}); // Sqrt(ReduceMean((x - ReduceMean(x, axes)) ^ 2)) + eps // `----------------------------------------------Add---' - auto add_eps_os = pattern::wrap_type({ powerOrSqrt_os, eps }); + auto add_eps_os = pattern::wrap_type({powerOrSqrt_os, eps}); // ------------------- INSIDE_SQRT ---------------------- // (Sqrt(ReduceMean((x - ReduceMean(x, axes)) ^ 2) + eps)) // `-----------------------------------------------Add---' - auto add_eps_is = pattern::wrap_type({ mean3, eps }); + auto add_eps_is = pattern::wrap_type({mean3, eps}); // Sqrt(ReduceMean((x - ReduceMean(x, axes)) ^ 2)) // `--Power--------------------------------------' - auto power_sqrt_is = pattern::wrap_type({ add_eps_is, const_0_5 }); - auto sqrt_is = pattern::wrap_type({ add_eps_is }); - const auto powerOrSqrt_is = std::make_shared(OutputVector{ power_sqrt_is, sqrt_is }); + auto power_sqrt_is = pattern::wrap_type({add_eps_is, const_0_5}); + auto sqrt_is = pattern::wrap_type({add_eps_is}); + const auto powerOrSqrt_is = std::make_shared(OutputVector{power_sqrt_is, sqrt_is}); - auto outsideOrInside = std::make_shared(OutputVector{ add_eps_os, powerOrSqrt_is }); + auto outsideOrInside = std::make_shared(OutputVector{add_eps_os, powerOrSqrt_is}); // Final Divide - auto const_neg_1 = pattern::wrap_type(value_is_equal_to({ -1 })); - auto power_div = pattern::wrap_type({ outsideOrInside, const_neg_1 }); - auto div = pattern::wrap_type({ sub1, power_div }); + auto const_neg_1 = pattern::wrap_type(value_is_equal_to({-1})); + auto power_div = pattern::wrap_type({outsideOrInside, const_neg_1}); + auto div = pattern::wrap_type({sub1, power_div}); - auto div_alt = pattern::wrap_type({ sub1, outsideOrInside }); - const auto powerMulOrDiv = std::make_shared(OutputVector{ div, div_alt }); + auto div_alt = pattern::wrap_type({sub1, outsideOrInside}); + const auto powerMulOrDiv = std::make_shared(OutputVector{div, div_alt}); ngraph::matcher_pass_callback matcher_pass_callback = [=](ngraph::pattern::Matcher& m) { auto& pattern_to_output = m.get_pattern_value_map(); auto exp_input = pattern_to_output.at(x); - auto const_eps_node = std::dynamic_pointer_cast(pattern_to_output.at(eps).get_node_shared_ptr()); + auto const_eps_node = + 
std::dynamic_pointer_cast(pattern_to_output.at(eps).get_node_shared_ptr()); float eps_value; if (!op::util::get_single_value(const_eps_node, eps_value)) { return false; } - auto axes_1_node = std::dynamic_pointer_cast(pattern_to_output.at(mean1_axes).get_node_shared_ptr()); - auto axes_3_node = std::dynamic_pointer_cast(pattern_to_output.at(mean3_axes).get_node_shared_ptr()); + auto axes_1_node = + std::dynamic_pointer_cast(pattern_to_output.at(mean1_axes).get_node_shared_ptr()); + auto axes_3_node = + std::dynamic_pointer_cast(pattern_to_output.at(mean3_axes).get_node_shared_ptr()); if (!axes_1_node || !axes_3_node) { return false; @@ -128,7 +131,8 @@ ngraph::pass::MVNFusionWithoutConstants::MVNFusionWithoutConstants() { return false; } if (pattern_to_output.count(mean2_axes)) { - auto axes_2_node = std::dynamic_pointer_cast(pattern_to_output.at(mean2_axes).get_node_shared_ptr()); + auto axes_2_node = std::dynamic_pointer_cast( + pattern_to_output.at(mean2_axes).get_node_shared_ptr()); if (!axes_2_node) { return false; } @@ -138,10 +142,10 @@ ngraph::pass::MVNFusionWithoutConstants::MVNFusionWithoutConstants() { } } - ngraph::NodeVector nodes_to_copy_info({ pattern_to_output.at(mean1).get_node_shared_ptr(), - pattern_to_output.at(sub1).get_node_shared_ptr(), - pattern_to_output.at(power).get_node_shared_ptr(), - pattern_to_output.at(mean3).get_node_shared_ptr() }); + ngraph::NodeVector nodes_to_copy_info({pattern_to_output.at(mean1).get_node_shared_ptr(), + pattern_to_output.at(sub1).get_node_shared_ptr(), + pattern_to_output.at(power).get_node_shared_ptr(), + pattern_to_output.at(mean3).get_node_shared_ptr()}); op::MVNEpsMode mode; if (pattern_to_output.count(add_eps_os)) { @@ -202,71 +206,78 @@ ngraph::pass::MVNFusionWithConstantsInside::MVNFusionWithConstantsInside() { // (x - ReduceMean(x, axes))^2 // `------mean1-------' auto mean1_axes = pattern::wrap_type(); - auto mean1 = pattern::wrap_type({ x, mean1_axes }); + auto mean1 = pattern::wrap_type({x, mean1_axes}); // (x - ReduceMean(x, axes))^2 // `-squared_difference------' - auto squared_difference = pattern::wrap_type({ x, mean1 }); + auto squared_difference = pattern::wrap_type({x, mean1}); // 1 / Sqrt(ReduceMean((x - ReduceMean(x, axes)) ^ 2) + eps) // `---mean2--------------------------------' auto mean2_axes = pattern::wrap_type(); - auto mean2 = pattern::wrap_type({ squared_difference, mean2_axes }); + auto mean2 = pattern::wrap_type({squared_difference, mean2_axes}); // 1 / Sqrt(ReduceMean((x - ReduceMean(x, axes)) ^ 2) + eps) // `------------------------------------------add--' auto eps = pattern::wrap_type(); - auto add_eps = pattern::wrap_type({ mean2, eps }); + auto add_eps = pattern::wrap_type({mean2, eps}); // 1 / Sqrt(ReduceMean((x - ReduceMean(x, axes)) ^ 2) + eps) // `-power-------------------------------------------------' auto const_0_5 = pattern::wrap_type(value_is_equal_to({-0.5})); - auto power = pattern::wrap_type({ add_eps, const_0_5 }); + auto power = pattern::wrap_type({add_eps, const_0_5}); // gamma / Sqrt(ReduceMean((x - ReduceMean(x, axes)) ^ 2) + eps) // `---mul1----------------------------------------------------' auto gamma = pattern::wrap_type(); - auto mul1 = pattern::wrap_type({ power, gamma }); + auto mul1 = pattern::wrap_type({power, gamma}); // x * gamma / Sqrt(ReduceMean((x - ReduceMean(x, axes)) ^ 2) + eps) // `---mul2--------------------------------------------------------' - auto mul2 = pattern::wrap_type({ x, mul1 }); + auto mul2 = pattern::wrap_type({x, mul1}); // ReduceMean(x, axes) * 
gamma / Sqrt(ReduceMean((x - ReduceMean(x, axes)) ^ 2) + eps) - beta // `-------------------mul3----------------------------------------------------------' - auto mul3 = pattern::wrap_type({ mul1, mean1 }); + auto mul3 = pattern::wrap_type({mul1, mean1}); // beta - ReduceMean(x, axes) * gamma / Sqrt(ReduceMean((x - ReduceMean(x, axes)) ^ 2) + eps) // `---sub-----------------------------------------------------------------------------------' auto beta = pattern::wrap_type(); - auto sub = pattern::wrap_type({ beta, mul3 }); + auto sub = pattern::wrap_type({beta, mul3}); // Final Add // x * gamma / Sqrt(ReduceMean((x - ReduceMean(x, axes)) ^ 2) + eps) + // beta - ReduceMean(x, axes) * gamma / Sqrt(ReduceMean((x - ReduceMean(x, axes)) ^ 2) + eps) = // gamma * (x - ReduceMean(x, axes)) / Sqrt(ReduceMean((x - ReduceMean(x, axes)) ^ 2) + eps) + beta - auto add = pattern::wrap_type({ mul2, sub }); + auto add = pattern::wrap_type({mul2, sub}); ngraph::matcher_pass_callback matcher_pass_callback = [=](ngraph::pattern::Matcher& m) { auto& pattern_to_output = m.get_pattern_value_map(); auto x_output = pattern_to_output.at(x); - auto const_0_5_node = std::dynamic_pointer_cast(pattern_to_output.at(const_0_5).get_node_shared_ptr()); - auto const_gamma_node = std::dynamic_pointer_cast(pattern_to_output.at(gamma).get_node_shared_ptr()); - auto const_beta_node = std::dynamic_pointer_cast(pattern_to_output.at(beta).get_node_shared_ptr()); - auto const_eps_node = std::dynamic_pointer_cast(pattern_to_output.at(eps).get_node_shared_ptr()); + auto const_0_5_node = + std::dynamic_pointer_cast(pattern_to_output.at(const_0_5).get_node_shared_ptr()); + auto const_gamma_node = + std::dynamic_pointer_cast(pattern_to_output.at(gamma).get_node_shared_ptr()); + auto const_beta_node = + std::dynamic_pointer_cast(pattern_to_output.at(beta).get_node_shared_ptr()); + auto const_eps_node = + std::dynamic_pointer_cast(pattern_to_output.at(eps).get_node_shared_ptr()); if (!const_0_5_node || !const_beta_node || !const_gamma_node || !const_eps_node) { return false; } float eps_value; - bool valid_constant_values = op::util::has_constant_value(const_0_5_node, -0.5) && op::util::get_single_value(const_eps_node, eps_value); + bool valid_constant_values = op::util::has_constant_value(const_0_5_node, -0.5) && + op::util::get_single_value(const_eps_node, eps_value); if (!valid_constant_values) { return false; } - auto axes_1_node = std::dynamic_pointer_cast(pattern_to_output.at(mean1_axes).get_node_shared_ptr()); - auto axes_2_node = std::dynamic_pointer_cast(pattern_to_output.at(mean2_axes).get_node_shared_ptr()); + auto axes_1_node = + std::dynamic_pointer_cast(pattern_to_output.at(mean1_axes).get_node_shared_ptr()); + auto axes_2_node = + std::dynamic_pointer_cast(pattern_to_output.at(mean2_axes).get_node_shared_ptr()); if (!axes_1_node || !axes_2_node) { return false; } @@ -277,11 +288,12 @@ ngraph::pass::MVNFusionWithConstantsInside::MVNFusionWithConstantsInside() { return false; } - auto mvn = std::make_shared(x_output, axes_1_node, true, eps_value, op::MVNEpsMode::INSIDE_SQRT); + auto mvn = + std::make_shared(x_output, axes_1_node, true, eps_value, op::MVNEpsMode::INSIDE_SQRT); auto mul_gamma = std::make_shared(mvn, const_gamma_node); auto add_beta = std::make_shared(mul_gamma, const_beta_node); - ngraph::copy_runtime_info({ pattern_to_output.at(mean1).get_node_shared_ptr(), + ngraph::copy_runtime_info({pattern_to_output.at(mean1).get_node_shared_ptr(), pattern_to_output.at(squared_difference).get_node_shared_ptr(), 
pattern_to_output.at(add_eps).get_node_shared_ptr(), pattern_to_output.at(power).get_node_shared_ptr(), @@ -289,8 +301,8 @@ ngraph::pass::MVNFusionWithConstantsInside::MVNFusionWithConstantsInside() { pattern_to_output.at(mul2).get_node_shared_ptr(), pattern_to_output.at(mul3).get_node_shared_ptr(), pattern_to_output.at(sub).get_node_shared_ptr(), - pattern_to_output.at(add).get_node_shared_ptr() }, - { mvn, const_gamma_node, mul_gamma, const_beta_node, add_beta }); + pattern_to_output.at(add).get_node_shared_ptr()}, + {mvn, const_gamma_node, mul_gamma, const_beta_node, add_beta}); add_beta->set_friendly_name(m.get_match_root()->get_friendly_name()); ngraph::replace_node(m.get_match_root(), add_beta); return true; diff --git a/src/common/transformations/src/transformations/common_optimizations/nearest_neighbor_upsampling_fusion.cpp b/src/common/transformations/src/transformations/common_optimizations/nearest_neighbor_upsampling_fusion.cpp index 4478bfb56ed..47187c18ab5 100644 --- a/src/common/transformations/src/transformations/common_optimizations/nearest_neighbor_upsampling_fusion.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/nearest_neighbor_upsampling_fusion.cpp @@ -2,21 +2,21 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "itt.hpp" #include "transformations/common_optimizations/nearest_neighbor_upsampling_fusion.hpp" -#include "transformations/utils/utils.hpp" #include #include +#include +#include +#include +#include +#include #include #include #include -#include -#include -#include -#include -#include +#include "itt.hpp" +#include "transformations/utils/utils.hpp" namespace { using namespace ngraph; @@ -28,7 +28,8 @@ using namespace ngraph; // [S_1, S_2, ..., S_i, ..., S_{r - 2}], // when the shape, 's', has the form (1), and the empty vector otherwise. 
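// ---------------------------------------------------------------------------
// Editorial aside (not part of the patch): a minimal, standalone sketch of the
// scale-extraction rule described in the comment above, assuming the Mul
// constant shape has the interleaved form [1, 1, S_1, 1, S_2, ..., 1, S_{r-2}, 1]
// stated later in this file. Plain std::vector<size_t> stands in for
// ngraph::Shape; function and variable names here are illustrative only.
#include <cstdint>
#include <iostream>
#include <vector>

std::vector<float> extract_scales(const std::vector<size_t>& s, uint64_t input_rank) {
    // The constant must have 2 * r - 2 dimensions for an input of rank r >= 4.
    if (input_rank < 4 || s.size() != 2 * input_rank - 2)
        return {};
    std::vector<size_t> expected(2 * input_rank - 2, 1);
    std::vector<float> scales(input_rank - 2);
    for (size_t i = 1; i + 1 < input_rank; ++i) {
        expected[2 * i] = s[2 * i];                      // S_i may be any positive value
        scales[i - 1] = static_cast<float>(s[2 * i]);
    }
    // Every other dimension has to be exactly 1, otherwise this is not the pattern.
    return s == expected ? scales : std::vector<float>{};
}

int main() {
    // A rank-4 input upsampled 2x along both spatial axes gives shape (1, 1, 2, 1, 2, 1).
    for (float s : extract_scales({1, 1, 2, 1, 2, 1}, 4))
        std::cout << s << ' ';                           // prints: 2 2
    std::cout << '\n';
}
// ---------------------------------------------------------------------------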
std::vector get_scales_from_mul_const_shape(const Shape& s, uint64_t input_rank) { - if (input_rank < 4 || s.size() != 2 * input_rank - 2) return {}; + if (input_rank < 4 || s.size() != 2 * input_rank - 2) + return {}; ngraph::Shape expected_shape(2 * input_rank - 2, 1); std::vector scales(input_rank - 2); @@ -37,7 +38,8 @@ std::vector get_scales_from_mul_const_shape(const Shape& s, uint64_t inpu scales[i - 1] = static_cast(s[2 * i]); } - if (s != expected_shape) return {}; + if (s != expected_shape) + return {}; return scales; } @@ -48,21 +50,28 @@ bool check_concat_1(const std::shared_ptr& concat, const Shape& const auto inputs = concat->input_values(); size_t num_of_input_values = inputs.size(); - if (num_of_input_values != 2 * rank - 2) return false; + if (num_of_input_values != 2 * rank - 2) + return false; std::vector input_constants(num_of_input_values, 1); for (size_t i = 1; i < num_of_input_values; ++i) { const auto& current_input = std::dynamic_pointer_cast(inputs[i].get_node_shared_ptr()); - if (!current_input) return false; + if (!current_input) + return false; - const auto current_input_axis = std::dynamic_pointer_cast(current_input->input_value(1).get_node_shared_ptr()); - if (!current_input_axis || current_input_axis->cast_vector() != std::vector{0}) return false; + const auto current_input_axis = + std::dynamic_pointer_cast(current_input->input_value(1).get_node_shared_ptr()); + if (!current_input_axis || current_input_axis->cast_vector() != std::vector{0}) + return false; - const auto unsqueezed_const = std::dynamic_pointer_cast(current_input->input_value(0).get_node_shared_ptr()); - if (!unsqueezed_const) return false; + const auto unsqueezed_const = + std::dynamic_pointer_cast(current_input->input_value(0).get_node_shared_ptr()); + if (!unsqueezed_const) + return false; const auto unsqueezed_const_value = unsqueezed_const->cast_vector(); - if (unsqueezed_const_value.size() != 1) return false; + if (unsqueezed_const_value.size() != 1) + return false; input_constants[i] = unsqueezed_const_value[0]; } @@ -73,59 +82,69 @@ bool check_concat_1(const std::shared_ptr& concat, const Shape& } expected_input_constants.back() = static_cast(shape.back()); - if (input_constants != expected_input_constants) return false; + if (input_constants != expected_input_constants) + return false; return true; } -// In this transformation 'concat_2' must have r inputs (where r is an output rank of the root of the transformation pattern). -// And (r - 1) inputs must be unsqueezed constants, and the list of these constants is +// In this transformation 'concat_2' must have r inputs (where r is an output rank of the root of the transformation +// pattern). And (r - 1) inputs must be unsqueezed constants, and the list of these constants is // [newD_1, newD_2, ..., newD_{r - 2}, C], // where C is number of channels in the output shape of the root of the transformation pattern. // // This function gets a new spatial shape from unsqueezed constants of 'concat_2', that is, the vector with elements // [newD_1, newD_2, ..., newD_{r - 2}]. 
-std::vector get_new_spatial_shape_from_concat_2(const std::shared_ptr& concat, const Shape& input_shape) { +std::vector get_new_spatial_shape_from_concat_2(const std::shared_ptr& concat, + const Shape& input_shape) { size_t rank = input_shape.size(); const auto inputs = concat->input_values(); size_t num_of_input_values = inputs.size(); - if (num_of_input_values != rank) return {}; + if (num_of_input_values != rank) + return {}; std::vector input_constants(num_of_input_values - 1, 0); for (size_t i = 1; i < num_of_input_values; ++i) { const auto& current_input = std::dynamic_pointer_cast(inputs[i].get_node_shared_ptr()); - if (!current_input) return {}; + if (!current_input) + return {}; - const auto current_input_axis = std::dynamic_pointer_cast(current_input->input_value(1).get_node_shared_ptr()); - if (!current_input_axis || current_input_axis->cast_vector() != std::vector{0}) return {}; + const auto current_input_axis = + std::dynamic_pointer_cast(current_input->input_value(1).get_node_shared_ptr()); + if (!current_input_axis || current_input_axis->cast_vector() != std::vector{0}) + return {}; - const auto unsqueezed_const = std::dynamic_pointer_cast(current_input->input_value(0).get_node_shared_ptr()); - if (!unsqueezed_const) return {}; + const auto unsqueezed_const = + std::dynamic_pointer_cast(current_input->input_value(0).get_node_shared_ptr()); + if (!unsqueezed_const) + return {}; const auto unsqueezed_const_value = unsqueezed_const->cast_vector(); - if (unsqueezed_const_value.size() != 1) return {}; + if (unsqueezed_const_value.size() != 1) + return {}; input_constants[i - 1] = unsqueezed_const_value[0]; } - if (input_constants.back() != static_cast(input_shape.back())) return {}; + if (input_constants.back() != static_cast(input_shape.back())) + return {}; input_constants.pop_back(); return input_constants; } -} // namespace +} // namespace NGRAPH_RTTI_DEFINITION(ngraph::pass::NearestNeighborUpsamplingFusion, "NearestNeighborUpsamplingFusion", 0); ngraph::pass::NearestNeighborUpsamplingFusion::NearestNeighborUpsamplingFusion() { MATCHER_SCOPE(NearestNeighborUpsamplingFusion); - // This transformation looks for Interpolate layer implemented using simple operations, namely ShapeOf, StridedSlice, Concat, - // Reshape, Mul, and replaces found pattern with a sequence of Shape, StridedSlice, Const, Mul, Interpolate. - // Found pattern (for 4D case, in a general case the pattern is similar): + // This transformation looks for Interpolate layer implemented using simple operations, namely ShapeOf, + // StridedSlice, Concat, Reshape, Mul, and replaces found pattern with a sequence of Shape, StridedSlice, Const, + // Mul, Interpolate. 
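// ---------------------------------------------------------------------------
// Editorial aside (not part of the patch): a tiny standalone check of why the
// Reshape -> Mul(ones) -> Reshape subgraph this transformation matches is
// equivalent to nearest-neighbor upsampling, shown for one spatial axis and an
// integer scale S. Names are illustrative; the real pass handles N-D tensors.
#include <cassert>
#include <iostream>
#include <vector>

// Reshape [D] -> [D, 1], multiply by a [1, S] tensor of ones (broadcast),
// then reshape [D, S] -> [D * S]. Each element is simply repeated S times.
std::vector<float> upsample_via_reshape_mul(const std::vector<float>& x, size_t S) {
    std::vector<float> y;
    y.reserve(x.size() * S);
    for (float v : x)               // one row of the [D, 1] view
        for (size_t s = 0; s < S; ++s)
            y.push_back(v * 1.0f);  // multiplying by 1.0 only broadcasts the value
    return y;
}

// Nearest-neighbor interpolation with an integer scale does exactly the same.
std::vector<float> upsample_nearest(const std::vector<float>& x, size_t S) {
    std::vector<float> y(x.size() * S);
    for (size_t i = 0; i < y.size(); ++i)
        y[i] = x[i / S];
    return y;
}

int main() {
    const std::vector<float> x{1.f, 2.f, 3.f};
    assert(upsample_via_reshape_mul(x, 2) == upsample_nearest(x, 2));
    std::cout << "reshape-mul-reshape == nearest-neighbor upsampling\n";
}
// ---------------------------------------------------------------------------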
Found pattern (for 4D case, in a general case the pattern is similar): // // |---------| // | op | @@ -147,36 +166,35 @@ ngraph::pass::NearestNeighborUpsamplingFusion::NearestNeighborUpsamplingFusion() // | | 0|<-----------------------| | 0|<----------| // | | | | | // | | | | | - // | | | |-------------| |------------| | | |-------------| |--------------| - // | | | | | | Constant | | | | | | Constant | - // | | | | 0|<--| value: H | | | | 0|<--| value: new_H | - // | | | | | |------------| | | | | |--------------| - // | | | | | | | | | - // | | | | Unsqueeze | |------------| | | | Unsqueeze | |------------| - // | | | | | | Constant | | | | | | Constant | - // | | 1|<-----| 1|<--| value: 0 | | 1|<-----| 1|<--| value: 0 | - // | | | |-------------| |------------| | | |-------------| |------------| - // | | | | | - // | | | |-------------| |------------| | | |-------------| |--------------| - // | | | | | | Constant | | | | | | Constant | - // | | | | 0|<--| value: 1 | | | | 0|<--| value: new_W | - // | | | | | |------------| | | | | |--------------| - // | | | | | | | | | - // | | | | Unsqueeze | |------------| | | | Unsqueeze | |------------| - // | | | | | | Constant | | | | | | Constant | - // | | 2|<-----| 1|<--| value: 0 | | 2|<-----| 1|<--| value: 0 | - // | | | |-------------| |------------| | | |-------------| |------------| - // | | | | | - // | | | |-------------| |------------| | | |-------------| |------------| - // | | | | | | Constant | | | | | | Constant | - // | | | | 0|<--| value: W | | | | 0|<--| value: C | - // | | | | | |------------| | | | | |------------| - // | | | | | | | | | - // | | | | Unsqueeze | |------------| | | | Unsqueeze | |------------| - // | | | | | | Constant | | | | | | Constant | - // | | 3|<-----| 1|<--| value: 0 | | 3|<-----| 1|<--| value: 0 | - // | | | |-------------| |------------| |------|--------| |-------------| |------------| - // | | | | + // | | | |-------------| |------------| | | |-------------| + // |--------------| | | | | | | Constant | | | | + // | | Constant | | | | | 0|<--| value: H | | | | + // 0|<--| value: new_H | | | | | | |------------| | | + // | | |--------------| | | | | | | | | + // | | | | | Unsqueeze | |------------| | | | Unsqueeze | + // |------------| | | | | | | Constant | | | | + // | | Constant | | | 1|<-----| 1|<--| value: 0 | | 1|<-----| 1|<--| + // value: 0 | | | | |-------------| |------------| | | + // |-------------| |------------| | | | | | | | + // | |-------------| |------------| | | |-------------| |--------------| | | | + // | | | Constant | | | | | | Constant | | | | | + // 0|<--| value: 1 | | | | 0|<--| value: new_W | | | | + // | | |------------| | | | | |--------------| | | | | + // | | | | | | | | | Unsqueeze | + // |------------| | | | Unsqueeze | |------------| | | | | + // | | Constant | | | | | | Constant | | | 2|<-----| 1|<--| + // value: 0 | | 2|<-----| 1|<--| value: 0 | | | | + // |-------------| |------------| | | |-------------| |------------| | | | | | + // | | | |-------------| |------------| | | |-------------| + // |------------| | | | | | | Constant | | | | + // | | Constant | | | | | 0|<--| value: W | | | | + // 0|<--| value: C | | | | | | |------------| | | | + // | |------------| | | | | | | | | | + // | | | | Unsqueeze | |------------| | | | Unsqueeze | + // |------------| | | | | | | Constant | | | | + // | | Constant | | | 3|<-----| 1|<--| value: 0 | | 3|<-----| 1|<--| + // value: 0 | | | | |-------------| |------------| |------|--------| + // |-------------| |------------| | | 
| | // | | | |-------------| |------------| | // | | | | | | Constant | | // | | | | 0|<--| value: 1 | | @@ -230,8 +248,8 @@ ngraph::pass::NearestNeighborUpsamplingFusion::NearestNeighborUpsamplingFusion() // ... // D_i for the input port 2 * (i - 1) + 1 of 'concat_1' and 1 for the input port 2 * i of 'concat_1'; // ... - // D_{r - 2} for the input port 2 * ((r - 2) - 1) + 1 of 'concat_1' and 1 for the input port 2 * (r - 2) of 'concat_1'; - // C for the input port 2 * (r - 2) + 1 of 'concat_1'; + // D_{r - 2} for the input port 2 * ((r - 2) - 1) + 1 of 'concat_1' and 1 for the input port 2 * (r - 2) of + // 'concat_1'; C for the input port 2 * (r - 2) + 1 of 'concat_1'; // 4) unsqueezed constants for 'concat_2' are // newD_1 for the input port 1 of 'concat_1'; // newD_2 for the input port 2 of 'concat_1'; @@ -240,8 +258,8 @@ ngraph::pass::NearestNeighborUpsamplingFusion::NearestNeighborUpsamplingFusion() // ... // newD_{r - 2} for the input port (r - 2) of 'concat_1'; // C for the input port (r - 2) + 1 of 'concat_1'; - // 5) the shape of 'mul_const' is [1, 1, S_1, 1, S_2, ..., 1, S_i, ..., 1, S_{r - 2}, 1] where S_i is a scale for the axis i; - // 6) all elements of 'mul_const' are equal to 1.0. + // 5) the shape of 'mul_const' is [1, 1, S_1, 1, S_2, ..., 1, S_i, ..., 1, S_{r - 2}, 1] where S_i is a scale + // for the axis i; 6) all elements of 'mul_const' are equal to 1.0. // // Such subgraph can be replaced by the Interpolate node with // 1) mode='nearest' and shape_calculation_mode='scales'; @@ -259,48 +277,75 @@ ngraph::pass::NearestNeighborUpsamplingFusion::NearestNeighborUpsamplingFusion() auto reshape_2 = pattern::wrap_type({mul, concat_2}); ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher& m) { - const auto &pattern_to_output = m.get_pattern_value_map(); + const auto& pattern_to_output = m.get_pattern_value_map(); - const auto reshape_2_node = std::dynamic_pointer_cast(pattern_to_output.at(reshape_2).get_node_shared_ptr()); - const auto mul_node = std::dynamic_pointer_cast(pattern_to_output.at(mul).get_node_shared_ptr()); - if (!reshape_2_node || !mul_node) return false; + const auto reshape_2_node = + std::dynamic_pointer_cast(pattern_to_output.at(reshape_2).get_node_shared_ptr()); + const auto mul_node = + std::dynamic_pointer_cast(pattern_to_output.at(mul).get_node_shared_ptr()); + if (!reshape_2_node || !mul_node) + return false; - const auto mul_const_node = std::dynamic_pointer_cast(pattern_to_output.at(mul_const).get_node_shared_ptr()); - if (!mul_const_node) return false; + const auto mul_const_node = + std::dynamic_pointer_cast(pattern_to_output.at(mul_const).get_node_shared_ptr()); + if (!mul_const_node) + return false; - const auto reshape_1_node = std::dynamic_pointer_cast(pattern_to_output.at(reshape_1).get_node_shared_ptr()); - if (!reshape_1_node) return false; + const auto reshape_1_node = + std::dynamic_pointer_cast(pattern_to_output.at(reshape_1).get_node_shared_ptr()); + if (!reshape_1_node) + return false; uint64_t input_rank = static_cast(reshape_1_node->get_input_partial_shape(0).rank().get_length()); const auto mul_const_shape = mul_const_node->get_output_shape(0); const auto scales = get_scales_from_mul_const_shape(mul_const_shape, input_rank); - if (scales.empty() || std::all_of(scales.begin(), scales.end(), [](float s) { return s == 1.0f;})) { return false; } + if (scales.empty() || std::all_of(scales.begin(), scales.end(), [](float s) { + return s == 1.0f; + })) { + return false; + } const auto mul_const_value = 
mul_const_node->cast_vector(); - if (std::any_of(mul_const_value.begin(), mul_const_value.end(), [](float x){ return x != 1.0f; })) { return false; } + if (std::any_of(mul_const_value.begin(), mul_const_value.end(), [](float x) { + return x != 1.0f; + })) { + return false; + } - const auto concat_1_node = std::dynamic_pointer_cast(pattern_to_output.at(concat_1).get_node_shared_ptr()); - if (!concat_1_node) return false; + const auto concat_1_node = + std::dynamic_pointer_cast(pattern_to_output.at(concat_1).get_node_shared_ptr()); + if (!concat_1_node) + return false; const auto input_shape = reshape_1_node->get_input_shape(0); - if (!check_concat_1(concat_1_node, input_shape)) return false; + if (!check_concat_1(concat_1_node, input_shape)) + return false; - const auto concat_2_node = std::dynamic_pointer_cast(pattern_to_output.at(concat_2).get_node_shared_ptr()); - if (!concat_2_node) return false; + const auto concat_2_node = + std::dynamic_pointer_cast(pattern_to_output.at(concat_2).get_node_shared_ptr()); + if (!concat_2_node) + return false; const auto new_spatial_shape = get_new_spatial_shape_from_concat_2(concat_2_node, input_shape); - if (new_spatial_shape.empty()) return false; + if (new_spatial_shape.empty()) + return false; - const auto ss_before_concat_1 = std::dynamic_pointer_cast(concat_1_node->input_value(0).get_node_shared_ptr()); - const auto ss_before_concat_2 = std::dynamic_pointer_cast(concat_2_node->input_value(0).get_node_shared_ptr()); - if (!ss_before_concat_1 || !ss_before_concat_2 || ss_before_concat_1.get() != ss_before_concat_2.get()) return false; + const auto ss_before_concat_1 = + std::dynamic_pointer_cast(concat_1_node->input_value(0).get_node_shared_ptr()); + const auto ss_before_concat_2 = + std::dynamic_pointer_cast(concat_2_node->input_value(0).get_node_shared_ptr()); + if (!ss_before_concat_1 || !ss_before_concat_2 || ss_before_concat_1.get() != ss_before_concat_2.get()) + return false; - const auto shapeof_node = std::dynamic_pointer_cast(ss_before_concat_1->input_value(0).get_node_shared_ptr()); - if (!shapeof_node) return false; + const auto shapeof_node = + std::dynamic_pointer_cast(ss_before_concat_1->input_value(0).get_node_shared_ptr()); + if (!shapeof_node) + return false; const auto before_shapeof = shapeof_node->input_value(0); const auto before_reshape_1 = reshape_1_node->input_value(0); - if (before_shapeof.get_node() != before_reshape_1.get_node()) return false; + if (before_shapeof.get_node() != before_reshape_1.get_node()) + return false; opset8::Interpolate::InterpolateAttrs attrs; attrs.mode = opset8::Interpolate::InterpolateMode::NEAREST; @@ -321,11 +366,13 @@ ngraph::pass::NearestNeighborUpsamplingFusion::NearestNeighborUpsamplingFusion() std::iota(axes.begin(), axes.end(), static_cast(1)); const auto axes_node = opset8::Constant::create(element::i64, {axes.size()}, axes); - auto interpolate = register_new_node(before_shapeof, sizes_node, scales_node, axes_node, attrs); + auto interpolate = + register_new_node(before_shapeof, sizes_node, scales_node, axes_node, attrs); interpolate->set_friendly_name(reshape_2_node->get_friendly_name()); - copy_runtime_info({reshape_2_node, mul_node, mul_const_node, concat_1_node, concat_2_node, ss_before_concat_1, shapeof_node}, - {scales_node, sizes_node, axes_node, interpolate}); + copy_runtime_info( + {reshape_2_node, mul_node, mul_const_node, concat_1_node, concat_2_node, ss_before_concat_1, shapeof_node}, + {scales_node, sizes_node, axes_node, interpolate}); replace_node(reshape_2_node, 
interpolate); return true; diff --git a/src/common/transformations/src/transformations/common_optimizations/nop_elimination.cpp b/src/common/transformations/src/transformations/common_optimizations/nop_elimination.cpp index 22fb21b074a..fb508dbba33 100644 --- a/src/common/transformations/src/transformations/common_optimizations/nop_elimination.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/nop_elimination.cpp @@ -2,18 +2,18 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "itt.hpp" #include #include -#include - +#include #include #include +#include #include -#include +#include #include #include -#include + +#include "itt.hpp" using namespace std; using namespace ngraph; @@ -56,8 +56,7 @@ static bool simplify_gather(std::shared_ptr node) { // op has Nop // check if the indices is constant - auto constant_indices = - ov::as_type_ptr(gather->input_value(1).get_node_shared_ptr()); + auto constant_indices = ov::as_type_ptr(gather->input_value(1).get_node_shared_ptr()); if (!constant_indices) { return false; } else { @@ -75,8 +74,7 @@ static bool simplify_gather(std::shared_ptr node) { static bool eliminate_nop(const std::shared_ptr& node) { // skip if shapes are dynamic - if (node->get_input_partial_shape(0).is_dynamic() || - node->get_output_partial_shape(0).is_dynamic()) { + if (node->get_input_partial_shape(0).is_dynamic() || node->get_output_partial_shape(0).is_dynamic()) { return false; } @@ -99,8 +97,7 @@ static bool eliminate_reshape_v1(const std::shared_ptr& node) { } // eliminate redundant reshape, squeeze, or unsqueeze auto input_node = input.get_node_shared_ptr(); - if (ov::as_type_ptr(input_node) || - ov::as_type_ptr(input_node) || + if (ov::as_type_ptr(input_node) || ov::as_type_ptr(input_node) || ov::as_type_ptr(input_node)) { if (input_node->get_output_target_inputs(0).size() != 1) return false; @@ -114,8 +111,7 @@ static bool eliminate_reshape_v1(const std::shared_ptr& node) { std::vector vi; vi.assign(shape.begin(), shape.end()); auto pat = opset3::Constant::create(element::i64, Shape{vi.size()}, vi); - auto new_reshape = - make_shared(input.get_node()->input_value(0), pat, false); + auto new_reshape = make_shared(input.get_node()->input_value(0), pat, false); new_reshape->set_friendly_name(node->get_friendly_name()); copy_runtime_info({input_node, node}, new_reshape); replace_node(node, new_reshape); @@ -176,8 +172,7 @@ static bool replace_squeeze_unsqueeze(const std::shared_ptr& node) { } } -static std::vector get_unsqueeze_axes(const PartialShape& data_shape, - const PartialShape& out_shape) { +static std::vector get_unsqueeze_axes(const PartialShape& data_shape, const PartialShape& out_shape) { std::vector axes; int64_t i = 0; for (auto o = 0; o < out_shape.rank().get_length(); o++) { @@ -192,8 +187,7 @@ static std::vector get_unsqueeze_axes(const PartialShape& data_shape, return axes; } -static std::vector get_squeeze_axes(const PartialShape& data_shape, - const PartialShape& out_shape) { +static std::vector get_squeeze_axes(const PartialShape& data_shape, const PartialShape& out_shape) { std::vector axes; int64_t out_i = 0; for (auto i = 0; i < data_shape.rank().get_length(); i++) { @@ -223,8 +217,7 @@ static bool eliminate_unsqueeze(const std::shared_ptr& node) { auto replace_unsqueeze_only = [&](const vector& axes) { auto axes_const = opset3::Constant::create(element::i64, Shape{axes.size()}, axes); auto new_unsq = make_shared(input->input_value(0), axes_const); - if (unsqueeze->get_output_partial_shape(0).same_scheme( - 
new_unsq->get_output_partial_shape(0))) { + if (unsqueeze->get_output_partial_shape(0).same_scheme(new_unsq->get_output_partial_shape(0))) { return replace_node_update_name(unsqueeze, new_unsq); } return false; @@ -250,11 +243,9 @@ static bool eliminate_unsqueeze(const std::shared_ptr& node) { // check if single squeeze can handle this auto axes = get_squeeze_axes(data_shape, out_shape); if (data_shape.rank().get_length() - static_cast(axes.size()) == out_shape.rank().get_length()) { - auto axes_const = - opset3::Constant::create(element::i64, Shape{axes.size()}, axes); + auto axes_const = opset3::Constant::create(element::i64, Shape{axes.size()}, axes); auto new_sq = make_shared(input->input_value(0), axes_const); - if (unsqueeze->get_output_partial_shape(0).same_scheme( - new_sq->get_output_partial_shape(0))) { + if (unsqueeze->get_output_partial_shape(0).same_scheme(new_sq->get_output_partial_shape(0))) { return replace_node_update_name(unsqueeze, new_sq); } return false; @@ -277,29 +268,28 @@ static bool eliminate_unsqueeze(const std::shared_ptr& node) { } #define ECHO(NAME) #NAME -#define STR(NAME) ECHO(NAME) -#define SIMPLE_MATCHER_PASS_DEFINITION(NAME, OP, FUNC) \ -class NAME : public ngraph::pass::MatcherPass { \ -public: \ -NGRAPH_RTTI_DECLARATION; \ -NAME() { \ - MATCHER_SCOPE(NAME); \ - auto match_node = ngraph::pattern::wrap_type(); \ - ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher &m) { \ - return FUNC(m.get_match_root()); \ - }; \ - auto m = std::make_shared(match_node, matcher_name); \ - register_matcher(m, callback); \ -} \ -}; \ -NGRAPH_RTTI_DEFINITION(NAME, STR(NAME), 0); +#define STR(NAME) ECHO(NAME) +#define SIMPLE_MATCHER_PASS_DEFINITION(NAME, OP, FUNC) \ + class NAME : public ngraph::pass::MatcherPass { \ + public: \ + NGRAPH_RTTI_DECLARATION; \ + NAME() { \ + MATCHER_SCOPE(NAME); \ + auto match_node = ngraph::pattern::wrap_type(); \ + ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher& m) { \ + return FUNC(m.get_match_root()); \ + }; \ + auto m = std::make_shared(match_node, matcher_name); \ + register_matcher(m, callback); \ + } \ + }; \ + NGRAPH_RTTI_DEFINITION(NAME, STR(NAME), 0); SIMPLE_MATCHER_PASS_DEFINITION(EliminateReshape, opset3::Reshape, eliminate_reshape_v1); SIMPLE_MATCHER_PASS_DEFINITION(EliminateUnsqueeze, opset3::Unsqueeze, eliminate_unsqueeze); SIMPLE_MATCHER_PASS_DEFINITION(EliminateBroadcast, op::v1::Broadcast, eliminate_nop); SIMPLE_MATCHER_PASS_DEFINITION(EliminateGather, opset3::Gather, simplify_gather); - NGRAPH_RTTI_DEFINITION(pass::EliminatePad, "EliminatePad", 0); pass::EliminatePad::EliminatePad() { @@ -319,8 +309,14 @@ pass::EliminatePad::EliminatePad() { const auto pad_begin_value = pad_begin_const->cast_vector(); const auto pad_end_value = pad_end_const->cast_vector(); - if (std::any_of(pad_begin_value.begin(), pad_begin_value.end(), [](int64_t value) { return value != 0; }) || - std::any_of(pad_end_value.begin(), pad_end_value.end(), [](int64_t value) { return value != 0; })) { + if (std::any_of(pad_begin_value.begin(), + pad_begin_value.end(), + [](int64_t value) { + return value != 0; + }) || + std::any_of(pad_end_value.begin(), pad_end_value.end(), [](int64_t value) { + return value != 0; + })) { return false; } @@ -360,7 +356,7 @@ pass::EliminateConvertNonZero::EliminateConvertNonZero() { auto non_zero = pattern::wrap_type({convert_pattern}); matcher_pass_callback callback = [=](pattern::Matcher& m) { - const auto & pattern_map = m.get_pattern_map(); + const auto& pattern_map = 
m.get_pattern_map(); auto convert = pattern_map.at(convert_pattern); // remove convert convert->output(0).replace(convert->input_value(0)); @@ -502,13 +498,13 @@ pass::EliminateTranspose::EliminateTranspose() { auto transpose_pattern = pattern::wrap_type({pattern::any_input(), order}); matcher_pass_callback callback = [=](pattern::Matcher& m) { - const auto & pattern_map = m.get_pattern_map(); + const auto& pattern_map = m.get_pattern_map(); auto order_const = std::dynamic_pointer_cast(pattern_map.at(order)); if (!order_const) { return false; } - const auto & order_values = order_const->cast_vector(); + const auto& order_values = order_const->cast_vector(); vector ref_values(order_values.size()); std::iota(ref_values.begin(), ref_values.end(), 0); if (order_values != ref_values) { @@ -529,10 +525,8 @@ pass::EliminateEltwise::EliminateEltwise() { MATCHER_SCOPE(EliminateEltwise); auto input = pattern::any_input(); auto constant_pattern = pattern::wrap_type(); - auto eltwise_pattern = pattern::wrap_type({input, constant_pattern}); + auto eltwise_pattern = + pattern::wrap_type({input, constant_pattern}); matcher_pass_callback callback = [=](pattern::Matcher& m) { const auto& pattern_map = m.get_pattern_value_map(); diff --git a/src/common/transformations/src/transformations/common_optimizations/normalize_l2_fusion.cpp b/src/common/transformations/src/transformations/common_optimizations/normalize_l2_fusion.cpp index cc9dcebab48..4b2ca7bdd7e 100644 --- a/src/common/transformations/src/transformations/common_optimizations/normalize_l2_fusion.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/normalize_l2_fusion.cpp @@ -2,17 +2,17 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "itt.hpp" #include "transformations/common_optimizations/normalize_l2_fusion.hpp" -#include "transformations/utils/utils.hpp" #include +#include +#include +#include +#include #include -#include -#include -#include -#include +#include "itt.hpp" +#include "transformations/utils/utils.hpp" NGRAPH_RTTI_DEFINITION(ngraph::pass::NormalizeL2Fusion, "NormalizeL2Fusion", 0); @@ -37,9 +37,12 @@ ngraph::pass::NormalizeL2Fusion::NormalizeL2Fusion() { const auto& pattern_to_output = m.get_pattern_value_map(); const auto data_input = pattern_to_output.at(input); - const auto exp_input = std::dynamic_pointer_cast(pattern_to_output.at(exp).get_node_shared_ptr()); - const auto axes_input = std::dynamic_pointer_cast(pattern_to_output.at(axes).get_node_shared_ptr()); - const auto eps_attr = std::dynamic_pointer_cast(pattern_to_output.at(eps_const).get_node_shared_ptr()); + const auto exp_input = + std::dynamic_pointer_cast(pattern_to_output.at(exp).get_node_shared_ptr()); + const auto axes_input = + std::dynamic_pointer_cast(pattern_to_output.at(axes).get_node_shared_ptr()); + const auto eps_attr = + std::dynamic_pointer_cast(pattern_to_output.at(eps_const).get_node_shared_ptr()); if (!exp_input || !axes_input || !eps_attr) { return false; @@ -75,9 +78,8 @@ ngraph::pass::NormalizeL2Fusion::NormalizeL2Fusion() { pattern_to_output.at(reduce_sum).get_node_shared_ptr(), pattern_to_output.at(sqrt).get_node_shared_ptr(), pattern_to_output.at(divide).get_node_shared_ptr(), - eps_node.get_node_shared_ptr() - }, - normalize_l2); + eps_node.get_node_shared_ptr()}, + normalize_l2); ngraph::replace_node(m.get_match_root(), normalize_l2); return true; }; diff --git a/src/common/transformations/src/transformations/common_optimizations/optimize_strided_slice.cpp 
b/src/common/transformations/src/transformations/common_optimizations/optimize_strided_slice.cpp index 32e01e0b4e1..96a5073a41d 100644 --- a/src/common/transformations/src/transformations/common_optimizations/optimize_strided_slice.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/optimize_strided_slice.cpp @@ -3,17 +3,15 @@ // #include -#include - -#include "itt.hpp" -#include "transformations/op_conversions/convert_slice_to_strided_slice.hpp" - -#include #include #include #include #include +#include +#include +#include "itt.hpp" +#include "transformations/op_conversions/convert_slice_to_strided_slice.hpp" NGRAPH_RTTI_DEFINITION(ngraph::pass::StridedSliceOptimization, "StridedSliceOptimization", 0); @@ -22,7 +20,7 @@ NGRAPH_RTTI_DEFINITION(ngraph::pass::UselessStridedSliceEraser, "UselessStridedS bool ngraph::pass::UselessStridedSliceEraser::run_on_model(const std::shared_ptr& f) { RUN_ON_FUNCTION_SCOPE(UselessStridedSliceEraser); bool rewritten = false; - for (auto & node : f->get_ordered_ops()) { + for (auto& node : f->get_ordered_ops()) { // Recursively apply transformation for sub-graph based operations if (auto sub_graph_node = std::dynamic_pointer_cast(node)) { if (auto sub_graph = sub_graph_node->get_function()) { @@ -35,10 +33,13 @@ bool ngraph::pass::UselessStridedSliceEraser::run_on_model(const std::shared_ptr if (ss->input(0).get_shape() != ss->output(0).get_shape()) continue; - auto stridesNode = std::dynamic_pointer_cast(ss->input_value(3).get_node_shared_ptr()); + auto stridesNode = + std::dynamic_pointer_cast(ss->input_value(3).get_node_shared_ptr()); if (stridesNode) { auto strides = stridesNode->cast_vector(); - if (!std::any_of(strides.begin(), strides.end(), [](int64_t strd) { return strd < 0;})) + if (!std::any_of(strides.begin(), strides.end(), [](int64_t strd) { + return strd < 0; + })) rewritten |= replace_output_update_name(ss->output(0), ss->input_value(0)); } } @@ -93,7 +94,7 @@ bool strided_slices_perform_the_same(std::shared_ptr, std::vector>> source_to_ss; - for (const auto & node : f->get_ordered_ops()) { + for (const auto& node : f->get_ordered_ops()) { // Recursively apply transformation for sub-graph based operations if (auto sub_graph_node = std::dynamic_pointer_cast(node)) { if (auto sub_graph = sub_graph_node->get_function()) { @@ -119,7 +120,8 @@ bool ngraph::pass::SharedStridedSliceEraser::run_on_model(const std::shared_ptr< continue; auto root_ss = pair.second[0]; for (auto& child_ss : pair.second) { - if (root_ss->get_instance_id() != child_ss->get_instance_id() && strided_slices_perform_the_same(root_ss, child_ss)) { + if (root_ss->get_instance_id() != child_ss->get_instance_id() && + strided_slices_perform_the_same(root_ss, child_ss)) { graph_rewritten |= replace_output_update_name(child_ss->output(0), root_ss->output(0)); } } @@ -135,7 +137,7 @@ bool ngraph::pass::GroupedStridedSliceOptimizer::run_on_model(const std::shared_ using planned_slice = std::pair, ngraph::SlicePlan>; std::map, std::vector> source_to_ss_with_plan; - for (const auto & node : f->get_ordered_ops()) { + for (const auto& node : f->get_ordered_ops()) { // Recursively apply transformation for sub-graph based operations if (auto sub_graph_node = std::dynamic_pointer_cast(node)) { if (auto sub_graph = sub_graph_node->get_function()) { @@ -157,14 +159,15 @@ bool ngraph::pass::GroupedStridedSliceOptimizer::run_on_model(const std::shared_ bool valid_for_replacement = true; auto root_plan = pair.second[0].second; - for (const auto & ss_plan : pair.second) 
{ + for (const auto& ss_plan : pair.second) { valid_for_replacement &= (ss_plan.second.begins.size() == root_plan.begins.size()); - valid_for_replacement &= (ss_plan.first->get_ellipsis_mask().empty() && - ss_plan.first->get_new_axis_mask().empty() && - ss_plan.first->get_shrink_axis_mask().empty()); + valid_for_replacement &= + (ss_plan.first->get_ellipsis_mask().empty() && ss_plan.first->get_new_axis_mask().empty() && + ss_plan.first->get_shrink_axis_mask().empty()); } - if (!valid_for_replacement) continue; + if (!valid_for_replacement) + continue; auto input_shape = pair.first.get_shape(); auto axis = -1; @@ -177,7 +180,7 @@ bool ngraph::pass::GroupedStridedSliceOptimizer::run_on_model(const std::shared_ std::vector output_to_partition; for (size_t i = 0; i < input_shape.size(); ++i) { - for (const auto & ss_plan : pair.second) { + for (const auto& ss_plan : pair.second) { if (ss_plan.second.begins[i] != 0 || ss_plan.second.ends[i] != static_cast(input_shape[i])) { if (axis == -1 || axis == static_cast(i)) axis = static_cast(i); @@ -192,31 +195,38 @@ bool ngraph::pass::GroupedStridedSliceOptimizer::run_on_model(const std::shared_ break; } } - output_to_partition.push_back({ss_plan.first->output(0), ss_plan.second.begins[i], ss_plan.second.ends[i]}); + output_to_partition.push_back( + {ss_plan.first->output(0), ss_plan.second.begins[i], ss_plan.second.ends[i]}); } - if (!valid_for_replacement) break; + if (!valid_for_replacement) + break; } - if (!valid_for_replacement) break; + if (!valid_for_replacement) + break; } - if (!valid_for_replacement || output_to_partition.size() < 2 || axis == -1) continue; + if (!valid_for_replacement || output_to_partition.size() < 2 || axis == -1) + continue; - std::sort(output_to_partition.begin(), output_to_partition.end(), - [](OutputToPatrition lhs, OutputToPatrition rhs) - {return lhs.begin < rhs.begin;}); + std::sort(output_to_partition.begin(), + output_to_partition.end(), + [](OutputToPatrition lhs, OutputToPatrition rhs) { + return lhs.begin < rhs.begin; + }); std::vector, uint64_t>> output_to_size; int64_t prev_r = 0; - for (auto & record : output_to_partition) { + for (auto& record : output_to_partition) { valid_for_replacement &= (record.begin >= prev_r); prev_r = record.end; } valid_for_replacement &= (static_cast(prev_r) <= input_shape[axis]); - if (!valid_for_replacement) continue; + if (!valid_for_replacement) + continue; prev_r = 0; Output fake_output; - for (auto & record : output_to_partition) { + for (auto& record : output_to_partition) { if (record.begin > prev_r) output_to_size.emplace_back(fake_output, record.begin - prev_r); prev_r = record.end; @@ -229,14 +239,16 @@ bool ngraph::pass::GroupedStridedSliceOptimizer::run_on_model(const std::shared_ auto axis_const = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{}, {axis}); std::vector size_splits; - for (const auto & item : output_to_size) + for (const auto& item : output_to_size) size_splits.push_back(item.second); - auto size_splits_const = ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{size_splits.size()}, size_splits); - auto variadic_split = std::make_shared(pair.first, axis_const, size_splits_const); + auto size_splits_const = + ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{size_splits.size()}, size_splits); + auto variadic_split = + std::make_shared(pair.first, axis_const, size_splits_const); auto i = 0; NodeVector ops_to_replace; - for (auto & record : output_to_size) { + for (auto& record : output_to_size) 
{ if (record.first != fake_output) { record.first.replace(variadic_split->output(i)); ops_to_replace.push_back(record.first.get_node_shared_ptr()); diff --git a/src/common/transformations/src/transformations/common_optimizations/pad_fusion.cpp b/src/common/transformations/src/transformations/common_optimizations/pad_fusion.cpp index 0030e9cc6bf..e1cde82025e 100644 --- a/src/common/transformations/src/transformations/common_optimizations/pad_fusion.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/pad_fusion.cpp @@ -3,23 +3,24 @@ // #include "transformations/common_optimizations/pad_fusion.hpp" -#include "transformations/utils/utils.hpp" -#include "itt.hpp" #include +#include +#include +#include +#include #include -#include -#include -#include -#include +#include "itt.hpp" +#include "transformations/utils/utils.hpp" using namespace ngraph; NGRAPH_RTTI_DEFINITION(pass::PadFusion, "PadFusion", 0); template -static bool can_be_fused(const std::shared_ptr& pad, const std::shared_ptr& node, +static bool can_be_fused(const std::shared_ptr& pad, + const std::shared_ptr& node, const std::shared_ptr& pad_value_node, const std::shared_ptr& pads_begin, const std::shared_ptr& pads_end) { @@ -63,12 +64,20 @@ static std::tuple new_pooling_pad_values(const std::shared_ptrcast_vector(); auto pads_end_val = pads_end->cast_vector(); - std::transform(node_pads_begin.begin(), node_pads_begin.end(), - pads_begin_val.begin() + 2, node_pads_begin.begin(), - [] (size_t a, size_t b) -> size_t { return a + b; }); - std::transform(node_pads_end.begin(), node_pads_end.end(), - pads_end_val.begin() + 2, node_pads_end.begin(), - [] (size_t a, size_t b) -> size_t { return a + b; }); + std::transform(node_pads_begin.begin(), + node_pads_begin.end(), + pads_begin_val.begin() + 2, + node_pads_begin.begin(), + [](size_t a, size_t b) -> size_t { + return a + b; + }); + std::transform(node_pads_end.begin(), + node_pads_end.end(), + pads_end_val.begin() + 2, + node_pads_end.begin(), + [](size_t a, size_t b) -> size_t { + return a + b; + }); return std::make_tuple(node_pads_begin, node_pads_end); } @@ -81,9 +90,9 @@ pass::PadFusionAvgPool::PadFusionAvgPool() { auto pads_begin_pattern = pattern::wrap_type(); auto pads_end_pattern = pattern::wrap_type(); auto pad_value_pattern = pattern::any_input(); - auto pad_node_pattern = pattern::wrap_type({data_pattern, pads_begin_pattern, - pads_end_pattern, pad_value_pattern}, - pattern::consumers_count(1)); + auto pad_node_pattern = + pattern::wrap_type({data_pattern, pads_begin_pattern, pads_end_pattern, pad_value_pattern}, + pattern::consumers_count(1)); auto avg_pool_pattern = pattern::wrap_type({pad_node_pattern}); matcher_pass_callback callback = [=](pattern::Matcher& m) { @@ -91,8 +100,10 @@ pass::PadFusionAvgPool::PadFusionAvgPool() { auto data = pattern_map[data_pattern]; auto pad = std::dynamic_pointer_cast(pattern_map[pad_node_pattern].get_node_shared_ptr()); auto pad_value = pattern_map[pad_value_pattern].get_node_shared_ptr(); - auto pads_begin = std::dynamic_pointer_cast(pattern_map[pads_begin_pattern].get_node_shared_ptr()); - auto pads_end = std::dynamic_pointer_cast(pattern_map[pads_end_pattern].get_node_shared_ptr()); + auto pads_begin = + std::dynamic_pointer_cast(pattern_map[pads_begin_pattern].get_node_shared_ptr()); + auto pads_end = + std::dynamic_pointer_cast(pattern_map[pads_end_pattern].get_node_shared_ptr()); auto avg_pool = std::dynamic_pointer_cast(pattern_map[avg_pool_pattern].get_node_shared_ptr()); if (!can_be_fused(pad, avg_pool, 
pad_value, pads_begin, pads_end)) return false; @@ -101,24 +112,34 @@ pass::PadFusionAvgPool::PadFusionAvgPool() { if (avg_pool->get_exclude_pad()) { const auto& avg_pads_begin = avg_pool->get_pads_begin(); const auto& avg_pads_end = avg_pool->get_pads_end(); - bool avg_pads_begin_are_zeros = std::all_of(avg_pads_begin.begin(), avg_pads_begin.end(), [] (size_t p) -> bool { return p == 0; }); - bool avg_pads_end_are_zeros = std::all_of(avg_pads_end.begin(), avg_pads_end.end(), [] (size_t p) -> bool { return p == 0; }); + bool avg_pads_begin_are_zeros = + std::all_of(avg_pads_begin.begin(), avg_pads_begin.end(), [](size_t p) -> bool { + return p == 0; + }); + bool avg_pads_end_are_zeros = std::all_of(avg_pads_end.begin(), avg_pads_end.end(), [](size_t p) -> bool { + return p == 0; + }); if (!avg_pads_begin_are_zeros || !avg_pads_end_are_zeros) return false; auto pads_begin_val = pads_begin->cast_vector(); auto pads_end_val = pads_end->cast_vector(); - new_avg_pool = std::make_shared(data, avg_pool->get_strides(), + new_avg_pool = std::make_shared(data, + avg_pool->get_strides(), Shape{pads_begin_val.begin() + 2, pads_begin_val.end()}, Shape{pads_end_val.begin() + 2, pads_end_val.end()}, - avg_pool->get_kernel(), false, + avg_pool->get_kernel(), + false, avg_pool->get_rounding_type(), op::PadType::EXPLICIT); } else { Shape new_pads_begin, new_pads_end; std::tie(new_pads_begin, new_pads_end) = new_pooling_pad_values(pads_begin, pads_end, avg_pool); - new_avg_pool = std::make_shared(data, avg_pool->get_strides(), - new_pads_begin, new_pads_end, - avg_pool->get_kernel(), false, + new_avg_pool = std::make_shared(data, + avg_pool->get_strides(), + new_pads_begin, + new_pads_end, + avg_pool->get_kernel(), + false, avg_pool->get_rounding_type(), op::PadType::EXPLICIT); } @@ -135,20 +156,29 @@ pass::PadFusionAvgPool::PadFusionAvgPool() { } template -static std::tuple new_conv_pad_values(const std::shared_ptr& pads_begin, - const std::shared_ptr& pads_end, - const std::shared_ptr& node) { +static std::tuple new_conv_pad_values( + const std::shared_ptr& pads_begin, + const std::shared_ptr& pads_end, + const std::shared_ptr& node) { auto node_pads_begin = node->get_pads_begin(); auto node_pads_end = node->get_pads_end(); auto pads_begin_val = pads_begin->cast_vector(); auto pads_end_val = pads_end->cast_vector(); - std::transform(node_pads_begin.begin(), node_pads_begin.end(), - pads_begin_val.begin() + 2, node_pads_begin.begin(), - [] (std::ptrdiff_t a, size_t b) -> std::ptrdiff_t { return a + b; }); - std::transform(node_pads_end.begin(), node_pads_end.end(), - pads_end_val.begin() + 2, node_pads_end.begin(), - [] (std::ptrdiff_t a, size_t b) -> std::ptrdiff_t { return a + b; }); + std::transform(node_pads_begin.begin(), + node_pads_begin.end(), + pads_begin_val.begin() + 2, + node_pads_begin.begin(), + [](std::ptrdiff_t a, size_t b) -> std::ptrdiff_t { + return a + b; + }); + std::transform(node_pads_end.begin(), + node_pads_end.end(), + pads_end_val.begin() + 2, + node_pads_end.begin(), + [](std::ptrdiff_t a, size_t b) -> std::ptrdiff_t { + return a + b; + }); return std::make_tuple(node_pads_begin, node_pads_end); } @@ -162,9 +192,9 @@ pass::PadFusionConvolution::PadFusionConvolution() { auto pads_begin_pattern = pattern::wrap_type(); auto pads_end_pattern = pattern::wrap_type(); auto pad_value_pattern = pattern::any_input(); - auto pad_node_pattern = pattern::wrap_type({data_pattern, pads_begin_pattern, - pads_end_pattern, pad_value_pattern}, - pattern::consumers_count(1)); + auto pad_node_pattern 
= + pattern::wrap_type({data_pattern, pads_begin_pattern, pads_end_pattern, pad_value_pattern}, + pattern::consumers_count(1)); auto conv_pattern = pattern::wrap_type({pad_node_pattern, filter_pattern}); matcher_pass_callback callback = [=](pattern::Matcher& m) { @@ -173,17 +203,23 @@ pass::PadFusionConvolution::PadFusionConvolution() { auto filter = pattern_map[filter_pattern]; auto pad = std::dynamic_pointer_cast(pattern_map[pad_node_pattern].get_node_shared_ptr()); auto pad_value = pattern_map[pad_value_pattern].get_node_shared_ptr(); - auto pads_begin = std::dynamic_pointer_cast(pattern_map[pads_begin_pattern].get_node_shared_ptr()); - auto pads_end = std::dynamic_pointer_cast(pattern_map[pads_end_pattern].get_node_shared_ptr()); + auto pads_begin = + std::dynamic_pointer_cast(pattern_map[pads_begin_pattern].get_node_shared_ptr()); + auto pads_end = + std::dynamic_pointer_cast(pattern_map[pads_end_pattern].get_node_shared_ptr()); auto conv = std::dynamic_pointer_cast(pattern_map[conv_pattern].get_node_shared_ptr()); if (!can_be_fused(pad, conv, pad_value, pads_begin, pads_end)) return false; CoordinateDiff new_pads_begin, new_pads_end; std::tie(new_pads_begin, new_pads_end) = new_conv_pad_values(pads_begin, pads_end, conv); - auto new_conv = std::make_shared(data, filter, conv->get_strides(), - new_pads_begin, new_pads_end, - conv->get_dilations(), op::PadType::EXPLICIT); + auto new_conv = std::make_shared(data, + filter, + conv->get_strides(), + new_pads_begin, + new_pads_end, + conv->get_dilations(), + op::PadType::EXPLICIT); new_conv->set_friendly_name(conv->get_friendly_name()); copy_runtime_info({pad, conv}, new_conv); @@ -205,9 +241,9 @@ pass::PadFusionConvolutionBackpropData::PadFusionConvolutionBackpropData() { auto pads_begin_pattern = pattern::wrap_type(); auto pads_end_pattern = pattern::wrap_type(); auto pad_value_pattern = pattern::any_input(); - auto pad_node_pattern = pattern::wrap_type({data_pattern, pads_begin_pattern, - pads_end_pattern, pad_value_pattern}, - pattern::consumers_count(1)); + auto pad_node_pattern = + pattern::wrap_type({data_pattern, pads_begin_pattern, pads_end_pattern, pad_value_pattern}, + pattern::consumers_count(1)); auto conv_pattern = pattern::wrap_type({pad_node_pattern, filter_pattern}); matcher_pass_callback callback = [=](pattern::Matcher& m) { @@ -216,9 +252,12 @@ pass::PadFusionConvolutionBackpropData::PadFusionConvolutionBackpropData() { auto filter = pattern_map[filter_pattern]; auto pad = std::dynamic_pointer_cast(pattern_map[pad_node_pattern].get_node_shared_ptr()); auto pad_value = pattern_map[pad_value_pattern].get_node_shared_ptr(); - auto pads_begin = std::dynamic_pointer_cast(pattern_map[pads_begin_pattern].get_node_shared_ptr()); - auto pads_end = std::dynamic_pointer_cast(pattern_map[pads_end_pattern].get_node_shared_ptr()); - auto conv = std::dynamic_pointer_cast(pattern_map[conv_pattern].get_node_shared_ptr()); + auto pads_begin = + std::dynamic_pointer_cast(pattern_map[pads_begin_pattern].get_node_shared_ptr()); + auto pads_end = + std::dynamic_pointer_cast(pattern_map[pads_end_pattern].get_node_shared_ptr()); + auto conv = + std::dynamic_pointer_cast(pattern_map[conv_pattern].get_node_shared_ptr()); if (!can_be_fused(pad, conv, pad_value, pads_begin, pads_end)) return false; @@ -227,16 +266,19 @@ pass::PadFusionConvolutionBackpropData::PadFusionConvolutionBackpropData() { auto pads_begin_val = pads_begin->cast_vector(); auto pads_end_val = pads_end->cast_vector(); for (size_t i = 0; i < conv_pads_begin.size(); i++) { - if 
(conv_pads_begin[i] < pads_begin_val[i + 2] || - conv_pads_end[i] < pads_end_val[i + 2]) + if (conv_pads_begin[i] < pads_begin_val[i + 2] || conv_pads_end[i] < pads_end_val[i + 2]) return false; conv_pads_begin[i] -= pads_begin_val[i + 2]; conv_pads_end[i] -= pads_end_val[i + 2]; } - auto new_conv = std::make_shared(data, filter, conv->get_strides(), - conv_pads_begin, conv_pads_end, - conv->get_dilations(), op::PadType::EXPLICIT, + auto new_conv = std::make_shared(data, + filter, + conv->get_strides(), + conv_pads_begin, + conv_pads_end, + conv->get_dilations(), + op::PadType::EXPLICIT, conv->get_output_padding()); new_conv->set_friendly_name(conv->get_friendly_name()); @@ -259,9 +301,9 @@ pass::PadFusionGroupConvolution::PadFusionGroupConvolution() { auto pads_begin_pattern = pattern::wrap_type(); auto pads_end_pattern = pattern::wrap_type(); auto pad_value_pattern = pattern::any_input(); - auto pad_node_pattern = pattern::wrap_type({data_pattern, pads_begin_pattern, - pads_end_pattern, pad_value_pattern}, - pattern::consumers_count(1)); + auto pad_node_pattern = + pattern::wrap_type({data_pattern, pads_begin_pattern, pads_end_pattern, pad_value_pattern}, + pattern::consumers_count(1)); auto conv_pattern = pattern::wrap_type({pad_node_pattern, filter_pattern}); matcher_pass_callback callback = [=](pattern::Matcher& m) { @@ -270,17 +312,24 @@ pass::PadFusionGroupConvolution::PadFusionGroupConvolution() { auto filter = pattern_map[filter_pattern]; auto pad = std::dynamic_pointer_cast(pattern_map[pad_node_pattern].get_node_shared_ptr()); auto pad_value = pattern_map[pad_value_pattern].get_node_shared_ptr(); - auto pads_begin = std::dynamic_pointer_cast(pattern_map[pads_begin_pattern].get_node_shared_ptr()); - auto pads_end = std::dynamic_pointer_cast(pattern_map[pads_end_pattern].get_node_shared_ptr()); - auto conv = std::dynamic_pointer_cast(pattern_map[conv_pattern].get_node_shared_ptr()); + auto pads_begin = + std::dynamic_pointer_cast(pattern_map[pads_begin_pattern].get_node_shared_ptr()); + auto pads_end = + std::dynamic_pointer_cast(pattern_map[pads_end_pattern].get_node_shared_ptr()); + auto conv = + std::dynamic_pointer_cast(pattern_map[conv_pattern].get_node_shared_ptr()); if (!can_be_fused(pad, conv, pad_value, pads_begin, pads_end)) return false; CoordinateDiff new_pads_begin, new_pads_end; std::tie(new_pads_begin, new_pads_end) = new_conv_pad_values(pads_begin, pads_end, conv); - auto new_conv = std::make_shared(data, filter, conv->get_strides(), - new_pads_begin, new_pads_end, - conv->get_dilations(), op::PadType::EXPLICIT); + auto new_conv = std::make_shared(data, + filter, + conv->get_strides(), + new_pads_begin, + new_pads_end, + conv->get_dilations(), + op::PadType::EXPLICIT); new_conv->set_friendly_name(conv->get_friendly_name()); copy_runtime_info({pad, conv}, new_conv); @@ -302,9 +351,9 @@ pass::PadFusionGroupConvolutionBackpropData::PadFusionGroupConvolutionBackpropDa auto pads_begin_pattern = pattern::wrap_type(); auto pads_end_pattern = pattern::wrap_type(); auto pad_value_pattern = pattern::any_input(); - auto pad_node_pattern = pattern::wrap_type({data_pattern, pads_begin_pattern, - pads_end_pattern, pad_value_pattern}, - pattern::consumers_count(1)); + auto pad_node_pattern = + pattern::wrap_type({data_pattern, pads_begin_pattern, pads_end_pattern, pad_value_pattern}, + pattern::consumers_count(1)); auto conv_pattern = pattern::wrap_type({pad_node_pattern, filter_pattern}); matcher_pass_callback callback = [=](pattern::Matcher& m) { @@ -313,9 +362,12 @@ 
pass::PadFusionGroupConvolutionBackpropData::PadFusionGroupConvolutionBackpropDa auto filter = pattern_map[filter_pattern]; auto pad = std::dynamic_pointer_cast(pattern_map[pad_node_pattern].get_node_shared_ptr()); auto pad_value = pattern_map[pad_value_pattern].get_node_shared_ptr(); - auto pads_begin = std::dynamic_pointer_cast(pattern_map[pads_begin_pattern].get_node_shared_ptr()); - auto pads_end = std::dynamic_pointer_cast(pattern_map[pads_end_pattern].get_node_shared_ptr()); - auto conv = std::dynamic_pointer_cast(pattern_map[conv_pattern].get_node_shared_ptr()); + auto pads_begin = + std::dynamic_pointer_cast(pattern_map[pads_begin_pattern].get_node_shared_ptr()); + auto pads_end = + std::dynamic_pointer_cast(pattern_map[pads_end_pattern].get_node_shared_ptr()); + auto conv = std::dynamic_pointer_cast( + pattern_map[conv_pattern].get_node_shared_ptr()); if (!can_be_fused(pad, conv, pad_value, pads_begin, pads_end)) return false; @@ -324,17 +376,20 @@ pass::PadFusionGroupConvolutionBackpropData::PadFusionGroupConvolutionBackpropDa auto pads_begin_val = pads_begin->cast_vector(); auto pads_end_val = pads_end->cast_vector(); for (size_t i = 0; i < conv_pads_begin.size(); i++) { - if (conv_pads_begin[i] < pads_begin_val[i + 2] || - conv_pads_end[i] < pads_end_val[i + 2]) + if (conv_pads_begin[i] < pads_begin_val[i + 2] || conv_pads_end[i] < pads_end_val[i + 2]) return false; conv_pads_begin[i] -= pads_begin_val[i + 2]; conv_pads_end[i] -= pads_end_val[i + 2]; } - auto new_conv = std::make_shared(data, filter, conv->get_strides(), - conv_pads_begin, conv_pads_end, - conv->get_dilations(), op::PadType::EXPLICIT, - conv->get_output_padding()); + auto new_conv = std::make_shared(data, + filter, + conv->get_strides(), + conv_pads_begin, + conv_pads_end, + conv->get_dilations(), + op::PadType::EXPLICIT, + conv->get_output_padding()); new_conv->set_friendly_name(conv->get_friendly_name()); copy_runtime_info({pad, conv}, new_conv); diff --git a/src/common/transformations/src/transformations/common_optimizations/pull_transpose_through_fq.cpp b/src/common/transformations/src/transformations/common_optimizations/pull_transpose_through_fq.cpp index 4c6298e3012..e3609ad5dbb 100644 --- a/src/common/transformations/src/transformations/common_optimizations/pull_transpose_through_fq.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/pull_transpose_through_fq.cpp @@ -2,17 +2,17 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "itt.hpp" #include "transformations/common_optimizations/pull_transpose_through_fq.hpp" #include -#include - #include -#include #include +#include #include #include +#include + +#include "itt.hpp" NGRAPH_RTTI_DEFINITION(ngraph::pass::PullTransposeThroughFQUp, "PullTransposeThroughFQUp", 0); @@ -24,21 +24,21 @@ ngraph::pass::PullTransposeThroughFQUp::PullTransposeThroughFQUp() { pattern::any_input(pattern::has_static_shape()), pattern::any_input(pattern::has_static_shape()), pattern::any_input(pattern::has_static_shape())}, - pattern::consumers_count(1)); + pattern::consumers_count(1)); auto m_transpose_perm = pattern::wrap_type(); auto m_transpose = pattern::wrap_type({m_fq, m_transpose_perm}); ngraph::matcher_pass_callback callback = [=](pattern::Matcher& m) { - auto & pattern_map = m.get_pattern_value_map(); + auto& pattern_map = m.get_pattern_value_map(); auto transpose = pattern_map[m_transpose].get_node_shared_ptr(); auto fq = pattern_map[m_fq].get_node_shared_ptr(); - auto are_inputs_scalars = shape_size(fq->input_value(1).get_shape()) == 1 && 
- shape_size(fq->input_value(2).get_shape()) == 1 && - shape_size(fq->input_value(3).get_shape()) == 1 && - shape_size(fq->input_value(4).get_shape()) == 1; + auto are_inputs_scalars = + shape_size(fq->input_value(1).get_shape()) == 1 && shape_size(fq->input_value(2).get_shape()) == 1 && + shape_size(fq->input_value(3).get_shape()) == 1 && shape_size(fq->input_value(4).get_shape()) == 1; if (!are_inputs_scalars) { - auto perm = std::dynamic_pointer_cast(pattern_map[m_transpose_perm].get_node_shared_ptr()); + auto perm = + std::dynamic_pointer_cast(pattern_map[m_transpose_perm].get_node_shared_ptr()); if (!perm) return false; auto perm_val = perm->cast_vector(); @@ -58,8 +58,9 @@ ngraph::pass::PullTransposeThroughFQUp::PullTransposeThroughFQUp() { unsqueeze_axes.push_back(j); } if (!unsqueeze_axes.empty()) { - fq_input = std::make_shared(fq_input, - opset1::Constant::create(element::i64, Shape{unsqueeze_axes.size()}, unsqueeze_axes)); + fq_input = std::make_shared( + fq_input, + opset1::Constant::create(element::i64, Shape{unsqueeze_axes.size()}, unsqueeze_axes)); new_ops.push_back(fq_input.get_node_shared_ptr()); } fq_input = std::make_shared(fq_input, transpose->input_value(1)); diff --git a/src/common/transformations/src/transformations/common_optimizations/random_uniform_fusion.cpp b/src/common/transformations/src/transformations/common_optimizations/random_uniform_fusion.cpp index 2e5017f7cc4..9cc68dbcaf7 100644 --- a/src/common/transformations/src/transformations/common_optimizations/random_uniform_fusion.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/random_uniform_fusion.cpp @@ -5,11 +5,11 @@ #include "transformations/common_optimizations/random_uniform_fusion.hpp" #include +#include #include #include #include #include -#include #include "itt.hpp" @@ -64,9 +64,8 @@ ngraph::pass::RandomUniformFusion::RandomUniformFusion() { const auto& folded_const1 = ngraph::get_constant_from_source(new_mul_add1); const auto& folded_const2 = ngraph::get_constant_from_source(new_mul_add2); - const auto new_ru = ru->clone_with_new_inputs({data, - folded_const1 ? folded_const1 : new_mul_add1, - folded_const2 ? folded_const2 : new_mul_add2}); + const auto new_ru = ru->clone_with_new_inputs( + {data, folded_const1 ? folded_const1 : new_mul_add1, folded_const2 ? 
folded_const2 : new_mul_add2}); if (pattern_map.count(convert_pattern)) { const auto& convert = pattern_map.at(convert_pattern); @@ -76,7 +75,8 @@ ngraph::pass::RandomUniformFusion::RandomUniformFusion() { if (!cvt->get_element_type().is_real()) return false; const auto new_ru_conv = cvt->clone_with_new_inputs({new_ru}); - copy_runtime_info({ru, cvt, mul_add.get_node_shared_ptr()}, {new_mul_add1, new_mul_add2, new_ru, new_ru_conv}); + copy_runtime_info({ru, cvt, mul_add.get_node_shared_ptr()}, + {new_mul_add1, new_mul_add2, new_ru, new_ru_conv}); new_ru_conv->set_friendly_name(m.get_match_root()->get_friendly_name()); ngraph::replace_node(m.get_match_root(), new_ru_conv); } else { diff --git a/src/common/transformations/src/transformations/common_optimizations/relu_fake_quantize_fusion.cpp b/src/common/transformations/src/transformations/common_optimizations/relu_fake_quantize_fusion.cpp index 96600f8e649..4d05cbee47b 100644 --- a/src/common/transformations/src/transformations/common_optimizations/relu_fake_quantize_fusion.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/relu_fake_quantize_fusion.cpp @@ -3,16 +3,15 @@ // #include "transformations/common_optimizations/relu_fake_quantize_fusion.hpp" -#include "transformations/utils/utils.hpp" -#include "itt.hpp" #include +#include +#include +#include #include -#include -#include -#include - +#include "itt.hpp" +#include "transformations/utils/utils.hpp" NGRAPH_RTTI_DEFINITION(ngraph::pass::ReluFakeQuantizeFusion, "ReluFakeQuantizeFusion", 0); @@ -21,7 +20,8 @@ ngraph::pass::ReluFakeQuantizeFusion::ReluFakeQuantizeFusion() { auto data_pattern = ngraph::pattern::any_input(); auto relu_pattern = ngraph::pattern::wrap_type({data_pattern}, pattern::consumers_count(1)); auto input_low_pattern = ngraph::pattern::wrap_type(); - auto fq_pattern = ngraph::pattern::wrap_type({relu_pattern, input_low_pattern, + auto fq_pattern = ngraph::pattern::wrap_type({relu_pattern, + input_low_pattern, ngraph::pattern::any_input(), ngraph::pattern::any_input(), ngraph::pattern::any_input()}); @@ -35,7 +35,9 @@ ngraph::pass::ReluFakeQuantizeFusion::ReluFakeQuantizeFusion() { if (!input_low_const) return false; auto input_low_values = input_low_const->cast_vector(); - if (std::any_of(input_low_values.begin(), input_low_values.end(), [] (float f) -> bool { return f < 0; })) + if (std::any_of(input_low_values.begin(), input_low_values.end(), [](float f) -> bool { + return f < 0; + })) return false; auto fq = std::dynamic_pointer_cast(pattern_map[fq_pattern].get_node_shared_ptr()); if (!fq) diff --git a/src/common/transformations/src/transformations/common_optimizations/remove_concat_zero_dim_input.cpp b/src/common/transformations/src/transformations/common_optimizations/remove_concat_zero_dim_input.cpp index d7faaaf2a28..9ced2a787ce 100644 --- a/src/common/transformations/src/transformations/common_optimizations/remove_concat_zero_dim_input.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/remove_concat_zero_dim_input.cpp @@ -3,17 +3,16 @@ // #include "transformations/common_optimizations/remove_concat_zero_dim_input.hpp" -#include "transformations/utils/utils.hpp" -#include -#include #include - -#include -#include "openvino/pass/pattern/op/wrap_type.hpp" +#include #include -#include "itt.hpp" +#include +#include +#include "itt.hpp" +#include "openvino/pass/pattern/op/wrap_type.hpp" +#include "transformations/utils/utils.hpp" NGRAPH_RTTI_DEFINITION(ov::pass::RemoveConcatZeroDimInput, "RemoveConcatZeroDimInput", 
0); @@ -23,18 +22,23 @@ ov::pass::RemoveConcatZeroDimInput::RemoveConcatZeroDimInput() { ngraph::matcher_pass_callback callback = [=](pattern::Matcher& m) { auto concat = m.get_match_root(); auto concat_inputs = concat->input_values(); - concat_inputs.erase(std::remove_if(concat_inputs.begin(), concat_inputs.end(), - [](const Output& input) { - const auto& in_shape = input.get_partial_shape(); - if (in_shape.rank().is_static()) { - return std::any_of(std::begin(in_shape), std::end(in_shape), [](const ov::Dimension& dim) { - if (dim.is_static() && dim.get_length() == 0) { - return true; - } - return false; - });} + concat_inputs.erase( + std::remove_if( + concat_inputs.begin(), + concat_inputs.end(), + [](const Output& input) { + const auto& in_shape = input.get_partial_shape(); + if (in_shape.rank().is_static()) { + return std::any_of(std::begin(in_shape), std::end(in_shape), [](const ov::Dimension& dim) { + if (dim.is_static() && dim.get_length() == 0) { + return true; + } + return false; + }); + } return false; - }), concat_inputs.end()); + }), + concat_inputs.end()); bool inputs_removed = concat->get_input_size() > concat_inputs.size(); if (inputs_removed) { diff --git a/src/common/transformations/src/transformations/common_optimizations/remove_filtering_boxes_by_size.cpp b/src/common/transformations/src/transformations/common_optimizations/remove_filtering_boxes_by_size.cpp index 6c1fc703056..00b308b6095 100644 --- a/src/common/transformations/src/transformations/common_optimizations/remove_filtering_boxes_by_size.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/remove_filtering_boxes_by_size.cpp @@ -2,15 +2,15 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "itt.hpp" +#include "transformations/common_optimizations/remove_filtering_boxes_by_size.hpp" + #include +#include +#include +#include #include -#include -#include -#include - -#include "transformations/common_optimizations/remove_filtering_boxes_by_size.hpp" +#include "itt.hpp" #include "transformations/common_optimizations/subtract_fusion.hpp" NGRAPH_RTTI_DEFINITION(ngraph::pass::FuseFilteringBoxesBySize, "FuseFilteringBoxesBySize", 0); @@ -25,39 +25,41 @@ ngraph::pass::RemoveFilteringBoxesBySize::RemoveFilteringBoxesBySize() { MATCHER_SCOPE(RemoveFilteringBoxesBySize); // variadic split auto data = std::make_shared(element::f32, Shape{1000, 4}); - auto sizes = opset3::Constant::create(element::i64, Shape{4}, std::vector({1, 1, 1, 1})); - auto axis = opset3::Constant::create(element::i64, Shape{1}, std::vector({1})); + auto sizes = opset3::Constant::create(element::i64, Shape{4}, std::vector({1, 1, 1, 1})); + auto axis = opset3::Constant::create(element::i64, Shape{1}, std::vector({1})); auto split = std::make_shared(data, axis, sizes); // sub -> add - auto sub_2_0 = std::make_shared(split->output(2), split->output(0)); + auto sub_2_0 = std::make_shared(split->output(2), split->output(0)); auto term_1 = std::make_shared(element::f32, Shape{1}); auto add_1 = std::make_shared(sub_2_0, term_1); - auto sub_3_1 = std::make_shared(split->output(3), split->output(1)); + auto sub_3_1 = std::make_shared(split->output(3), split->output(1)); auto term_2 = std::make_shared(element::f32, Shape{1}); auto add_2 = std::make_shared(sub_3_1, term_2); // concat - auto concat = std::make_shared(ngraph::OutputVector({split->output(0), split->output(1), add_1->output(0), add_2->output(0)}), 1); + auto concat = std::make_shared( + ngraph::OutputVector({split->output(0), split->output(1), add_1->output(0), 
add_2->output(0)}), + 1); // second variadic split - auto sizes_1 = opset3::Constant::create(element::i64, Shape{4}, std::vector({1, 1, 1, 1})); - auto axis_1 = opset3::Constant::create(element::i64, Shape{1}, std::vector({1})); + auto sizes_1 = opset3::Constant::create(element::i64, Shape{4}, std::vector({1, 1, 1, 1})); + auto axis_1 = opset3::Constant::create(element::i64, Shape{1}, std::vector({1})); auto split_1 = std::make_shared(concat, axis_1, sizes_1); // squeeze - auto squeeze_1_axis = opset3::Constant::create(element::i64, Shape{1}, std::vector({1})); + auto squeeze_1_axis = opset3::Constant::create(element::i64, Shape{1}, std::vector({1})); auto squeeze_1 = std::make_shared(split_1->output(2), squeeze_1_axis); - auto squeeze_2_axis = opset3::Constant::create(element::i64, Shape{1}, std::vector({1})); + auto squeeze_2_axis = opset3::Constant::create(element::i64, Shape{1}, std::vector({1})); auto squeeze_2 = std::make_shared(split_1->output(3), squeeze_2_axis); // less - auto less_1_constant = opset3::Constant::create(element::f32, Shape{1}, std::vector({0})); + auto less_1_constant = opset3::Constant::create(element::f32, Shape{1}, std::vector({0})); auto less_1 = std::make_shared(squeeze_1, less_1_constant); - auto less_2_constant = opset3::Constant::create(element::f32, Shape{1}, std::vector({0})); + auto less_2_constant = opset3::Constant::create(element::f32, Shape{1}, std::vector({0})); auto less_2 = std::make_shared(squeeze_2, less_2_constant); // Logical Not @@ -81,27 +83,27 @@ ngraph::pass::RemoveFilteringBoxesBySize::RemoveFilteringBoxesBySize() { // nonzero auto non_zero = std::make_shared(cast_32); - auto order = opset3::Constant::create(element::i64, Shape{2}, std::vector({1, 0})); + auto order = opset3::Constant::create(element::i64, Shape{2}, std::vector({1, 0})); auto transpose = std::make_shared(non_zero, order); - auto squeeze_3_axis = opset3::Constant::create(element::i64, Shape{1}, std::vector({1})); + auto squeeze_3_axis = opset3::Constant::create(element::i64, Shape{1}, std::vector({1})); auto squeeze_3 = std::make_shared(transpose, squeeze_3_axis); auto cast = std::make_shared(squeeze_3, ngraph::element::i64); ngraph::matcher_pass_callback callback = [data](pattern::Matcher& m) { - auto start = opset3::Constant::create(element::i64, Shape{}, std::vector({0})); - auto step = opset3::Constant::create(element::i64, Shape{}, std::vector({1})); + auto start = opset3::Constant::create(element::i64, Shape{}, std::vector({0})); + auto step = opset3::Constant::create(element::i64, Shape{}, std::vector({1})); - const auto & pattern_map = m.get_pattern_map(); + const auto& pattern_map = m.get_pattern_map(); auto input = pattern_map.at(data); auto output = m.get_match_root(); auto input_shape = std::make_shared(input); - auto axis = opset3::Constant::create(element::i64, Shape{}, std::vector({0})); - auto index = opset3::Constant::create(element::i64, Shape{}, std::vector({0})); + auto axis = opset3::Constant::create(element::i64, Shape{}, std::vector({0})); + auto index = opset3::Constant::create(element::i64, Shape{}, std::vector({0})); auto stop = std::make_shared(input_shape, index, axis); auto range = std::make_shared(start, stop, step); diff --git a/src/common/transformations/src/transformations/common_optimizations/remove_multi_subgraph_op_dangling_params.cpp b/src/common/transformations/src/transformations/common_optimizations/remove_multi_subgraph_op_dangling_params.cpp index 08c4ea78092..4fa80f62761 100644 --- 
a/src/common/transformations/src/transformations/common_optimizations/remove_multi_subgraph_op_dangling_params.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/remove_multi_subgraph_op_dangling_params.cpp @@ -3,17 +3,16 @@ // #include "transformations/common_optimizations/remove_multi_subgraph_op_dangling_params.hpp" -#include "transformations/utils/utils.hpp" #include +#include +#include +#include #include -#include -#include -#include "openvino/pass/pattern/op/wrap_type.hpp" -#include #include "itt.hpp" - +#include "openvino/pass/pattern/op/wrap_type.hpp" +#include "transformations/utils/utils.hpp" NGRAPH_RTTI_DEFINITION(ov::pass::RemoveMultiSubGraphOpDanglingParams, "RemoveMultiSubGraphOpDanglingParams", 0); @@ -31,7 +30,7 @@ ov::pass::RemoveMultiSubGraphOpDanglingParams::RemoveMultiSubGraphOpDanglingPara std::vector> to_remove_descriptors_indexes; const auto subgraphs_size = multi_subgraph_op->get_internal_subgraphs_size(); to_remove_descriptors_indexes.resize(subgraphs_size); - for (size_t body_idx=0; body_idx < subgraphs_size; ++body_idx) { + for (size_t body_idx = 0; body_idx < subgraphs_size; ++body_idx) { auto& body_func = multi_subgraph_op->get_function(body_idx); auto& body_params = body_func->get_parameters(); auto& body_in_descriptors = multi_subgraph_op->get_input_descriptions(body_idx); @@ -44,46 +43,52 @@ ov::pass::RemoveMultiSubGraphOpDanglingParams::RemoveMultiSubGraphOpDanglingPara } else { // collecting required inputs is needed to detect cases where the input // is not needed in a one body, but the other one uses it (for example If case) - required_inputs.insert(op_inputs[body_in_descriptors[i]->m_input_index]); // only unique + required_inputs.insert(op_inputs[body_in_descriptors[i]->m_input_index]); // only unique } } } if (pass_required) { using DescType = op::util::MultiSubGraphOp::MultiSubgraphInputDescriptionVector; - auto update_body_param_desc = [](DescType& descriptors, uint64_t removed_body_idx){ + auto update_body_param_desc = [](DescType& descriptors, uint64_t removed_body_idx) { for (auto& desc : descriptors) { if (desc->m_body_parameter_index > removed_body_idx) { desc->m_body_parameter_index--; } - }}; - auto update_op_inputs_desc = [&subgraphs_size](const std::shared_ptr& op, uint64_t removed_loop_idx){ - for (size_t body_idx=0; body_idx < subgraphs_size; ++body_idx) { + } + }; + auto update_op_inputs_desc = [&subgraphs_size](const std::shared_ptr& op, + uint64_t removed_loop_idx) { + for (size_t body_idx = 0; body_idx < subgraphs_size; ++body_idx) { auto& descriptors = op->get_input_descriptions(body_idx); for (auto& desc : descriptors) { if (desc->m_input_index > removed_loop_idx) { desc->m_input_index--; } } - }}; + } + }; // Remove dangling body params and input and update input descriptors - for (size_t body_idx=0; body_idx < subgraphs_size; ++body_idx) { + for (size_t body_idx = 0; body_idx < subgraphs_size; ++body_idx) { auto& body_in_descriptors = multi_subgraph_op->get_input_descriptions(body_idx); auto& body_func = multi_subgraph_op->get_function(body_idx); auto& body_params = body_func->get_parameters(); op::util::MultiSubGraphOp::MultiSubgraphInputDescriptionVector updated_body_in_descriptors; for (size_t desc_idx = 0; desc_idx < body_in_descriptors.size(); ++desc_idx) { - if (std::count(std::begin(to_remove_descriptors_indexes[body_idx]), std::end(to_remove_descriptors_indexes[body_idx]), desc_idx) > 0) { + if (std::count(std::begin(to_remove_descriptors_indexes[body_idx]), + 
std::end(to_remove_descriptors_indexes[body_idx]), + desc_idx) > 0) { auto& body_param = body_params[body_in_descriptors[desc_idx]->m_body_parameter_index]; body_func->remove_parameter(body_param); // Move all body indexes which are after these indicated by to_remove_descriptors_indexes - update_body_param_desc(body_in_descriptors, body_in_descriptors[desc_idx]->m_body_parameter_index); + update_body_param_desc(body_in_descriptors, + body_in_descriptors[desc_idx]->m_body_parameter_index); // remove dangling input of MultiSubGraphOp which was not removed earlier auto& current_input = op_inputs[body_in_descriptors[desc_idx]->m_input_index]; - if (std::count(std::begin(required_inputs), std::end(required_inputs), current_input) == 0 - && std::count(std::begin(op_inputs), std::end(op_inputs), current_input) > 0) { + if (std::count(std::begin(required_inputs), std::end(required_inputs), current_input) == 0 && + std::count(std::begin(op_inputs), std::end(op_inputs), current_input) > 0) { op_inputs.erase(std::next(op_inputs.begin(), body_in_descriptors[desc_idx]->m_input_index)); - // Move all input indexes (in all bodies) which are after these indicated by to_remove_descriptors_indexes - // and are not used in any body + // Move all input indexes (in all bodies) which are after these indicated by + // to_remove_descriptors_indexes and are not used in any body update_op_inputs_desc(multi_subgraph_op, body_in_descriptors[desc_idx]->m_input_index); } } else { diff --git a/src/common/transformations/src/transformations/common_optimizations/reshape_sequence_fusion.cpp b/src/common/transformations/src/transformations/common_optimizations/reshape_sequence_fusion.cpp index 847d0a196d4..f3ca206f315 100644 --- a/src/common/transformations/src/transformations/common_optimizations/reshape_sequence_fusion.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/reshape_sequence_fusion.cpp @@ -3,17 +3,16 @@ // #include "transformations/common_optimizations/reshape_sequence_fusion.hpp" -#include "transformations/utils/utils.hpp" #include +#include +#include +#include +#include #include -#include -#include -#include -#include #include "itt.hpp" - +#include "transformations/utils/utils.hpp" NGRAPH_RTTI_DEFINITION(ngraph::pass::ReshapeSequenceFusion, "ReshapeSequenceFusion", 0); @@ -23,45 +22,59 @@ bool has_valid_pattern(const ov::Output& node_out) { if (!const_node) { // Lower bound of the value auto lb = ngraph::evaluate_lower_bound(node_out); - if (!lb) return false; + if (!lb) + return false; const auto lb_const_node = std::make_shared(lb); - const auto & lb_values = lb_const_node->cast_vector(); + const auto& lb_values = lb_const_node->cast_vector(); // The pattern is valid if all lower bound values are higher than zero (not a special number) // or if the lower and upper bounds values are a sign of full dynamism - const bool lb_has_special_val = std::any_of(lb_values.cbegin(), lb_values.cend(), [](int64_t value) { return value < 1;}); - if (!lb_has_special_val) return true; + const bool lb_has_special_val = std::any_of(lb_values.cbegin(), lb_values.cend(), [](int64_t value) { + return value < 1; + }); + if (!lb_has_special_val) + return true; // Upper bound of the value auto ub = ngraph::evaluate_upper_bound(node_out); - if (!ub) return false; + if (!ub) + return false; const auto ub_const_node = std::make_shared(ub); - const auto & ub_values = ub_const_node->cast_vector(); - if (lb_values.size() != ub_values.size()) return false; + const auto& ub_values = ub_const_node->cast_vector(); + 
if (lb_values.size() != ub_values.size())
+            return false;
 
         // Check if zero values are paired with max value as a sign of full dynamism
-        const int64_t ub_max = node_out.get_element_type() == ov::element::i32 ? std::numeric_limits::max() : std::numeric_limits::max();
-        const auto mismatch_iters = std::mismatch(lb_values.cbegin(), lb_values.cend(), ub_values.cbegin(),
-            [ub_max](int64_t lb_val, int64_t ub_val){ return lb_val > 0 || (lb_val == 0 && ub_val == ub_max);});
+        const int64_t ub_max = node_out.get_element_type() == ov::element::i32 ? std::numeric_limits::max()
+                                                                                : std::numeric_limits::max();
+        const auto mismatch_iters = std::mismatch(lb_values.cbegin(),
+                                                  lb_values.cend(),
+                                                  ub_values.cbegin(),
+                                                  [ub_max](int64_t lb_val, int64_t ub_val) {
+                                                      return lb_val > 0 || (lb_val == 0 && ub_val == ub_max);
+                                                  });
         return mismatch_iters.first == lb_values.cend();
     }
-    const auto & values = const_node->cast_vector();
+    const auto& values = const_node->cast_vector();
     // We can not fuse Reshapes if their pattern values have special numbers like -1 and 0
-    return std::all_of(values.cbegin(), values.cend(), [](int64_t value) { return value > 0;});
+    return std::all_of(values.cbegin(), values.cend(), [](int64_t value) {
+        return value > 0;
+    });
 }
-} // namespace
+}  // namespace
 
 ngraph::pass::ReshapeSequenceFusion::ReshapeSequenceFusion(bool use_shape_for_elimination) {
     MATCHER_SCOPE(ReshapeSequenceFusion);
     auto reshape_input = pattern::any_input();
     auto reshape_a_pattern = pattern::wrap_type();
-    auto reshape_a = pattern::wrap_type({reshape_input, reshape_a_pattern}, pattern::consumers_count(1));
+    auto reshape_a =
+        pattern::wrap_type({reshape_input, reshape_a_pattern}, pattern::consumers_count(1));
     auto reshape_b_pattern = pattern::any_input();
     auto reshape_b = pattern::wrap_type({reshape_a, reshape_b_pattern});
 
     matcher_pass_callback callback = [=](pattern::Matcher& m) {
-        const auto & pattern_map = m.get_pattern_value_map();
+        const auto& pattern_map = m.get_pattern_value_map();
         auto input = pattern_map.at(reshape_input);
         auto reshape = m.get_match_root();
@@ -76,8 +89,7 @@ ngraph::pass::ReshapeSequenceFusion::ReshapeSequenceFusion(bool use_shape_for_el
         NodeVector nodes{pattern_map.at(reshape_a).get_node_shared_ptr(), reshape};
         while (std::dynamic_pointer_cast(input.get_node_shared_ptr())) {
             auto node = input.get_node_shared_ptr();
-            if (!has_valid_pattern(node->get_input_node_shared_ptr(1)) ||
-                input.get_target_inputs().size() != 1) {
+            if (!has_valid_pattern(node->get_input_node_shared_ptr(1)) || input.get_target_inputs().size() != 1) {
                 break;
             }
             nodes.push_back(node);
@@ -86,8 +98,8 @@ ngraph::pass::ReshapeSequenceFusion::ReshapeSequenceFusion(bool use_shape_for_el
 
         // remove redundant reshapes
         bool replaced = false;
-        if (use_shape_for_elimination && input.get_partial_shape().is_static() && reshape->get_output_partial_shape(0).is_static() &&
-            input.get_shape() == reshape->get_output_shape(0)) {
+        if (use_shape_for_elimination && input.get_partial_shape().is_static() &&
+            reshape->get_output_partial_shape(0).is_static() && input.get_shape() == reshape->get_output_shape(0)) {
             // in case if elimination is not allowed we still can eliminate all transposes except last one
             replaced = replace_output_update_name(reshape->output(0), input);
         }
@@ -95,7 +107,7 @@ ngraph::pass::ReshapeSequenceFusion::ReshapeSequenceFusion(bool use_shape_for_el
         if (!replaced) {
             reshape->input(0).replace_source_output(input);
             copy_runtime_info(nodes, reshape);
-            return false; // because root node wasn't replaced
+            return false;  // because root node wasn't replaced
         }
 
         return true;
diff --git a/src/common/transformations/src/transformations/common_optimizations/ric_fusion.cpp b/src/common/transformations/src/transformations/common_optimizations/ric_fusion.cpp
index d25065328a2..e1eb4d5e815 100644
--- a/src/common/transformations/src/transformations/common_optimizations/ric_fusion.cpp
+++ b/src/common/transformations/src/transformations/common_optimizations/ric_fusion.cpp
@@ -3,23 +3,22 @@
 //
 
 #include "transformations/common_optimizations/ric_fusion.hpp"
-#include "transformations/utils/utils.hpp"
 
 #include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
 #include
 #include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
 
 #include "itt.hpp"
-
+#include "transformations/utils/utils.hpp"
 
 NGRAPH_RTTI_DEFINITION(ngraph::pass::ReverseInputChannelsFusion, "ReverseInputChannelsFusion", 0);
 
 namespace ric_attr {
 
 // In addition, attribute has some functionality and properties for propagation.
 class Attribute {
 public:
-    using callback_t = std::function, const Attribute &)>;
+    using callback_t = std::function, const Attribute&)>;
 
     Attribute(std::vector order, int64_t axis, bool is_final = false, bool is_initial = false)
-        : m_order(std::move(order)), m_axis(axis), m_is_final(is_final), m_is_initial(is_initial) {
+        : m_order(std::move(order)),
+          m_axis(axis),
+          m_is_final(is_final),
+          m_is_initial(is_initial) {
         m_can_be_fused.emplace_back(std::make_shared(true));
     }
@@ -48,13 +50,16 @@ public:
         return attr;
     }
 
-    void set_is_final(bool is_final) { m_is_final = is_final; }
+    void set_is_final(bool is_final) {
+        m_is_final = is_final;
+    }
 
     void set_can_be_fused(bool can_be_fused) {
-        std::for_each(m_can_be_fused.cbegin(), m_can_be_fused.cend(),
-                      [can_be_fused](const std::shared_ptr & state) {
-            *state = can_be_fused;
-        });
+        std::for_each(m_can_be_fused.cbegin(),
+                      m_can_be_fused.cend(),
+                      [can_be_fused](const std::shared_ptr& state) {
+                          *state = can_be_fused;
+                      });
     }
 
     void set_callback(callback_t callback) {
@@ -62,13 +67,12 @@ public:
     }
 
     // Apply callback to materialize RIC inside graph
-    void operator() (Input input) const {
+    void operator()(Input input) const {
         m_callback(input, *this);
     }
 
     bool can_be_fused() const {
-        return std::all_of(m_can_be_fused.cbegin(), m_can_be_fused.cend(),
-                           [](const std::shared_ptr & state) {
+        return std::all_of(m_can_be_fused.cbegin(), m_can_be_fused.cend(), [](const std::shared_ptr& state) {
             return *state;
         });
     }
@@ -76,29 +80,39 @@ public:
     // For cases when we propagate through operation with multiple inputs like Eltwise
    // we have to merge RIC attrs from all inputs. To check that given attr be merged with
     // current we check the order and axis which must be the same.
-    bool can_be_merged_with(const Attribute & other) {
+    bool can_be_merged_with(const Attribute& other) {
         return (m_order.empty() || other.m_order.empty() || m_order == other.m_order) && m_axis == other.m_axis;
     }
 
     // When merging two and more attrs for further propagation we have to keep can_be_fused references
     // for cases when fusion is not possible, so we can update all related attrs.
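    // Illustrative sketch (an editorial aside, not code from this patch): the kind of
    // reverse-input-channel (RIC) subgraph this attribute gets attached to during
    // propagation, written against the same 2022.1-era ngraph/opset8 API the surrounding
    // code uses. The helper name and the shapes are hypothetical illustration only.
    static std::shared_ptr<ngraph::Node> make_ric_example() {
        // NCHW input with three colour channels.
        auto data = std::make_shared<ngraph::opset8::Parameter>(ngraph::element::f32, ngraph::Shape{1, 3, 224, 224});
        // Gather over axis 1 with order {2, 1, 0} reverses the channel order (e.g. RGB -> BGR);
        // init::Gather below recognizes exactly this node as a RIC candidate and seeds the
        // attribute with its order and axis.
        auto order = ngraph::opset8::Constant::create(ngraph::element::i64, ngraph::Shape{3}, {2, 1, 0});
        auto axis = ngraph::opset8::Constant::create(ngraph::element::i64, ngraph::Shape{}, {1});
        return std::make_shared<ngraph::opset8::Gather>(data, order, axis);
    }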
- void merge_with(const Attribute & other) { - m_can_be_fused.insert(m_can_be_fused.end(), - other.m_can_be_fused.begin(), - other.m_can_be_fused.end()); + void merge_with(const Attribute& other) { + m_can_be_fused.insert(m_can_be_fused.end(), other.m_can_be_fused.begin(), other.m_can_be_fused.end()); } - const std::vector & get_order() const { return m_order; } + const std::vector& get_order() const { + return m_order; + } - void set_order(const std::vector & order) { m_order = order; } + void set_order(const std::vector& order) { + m_order = order; + } - int64_t get_axis() const { return m_axis; } + int64_t get_axis() const { + return m_axis; + } - void set_axis(int64_t axis) { m_axis = axis; } + void set_axis(int64_t axis) { + m_axis = axis; + } - bool is_final() const { return m_is_final; } + bool is_final() const { + return m_is_final; + } - bool is_initial() const { return m_is_initial; } + bool is_initial() const { + return m_is_initial; + } private: // empty order means that the order is default and must be n, n-1, ..., 0 @@ -120,8 +134,7 @@ private: // Callback specifies the action for RIC materialization for given input port. // In most cases it should insert Gather operation for the input. - std::function, const Attribute &)> m_callback = - [](Input, const Attribute &) {}; + std::function, const Attribute&)> m_callback = [](Input, const Attribute&) {}; }; namespace { @@ -130,8 +143,8 @@ template using is_port = typename std::enable_if>::value>::type; template > -void set(T port, const Attribute & ric_attr) { - auto & attrs = port.get_rt_info(); +void set(T port, const Attribute& ric_attr) { + auto& attrs = port.get_rt_info(); attrs["reverse_input_channel_index"] = ric_attr; } @@ -141,14 +154,14 @@ void init(Output output, std::vector order, int64_t axis) { } template > -bool has(const T & port) { - const auto & attrs = port.get_rt_info(); +bool has(const T& port) { + const auto& attrs = port.get_rt_info(); return attrs.count("reverse_input_channel_index"); } template > -Attribute get(const T & port) { - const auto & attrs = port.get_rt_info(); +Attribute get(const T& port) { + const auto& attrs = port.get_rt_info(); auto res = attrs.find("reverse_input_channel_index"); if (res != attrs.end()) { return res->second.template as(); @@ -158,11 +171,11 @@ Attribute get(const T & port) { template > void erase(T port) { - auto & rt_info = port.get_rt_info(); + auto& rt_info = port.get_rt_info(); rt_info.erase("reverse_input_channel_index"); } -}// namespace -}// namespace ric_attr +} // namespace +} // namespace ric_attr namespace init { class SplitConcat : public ngraph::pass::MatcherPass { @@ -173,10 +186,11 @@ public: auto pattern_root = pattern::wrap_type({split_p, split_p, split_p}); auto callback = [=](pattern::Matcher& m) { - const auto & pattern_map = m.get_pattern_value_map(); + const auto& pattern_map = m.get_pattern_value_map(); auto concat = ov::as_type_ptr(pattern_map.at(pattern_root).get_node_shared_ptr()); auto split = ov::as_type_ptr(pattern_map.at(split_p).get_node_shared_ptr()); - if (!concat || !split) return false; + if (!concat || !split) + return false; // Avoid cases with two consecutive Split->Concat if (ric_attr::has(split->input_value(0))) { @@ -186,13 +200,14 @@ public: std::vector order; order.reserve(split->get_num_splits()); - for (const auto & input : concat->inputs()) { + for (const auto& input : concat->inputs()) { auto split_output = input.get_source_output(); - if (split_output.get_node() != split.get()) return false; + if (split_output.get_node() != 
split.get()) + return false; // Check that Concat is the only Split consumer and order of Split outputs // satisfies expected order for reverse input channel case. - for (const auto & target_input : split_output.get_target_inputs()) { + for (const auto& target_input : split_output.get_target_inputs()) { if (target_input.get_node() != concat.get()) { return false; } @@ -226,11 +241,12 @@ public: auto pattern_root = pattern::wrap_type({input_p, indices_p, axis_p}); auto callback = [=](pattern::Matcher& m) { - const auto & pattern_map = m.get_pattern_value_map(); - const auto & output = pattern_map.at(pattern_root); + const auto& pattern_map = m.get_pattern_value_map(); + const auto& output = pattern_map.at(pattern_root); auto axis = ov::get_constant_from_source(pattern_map.at(axis_p)); - if (!axis) return false; + if (!axis) + return false; const auto axis_value = axis->cast_vector().at(0); @@ -240,7 +256,8 @@ public: } auto order = ov::get_constant_from_source(pattern_map.at(indices_p)); - if (!order) return false; + if (!order) + return false; // Avoid cases with two consecutive Gathers if (ric_attr::has(pattern_map.at(input_p))) { @@ -248,17 +265,15 @@ public: } // This constraint helps to avoid detection of other Gathers that do not perform RIC - const auto & data_shape = m.get_match_root()->input(0).get_partial_shape(); - if (shape_size(order->get_shape()) == 1 || - axis_value < 0 || - axis_value >= data_shape.rank().get_length() || + const auto& data_shape = m.get_match_root()->input(0).get_partial_shape(); + if (shape_size(order->get_shape()) == 1 || axis_value < 0 || axis_value >= data_shape.rank().get_length() || data_shape[axis_value].is_dynamic() || shape_size(order->get_shape()) != static_cast(data_shape[axis_value].get_length())) { return false; } // Check that all order values are unique, otherwise it is not RIC - const auto & order_values = order->cast_vector(); + const auto& order_values = order->cast_vector(); std::set unique_values(order_values.cbegin(), order_values.cend()); if (unique_values.size() != order_values.size()) { return false; @@ -271,14 +286,14 @@ public: register_matcher(m, callback); } }; -}// namespace init +} // namespace init namespace prop { namespace { -std::shared_ptr create_const(const std::vector & values) { +std::shared_ptr create_const(const std::vector& values) { return opset8::Constant::create(ov::element::i64, ov::Shape{values.size()}, values); } -}// namespace +} // namespace class Binary : public ngraph::pass::MatcherPass { public: @@ -287,11 +302,11 @@ public: auto pattern_root = pattern::wrap_type(); auto callback = [=](pattern::Matcher& m) { - const auto & root = m.get_match_root(); - const auto & inputs = root->inputs(); + const auto& root = m.get_match_root(); + const auto& inputs = root->inputs(); std::map attrs; - for (const auto & input : inputs) { + for (const auto& input : inputs) { auto output = input.get_source_output(); if (ric_attr::has(output)) { attrs.insert({input.get_index(), ric_attr::get(output).propagate()}); @@ -302,32 +317,34 @@ public: } } - if (attrs.empty()) return false; + if (attrs.empty()) + return false; // Check that all RIC attrs can be merged and then merge them auto ric = attrs.begin()->second; auto rank = root->get_input_partial_shape(attrs.begin()->first).rank(); - if (rank.is_dynamic()) return false; + if (rank.is_dynamic()) + return false; auto data_rank = rank.get_length(); - for (const auto & item : attrs) { - const auto & input_rank = root->get_input_partial_shape(item.first).rank(); - if 
(input_rank.is_static() && - input_rank.get_length() == data_rank && - ric.can_be_merged_with(item.second) ) { + for (const auto& item : attrs) { + const auto& input_rank = root->get_input_partial_shape(item.first).rank(); + if (input_rank.is_static() && input_rank.get_length() == data_rank && + ric.can_be_merged_with(item.second)) { ric.merge_with(item.second); } else { return false; } } - for (const auto & input : inputs) { + for (const auto& input : inputs) { // Skip input that have RIC attribute - if (attrs.count(input.get_index())) continue; + if (attrs.count(input.get_index())) + continue; auto const_output = input.get_source_output(); - const auto & shape = const_output.get_shape(); - const int64_t & shape_rank = static_cast(shape.size()); + const auto& shape = const_output.get_shape(); + const int64_t& shape_rank = static_cast(shape.size()); if (shape_rank > data_rank) { // TODO: handle case when constant input broadcast another one return false; @@ -339,8 +356,8 @@ public: continue; } - const int64_t & new_axis = ric.get_axis() - (data_rank - shape_rank); - const auto & axis_dim = shape[new_axis]; + const int64_t& new_axis = ric.get_axis() - (data_rank - shape_rank); + const auto& axis_dim = shape[new_axis]; if (axis_dim == 1) { // we don't have to insert RIC for constant, so we keep propagating ric_attr::set(m.get_match_value(), ric); @@ -351,7 +368,7 @@ public: auto ric_const = ric; ric_const.set_axis(new_axis); ric_const.set_is_final(true); - ric_const.set_callback([axis_dim](Input input, const ric_attr::Attribute & attr) { + ric_const.set_callback([axis_dim](Input input, const ric_attr::Attribute& attr) { auto output = input.get_source_output(); // Handle case when the RIC order is default auto order = attr.get_order(); @@ -359,8 +376,8 @@ public: order.resize(axis_dim); std::iota(order.rbegin(), order.rend(), 0); } - auto gather = std::make_shared(output, create_const(order), - create_const({attr.get_axis()})); + auto gather = + std::make_shared(output, create_const(order), create_const({attr.get_axis()})); input.replace_source_output(gather); // TODO: copy runtime info from RIC sub-graph }); @@ -384,16 +401,18 @@ public: // a terminal node, so we do not propagate RIC attribute further and insert // final RIC attribute to the weights input. 
auto input_p = pattern::any_input(ric_attr::has>); - auto pattern_root = pattern::wrap_type({input_p, - pattern::wrap_type(pattern::has_static_dim(1/*output channel*/))}); + auto pattern_root = + pattern::wrap_type({input_p, + pattern::wrap_type( + pattern::has_static_dim(1 /*output channel*/))}); auto callback = [=](pattern::Matcher& m) { auto conv = m.get_match_root(); auto ric = ric_attr::get(conv->input_value(0)).propagate(); - if (ric.get_axis() != 1) return false; + if (ric.get_axis() != 1) + return false; ric.set_is_final(true); - ric.set_callback([](Input input, const ric_attr::Attribute & attr) { + ric.set_callback([](Input input, const ric_attr::Attribute& attr) { const auto output_channel_index = 1; auto order = attr.get_order(); // Handle case when the RIC order is default @@ -402,7 +421,8 @@ public: std::iota(order.rbegin(), order.rend(), 0); } auto weights = input.get_source_output(); - auto gather = std::make_shared(weights, create_const(order), + auto gather = std::make_shared(weights, + create_const(order), create_const({output_channel_index})); input.replace_source_output(gather); // TODO: copy runtime info from RIC sub-graph @@ -414,7 +434,7 @@ public: // Apply Binary transformation for FQ to handle 1..5 inputs ric.set_is_final(false); - ric_attr::set(fq->input_value(0), ric); // set ric attr to simulate propagation flow + ric_attr::set(fq->input_value(0), ric); // set ric attr to simulate propagation flow Binary().apply(fq); } else { ric_attr::set(conv->input(1), ric); @@ -432,15 +452,15 @@ public: GroupConvolution() { MATCHER_SCOPE(GroupConvolution); auto input_p = pattern::any_input(ric_attr::has>); - auto pattern_root = pattern::wrap_type({input_p, pattern::wrap_type(pattern::has_static_shape())}); + auto pattern_root = pattern::wrap_type( + {input_p, pattern::wrap_type(pattern::has_static_shape())}); auto callback = [=](pattern::Matcher& m) { auto conv = m.get_match_root(); - const auto & weights_shape = conv->input_value(1).get_shape(); - const int64_t & group = static_cast(weights_shape.at(0)); - const int64_t & channels = static_cast(weights_shape.at(1)); - const int64_t & in_channels = static_cast(weights_shape.at(2)); + const auto& weights_shape = conv->input_value(1).get_shape(); + const int64_t& group = static_cast(weights_shape.at(0)); + const int64_t& channels = static_cast(weights_shape.at(1)); + const int64_t& in_channels = static_cast(weights_shape.at(2)); auto ric = ric_attr::get(conv->input_value(0)).propagate(); auto order = ric.get_order(); @@ -460,9 +480,10 @@ public: auto ric_weights = ric; ric_weights.set_is_final(true); ric_weights.set_axis(0); - ric_weights.set_callback([](Input input, const ric_attr::Attribute & attr) { + ric_weights.set_callback([](Input input, const ric_attr::Attribute& attr) { auto weights = input.get_source_output(); - auto gather = std::make_shared(weights, create_const(attr.get_order()), + auto gather = std::make_shared(weights, + create_const(attr.get_order()), create_const({0} /* output channel */)); input.replace_source_output(gather); // TODO: copy runtime info from RIC sub-graph @@ -474,7 +495,7 @@ public: // Apply Binary transformation for FQ to handle 1..5 inputs ric_weights.set_is_final(false); - ric_attr::set(fq->input_value(0), ric_weights); // set ric attr to simulate propagation flow + ric_attr::set(fq->input_value(0), ric_weights); // set ric attr to simulate propagation flow Binary().apply(fq); } else { ric_attr::set(conv->input(1), ric_weights); @@ -484,7 +505,7 @@ public: const int64_t output_channels = 
group * channels; std::vector new_order; new_order.reserve(output_channels); - for (const auto & index : ric.get_order()) { + for (const auto& index : ric.get_order()) { for (int64_t pos = index * channels, i = 0; i < channels; ++i, ++pos) { new_order.emplace_back(pos); } @@ -521,12 +542,13 @@ class PassThrough : public ngraph::pass::MatcherPass { public: PassThrough() { MATCHER_SCOPE(PassThrough); - auto pattern_root = pattern::wrap_type(); + auto pattern_root = + pattern::wrap_type(); auto callback = [=](pattern::Matcher& m) { auto root = m.get_match_root(); - if (!ric_attr::has(root->input_value(0))) return false; + if (!ric_attr::has(root->input_value(0))) + return false; ric_attr::set(root->output(0), ric_attr::get(root->input_value(0)).propagate()); return true; }; @@ -545,11 +567,12 @@ public: auto pattern_root = pattern::wrap_type({input_p, order_p}); auto callback = [=](pattern::Matcher& m) { - const auto & pattern_map = m.get_pattern_value_map(); + const auto& pattern_map = m.get_pattern_value_map(); auto input = pattern_map.at(input_p); auto ric = ric_attr::get(input).propagate(); - auto order_node = std::dynamic_pointer_cast(pattern_map.at(order_p).get_node_shared_ptr()); + auto order_node = + std::dynamic_pointer_cast(pattern_map.at(order_p).get_node_shared_ptr()); auto order = order_node->cast_vector(); int64_t new_axis = std::find(order.begin(), order.end(), ric.get_axis()) - order.begin(); @@ -570,7 +593,7 @@ public: MATCHER_SCOPE(Unsupported); auto pattern_root = pattern::any_input(); auto callback = [=](pattern::Matcher& m) { - for (const auto & input : m.get_match_root()->input_values()) { + for (const auto& input : m.get_match_root()->input_values()) { if (ric_attr::has(input)) { auto ric = ric_attr::get(input); ric.set_can_be_fused(false); @@ -584,16 +607,17 @@ public: register_matcher(m, callback); } }; -}// namespace prop +} // namespace prop namespace fuse { namespace { -bool need_to_erase_ric(const Output & output) { - if (!ric_attr::has(output)) return false; - const auto & ric = ric_attr::get(output); +bool need_to_erase_ric(const Output& output) { + if (!ric_attr::has(output)) + return false; + const auto& ric = ric_attr::get(output); return ric.can_be_fused() && ric.is_initial(); } -}// namespace +} // namespace class InsertReverseInputChannel : public ngraph::pass::MatcherPass { public: @@ -601,10 +625,11 @@ public: MATCHER_SCOPE(InsertReverseInputChannel); auto pattern_root = pattern::any_input(); auto callback = [](pattern::Matcher& m) { - const auto & node = m.get_match_root(); - for (const auto & input : node->inputs()) { - if (!ric_attr::has(input)) continue; - const auto & ric = ric_attr::get(input); + const auto& node = m.get_match_root(); + for (const auto& input : node->inputs()) { + if (!ric_attr::has(input)) + continue; + const auto& ric = ric_attr::get(input); if (ric.can_be_fused() && ric.is_final()) { ric(input); } @@ -626,7 +651,7 @@ public: auto pattern_root = pattern::wrap_type({split_p, split_p, split_p}, need_to_erase_ric); auto callback = [=](pattern::Matcher& m) { - const auto & pattern_map = m.get_pattern_value_map(); + const auto& pattern_map = m.get_pattern_value_map(); auto output = pattern_map.at(pattern_root); auto input = pattern_map.at(input_p); output.replace(input); @@ -643,11 +668,10 @@ public: EraseGather() { MATCHER_SCOPE(EraseGather); auto input_p = pattern::any_input(); - auto pattern_root = pattern::wrap_type({input_p, pattern::any_input(), - pattern::any_input()}, - need_to_erase_ric); + auto pattern_root = 
pattern::wrap_type({input_p, pattern::any_input(), pattern::any_input()}, + need_to_erase_ric); auto callback = [=](pattern::Matcher& m) { - const auto & pattern_map = m.get_pattern_value_map(); + const auto& pattern_map = m.get_pattern_value_map(); auto output = pattern_map.at(pattern_root); auto input = pattern_map.at(input_p); output.replace(input); @@ -658,9 +682,9 @@ public: register_matcher(m, callback); } }; -}// namespace fuse +} // namespace fuse -bool ngraph::pass::ReverseInputChannelsFusion::run_on_model(const std::shared_ptr & model) { +bool ngraph::pass::ReverseInputChannelsFusion::run_on_model(const std::shared_ptr& model) { Manager m; m.set_per_pass_validation(false); @@ -687,5 +711,5 @@ bool ngraph::pass::ReverseInputChannelsFusion::run_on_model(const std::shared_pt m.run_passes(model); return false; } -}// namespace pass -}// namespace ngraph \ No newline at end of file +} // namespace pass +} // namespace ngraph \ No newline at end of file diff --git a/src/common/transformations/src/transformations/common_optimizations/shuffle_channels_fusion.cpp b/src/common/transformations/src/transformations/common_optimizations/shuffle_channels_fusion.cpp index 9bd38afdcc9..9bfa4d74b95 100644 --- a/src/common/transformations/src/transformations/common_optimizations/shuffle_channels_fusion.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/shuffle_channels_fusion.cpp @@ -3,20 +3,21 @@ // #include "transformations/common_optimizations/shuffle_channels_fusion.hpp" -#include "itt.hpp" #include -#include - #include #include #include +#include +#include "itt.hpp" #include "transformations/utils/utils.hpp" namespace { -bool check_shapes(const ngraph::PartialShape& pshape_input, const ngraph::PartialShape& pshape_reshape_before, - const ngraph::AxisVector& transpose_constant_values, const ngraph::PartialShape& pshape_reshape_after) { +bool check_shapes(const ngraph::PartialShape& pshape_input, + const ngraph::PartialShape& pshape_reshape_before, + const ngraph::AxisVector& transpose_constant_values, + const ngraph::PartialShape& pshape_reshape_after) { // x: [N, C, H, W] const auto rank = pshape_input.rank(); if (rank.is_dynamic() || rank.get_length() != 4) { @@ -24,7 +25,9 @@ bool check_shapes(const ngraph::PartialShape& pshape_input, const ngraph::Partia } // check that all dimensions except batch are static - if (std::any_of(pshape_input.begin() + 1, pshape_input.end(), [](const ngraph::Dimension& x) { return x.is_dynamic(); })) { + if (std::any_of(pshape_input.begin() + 1, pshape_input.end(), [](const ngraph::Dimension& x) { + return x.is_dynamic(); + })) { return false; } @@ -36,20 +39,16 @@ bool check_shapes(const ngraph::PartialShape& pshape_input, const ngraph::Partia const auto group = pshape_reshape_before[1].get_length(); ngraph::PartialShape expected_reshape_before; if (pshape_reshape_before.rank().get_length() == 4) { - expected_reshape_before = { - pshape_input[0], - group, - pshape_input[1].get_length() / group, - pshape_input[2].get_length() * pshape_input[3].get_length() - }; + expected_reshape_before = {pshape_input[0], + group, + pshape_input[1].get_length() / group, + pshape_input[2].get_length() * pshape_input[3].get_length()}; } else { - expected_reshape_before = { - pshape_input[0], - group, - pshape_input[1].get_length() / group, - pshape_input[2], - pshape_input[3] - }; + expected_reshape_before = {pshape_input[0], + group, + pshape_input[1].get_length() / group, + pshape_input[2], + pshape_input[3]}; } if 
(!ngraph::op::util::shapes_equal_except_dynamic_expected_batch(expected_reshape_before, pshape_reshape_before)) { @@ -61,7 +60,7 @@ bool check_shapes(const ngraph::PartialShape& pshape_input, const ngraph::Partia return false; } - ngraph::AxisVector expected_transpose_values{ 0, 2, 1, 3 }; + ngraph::AxisVector expected_transpose_values{0, 2, 1, 3}; if (transpose_constant_values.size() == 5) { expected_transpose_values.push_back(4); } @@ -78,7 +77,7 @@ bool check_shapes(const ngraph::PartialShape& pshape_input, const ngraph::Partia return true; } -} // namespace +} // namespace NGRAPH_RTTI_DEFINITION(ngraph::pass::ShuffleChannelsFusion, "ShuffleChannelsFusion", 0); @@ -89,19 +88,25 @@ ngraph::pass::ShuffleChannelsFusion::ShuffleChannelsFusion(const bool reshape_co auto transpose_const_pattern = ngraph::pattern::wrap_type(); auto reshape_after_const_pattern = ngraph::pattern::wrap_type(); - auto reshape_before_pattern = ngraph::pattern::wrap_type({input, reshape_before_const_pattern}, - pattern::consumers_count(1)); - auto transpose_pattern = ngraph::pattern::wrap_type({reshape_before_pattern, transpose_const_pattern}, - pattern::consumers_count(1)); - auto reshape_after_pattern = ngraph::pattern::wrap_type({transpose_pattern, reshape_after_const_pattern}); + auto reshape_before_pattern = + ngraph::pattern::wrap_type({input, reshape_before_const_pattern}, + pattern::consumers_count(1)); + auto transpose_pattern = + ngraph::pattern::wrap_type({reshape_before_pattern, transpose_const_pattern}, + pattern::consumers_count(1)); + auto reshape_after_pattern = + ngraph::pattern::wrap_type({transpose_pattern, reshape_after_const_pattern}); ngraph::matcher_pass_callback callback = [=](pattern::Matcher& m) { const auto& pattern_map = m.get_pattern_value_map(); auto data = pattern_map.at(input); - auto reshape_before = std::dynamic_pointer_cast(pattern_map.at(reshape_before_pattern).get_node_shared_ptr()); - auto transpose = std::dynamic_pointer_cast(pattern_map.at(transpose_pattern).get_node_shared_ptr()); - auto reshape_after = std::dynamic_pointer_cast(pattern_map.at(reshape_after_pattern).get_node_shared_ptr()); + auto reshape_before = std::dynamic_pointer_cast( + pattern_map.at(reshape_before_pattern).get_node_shared_ptr()); + auto transpose = std::dynamic_pointer_cast( + pattern_map.at(transpose_pattern).get_node_shared_ptr()); + auto reshape_after = std::dynamic_pointer_cast( + pattern_map.at(reshape_after_pattern).get_node_shared_ptr()); if (!reshape_after || !transpose || !reshape_after) { return false; } @@ -118,8 +123,14 @@ ngraph::pass::ShuffleChannelsFusion::ShuffleChannelsFusion(const bool reshape_co const auto& reshape_before_values = reshape_before_constant->cast_vector(); const auto& reshape_after_values = reshape_after_constant->cast_vector(); - if (std::any_of(reshape_before_values.cbegin(), reshape_before_values.cend(), [](const int64_t& value) { return value == -1; }) || - std::any_of(reshape_after_values.cbegin(), reshape_after_values.cend(), [](const int64_t& value) { return value == -1; })) { + if (std::any_of(reshape_before_values.cbegin(), + reshape_before_values.cend(), + [](const int64_t& value) { + return value == -1; + }) || + std::any_of(reshape_after_values.cbegin(), reshape_after_values.cend(), [](const int64_t& value) { + return value == -1; + })) { return false; } } @@ -128,7 +139,8 @@ ngraph::pass::ShuffleChannelsFusion::ShuffleChannelsFusion(const bool reshape_co auto pshape_reshape_before = reshape_before->get_output_partial_shape(0); auto pshape_reshape_after = 
reshape_after->get_output_partial_shape(0); - auto transpose_constant = std::dynamic_pointer_cast(pattern_map.at(transpose_const_pattern).get_node_shared_ptr()); + auto transpose_constant = std::dynamic_pointer_cast( + pattern_map.at(transpose_const_pattern).get_node_shared_ptr()); auto transpose_constant_values = transpose_constant->get_axis_vector_val(); if (!check_shapes(pshape_input, pshape_reshape_before, transpose_constant_values, pshape_reshape_after)) { return false; @@ -139,7 +151,7 @@ ngraph::pass::ShuffleChannelsFusion::ShuffleChannelsFusion(const bool reshape_co auto shuffle_shannels = std::make_shared(data, axis, group); shuffle_shannels->set_friendly_name(reshape_after->get_friendly_name()); - ngraph::copy_runtime_info({ reshape_before, transpose, reshape_after }, shuffle_shannels); + ngraph::copy_runtime_info({reshape_before, transpose, reshape_after}, shuffle_shannels); ngraph::replace_node(reshape_after, shuffle_shannels); return true; }; diff --git a/src/common/transformations/src/transformations/common_optimizations/simplify_shape_of_sub_graph.cpp b/src/common/transformations/src/transformations/common_optimizations/simplify_shape_of_sub_graph.cpp index 4caa1311136..3d4a089686f 100644 --- a/src/common/transformations/src/transformations/common_optimizations/simplify_shape_of_sub_graph.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/simplify_shape_of_sub_graph.cpp @@ -3,20 +3,20 @@ // #include -#include - -#include "itt.hpp" #include #include #include #include #include -#include #include -#include -#include -#include +#include #include +#include +#include +#include +#include + +#include "itt.hpp" NGRAPH_RTTI_DEFINITION(ngraph::pass::SharedShapeOf, "SharedShapeOf", 0); @@ -28,7 +28,7 @@ bool ngraph::pass::SharedShapeOf::run_on_model(const std::shared_ptr, std::vector>> source_to_shape_of; - for (const auto & node : f->get_ordered_ops()) { + for (const auto& node : f->get_ordered_ops()) { // Recursively apply transformation for sub-graph based operations if (auto sub_graph_node = std::dynamic_pointer_cast(node)) if (auto sub_graph = sub_graph_node->get_function()) @@ -74,12 +74,15 @@ ngraph::pass::GroupedGatherElimination::GroupedGatherElimination() { while (inputs.size() > i + 1) { auto curr = inputs[i].get_node_shared_ptr(), next = inputs[i + 1].get_node_shared_ptr(); if (curr->get_type_info() != next->get_type_info() || - (!ov::is_type(curr) && !ov::is_type(curr) && !ov::is_type(curr)) || + (!ov::is_type(curr) && !ov::is_type(curr) && + !ov::is_type(curr)) || (curr->input_value(0) != next->input_value(0))) { ++i; continue; - } // curr and next are the same type of gather which takes data from the same source - auto joint_indices = ngraph::op::util::make_try_fold(OutputVector{curr->input_value(1), next->input_value(1)}, 0); + } // curr and next are the same type of gather which takes data from the same source + auto joint_indices = ngraph::op::util::make_try_fold( + OutputVector{curr->input_value(1), next->input_value(1)}, + 0); std::shared_ptr new_gather; if (ov::is_type(curr)) { new_gather = register_new_node( @@ -105,7 +108,7 @@ ngraph::pass::GroupedGatherElimination::GroupedGatherElimination() { inputs[i] = new_gather->output(0); } ngraph::copy_runtime_info(concat, new_ops); - if (inputs.size() == 1) // we can optimize out concat + if (inputs.size() == 1) // we can optimize out concat return replace_output_update_name(concat->output(0), inputs[0]); if (original_inputs_size > inputs.size()) { auto new_concat = std::make_shared(inputs, 0); 
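The GroupedGatherElimination hunk above folds several Gather nodes that read from the same source into a single Gather over the concatenated indices before the Concat is optimized out. A minimal standalone sketch of the identity this relies on, restricted to axis 0 for brevity; the helper gather0 and the sample data are illustrative assumptions, not part of the patch:

// Sketch: Concat(Gather(x, i1), Gather(x, i2)) == Gather(x, Concat(i1, i2)) along axis 0.
#include <cassert>
#include <cstdint>
#include <vector>

// Plain stand-in for an axis-0 Gather over a 1-D tensor.
static std::vector<int> gather0(const std::vector<int>& data, const std::vector<int64_t>& idx) {
    std::vector<int> out;
    out.reserve(idx.size());
    for (auto i : idx)
        out.push_back(data[static_cast<size_t>(i)]);
    return out;
}

int main() {
    const std::vector<int> data{10, 20, 30, 40, 50};
    const std::vector<int64_t> idx_a{0, 2}, idx_b{4, 1};

    // Left side: Concat of two separate gathers over the same source.
    auto lhs = gather0(data, idx_a);
    const auto part_b = gather0(data, idx_b);
    lhs.insert(lhs.end(), part_b.begin(), part_b.end());

    // Right side: one gather over the joint (concatenated) indices,
    // which is the single node the pass builds instead.
    std::vector<int64_t> joint_indices(idx_a);
    joint_indices.insert(joint_indices.end(), idx_b.begin(), idx_b.end());
    const auto rhs = gather0(data, joint_indices);

    assert(lhs == rhs);
    return 0;
}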
@@ -126,14 +129,15 @@ NGRAPH_RTTI_DEFINITION(ngraph::pass::GatherNopElimination, "GatherNopElimination ngraph::pass::GatherNopElimination::GatherNopElimination() { MATCHER_SCOPE(GatherNopElimination); const auto gather_label = ngraph::pattern::wrap_type( - {ngraph::pattern::any_input(pattern::has_static_shape()), - ngraph::pattern::wrap_type(), - ngraph::pattern::wrap_type()}); + {ngraph::pattern::any_input(pattern::has_static_shape()), + ngraph::pattern::wrap_type(), + ngraph::pattern::wrap_type()}); ngraph::matcher_pass_callback callback = [](pattern::Matcher& m) { auto gather = m.get_match_root(); const auto& number_of_indices = shape_size(gather->get_input_shape(1)); - if (gather->get_input_shape(0) != gather->get_output_shape(0) || shape_size(gather->get_input_shape(2)) != 1 || number_of_indices > 10) + if (gather->get_input_shape(0) != gather->get_output_shape(0) || shape_size(gather->get_input_shape(2)) != 1 || + number_of_indices > 10) return false; std::vector expected_vector(number_of_indices); std::iota(expected_vector.begin(), expected_vector.end(), 0); @@ -164,8 +168,7 @@ ngraph::pass::SimplifyGatherShapeOf::SimplifyGatherShapeOf() { auto gather_in_rank = gather->get_input_partial_shape(0).rank(); auto indices_rank = gather->get_input_partial_shape(1).rank(); auto axis = gather->get_axis(); - if (gather_in_rank.is_dynamic() || indices_rank.is_dynamic() || - axis == opset3::Gather::AXIS_NOT_SET_VALUE) { + if (gather_in_rank.is_dynamic() || indices_rank.is_dynamic() || axis == opset3::Gather::AXIS_NOT_SET_VALUE) { return false; } @@ -191,7 +194,8 @@ ngraph::pass::SimplifyGatherShapeOf::SimplifyGatherShapeOf() { new_ops.push_back(new_gather); concat_inputs.push_back(new_gather); } - auto shapeof_indices = std::make_shared(gather->input_value(1), node->get_output_element_type(0)); + auto shapeof_indices = + std::make_shared(gather->input_value(1), node->get_output_element_type(0)); new_ops.push_back(shapeof_indices); concat_inputs.push_back(shapeof_indices); @@ -226,7 +230,7 @@ ngraph::pass::SimplifySecondInputOfReshape::SimplifySecondInputOfReshape() { return pattern::has_static_shape()(output) && pattern::rank_equals(1)(output); }; const auto concat = pattern::wrap_type(has_static_1d_shape); - const auto reshape_pattern = pattern::wrap_type({ input, concat }); + const auto reshape_pattern = pattern::wrap_type({input, concat}); ngraph::matcher_pass_callback callback = [=](pattern::Matcher& m) { auto node = m.get_match_root(); @@ -283,12 +287,13 @@ ngraph::pass::SimplifySecondInputOfReshape::SimplifySecondInputOfReshape() { if (gather_can_be_fused) { const size_t num_of_unchanged_dimensions = indices.size(); const auto subgraph_et = gather->get_input_element_type(0); - input = opset8::Constant::create(subgraph_et, Shape{ num_of_unchanged_dimensions }, { 0 }); + input = opset8::Constant::create(subgraph_et, Shape{num_of_unchanged_dimensions}, {0}); gather_folded = true; } } else { const auto concat_input_shape = input.get_shape(); - OPENVINO_ASSERT(concat_input_shape.size() == 1, "concat input rank is not valid for matched Concat with 1D output"); + OPENVINO_ASSERT(concat_input_shape.size() == 1, + "concat input rank is not valid for matched Concat with 1D output"); gather_dims_expected_location += concat_input_shape[0]; } } @@ -301,7 +306,7 @@ ngraph::pass::SimplifySecondInputOfReshape::SimplifySecondInputOfReshape() { new_concat->set_friendly_name(concat->get_friendly_name()); copy_runtime_info(concat, new_concat); - const auto new_reshape = reshape->clone_with_new_inputs({ 
reshape->input_value(0), new_concat }); + const auto new_reshape = reshape->clone_with_new_inputs({reshape->input_value(0), new_concat}); new_reshape->set_friendly_name(reshape->get_friendly_name()); copy_runtime_info(reshape, new_reshape); diff --git a/src/common/transformations/src/transformations/common_optimizations/skip_gather_before_transpose_and_reshape.cpp b/src/common/transformations/src/transformations/common_optimizations/skip_gather_before_transpose_and_reshape.cpp index 530e4537fe6..7a499b165b4 100644 --- a/src/common/transformations/src/transformations/common_optimizations/skip_gather_before_transpose_and_reshape.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/skip_gather_before_transpose_and_reshape.cpp @@ -3,15 +3,14 @@ // #include "transformations/common_optimizations/skip_gather_before_transpose_and_reshape.hpp" -#include "itt.hpp" #include -#include - -#include #include #include +#include +#include +#include "itt.hpp" #include "transformations/utils/utils.hpp" NGRAPH_RTTI_DEFINITION(ngraph::pass::SkipGatherBeforeTransposeAndReshape, "SkipGatherBeforeTransposeAndReshape", 0); @@ -52,18 +51,22 @@ ngraph::pass::SkipGatherBeforeTransposeAndReshape::SkipGatherBeforeTransposeAndR } const auto transpose = pattern_map.at(transpose_m).get_node_shared_ptr(); - const auto transpose_const = as_type_ptr(pattern_map.at(transpose_const_m).get_node_shared_ptr()); + const auto transpose_const = + as_type_ptr(pattern_map.at(transpose_const_m).get_node_shared_ptr()); if (!transpose_const) { return false; } - const auto reshape_const = as_type_ptr(pattern_map.at(reshape_const_m).get_node_shared_ptr()); + const auto reshape_const = + as_type_ptr(pattern_map.at(reshape_const_m).get_node_shared_ptr()); if (!reshape_const) { return false; } const auto reshape_vals = reshape_const->cast_vector(); - if (std::any_of(reshape_vals.begin(), reshape_vals.end(), [](const std::int64_t x) { return x == 0; })) { + if (std::any_of(reshape_vals.begin(), reshape_vals.end(), [](const std::int64_t x) { + return x == 0; + })) { return false; } diff --git a/src/common/transformations/src/transformations/common_optimizations/softmax_fusion.cpp b/src/common/transformations/src/transformations/common_optimizations/softmax_fusion.cpp index dc5406dd24a..0c94a6af141 100644 --- a/src/common/transformations/src/transformations/common_optimizations/softmax_fusion.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/softmax_fusion.cpp @@ -3,16 +3,15 @@ // #include "transformations/common_optimizations/softmax_fusion.hpp" -#include "transformations/utils/utils.hpp" #include +#include +#include +#include #include -#include -#include -#include #include "itt.hpp" - +#include "transformations/utils/utils.hpp" NGRAPH_RTTI_DEFINITION(ngraph::pass::SoftmaxFusion, "SoftmaxFusion", 0); @@ -33,10 +32,12 @@ ngraph::pass::SoftmaxFusion::SoftmaxFusion() { const auto& pattern_map = m.get_pattern_value_map(); - auto reduce_max_axes = std::dynamic_pointer_cast(pattern_map.at(reduce_max_axes_pattern).get_node_shared_ptr()); + auto reduce_max_axes = + std::dynamic_pointer_cast(pattern_map.at(reduce_max_axes_pattern).get_node_shared_ptr()); if (!reduce_max_axes || shape_size(reduce_max_axes->get_shape()) != 1) return false; - auto reduce_sum_axes = std::dynamic_pointer_cast(pattern_map.at(reduce_sum_axes_pattern).get_node_shared_ptr()); + auto reduce_sum_axes = + std::dynamic_pointer_cast(pattern_map.at(reduce_sum_axes_pattern).get_node_shared_ptr()); if (!reduce_sum_axes || 
shape_size(reduce_sum_axes->get_shape()) != 1) return false; @@ -46,9 +47,9 @@ ngraph::pass::SoftmaxFusion::SoftmaxFusion() { const auto& pshape = pattern_map.at(data_pattern).get_partial_shape(); auto rank = pshape.rank().get_length(); if (reduce_max_axis < 0) - reduce_max_axis += rank; + reduce_max_axis += rank; if (reduce_sum_axis < 0) - reduce_sum_axis += rank; + reduce_sum_axis += rank; } if (reduce_max_axis != reduce_sum_axis) return false; @@ -57,14 +58,15 @@ ngraph::pass::SoftmaxFusion::SoftmaxFusion() { auto div = pattern_map.at(div_pattern).get_node_shared_ptr(); softmax->set_friendly_name(div->get_friendly_name()); - copy_runtime_info({ - pattern_map.at(reduce_max_pattern).get_node_shared_ptr(), - pattern_map.at(sub_pattern).get_node_shared_ptr(), - pattern_map.at(exp_pattern).get_node_shared_ptr(), - pattern_map.at(reduce_sum_pattern).get_node_shared_ptr(), - div, - }, - softmax); + copy_runtime_info( + { + pattern_map.at(reduce_max_pattern).get_node_shared_ptr(), + pattern_map.at(sub_pattern).get_node_shared_ptr(), + pattern_map.at(exp_pattern).get_node_shared_ptr(), + pattern_map.at(reduce_sum_pattern).get_node_shared_ptr(), + div, + }, + softmax); replace_node(div, softmax); return true; diff --git a/src/common/transformations/src/transformations/common_optimizations/softplus_fusion.cpp b/src/common/transformations/src/transformations/common_optimizations/softplus_fusion.cpp index 3749de60332..84e44e5dff7 100644 --- a/src/common/transformations/src/transformations/common_optimizations/softplus_fusion.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/softplus_fusion.cpp @@ -2,15 +2,15 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "itt.hpp" #include "transformations/common_optimizations/softplus_fusion.hpp" #include +#include +#include +#include #include -#include -#include -#include +#include "itt.hpp" NGRAPH_RTTI_DEFINITION(ngraph::pass::SoftPlusFusion, "SoftPlusFusion", 0); @@ -19,17 +19,19 @@ ngraph::pass::SoftPlusFusion::SoftPlusFusion() { // fuses ln(exp(x) + 1.0) operations into SoftPlus(x) auto input = ngraph::pattern::any_input(); auto exp = std::make_shared(input); - auto add_constant = ngraph::pattern::wrap_type( - pattern::type_matches_any({element::f32, element::f16})); + auto add_constant = + ngraph::pattern::wrap_type(pattern::type_matches_any({element::f32, element::f16})); auto add = std::make_shared(exp, add_constant); auto log = std::make_shared(add); - ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher &m) { - const auto &pattern_to_output = m.get_pattern_value_map(); + ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher& m) { + const auto& pattern_to_output = m.get_pattern_value_map(); auto exp_input = pattern_to_output.at(input); - auto constant = std::dynamic_pointer_cast(pattern_to_output.at(add_constant).get_node_shared_ptr()); - if (!constant) return false; + auto constant = std::dynamic_pointer_cast( + pattern_to_output.at(add_constant).get_node_shared_ptr()); + if (!constant) + return false; auto data = constant->cast_vector(); if (data.size() != 1 || data[0] != 1.0) { @@ -41,7 +43,8 @@ ngraph::pass::SoftPlusFusion::SoftPlusFusion() { softplus->set_friendly_name(m.get_match_root()->get_friendly_name()); ngraph::copy_runtime_info({pattern_to_output.at(log).get_node_shared_ptr(), pattern_to_output.at(add).get_node_shared_ptr(), - pattern_to_output.at(exp).get_node_shared_ptr()}, softplus); + pattern_to_output.at(exp).get_node_shared_ptr()}, + softplus); 
ngraph::replace_node(m.get_match_root(), softplus); return true; }; diff --git a/src/common/transformations/src/transformations/common_optimizations/softplus_to_mish_fusion.cpp b/src/common/transformations/src/transformations/common_optimizations/softplus_to_mish_fusion.cpp index 6b6c45efbd2..56ed85b040e 100644 --- a/src/common/transformations/src/transformations/common_optimizations/softplus_to_mish_fusion.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/softplus_to_mish_fusion.cpp @@ -2,15 +2,15 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "itt.hpp" #include "transformations/common_optimizations/softplus_to_mish_fusion.hpp" #include +#include +#include +#include #include -#include -#include -#include +#include "itt.hpp" NGRAPH_RTTI_DEFINITION(ngraph::pass::SoftPlusToMishFusion, "SoftPlusToMishFusion", 0); @@ -22,7 +22,7 @@ ngraph::pass::SoftPlusToMishFusion::SoftPlusToMishFusion() { auto mul = std::make_shared(input, tanh); ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher& m) { - auto & pattern_to_output = m.get_pattern_value_map(); + auto& pattern_to_output = m.get_pattern_value_map(); auto exp_input = pattern_to_output.at(input); auto mish = std::make_shared(exp_input); @@ -30,7 +30,8 @@ ngraph::pass::SoftPlusToMishFusion::SoftPlusToMishFusion() { mish->set_friendly_name(m.get_match_root()->get_friendly_name()); ngraph::copy_runtime_info({pattern_to_output.at(mul).get_node_shared_ptr(), pattern_to_output.at(tanh).get_node_shared_ptr(), - pattern_to_output.at(softplus).get_node_shared_ptr()}, mish); + pattern_to_output.at(softplus).get_node_shared_ptr()}, + mish); ngraph::replace_node(m.get_match_root(), mish); return true; }; diff --git a/src/common/transformations/src/transformations/common_optimizations/space_to_batch_fusion.cpp b/src/common/transformations/src/transformations/common_optimizations/space_to_batch_fusion.cpp index 1928a48219a..f15b890377b 100644 --- a/src/common/transformations/src/transformations/common_optimizations/space_to_batch_fusion.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/space_to_batch_fusion.cpp @@ -3,60 +3,71 @@ // #include "transformations/common_optimizations/space_to_batch_fusion.hpp" -#include "transformations/utils/utils.hpp" #include +#include +#include +#include +#include #include -#include -#include -#include -#include #include "itt.hpp" - +#include "transformations/utils/utils.hpp" NGRAPH_RTTI_DEFINITION(ngraph::pass::SpaceToBatchFusion, "SpaceToBatchFusion", 0); ngraph::pass::SpaceToBatchFusion::SpaceToBatchFusion() { MATCHER_SCOPE(SpaceToBatchFusion); auto data_pattern = pattern::any_input(); - auto reshape_before_pattern = pattern::wrap_type({data_pattern, pattern::wrap_type()}, pattern::rank_equals(4)); - auto trans_before_pattern = pattern::wrap_type({data_pattern, pattern::wrap_type()}, pattern::rank_equals(4)); - auto reshape_or_transpose_before_pattern = std::make_shared(OutputVector{reshape_before_pattern, trans_before_pattern}); + auto reshape_before_pattern = + pattern::wrap_type({data_pattern, pattern::wrap_type()}, + pattern::rank_equals(4)); + auto trans_before_pattern = + pattern::wrap_type({data_pattern, pattern::wrap_type()}, + pattern::rank_equals(4)); + auto reshape_or_transpose_before_pattern = + std::make_shared(OutputVector{reshape_before_pattern, trans_before_pattern}); auto pads_begin_pattern = pattern::wrap_type(); auto pads_end_pattern = pattern::wrap_type(); auto pad_value = pattern::wrap_type(); - auto pad_pattern = 
pattern::wrap_type({reshape_or_transpose_before_pattern, pads_begin_pattern, pads_end_pattern, pad_value}); + auto pad_pattern = pattern::wrap_type( + {reshape_or_transpose_before_pattern, pads_begin_pattern, pads_end_pattern, pad_value}); auto space_to_depth_pattern = pattern::wrap_type({pad_pattern}, pattern::has_static_shape()); - auto reshape_after_pattern = pattern::wrap_type({space_to_depth_pattern, pattern::wrap_type()}, pattern::rank_equals(4)); - auto trans_after_pattern = pattern::wrap_type({space_to_depth_pattern, pattern::wrap_type()}, pattern::rank_equals(4)); - auto reshape_or_transpose_after_pattern = std::make_shared(OutputVector{reshape_after_pattern, trans_after_pattern}); + auto reshape_after_pattern = + pattern::wrap_type({space_to_depth_pattern, pattern::wrap_type()}, + pattern::rank_equals(4)); + auto trans_after_pattern = + pattern::wrap_type({space_to_depth_pattern, pattern::wrap_type()}, + pattern::rank_equals(4)); + auto reshape_or_transpose_after_pattern = + std::make_shared(OutputVector{reshape_after_pattern, trans_after_pattern}); matcher_pass_callback callback = [=](pattern::Matcher& m) { const auto& pattern_map = m.get_pattern_value_map(); - auto get_reshape_or_transpose = [&pattern_map] (const std::shared_ptr& reshape_pattern, - const std::shared_ptr& trans_pattern) -> std::shared_ptr { + auto get_reshape_or_transpose = [&pattern_map]( + const std::shared_ptr& reshape_pattern, + const std::shared_ptr& trans_pattern) -> std::shared_ptr { if (pattern_map.count(reshape_pattern)) return pattern_map.at(reshape_pattern).get_node_shared_ptr(); if (pattern_map.count(trans_pattern)) return pattern_map.at(trans_pattern).get_node_shared_ptr(); return nullptr; }; - auto check_input_output_shape = [] (const std::shared_ptr& node) -> bool { + auto check_input_output_shape = [](const std::shared_ptr& node) -> bool { const auto& input_shape = node->get_input_shape(0); const auto& output_shape = node->get_output_shape(0); // Transpose permutation has to be [1, 0, 2, 3] - return input_shape[0] == output_shape[1] && - input_shape[1] == output_shape[0] && - input_shape[2] == output_shape[2] && - input_shape[3] == output_shape[3]; + return input_shape[0] == output_shape[1] && input_shape[1] == output_shape[0] && + input_shape[2] == output_shape[2] && input_shape[3] == output_shape[3]; }; - std::shared_ptr reshape_or_trans_before = get_reshape_or_transpose(reshape_before_pattern, trans_before_pattern); + std::shared_ptr reshape_or_trans_before = + get_reshape_or_transpose(reshape_before_pattern, trans_before_pattern); if (!reshape_or_trans_before) return false; - std::shared_ptr reshape_or_trans_after = get_reshape_or_transpose(reshape_after_pattern, trans_after_pattern); + std::shared_ptr reshape_or_trans_after = + get_reshape_or_transpose(reshape_after_pattern, trans_after_pattern); if (!reshape_or_trans_after) return false; if (!check_input_output_shape(reshape_or_trans_before)) @@ -67,32 +78,37 @@ ngraph::pass::SpaceToBatchFusion::SpaceToBatchFusion() { auto pad = std::dynamic_pointer_cast(pattern_map.at(pad_pattern).get_node_shared_ptr()); if (!pad || pad->get_pad_mode() != op::PadMode::CONSTANT) return false; - auto pad_value_const = std::dynamic_pointer_cast(pattern_map.at(pad_value).get_node_shared_ptr()); + auto pad_value_const = + std::dynamic_pointer_cast(pattern_map.at(pad_value).get_node_shared_ptr()); if (!pad_value_const) return false; auto pad_value = pad_value_const->cast_vector(); if (pad_value.size() != 1 || pad_value[0] != 0.0f) return false; - auto 
space_to_depth = std::dynamic_pointer_cast(pattern_map.at(space_to_depth_pattern).get_node_shared_ptr()); + auto space_to_depth = std::dynamic_pointer_cast( + pattern_map.at(space_to_depth_pattern).get_node_shared_ptr()); if (!space_to_depth) return false; if (space_to_depth->get_mode() != opset6::SpaceToDepth::SpaceToDepthMode::BLOCKS_FIRST) return false; auto block_size = static_cast(space_to_depth->get_block_size()); - auto block_shape = op::Constant::create(element::i64, Shape{4}, - std::vector{1, 1, block_size, block_size}); - auto space_to_batch = register_new_node(pattern_map.at(data_pattern), block_shape, - pattern_map.at(pads_begin_pattern), pattern_map.at(pads_end_pattern)); + auto block_shape = + op::Constant::create(element::i64, Shape{4}, std::vector{1, 1, block_size, block_size}); + auto space_to_batch = register_new_node(pattern_map.at(data_pattern), + block_shape, + pattern_map.at(pads_begin_pattern), + pattern_map.at(pads_end_pattern)); space_to_batch->set_friendly_name(reshape_or_trans_after->get_friendly_name()); - copy_runtime_info({ - reshape_or_trans_before, - pad, - space_to_depth, - reshape_or_trans_after, - }, - space_to_batch); + copy_runtime_info( + { + reshape_or_trans_before, + pad, + space_to_depth, + reshape_or_trans_after, + }, + space_to_batch); replace_node(reshape_or_trans_after, space_to_batch); return true; diff --git a/src/common/transformations/src/transformations/common_optimizations/split_concat_pair_to_interpolate_fusion.cpp b/src/common/transformations/src/transformations/common_optimizations/split_concat_pair_to_interpolate_fusion.cpp index 7c797796b4f..bf1710619bd 100644 --- a/src/common/transformations/src/transformations/common_optimizations/split_concat_pair_to_interpolate_fusion.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/split_concat_pair_to_interpolate_fusion.cpp @@ -2,21 +2,21 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "itt.hpp" #include "transformations/common_optimizations/split_concat_pair_to_interpolate_fusion.hpp" #include #include +#include +#include +#include #include +#include #include #include #include #include -#include -#include -#include -#include +#include "itt.hpp" namespace { // This function creates a partition of its argument into groups consisting of adjacent identical elements. @@ -25,7 +25,8 @@ namespace { std::vector> grouped_vector(const std::vector& v) { std::vector> result; - if (v.empty()) return result; + if (v.empty()) + return result; uint64_t prev = v[0]; std::vector group; @@ -42,7 +43,8 @@ std::vector> grouped_vector(const std::vector& v return result; } -std::pair, uint64_t> get_split_before_concat(const std::shared_ptr& concat) { +std::pair, uint64_t> get_split_before_concat( + const std::shared_ptr& concat) { // This function gets producers of the 'concat' node, checks that the following conditions are fulfilled: // 1) all producers for 'concat' are Split nodes; // 2) 'concat' has only one unique producer ('split'); @@ -50,8 +52,8 @@ std::pair, uint64_t> get_split_before_con // 4) for any output port of 'split', number of corresponding input ports of the consumer is the same; // 5) for any output port 'i' of the 'split', corresponding input ports of the consumer are // [i * m, ..., i * m + (m - 1)], where 'm' is the same for all 'i'; - // and, if all these conditions are fulfilled, returns the above mentioned 'Concat' node. Otherwise, if some of these - // conditions is false, this functions returns nullptr. 
+ // and, if all these conditions are fulfilled, returns the above mentioned 'Concat' node. Otherwise, if some of + // these conditions is false, this functions returns nullptr. std::vector idx; std::shared_ptr split; @@ -60,24 +62,28 @@ std::pair, uint64_t> get_split_before_con auto split_op = std::dynamic_pointer_cast(input.get_node_shared_ptr()); if (!split) split = split_op; - if (!split_op || split != split_op) return {}; + if (!split_op || split != split_op) + return {}; idx.emplace_back(static_cast(input.get_index())); } // If 'split' node has more than one consumer, then the transformation is not applicable. for (const auto& output : split->outputs()) { for (const auto& consumer : output.get_target_inputs()) { - if (consumer.get_node() != concat.get()) return {}; + if (consumer.get_node() != concat.get()) + return {}; } } - // If numbers of consumer ports are various for various output ports of 'split', then the transformation is not applicable. + // If numbers of consumer ports are various for various output ports of 'split', then the transformation is not + // applicable. auto grouped_idx = grouped_vector(idx); std::unordered_set sizes_of_groups; for (const auto& group : grouped_idx) { sizes_of_groups.insert(static_cast(group.size())); } - if (sizes_of_groups.size() != 1) return {}; + if (sizes_of_groups.size() != 1) + return {}; uint64_t size_of_group = *(sizes_of_groups.begin()); // The transformation is applicable if output port 0 of 'split' goes to ports [0, ..., m-1] of next node, @@ -85,12 +91,16 @@ std::pair, uint64_t> get_split_before_con // goes to ports [i * m, ..., i * m + (m - 1)], and so on. for (uint64_t i = 0; i < static_cast(grouped_idx.size()); ++i) { const auto& current_group = grouped_idx[i]; - if (std::any_of(current_group.begin(), current_group.end(), [i](uint64_t j){ return j != i; })) { return {}; } + if (std::any_of(current_group.begin(), current_group.end(), [i](uint64_t j) { + return j != i; + })) { + return {}; + } } return {split, size_of_group}; } -} // namespace +} // namespace NGRAPH_RTTI_DEFINITION(ngraph::pass::SplitConcatPairToInterpolateFusion, "SplitConcatPairToInterpolateFusion", 0); @@ -107,8 +117,8 @@ ngraph::pass::SplitConcatPairToInterpolateFusion::SplitConcatPairToInterpolateFu // 4) 'concat' takes inputs only from 'split'; // 5) split_dim of 'split' is equal to axis of 'concat'; // 6) output port 0 of 'split' goes to ports [0, ..., m-1] of next node, output port 1 of 'split' goes to ports - // [m, ..., m + (m-1)] of next node, ..., output port i of 'split' goes to ports [i * m, ..., i * m + (m - 1)], - // and so on; + // [m, ..., m + (m-1)] of next node, ..., output port i of 'split' goes to ports [i * m, ..., i * m + (m - + // 1)], and so on; // 7) number of outputs of 'split' is equal to the length of the split axis. // Such subgraph // Split -> Concat @@ -128,24 +138,30 @@ ngraph::pass::SplitConcatPairToInterpolateFusion::SplitConcatPairToInterpolateFu auto concat_pattern = ngraph::pattern::wrap_type(); ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher& m) { auto concat = std::dynamic_pointer_cast(m.get_match_root()); - if (!concat) return false; + if (!concat) + return false; uint64_t scale_factor; std::shared_ptr split; std::tie(split, scale_factor) = get_split_before_concat(concat); // If scale_factor == 1, then output data of Interpolate are equal to input data. Hence, we should not replace // Split->Concat pair with Interpolate. 
- if (!split || !scale_factor || scale_factor == 1) return false; + if (!split || !scale_factor || scale_factor == 1) + return false; - if (split->get_input_partial_shape(0).rank().is_dynamic()) return false; + if (split->get_input_partial_shape(0).rank().is_dynamic()) + return false; int64_t split_input_rank = split->get_input_partial_shape(0).rank().get_length(); - // If this transformation is applied in the case of the the rank is less than 4, we have a performance degradation. - // And, at this time, we have no models with Split->Concat pattern when this transformation is applicable and - // input rank of Split is greater than 5. - if (split_input_rank != 4 && split_input_rank != 5) return false; + // If this transformation is applied in the case of the the rank is less than 4, we have a performance + // degradation. And, at this time, we have no models with Split->Concat pattern when this transformation is + // applicable and input rank of Split is greater than 5. + if (split_input_rank != 4 && split_input_rank != 5) + return false; - auto split_axis_const = std::dynamic_pointer_cast(split->input_value(1).get_node_shared_ptr()); - if (!split_axis_const) return false; + auto split_axis_const = + std::dynamic_pointer_cast(split->input_value(1).get_node_shared_ptr()); + if (!split_axis_const) + return false; int64_t axis = split_axis_const->cast_vector()[0]; @@ -164,7 +180,8 @@ ngraph::pass::SplitConcatPairToInterpolateFusion::SplitConcatPairToInterpolateFu attrs.coordinate_transformation_mode = opset8::Interpolate::CoordinateTransformMode::HALF_PIXEL; attrs.cube_coeff = -0.75f; - auto scales_node = opset8::Constant::create(element::f32, {1}, std::vector{static_cast(scale_factor)}); + auto scales_node = + opset8::Constant::create(element::f32, {1}, std::vector{static_cast(scale_factor)}); auto axis_node = opset8::Constant::create(element::i64, {1}, std::vector{axis}); auto shape_node = std::make_shared(split->input_value(0)); @@ -172,7 +189,8 @@ ngraph::pass::SplitConcatPairToInterpolateFusion::SplitConcatPairToInterpolateFu auto sslice_end = opset8::Constant::create(element::i64, {1}, std::vector{axis + 1}); std::vector begin_mask = {0}; std::vector end_mask = {0}; - auto strided_slice_node = std::make_shared(shape_node, sslice_begin, sslice_end, begin_mask, end_mask); + auto strided_slice_node = + std::make_shared(shape_node, sslice_begin, sslice_end, begin_mask, end_mask); auto cast_shape_to_float = std::make_shared(strided_slice_node, element::f32); auto mul_node = std::make_shared(cast_shape_to_float, scales_node); @@ -190,11 +208,22 @@ ngraph::pass::SplitConcatPairToInterpolateFusion::SplitConcatPairToInterpolateFu if (!sizes_node) sizes_node = cast_mul_result_to_int; - auto interpolate = register_new_node(split->input_value(0), sizes_node, scales_node, axis_node, attrs); + auto interpolate = + register_new_node(split->input_value(0), sizes_node, scales_node, axis_node, attrs); interpolate->set_friendly_name(concat->get_friendly_name()); - copy_runtime_info({split, concat}, {scales_node, axis_node, shape_node, sslice_begin, sslice_end, strided_slice_node, cast_shape_to_float, mul_node, - floor_node, sizes_node, interpolate}); + copy_runtime_info({split, concat}, + {scales_node, + axis_node, + shape_node, + sslice_begin, + sslice_end, + strided_slice_node, + cast_shape_to_float, + mul_node, + floor_node, + sizes_node, + interpolate}); replace_node(concat, interpolate); return true; diff --git 
a/src/common/transformations/src/transformations/common_optimizations/split_squeeze_concat_fusion.cpp b/src/common/transformations/src/transformations/common_optimizations/split_squeeze_concat_fusion.cpp index 32fe8d9850e..88c84405678 100644 --- a/src/common/transformations/src/transformations/common_optimizations/split_squeeze_concat_fusion.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/split_squeeze_concat_fusion.cpp @@ -2,16 +2,16 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "itt.hpp" #include "transformations/common_optimizations/split_squeeze_concat_fusion.hpp" #include -#include -#include - #include -#include #include +#include +#include +#include + +#include "itt.hpp" NGRAPH_RTTI_DEFINITION(ngraph::pass::SplitSqueezeConcatFusion, "SplitSqueezeConcatFusion", 0); @@ -22,28 +22,36 @@ ngraph::pass::SplitSqueezeConcatFusion::SplitSqueezeConcatFusion() { ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher& m) { const auto& pattern_to_output = m.get_pattern_value_map(); - auto concat = std::dynamic_pointer_cast(pattern_to_output.at(concat_pattern).get_node_shared_ptr()); - if (!concat) return false; + auto concat = std::dynamic_pointer_cast( + pattern_to_output.at(concat_pattern).get_node_shared_ptr()); + if (!concat) + return false; - NodeVector nodes_to_delete{ concat }; + NodeVector nodes_to_delete{concat}; int64_t axis_value = 0; std::shared_ptr split; const auto& concat_inputs = concat->input_values(); - if (concat_inputs.empty()) return false; + if (concat_inputs.empty()) + return false; for (size_t i = 0; i < concat_inputs.size(); i++) { auto squeeze = std::dynamic_pointer_cast(concat_inputs[i].get_node_shared_ptr()); - if (!squeeze) return false; + if (!squeeze) + return false; nodes_to_delete.push_back(squeeze); - auto split_to_check = std::dynamic_pointer_cast(squeeze->input_value(0).get_node_shared_ptr()); - auto squeeze_axes = std::dynamic_pointer_cast(squeeze->input_value(1).get_node_shared_ptr()); - if (!squeeze_axes || !split_to_check) return false; + auto split_to_check = + std::dynamic_pointer_cast(squeeze->input_value(0).get_node_shared_ptr()); + auto squeeze_axes = + std::dynamic_pointer_cast(squeeze->input_value(1).get_node_shared_ptr()); + if (!squeeze_axes || !split_to_check) + return false; auto squeeze_axes_vec = squeeze_axes->cast_vector(); - if (squeeze_axes_vec.size() != 1) return false; + if (squeeze_axes_vec.size() != 1) + return false; if (i == 0) { axis_value = squeeze_axes_vec[0]; @@ -54,15 +62,17 @@ ngraph::pass::SplitSqueezeConcatFusion::SplitSqueezeConcatFusion() { } auto split_output = squeeze->input_value(0); - if (split_output.get_target_inputs().size() != 1 || - split_output.get_index() != i) + if (split_output.get_target_inputs().size() != 1 || split_output.get_index() != i) return false; } - if (split->get_num_splits() != concat_inputs.size()) return false; + if (split->get_num_splits() != concat_inputs.size()) + return false; - auto split_axis = std::dynamic_pointer_cast(split->input_value(1).get_node_shared_ptr()); - if (!split_axis) return false; + auto split_axis = + std::dynamic_pointer_cast(split->input_value(1).get_node_shared_ptr()); + if (!split_axis) + return false; auto axis_vec = split_axis->cast_vector(); if (axis_vec.size() != 1 || axis_value != axis_vec[0]) @@ -79,13 +89,15 @@ ngraph::pass::SplitSqueezeConcatFusion::SplitSqueezeConcatFusion() { order.erase(order.begin() + axis_value); order.insert(order.begin() + concat_axis, axis_value); - auto transpose_order = 
ngraph::opset7::Constant::create(element::i64, { (size_t)rank.get_length() }, order); + auto transpose_order = ngraph::opset7::Constant::create(element::i64, {(size_t)rank.get_length()}, order); auto transpose = register_new_node(input, transpose_order); - auto shape_after = ngraph::opset7::Constant::create(element::i64, { (size_t)rank.get_length() - 1 }, concat->get_output_shape(0)); + auto shape_after = ngraph::opset7::Constant::create(element::i64, + {(size_t)rank.get_length() - 1}, + concat->get_output_shape(0)); auto reshape = std::make_shared(transpose, shape_after, false); reshape->set_friendly_name(m.get_match_root()->get_friendly_name()); - ngraph::copy_runtime_info(nodes_to_delete, { transpose, reshape }); + ngraph::copy_runtime_info(nodes_to_delete, {transpose, reshape}); ngraph::replace_node(m.get_match_root(), reshape); return true; }; diff --git a/src/common/transformations/src/transformations/common_optimizations/strides_optimization.cpp b/src/common/transformations/src/transformations/common_optimizations/strides_optimization.cpp index 0675d84a82c..f335e7d48fc 100644 --- a/src/common/transformations/src/transformations/common_optimizations/strides_optimization.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/strides_optimization.cpp @@ -2,18 +2,17 @@ // SPDX-License-Identifier: Apache-2.0 // +#include +#include +#include +#include +#include #include - -#include "itt.hpp" #include #include #include -#include -#include -#include -#include -#include +#include "itt.hpp" NGRAPH_RTTI_DEFINITION(ngraph::pass::StridesOptimization, "StridesOptimization", 0); @@ -21,7 +20,9 @@ NGRAPH_RTTI_DEFINITION(ngraph::pass::ConvStridesPropagation, "ConvStridesPropaga static bool can_propagate_conv_stride(const std::shared_ptr& conv) { const auto& kernel_shape = conv->input_value(1).get_shape(); - return std::all_of(kernel_shape.begin() + 2, kernel_shape.end(), [] (size_t s) -> bool { return s == 1; }); + return std::all_of(kernel_shape.begin() + 2, kernel_shape.end(), [](size_t s) -> bool { + return s == 1; + }); } static std::tuple check_next_ops(const std::vector>& next_ops) { @@ -32,37 +33,47 @@ static std::tuple check_next_ops(const std::vector bool { - bool all_ones = std::all_of(s.begin(), s.end(), [] (size_t i) -> bool { return i == 1; }); - return s == strides[0] && !all_ones; - }); + bool all_ops_are_valid = std::all_of(strides.begin(), strides.end(), [&strides](const ngraph::Strides& s) -> bool { + bool all_ones = std::all_of(s.begin(), s.end(), [](size_t i) -> bool { + return i == 1; + }); + return s == strides[0] && !all_ones; + }); return std::make_tuple(strides[0], all_ops_are_valid); } -static void insert_pooling(const ngraph::Output& first, ngraph::Input& second, const ngraph::Strides& strides) { +static void insert_pooling(const ngraph::Output& first, + ngraph::Input& second, + const ngraph::Strides& strides) { auto first_node = first.get_node_shared_ptr(); auto rank = first.get_partial_shape().rank(); bool do_reshape = rank.is_static() && static_cast(rank.get_length()) < strides.size() + 2; if (do_reshape) { size_t diff = strides.size() + 2 - static_cast(rank.get_length()); - auto ones = ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{diff}, std::vector(diff, 1)); + auto ones = + ngraph::opset7::Constant::create(ngraph::element::i64, ngraph::Shape{diff}, std::vector(diff, 1)); auto current_shape = std::make_shared(first); - std::shared_ptr new_shape = std::make_shared(ngraph::OutputVector{ones, current_shape}, 0); + 
std::shared_ptr new_shape = + std::make_shared(ngraph::OutputVector{ones, current_shape}, 0); std::shared_ptr constant_new_shape = get_constant_from_source(new_shape); if (constant_new_shape) new_shape = constant_new_shape; first_node = std::make_shared(first_node, new_shape, false); } - std::shared_ptr new_node = std::make_shared(first_node, strides, ngraph::Shape{}, - ngraph::Shape{}, ngraph::Shape(strides.size(), 1)); + std::shared_ptr new_node = + std::make_shared(first_node, + strides, + ngraph::Shape{}, + ngraph::Shape{}, + ngraph::Shape(strides.size(), 1)); if (do_reshape) { // squeeze dimensions back size_t diff = strides.size() + 2 - static_cast(rank.get_length()); std::vector axes(diff); std::iota(axes.begin(), axes.end(), 0); - new_node = std::make_shared(new_node, - ngraph::opset7::Constant::create(ngraph::element::u64, ngraph::Shape{diff}, axes)); + new_node = std::make_shared( + new_node, + ngraph::opset7::Constant::create(ngraph::element::u64, ngraph::Shape{diff}, axes)); } std::shared_ptr constant_new_node = get_constant_from_source(new_node); if (constant_new_node) @@ -75,8 +86,9 @@ static void handle_not_equal_stride_props(std::vector bool { return s == 1; }); + bool are_strides_ones = std::all_of(strides.begin(), strides.end(), [](size_t s) -> bool { + return s == 1; + }); if (!are_strides_ones) { auto conv = dynamic_cast(op.get_node()); if (conv) { @@ -90,15 +102,15 @@ static void handle_not_equal_stride_props(std::vector& node) -> bool { - const auto& shape = node.get_partial_shape(); - const auto& rank = shape.rank(); - if (rank.is_dynamic()) - return false; - return std::all_of(shape.begin() + 2, shape.end(), [] (const Dimension& dim) -> bool { - return dim.is_static(); - }); - }); + auto data = pattern::any_input([](const Output& node) -> bool { + const auto& shape = node.get_partial_shape(); + const auto& rank = shape.rank(); + if (rank.is_dynamic()) + return false; + return std::all_of(shape.begin() + 2, shape.end(), [](const Dimension& dim) -> bool { + return dim.is_static(); + }); + }); auto weights = pattern::any_input(pattern::has_static_shape()); auto conv_pattern = pattern::wrap_type({data, weights}); @@ -117,8 +129,13 @@ ngraph::pass::ConvStridesPropagation::ConvStridesPropagation() { if (!all_ops_are_valid) { handle_not_equal_stride_props(std::move(next_ops)); } else { - std::transform(conv_strides.begin(), conv_strides.end(), strides.begin(), conv_strides.begin(), - [] (size_t s1, size_t s2) -> size_t { return s1 * s2; }); + std::transform(conv_strides.begin(), + conv_strides.end(), + strides.begin(), + conv_strides.begin(), + [](size_t s1, size_t s2) -> size_t { + return s1 * s2; + }); } if (can_propagate_conv_stride(conv)) { diff --git a/src/common/transformations/src/transformations/common_optimizations/subtract_fusion.cpp b/src/common/transformations/src/transformations/common_optimizations/subtract_fusion.cpp index b6a0d983a4e..2f7ad6625d4 100644 --- a/src/common/transformations/src/transformations/common_optimizations/subtract_fusion.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/subtract_fusion.cpp @@ -29,17 +29,18 @@ ngraph::pass::SubtractFusion::SubtractFusion() { auto p_add_input = pattern::any_input(); auto p_add = ngraph::pattern::wrap_type({p_add_input, p_mul_or_neg}); - matcher_pass_callback callback = [=](pattern::Matcher &m) { - const auto & pattern_to_output = m.get_pattern_value_map(); - const auto & minuend_input = pattern_to_output.at(p_add_input); - const auto & subtrahend_input = 
pattern_to_output.at(p_input); + matcher_pass_callback callback = [=](pattern::Matcher& m) { + const auto& pattern_to_output = m.get_pattern_value_map(); + const auto& minuend_input = pattern_to_output.at(p_add_input); + const auto& subtrahend_input = pattern_to_output.at(p_input); - const auto & add = pattern_to_output.at(p_add).get_node_shared_ptr(); + const auto& add = pattern_to_output.at(p_add).get_node_shared_ptr(); NodeVector nodes_to_replace{add}; if (pattern_to_output.count(p_mul_const)) { - auto minus_one_const = std::dynamic_pointer_cast(pattern_to_output.at(p_mul_const).get_node_shared_ptr()); + auto minus_one_const = + std::dynamic_pointer_cast(pattern_to_output.at(p_mul_const).get_node_shared_ptr()); if (!op::util::has_constant_value(minus_one_const, -1.)) { return false; } diff --git a/src/common/transformations/src/transformations/common_optimizations/swish_fusion.cpp b/src/common/transformations/src/transformations/common_optimizations/swish_fusion.cpp index adbeeaf026a..918416c801b 100644 --- a/src/common/transformations/src/transformations/common_optimizations/swish_fusion.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/swish_fusion.cpp @@ -2,14 +2,14 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "itt.hpp" #include "transformations/common_optimizations/swish_fusion.hpp" #include - #include -#include #include +#include + +#include "itt.hpp" #include "transformations/utils/utils.hpp" namespace { @@ -30,7 +30,7 @@ bool check_beta_value(const std::shared_ptr& constant) return true; } -} // namespace +} // namespace NGRAPH_RTTI_DEFINITION(ngraph::pass::SwishFusion, "SwishFusion", 0); @@ -43,16 +43,16 @@ ngraph::pass::SwishFusionWithSigmoid::SwishFusionWithSigmoid() { auto sigmoid = std::make_shared(input); auto mul = std::make_shared(input, sigmoid); - ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher &m) { - auto &pattern_to_output = m.get_pattern_value_map(); + ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher& m) { + auto& pattern_to_output = m.get_pattern_value_map(); auto exp_input = pattern_to_output.at(input); auto swish = std::make_shared(exp_input); swish->set_friendly_name(m.get_match_root()->get_friendly_name()); - ngraph::copy_runtime_info({pattern_to_output.at(sigmoid).get_node_shared_ptr(), - pattern_to_output.at(mul).get_node_shared_ptr()}, - swish); + ngraph::copy_runtime_info( + {pattern_to_output.at(sigmoid).get_node_shared_ptr(), pattern_to_output.at(mul).get_node_shared_ptr()}, + swish); ngraph::replace_node(m.get_match_root(), swish); return true; }; @@ -72,8 +72,8 @@ ngraph::pass::SwishFusionWithSigmoidWithBeta::SwishFusionWithSigmoidWithBeta() { auto sigmoid = std::make_shared(mul_beta); auto mul = std::make_shared(input, sigmoid); - ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher &m) { - auto &pattern_to_output = m.get_pattern_value_map(); + ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher& m) { + auto& pattern_to_output = m.get_pattern_value_map(); auto exp_input = pattern_to_output.at(input); auto beta_input = pattern_to_output.at(beta); @@ -81,7 +81,9 @@ ngraph::pass::SwishFusionWithSigmoidWithBeta::SwishFusionWithSigmoidWithBeta() { Output new_beta; if (beta_constant) { if (check_beta_value(beta_constant)) { - new_beta = opset4::Constant::create(beta_input.get_element_type(), Shape{}, {beta_constant->cast_vector()[0]}); + new_beta = opset4::Constant::create(beta_input.get_element_type(), + Shape{}, + 
{beta_constant->cast_vector()[0]}); } else { return false; } @@ -96,9 +98,9 @@ ngraph::pass::SwishFusionWithSigmoidWithBeta::SwishFusionWithSigmoidWithBeta() { auto swish = std::make_shared(exp_input, new_beta); swish->set_friendly_name(m.get_match_root()->get_friendly_name()); - ngraph::copy_runtime_info({pattern_to_output.at(sigmoid).get_node_shared_ptr(), - pattern_to_output.at(mul).get_node_shared_ptr()}, - swish); + ngraph::copy_runtime_info( + {pattern_to_output.at(sigmoid).get_node_shared_ptr(), pattern_to_output.at(mul).get_node_shared_ptr()}, + swish); ngraph::replace_node(m.get_match_root(), swish); return true; }; @@ -121,11 +123,12 @@ ngraph::pass::SwishFusionWithBeta::SwishFusionWithBeta() { auto add = std::make_shared(exp, add_constant); auto div = std::make_shared(input, add); - ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher &m) { - auto &pattern_to_output = m.get_pattern_value_map(); + ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher& m) { + auto& pattern_to_output = m.get_pattern_value_map(); auto exp_input = pattern_to_output.at(input); - auto constant = std::dynamic_pointer_cast(pattern_to_output.at(add_constant).get_node_shared_ptr()); + auto constant = std::dynamic_pointer_cast( + pattern_to_output.at(add_constant).get_node_shared_ptr()); if (!op::util::has_constant_value(constant, 1.0f)) { return false; } @@ -162,10 +165,11 @@ ngraph::pass::SwishFusionWithoutBeta::SwishFusionWithoutBeta() { auto div = std::make_shared(input, add); ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher& m) { - auto & pattern_to_output = m.get_pattern_value_map(); + auto& pattern_to_output = m.get_pattern_value_map(); auto exp_input = pattern_to_output.at(input); - auto constant = std::dynamic_pointer_cast(pattern_to_output.at(add_constant).get_node_shared_ptr()); + auto constant = std::dynamic_pointer_cast( + pattern_to_output.at(add_constant).get_node_shared_ptr()); if (!op::util::has_constant_value(constant, 1.0f)) { return false; } @@ -178,7 +182,7 @@ ngraph::pass::SwishFusionWithoutBeta::SwishFusionWithoutBeta() { pattern_to_output.at(add_constant).get_node_shared_ptr(), pattern_to_output.at(add).get_node_shared_ptr(), pattern_to_output.at(div).get_node_shared_ptr()}, - swish); + swish); ngraph::replace_node(m.get_match_root(), swish); return true; }; diff --git a/src/common/transformations/src/transformations/common_optimizations/transpose_reshape_elimination_for_matmul.cpp b/src/common/transformations/src/transformations/common_optimizations/transpose_reshape_elimination_for_matmul.cpp index 1ac962c7abf..47865bb1dd2 100644 --- a/src/common/transformations/src/transformations/common_optimizations/transpose_reshape_elimination_for_matmul.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/transpose_reshape_elimination_for_matmul.cpp @@ -7,11 +7,11 @@ #include #include -#include "ngraph/opsets/opset1.hpp" -#include "ngraph/rt_info.hpp" -#include "ngraph/pattern/op/wrap_type.hpp" -#include "ngraph/validation_util.hpp" #include "itt.hpp" +#include "ngraph/opsets/opset1.hpp" +#include "ngraph/pattern/op/wrap_type.hpp" +#include "ngraph/rt_info.hpp" +#include "ngraph/validation_util.hpp" namespace { /// \brief Check for correct Transpose orders which are before and after MatMul. 
Second Transpose must be back for @@ -23,7 +23,9 @@ namespace { /// /// \return True - Transposes have right orders, otherwise, Transposes have incorrect order for transformation /// -bool check_transposes(const std::vector& before_order, const std::vector& after_order, const bool transposed_b) { +bool check_transposes(const std::vector& before_order, + const std::vector& after_order, + const bool transposed_b) { const size_t rank = before_order.size(); if (rank < 3) return false; @@ -77,10 +79,12 @@ bool check_transposes(const std::vector& before_order, const std::vecto /// \param new_shape New shape for Reshape /// \param transposed_b true - second MatMul input is transposed, otherwise, it's not transposed /// -/// \return True - Reshape has right new shape for reshaping, otherwise, Reshape has incorrect new shape for transformation +/// \return True - Reshape has right new shape for reshaping, otherwise, Reshape has incorrect new shape for +/// transformation /// bool check_input_reshape(const std::shared_ptr& reshape, - const std::vector& new_shape, const bool transposed_b) { + const std::vector& new_shape, + const bool transposed_b) { const auto input_shape = reshape->get_input_shape(0); const size_t input_rank = input_shape.size(); const size_t output_rank = reshape->get_output_shape(0).size(); @@ -89,12 +93,12 @@ bool check_input_reshape(const std::shared_ptr& reshape if (transposed_b) { const int64_t k = input_shape.back(); - const int64_t new_n = ov::shape_size(input_shape) / k; + const int64_t new_n = ov::shape_size(input_shape) / k; if (new_shape != std::vector{new_n, k}) return false; } else { const int64_t k = input_shape.front(); - const int64_t new_n = ov::shape_size(input_shape) / k; + const int64_t new_n = ov::shape_size(input_shape) / k; if (new_shape != std::vector{k, -1} && new_shape != std::vector{k, new_n}) return false; } @@ -107,61 +111,75 @@ NGRAPH_RTTI_DEFINITION(ngraph::pass::TransposeReshapeEliminationForMatmul, "Tran ngraph::pass::TransposeReshapeEliminationForMatmul::TransposeReshapeEliminationForMatmul() { MATCHER_SCOPE(TransposeReshapeEliminationForMatmul); - auto input_1_pattern = ngraph::pattern::any_input([] (const Output& node) -> bool { - const auto& shape = node.get_partial_shape(); - const auto& rank = shape.rank(); - return rank.is_static() && rank.get_length() == 2 && shape.is_static(); - }); - auto input_2_pattern = ngraph::pattern::any_input([] (const Output& node) -> bool { - return node.get_partial_shape().is_static(); - }); + auto input_1_pattern = ngraph::pattern::any_input([](const Output& node) -> bool { + const auto& shape = node.get_partial_shape(); + const auto& rank = shape.rank(); + return rank.is_static() && rank.get_length() == 2 && shape.is_static(); + }); + auto input_2_pattern = ngraph::pattern::any_input([](const Output& node) -> bool { + return node.get_partial_shape().is_static(); + }); auto const_transpose_before_pattern = ngraph::pattern::wrap_type(); - auto transpose_before_pattern = ngraph::pattern::wrap_type({input_2_pattern, const_transpose_before_pattern}); + auto transpose_before_pattern = + ngraph::pattern::wrap_type({input_2_pattern, const_transpose_before_pattern}); auto const_reshape_before_pattern = ngraph::pattern::wrap_type(); - auto reshape_before_pattern = ngraph::pattern::wrap_type({transpose_before_pattern, const_reshape_before_pattern}); + auto reshape_before_pattern = + ngraph::pattern::wrap_type({transpose_before_pattern, const_reshape_before_pattern}); auto matmul_pattern = 
ngraph::pattern::wrap_type({input_1_pattern, reshape_before_pattern}); auto const_reshape_after_pattern = ngraph::pattern::wrap_type(); - auto reshape_after_pattern = ngraph::pattern::wrap_type({matmul_pattern, const_reshape_after_pattern}); + auto reshape_after_pattern = + ngraph::pattern::wrap_type({matmul_pattern, const_reshape_after_pattern}); auto const_transpose_after_pattern = ngraph::pattern::wrap_type(); - auto transpose_after_pattern = ngraph::pattern::wrap_type({reshape_after_pattern, const_transpose_after_pattern}); + auto transpose_after_pattern = + ngraph::pattern::wrap_type({reshape_after_pattern, const_transpose_after_pattern}); ngraph::matcher_pass_callback callback = [=](pattern::Matcher& m) { const auto& pattern_value_map = m.get_pattern_value_map(); const auto& input_1 = pattern_value_map.at(input_1_pattern); const auto& input_2 = pattern_value_map.at(input_2_pattern); - auto matmul = std::dynamic_pointer_cast(pattern_value_map.at(matmul_pattern).get_node_shared_ptr()); + auto matmul = + std::dynamic_pointer_cast(pattern_value_map.at(matmul_pattern).get_node_shared_ptr()); if (!matmul) return false; const bool transposed_a = matmul->get_transpose_a(); const bool transposed_b = matmul->get_transpose_b(); - auto reshape_before = std::dynamic_pointer_cast(pattern_value_map.at(reshape_before_pattern).get_node_shared_ptr()); - auto reshape_after = std::dynamic_pointer_cast(pattern_value_map.at(reshape_after_pattern).get_node_shared_ptr()); + auto reshape_before = std::dynamic_pointer_cast( + pattern_value_map.at(reshape_before_pattern).get_node_shared_ptr()); + auto reshape_after = std::dynamic_pointer_cast( + pattern_value_map.at(reshape_after_pattern).get_node_shared_ptr()); auto reshape_before_constant = std::dynamic_pointer_cast( - pattern_value_map.at(const_reshape_before_pattern).get_node_shared_ptr()); + pattern_value_map.at(const_reshape_before_pattern).get_node_shared_ptr()); if (!reshape_before || !reshape_after || !reshape_before_constant) return false; if (!check_input_reshape(reshape_before, reshape_before_constant->cast_vector(), transposed_b)) return false; // check transpose order before and after matmul - auto transpose_before = std::dynamic_pointer_cast(pattern_value_map.at(transpose_before_pattern).get_node_shared_ptr()); - auto transpose_after = std::dynamic_pointer_cast(pattern_value_map.at(transpose_after_pattern).get_node_shared_ptr()); - if (!transpose_before || !transpose_after) return false; + auto transpose_before = std::dynamic_pointer_cast( + pattern_value_map.at(transpose_before_pattern).get_node_shared_ptr()); + auto transpose_after = std::dynamic_pointer_cast( + pattern_value_map.at(transpose_after_pattern).get_node_shared_ptr()); + if (!transpose_before || !transpose_after) + return false; - auto transpose_before_constant = std::dynamic_pointer_cast(transpose_before->get_input_node_shared_ptr(1)); - auto transpose_after_constant = std::dynamic_pointer_cast(transpose_after->get_input_node_shared_ptr(1)); - if (!transpose_before_constant || !transpose_after_constant) return false; + auto transpose_before_constant = + std::dynamic_pointer_cast(transpose_before->get_input_node_shared_ptr(1)); + auto transpose_after_constant = + std::dynamic_pointer_cast(transpose_after->get_input_node_shared_ptr(1)); + if (!transpose_before_constant || !transpose_after_constant) + return false; auto transpose_before_order = transpose_before_constant->cast_vector(); auto transpose_after_order = transpose_after_constant->cast_vector(); - // need to check that input 
shape is correctly contracted and output shape is correctly unpacked using transposes + // need to check that input shape is correctly contracted and output shape is correctly unpacked using + // transposes if (!check_transposes(transpose_before_order, transpose_after_order, transposed_b)) return false; diff --git a/src/common/transformations/src/transformations/common_optimizations/transpose_sinking.cpp b/src/common/transformations/src/transformations/common_optimizations/transpose_sinking.cpp index 4d0e1bab44b..a9127811eb9 100644 --- a/src/common/transformations/src/transformations/common_optimizations/transpose_sinking.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/transpose_sinking.cpp @@ -2,18 +2,18 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "itt.hpp" #include "transformations/common_optimizations/transpose_sinking.hpp" -#include "transformations/utils/utils.hpp" #include -#include - #include #include -#include #include +#include #include +#include + +#include "itt.hpp" +#include "transformations/utils/utils.hpp" NGRAPH_RTTI_DEFINITION(ngraph::pass::TransposeSinking, "TransposeSinking", 0); NGRAPH_RTTI_DEFINITION(ngraph::pass::TransposeConvert, "TransposeConvert", 0); @@ -26,8 +26,9 @@ using namespace ngraph; namespace { -std::shared_ptr get_reduced_order_constant(const std::shared_ptr& axes_const, - const std::shared_ptr& order_const) { +std::shared_ptr get_reduced_order_constant( + const std::shared_ptr& axes_const, + const std::shared_ptr& order_const) { auto order = order_const->cast_vector(); auto axes = axes_const->cast_vector(); @@ -44,11 +45,11 @@ std::shared_ptr get_reduced_order_constant(const std:: std::replace(order.begin(), order.end(), *lowest_greater_eq_i, i); std::replace(order_sorted.begin(), order_sorted.end(), *lowest_greater_eq_i, i); } - return std::make_shared( - ngraph::element::i64, ngraph::Shape{order.size()}, order); + return std::make_shared(ngraph::element::i64, ngraph::Shape{order.size()}, order); } -std::shared_ptr get_reversed_order_constant(const std::shared_ptr& order_const) { +std::shared_ptr get_reversed_order_constant( + const std::shared_ptr& order_const) { const auto& order = order_const->cast_vector(); const auto& rank = order.size(); const auto& default_order = ngraph::get_default_order(rank); @@ -56,34 +57,35 @@ std::shared_ptr get_reversed_order_constant(const std: for (size_t i = 0; i < rank; ++i) reverse_order[order[i]] = default_order[i]; - return std::make_shared( - ngraph::element::i64, ngraph::Shape{reverse_order.size()}, reverse_order); + return std::make_shared(ngraph::element::i64, + ngraph::Shape{reverse_order.size()}, + reverse_order); } -} // namespace +} // namespace ngraph::pass::TransposeEltwise::TransposeEltwise() { MATCHER_SCOPE(TransposeEltwise); auto eltwise_data_input_p = pattern::any_input(); auto eltwise_const_input_p = pattern::wrap_type(); - auto eltwise_p = pattern::wrap_type({eltwise_data_input_p, eltwise_const_input_p}, - [](const Output & output) { + auto eltwise_p = pattern::wrap_type( + {eltwise_data_input_p, eltwise_const_input_p}, + [](const Output& output) { return ov::is_preprocesing_node(output.get_node_shared_ptr()); - }); - auto transpose_p = pattern::wrap_type({eltwise_p, - pattern::wrap_type()}, - pattern::consumers_count(1)); + }); + auto transpose_p = pattern::wrap_type({eltwise_p, pattern::wrap_type()}, + pattern::consumers_count(1)); - auto callback = [=](ngraph::pattern::Matcher &m) { - const auto &pattern_to_output = m.get_pattern_value_map(); + auto 
callback = [=](ngraph::pattern::Matcher& m) { + const auto& pattern_to_output = m.get_pattern_value_map(); auto eltwise = pattern_to_output.at(eltwise_p).get_node_shared_ptr(); auto eltwise_const_input = pattern_to_output.at(eltwise_const_input_p); auto eltwise_data_input = pattern_to_output.at(eltwise_data_input_p); auto transpose = pattern_to_output.at(transpose_p).get_node_shared_ptr(); - const auto & order_size = transpose->get_input_shape(1).at(0); - const auto & shape = eltwise_const_input.get_shape(); + const auto& order_size = transpose->get_input_shape(1).at(0); + const auto& shape = eltwise_const_input.get_shape(); if (shape.size() != order_size && ov::shape_size(shape) != 1) { // TODO: temporary restrictions return false; @@ -113,13 +115,13 @@ ngraph::pass::TransposeEltwise::TransposeEltwise() { ngraph::pass::TransposeConvert::TransposeConvert() { MATCHER_SCOPE(TransposeConvert); - auto transpose_label = pattern::wrap_type({pattern::any_input(), - pattern::wrap_type()}, - pattern::consumers_count(1)); + auto transpose_label = + pattern::wrap_type({pattern::any_input(), pattern::wrap_type()}, + pattern::consumers_count(1)); auto convert_label = pattern::wrap_type({transpose_label}); - matcher_pass_callback matcher_pass_callback = [=](ngraph::pattern::Matcher &m) { - const auto &pattern_to_output = m.get_pattern_value_map(); + matcher_pass_callback matcher_pass_callback = [=](ngraph::pattern::Matcher& m) { + const auto& pattern_to_output = m.get_pattern_value_map(); auto transpose = pattern_to_output.at(transpose_label).get_node_shared_ptr(); auto convert = pattern_to_output.at(convert_label).get_node_shared_ptr(); @@ -140,12 +142,15 @@ ngraph::pass::TransposeConvert::TransposeConvert() { ngraph::pass::TransposeReduction::TransposeReduction() { MATCHER_SCOPE(TransposeReduction); - auto transpose_label = pattern::wrap_type({pattern::any_input(), pattern::wrap_type()}, pattern::consumers_count(1)); - auto reduce_or_squeeze_label = pattern::wrap_type( + auto transpose_label = + pattern::wrap_type({pattern::any_input(), pattern::wrap_type()}, + pattern::consumers_count(1)); + auto reduce_or_squeeze_label = + pattern::wrap_type( {transpose_label, pattern::wrap_type()}); - ngraph::matcher_pass_callback matcher_pass_callback = [=](ngraph::pattern::Matcher &m) { - const auto &pattern_to_output = m.get_pattern_value_map(); + ngraph::matcher_pass_callback matcher_pass_callback = [=](ngraph::pattern::Matcher& m) { + const auto& pattern_to_output = m.get_pattern_value_map(); auto transpose = pattern_to_output.at(transpose_label).get_node_shared_ptr(); auto reduction = pattern_to_output.at(reduce_or_squeeze_label).get_node_shared_ptr(); @@ -155,24 +160,30 @@ ngraph::pass::TransposeReduction::TransposeReduction() { if (!transpose || !(arithmetic_reduce || logical_reduce || squeeze)) return false; - bool keep_dims = false; // squeeze always reduces number of output dimensions + bool keep_dims = false; // squeeze always reduces number of output dimensions if (logical_reduce) keep_dims = logical_reduce->get_keep_dims(); else if (arithmetic_reduce) keep_dims = arithmetic_reduce->get_keep_dims(); - auto transpose_order = std::dynamic_pointer_cast(transpose->get_input_node_shared_ptr(1)); - auto reduction_axes = std::dynamic_pointer_cast(reduction->get_input_node_shared_ptr(1)); + auto transpose_order = + std::dynamic_pointer_cast(transpose->get_input_node_shared_ptr(1)); + auto reduction_axes = + std::dynamic_pointer_cast(reduction->get_input_node_shared_ptr(1)); if (!transpose_order || 
!reduction_axes) return false; - const auto& non_negative_axes = ngraph::normalize_axes( - reduction->get_friendly_name(), reduction_axes->cast_vector(), reduction->get_input_partial_shape(0).rank()); - reduction_axes = ngraph::opset6::Constant::create(ngraph::element::i64, {non_negative_axes.size()}, non_negative_axes); + const auto& non_negative_axes = ngraph::normalize_axes(reduction->get_friendly_name(), + reduction_axes->cast_vector(), + reduction->get_input_partial_shape(0).rank()); + reduction_axes = + ngraph::opset6::Constant::create(ngraph::element::i64, {non_negative_axes.size()}, non_negative_axes); ngraph::NodeVector new_ops; auto new_axes = ngraph::op::util::make_try_fold( - transpose_order, reduction_axes, ngraph::opset6::Constant::create(ngraph::element::i64, {}, {0})); + transpose_order, + reduction_axes, + ngraph::opset6::Constant::create(ngraph::element::i64, {}, {0})); new_ops.push_back(new_axes); auto new_reduce = reduction->clone_with_new_inputs({transpose->input_value(0), new_axes}); new_ops.push_back(new_reduce); @@ -199,22 +210,28 @@ ngraph::pass::TransposeReduction::TransposeReduction() { ngraph::pass::TransposeFQReduction::TransposeFQReduction() { MATCHER_SCOPE(TransposeFQReduction); - auto transpose_label = pattern::wrap_type({pattern::any_input(), pattern::wrap_type()}); - auto fq_label = pattern::wrap_type( - {transpose_label, pattern::any_input(pattern::has_static_rank()), pattern::any_input(pattern::has_static_rank()), - pattern::any_input(pattern::has_static_rank()), pattern::any_input(pattern::has_static_rank())}); - auto reduce_or_squeeze_label = pattern::wrap_type( + auto transpose_label = + pattern::wrap_type({pattern::any_input(), pattern::wrap_type()}); + auto fq_label = pattern::wrap_type({transpose_label, + pattern::any_input(pattern::has_static_rank()), + pattern::any_input(pattern::has_static_rank()), + pattern::any_input(pattern::has_static_rank()), + pattern::any_input(pattern::has_static_rank())}); + auto reduce_or_squeeze_label = + pattern::wrap_type( {fq_label, pattern::wrap_type()}); - ngraph::matcher_pass_callback matcher_pass_callback = [=](ngraph::pattern::Matcher &m) { - auto &pattern_to_output = m.get_pattern_value_map(); + ngraph::matcher_pass_callback matcher_pass_callback = [=](ngraph::pattern::Matcher& m) { + auto& pattern_to_output = m.get_pattern_value_map(); auto transpose = pattern_to_output.at(transpose_label).get_node_shared_ptr(); - if (!transpose) return false; + if (!transpose) + return false; auto transpose_order = std::dynamic_pointer_cast(transpose->get_input_node_shared_ptr(1)); auto fq = pattern_to_output.at(fq_label).get_node_shared_ptr(); - if (!transpose_order || !fq) return false; + if (!transpose_order || !fq) + return false; ngraph::NodeVector new_ops; @@ -261,8 +278,10 @@ ngraph::pass::TransposeFQReduction::TransposeFQReduction() { ngraph::pass::TransposeFuse::TransposeFuse() { MATCHER_SCOPE(TransposeFuse); - auto transpose_1 = pattern::wrap_type({ pattern::any_input(), pattern::wrap_type() }, pattern::consumers_count(1)); - auto transpose_2 = pattern::wrap_type({ transpose_1, pattern::wrap_type() }); + auto transpose_1 = + pattern::wrap_type({pattern::any_input(), pattern::wrap_type()}, + pattern::consumers_count(1)); + auto transpose_2 = pattern::wrap_type({transpose_1, pattern::wrap_type()}); ngraph::matcher_pass_callback matcher_pass_callback = [=](ngraph::pattern::Matcher& m) { const auto& pattern_to_output = m.get_pattern_value_map(); @@ -271,8 +290,10 @@ ngraph::pass::TransposeFuse::TransposeFuse() { auto 
transpose2 = pattern_to_output.at(transpose_2).get_node_shared_ptr(); auto input = transpose1->input_value(0); - auto transpose1_order = std::dynamic_pointer_cast(transpose1->get_input_node_shared_ptr(1)); - auto transpose2_order = std::dynamic_pointer_cast(transpose2->get_input_node_shared_ptr(1)); + auto transpose1_order = + std::dynamic_pointer_cast(transpose1->get_input_node_shared_ptr(1)); + auto transpose2_order = + std::dynamic_pointer_cast(transpose2->get_input_node_shared_ptr(1)); if (!transpose1_order || !transpose2_order) return false; @@ -295,7 +316,7 @@ ngraph::pass::TransposeFuse::TransposeFuse() { auto new_transpose = register_new_node(input, new_order); new_transpose->set_friendly_name(m.get_match_root()->get_friendly_name()); - ngraph::copy_runtime_info({ transpose1, transpose2 }, new_transpose); + ngraph::copy_runtime_info({transpose1, transpose2}, new_transpose); ngraph::replace_node(m.get_match_root(), new_transpose); } diff --git a/src/common/transformations/src/transformations/common_optimizations/transpose_to_reshape.cpp b/src/common/transformations/src/transformations/common_optimizations/transpose_to_reshape.cpp index 94e2a5129ca..06a54f6e657 100644 --- a/src/common/transformations/src/transformations/common_optimizations/transpose_to_reshape.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/transpose_to_reshape.cpp @@ -2,17 +2,17 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "itt.hpp" #include "transformations/common_optimizations/transpose_to_reshape.hpp" -#include "transformations/utils/utils.hpp" #include +#include +#include +#include +#include #include -#include -#include -#include -#include +#include "itt.hpp" +#include "transformations/utils/utils.hpp" NGRAPH_RTTI_DEFINITION(ngraph::pass::TransposeToReshape, "TransposeToReshape", 0); @@ -22,7 +22,7 @@ ngraph::pass::TransposeToReshape::TransposeToReshape() { MATCHER_SCOPE(TransposeToReshape); auto transpose_label = pattern::wrap_type( - { pattern::any_input(pattern::has_static_rank()), pattern::wrap_type() }); + {pattern::any_input(pattern::has_static_rank()), pattern::wrap_type()}); ngraph::matcher_pass_callback matcher_pass_callback = [=](ngraph::pattern::Matcher& m) { auto transpose = m.get_match_root(); auto data = transpose->input_value(0); @@ -57,14 +57,14 @@ ngraph::pass::TransposeToReshape::TransposeToReshape() { std::vector dims; for (size_t i = 0; i < input_shape_rank; ++i) { if (order_value[i] != static_cast(i)) { - dims.push_back({ input_shape[order_value[i]], i }); + dims.push_back({input_shape[order_value[i]], i}); } } // If number of dimensions != 1 to move equal to 0 we can remove this Transpose if (count_if(dims.begin(), dims.end(), [](const DimensionToPosition& item) { - return !(item.dim.is_static() && item.dim.get_length() == 1); - }) == 0) { + return !(item.dim.is_static() && item.dim.get_length() == 1); + }) == 0) { return replace_output_update_name(transpose->output(0), transpose->input_value(0)); } @@ -82,19 +82,19 @@ ngraph::pass::TransposeToReshape::TransposeToReshape() { NodeVector new_ops; if (count_if(dims.begin(), dims.end(), [](const DimensionToPosition& item) { - return item.dim.is_dynamic(); - }) < 2) { + return item.dim.is_dynamic(); + }) < 2) { std::vector reshape_value(input_shape_rank, 0); for (const auto& item : dims) { reshape_value[item.pos] = item.dim.is_dynamic() ? 
-1 : item.dim.get_length(); } - reshape_dim = - opset3::Constant::create(element::i64, Shape{ reshape_value.size() }, reshape_value); + reshape_dim = opset3::Constant::create(element::i64, Shape{reshape_value.size()}, reshape_value); } else { auto shape_of = std::make_shared(data); new_ops.push_back(shape_of); - reshape_dim = std::make_shared( - shape_of, order, opset3::Constant::create(element::i64, Shape{ 1 }, { 0 })); + reshape_dim = std::make_shared(shape_of, + order, + opset3::Constant::create(element::i64, Shape{1}, {0})); new_ops.push_back(reshape_dim.get_node_shared_ptr()); } diff --git a/src/common/transformations/src/transformations/common_optimizations/weights_dequantize_to_fake_quantize.cpp b/src/common/transformations/src/transformations/common_optimizations/weights_dequantize_to_fake_quantize.cpp index e85e3d7c1e1..b30fd3f89f5 100644 --- a/src/common/transformations/src/transformations/common_optimizations/weights_dequantize_to_fake_quantize.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/weights_dequantize_to_fake_quantize.cpp @@ -3,12 +3,13 @@ // #include -#include #include +#include #include -#include #include #include +#include + #include "itt.hpp" NGRAPH_RTTI_DEFINITION(ngraph::pass::WeightsDequantizeToFakeQuantize, "WeightsDequantizeToFakeQuantize", 0); @@ -27,34 +28,42 @@ ngraph::pass::WeightsDequantizeToFakeQuantize::WeightsDequantizeToFakeQuantize() const auto mul = ngraph::pattern::wrap_type({sub_or_convert, mul_c}); ngraph::matcher_pass_callback callback; - callback = [=](ngraph::pattern::Matcher &m) { - const auto &pattern_map = m.get_pattern_map(); + callback = [=](ngraph::pattern::Matcher& m) { + const auto& pattern_map = m.get_pattern_map(); - const auto &weights_node = ov::as_type_ptr(pattern_map.at(weights)); - const auto &convert_node = pattern_map.at(convert); - const auto &multiply_node = pattern_map.at(mul); - const auto &scale_node = pattern_map.at(mul_c); + const auto& weights_node = ov::as_type_ptr(pattern_map.at(weights)); + const auto& convert_node = pattern_map.at(convert); + const auto& multiply_node = pattern_map.at(mul); + const auto& scale_node = pattern_map.at(mul_c); if (!weights_node || !convert_node || !multiply_node || !scale_node) { return false; } - const auto *data = weights_node->get_data_ptr(); + const auto* data = weights_node->get_data_ptr(); const int8_t weights_minimum = *std::min_element(data, data + shape_size(weights_node->get_shape())); int64_t levels = (weights_minimum == static_cast(-128)) ? 256 : 255; int64_t in_low = -(levels / 2), in_high = levels + in_low - 1; - const auto &input_low = opset6::Constant::create(convert_node->get_element_type(), {}, {in_low}); - const auto &input_high = opset6::Constant::create(convert_node->get_element_type(), {}, {in_high}); + const auto& input_low = opset6::Constant::create(convert_node->get_element_type(), {}, {in_low}); + const auto& input_high = opset6::Constant::create(convert_node->get_element_type(), {}, {in_high}); - auto &zero_point = pattern_map.count(sub_c) ? pattern_map.at(sub_c) : opset6::Constant::create(convert_node->get_element_type(), {}, {0}); + auto& zero_point = pattern_map.count(sub_c) + ? 
pattern_map.at(sub_c) + : opset6::Constant::create(convert_node->get_element_type(), {}, {0}); - const auto &output_low = op::util::eltwise_fold( - op::util::eltwise_fold(input_low, zero_point), scale_node); - const auto &output_high = op::util::eltwise_fold( - op::util::eltwise_fold(input_high, zero_point), scale_node); + const auto& output_low = + op::util::eltwise_fold(op::util::eltwise_fold(input_low, zero_point), + scale_node); + const auto& output_high = + op::util::eltwise_fold(op::util::eltwise_fold(input_high, zero_point), + scale_node); - auto fq = std::make_shared( - convert_node, input_low, input_high, output_low, output_high, levels); + auto fq = std::make_shared(convert_node, + input_low, + input_high, + output_low, + output_high, + levels); NodeVector nodes_to_copy_RT_info_from{multiply_node, scale_node, zero_point}; if (pattern_map.count(sub)) diff --git a/src/common/transformations/src/transformations/common_optimizations/wrap_interpolate_into_transposes.cpp b/src/common/transformations/src/transformations/common_optimizations/wrap_interpolate_into_transposes.cpp index e57078c7aab..0f07340841d 100644 --- a/src/common/transformations/src/transformations/common_optimizations/wrap_interpolate_into_transposes.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/wrap_interpolate_into_transposes.cpp @@ -2,24 +2,25 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "itt.hpp" #include "transformations/common_optimizations/wrap_interpolate_into_transposes.hpp" #include #include +#include #include -#include +#include +#include #include +#include #include #include -#include -#include -#include +#include "itt.hpp" namespace { std::vector reverse_permutation(const std::vector& perm) { - if (perm.empty()) return {}; + if (perm.empty()) + return {}; std::vector result(perm.size()); for (int64_t i = 0; i < static_cast(perm.size()); ++i) { @@ -48,7 +49,7 @@ std::vector build_new_axes(size_t num_of_axes, size_t rank) { std::iota(result.begin(), result.end(), static_cast(rank - num_of_axes)); return result; } -} // namespace +} // namespace NGRAPH_RTTI_DEFINITION(ngraph::pass::WrapInterpolateIntoTransposes, "WrapInterpolateIntoTransposes", 0); @@ -57,18 +58,25 @@ ngraph::pass::WrapInterpolateIntoTransposes::WrapInterpolateIntoTransposes() { auto interpolate_pattern = ov::pass::pattern::wrap_type(); ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher& m) { auto interpolate = std::dynamic_pointer_cast(m.get_match_root()); - if (!interpolate || interpolate->get_input_partial_shape(0).rank().is_dynamic() || interpolate->inputs().size() != 4) return false; + if (!interpolate || interpolate->get_input_partial_shape(0).rank().is_dynamic() || + interpolate->inputs().size() != 4) + return false; int64_t input_rank = interpolate->get_input_partial_shape(0).rank().get_length(); // If the input rank is equal to 1 or 2, then such Interpolate is supported by MKLDNN. 
- if (input_rank < 3) return false; + if (input_rank < 3) + return false; - auto axes_node = std::dynamic_pointer_cast(interpolate->input_value(3).get_node_shared_ptr()); - if (!axes_node) return false; + auto axes_node = + std::dynamic_pointer_cast(interpolate->input_value(3).get_node_shared_ptr()); + if (!axes_node) + return false; const auto axes = axes_node->cast_vector(); if (static_cast(axes.size()) > input_rank - 2 || - std::all_of(axes.begin(), axes.end(), [](int64_t axis){ return axis != 0 && axis != 1; })) { + std::all_of(axes.begin(), axes.end(), [](int64_t axis) { + return axis != 0 && axis != 1; + })) { return false; } @@ -76,15 +84,23 @@ ngraph::pass::WrapInterpolateIntoTransposes::WrapInterpolateIntoTransposes() { const auto last_perm = reverse_permutation(first_perm); auto first_transpose_perm = ov::opset8::Constant::create(element::i64, {first_perm.size()}, first_perm); - auto first_transpose = std::make_shared(interpolate->input_value(0), first_transpose_perm); + auto first_transpose = + std::make_shared(interpolate->input_value(0), first_transpose_perm); auto new_axes = build_new_axes(axes.size(), input_rank); auto new_axes_node = ov::opset8::Constant::create(element::i64, {new_axes.size()}, new_axes); - auto new_interpolate = interpolate->clone_with_new_inputs({first_transpose, interpolate->input_value(1), interpolate->input_value(2), new_axes_node}); + auto new_interpolate = interpolate->clone_with_new_inputs( + {first_transpose, interpolate->input_value(1), interpolate->input_value(2), new_axes_node}); auto last_transpose_perm = ov::opset8::Constant::create(element::i64, {last_perm.size()}, last_perm); auto last_transpose = std::make_shared(new_interpolate, last_transpose_perm); last_transpose->set_friendly_name(interpolate->get_friendly_name()); - copy_runtime_info(interpolate, {first_transpose_perm, first_transpose, new_axes_node, new_interpolate, last_transpose_perm, last_transpose}); + copy_runtime_info(interpolate, + {first_transpose_perm, + first_transpose, + new_axes_node, + new_interpolate, + last_transpose_perm, + last_transpose}); replace_node(interpolate, last_transpose); return true; diff --git a/src/common/transformations/src/transformations/control_flow/unroll_if.cpp b/src/common/transformations/src/transformations/control_flow/unroll_if.cpp index 7ee8fe2ab82..5e75607e71f 100644 --- a/src/common/transformations/src/transformations/control_flow/unroll_if.cpp +++ b/src/common/transformations/src/transformations/control_flow/unroll_if.cpp @@ -47,11 +47,12 @@ bool ngraph::pass::UnrollIf::run_on_model(const std::shared_ptroutput(output_desc->m_output_index))); + in_value.get_tensor().set_name( + op::util::create_ie_output_name(if_node->output(output_desc->m_output_index))); NGRAPH_SUPPRESS_DEPRECATED_END - for (const auto& input : if_node->output(output_desc->m_output_index).get_target_inputs()) { - input.replace_source_output(result->get_input_source_output(0)); - } + for (const auto& input : if_node->output(output_desc->m_output_index).get_target_inputs()) { + input.replace_source_output(result->get_input_source_output(0)); + } } is_applicable = true; f->add_sinks(body->get_sinks()); diff --git a/src/common/transformations/src/transformations/control_flow/unroll_tensor_iterator.cpp b/src/common/transformations/src/transformations/control_flow/unroll_tensor_iterator.cpp index f25db972d67..8176f544a2b 100644 --- a/src/common/transformations/src/transformations/control_flow/unroll_tensor_iterator.cpp +++ 
b/src/common/transformations/src/transformations/control_flow/unroll_tensor_iterator.cpp @@ -2,17 +2,17 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "itt.hpp" #include "transformations/control_flow/unroll_tensor_iterator.hpp" -#include "transformations/utils/utils.hpp" #include -#include - #include #include #include #include +#include + +#include "itt.hpp" +#include "transformations/utils/utils.hpp" NGRAPH_RTTI_DEFINITION(ngraph::pass::UnrollTensorIterator, "UnrollTensorIterator", 0); @@ -24,7 +24,7 @@ bool ngraph::pass::UnrollTensorIterator::run_on_model(const std::shared_ptrget_function(); + const auto& function = sub_graph_op->get_function(); int64_t num_iter = sub_graph_op->get_num_iterations(); // negative value means inconsistent TI @@ -37,7 +37,7 @@ bool ngraph::pass::UnrollTensorIterator::run_on_model(const std::shared_ptr> body_functions(num_iter); for (int64_t idx = 0; idx < num_iter; ++idx) { body_functions[idx] = clone_function(*function); - for (auto &node : body_functions[idx]->get_ops()) { + for (auto& node : body_functions[idx]->get_ops()) { node->set_friendly_name(sub_graph_op->get_friendly_name() + "/" + std::to_string(idx + 1) + "/" + node->get_friendly_name()); copy_runtime_info(sub_graph_op, node); @@ -45,9 +45,9 @@ bool ngraph::pass::UnrollTensorIterator::run_on_model(const std::shared_ptrget_input_descriptions()) { - if (const auto &input_desc = std::dynamic_pointer_cast( - desc)) { + for (const auto& desc : sub_graph_op->get_input_descriptions()) { + if (const auto& input_desc = + std::dynamic_pointer_cast(desc)) { // Connect the sliced input (layer before the input) to the Split layer and connect // the corresponding Split output to the corresponding copy of the body. // If the number of iterations is 1, then the Split is not needed. @@ -63,23 +63,23 @@ bool ngraph::pass::UnrollTensorIterator::run_on_model(const std::shared_ptr 0 ? j : num_iter - j - 1; const auto& param = body_functions[j]->get_parameters()[input_desc->m_body_parameter_index]; - for (auto &output : param->outputs()) { + for (auto& output : param->outputs()) { output.replace(split->output(idx)); } } } else { // connect to the body const auto& param = body_functions[0]->get_parameters()[input_desc->m_body_parameter_index]; - for (auto &output : param->outputs()) { + for (auto& output : param->outputs()) { output.replace(in_data); } } - } else if (const auto &merged_desc = std::dynamic_pointer_cast( - desc)) { + } else if (const auto& merged_desc = + std::dynamic_pointer_cast(desc)) { // Connect the input to the corresponding copy of the body. auto in_data = sub_graph_op->input_values()[merged_desc->m_input_index]; const auto& param = body_functions[0]->get_parameters()[merged_desc->m_body_parameter_index]; - for (auto &output : param->outputs()) { + for (auto& output : param->outputs()) { output.replace(in_data); } @@ -87,17 +87,17 @@ bool ngraph::pass::UnrollTensorIterator::run_on_model(const std::shared_ptrget_parameters()[merged_desc->m_body_parameter_index]; const auto& prev_val = body_functions[j - 1]->get_results()[merged_desc->m_body_value_index]; - for (auto &output : cur_param->outputs()) { + for (auto& output : cur_param->outputs()) { output.replace(prev_val->get_input_source_output(0)); } } - } else if (const auto &invariant_desc = std::dynamic_pointer_cast( - desc)) { + } else if (const auto& invariant_desc = + std::dynamic_pointer_cast(desc)) { // Connect the input to the corresponding copy of the body. 
auto in_data = sub_graph_op->input_values()[invariant_desc->m_input_index]; for (int64_t j = 0; j < num_iter; j++) { auto param = body_functions[j]->get_parameters()[invariant_desc->m_body_parameter_index]; - for (auto &output : param->outputs()) { + for (auto& output : param->outputs()) { output.replace(in_data); } } @@ -108,9 +108,9 @@ bool ngraph::pass::UnrollTensorIterator::run_on_model(const std::shared_ptrget_output_descriptions()) { - if (const auto &concat_desc = std::dynamic_pointer_cast( - desc)) { + for (const auto& desc : sub_graph_op->get_output_descriptions()) { + if (const auto& concat_desc = + std::dynamic_pointer_cast(desc)) { if (!concat_desc) { return false; } @@ -126,7 +126,8 @@ bool ngraph::pass::UnrollTensorIterator::run_on_model(const std::shared_ptr 0 ? j : num_iter - j - 1; - std::shared_ptr result = body_functions[idx]->get_results()[concat_desc->m_body_value_index]; + std::shared_ptr result = + body_functions[idx]->get_results()[concat_desc->m_body_value_index]; auto input_to_res = result->get_input_source_output(0); to_concat[j] = input_to_res; } @@ -136,40 +137,41 @@ bool ngraph::pass::UnrollTensorIterator::run_on_model(const std::shared_ptroutput(0).get_tensor().set_name( - op::util::create_ie_output_name(sub_graph_op->output(concat_desc->m_output_index))); + op::util::create_ie_output_name(sub_graph_op->output(concat_desc->m_output_index))); NGRAPH_SUPPRESS_DEPRECATED_END // connect the Concat layer to the corresponding TI outputs - for (auto &input : sub_graph_op->output(concat_desc->m_output_index).get_target_inputs()) { + for (auto& input : sub_graph_op->output(concat_desc->m_output_index).get_target_inputs()) { input.replace_source_output(concat); } } else { // Connect outputs of the bodies to the corresponding TI outputs - std::shared_ptr result = body_functions[0]->get_results().at( - concat_desc->m_body_value_index); + std::shared_ptr result = + body_functions[0]->get_results().at(concat_desc->m_body_value_index); const auto& input_to_res = result->get_input_source_output(0); // set output name to Tensor to store it for ngraph to cnn conversion NGRAPH_SUPPRESS_DEPRECATED_START input_to_res.get_tensor().set_name( - op::util::create_ie_output_name(sub_graph_op->output(concat_desc->m_output_index))); + op::util::create_ie_output_name(sub_graph_op->output(concat_desc->m_output_index))); NGRAPH_SUPPRESS_DEPRECATED_END - for (auto &input : sub_graph_op->output(concat_desc->m_output_index).get_target_inputs()) { + for (auto& input : sub_graph_op->output(concat_desc->m_output_index).get_target_inputs()) { input.replace_source_output(input_to_res); } } - } else if (const auto &output_desc = std::dynamic_pointer_cast( - desc)) { + } else if (const auto& output_desc = + std::dynamic_pointer_cast(desc)) { // Connect outputs of the bodies to the corresponding TI outputs auto iter = output_desc->m_iteration; - iter = iter >= 0? iter: num_iter - 1; - std::shared_ptr result = body_functions[iter]->get_results()[output_desc->m_body_value_index]; + iter = iter >= 0 ? 
iter : num_iter - 1; + std::shared_ptr result = + body_functions[iter]->get_results()[output_desc->m_body_value_index]; const auto& in_value = result->input_value(0); // set output name to Tensor to store it for ngraph to cnn conversion NGRAPH_SUPPRESS_DEPRECATED_START in_value.get_tensor().set_name( - op::util::create_ie_output_name(sub_graph_op->output(output_desc->m_output_index))); + op::util::create_ie_output_name(sub_graph_op->output(output_desc->m_output_index))); NGRAPH_SUPPRESS_DEPRECATED_END - for (const auto &input : sub_graph_op->output(output_desc->m_output_index).get_target_inputs()) { + for (const auto& input : sub_graph_op->output(output_desc->m_output_index).get_target_inputs()) { input.replace_source_output(result->get_input_source_output(0)); } } else { @@ -184,24 +186,26 @@ bool ngraph::pass::UnrollTensorIterator::run_on_model(const std::shared_ptr(sub_graph_op); + const auto& loop = std::dynamic_pointer_cast(sub_graph_op); if (loop) { // 1. Check CurrentIteration Parameter is not connected to outer network bool need_to_remove_iteration_param = false; const auto cur_iter_idx = loop->get_special_body_ports().current_iteration_input_idx; if (cur_iter_idx >= 0) { - const auto &in_descs = loop->get_input_descriptions(); - need_to_remove_iteration_param = std::all_of(in_descs.begin(), in_descs.end(), - [cur_iter_idx](const std::shared_ptr &in_desc) { - return in_desc->m_body_parameter_index != static_cast(cur_iter_idx); - }); + const auto& in_descs = loop->get_input_descriptions(); + need_to_remove_iteration_param = + std::all_of(in_descs.begin(), + in_descs.end(), + [cur_iter_idx](const std::shared_ptr& in_desc) { + return in_desc->m_body_parameter_index != static_cast(cur_iter_idx); + }); } // 2. Replace CurrentIteration Parameter with a Constant for each copy of the body if (need_to_remove_iteration_param) { for (int64_t idx = 0; idx < num_iter; ++idx) { const auto iter_idx = loop->get_special_body_ports().current_iteration_input_idx; - const auto ¶m_to_delete = body_functions[idx]->get_parameters()[iter_idx]; + const auto& param_to_delete = body_functions[idx]->get_parameters()[iter_idx]; auto cur_iter_const = std::make_shared(ngraph::element::i64, Shape{}, idx); replace_node(param_to_delete, cur_iter_const); body_functions[idx]->remove_parameter(param_to_delete); diff --git a/src/common/transformations/src/transformations/disable_decompression_convert_constant_folding.cpp b/src/common/transformations/src/transformations/disable_decompression_convert_constant_folding.cpp index 3d5288f5ad0..3d5d60c5553 100644 --- a/src/common/transformations/src/transformations/disable_decompression_convert_constant_folding.cpp +++ b/src/common/transformations/src/transformations/disable_decompression_convert_constant_folding.cpp @@ -4,11 +4,11 @@ #include "transformations/disable_decompression_convert_constant_folding.hpp" +#include "itt.hpp" #include "openvino/opsets/opset8.hpp" #include "openvino/pass/pattern/op/wrap_type.hpp" -#include "transformations/rt_info/disable_constant_folding.hpp" #include "transformations/rt_info/decompression.hpp" -#include "itt.hpp" +#include "transformations/rt_info/disable_constant_folding.hpp" ov::pass::DisableDecompressionConvertConstantFolding::DisableDecompressionConvertConstantFolding() { MATCHER_SCOPE(DisableDecompressionConvertConstantFolding); diff --git a/src/common/transformations/src/transformations/low_precision/disable_convert_constant_folding_on_const_path.cpp 
b/src/common/transformations/src/transformations/low_precision/disable_convert_constant_folding_on_const_path.cpp index 7a346be7b51..7db3b639942 100644 --- a/src/common/transformations/src/transformations/low_precision/disable_convert_constant_folding_on_const_path.cpp +++ b/src/common/transformations/src/transformations/low_precision/disable_convert_constant_folding_on_const_path.cpp @@ -5,26 +5,27 @@ #include "transformations/low_precision/disable_convert_constant_folding_on_const_path.hpp" #include -#include -#include - #include #include -#include #include +#include #include +#include #include +#include using namespace ngraph; -NGRAPH_RTTI_DEFINITION(ngraph::pass::DisableConvertConstantFoldingOnConstPath, "DisableConvertConstantFoldingOnConstPath", 0); +NGRAPH_RTTI_DEFINITION(ngraph::pass::DisableConvertConstantFoldingOnConstPath, + "DisableConvertConstantFoldingOnConstPath", + 0); ngraph::pass::DisableConvertConstantFoldingOnConstPath::DisableConvertConstantFoldingOnConstPath( - const element::TypeVector & inputPrecisions) { + const element::TypeVector& inputPrecisions) { auto matcherData = ngraph::pattern::any_input(); - auto matcherConvert = ngraph::pattern::wrap_type({ matcherData }, pattern::consumers_count(1)); + auto matcherConvert = ngraph::pattern::wrap_type({matcherData}, pattern::consumers_count(1)); - ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher & m) -> bool { + ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher& m) -> bool { const auto& opsMap = m.get_pattern_value_map(); const auto convert = opsMap.at(matcherConvert).get_node_shared_ptr(); diff --git a/src/common/transformations/src/transformations/op_conversions/batch_norm_decomposition.cpp b/src/common/transformations/src/transformations/op_conversions/batch_norm_decomposition.cpp index d9b6eab3c7a..22148b0b15b 100644 --- a/src/common/transformations/src/transformations/op_conversions/batch_norm_decomposition.cpp +++ b/src/common/transformations/src/transformations/op_conversions/batch_norm_decomposition.cpp @@ -2,18 +2,18 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "itt.hpp" #include "transformations/op_conversions/batch_norm_decomposition.hpp" #include -#include - #include #include -#include -#include #include +#include +#include #include +#include + +#include "itt.hpp" using namespace ngraph; @@ -21,23 +21,19 @@ NGRAPH_RTTI_DEFINITION(ngraph::pass::BatchNormDecomposition, "BatchNormDecomposi ngraph::pass::BatchNormDecomposition::BatchNormDecomposition() { MATCHER_SCOPE(BatchNormDecomposition); - auto bn_1 = pattern::wrap_type({ - pattern::any_input(pattern::has_static_shape()), - pattern::any_input(pattern::has_static_shape()), - pattern::any_input(pattern::has_static_rank()), - pattern::any_input(pattern::has_static_shape()), - pattern::any_input(pattern::has_static_shape()) - }); - auto bn_5 = pattern::wrap_type({ - pattern::any_input(pattern::has_static_rank()), - pattern::any_input(pattern::has_static_shape()), - pattern::any_input(pattern::has_static_shape()), - pattern::any_input(pattern::has_static_shape()), - pattern::any_input(pattern::has_static_shape()) - }); + auto bn_1 = pattern::wrap_type({pattern::any_input(pattern::has_static_shape()), + pattern::any_input(pattern::has_static_shape()), + pattern::any_input(pattern::has_static_rank()), + pattern::any_input(pattern::has_static_shape()), + pattern::any_input(pattern::has_static_shape())}); + auto bn_5 = pattern::wrap_type({pattern::any_input(pattern::has_static_rank()), + 
pattern::any_input(pattern::has_static_shape()), + pattern::any_input(pattern::has_static_shape()), + pattern::any_input(pattern::has_static_shape()), + pattern::any_input(pattern::has_static_shape())}); auto bn = std::make_shared(OutputVector{bn_1, bn_5}); - ngraph::matcher_pass_callback callback = [this](ngraph::pattern::Matcher &m) { + ngraph::matcher_pass_callback callback = [this](ngraph::pattern::Matcher& m) { auto m_bn = m.get_match_root(); Output m_input, m_gamma, m_beta, m_mean, m_var; double eps; @@ -75,19 +71,20 @@ ngraph::pass::BatchNormDecomposition::BatchNormDecomposition() { // create new shape [1, C, 1, 1, ...] const auto new_shape = std::make_shared(OutputVector{one, C_dim, tail_shape}, 0); - std::shared_ptr gamma_div_scale_aligned = std::make_shared(gamma_div_scale, new_shape, true); + std::shared_ptr gamma_div_scale_aligned = + std::make_shared(gamma_div_scale, new_shape, true); std::shared_ptr beta_aligned = std::make_shared(m_beta, new_shape, true); std::shared_ptr mean_aligned = std::make_shared(m_mean, new_shape, true); std::shared_ptr mean_negative = std::make_shared( mean_aligned, opset5::Constant::create(mean_aligned->get_output_element_type(0), Shape{}, {-1})); - if (auto constant = ov::get_constant_from_source(beta_aligned)) - beta_aligned = constant; - if (auto constant = ov::get_constant_from_source(mean_negative)) - mean_negative = constant; - if (auto constant = ov::get_constant_from_source(gamma_div_scale_aligned)) - gamma_div_scale_aligned = constant; + if (auto constant = ov::get_constant_from_source(beta_aligned)) + beta_aligned = constant; + if (auto constant = ov::get_constant_from_source(mean_negative)) + mean_negative = constant; + if (auto constant = ov::get_constant_from_source(gamma_div_scale_aligned)) + gamma_div_scale_aligned = constant; // input_sub_mean = input + mean * -1 auto input_sub_mean = register_new_node(m_input, mean_negative); @@ -98,8 +95,9 @@ ngraph::pass::BatchNormDecomposition::BatchNormDecomposition() { add->set_friendly_name(m_bn->get_friendly_name()); - copy_runtime_info(m_bn, {scale_add, scale, gamma_div_scale, gamma_div_scale_aligned, - beta_aligned, input_sub_mean, mul, add}); + copy_runtime_info( + m_bn, + {scale_add, scale, gamma_div_scale, gamma_div_scale_aligned, beta_aligned, input_sub_mean, mul, add}); replace_node(m_bn, add); @@ -108,4 +106,3 @@ ngraph::pass::BatchNormDecomposition::BatchNormDecomposition() { auto m = std::make_shared(bn, matcher_name); this->register_matcher(m, callback); } - diff --git a/src/common/transformations/src/transformations/op_conversions/bidirectional_sequences_decomposition.cpp b/src/common/transformations/src/transformations/op_conversions/bidirectional_sequences_decomposition.cpp index 8fc3b86669d..200a538f70f 100644 --- a/src/common/transformations/src/transformations/op_conversions/bidirectional_sequences_decomposition.cpp +++ b/src/common/transformations/src/transformations/op_conversions/bidirectional_sequences_decomposition.cpp @@ -2,18 +2,20 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "itt.hpp" #include "transformations/op_conversions/bidirectional_sequences_decomposition.hpp" #include - #include #include -#include #include +#include + +#include "itt.hpp" NGRAPH_RTTI_DEFINITION(ngraph::pass::BidirectionalSequenceDecomposition, "BidirectionalSequenceDecomposition", 0); -NGRAPH_RTTI_DEFINITION(ngraph::pass::BidirectionalLSTMSequenceDecomposition, "BidirectionalLSTMSequenceDecomposition", 0); +NGRAPH_RTTI_DEFINITION(ngraph::pass::BidirectionalLSTMSequenceDecomposition, + 
"BidirectionalLSTMSequenceDecomposition", + 0); NGRAPH_RTTI_DEFINITION(ngraph::pass::BidirectionalGRUSequenceDecomposition, "BidirectionalGRUSequenceDecomposition", 0); NGRAPH_RTTI_DEFINITION(ngraph::pass::BidirectionalRNNSequenceDecomposition, "BidirectionalRNNSequenceDecomposition", 0); @@ -21,7 +23,7 @@ ngraph::pass::BidirectionalLSTMSequenceDecomposition::BidirectionalLSTMSequenceD MATCHER_SCOPE(BidirectionalLSTMSequenceDecomposition); auto lstm_sequence_ngraph = ngraph::pattern::wrap_type(); - ngraph::matcher_pass_callback callback = [this](pattern::Matcher &m) { + ngraph::matcher_pass_callback callback = [this](pattern::Matcher& m) { auto lstm_sequence = std::dynamic_pointer_cast(m.get_match_root()); if (!lstm_sequence || transformation_callback(lstm_sequence)) { return false; @@ -37,47 +39,51 @@ ngraph::pass::BidirectionalLSTMSequenceDecomposition::BidirectionalLSTMSequenceD auto W = std::make_shared(lstm_sequence->input_value(4), axis_0, 2); auto R = std::make_shared(lstm_sequence->input_value(5), axis_0, 2); auto B = std::make_shared(lstm_sequence->input_value(6), axis_0, 2); - auto lstm_sequence_forward = std::make_shared( - lstm_sequence->input_value(0), - H->output(0), - C->output(0), - lstm_sequence->input_value(3), - W->output(0), - R->output(0), - B->output(0), - lstm_sequence->get_hidden_size(), - ngraph::op::RecurrentSequenceDirection::FORWARD, - lstm_sequence->get_activations_alpha(), - lstm_sequence->get_activations_beta(), - lstm_sequence->get_activations(), - lstm_sequence->get_clip()); + auto lstm_sequence_forward = + std::make_shared(lstm_sequence->input_value(0), + H->output(0), + C->output(0), + lstm_sequence->input_value(3), + W->output(0), + R->output(0), + B->output(0), + lstm_sequence->get_hidden_size(), + ngraph::op::RecurrentSequenceDirection::FORWARD, + lstm_sequence->get_activations_alpha(), + lstm_sequence->get_activations_beta(), + lstm_sequence->get_activations(), + lstm_sequence->get_clip()); - auto lstm_sequence_reverse = std::make_shared( - lstm_sequence->input_value(0), - H->output(1), - C->output(1), - lstm_sequence->input_value(3), - W->output(1), - R->output(1), - B->output(1), - lstm_sequence->get_hidden_size(), - ngraph::op::RecurrentSequenceDirection::REVERSE, - lstm_sequence->get_activations_alpha(), - lstm_sequence->get_activations_beta(), - lstm_sequence->get_activations(), - lstm_sequence->get_clip()); + auto lstm_sequence_reverse = + std::make_shared(lstm_sequence->input_value(0), + H->output(1), + C->output(1), + lstm_sequence->input_value(3), + W->output(1), + R->output(1), + B->output(1), + lstm_sequence->get_hidden_size(), + ngraph::op::RecurrentSequenceDirection::REVERSE, + lstm_sequence->get_activations_alpha(), + lstm_sequence->get_activations_beta(), + lstm_sequence->get_activations(), + lstm_sequence->get_clip()); - auto concat_0 = std::make_shared(OutputVector{lstm_sequence_forward->output(0), - lstm_sequence_reverse->output(0)}, 1); - auto concat_1 = std::make_shared(OutputVector{lstm_sequence_forward->output(1), - lstm_sequence_reverse->output(1)}, 1); - auto concat_2 = std::make_shared(OutputVector{lstm_sequence_forward->output(2), - lstm_sequence_reverse->output(2)}, 1); - ngraph::copy_runtime_info(lstm_sequence, {H, C, W, R, B, lstm_sequence_forward, lstm_sequence_reverse, - concat_0, concat_1, concat_2}); - concat_0->set_friendly_name(lstm_sequence->get_friendly_name()+".0"); - concat_1->set_friendly_name(lstm_sequence->get_friendly_name()+".1"); - concat_2->set_friendly_name(lstm_sequence->get_friendly_name()+".2"); + auto 
concat_0 = std::make_shared( + OutputVector{lstm_sequence_forward->output(0), lstm_sequence_reverse->output(0)}, + 1); + auto concat_1 = std::make_shared( + OutputVector{lstm_sequence_forward->output(1), lstm_sequence_reverse->output(1)}, + 1); + auto concat_2 = std::make_shared( + OutputVector{lstm_sequence_forward->output(2), lstm_sequence_reverse->output(2)}, + 1); + ngraph::copy_runtime_info( + lstm_sequence, + {H, C, W, R, B, lstm_sequence_forward, lstm_sequence_reverse, concat_0, concat_1, concat_2}); + concat_0->set_friendly_name(lstm_sequence->get_friendly_name() + ".0"); + concat_1->set_friendly_name(lstm_sequence->get_friendly_name() + ".1"); + concat_2->set_friendly_name(lstm_sequence->get_friendly_name() + ".2"); ngraph::replace_node(lstm_sequence, {concat_0->output(0), concat_1->output(0), concat_2->output(0)}); return true; }; @@ -90,7 +96,7 @@ ngraph::pass::BidirectionalGRUSequenceDecomposition::BidirectionalGRUSequenceDec MATCHER_SCOPE(BidirectionalGRUSequenceDecomposition); auto gru_sequence_ngraph = ngraph::pattern::wrap_type(); - ngraph::matcher_pass_callback callback = [this](pattern::Matcher &m) { + ngraph::matcher_pass_callback callback = [this](pattern::Matcher& m) { auto gru_sequence = std::dynamic_pointer_cast(m.get_match_root()); if (!gru_sequence || transformation_callback(gru_sequence)) { return false; @@ -105,44 +111,46 @@ ngraph::pass::BidirectionalGRUSequenceDecomposition::BidirectionalGRUSequenceDec auto W = std::make_shared(gru_sequence->input_value(3), axis_0, 2); auto R = std::make_shared(gru_sequence->input_value(4), axis_0, 2); auto B = std::make_shared(gru_sequence->input_value(5), axis_0, 2); - auto gru_sequence_forward = std::make_shared( - gru_sequence->input_value(0), - H->output(0), - gru_sequence->input_value(2), - W->output(0), - R->output(0), - B->output(0), - gru_sequence->get_hidden_size(), - ngraph::op::RecurrentSequenceDirection::FORWARD, - gru_sequence->get_activations(), - gru_sequence->get_activations_alpha(), - gru_sequence->get_activations_beta(), - gru_sequence->get_clip(), - gru_sequence->get_linear_before_reset()); + auto gru_sequence_forward = + std::make_shared(gru_sequence->input_value(0), + H->output(0), + gru_sequence->input_value(2), + W->output(0), + R->output(0), + B->output(0), + gru_sequence->get_hidden_size(), + ngraph::op::RecurrentSequenceDirection::FORWARD, + gru_sequence->get_activations(), + gru_sequence->get_activations_alpha(), + gru_sequence->get_activations_beta(), + gru_sequence->get_clip(), + gru_sequence->get_linear_before_reset()); - auto gru_sequence_reverse = std::make_shared( - gru_sequence->input_value(0), - H->output(1), - gru_sequence->input_value(2), - W->output(1), - R->output(1), - B->output(1), - gru_sequence->get_hidden_size(), - ngraph::op::RecurrentSequenceDirection::REVERSE, - gru_sequence->get_activations(), - gru_sequence->get_activations_alpha(), - gru_sequence->get_activations_beta(), - gru_sequence->get_clip(), - gru_sequence->get_linear_before_reset()); + auto gru_sequence_reverse = + std::make_shared(gru_sequence->input_value(0), + H->output(1), + gru_sequence->input_value(2), + W->output(1), + R->output(1), + B->output(1), + gru_sequence->get_hidden_size(), + ngraph::op::RecurrentSequenceDirection::REVERSE, + gru_sequence->get_activations(), + gru_sequence->get_activations_alpha(), + gru_sequence->get_activations_beta(), + gru_sequence->get_clip(), + gru_sequence->get_linear_before_reset()); - auto concat_0 = std::make_shared(OutputVector{gru_sequence_forward->output(0), - 
gru_sequence_reverse->output(0)}, 1); - auto concat_1 = std::make_shared(OutputVector{gru_sequence_forward->output(1), - gru_sequence_reverse->output(1)}, 1); - ngraph::copy_runtime_info(gru_sequence, {H, W, R, B, gru_sequence_forward, gru_sequence_reverse, - concat_0, concat_1}); - concat_0->set_friendly_name(gru_sequence->get_friendly_name()+".0"); - concat_1->set_friendly_name(gru_sequence->get_friendly_name()+".1"); + auto concat_0 = std::make_shared( + OutputVector{gru_sequence_forward->output(0), gru_sequence_reverse->output(0)}, + 1); + auto concat_1 = std::make_shared( + OutputVector{gru_sequence_forward->output(1), gru_sequence_reverse->output(1)}, + 1); + ngraph::copy_runtime_info(gru_sequence, + {H, W, R, B, gru_sequence_forward, gru_sequence_reverse, concat_0, concat_1}); + concat_0->set_friendly_name(gru_sequence->get_friendly_name() + ".0"); + concat_1->set_friendly_name(gru_sequence->get_friendly_name() + ".1"); ngraph::replace_node(gru_sequence, {concat_0->output(0), concat_1->output(0)}); return true; }; @@ -155,7 +163,7 @@ ngraph::pass::BidirectionalRNNSequenceDecomposition::BidirectionalRNNSequenceDec MATCHER_SCOPE(BidirectionalRNNSequenceDecomposition); auto rnn_sequence_ngraph = ngraph::pattern::wrap_type(); - ngraph::matcher_pass_callback callback = [this](pattern::Matcher &m) { + ngraph::matcher_pass_callback callback = [this](pattern::Matcher& m) { auto rnn_sequence = std::dynamic_pointer_cast(m.get_match_root()); if (!rnn_sequence || transformation_callback(rnn_sequence)) { return false; @@ -170,40 +178,42 @@ ngraph::pass::BidirectionalRNNSequenceDecomposition::BidirectionalRNNSequenceDec auto W = std::make_shared(rnn_sequence->input_value(3), axis_0, 2); auto R = std::make_shared(rnn_sequence->input_value(4), axis_0, 2); auto B = std::make_shared(rnn_sequence->input_value(5), axis_0, 2); - auto rnn_sequence_forward = std::make_shared( - rnn_sequence->input_value(0), - H->output(0), - rnn_sequence->input_value(2), - W->output(0), - R->output(0), - B->output(0), - rnn_sequence->get_hidden_size(), - ngraph::op::RecurrentSequenceDirection::FORWARD, - rnn_sequence->get_activations(), - rnn_sequence->get_activations_alpha(), - rnn_sequence->get_activations_beta(), - rnn_sequence->get_clip()); + auto rnn_sequence_forward = + std::make_shared(rnn_sequence->input_value(0), + H->output(0), + rnn_sequence->input_value(2), + W->output(0), + R->output(0), + B->output(0), + rnn_sequence->get_hidden_size(), + ngraph::op::RecurrentSequenceDirection::FORWARD, + rnn_sequence->get_activations(), + rnn_sequence->get_activations_alpha(), + rnn_sequence->get_activations_beta(), + rnn_sequence->get_clip()); - auto rnn_sequence_reverse = std::make_shared( - rnn_sequence->input_value(0), - H->output(1), - rnn_sequence->input_value(2), - W->output(1), - R->output(1), - B->output(1), - rnn_sequence->get_hidden_size(), - ngraph::op::RecurrentSequenceDirection::REVERSE, - rnn_sequence->get_activations(), - rnn_sequence->get_activations_alpha(), - rnn_sequence->get_activations_beta(), - rnn_sequence->get_clip()); + auto rnn_sequence_reverse = + std::make_shared(rnn_sequence->input_value(0), + H->output(1), + rnn_sequence->input_value(2), + W->output(1), + R->output(1), + B->output(1), + rnn_sequence->get_hidden_size(), + ngraph::op::RecurrentSequenceDirection::REVERSE, + rnn_sequence->get_activations(), + rnn_sequence->get_activations_alpha(), + rnn_sequence->get_activations_beta(), + rnn_sequence->get_clip()); - auto concat_0 = std::make_shared(OutputVector{rnn_sequence_forward->output(0), - 
rnn_sequence_reverse->output(0)}, 1); - auto concat_1 = std::make_shared(OutputVector{rnn_sequence_forward->output(1), - rnn_sequence_reverse->output(1)}, 1); - ngraph::copy_runtime_info(rnn_sequence, {H, W, R, B, rnn_sequence_forward, rnn_sequence_reverse, - concat_0, concat_1}); + auto concat_0 = std::make_shared( + OutputVector{rnn_sequence_forward->output(0), rnn_sequence_reverse->output(0)}, + 1); + auto concat_1 = std::make_shared( + OutputVector{rnn_sequence_forward->output(1), rnn_sequence_reverse->output(1)}, + 1); + ngraph::copy_runtime_info(rnn_sequence, + {H, W, R, B, rnn_sequence_forward, rnn_sequence_reverse, concat_0, concat_1}); concat_0->set_friendly_name(rnn_sequence->get_friendly_name() + ".0"); concat_1->set_friendly_name(rnn_sequence->get_friendly_name() + ".1"); ngraph::replace_node(rnn_sequence, {concat_0->output(0), concat_1->output(0)}); diff --git a/src/common/transformations/src/transformations/op_conversions/convert_batch_to_space.cpp b/src/common/transformations/src/transformations/op_conversions/convert_batch_to_space.cpp index 819cb1ae5ba..7d387903398 100644 --- a/src/common/transformations/src/transformations/op_conversions/convert_batch_to_space.cpp +++ b/src/common/transformations/src/transformations/op_conversions/convert_batch_to_space.cpp @@ -2,15 +2,15 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "itt.hpp" #include "transformations/op_conversions/convert_batch_to_space.hpp" #include -#include - #include #include #include +#include + +#include "itt.hpp" NGRAPH_RTTI_DEFINITION(ngraph::pass::ConvertBatchToSpace, "ConvertBatchToSpace", 0); @@ -18,7 +18,7 @@ void ngraph::pass::ConvertBatchToSpace::convert_batch_to_space() { MATCHER_SCOPE(ConvertBatchToSpace_convert_batch_to_space); auto batch_to_space = ngraph::pattern::wrap_type(); ngraph::matcher_pass_callback callback = [](pattern::Matcher& m) { - auto batch_to_space = std::dynamic_pointer_cast (m.get_match_root()); + auto batch_to_space = std::dynamic_pointer_cast(m.get_match_root()); if (!batch_to_space) { return false; } @@ -42,8 +42,8 @@ void ngraph::pass::ConvertBatchToSpace::convert_batch_to_space() { return false; } - const std::vector &block_values = block_const->cast_vector(); - const std::vector &crops_end_values = crops_end_const->cast_vector(); + const std::vector& block_values = block_const->cast_vector(); + const std::vector& crops_end_values = crops_end_const->cast_vector(); // First we have to disperse the data from batch, then rearrange them // so as appropriate chunks of data where close to their destination place. @@ -65,7 +65,7 @@ void ngraph::pass::ConvertBatchToSpace::convert_batch_to_space() { } const auto out_pattern_1 = - opset3::Constant::create(element::i64, Shape{dispersed_shape.size()}, dispersed_shape); + opset3::Constant::create(element::i64, Shape{dispersed_shape.size()}, dispersed_shape); const bool special_zero = false; std::shared_ptr flat_node = std::make_shared(data, out_pattern_1, special_zero); new_ops.push_back(flat_node); @@ -78,9 +78,9 @@ void ngraph::pass::ConvertBatchToSpace::convert_batch_to_space() { } const auto axes_order_const = - opset3::Constant::create(element::i64, - Shape{axes_order.size()}, - std::vector(axes_order.begin(), axes_order.end())); + opset3::Constant::create(element::i64, + Shape{axes_order.size()}, + std::vector(axes_order.begin(), axes_order.end())); flat_node = std::make_shared(flat_node, axes_order_const); new_ops.push_back(flat_node); // x''' = reshape(x'', [batch / (B_1 * ... * B_{N - 1}), D_1 * B_1, D_2 * B_2, ... 
, D_{N - 1} @@ -91,8 +91,7 @@ void ngraph::pass::ConvertBatchToSpace::convert_batch_to_space() { squeezed_shape.push_back(data_shape.at(i) * block_values.at(i)); } - const auto out_pattern_2 = - opset3::Constant::create(element::i64, Shape{squeezed_shape.size()}, squeezed_shape); + const auto out_pattern_2 = opset3::Constant::create(element::i64, Shape{squeezed_shape.size()}, squeezed_shape); flat_node = std::make_shared(flat_node, out_pattern_2, special_zero); new_ops.push_back(flat_node); @@ -108,13 +107,14 @@ void ngraph::pass::ConvertBatchToSpace::convert_batch_to_space() { upperbounds_values.push_back(flat_node_shape.at(i) - crops_end_values.at(i)); } - const auto upperbounds = opset3::Constant::create( - crops_end.get_element_type(), Shape{upperbounds_values.size()}, upperbounds_values); + const auto upperbounds = opset3::Constant::create(crops_end.get_element_type(), + Shape{upperbounds_values.size()}, + upperbounds_values); std::vector begin_mask(data_shape.size(), 0); std::vector end_mask(data_shape.size(), 0); - flat_node = std::make_shared( - flat_node, crops_begin_const, upperbounds, begin_mask, end_mask); + flat_node = + std::make_shared(flat_node, crops_begin_const, upperbounds, begin_mask, end_mask); new_ops.push_back(flat_node); flat_node->set_friendly_name(batch_to_space->get_friendly_name()); @@ -131,7 +131,7 @@ void ngraph::pass::ConvertBatchToSpace::convert_batch_to_space_by_elements() { MATCHER_SCOPE(ConvertBatchToSpace_convert_batch_to_space_by_elements); auto batch_to_space = ngraph::pattern::wrap_type(); ngraph::matcher_pass_callback callback = [this](pattern::Matcher& m) { - auto batch_to_space = std::dynamic_pointer_cast (m.get_match_root()); + auto batch_to_space = std::dynamic_pointer_cast(m.get_match_root()); if (!batch_to_space) { return false; } @@ -154,8 +154,8 @@ void ngraph::pass::ConvertBatchToSpace::convert_batch_to_space_by_elements() { const auto crops_begin_const = ov::as_type_ptr(crops_begin.get_node_shared_ptr()); const auto crops_end_const = ov::as_type_ptr(crops_end.get_node_shared_ptr()); - const std::vector &block_values = block_const->cast_vector(); - const std::vector &crops_end_values = crops_end_const->cast_vector(); + const std::vector& block_values = block_const->cast_vector(); + const std::vector& crops_end_values = crops_end_const->cast_vector(); std::vector dispersed_shape(1); dispersed_shape.insert(dispersed_shape.end(), data_shape.begin(), data_shape.end()); @@ -172,7 +172,7 @@ void ngraph::pass::ConvertBatchToSpace::convert_batch_to_space_by_elements() { dispersed_shape[0] = block_values[block_idx]; dispersed_shape[1] /= block_values[block_idx]; const auto out_pattern_1 = - opset3::Constant::create(element::i64, Shape{dispersed_shape.size()}, dispersed_shape); + opset3::Constant::create(element::i64, Shape{dispersed_shape.size()}, dispersed_shape); const bool special_zero = false; flat_node = std::make_shared(flat_node, out_pattern_1, special_zero); new_ops.push_back(flat_node); @@ -188,9 +188,9 @@ void ngraph::pass::ConvertBatchToSpace::convert_batch_to_space_by_elements() { } const auto axes_order_const = - ngraph::opset3::Constant::create(element::i64, - Shape{axes_order.size()}, - std::vector(axes_order.begin(), axes_order.end())); + ngraph::opset3::Constant::create(element::i64, + Shape{axes_order.size()}, + std::vector(axes_order.begin(), axes_order.end())); flat_node = std::make_shared(flat_node, axes_order_const); new_ops.push_back(flat_node); @@ -198,7 +198,7 @@ void 
ngraph::pass::ConvertBatchToSpace::convert_batch_to_space_by_elements() { squeezed_shape[block_idx] *= block_values[block_idx]; dispersed_shape[block_idx + 1] = squeezed_shape[block_idx]; const auto out_pattern_2 = - opset3::Constant::create(element::i64, Shape{squeezed_shape.size()}, squeezed_shape); + opset3::Constant::create(element::i64, Shape{squeezed_shape.size()}, squeezed_shape); flat_node = std::make_shared(flat_node, out_pattern_2, special_zero); new_ops.push_back(flat_node); } @@ -208,13 +208,14 @@ void ngraph::pass::ConvertBatchToSpace::convert_batch_to_space_by_elements() { for (size_t i = 0; i < flat_node_shape.size(); ++i) { upperbounds_values.push_back(flat_node_shape.at(i) - crops_end_values.at(i)); } - const auto upperbounds = opset3::Constant::create( - crops_end.get_element_type(), Shape{upperbounds_values.size()}, upperbounds_values); + const auto upperbounds = opset3::Constant::create(crops_end.get_element_type(), + Shape{upperbounds_values.size()}, + upperbounds_values); std::vector begin_mask(data_shape.size(), 0); std::vector end_mask(data_shape.size(), 0); - flat_node = std::make_shared( - flat_node, crops_begin_const, upperbounds, begin_mask, end_mask); + flat_node = + std::make_shared(flat_node, crops_begin_const, upperbounds, begin_mask, end_mask); new_ops.push_back(flat_node); flat_node->set_friendly_name(batch_to_space->get_friendly_name()); diff --git a/src/common/transformations/src/transformations/op_conversions/convert_broadcast3.cpp b/src/common/transformations/src/transformations/op_conversions/convert_broadcast3.cpp index 515c48b52ed..3a45833d0e2 100644 --- a/src/common/transformations/src/transformations/op_conversions/convert_broadcast3.cpp +++ b/src/common/transformations/src/transformations/op_conversions/convert_broadcast3.cpp @@ -2,31 +2,30 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "itt.hpp" #include "transformations/op_conversions/convert_broadcast3.hpp" #include -#include - -#include - #include #include #include +#include +#include + +#include "itt.hpp" NGRAPH_RTTI_DEFINITION(ngraph::pass::ConvertBroadcast3, "ConvertBroadcast3", 0); namespace { -bool make_compatible_shape(const ngraph::PartialShape & input_shape, std::vector & target_shape) { +bool make_compatible_shape(const ngraph::PartialShape& input_shape, std::vector& target_shape) { if (input_shape.rank().is_dynamic()) { return false; } - const int64_t & input_shape_rank = input_shape.rank().get_length(); + const int64_t& input_shape_rank = input_shape.rank().get_length(); if (input_shape_rank > static_cast(target_shape.size())) { // target_shape rank must greater or equal to input_shape rank, so in case when it's less we // insert missing input_shape dimensions to the beginning of the target_shape. 
- const int64_t & dims_to_add_count = input_shape_rank - target_shape.size(); + const int64_t& dims_to_add_count = input_shape_rank - target_shape.size(); std::vector dims_to_add(dims_to_add_count); for (int64_t dim = 0; dim < dims_to_add_count; ++dim) { if (input_shape[dim].is_dynamic()) { @@ -36,9 +35,10 @@ bool make_compatible_shape(const ngraph::PartialShape & input_shape, std::vector } target_shape.insert(target_shape.begin(), dims_to_add.begin(), dims_to_add.end()); } - for (int64_t i_dim = input_shape_rank - 1, t_dim = target_shape.size() - 1; i_dim >= 0 && t_dim >= 0; --i_dim, --t_dim) { + for (int64_t i_dim = input_shape_rank - 1, t_dim = target_shape.size() - 1; i_dim >= 0 && t_dim >= 0; + --i_dim, --t_dim) { if (input_shape[i_dim].is_static()) { - const auto & input_dim = input_shape[i_dim].get_length(); + const auto& input_dim = input_shape[i_dim].get_length(); if (static_cast(input_dim) != target_shape[t_dim] && input_dim != 1 && target_shape[t_dim] != 1) { // this dimensions are not broadcastable return false; @@ -57,7 +57,7 @@ bool make_compatible_shape(const ngraph::PartialShape & input_shape, std::vector return true; } -} // namespace +} // namespace ngraph::pass::ConvertBroadcast3::ConvertBroadcast3() { MATCHER_SCOPE(ConvertBroadcast3); @@ -71,26 +71,34 @@ ngraph::pass::ConvertBroadcast3::ConvertBroadcast3() { auto input = broadcast->input_value(0); auto target_shape_input = broadcast->input_value(1); - const auto & broadcast_type = broadcast->get_broadcast_spec(); - const auto & input_element_type = input.get_element_type(); + const auto& broadcast_type = broadcast->get_broadcast_spec(); + const auto& input_element_type = input.get_element_type(); if (broadcast_type == op::BroadcastType::NUMPY) { input = std::make_shared(input, target_shape_input, op::AutoBroadcastType::NUMPY); } else if (broadcast_type == op::BroadcastType::PDPD) { input = std::make_shared(input, target_shape_input, op::AutoBroadcastType::PDPD); } else if (broadcast_type == op::BroadcastType::NONE) { - input = std::make_shared(input, target_shape_input, broadcast->input_value(2), op::AutoBroadcastType::NONE); + input = std::make_shared(input, + target_shape_input, + broadcast->input_value(2), + op::AutoBroadcastType::NONE); } else if (broadcast_type == op::BroadcastType::BIDIRECTIONAL) { - if (auto const_target_shape = std::dynamic_pointer_cast(target_shape_input.get_node_shared_ptr())) { - const auto & input_shape = input.get_partial_shape(); - const auto & target_shape = const_target_shape->cast_vector(); + if (auto const_target_shape = + std::dynamic_pointer_cast(target_shape_input.get_node_shared_ptr())) { + const auto& input_shape = input.get_partial_shape(); + const auto& target_shape = const_target_shape->cast_vector(); std::vector aligned_target_shape{target_shape}; if (make_compatible_shape(input_shape, aligned_target_shape)) { - input = std::make_shared(input, - opset1::Constant::create(element::i64, Shape({aligned_target_shape.size()}), aligned_target_shape)); + input = std::make_shared( + input, + opset1::Constant::create(element::i64, + Shape({aligned_target_shape.size()}), + aligned_target_shape)); } else { - input = std::make_shared(input, - opset1::Constant::create(input_element_type, target_shape, {1})); + input = std::make_shared( + input, + opset1::Constant::create(input_element_type, target_shape, {1})); } } else { auto constant_one = opset1::Constant::create(input_element_type, {1}, {1}); diff --git 
a/src/common/transformations/src/transformations/op_conversions/convert_broadcast_to_tiles.cpp b/src/common/transformations/src/transformations/op_conversions/convert_broadcast_to_tiles.cpp index 45a005e6bd4..b7fbaa32ebf 100644 --- a/src/common/transformations/src/transformations/op_conversions/convert_broadcast_to_tiles.cpp +++ b/src/common/transformations/src/transformations/op_conversions/convert_broadcast_to_tiles.cpp @@ -2,15 +2,15 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "itt.hpp" #include "transformations/op_conversions/convert_broadcast_to_tiles.hpp" #include +#include +#include +#include #include -#include -#include -#include +#include "itt.hpp" NGRAPH_RTTI_DEFINITION(ngraph::pass::ConvertBroadcastToTiles, "ConvertBroadcastToTiles", 0); @@ -30,9 +30,12 @@ ngraph::pass::ConvertBroadcastToTiles::ConvertBroadcastToTiles() { return false; } - auto shape_node = std::dynamic_pointer_cast(broadcast->input_value(1).get_node_shared_ptr()); - auto axes_node = std::dynamic_pointer_cast(broadcast->input_value(2).get_node_shared_ptr()); - if (!shape_node || !axes_node) return false; + auto shape_node = + std::dynamic_pointer_cast(broadcast->input_value(1).get_node_shared_ptr()); + auto axes_node = + std::dynamic_pointer_cast(broadcast->input_value(2).get_node_shared_ptr()); + if (!shape_node || !axes_node) + return false; auto output_shape = shape_node->cast_vector(); auto input_shape = data_node.get_shape(); @@ -64,7 +67,7 @@ ngraph::pass::ConvertBroadcastToTiles::ConvertBroadcastToTiles() { } else { return false; } - auto shape_const = std::make_shared(element::i64, Shape {shape.size()}, shape); + auto shape_const = std::make_shared(element::i64, Shape{shape.size()}, shape); auto reshape = std::make_shared(data_node, shape_const, true); new_ops.push_back(reshape); last_node = reshape; @@ -88,7 +91,7 @@ ngraph::pass::ConvertBroadcastToTiles::ConvertBroadcastToTiles() { ++input_shape_it; } - auto const_node = std::make_shared(element::i64, Shape {dims_count}, dims); + auto const_node = std::make_shared(element::i64, Shape{dims_count}, dims); auto tile = register_new_node(last_node, const_node); new_ops.push_back(tile); tile->set_friendly_name(broadcast->get_friendly_name()); diff --git a/src/common/transformations/src/transformations/op_conversions/convert_deformable_conv_v8_to_v1.cpp b/src/common/transformations/src/transformations/op_conversions/convert_deformable_conv_v8_to_v1.cpp index 276a1192ca1..e4d1380164d 100644 --- a/src/common/transformations/src/transformations/op_conversions/convert_deformable_conv_v8_to_v1.cpp +++ b/src/common/transformations/src/transformations/op_conversions/convert_deformable_conv_v8_to_v1.cpp @@ -3,10 +3,11 @@ // #include "transformations/op_conversions/convert_deformable_conv_v8_to_v1.hpp" + #include #include -#include #include +#include #include "itt.hpp" @@ -18,12 +19,12 @@ ngraph::pass::ConvertDeformableConv8To1::ConvertDeformableConv8To1() { auto deformable_conv_v8 = pattern::wrap_type(); ngraph::matcher_pass_callback callback = [=](pattern::Matcher& m) { - auto deformable_conv_v8_node = std::dynamic_pointer_cast(m.get_match_root()); + auto deformable_conv_v8_node = + std::dynamic_pointer_cast(m.get_match_root()); if (!deformable_conv_v8_node) return false; - if (deformable_conv_v8_node->get_input_size() != 3 - || deformable_conv_v8_node->get_bilinear_interpolation_pad()) + if (deformable_conv_v8_node->get_input_size() != 3 || deformable_conv_v8_node->get_bilinear_interpolation_pad()) return false; auto arg = 
deformable_conv_v8_node->input_value(0); @@ -31,16 +32,16 @@ ngraph::pass::ConvertDeformableConv8To1::ConvertDeformableConv8To1() { auto filters = deformable_conv_v8_node->input_value(2); auto deformable_conv_v1 = - std::make_shared(arg, - offsets, - filters, - deformable_conv_v8_node->get_strides(), - deformable_conv_v8_node->get_pads_begin(), - deformable_conv_v8_node->get_pads_end(), - deformable_conv_v8_node->get_dilations(), - deformable_conv_v8_node->get_auto_pad(), - deformable_conv_v8_node->get_group(), - deformable_conv_v8_node->get_deformable_group()); + std::make_shared(arg, + offsets, + filters, + deformable_conv_v8_node->get_strides(), + deformable_conv_v8_node->get_pads_begin(), + deformable_conv_v8_node->get_pads_end(), + deformable_conv_v8_node->get_dilations(), + deformable_conv_v8_node->get_auto_pad(), + deformable_conv_v8_node->get_group(), + deformable_conv_v8_node->get_deformable_group()); deformable_conv_v1->set_friendly_name(deformable_conv_v8_node->get_friendly_name()); ngraph::copy_runtime_info(deformable_conv_v8_node, deformable_conv_v1); ngraph::replace_node(deformable_conv_v8_node, deformable_conv_v1); diff --git a/src/common/transformations/src/transformations/op_conversions/convert_depth_to_space.cpp b/src/common/transformations/src/transformations/op_conversions/convert_depth_to_space.cpp index 90ceae01f0f..e8d2a4d8104 100644 --- a/src/common/transformations/src/transformations/op_conversions/convert_depth_to_space.cpp +++ b/src/common/transformations/src/transformations/op_conversions/convert_depth_to_space.cpp @@ -2,24 +2,25 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "itt.hpp" #include "transformations/op_conversions/convert_depth_to_space.hpp" #include +#include +#include +#include #include -#include -#include -#include +#include "itt.hpp" NGRAPH_RTTI_DEFINITION(ngraph::pass::ConvertDepthToSpace, "ConvertDepthToSpace", 0); ngraph::pass::ConvertDepthToSpace::ConvertDepthToSpace() { MATCHER_SCOPE(ConvertDepthToSpace); - auto dts_node = ngraph::pattern::wrap_type({pattern::any_input(pattern::has_static_shape())}); + auto dts_node = + ngraph::pattern::wrap_type({pattern::any_input(pattern::has_static_shape())}); ngraph::matcher_pass_callback callback = [this](pattern::Matcher& m) { - auto dts_node = std::dynamic_pointer_cast (m.get_match_root()); + auto dts_node = std::dynamic_pointer_cast(m.get_match_root()); if (!dts_node || transformation_callback(dts_node)) { return false; } @@ -52,12 +53,12 @@ ngraph::pass::ConvertDepthToSpace::ConvertDepthToSpace() { } switch (mode) { - case ngraph::op::DepthToSpace::DepthToSpaceMode::BLOCKS_FIRST: - shape_begin.push_back(C); - break; - case ngraph::op::DepthToSpace::DepthToSpaceMode::DEPTH_FIRST: - shape_begin.insert(shape_begin.begin() + 1, C); - break; + case ngraph::op::DepthToSpace::DepthToSpaceMode::BLOCKS_FIRST: + shape_begin.push_back(C); + break; + case ngraph::op::DepthToSpace::DepthToSpaceMode::DEPTH_FIRST: + shape_begin.insert(shape_begin.begin() + 1, C); + break; } for (size_t i = 0; i < spatial_dims; ++i) { @@ -67,20 +68,20 @@ ngraph::pass::ConvertDepthToSpace::ConvertDepthToSpace() { // Calculate Transpose order std::vector order{0}; switch (mode) { - case ngraph::op::DepthToSpace::DepthToSpaceMode::BLOCKS_FIRST: - order.push_back(spatial_dims + 1); - for (size_t i = 1; i <= spatial_dims; ++i) { - order.push_back(spatial_dims + 1 + i); - order.push_back(i); - } - break; - case ngraph::op::DepthToSpace::DepthToSpaceMode::DEPTH_FIRST: - order.push_back(1); - for (size_t i = 1; i <= spatial_dims; 
++i) { - order.push_back(spatial_dims + 1 + i); - order.push_back(i + 1); - } - break; + case ngraph::op::DepthToSpace::DepthToSpaceMode::BLOCKS_FIRST: + order.push_back(spatial_dims + 1); + for (size_t i = 1; i <= spatial_dims; ++i) { + order.push_back(spatial_dims + 1 + i); + order.push_back(i); + } + break; + case ngraph::op::DepthToSpace::DepthToSpaceMode::DEPTH_FIRST: + order.push_back(1); + for (size_t i = 1; i <= spatial_dims; ++i) { + order.push_back(spatial_dims + 1 + i); + order.push_back(i + 1); + } + break; } // Calculate Reshape shape_end @@ -89,7 +90,7 @@ ngraph::pass::ConvertDepthToSpace::ConvertDepthToSpace() { shape_end.push_back(block_size * input_shape[2 + i]); } - auto create_constant = [](std::vector & v) -> std::shared_ptr { + auto create_constant = [](std::vector& v) -> std::shared_ptr { return op::Constant::create(element::i64, Shape{v.size()}, v); }; diff --git a/src/common/transformations/src/transformations/op_conversions/convert_divide.cpp b/src/common/transformations/src/transformations/op_conversions/convert_divide.cpp index e99631a13cb..0f13fbcf681 100644 --- a/src/common/transformations/src/transformations/op_conversions/convert_divide.cpp +++ b/src/common/transformations/src/transformations/op_conversions/convert_divide.cpp @@ -2,21 +2,19 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "itt.hpp" #include "transformations/op_conversions/convert_divide.hpp" -#include "transformations/utils/utils.hpp" - #include +#include +#include +#include +#include +#include #include -#include -#include -#include -#include -#include - +#include "itt.hpp" #include "transformations/rt_info/nonconvertible_divide.hpp" +#include "transformations/utils/utils.hpp" NGRAPH_RTTI_DEFINITION(ngraph::pass::ConvertDivide, "ConvertDivide", 0); NGRAPH_RTTI_DEFINITION(ngraph::pass::ConvertDivideWithConstant, "ConvertDivideWithConstant", 0); @@ -25,13 +23,13 @@ namespace { bool convert_divide(std::shared_ptr node) { auto div = std::dynamic_pointer_cast(node); // We can not apply this transformation in case with integer input data type - if (!div || ov::divide_is_nonconvertible(div) - || div->get_input_element_type(0).is_integral()) { + if (!div || ov::divide_is_nonconvertible(div) || div->get_input_element_type(0).is_integral()) { return false; } - std::shared_ptr pow = std::make_shared(div->input_value(1), - ngraph::op::Constant::create(div->get_input_element_type(1), ngraph::Shape{}, {-1})); + std::shared_ptr pow = std::make_shared( + div->input_value(1), + ngraph::op::Constant::create(div->get_input_element_type(1), ngraph::Shape{}, {-1})); if (std::dynamic_pointer_cast(div->get_input_node_shared_ptr(1))) { if (auto const_pow = ngraph::get_constant_from_source(pow)) { @@ -56,7 +54,7 @@ bool convert_divide(std::shared_ptr node) { } return true; } -} // namespace +} // namespace ngraph::pass::ConvertDivide::ConvertDivide() { MATCHER_SCOPE(ConvertDivide); @@ -72,8 +70,8 @@ ngraph::pass::ConvertDivide::ConvertDivide() { ngraph::pass::ConvertDivideWithConstant::ConvertDivideWithConstant() { MATCHER_SCOPE(ConvertDivideWithConstant); - auto div = ngraph::pattern::wrap_type( - {pattern::any_input(), pattern::wrap_type()}); + auto div = + ngraph::pattern::wrap_type({pattern::any_input(), pattern::wrap_type()}); ngraph::matcher_pass_callback callback = [](pattern::Matcher& m) { return convert_divide(m.get_match_root()); diff --git a/src/common/transformations/src/transformations/op_conversions/convert_gather_0d.cpp 
b/src/common/transformations/src/transformations/op_conversions/convert_gather_0d.cpp index 18a2f6a531b..40c2d7b5bf9 100644 --- a/src/common/transformations/src/transformations/op_conversions/convert_gather_0d.cpp +++ b/src/common/transformations/src/transformations/op_conversions/convert_gather_0d.cpp @@ -4,14 +4,13 @@ #include "transformations/op_conversions/convert_gather_0d.hpp" -#include "itt.hpp" - #include +#include +#include +#include #include -#include -#include -#include +#include "itt.hpp" NGRAPH_RTTI_DEFINITION(ngraph::pass::ConvertGather0D, "ConvertGather0D", 0); @@ -19,13 +18,14 @@ ngraph::pass::ConvertGather0D::ConvertGather0D() { MATCHER_SCOPE(ConvertGather0D); auto gather = ngraph::pattern::wrap_type(); - ngraph::matcher_pass_callback callback = [](pattern::Matcher &m) { + ngraph::matcher_pass_callback callback = [](pattern::Matcher& m) { auto gather = std::dynamic_pointer_cast(m.get_match_root()); if (!gather) { return false; } - auto axes_constant = std::dynamic_pointer_cast(gather->input_value(2).get_node_shared_ptr()); + auto axes_constant = + std::dynamic_pointer_cast(gather->input_value(2).get_node_shared_ptr()); if (!axes_constant) { return false; } @@ -39,9 +39,11 @@ ngraph::pass::ConvertGather0D::ConvertGather0D() { } auto axis = axes_constant->cast_vector()[0]; - indices = std::make_shared(indices, opset1::Constant::create(element::i64, Shape{1}, {0})); + indices = + std::make_shared(indices, opset1::Constant::create(element::i64, Shape{1}, {0})); auto gather_new = std::make_shared(gather->input_value(0), indices, axes_constant); - auto sq = std::make_shared(gather_new, opset1::Constant::create(element::i64, Shape{1}, {axis})); + auto sq = std::make_shared(gather_new, + opset1::Constant::create(element::i64, Shape{1}, {axis})); sq->set_friendly_name(gather->get_friendly_name()); ngraph::copy_runtime_info(gather, {indices.get_node_shared_ptr(), gather_new, sq}); diff --git a/src/common/transformations/src/transformations/op_conversions/convert_gather_downgrade.cpp b/src/common/transformations/src/transformations/op_conversions/convert_gather_downgrade.cpp index e573a9a847a..184829a35fb 100644 --- a/src/common/transformations/src/transformations/op_conversions/convert_gather_downgrade.cpp +++ b/src/common/transformations/src/transformations/op_conversions/convert_gather_downgrade.cpp @@ -3,11 +3,13 @@ // #include "transformations/op_conversions/convert_gather_downgrade.hpp" + #include #include #include -#include #include +#include + #include "itt.hpp" using namespace std; @@ -16,7 +18,6 @@ using namespace ngraph; NGRAPH_RTTI_DEFINITION(pass::ConvertGather7ToGather1, "ConvertGather7ToGather1", 0); NGRAPH_RTTI_DEFINITION(pass::ConvertGather8ToGather7, "ConvertGather8ToGather7", 0); - pass::ConvertGather7ToGather1::ConvertGather7ToGather1() { MATCHER_SCOPE(ConvertGather7ToGather1); diff --git a/src/common/transformations/src/transformations/op_conversions/convert_gather_upgrade.cpp b/src/common/transformations/src/transformations/op_conversions/convert_gather_upgrade.cpp index 7c45c39f5ab..207a432012c 100644 --- a/src/common/transformations/src/transformations/op_conversions/convert_gather_upgrade.cpp +++ b/src/common/transformations/src/transformations/op_conversions/convert_gather_upgrade.cpp @@ -3,11 +3,13 @@ // #include "transformations/op_conversions/convert_gather_upgrade.hpp" + #include #include #include -#include #include +#include + #include "itt.hpp" using namespace std; @@ -16,7 +18,6 @@ using namespace ngraph; 
NGRAPH_RTTI_DEFINITION(pass::ConvertGather1ToGather7, "ConvertGather1ToGather7", 0); NGRAPH_RTTI_DEFINITION(pass::ConvertGather7ToGather8, "ConvertGather7ToGather8", 0); - pass::ConvertGather1ToGather7::ConvertGather1ToGather7() { MATCHER_SCOPE(ConvertGather1ToGather7); diff --git a/src/common/transformations/src/transformations/op_conversions/convert_gelu.cpp b/src/common/transformations/src/transformations/op_conversions/convert_gelu.cpp index e4d043118e8..5c5395fccc0 100644 --- a/src/common/transformations/src/transformations/op_conversions/convert_gelu.cpp +++ b/src/common/transformations/src/transformations/op_conversions/convert_gelu.cpp @@ -2,14 +2,14 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "itt.hpp" #include +#include #include +#include +#include #include -#include -#include -#include +#include "itt.hpp" NGRAPH_RTTI_DEFINITION(ngraph::pass::ConvertGELU, "ConvertGELU", 0); @@ -25,11 +25,14 @@ ngraph::pass::ConvertGELU::ConvertGELU() { auto input_type = input.get_element_type(); // f(x) = 0.5 * x * (1.0 + erf( x / sqrt(2.0) ) - auto mul = std::make_shared(input, ngraph::opset1::Constant::create(input_type, Shape{}, {0.5})); + auto mul = + std::make_shared(input, + ngraph::opset1::Constant::create(input_type, Shape{}, {0.5})); auto sq2 = std::make_shared(ngraph::opset1::Constant::create(input_type, Shape{}, {2.0})); - auto div = register_new_node(input, sq2); // can be decomposed + auto div = register_new_node(input, sq2); // can be decomposed auto erf = std::make_shared(div); - auto add = std::make_shared(erf, ngraph::opset1::Constant::create(input_type, Shape{}, {1.0})); + auto add = + std::make_shared(erf, ngraph::opset1::Constant::create(input_type, Shape{}, {1.0})); auto res = std::make_shared(mul, add); res->set_friendly_name(gelu->get_friendly_name()); diff --git a/src/common/transformations/src/transformations/op_conversions/convert_interpolate1_to_interpolate4.cpp b/src/common/transformations/src/transformations/op_conversions/convert_interpolate1_to_interpolate4.cpp index de0feeb8dcb..227336e768b 100644 --- a/src/common/transformations/src/transformations/op_conversions/convert_interpolate1_to_interpolate4.cpp +++ b/src/common/transformations/src/transformations/op_conversions/convert_interpolate1_to_interpolate4.cpp @@ -2,25 +2,26 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "itt.hpp" #include "transformations/op_conversions/convert_interpolate1_to_interpolate4.hpp" #include -#include - #include #include #include -#include #include -#include +#include #include +#include +#include + +#include "itt.hpp" NGRAPH_RTTI_DEFINITION(ngraph::pass::ConvertInterpolate1ToInterpolate4, "ConvertInterpolate1ToInterpolate4", 0); ngraph::pass::ConvertInterpolate1ToInterpolate4::ConvertInterpolate1ToInterpolate4() { MATCHER_SCOPE(ConvertInterpolate1ToInterpolate4); - auto interpolate1 = ngraph::pattern::wrap_type({pattern::any_input(pattern::has_static_rank()), pattern::any_input()}); + auto interpolate1 = ngraph::pattern::wrap_type( + {pattern::any_input(pattern::has_static_rank()), pattern::any_input()}); ngraph::matcher_pass_callback callback = [this](pattern::Matcher& m) { auto interpolationV0 = std::dynamic_pointer_cast(m.get_match_root()); if (!interpolationV0) { @@ -30,8 +31,10 @@ ngraph::pass::ConvertInterpolate1ToInterpolate4::ConvertInterpolate1ToInterpolat auto attrsV0 = interpolationV0->get_attrs(); std::vector axes{attrsV0.axes.begin(), attrsV0.axes.end()}; const auto& out_dims = std::make_shared(interpolationV0->input_value(1), element::f32); - const 
auto& in_dims = std::make_shared(ngraph::op::util::node_to_get_shape_value_of_indices_from_shape_source( - interpolationV0->input_value(0), axes), element::f32); + const auto& in_dims = std::make_shared( + ngraph::op::util::node_to_get_shape_value_of_indices_from_shape_source(interpolationV0->input_value(0), + axes), + element::f32); std::shared_ptr scales = std::make_shared(out_dims, in_dims); if (const auto& constant = ov::get_constant_from_source(scales)) @@ -72,17 +75,29 @@ ngraph::pass::ConvertInterpolate1ToInterpolate4::ConvertInterpolate1ToInterpolat attrsV4.coordinate_transformation_mode = ngraph::opset4::Interpolate::CoordinateTransformMode::ASYMMETRIC; attrsV4.cube_coeff = -0.75f; if (attrsV0.align_corners) { - attrsV4.coordinate_transformation_mode = ngraph::opset4::Interpolate::CoordinateTransformMode::ALIGN_CORNERS; + attrsV4.coordinate_transformation_mode = + ngraph::opset4::Interpolate::CoordinateTransformMode::ALIGN_CORNERS; } else if ((attrsV4.mode == ngraph::op::v4::Interpolate::InterpolateMode::LINEAR_ONNX || attrsV4.mode == ngraph::op::v4::Interpolate::InterpolateMode::LINEAR) && - std::all_of(attrsV4.pads_begin.begin(), attrsV4.pads_begin.end(), [](size_t i){return i == 0;}) && - std::all_of(attrsV4.pads_end.begin(), attrsV4.pads_end.end(), [](size_t i){return i == 0;}) && - !(input_shape_rank - 2 == 2 && attrsV0.axes == AxisSet{2, 3})) { + std::all_of(attrsV4.pads_begin.begin(), + attrsV4.pads_begin.end(), + [](size_t i) { + return i == 0; + }) && + std::all_of(attrsV4.pads_end.begin(), + attrsV4.pads_end.end(), + [](size_t i) { + return i == 0; + }) && + !(input_shape_rank - 2 == 2 && attrsV0.axes == AxisSet{2, 3})) { attrsV4.coordinate_transformation_mode = ngraph::opset4::Interpolate::CoordinateTransformMode::HALF_PIXEL; } - auto interpolateV4 = std::make_shared(interpolationV0->input_value(0), interpolationV0->input_value(1), - scales, axisConstant, attrsV4); + auto interpolateV4 = std::make_shared(interpolationV0->input_value(0), + interpolationV0->input_value(1), + scales, + axisConstant, + attrsV4); interpolateV4->set_friendly_name(interpolationV0->get_friendly_name()); ngraph::copy_runtime_info(interpolationV0, interpolateV4); diff --git a/src/common/transformations/src/transformations/op_conversions/convert_matrix_nms_to_matrix_nms_ie.cpp b/src/common/transformations/src/transformations/op_conversions/convert_matrix_nms_to_matrix_nms_ie.cpp index e8ebccb356a..20a8139107f 100644 --- a/src/common/transformations/src/transformations/op_conversions/convert_matrix_nms_to_matrix_nms_ie.cpp +++ b/src/common/transformations/src/transformations/op_conversions/convert_matrix_nms_to_matrix_nms_ie.cpp @@ -2,19 +2,18 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "itt.hpp" -#include -#include +#include "transformations/op_conversions/convert_matrix_nms_to_matrix_nms_ie.hpp" +#include #include #include #include - -#include #include +#include +#include +#include "itt.hpp" #include "ngraph_ops/nms_static_shape_ie.hpp" -#include "transformations/op_conversions/convert_matrix_nms_to_matrix_nms_ie.hpp" NGRAPH_RTTI_DEFINITION(ngraph::pass::ConvertMatrixNmsToMatrixNmsIE, "ConvertMatrixNmsToMatrixNmsIE", 0); @@ -22,7 +21,7 @@ ngraph::pass::ConvertMatrixNmsToMatrixNmsIE::ConvertMatrixNmsToMatrixNmsIE(bool MATCHER_SCOPE(ConvertMatrixNmsToMatrixNmsIE); auto nms = ngraph::pattern::wrap_type(); - ngraph::matcher_pass_callback callback = [=](pattern::Matcher &m) { + ngraph::matcher_pass_callback callback = [=](pattern::Matcher& m) { auto nms = 
std::dynamic_pointer_cast(m.get_match_root()); if (!nms || transformation_callback(nms)) { return false; @@ -38,10 +37,9 @@ ngraph::pass::ConvertMatrixNmsToMatrixNmsIE::ConvertMatrixNmsToMatrixNmsIE(bool NodeVector new_ops; auto attrs = nms->get_attrs(); attrs.output_type = force_i32_output_type ? element::i32 : nms->get_output_type(); - auto nms_new = std::make_shared>( - new_args.at(0), - new_args.at(1), - attrs); + auto nms_new = std::make_shared>(new_args.at(0), + new_args.at(1), + attrs); new_ops.emplace_back(nms_new); Output output_0 = nms_new->output(0); diff --git a/src/common/transformations/src/transformations/op_conversions/convert_maxpool_downgrade.cpp b/src/common/transformations/src/transformations/op_conversions/convert_maxpool_downgrade.cpp index 761e9f8d90a..d04a54c4f12 100644 --- a/src/common/transformations/src/transformations/op_conversions/convert_maxpool_downgrade.cpp +++ b/src/common/transformations/src/transformations/op_conversions/convert_maxpool_downgrade.cpp @@ -3,17 +3,18 @@ // #include "transformations/op_conversions/convert_maxpool_downgrade.hpp" + #include #include -#include #include +#include #include + #include "itt.hpp" using namespace std; using namespace ngraph; - pass::ConvertMaxPool8ToMaxPool1::ConvertMaxPool8ToMaxPool1() { MATCHER_SCOPE(ConvertMaxPool8ToMaxPool1); @@ -30,12 +31,12 @@ pass::ConvertMaxPool8ToMaxPool1::ConvertMaxPool8ToMaxPool1() { return false; auto maxpool_v1_node = make_shared(maxpool_v8_node->input_value(0), - maxpool_v8_node->get_strides(), - maxpool_v8_node->get_pads_begin(), - maxpool_v8_node->get_pads_end(), - maxpool_v8_node->get_kernel(), - maxpool_v8_node->get_rounding_type(), - maxpool_v8_node->get_auto_pad()); + maxpool_v8_node->get_strides(), + maxpool_v8_node->get_pads_begin(), + maxpool_v8_node->get_pads_end(), + maxpool_v8_node->get_kernel(), + maxpool_v8_node->get_rounding_type(), + maxpool_v8_node->get_auto_pad()); auto out_name = ngraph::op::util::create_ie_output_name(maxpool_v8_node->output(0)); diff --git a/src/common/transformations/src/transformations/op_conversions/convert_maxpool_upgrade.cpp b/src/common/transformations/src/transformations/op_conversions/convert_maxpool_upgrade.cpp index 463c037cc77..211c6c6611d 100644 --- a/src/common/transformations/src/transformations/op_conversions/convert_maxpool_upgrade.cpp +++ b/src/common/transformations/src/transformations/op_conversions/convert_maxpool_upgrade.cpp @@ -3,11 +3,13 @@ // #include "transformations/op_conversions/convert_maxpool_upgrade.hpp" + #include #include -#include #include +#include #include + #include "itt.hpp" ngraph::pass::ConvertMaxPool1ToMaxPool8::ConvertMaxPool1ToMaxPool8() { diff --git a/src/common/transformations/src/transformations/op_conversions/convert_minimum_to_power_and_max.cpp b/src/common/transformations/src/transformations/op_conversions/convert_minimum_to_power_and_max.cpp index 5d9748925d7..3611b8cb0de 100644 --- a/src/common/transformations/src/transformations/op_conversions/convert_minimum_to_power_and_max.cpp +++ b/src/common/transformations/src/transformations/op_conversions/convert_minimum_to_power_and_max.cpp @@ -2,15 +2,15 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "itt.hpp" #include "transformations/op_conversions/convert_minimum_to_power_and_max.hpp" #include +#include +#include +#include #include -#include -#include -#include +#include "itt.hpp" NGRAPH_RTTI_DEFINITION(ngraph::pass::ConvertMinimum, "ConvertMinimum", 0); @@ -19,8 +19,8 @@ ngraph::pass::ConvertMinimum::ConvertMinimum() { auto minimum = 
ngraph::pattern::wrap_type(); ngraph::matcher_pass_callback callback = [this](pattern::Matcher& m) { - auto minimum = std::dynamic_pointer_cast (m.get_match_root()); - if (!minimum || transformation_callback(minimum) || !minimum->get_output_element_type(0).is_signed()) { + auto minimum = std::dynamic_pointer_cast(m.get_match_root()); + if (!minimum || transformation_callback(minimum) || !minimum->get_output_element_type(0).is_signed()) { return false; } @@ -29,15 +29,19 @@ ngraph::pass::ConvertMinimum::ConvertMinimum() { * Mul(-1)--' */ - auto neg_0 = std::make_shared(minimum->input(0).get_source_output(), - opset1::Constant::create(minimum->get_input_element_type(0), Shape{}, {-1})); + auto neg_0 = std::make_shared( + minimum->input(0).get_source_output(), + opset1::Constant::create(minimum->get_input_element_type(0), Shape{}, {-1})); - auto neg_1 = std::make_shared(minimum->input(1).get_source_output(), - opset1::Constant::create(minimum->get_input_element_type(1), Shape{}, {-1})); + auto neg_1 = std::make_shared( + minimum->input(1).get_source_output(), + opset1::Constant::create(minimum->get_input_element_type(1), Shape{}, {-1})); auto max = std::make_shared(neg_0, neg_1); - auto neg_2 = std::make_shared(max, opset1::Constant::create(max->get_element_type(), Shape{}, {-1})); + auto neg_2 = std::make_shared( + max, + opset1::Constant::create(max->get_element_type(), Shape{}, {-1})); neg_2->set_friendly_name(minimum->get_friendly_name()); ngraph::copy_runtime_info(minimum, {neg_0, neg_1, max, neg_2}); diff --git a/src/common/transformations/src/transformations/op_conversions/convert_mod.cpp b/src/common/transformations/src/transformations/op_conversions/convert_mod.cpp index cbac3d48228..51e93ced9d0 100644 --- a/src/common/transformations/src/transformations/op_conversions/convert_mod.cpp +++ b/src/common/transformations/src/transformations/op_conversions/convert_mod.cpp @@ -2,15 +2,15 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "itt.hpp" #include "transformations/op_conversions/convert_mod.hpp" #include +#include +#include +#include #include -#include -#include -#include +#include "itt.hpp" NGRAPH_RTTI_DEFINITION(ngraph::pass::ConvertMod, "ConvertMod", 0); @@ -19,7 +19,7 @@ ngraph::pass::ConvertMod::ConvertMod() { auto mod = ngraph::pattern::wrap_type(); ngraph::matcher_pass_callback callback = [this](pattern::Matcher& m) { - auto mod = std::dynamic_pointer_cast (m.get_match_root()); + auto mod = std::dynamic_pointer_cast(m.get_match_root()); if (!mod) { return false; } @@ -42,7 +42,9 @@ ngraph::pass::ConvertMod::ConvertMod() { auto mul = std::make_shared(dividend_sign, sub); mul->set_friendly_name(mod->get_friendly_name()); - ngraph::copy_runtime_info(mod, {dividend, dividend_sign, divisor, div, convert_to_i64, convert, multiplication, sub, mul}); + ngraph::copy_runtime_info( + mod, + {dividend, dividend_sign, divisor, div, convert_to_i64, convert, multiplication, sub, mul}); ngraph::replace_node(mod, mul); return true; }; diff --git a/src/common/transformations/src/transformations/op_conversions/convert_multiclass_nms_to_multiclass_nms_ie.cpp b/src/common/transformations/src/transformations/op_conversions/convert_multiclass_nms_to_multiclass_nms_ie.cpp index 596fbde42d4..26683c7e135 100644 --- a/src/common/transformations/src/transformations/op_conversions/convert_multiclass_nms_to_multiclass_nms_ie.cpp +++ b/src/common/transformations/src/transformations/op_conversions/convert_multiclass_nms_to_multiclass_nms_ie.cpp @@ -2,19 +2,18 @@ // SPDX-License-Identifier: Apache-2.0 
// -#include "itt.hpp" -#include -#include +#include "transformations/op_conversions/convert_multiclass_nms_to_multiclass_nms_ie.hpp" +#include #include #include #include - -#include #include +#include +#include +#include "itt.hpp" #include "ngraph_ops/nms_static_shape_ie.hpp" -#include "transformations/op_conversions/convert_multiclass_nms_to_multiclass_nms_ie.hpp" NGRAPH_RTTI_DEFINITION(ngraph::pass::ConvertMulticlassNmsToMulticlassNmsIE, "ConvertMulticlassNmsToMulticlassNmsIE", 0); @@ -22,7 +21,7 @@ ngraph::pass::ConvertMulticlassNmsToMulticlassNmsIE::ConvertMulticlassNmsToMulti MATCHER_SCOPE(ConvertMulticlassNmsToMulticlassNmsIE); auto nms = ngraph::pattern::wrap_type(); - ngraph::matcher_pass_callback callback = [=](pattern::Matcher &m) { + ngraph::matcher_pass_callback callback = [=](pattern::Matcher& m) { auto nms = std::dynamic_pointer_cast(m.get_match_root()); if (!nms || transformation_callback(nms)) { return false; @@ -39,10 +38,9 @@ ngraph::pass::ConvertMulticlassNmsToMulticlassNmsIE::ConvertMulticlassNmsToMulti auto attrs = nms->get_attrs(); attrs.output_type = force_i32_output_type ? element::i32 : nms->get_output_type(); - auto nms_new = std::make_shared>( - new_args.at(0), - new_args.at(1), - attrs); + auto nms_new = std::make_shared>(new_args.at(0), + new_args.at(1), + attrs); new_ops.emplace_back(nms_new); Output output_0 = nms_new->output(0); diff --git a/src/common/transformations/src/transformations/op_conversions/convert_mvn1_to_mvn6.cpp b/src/common/transformations/src/transformations/op_conversions/convert_mvn1_to_mvn6.cpp index 4f42fbc59bc..773f3e63a82 100644 --- a/src/common/transformations/src/transformations/op_conversions/convert_mvn1_to_mvn6.cpp +++ b/src/common/transformations/src/transformations/op_conversions/convert_mvn1_to_mvn6.cpp @@ -4,13 +4,11 @@ #include "transformations/op_conversions/convert_mvn1_to_mvn6.hpp" -#include - -#include - #include #include #include +#include +#include #include "itt.hpp" @@ -37,12 +35,12 @@ ngraph::pass::ConvertMVN1ToMVN6::ConvertMVN1ToMVN6() { } std::vector axes_v(input_rank.get_length() - start_axis); std::iota(axes_v.begin(), axes_v.end(), start_axis); - auto axes = opset6::Constant::create(ngraph::element::i64, { axes_v.size() }, axes_v); + auto axes = opset6::Constant::create(ngraph::element::i64, {axes_v.size()}, axes_v); auto mvn6_node = std::make_shared(input, - axes, - mvn_node->get_normalize_variance(), - mvn_node->get_eps(), - ngraph::op::MVNEpsMode::OUTSIDE_SQRT); + axes, + mvn_node->get_normalize_variance(), + mvn_node->get_eps(), + ngraph::op::MVNEpsMode::OUTSIDE_SQRT); mvn6_node->set_friendly_name(mvn_node->get_friendly_name()); ngraph::copy_runtime_info(mvn_node, mvn6_node); diff --git a/src/common/transformations/src/transformations/op_conversions/convert_negative.cpp b/src/common/transformations/src/transformations/op_conversions/convert_negative.cpp index 0f66711452b..6b852d9fadb 100644 --- a/src/common/transformations/src/transformations/op_conversions/convert_negative.cpp +++ b/src/common/transformations/src/transformations/op_conversions/convert_negative.cpp @@ -2,15 +2,15 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "itt.hpp" #include "transformations/op_conversions/convert_negative.hpp" #include +#include +#include +#include #include -#include -#include -#include +#include "itt.hpp" NGRAPH_RTTI_DEFINITION(ngraph::pass::ConvertNegative, "ConvertNegative", 0); @@ -19,13 +19,14 @@ ngraph::pass::ConvertNegative::ConvertNegative() { auto neg = ngraph::pattern::wrap_type(); 
ngraph::matcher_pass_callback callback = [](pattern::Matcher& m) { - auto neg = std::dynamic_pointer_cast (m.get_match_root()); + auto neg = std::dynamic_pointer_cast(m.get_match_root()); if (!neg) { return false; } - auto mul = std::make_shared(neg->input(0).get_source_output(), - opset1::Constant::create(neg->get_element_type(), Shape{}, {-1})); + auto mul = std::make_shared( + neg->input(0).get_source_output(), + opset1::Constant::create(neg->get_element_type(), Shape{}, {-1})); mul->set_friendly_name(neg->get_friendly_name()); ngraph::copy_runtime_info(neg, mul); ngraph::replace_node(neg, mul); diff --git a/src/common/transformations/src/transformations/op_conversions/convert_nms_to_nms_ie_internal.cpp b/src/common/transformations/src/transformations/op_conversions/convert_nms_to_nms_ie_internal.cpp index b1e6d90f775..4445a06008c 100644 --- a/src/common/transformations/src/transformations/op_conversions/convert_nms_to_nms_ie_internal.cpp +++ b/src/common/transformations/src/transformations/op_conversions/convert_nms_to_nms_ie_internal.cpp @@ -2,18 +2,17 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "itt.hpp" -#include -#include +#include "transformations/op_conversions/convert_nms_to_nms_ie_internal.hpp" +#include #include #include - -#include #include +#include +#include +#include "itt.hpp" #include "ngraph_ops/nms_ie_internal.hpp" -#include "transformations/op_conversions/convert_nms_to_nms_ie_internal.hpp" #include "transformations/utils/utils.hpp" NGRAPH_RTTI_DEFINITION(ngraph::pass::ConvertNMSToNMSIEInternal, "ConvertNMSToNMSIEInternal", 0); @@ -22,7 +21,7 @@ ngraph::pass::ConvertNMSToNMSIEInternal::ConvertNMSToNMSIEInternal() { MATCHER_SCOPE(ConvertNMSToNMSIEInternal); auto nms = ngraph::pattern::wrap_type(); - ngraph::matcher_pass_callback callback = [=](pattern::Matcher &m) { + ngraph::matcher_pass_callback callback = [=](pattern::Matcher& m) { auto nms_5 = std::dynamic_pointer_cast(m.get_match_root()); if (!nms_5 || transformation_callback(nms_5)) { return false; @@ -31,9 +30,12 @@ ngraph::pass::ConvertNMSToNMSIEInternal::ConvertNMSToNMSIEInternal() { const auto new_args = nms_5->input_values(); const std::size_t num_of_inputs = new_args.size(); - const auto& arg2 = num_of_inputs > 2 ? new_args.at(2) : ngraph::opset5::Constant::create(element::i32, Shape{}, {0}); - const auto& arg3 = num_of_inputs > 3 ? new_args.at(3) : ngraph::opset5::Constant::create(element::f32, Shape{}, {.0f}); - const auto& arg4 = num_of_inputs > 4 ? new_args.at(4) : ngraph::opset5::Constant::create(element::f32, Shape{}, {.0f}); + const auto& arg2 = + num_of_inputs > 2 ? new_args.at(2) : ngraph::opset5::Constant::create(element::i32, Shape{}, {0}); + const auto& arg3 = + num_of_inputs > 3 ? new_args.at(3) : ngraph::opset5::Constant::create(element::f32, Shape{}, {.0f}); + const auto& arg4 = + num_of_inputs > 4 ? 
new_args.at(4) : ngraph::opset5::Constant::create(element::f32, Shape{}, {.0f}); // vector of new nGraph operations NodeVector new_ops; @@ -61,15 +63,15 @@ ngraph::pass::ConvertNMSToNMSIEInternal::ConvertNMSToNMSIEInternal() { int center_point_box = 0; switch (nms_5->get_box_encoding()) { - case ::ngraph::opset5::NonMaxSuppression::BoxEncodingType::CENTER: - center_point_box = 1; - break; - case ::ngraph::opset5::NonMaxSuppression::BoxEncodingType::CORNER: - center_point_box = 0; - break; - default: - throw ngraph_error("NonMaxSuppression layer " + nms_5->get_friendly_name() + - " has unsupported box encoding"); + case ::ngraph::opset5::NonMaxSuppression::BoxEncodingType::CENTER: + center_point_box = 1; + break; + case ::ngraph::opset5::NonMaxSuppression::BoxEncodingType::CORNER: + center_point_box = 0; + break; + default: + throw ngraph_error("NonMaxSuppression layer " + nms_5->get_friendly_name() + + " has unsupported box encoding"); } std::shared_ptr nms_legacy{nullptr}; @@ -77,29 +79,29 @@ ngraph::pass::ConvertNMSToNMSIEInternal::ConvertNMSToNMSIEInternal() { if (num_of_inputs > 5 && !nms_5->is_soft_nms_sigma_constant_and_default()) { new_soft_nms_sigma = std::make_shared(new_args.at(5), new_shape_for_soft_nms_sigma, true); new_ops.emplace_back(new_soft_nms_sigma.get_node_shared_ptr()); - nms_legacy = std::make_shared( - new_args.at(0), - new_args.at(1), - new_max_per_class, - new_iou_threshold, - new_score_threshold, - new_soft_nms_sigma, - center_point_box, - nms_5->get_sort_result_descending(), - element::i32, - nms_5->get_output_element_type(1)); + nms_legacy = + std::make_shared(new_args.at(0), + new_args.at(1), + new_max_per_class, + new_iou_threshold, + new_score_threshold, + new_soft_nms_sigma, + center_point_box, + nms_5->get_sort_result_descending(), + element::i32, + nms_5->get_output_element_type(1)); new_ops.push_back(nms_legacy); } else { - nms_legacy = std::make_shared( - new_args.at(0), - new_args.at(1), - new_max_per_class, - new_iou_threshold, - new_score_threshold, - center_point_box, - nms_5->get_sort_result_descending(), - element::i32, - nms_5->get_output_element_type(1)); + nms_legacy = + std::make_shared(new_args.at(0), + new_args.at(1), + new_max_per_class, + new_iou_threshold, + new_score_threshold, + center_point_box, + nms_5->get_sort_result_descending(), + element::i32, + nms_5->get_output_element_type(1)); new_ops.push_back(nms_legacy); } diff --git a/src/common/transformations/src/transformations/op_conversions/convert_pad_to_group_conv.cpp b/src/common/transformations/src/transformations/op_conversions/convert_pad_to_group_conv.cpp index c5695b6eb3b..c5291cd88bd 100644 --- a/src/common/transformations/src/transformations/op_conversions/convert_pad_to_group_conv.cpp +++ b/src/common/transformations/src/transformations/op_conversions/convert_pad_to_group_conv.cpp @@ -2,16 +2,16 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "itt.hpp" #include "transformations/op_conversions/convert_pad_to_group_conv.hpp" #include +#include +#include +#include +#include #include -#include -#include -#include -#include +#include "itt.hpp" NGRAPH_RTTI_DEFINITION(ngraph::pass::ConvertPadToGroupConvolution, "ConvertPadToGroupConvolution", 0); @@ -20,14 +20,14 @@ ngraph::pass::ConvertPadToGroupConvolution::ConvertPadToGroupConvolution() { auto neg = ngraph::pattern::wrap_type(pattern::has_static_dim(1)); ngraph::matcher_pass_callback callback = [this](pattern::Matcher& m) { - auto pad = std::dynamic_pointer_cast (m.get_match_root()); + auto pad = 
std::dynamic_pointer_cast(m.get_match_root()); if (!pad) { return false; } auto input = pad->input_value(0); - const auto & channel_dim = input.get_partial_shape()[1].get_length(); - const auto & rank = input.get_partial_shape().rank().get_length(); + const auto& channel_dim = input.get_partial_shape()[1].get_length(); + const auto& rank = input.get_partial_shape().rank().get_length(); if (rank < 4) { // We can not create Convolution without spatial dimensions. // Also creating Convolution with single spatial dimension won't be effective as @@ -41,7 +41,8 @@ ngraph::pass::ConvertPadToGroupConvolution::ConvertPadToGroupConvolution() { } if (pad->inputs().size() == 4) { - if (auto pad_value = std::dynamic_pointer_cast(pad->input_value(3).get_node_shared_ptr())) { + if (auto pad_value = + std::dynamic_pointer_cast(pad->input_value(3).get_node_shared_ptr())) { // pad value is a scalar if (pad_value->cast_vector()[0] != 0) { return false; @@ -50,8 +51,8 @@ ngraph::pass::ConvertPadToGroupConvolution::ConvertPadToGroupConvolution() { } // Check that Pad has padding only for spatial dimensions - const auto & pad_begin = pad->get_pads_begin(); - const auto & pad_end = pad->get_pads_end(); + const auto& pad_begin = pad->get_pads_begin(); + const auto& pad_end = pad->get_pads_end(); if (pad_begin.empty() || pad_end.empty()) { // pads will be empty if inputs are not constants @@ -59,14 +60,20 @@ ngraph::pass::ConvertPadToGroupConvolution::ConvertPadToGroupConvolution() { } // Check that not spatial dimension are not padded - if (std::any_of(pad_begin.begin(), pad_begin.begin() + 2, [](ptrdiff_t value) { return value != 0; }) || - std::any_of(pad_end.begin(), pad_end.begin() + 2, [](ptrdiff_t value) { return value != 0; })) { + if (std::any_of(pad_begin.begin(), + pad_begin.begin() + 2, + [](ptrdiff_t value) { + return value != 0; + }) || + std::any_of(pad_end.begin(), pad_end.begin() + 2, [](ptrdiff_t value) { + return value != 0; + })) { return false; } // Create fake weights with ones GOIXY Shape weights_shape(rank + 1, 1); - weights_shape[0] = channel_dim; // G dimension + weights_shape[0] = channel_dim; // G dimension auto weights = opset4::Constant::create(pad->input(0).get_element_type(), weights_shape, {1}); // Create GroupConvolution attributes @@ -74,7 +81,8 @@ ngraph::pass::ConvertPadToGroupConvolution::ConvertPadToGroupConvolution() { CoordinateDiff new_pad_begin{pad_begin.begin() + 2, pad_begin.end()}; CoordinateDiff new_pad_end{pad_end.begin() + 2, pad_end.end()}; - auto conv = std::make_shared(input, weights, stride, new_pad_begin, new_pad_end, stride); + auto conv = + std::make_shared(input, weights, stride, new_pad_begin, new_pad_end, stride); conv->set_friendly_name(pad->get_friendly_name()); ngraph::copy_runtime_info(pad, conv); diff --git a/src/common/transformations/src/transformations/op_conversions/convert_previous_nms_to_nms_5.cpp b/src/common/transformations/src/transformations/op_conversions/convert_previous_nms_to_nms_5.cpp index c81e1e1caeb..10dc4ba635e 100644 --- a/src/common/transformations/src/transformations/op_conversions/convert_previous_nms_to_nms_5.cpp +++ b/src/common/transformations/src/transformations/op_conversions/convert_previous_nms_to_nms_5.cpp @@ -2,19 +2,19 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "itt.hpp" #include "transformations/op_conversions/convert_previous_nms_to_nms_5.hpp" #include #include -#include - #include #include #include #include -#include #include +#include +#include + +#include "itt.hpp" using namespace ngraph; @@ -26,138 
+26,136 @@ struct NMSAttributes { bool is_supported_nms; }; - NMSAttributes get_nms4_attrs(const std::shared_ptr& nms4) { - NMSAttributes attrs; +NMSAttributes get_nms4_attrs(const std::shared_ptr& nms4) { + NMSAttributes attrs; + attrs.box_encoding = ::ngraph::opset5::NonMaxSuppression::BoxEncodingType::CORNER; + attrs.is_supported_nms = true; + attrs.sort_result_descending = true; + attrs.output_type = ::ngraph::element::i64; + + switch (nms4->get_box_encoding()) { + case ::ngraph::opset4::NonMaxSuppression::BoxEncodingType::CENTER: + attrs.box_encoding = ::ngraph::opset5::NonMaxSuppression::BoxEncodingType::CENTER; + break; + case ::ngraph::opset4::NonMaxSuppression::BoxEncodingType::CORNER: attrs.box_encoding = ::ngraph::opset5::NonMaxSuppression::BoxEncodingType::CORNER; - attrs.is_supported_nms = true; - attrs.sort_result_descending = true; - attrs.output_type = ::ngraph::element::i64; - - switch (nms4->get_box_encoding()) { - case ::ngraph::opset4::NonMaxSuppression::BoxEncodingType::CENTER: - attrs.box_encoding = ::ngraph::opset5::NonMaxSuppression::BoxEncodingType::CENTER; - break; - case ::ngraph::opset4::NonMaxSuppression::BoxEncodingType::CORNER: - attrs.box_encoding = ::ngraph::opset5::NonMaxSuppression::BoxEncodingType::CORNER; - break; - default: - throw ngraph_error("NonMaxSuppression layer " + nms4->get_friendly_name() + - " has unsupported box encoding"); - } - - attrs.sort_result_descending = nms4->get_sort_result_descending(); - attrs.output_type = nms4->get_output_type(); - - return attrs; + break; + default: + throw ngraph_error("NonMaxSuppression layer " + nms4->get_friendly_name() + " has unsupported box encoding"); } - NMSAttributes get_nms3_attrs(const std::shared_ptr& nms3) { - NMSAttributes attrs; + attrs.sort_result_descending = nms4->get_sort_result_descending(); + attrs.output_type = nms4->get_output_type(); + return attrs; +} + +NMSAttributes get_nms3_attrs(const std::shared_ptr& nms3) { + NMSAttributes attrs; + + attrs.box_encoding = ::ngraph::opset5::NonMaxSuppression::BoxEncodingType::CORNER; + attrs.is_supported_nms = true; + attrs.sort_result_descending = true; + attrs.output_type = ::ngraph::element::i64; + + switch (nms3->get_box_encoding()) { + case ::ngraph::opset3::NonMaxSuppression::BoxEncodingType::CENTER: + attrs.box_encoding = ::ngraph::opset5::NonMaxSuppression::BoxEncodingType::CENTER; + break; + case ::ngraph::opset3::NonMaxSuppression::BoxEncodingType::CORNER: attrs.box_encoding = ::ngraph::opset5::NonMaxSuppression::BoxEncodingType::CORNER; - attrs.is_supported_nms = true; - attrs.sort_result_descending = true; - attrs.output_type = ::ngraph::element::i64; - - switch (nms3->get_box_encoding()) { - case ::ngraph::opset3::NonMaxSuppression::BoxEncodingType::CENTER: - attrs.box_encoding = ::ngraph::opset5::NonMaxSuppression::BoxEncodingType::CENTER; - break; - case ::ngraph::opset3::NonMaxSuppression::BoxEncodingType::CORNER: - attrs.box_encoding = ::ngraph::opset5::NonMaxSuppression::BoxEncodingType::CORNER; - break; - default: - throw ngraph_error("NonMaxSuppression layer " + nms3->get_friendly_name() + - " has unsupported box encoding"); - } - - attrs.sort_result_descending = nms3->get_sort_result_descending(); - attrs.output_type = nms3->get_output_type(); - - return attrs; + break; + default: + throw ngraph_error("NonMaxSuppression layer " + nms3->get_friendly_name() + " has unsupported box encoding"); } - NMSAttributes get_nms1_attrs(const std::shared_ptr& nms1) { - NMSAttributes attrs; + attrs.sort_result_descending = 
nms3->get_sort_result_descending(); + attrs.output_type = nms3->get_output_type(); + return attrs; +} + +NMSAttributes get_nms1_attrs(const std::shared_ptr& nms1) { + NMSAttributes attrs; + + attrs.box_encoding = ::ngraph::opset5::NonMaxSuppression::BoxEncodingType::CORNER; + attrs.is_supported_nms = true; + attrs.sort_result_descending = true; + attrs.output_type = ::ngraph::element::i64; + + switch (nms1->get_box_encoding()) { + case ::ngraph::opset1::NonMaxSuppression::BoxEncodingType::CENTER: + attrs.box_encoding = ::ngraph::opset5::NonMaxSuppression::BoxEncodingType::CENTER; + break; + case ::ngraph::opset1::NonMaxSuppression::BoxEncodingType::CORNER: attrs.box_encoding = ::ngraph::opset5::NonMaxSuppression::BoxEncodingType::CORNER; - attrs.is_supported_nms = true; - attrs.sort_result_descending = true; - attrs.output_type = ::ngraph::element::i64; - - switch (nms1->get_box_encoding()) { - case ::ngraph::opset1::NonMaxSuppression::BoxEncodingType::CENTER: - attrs.box_encoding = ::ngraph::opset5::NonMaxSuppression::BoxEncodingType::CENTER; - break; - case ::ngraph::opset1::NonMaxSuppression::BoxEncodingType::CORNER: - attrs.box_encoding = ::ngraph::opset5::NonMaxSuppression::BoxEncodingType::CORNER; - break; - default: - throw ngraph_error("NonMaxSuppression layer " + nms1->get_friendly_name() + - " has unsupported box encoding"); - } - - attrs.sort_result_descending = nms1->get_sort_result_descending(); - - return attrs; + break; + default: + throw ngraph_error("NonMaxSuppression layer " + nms1->get_friendly_name() + " has unsupported box encoding"); } - NMSAttributes get_nms_attrs(const std::shared_ptr& root) { - NMSAttributes attrs; - attrs.output_type = ::ngraph::element::i64; - attrs.box_encoding = ::ngraph::opset5::NonMaxSuppression::BoxEncodingType::CORNER; - attrs.sort_result_descending = false; - attrs.is_supported_nms = false; + attrs.sort_result_descending = nms1->get_sort_result_descending(); - auto nms_4 = std::dynamic_pointer_cast(root); - if (nms_4) { - return get_nms4_attrs(nms_4); - } - auto nms_3 = std::dynamic_pointer_cast(root); - if (nms_3) { - return get_nms3_attrs(nms_3); - } - auto nms_1 = std::dynamic_pointer_cast(root); - if (nms_1) { - return get_nms1_attrs(nms_1); - } + return attrs; +} - return attrs; +NMSAttributes get_nms_attrs(const std::shared_ptr& root) { + NMSAttributes attrs; + attrs.output_type = ::ngraph::element::i64; + attrs.box_encoding = ::ngraph::opset5::NonMaxSuppression::BoxEncodingType::CORNER; + attrs.sort_result_descending = false; + attrs.is_supported_nms = false; + + auto nms_4 = std::dynamic_pointer_cast(root); + if (nms_4) { + return get_nms4_attrs(nms_4); + } + auto nms_3 = std::dynamic_pointer_cast(root); + if (nms_3) { + return get_nms3_attrs(nms_3); + } + auto nms_1 = std::dynamic_pointer_cast(root); + if (nms_1) { + return get_nms1_attrs(nms_1); } - bool callback_func(pattern::Matcher &m, pass::MatcherPass * impl) { - auto root = m.get_match_root(); + return attrs; +} - auto attrs = get_nms_attrs(root); - if (!attrs.is_supported_nms) { - return false; - } +bool callback_func(pattern::Matcher& m, pass::MatcherPass* impl) { + auto root = m.get_match_root(); - const auto new_args = root->input_values(); - - size_t num_of_args = new_args.size(); - - const auto& arg2 = num_of_args > 2 ? new_args.at(2) : ngraph::opset5::Constant::create(element::i64, Shape{}, {0}); - const auto& arg3 = num_of_args > 3 ? new_args.at(3) : ngraph::opset5::Constant::create(element::f32, Shape{}, {.0f}); - const auto& arg4 = num_of_args > 4 ? 
new_args.at(4) : ngraph::opset5::Constant::create(element::f32, Shape{}, {.0f}); - - const auto nms_5 = impl->register_new_node( - new_args.at(0), - new_args.at(1), - arg2, - arg3, - arg4, - attrs.box_encoding, - attrs.sort_result_descending, - attrs.output_type); - - nms_5->set_friendly_name(root->get_friendly_name()); - ngraph::copy_runtime_info(root, nms_5); - root->output(0).replace(nms_5->output(0)); - return true; + auto attrs = get_nms_attrs(root); + if (!attrs.is_supported_nms) { + return false; } -} // namespace + + const auto new_args = root->input_values(); + + size_t num_of_args = new_args.size(); + + const auto& arg2 = num_of_args > 2 ? new_args.at(2) : ngraph::opset5::Constant::create(element::i64, Shape{}, {0}); + const auto& arg3 = + num_of_args > 3 ? new_args.at(3) : ngraph::opset5::Constant::create(element::f32, Shape{}, {.0f}); + const auto& arg4 = + num_of_args > 4 ? new_args.at(4) : ngraph::opset5::Constant::create(element::f32, Shape{}, {.0f}); + + const auto nms_5 = impl->register_new_node(new_args.at(0), + new_args.at(1), + arg2, + arg3, + arg4, + attrs.box_encoding, + attrs.sort_result_descending, + attrs.output_type); + + nms_5->set_friendly_name(root->get_friendly_name()); + ngraph::copy_runtime_info(root, nms_5); + root->output(0).replace(nms_5->output(0)); + return true; +} +} // namespace NGRAPH_RTTI_DEFINITION(ngraph::pass::ConvertNMS4ToNMS5, "ConvertNMS4ToNMS5", 0); diff --git a/src/common/transformations/src/transformations/op_conversions/convert_prior_box_v8_to_v0.cpp b/src/common/transformations/src/transformations/op_conversions/convert_prior_box_v8_to_v0.cpp index c2b8a676484..467ad82f826 100644 --- a/src/common/transformations/src/transformations/op_conversions/convert_prior_box_v8_to_v0.cpp +++ b/src/common/transformations/src/transformations/op_conversions/convert_prior_box_v8_to_v0.cpp @@ -41,7 +41,9 @@ ngraph::pass::ConvertPriorBox8To0::ConvertPriorBox8To0() { attrs_v0.variance = attrs_v8.variance; attrs_v0.scale_all_sizes = attrs_v8.scale_all_sizes; - auto prior_box_v0 = std::make_shared(prior_box_v8_node->input_value(0), prior_box_v8_node->input_value(1), attrs_v0); + auto prior_box_v0 = std::make_shared(prior_box_v8_node->input_value(0), + prior_box_v8_node->input_value(1), + attrs_v0); prior_box_v0->set_friendly_name(prior_box_v8_node->get_friendly_name()); ngraph::copy_runtime_info(prior_box_v8_node, prior_box_v0); ngraph::replace_node(prior_box_v8_node, prior_box_v0); diff --git a/src/common/transformations/src/transformations/op_conversions/convert_reduce_to_pooling.cpp b/src/common/transformations/src/transformations/op_conversions/convert_reduce_to_pooling.cpp index 2010cad4dc6..2bdf5dbe02c 100644 --- a/src/common/transformations/src/transformations/op_conversions/convert_reduce_to_pooling.cpp +++ b/src/common/transformations/src/transformations/op_conversions/convert_reduce_to_pooling.cpp @@ -2,9 +2,10 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "itt.hpp" #include "transformations/op_conversions/convert_reduce_to_pooling.hpp" +#include "itt.hpp" + NGRAPH_RTTI_DEFINITION(ngraph::pass::ConvertReduceToPooling, "ConvertReduceToPooling", 0); NGRAPH_RTTI_DEFINITION(ngraph::pass::ConvertReduceMeanToPooling, "ConvertReduceMeanToPooling", 0); NGRAPH_RTTI_DEFINITION(ngraph::pass::ConvertReduceMaxToPooling, "ConvertReduceMaxToPooling", 0); @@ -12,24 +13,28 @@ NGRAPH_RTTI_DEFINITION(ngraph::pass::ConvertReduceSumToPooling, "ConvertReduceSu ngraph::pass::ConvertReduceMeanToPooling::ConvertReduceMeanToPooling() { 
MATCHER_SCOPE(ConvertReduceMeanToPooling); - auto m = std::make_shared(ngraph::pattern::wrap_type({pattern::any_input(pattern::has_static_shape()), - pattern::wrap_type()}, - pattern::has_static_shape()), matcher_name); + auto m = std::make_shared( + ngraph::pattern::wrap_type( + {pattern::any_input(pattern::has_static_shape()), pattern::wrap_type()}, + pattern::has_static_shape()), + matcher_name); register_matcher(m, convert_reduce_to_pooling()); } ngraph::pass::ConvertReduceMaxToPooling::ConvertReduceMaxToPooling() { MATCHER_SCOPE(ConvertReduceMaxToPooling); auto m = std::make_shared( - ngraph::pattern::wrap_type({pattern::any_input(pattern::has_static_shape()), - pattern::wrap_type()}, - pattern::has_static_shape()), matcher_name); + ngraph::pattern::wrap_type( + {pattern::any_input(pattern::has_static_shape()), pattern::wrap_type()}, + pattern::has_static_shape()), + matcher_name); register_matcher(m, convert_reduce_to_pooling()); } ngraph::pass::ConvertReduceSumToPooling::ConvertReduceSumToPooling() { MATCHER_SCOPE(ConvertReduceSumToPooling); auto m = std::make_shared( - ngraph::pattern::wrap_type({pattern::any_input(pattern::has_static_shape()), - pattern::wrap_type()}, - pattern::has_static_shape()), matcher_name); + ngraph::pattern::wrap_type( + {pattern::any_input(pattern::has_static_shape()), pattern::wrap_type()}, + pattern::has_static_shape()), + matcher_name); register_matcher(m, convert_reduce_to_pooling()); } diff --git a/src/common/transformations/src/transformations/op_conversions/convert_scatter_elements_to_scatter.cpp b/src/common/transformations/src/transformations/op_conversions/convert_scatter_elements_to_scatter.cpp index 60ee1a46ab7..5aea4234244 100644 --- a/src/common/transformations/src/transformations/op_conversions/convert_scatter_elements_to_scatter.cpp +++ b/src/common/transformations/src/transformations/op_conversions/convert_scatter_elements_to_scatter.cpp @@ -2,16 +2,16 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "itt.hpp" #include "transformations/op_conversions/convert_scatter_elements_to_scatter.hpp" #include -#include -#include - #include #include #include +#include +#include + +#include "itt.hpp" NGRAPH_RTTI_DEFINITION(ngraph::pass::ConvertScatterElementsToScatter, "ConvertScatterElementsToScatter", 0); @@ -30,7 +30,8 @@ ngraph::pass::ConvertScatterElementsToScatter::ConvertScatterElementsToScatter() ngraph::matcher_pass_callback callback = [](pattern::Matcher& m) { auto scatter = m.get_match_root(); auto broadcast = scatter->input_value(1).get_node_shared_ptr(); - auto axis_const = std::dynamic_pointer_cast(scatter->input_value(3).get_node_shared_ptr()); + auto axis_const = + std::dynamic_pointer_cast(scatter->input_value(3).get_node_shared_ptr()); if (!axis_const) { return false; @@ -43,7 +44,8 @@ ngraph::pass::ConvertScatterElementsToScatter::ConvertScatterElementsToScatter() const auto updates_pshape = scatter->input(2).get_partial_shape(); // Check that ScatterElementsUpdate and Broadcast inputs has static shapes - if (data_pshape.rank().is_dynamic() || indices_pshape.rank().is_dynamic() || updates_pshape.rank().is_dynamic()) { + if (data_pshape.rank().is_dynamic() || indices_pshape.rank().is_dynamic() || + updates_pshape.rank().is_dynamic()) { return false; } @@ -51,45 +53,45 @@ ngraph::pass::ConvertScatterElementsToScatter::ConvertScatterElementsToScatter() const uint64_t updates_rank = updates_pshape.rank().get_length(); const uint64_t indices_rank = indices_pshape.rank().get_length(); - // Check that axis Constant has {} or {1} 
shape if (shape_size(axis_const->get_shape()) > 1) { return false; } - const size_t axis = ngraph::normalize_axes(scatter->get_friendly_name(), - axis_const->cast_vector(), - data_pshape.rank())[0]; + const size_t axis = ngraph::normalize_axes(scatter->get_friendly_name(), + axis_const->cast_vector(), + data_pshape.rank())[0]; struct Range { uint64_t l, r; - Range(const uint64_t & l, const uint64_t & r) : l(l), r(r) { - if (l > r) throw ngraph_error("Range values are inconsistent"); + Range(const uint64_t& l, const uint64_t& r) : l(l), r(r) { + if (l > r) + throw ngraph_error("Range values are inconsistent"); } uint64_t size() const { return r - l; } - bool operator!= (const Range & rhs) const { + bool operator!=(const Range& rhs) const { return (r - l != rhs.r - rhs.l); } - static - bool is_valid(const int64_t & l, const int64_t & r) { + static bool is_valid(const int64_t& l, const int64_t& r) { return (l >= 0 && l <= r); } - static - bool is_empty(const uint64_t & l, const uint64_t & r) { + static bool is_empty(const uint64_t& l, const uint64_t& r) { return l == r; } }; - auto compare_shapes_ranges = [](const PartialShape & lhsShape, const PartialShape & rhsShape, const Range & lhsRange, const Range & rhsRange) -> bool { + auto compare_shapes_ranges = [](const PartialShape& lhsShape, + const PartialShape& rhsShape, + const Range& lhsRange, + const Range& rhsRange) -> bool { // Check that ranges are equal and suits to Shapes sizes - if (lhsRange != rhsRange || - lhsRange.r > static_cast(lhsShape.rank().get_length()) || + if (lhsRange != rhsRange || lhsRange.r > static_cast(lhsShape.rank().get_length()) || rhsRange.r > static_cast(rhsShape.rank().get_length())) { return false; } @@ -105,7 +107,7 @@ ngraph::pass::ConvertScatterElementsToScatter::ConvertScatterElementsToScatter() return true; }; - auto product = [](const Shape & shape, const Range & range) -> uint64_t { + auto product = [](const Shape& shape, const Range& range) -> uint64_t { uint64_t prod(1); for (size_t dim = range.l; dim < range.r; ++dim) { prod *= shape[dim]; @@ -122,7 +124,8 @@ ngraph::pass::ConvertScatterElementsToScatter::ConvertScatterElementsToScatter() * updates_shape[d_0, d_1, i_0(axis), i_1, ... , i_n, d_axis + 1, ... , d_n] * * EXAMPLE: - * In this example the input shapes are suits the rules above and ScatterElementsUpdate can be replaced with ScatterUpdate + * In this example the input shapes are suits the rules above and ScatterElementsUpdate can be replaced with + * ScatterUpdate * * axis = 1 | (axis) * \/ @@ -172,8 +175,10 @@ ngraph::pass::ConvertScatterElementsToScatter::ConvertScatterElementsToScatter() const auto indices_shape = indices_pshape.get_shape(); Shape indices_new_shape(updates_shape.begin() + axis, updates_shape.begin() + updates_last.l); if (indices_shape != indices_new_shape) { - indices_input = std::make_shared(indices_input, - opset3::Constant::create(element::i64, Shape{indices_new_shape.size()}, indices_new_shape), false); + indices_input = std::make_shared( + indices_input, + opset3::Constant::create(element::i64, Shape{indices_new_shape.size()}, indices_new_shape), + false); new_ops.push_back(indices_input.get_node_shared_ptr()); } } else { @@ -182,7 +187,8 @@ ngraph::pass::ConvertScatterElementsToScatter::ConvertScatterElementsToScatter() // 2. 
updates_pshape axis interval size = 1 for (size_t dim = 1; dim < indices_rank; ++dim) { - if (indices_pshape[dim] != 1) return false; + if (indices_pshape[dim] != 1) + return false; } if (Range(axis, updates_last.l).size() != 1) { @@ -193,8 +199,9 @@ ngraph::pass::ConvertScatterElementsToScatter::ConvertScatterElementsToScatter() if (indices_rank > 1) { std::vector squeeze_axes(indices_rank - 1ul); std::iota(squeeze_axes.begin(), squeeze_axes.end(), 1); - indices_input = std::make_shared(indices_input, - opset3::Constant::create(element::i64, Shape{squeeze_axes.size()}, squeeze_axes)); + indices_input = std::make_shared( + indices_input, + opset3::Constant::create(element::i64, Shape{squeeze_axes.size()}, squeeze_axes)); new_ops.push_back(indices_input.get_node_shared_ptr()); } } @@ -213,4 +220,3 @@ ngraph::pass::ConvertScatterElementsToScatter::ConvertScatterElementsToScatter() auto m = std::make_shared(scatter, matcher_name); register_matcher(m, callback); } - diff --git a/src/common/transformations/src/transformations/op_conversions/convert_sequences_to_tensor_iterator.cpp b/src/common/transformations/src/transformations/op_conversions/convert_sequences_to_tensor_iterator.cpp index d57dfb1b8e4..38e51cd151d 100644 --- a/src/common/transformations/src/transformations/op_conversions/convert_sequences_to_tensor_iterator.cpp +++ b/src/common/transformations/src/transformations/op_conversions/convert_sequences_to_tensor_iterator.cpp @@ -2,17 +2,17 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "itt.hpp" -#include "ngraph/builder/autobroadcast.hpp" #include "transformations/op_conversions/convert_sequences_to_tensor_iterator.hpp" #include +#include +#include +#include +#include #include -#include -#include -#include -#include +#include "itt.hpp" +#include "ngraph/builder/autobroadcast.hpp" NGRAPH_RTTI_DEFINITION(ngraph::pass::ConvertRNNSequenceToTensorIterator, "ConvertRNNSequenceToTensorIterator", 0); NGRAPH_RTTI_DEFINITION(ngraph::pass::ConvertGRUSequenceToTensorIterator, "ConvertGRUSequenceToTensorIterator", 0); @@ -20,285 +20,292 @@ NGRAPH_RTTI_DEFINITION(ngraph::pass::ConvertLSTMSequenceToTensorIterator, "Conve NGRAPH_RTTI_DEFINITION(ngraph::pass::ConvertSequenceToTensorIterator, "ConvertSequenceToTensorIterator", 0); namespace { - ngraph::Output get_current_iter(ngraph::ParameterVector &body_params, - ngraph::ResultVector &body_results, - const ngraph::Output &seq_lengths) { - auto curr_iter_body_param = std::make_shared(seq_lengths.get_element_type(), - ngraph::Shape{1}); - // increment current iteration - auto one = ngraph::opset5::Constant::create(seq_lengths.get_element_type(), ngraph::Shape{1}, {1}); - auto add = std::make_shared(curr_iter_body_param, one); - auto curr_iter_result = std::make_shared(add); - body_params.push_back(curr_iter_body_param); - body_results.push_back(curr_iter_result); - return curr_iter_body_param; +ngraph::Output get_current_iter(ngraph::ParameterVector& body_params, + ngraph::ResultVector& body_results, + const ngraph::Output& seq_lengths) { + auto curr_iter_body_param = + std::make_shared(seq_lengths.get_element_type(), ngraph::Shape{1}); + // increment current iteration + auto one = ngraph::opset5::Constant::create(seq_lengths.get_element_type(), ngraph::Shape{1}, {1}); + auto add = std::make_shared(curr_iter_body_param, one); + auto curr_iter_result = std::make_shared(add); + body_params.push_back(curr_iter_body_param); + body_results.push_back(curr_iter_result); + return curr_iter_body_param; +} + +ngraph::Output get_masked_value(const 
std::shared_ptr& ti, + ngraph::ParameterVector& body_params, + ngraph::ResultVector& body_results, + const ngraph::Output& current_iter, + const ngraph::Output& data, + const ngraph::Output& seq_lengths) { + // body parameters + auto aggregated_Y_h_body_param = + std::make_shared(data.get_element_type(), data.get_partial_shape()); + + body_params.push_back(aggregated_Y_h_body_param); + + // Create mask node deciding whether or not to mask batch data. + auto batch_seq_length = ngraph::builder::opset1::legacy_broadcast_for_binary_operation(data, seq_lengths, 0); + + auto mask_condition = std::make_shared(current_iter, batch_seq_length); + auto mask_Y_h = std::make_shared(current_iter, batch_seq_length); + + // Select values depending on mask. + // Select(, , ) + auto select_aggregated_H = std::make_shared(mask_Y_h, data, aggregated_Y_h_body_param); + auto aggregated_result = std::make_shared(select_aggregated_H); + body_results.push_back(aggregated_result); + + auto scalar_mask_value = ngraph::opset5::Constant::create(data.get_element_type(), {}, {0.f}); + auto data_shape = ngraph::op::util::make_try_fold(data); + auto mask_value = ngraph::op::util::make_try_fold(scalar_mask_value, data_shape); + return ngraph::op::util::make_try_fold(mask_condition, mask_value, data); +} + +bool convert_sequence_to_ti(const std::shared_ptr& sequence, + const ngraph::Output& X, + const ngraph::Output& H_t, + const ngraph::Output& C_t, + const ngraph::Output& seq_lengths, + const ngraph::Output& W, + const ngraph::Output& R, + const ngraph::Output& B, + const ngraph::op::RecurrentSequenceDirection& direction) { + auto X_pshape = X.get_partial_shape(); + if (X_pshape.size() < 2 || X_pshape[1].is_dynamic()) { + return false; } - ngraph::Output get_masked_value(const std::shared_ptr &ti, - ngraph::ParameterVector &body_params, - ngraph::ResultVector &body_results, - const ngraph::Output ¤t_iter, - const ngraph::Output &data, - const ngraph::Output &seq_lengths) { - // body parameters - auto aggregated_Y_h_body_param = std::make_shared(data.get_element_type(), data.get_partial_shape()); + auto max_seq_len = X_pshape[1].get_length(); + bool enable_mask = ngraph::op::util::is_seq_len_provided(seq_lengths.get_node_shared_ptr(), max_seq_len); - body_params.push_back(aggregated_Y_h_body_param); - - // Create mask node deciding whether or not to mask batch data. - auto batch_seq_length = ngraph::builder::opset1::legacy_broadcast_for_binary_operation(data, seq_lengths, 0); - - auto mask_condition = std::make_shared(current_iter, batch_seq_length); - auto mask_Y_h = std::make_shared(current_iter, batch_seq_length); - - // Select values depending on mask. 
- // Select(, , ) - auto select_aggregated_H = std::make_shared(mask_Y_h, data, aggregated_Y_h_body_param); - auto aggregated_result = std::make_shared(select_aggregated_H); - body_results.push_back(aggregated_result); - - auto scalar_mask_value = ngraph::opset5::Constant::create(data.get_element_type(), {}, { 0.f }); - auto data_shape = ngraph::op::util::make_try_fold(data); - auto mask_value = ngraph::op::util::make_try_fold(scalar_mask_value, data_shape); - return ngraph::op::util::make_try_fold(mask_condition, mask_value, data); + const bool is_reverse = direction == ngraph::op::RecurrentSequenceDirection::REVERSE; + std::shared_ptr reverse_seq_before; + if (is_reverse && enable_mask) { + reverse_seq_before = std::make_shared(X, seq_lengths, 0, 1); } - bool convert_sequence_to_ti( - const std::shared_ptr& sequence, - const ngraph::Output& X, - const ngraph::Output& H_t, - const ngraph::Output& C_t, - const ngraph::Output& seq_lengths, - const ngraph::Output& W, - const ngraph::Output& R, - const ngraph::Output& B, - const ngraph::op::RecurrentSequenceDirection& direction) { - auto X_pshape = X.get_partial_shape(); - if (X_pshape.size() < 2 || X_pshape[1].is_dynamic()) { - return false; - } + auto axis_0 = ngraph::opset5::Constant::create(ngraph::element::i64, ngraph::Shape{1}, {0}); + auto axis_1 = ngraph::opset5::Constant::create(ngraph::element::i64, ngraph::Shape{1}, {1}); - auto max_seq_len = X_pshape[1].get_length(); - bool enable_mask = ngraph::op::util::is_seq_len_provided(seq_lengths.get_node_shared_ptr(), max_seq_len); + // TensorIterator Body: begin + auto X_param_pshape = X_pshape; + X_param_pshape[1] = 1; // split by seq_lengths dimension + auto X_body_param = std::make_shared(X.get_element_type(), X_param_pshape); - const bool is_reverse = direction == ngraph::op::RecurrentSequenceDirection::REVERSE; - std::shared_ptr reverse_seq_before; - if (is_reverse && enable_mask) { - reverse_seq_before = std::make_shared(X, seq_lengths, 0, 1); - } + const auto squeezed_h = ngraph::op::util::make_try_fold(H_t, axis_1); + auto H_body_param = std::make_shared(squeezed_h->get_element_type(), + squeezed_h->get_output_partial_shape(0)); + auto seq_body_param = + std::make_shared(seq_lengths.get_element_type(), seq_lengths.get_partial_shape()); - auto axis_0 = ngraph::opset5::Constant::create(ngraph::element::i64, ngraph::Shape{ 1 }, { 0 }); - auto axis_1 = ngraph::opset5::Constant::create(ngraph::element::i64, ngraph::Shape{ 1 }, { 1 }); + // LSTM sequence case + const bool cell_state_defined = C_t.get_node_shared_ptr() != nullptr; + std::shared_ptr C_body_param = nullptr; + std::shared_ptr squeezed_c = nullptr; + if (cell_state_defined) { + squeezed_c = ngraph::op::util::make_try_fold(C_t, axis_1); + C_body_param = std::make_shared(squeezed_c->get_element_type(), + squeezed_c->get_output_partial_shape(0)); + } - // TensorIterator Body: begin - auto X_param_pshape = X_pshape; - X_param_pshape[1] = 1; // split by seq_lengths dimension - auto X_body_param = std::make_shared(X.get_element_type(), X_param_pshape); + const auto squeezed_x = ngraph::op::util::make_try_fold(X_body_param, axis_1); + const auto squeezed_w = ngraph::op::util::make_try_fold(W, axis_0); + const auto squeezed_r = ngraph::op::util::make_try_fold(R, axis_0); + const auto squeezed_b = ngraph::op::util::make_try_fold(B, axis_0); - const auto squeezed_h = ngraph::op::util::make_try_fold(H_t, axis_1); - auto H_body_param = std::make_shared(squeezed_h->get_element_type(), squeezed_h->get_output_partial_shape(0)); - auto 
seq_body_param = std::make_shared(seq_lengths.get_element_type(), seq_lengths.get_partial_shape()); + std::shared_ptr cell; + if (const auto lstm_sequence = ngraph::as_type_ptr(sequence)) { + cell = std::make_shared(squeezed_x, + H_body_param, + C_body_param, + squeezed_w, + squeezed_r, + squeezed_b, + lstm_sequence->get_hidden_size(), + lstm_sequence->get_activations(), + lstm_sequence->get_activations_alpha(), + lstm_sequence->get_activations_beta(), + lstm_sequence->get_clip()); + } else if (const auto rnn_sequence = ngraph::as_type_ptr(sequence)) { + cell = std::make_shared(squeezed_x, + H_body_param, + squeezed_w, + squeezed_r, + squeezed_b, + rnn_sequence->get_hidden_size(), + rnn_sequence->get_activations(), + rnn_sequence->get_activations_alpha(), + rnn_sequence->get_activations_beta(), + rnn_sequence->get_clip()); + } else if (const auto gnn_sequence = ngraph::as_type_ptr(sequence)) { + cell = std::make_shared(squeezed_x, + H_body_param, + squeezed_w, + squeezed_r, + squeezed_b, + gnn_sequence->get_hidden_size(), + gnn_sequence->get_activations(), + gnn_sequence->get_activations_alpha(), + gnn_sequence->get_activations_beta(), + gnn_sequence->get_clip(), + gnn_sequence->get_linear_before_reset()); + } else { + return false; + } - // LSTM sequence case - const bool cell_state_defined = C_t.get_node_shared_ptr() != nullptr; - std::shared_ptr C_body_param = nullptr; - std::shared_ptr squeezed_c = nullptr; - if (cell_state_defined) { - squeezed_c = ngraph::op::util::make_try_fold(C_t, axis_1); - C_body_param = std::make_shared(squeezed_c->get_element_type(), squeezed_c->get_output_partial_shape(0)); - } + ngraph::ParameterVector body_params; + ngraph::ResultVector body_results; - const auto squeezed_x = ngraph::op::util::make_try_fold(X_body_param, axis_1); - const auto squeezed_w = ngraph::op::util::make_try_fold(W, axis_0); - const auto squeezed_r = ngraph::op::util::make_try_fold(R, axis_0); - const auto squeezed_b = ngraph::op::util::make_try_fold(B, axis_0); + ngraph::Output hidden_state = cell->output(0); + ngraph::Output cell_state; + if (cell_state_defined) + cell_state = cell->output(1); - std::shared_ptr cell; - if (const auto lstm_sequence = ngraph::as_type_ptr(sequence)) { - cell = std::make_shared( - squeezed_x, - H_body_param, - C_body_param, - squeezed_w, - squeezed_r, - squeezed_b, - lstm_sequence->get_hidden_size(), - lstm_sequence->get_activations(), - lstm_sequence->get_activations_alpha(), - lstm_sequence->get_activations_beta(), - lstm_sequence->get_clip()); - } else if (const auto rnn_sequence = ngraph::as_type_ptr(sequence)) { - cell = std::make_shared( - squeezed_x, - H_body_param, - squeezed_w, - squeezed_r, - squeezed_b, - rnn_sequence->get_hidden_size(), - rnn_sequence->get_activations(), - rnn_sequence->get_activations_alpha(), - rnn_sequence->get_activations_beta(), - rnn_sequence->get_clip()); - } else if (const auto gnn_sequence = ngraph::as_type_ptr(sequence)) { - cell = std::make_shared( - squeezed_x, - H_body_param, - squeezed_w, - squeezed_r, - squeezed_b, - gnn_sequence->get_hidden_size(), - gnn_sequence->get_activations(), - gnn_sequence->get_activations_alpha(), - gnn_sequence->get_activations_beta(), - gnn_sequence->get_clip(), - gnn_sequence->get_linear_before_reset()); + auto tensor_iterator = std::make_shared(); + if (enable_mask) { + const auto current_iter = get_current_iter(body_params, body_results, seq_body_param); + hidden_state = + get_masked_value(tensor_iterator, body_params, body_results, current_iter, hidden_state, seq_body_param); 
+ if (cell_state_defined) + cell_state = + get_masked_value(tensor_iterator, body_params, body_results, current_iter, cell_state, seq_body_param); + } + + auto H_res = std::make_shared(hidden_state); + auto C_res = cell_state_defined ? std::make_shared(cell_state) : nullptr; + auto hidden_state_unsqueezed = std::make_shared(hidden_state, axis_1); + auto concat_res = std::make_shared(hidden_state_unsqueezed); + + body_params.push_back(X_body_param); + body_params.push_back(H_body_param); + if (cell_state_defined) + body_params.push_back(C_body_param); + body_params.push_back(seq_body_param); + + body_results.push_back(concat_res); + body_results.push_back(H_res); + if (cell_state_defined) + body_results.push_back(C_res); + + auto body = std::make_shared(body_results, body_params); + tensor_iterator->set_function(body); + // TensorIterator Body: end + if (is_reverse) { + if (!enable_mask) { + // Reversed order, stride -1 + tensor_iterator->set_sliced_input(X_body_param, X, -1, -1, 1, 0, 1); + tensor_iterator->get_concatenated_slices(concat_res, -1, -1, 1, 0, 1); } else { - return false; - } - - ngraph::ParameterVector body_params; - ngraph::ResultVector body_results; - - ngraph::Output hidden_state = cell->output(0); - ngraph::Output cell_state; - if (cell_state_defined) - cell_state = cell->output(1); - - auto tensor_iterator = std::make_shared(); - if (enable_mask) { - const auto current_iter = get_current_iter(body_params, body_results, seq_body_param); - hidden_state = get_masked_value(tensor_iterator, body_params, body_results, current_iter, hidden_state, seq_body_param); - if (cell_state_defined) - cell_state = get_masked_value(tensor_iterator, body_params, body_results, current_iter, cell_state, seq_body_param); - } - - auto H_res = std::make_shared(hidden_state); - auto C_res = cell_state_defined ? 
std::make_shared(cell_state) : nullptr; - auto hidden_state_unsqueezed = std::make_shared(hidden_state, axis_1); - auto concat_res = std::make_shared(hidden_state_unsqueezed); - - body_params.push_back(X_body_param); - body_params.push_back(H_body_param); - if (cell_state_defined) - body_params.push_back(C_body_param); - body_params.push_back(seq_body_param); - - body_results.push_back(concat_res); - body_results.push_back(H_res); - if (cell_state_defined) - body_results.push_back(C_res); - - auto body = std::make_shared(body_results, body_params); - tensor_iterator->set_function(body); - // TensorIterator Body: end - if (is_reverse) { - if (!enable_mask) { - // Reversed order, stride -1 - tensor_iterator->set_sliced_input(X_body_param, X, -1, -1, 1, 0, 1); - tensor_iterator->get_concatenated_slices(concat_res, -1, -1, 1, 0, 1); - } else { - // use ReverseSequence as initializer - tensor_iterator->set_sliced_input(X_body_param, reverse_seq_before, 0, 1, 1, -1, 1); - tensor_iterator->get_concatenated_slices(concat_res, 0, 1, 1, -1, 1); - } - } else { - // forward order - tensor_iterator->set_sliced_input(X_body_param, X, 0, 1, 1, -1, 1); + // use ReverseSequence as initializer + tensor_iterator->set_sliced_input(X_body_param, reverse_seq_before, 0, 1, 1, -1, 1); tensor_iterator->get_concatenated_slices(concat_res, 0, 1, 1, -1, 1); } - - tensor_iterator->set_merged_input(H_body_param, squeezed_h, H_res); - if (cell_state_defined) - tensor_iterator->set_merged_input(C_body_param, squeezed_c, C_res); - tensor_iterator->set_invariant_input(seq_body_param, seq_lengths); - - ngraph::Output H_out = H_res; - ngraph::Output C_out = C_res; - if (enable_mask) { - // create initial values for body_parameters in outer graph - // aggregated Y_h - concatenation of the last non-zero values for each batch - auto H_body_param_shape = ngraph::op::util::make_try_fold(H_body_param); - auto aggregated_Y_h_scalar = ngraph::opset5::Constant::create(H_body_param->get_element_type(), {}, { 0.f }); - auto aggregated_Y_h = ngraph::op::util::make_try_fold(aggregated_Y_h_scalar, H_body_param_shape); - - auto init_val_curr_iter = ngraph::opset5::Constant::create(seq_lengths.get_element_type(), ngraph::Shape{ 1 }, { 1 }); - ngraph::copy_runtime_info(sequence, { aggregated_Y_h, init_val_curr_iter }); - - // set initial value and back edge for current iteration - tensor_iterator->set_merged_input(body_params.at(0), init_val_curr_iter, body_results.at(0)); - // set initial value and back edge for aggregated H - tensor_iterator->set_merged_input(body_params.at(1), aggregated_Y_h, body_results.at(1)); - - H_out = tensor_iterator->get_function()->get_results()[1]; - - if (cell_state_defined) { - auto C_body_param_shape = ngraph::op::util::make_try_fold(C_body_param); - auto aggregated_Y_c_scalar = ngraph::opset5::Constant::create(C_body_param->get_element_type(), {}, { 0.f }); - auto aggregated_Y_c = ngraph::op::util::make_try_fold(aggregated_Y_c_scalar, C_body_param_shape); - ngraph::copy_runtime_info(sequence, aggregated_Y_c); - - // set initial value and back edge for aggregated C - tensor_iterator->set_merged_input(body_params.at(2), aggregated_Y_c, body_results.at(2)); - C_out = tensor_iterator->get_function()->get_results()[2]; - } - } - - tensor_iterator->get_iter_value(H_out); - if (cell_state_defined) - tensor_iterator->get_iter_value(C_out); - tensor_iterator->set_friendly_name(sequence->get_friendly_name()); - ngraph::NodeVector new_nodes{squeezed_h, tensor_iterator}; - if (cell_state_defined) - 
new_nodes.push_back(squeezed_c); - ngraph::OutputVector nodes_to_replace; - if (enable_mask && is_reverse) { - auto reverse_seq_after = std::make_shared(tensor_iterator->output(0), seq_lengths, 0, 1); - // Resolve a collision of names data nodes in CNN Network in Reverse case with mask. - /* - * Before transformation (no collisions) - * RNN/LSTM/GRU Sequence [rnn_name] -- (data_node: rnn_name.0) - > Result1 - * -- (data_node: rnn_name.1) - > Result2 - * - * - * After transformation (without identity, there are collisions): - * We need to set rnn_name.0 to RevSequence to store result name. - * TI [rnn_name] -- (DATA_NODE: rnn_name.0) --> RevSequence [rnn_name.0] -- (DATA_NODE: rnn_name.0) -> Result1 - * -- (data_node: rnn_name.1) --> Result2 - * - * - * After transformation (with identity, no collisions): - * TI has other_name, but it doesn't affect result names due TI is not connected to Results directly. - * TI [other_name] -- (data_node: other_name.0) --> RevSequence [rnn_name.0] -- (data_node: rnn_name.0) -> Result1 - * -- (data_node: other_name.1) --> Identity(rnn_name.1) -- (data_node: rnn_name.1) -> Result2 - */ - new_nodes.push_back(reverse_seq_before); - new_nodes.push_back(reverse_seq_after); - nodes_to_replace.push_back(reverse_seq_after); - nodes_to_replace.push_back(tensor_iterator->output(1)); - - if (cell_state_defined) { - auto cell_state = tensor_iterator->output(2); - new_nodes.emplace_back(cell_state.get_node_shared_ptr()); - nodes_to_replace.emplace_back(cell_state); - } - - tensor_iterator->set_friendly_name(sequence->get_friendly_name() + "/tensor_iterator"); - } else { - nodes_to_replace = tensor_iterator->outputs(); - } - - for (size_t i = 0; i < nodes_to_replace.size(); i++) { - auto unsqueeze = std::make_shared(nodes_to_replace[i], axis_1); - unsqueeze->set_friendly_name(sequence->get_friendly_name() + "." 
+ std::to_string(i)); - nodes_to_replace[i] = unsqueeze; - new_nodes.push_back(unsqueeze); - } - ngraph::copy_runtime_info(sequence, new_nodes); - ngraph::replace_node(sequence, nodes_to_replace); - - return true; + } else { + // forward order + tensor_iterator->set_sliced_input(X_body_param, X, 0, 1, 1, -1, 1); + tensor_iterator->get_concatenated_slices(concat_res, 0, 1, 1, -1, 1); } -} // namespace + + tensor_iterator->set_merged_input(H_body_param, squeezed_h, H_res); + if (cell_state_defined) + tensor_iterator->set_merged_input(C_body_param, squeezed_c, C_res); + tensor_iterator->set_invariant_input(seq_body_param, seq_lengths); + + ngraph::Output H_out = H_res; + ngraph::Output C_out = C_res; + if (enable_mask) { + // create initial values for body_parameters in outer graph + // aggregated Y_h - concatenation of the last non-zero values for each batch + auto H_body_param_shape = ngraph::op::util::make_try_fold(H_body_param); + auto aggregated_Y_h_scalar = ngraph::opset5::Constant::create(H_body_param->get_element_type(), {}, {0.f}); + auto aggregated_Y_h = + ngraph::op::util::make_try_fold(aggregated_Y_h_scalar, H_body_param_shape); + + auto init_val_curr_iter = + ngraph::opset5::Constant::create(seq_lengths.get_element_type(), ngraph::Shape{1}, {1}); + ngraph::copy_runtime_info(sequence, {aggregated_Y_h, init_val_curr_iter}); + + // set initial value and back edge for current iteration + tensor_iterator->set_merged_input(body_params.at(0), init_val_curr_iter, body_results.at(0)); + // set initial value and back edge for aggregated H + tensor_iterator->set_merged_input(body_params.at(1), aggregated_Y_h, body_results.at(1)); + + H_out = tensor_iterator->get_function()->get_results()[1]; + + if (cell_state_defined) { + auto C_body_param_shape = ngraph::op::util::make_try_fold(C_body_param); + auto aggregated_Y_c_scalar = ngraph::opset5::Constant::create(C_body_param->get_element_type(), {}, {0.f}); + auto aggregated_Y_c = + ngraph::op::util::make_try_fold(aggregated_Y_c_scalar, C_body_param_shape); + ngraph::copy_runtime_info(sequence, aggregated_Y_c); + + // set initial value and back edge for aggregated C + tensor_iterator->set_merged_input(body_params.at(2), aggregated_Y_c, body_results.at(2)); + C_out = tensor_iterator->get_function()->get_results()[2]; + } + } + + tensor_iterator->get_iter_value(H_out); + if (cell_state_defined) + tensor_iterator->get_iter_value(C_out); + tensor_iterator->set_friendly_name(sequence->get_friendly_name()); + ngraph::NodeVector new_nodes{squeezed_h, tensor_iterator}; + if (cell_state_defined) + new_nodes.push_back(squeezed_c); + ngraph::OutputVector nodes_to_replace; + if (enable_mask && is_reverse) { + auto reverse_seq_after = + std::make_shared(tensor_iterator->output(0), seq_lengths, 0, 1); + // Resolve a collision of names data nodes in CNN Network in Reverse case with mask. + /* + * Before transformation (no collisions) + * RNN/LSTM/GRU Sequence [rnn_name] -- (data_node: rnn_name.0) - > Result1 + * -- (data_node: rnn_name.1) - > Result2 + * + * + * After transformation (without identity, there are collisions): + * We need to set rnn_name.0 to RevSequence to store result name. + * TI [rnn_name] -- (DATA_NODE: rnn_name.0) --> RevSequence [rnn_name.0] -- (DATA_NODE: rnn_name.0) -> Result1 + * -- (data_node: rnn_name.1) --> Result2 + * + * + * After transformation (with identity, no collisions): + * TI has other_name, but it doesn't affect result names due TI is not connected to Results directly. 
+ * TI [other_name] -- (data_node: other_name.0) --> RevSequence [rnn_name.0] -- (data_node: rnn_name.0) -> + * Result1 + * -- (data_node: other_name.1) --> Identity(rnn_name.1) -- (data_node: rnn_name.1) -> Result2 + */ + new_nodes.push_back(reverse_seq_before); + new_nodes.push_back(reverse_seq_after); + nodes_to_replace.push_back(reverse_seq_after); + nodes_to_replace.push_back(tensor_iterator->output(1)); + + if (cell_state_defined) { + auto cell_state = tensor_iterator->output(2); + new_nodes.emplace_back(cell_state.get_node_shared_ptr()); + nodes_to_replace.emplace_back(cell_state); + } + + tensor_iterator->set_friendly_name(sequence->get_friendly_name() + "/tensor_iterator"); + } else { + nodes_to_replace = tensor_iterator->outputs(); + } + + for (size_t i = 0; i < nodes_to_replace.size(); i++) { + auto unsqueeze = std::make_shared(nodes_to_replace[i], axis_1); + unsqueeze->set_friendly_name(sequence->get_friendly_name() + "." + std::to_string(i)); + nodes_to_replace[i] = unsqueeze; + new_nodes.push_back(unsqueeze); + } + ngraph::copy_runtime_info(sequence, new_nodes); + ngraph::replace_node(sequence, nodes_to_replace); + + return true; +} +} // namespace ngraph::pass::ConvertRNNSequenceToTensorIterator::ConvertRNNSequenceToTensorIterator() { MATCHER_SCOPE(ConvertRNNSequenceToTensorIterator); @@ -310,24 +317,33 @@ ngraph::pass::ConvertRNNSequenceToTensorIterator::ConvertRNNSequenceToTensorIter auto B_m = pattern::any_input(); auto rnn_seq = ngraph::pattern::wrap_type({X_m, H_t_m, seq_lengths_m, W_m, R_m, B_m}); - ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher &m) { + ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher& m) { auto sequence = ngraph::as_type_ptr(m.get_match_root()); // Bidirectional Sequence op should be decomposed to Reverse + Forward // (e.g. 
apply BidirectionalRNNSequenceDecomposition transformation before this one) - if (!sequence || sequence->get_direction() == ngraph::op::RecurrentSequenceDirection::BIDIRECTIONAL || transformation_callback(sequence)) { + if (!sequence || sequence->get_direction() == ngraph::op::RecurrentSequenceDirection::BIDIRECTIONAL || + transformation_callback(sequence)) { return false; } const auto& pattern_map = m.get_pattern_value_map(); - const auto& X = pattern_map.at(X_m); // split - const auto& H_t = pattern_map.at(H_t_m); // merged (init value + back edge) - const auto& seq_lengths = pattern_map.at(seq_lengths_m); // invariant - const auto& W = pattern_map.at(W_m); // const in the body - const auto& R = pattern_map.at(R_m); // const in the body - const auto& B = pattern_map.at(B_m); // const in the body + const auto& X = pattern_map.at(X_m); // split + const auto& H_t = pattern_map.at(H_t_m); // merged (init value + back edge) + const auto& seq_lengths = pattern_map.at(seq_lengths_m); // invariant + const auto& W = pattern_map.at(W_m); // const in the body + const auto& R = pattern_map.at(R_m); // const in the body + const auto& B = pattern_map.at(B_m); // const in the body - return convert_sequence_to_ti(sequence, X, H_t, Output(), seq_lengths, W, R, B, sequence->get_direction()); + return convert_sequence_to_ti(sequence, + X, + H_t, + Output(), + seq_lengths, + W, + R, + B, + sequence->get_direction()); }; auto m = std::make_shared(rnn_seq, matcher_name); @@ -344,12 +360,13 @@ ngraph::pass::ConvertGRUSequenceToTensorIterator::ConvertGRUSequenceToTensorIter auto B_m = pattern::any_input(); auto gru_seq = ngraph::pattern::wrap_type({X_m, H_t_m, seq_lengths_m, W_m, R_m, B_m}); - ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher &m) { + ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher& m) { auto sequence = ngraph::as_type_ptr(m.get_match_root()); // Bidirectional Sequence op should be decomposed to Reverse + Forward // (e.g. apply BidirectionalRNNSequenceDecomposition transformation before this one) - if (!sequence || sequence->get_direction() == ngraph::op::RecurrentSequenceDirection::BIDIRECTIONAL || transformation_callback(sequence)) { + if (!sequence || sequence->get_direction() == ngraph::op::RecurrentSequenceDirection::BIDIRECTIONAL || + transformation_callback(sequence)) { return false; } @@ -361,7 +378,15 @@ ngraph::pass::ConvertGRUSequenceToTensorIterator::ConvertGRUSequenceToTensorIter const auto& R = pattern_map.at(R_m); // const in the body const auto& B = pattern_map.at(B_m); // const in the body - return convert_sequence_to_ti(sequence, X, H_t, Output(), seq_lengths, W, R, B, sequence->get_direction()); + return convert_sequence_to_ti(sequence, + X, + H_t, + Output(), + seq_lengths, + W, + R, + B, + sequence->get_direction()); }; auto m = std::make_shared(gru_seq, matcher_name); @@ -384,7 +409,8 @@ ngraph::pass::ConvertLSTMSequenceToTensorIterator::ConvertLSTMSequenceToTensorIt // Bidirectional Sequence op should be decomposed to Reverse + Forward // (e.g. 
apply BidirectionalRNNSequenceDecomposition transformation before this one) - if (!sequence || sequence->get_direction() == ngraph::op::RecurrentSequenceDirection::BIDIRECTIONAL || transformation_callback(sequence)) { + if (!sequence || sequence->get_direction() == ngraph::op::RecurrentSequenceDirection::BIDIRECTIONAL || + transformation_callback(sequence)) { return false; } diff --git a/src/common/transformations/src/transformations/op_conversions/convert_shapeof3.cpp b/src/common/transformations/src/transformations/op_conversions/convert_shapeof3.cpp index cf7a84c19fa..ea550eab3a5 100644 --- a/src/common/transformations/src/transformations/op_conversions/convert_shapeof3.cpp +++ b/src/common/transformations/src/transformations/op_conversions/convert_shapeof3.cpp @@ -2,16 +2,16 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "itt.hpp" #include "transformations/op_conversions/convert_shapeof3.hpp" #include -#include - #include #include -#include #include +#include +#include + +#include "itt.hpp" NGRAPH_RTTI_DEFINITION(ngraph::pass::ConvertShapeOf3, "ConvertShapeOf3", 0); @@ -20,7 +20,7 @@ ngraph::pass::ConvertShapeOf3::ConvertShapeOf3() { auto shapeof = pattern::wrap_type(); ngraph::matcher_pass_callback callback = [](pattern::Matcher& m) { - auto shapeof = std::dynamic_pointer_cast (m.get_match_root()); + auto shapeof = std::dynamic_pointer_cast(m.get_match_root()); if (!shapeof) { return false; } diff --git a/src/common/transformations/src/transformations/op_conversions/convert_shuffle_channels3.cpp b/src/common/transformations/src/transformations/op_conversions/convert_shuffle_channels3.cpp index 39cb0d15ed4..4efe3843293 100644 --- a/src/common/transformations/src/transformations/op_conversions/convert_shuffle_channels3.cpp +++ b/src/common/transformations/src/transformations/op_conversions/convert_shuffle_channels3.cpp @@ -2,16 +2,16 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "itt.hpp" #include "transformations/op_conversions/convert_shuffle_channels3.hpp" #include -#include - #include #include -#include #include +#include +#include + +#include "itt.hpp" using namespace ngraph; @@ -21,7 +21,7 @@ ngraph::pass::ConvertShuffleChannels3::ConvertShuffleChannels3() { MATCHER_SCOPE(ConvertShuffleChannels3); auto shuffle_channels = pattern::wrap_type(); - ngraph::matcher_pass_callback callback = [this](pattern::Matcher &m) { + ngraph::matcher_pass_callback callback = [this](pattern::Matcher& m) { auto shuffle_channels = std::dynamic_pointer_cast<::opset3::ShuffleChannels>(m.get_match_root()); if (!shuffle_channels || transformation_callback(shuffle_channels)) { return false; @@ -51,42 +51,41 @@ ngraph::pass::ConvertShuffleChannels3::ConvertShuffleChannels3() { // get input tensor dimensions divided into parts with help of VariadicSplit auto split_input_dimensions = std::make_shared<::opset2::VariadicSplit>( - original_shape->output(0), - ::opset2::Constant::create(element::i64, Shape({1}), std::vector{0}), - ::opset2::Constant::create(element::i64, Shape({split_lengts.size()}), split_lengts)); + original_shape->output(0), + ::opset2::Constant::create(element::i64, Shape({1}), std::vector{0}), + ::opset2::Constant::create(element::i64, Shape({split_lengts.size()}), split_lengts)); // calculate new dimension of the reshape. 
Start with two elements of {group, -1} ::OutputVector new_dimensions = { - ::opset2::Constant::create(element::i64, Shape({1}), std::vector{shuffle_group}), - ::opset2::Constant::create(element::i64, Shape({1}), std::vector{-1})}; + ::opset2::Constant::create(element::i64, Shape({1}), std::vector{shuffle_group}), + ::opset2::Constant::create(element::i64, Shape({1}), std::vector{-1})}; // add more elements to the reshape output dimensions based on shuffle_axis std::vector transpose_order; if (shuffle_axis == 0) { new_dimensions.push_back( - std::make_shared<::opset2::ReduceProd>(split_input_dimensions->output(1), reduce_axis_const, true)); + std::make_shared<::opset2::ReduceProd>(split_input_dimensions->output(1), reduce_axis_const, true)); transpose_order = {1, 0, 2}; } else if (shuffle_axis + 1 == input_rank) { - new_dimensions.insert(new_dimensions.begin(), - std::make_shared<::opset2::ReduceProd>(split_input_dimensions->output(0), - reduce_axis_const, true)); + new_dimensions.insert( + new_dimensions.begin(), + std::make_shared<::opset2::ReduceProd>(split_input_dimensions->output(0), reduce_axis_const, true)); transpose_order = {0, 2, 1}; } else { - new_dimensions.insert(new_dimensions.begin(), - std::make_shared<::opset2::ReduceProd>(split_input_dimensions->output(0), - reduce_axis_const, true)); + new_dimensions.insert( + new_dimensions.begin(), + std::make_shared<::opset2::ReduceProd>(split_input_dimensions->output(0), reduce_axis_const, true)); new_dimensions.push_back( - std::make_shared<::opset2::ReduceProd>(split_input_dimensions->output(2), reduce_axis_const, true)); + std::make_shared<::opset2::ReduceProd>(split_input_dimensions->output(2), reduce_axis_const, true)); transpose_order = {0, 2, 1, 3}; } // reshape the tensor to a new shape auto new_shape = std::make_shared<::opset2::Concat>(new_dimensions, 0); auto reshape = std::make_shared<::opset2::Reshape>(shuffle_channels->input_value(0), new_shape, false); // swap dimensions appearing after splitting the "shuffle_axis" dimension into two - auto transpose = std::make_shared<::opset2::Transpose>(reshape->output(0), - ::opset2::Constant::create(element::i64, - Shape({transpose_order.size()}), - transpose_order)); + auto transpose = std::make_shared<::opset2::Transpose>( + reshape->output(0), + ::opset2::Constant::create(element::i64, Shape({transpose_order.size()}), transpose_order)); // restore original shape auto reshape_back = std::make_shared<::opset2::Reshape>(transpose->output(0), original_shape->output(0), false); diff --git a/src/common/transformations/src/transformations/op_conversions/convert_slice_to_strided_slice.cpp b/src/common/transformations/src/transformations/op_conversions/convert_slice_to_strided_slice.cpp index 202041c1737..3ac3c409b47 100644 --- a/src/common/transformations/src/transformations/op_conversions/convert_slice_to_strided_slice.cpp +++ b/src/common/transformations/src/transformations/op_conversions/convert_slice_to_strided_slice.cpp @@ -2,28 +2,27 @@ // SPDX-License-Identifier: Apache-2.0 // -#include -#include +#include "transformations/op_conversions/convert_slice_to_strided_slice.hpp" +#include #include #include #include +#include -#include "transformations/op_conversions/convert_slice_to_strided_slice.hpp" -#include "transformations/utils/utils.hpp" +#include "itt.hpp" #include "ngraph/node.hpp" #include "ngraph/op/constant.hpp" #include "ngraph/op/util/op_types.hpp" #include "ngraph/validation_util.hpp" - -#include "itt.hpp" +#include "transformations/utils/utils.hpp" using namespace 
ngraph; NGRAPH_RTTI_DEFINITION(ngraph::pass::SliceToStridedSlice, "SliceToStridedSlice", 0); namespace { - Output align_indices(const Output& indices, +Output align_indices(const Output& indices, const Output& slice_axes, const Output& scatter_axis, size_t slice_indices_length, @@ -39,12 +38,13 @@ namespace { // axes: [2, 3] - apply slice values to 2 and 3 dimension of input data // expected_output_shape: {3, 3, 1, 1} - const auto default_indices = ngraph::opset8::Constant::create(indices.get_element_type(), Shape{slice_indices_length}, {fill_in_value}); - std::shared_ptr adjusted_indices = ngraph::op::util::make_try_fold( - default_indices, - slice_axes, - indices, // updates - scatter_axis); + const auto default_indices = + ngraph::opset8::Constant::create(indices.get_element_type(), Shape{slice_indices_length}, {fill_in_value}); + std::shared_ptr adjusted_indices = + ngraph::op::util::make_try_fold(default_indices, + slice_axes, + indices, // updates + scatter_axis); if (!ngraph::op::is_constant(adjusted_indices)) { new_ops.push_back(default_indices); @@ -95,8 +95,10 @@ ngraph::pass::SliceToStridedSlice::SliceToStridedSlice(bool use_shapes) { std::shared_ptr axes_const; if (slice_node->get_input_size() > 4) { - axes_const = use_shapes ? get_constant_from_source(slice_node->input_value(4)) - : std::dynamic_pointer_cast(slice_node->input_value(4).get_node_shared_ptr()); + axes_const = + use_shapes + ? get_constant_from_source(slice_node->input_value(4)) + : std::dynamic_pointer_cast(slice_node->input_value(4).get_node_shared_ptr()); } else { axes_const = slice_node->get_default_const_axes(start_input); } @@ -109,11 +111,9 @@ ngraph::pass::SliceToStridedSlice::SliceToStridedSlice(bool use_shapes) { auto norm_axes_vec = normalize_axes(slice_node->get_friendly_name(), axes_vec, data_shape.rank()); axes_vec = std::vector(norm_axes_vec.begin(), norm_axes_vec.end()); } else { - const bool need_normalization = std::any_of(axes_vec.begin(), - axes_vec.end(), - [](int64_t axis) { - return axis < 0; - }); + const bool need_normalization = std::any_of(axes_vec.begin(), axes_vec.end(), [](int64_t axis) { + return axis < 0; + }); if (need_normalization) return false; } @@ -133,9 +133,16 @@ ngraph::pass::SliceToStridedSlice::SliceToStridedSlice(bool use_shapes) { stop_input = align_indices(stop_input, slice_axes, scatter_axis, slice_indices_length, 0, new_ops); step_input = align_indices(step_input, slice_axes, scatter_axis, slice_indices_length, 1, new_ops); } - new_ops.insert(new_ops.end(), {start_input.get_node_shared_ptr(), stop_input.get_node_shared_ptr(), step_input.get_node_shared_ptr()}); + new_ops.insert( + new_ops.end(), + {start_input.get_node_shared_ptr(), stop_input.get_node_shared_ptr(), step_input.get_node_shared_ptr()}); - const auto strided_slice = std::make_shared(arg, start_input, stop_input, step_input, begin_end_mask, begin_end_mask); + const auto strided_slice = std::make_shared(arg, + start_input, + stop_input, + step_input, + begin_end_mask, + begin_end_mask); new_ops.push_back(strided_slice); strided_slice->set_friendly_name(slice_node->get_friendly_name()); diff --git a/src/common/transformations/src/transformations/op_conversions/convert_softmax_downgrade.cpp b/src/common/transformations/src/transformations/op_conversions/convert_softmax_downgrade.cpp index 616465ff988..b8fdcd758e3 100644 --- a/src/common/transformations/src/transformations/op_conversions/convert_softmax_downgrade.cpp +++ 
b/src/common/transformations/src/transformations/op_conversions/convert_softmax_downgrade.cpp @@ -3,11 +3,13 @@ // #include "transformations/op_conversions/convert_softmax_downgrade.hpp" + #include #include -#include #include +#include #include + #include "itt.hpp" NGRAPH_RTTI_DEFINITION(ngraph::pass::ConvertSoftMax8ToSoftMax1, "ConvertSoftMax8ToSoftMax1", 0); diff --git a/src/common/transformations/src/transformations/op_conversions/convert_softmax_upgrade.cpp b/src/common/transformations/src/transformations/op_conversions/convert_softmax_upgrade.cpp index f47701ed0f7..438e860f285 100644 --- a/src/common/transformations/src/transformations/op_conversions/convert_softmax_upgrade.cpp +++ b/src/common/transformations/src/transformations/op_conversions/convert_softmax_upgrade.cpp @@ -3,10 +3,12 @@ // #include "transformations/op_conversions/convert_softmax_upgrade.hpp" + #include #include -#include #include +#include + #include "itt.hpp" NGRAPH_RTTI_DEFINITION(ngraph::pass::ConvertSoftMax1ToSoftMax8, "ConvertSoftMax1ToSoftMax8", 0); diff --git a/src/common/transformations/src/transformations/op_conversions/convert_space_to_batch.cpp b/src/common/transformations/src/transformations/op_conversions/convert_space_to_batch.cpp index 1a98df74bf0..d9349f00cb7 100644 --- a/src/common/transformations/src/transformations/op_conversions/convert_space_to_batch.cpp +++ b/src/common/transformations/src/transformations/op_conversions/convert_space_to_batch.cpp @@ -2,15 +2,15 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "itt.hpp" #include "transformations/op_conversions/convert_space_to_batch.hpp" #include -#include - #include #include #include +#include + +#include "itt.hpp" NGRAPH_RTTI_DEFINITION(ngraph::pass::ConvertSpaceToBatch, "ConvertSpaceToBatch", 0); @@ -18,7 +18,7 @@ void ngraph::pass::ConvertSpaceToBatch::convert_space_to_batch() { MATCHER_SCOPE(ConvertSpaceToBatch_convert_space_to_batch); auto space_to_batch = ngraph::pattern::wrap_type(); ngraph::matcher_pass_callback callback = [](pattern::Matcher& m) { - auto space_to_batch = std::dynamic_pointer_cast (m.get_match_root()); + auto space_to_batch = std::dynamic_pointer_cast(m.get_match_root()); if (!space_to_batch) { return false; } @@ -41,7 +41,7 @@ void ngraph::pass::ConvertSpaceToBatch::convert_space_to_batch() { return false; } - const std::vector &block_values = block_const->cast_vector(); + const std::vector& block_values = block_const->cast_vector(); // Zero-pad the start and end of dimensions [D_0, ..., D_{N - 1}] of the input according to // `pads_begin` @@ -49,8 +49,8 @@ void ngraph::pass::ConvertSpaceToBatch::convert_space_to_batch() { // note: P_0 for batch dimension is expected to be 0 (no-padding). 
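
Note: a roughly equivalent NumPy sketch of the Pad -> Reshape -> Transpose -> Reshape lowering that ConvertSpaceToBatch performs below, assuming block_shape[0] == 1 for the batch dimension as the operation spec requires. The helper name and sample shapes are illustrative, not part of the patch.

import numpy as np

def space_to_batch(data, block_shape, pads_begin, pads_end):
    # 1. zero-pad every dimension D_i by pads_begin[i] / pads_end[i]
    x = np.pad(data, list(zip(pads_begin, pads_end)))
    # 2. split each padded dimension D_i into (D_i / B_i, B_i); B_0 is 1 for batch
    dispersed = []
    for d, b in zip(x.shape, block_shape):
        dispersed += [d // b, b]
    y = x.reshape(dispersed)
    # 3. move the block dimensions in front of the batch dimension
    n = x.ndim
    y = y.transpose([2 * i + 1 for i in range(n)] + [2 * i for i in range(n)])
    # 4. collapse the block dimensions into the batch dimension
    out_shape = [x.shape[0] * int(np.prod(block_shape))] + \
                [d // b for d, b in zip(x.shape[1:], block_shape[1:])]
    return y.reshape(out_shape)

x = np.arange(1 * 4 * 4).reshape(1, 4, 4)
assert space_to_batch(x, [1, 2, 2], [0, 0, 0], [0, 0, 0]).shape == (4, 2, 2)
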
// x = [batch + P_0, D_1 + P_1, D_2 + P_2, ..., D_{N - 1} + P_{N - 1}], where P_i = // pads_begin[i] + pads_end[i] - std::shared_ptr flat_node = std::make_shared(data, pads_begin_const, pads_end_const, - ngraph::op::PadMode::CONSTANT); + std::shared_ptr flat_node = + std::make_shared(data, pads_begin_const, pads_end_const, ngraph::op::PadMode::CONSTANT); auto out_shape = flat_node->get_shape(); new_ops.push_back(flat_node); @@ -67,8 +67,7 @@ void ngraph::pass::ConvertSpaceToBatch::convert_space_to_batch() { dispersed_shape.push_back(block_values.at(i)); } - const auto out_pattern = - opset3::Constant::create(element::i64, Shape{dispersed_shape.size()}, dispersed_shape); + const auto out_pattern = opset3::Constant::create(element::i64, Shape{dispersed_shape.size()}, dispersed_shape); flat_node = std::make_shared(flat_node, out_pattern, false); new_ops.push_back(flat_node); @@ -83,9 +82,9 @@ void ngraph::pass::ConvertSpaceToBatch::convert_space_to_batch() { } const auto axes_order_const = - opset3::Constant::create(element::i64, - Shape{axes_order.size()}, - std::vector(axes_order.begin(), axes_order.end())); + opset3::Constant::create(element::i64, + Shape{axes_order.size()}, + std::vector(axes_order.begin(), axes_order.end())); flat_node = std::make_shared(flat_node, axes_order_const); new_ops.push_back(flat_node); @@ -103,8 +102,7 @@ void ngraph::pass::ConvertSpaceToBatch::convert_space_to_batch() { squeezed_shape.push_back(out_shape.at(i) / block_values.at(i)); } - const auto out_pattern_2 = - opset3::Constant::create(element::i64, Shape{squeezed_shape.size()}, squeezed_shape); + const auto out_pattern_2 = opset3::Constant::create(element::i64, Shape{squeezed_shape.size()}, squeezed_shape); flat_node = std::make_shared(flat_node, out_pattern_2, false); new_ops.push_back(flat_node); @@ -122,7 +120,7 @@ void ngraph::pass::ConvertSpaceToBatch::convert_space_to_batch_by_elements() { MATCHER_SCOPE(ConvertSpaceToBatch_convert_space_to_batch_by_elements); auto space_to_batch = ngraph::pattern::wrap_type(); ngraph::matcher_pass_callback callback = [this](pattern::Matcher& m) { - auto space_to_batch = std::dynamic_pointer_cast (m.get_match_root()); + auto space_to_batch = std::dynamic_pointer_cast(m.get_match_root()); if (!space_to_batch) { return false; } @@ -149,11 +147,12 @@ void ngraph::pass::ConvertSpaceToBatch::convert_space_to_batch_by_elements() { if (!block_const || !pads_begin_const || !pads_end_const) { return false; } - const std::vector &block_values = block_const->cast_vector(); + const std::vector& block_values = block_const->cast_vector(); NodeVector new_ops; - std::shared_ptr flat_node = std::make_shared(data, pads_begin_const, pads_end_const, ngraph::op::PadMode::CONSTANT); + std::shared_ptr flat_node = + std::make_shared(data, pads_begin_const, pads_end_const, ngraph::op::PadMode::CONSTANT); new_ops.push_back(flat_node); auto out_shape = flat_node->get_shape(); @@ -168,7 +167,7 @@ void ngraph::pass::ConvertSpaceToBatch::convert_space_to_batch_by_elements() { dispersed_shape[shape_idx] = block_values[block_idx]; axes_order[0] = shape_idx; } else if (shape_idx == block_idx) { - dispersed_shape[shape_idx] = squeezed_shape[sq_shape_idx]/block_values[block_idx]; + dispersed_shape[shape_idx] = squeezed_shape[sq_shape_idx] / block_values[block_idx]; axes_order[axis_idx] = shape_idx; axis_idx--; sq_shape_idx--; @@ -181,13 +180,13 @@ void ngraph::pass::ConvertSpaceToBatch::convert_space_to_batch_by_elements() { } const auto out_pattern_1 = - opset3::Constant::create(element::i64, 
Shape{dispersed_shape.size()}, dispersed_shape); + opset3::Constant::create(element::i64, Shape{dispersed_shape.size()}, dispersed_shape); const bool special_zero = false; flat_node = std::make_shared(flat_node, out_pattern_1, special_zero); new_ops.push_back(flat_node); const auto axes_order_const = - opset3::Constant::create(element::i64, + opset3::Constant::create(element::i64, Shape{axes_order.size()}, std::vector(axes_order.begin(), axes_order.end())); flat_node = std::make_shared(flat_node, axes_order_const); @@ -195,7 +194,7 @@ void ngraph::pass::ConvertSpaceToBatch::convert_space_to_batch_by_elements() { squeezed_shape[0] *= block_values[block_idx]; squeezed_shape[block_idx] /= block_values[block_idx]; const auto out_pattern_2 = - opset3::Constant::create(element::i64, Shape{squeezed_shape.size()}, squeezed_shape); + opset3::Constant::create(element::i64, Shape{squeezed_shape.size()}, squeezed_shape); flat_node = std::make_shared(flat_node, out_pattern_2, special_zero); new_ops.push_back(flat_node); } diff --git a/src/common/transformations/src/transformations/op_conversions/convert_space_to_depth.cpp b/src/common/transformations/src/transformations/op_conversions/convert_space_to_depth.cpp index cce096721c4..632d0388f33 100644 --- a/src/common/transformations/src/transformations/op_conversions/convert_space_to_depth.cpp +++ b/src/common/transformations/src/transformations/op_conversions/convert_space_to_depth.cpp @@ -2,24 +2,25 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "itt.hpp" #include "transformations/op_conversions/convert_space_to_depth.hpp" #include +#include +#include +#include #include -#include -#include -#include +#include "itt.hpp" NGRAPH_RTTI_DEFINITION(ngraph::pass::ConvertSpaceToDepth, "ConvertSpaceToDepth", 0); ngraph::pass::ConvertSpaceToDepth::ConvertSpaceToDepth() { MATCHER_SCOPE(ConvertSpaceToDepth); - auto dts = ngraph::pattern::wrap_type({pattern::any_input(pattern::has_static_shape())}); + auto dts = + ngraph::pattern::wrap_type({pattern::any_input(pattern::has_static_shape())}); ngraph::matcher_pass_callback callback = [this](pattern::Matcher& m) { - auto std_node = std::dynamic_pointer_cast (m.get_match_root()); + auto std_node = std::dynamic_pointer_cast(m.get_match_root()); if (!std_node || transformation_callback(std_node)) { return false; } @@ -57,12 +58,12 @@ ngraph::pass::ConvertSpaceToDepth::ConvertSpaceToDepth() { } switch (mode) { - case ngraph::opset1::SpaceToDepth::SpaceToDepthMode::BLOCKS_FIRST: - order.push_back(1); - break; - case ngraph::opset1::SpaceToDepth::SpaceToDepthMode::DEPTH_FIRST: - order.insert(order.begin() + 1, 1); - break; + case ngraph::opset1::SpaceToDepth::SpaceToDepthMode::BLOCKS_FIRST: + order.push_back(1); + break; + case ngraph::opset1::SpaceToDepth::SpaceToDepthMode::DEPTH_FIRST: + order.insert(order.begin() + 1, 1); + break; } for (size_t i = 0, j = 2; i < spatial_dims; ++i, j += 2) { @@ -78,7 +79,7 @@ ngraph::pass::ConvertSpaceToDepth::ConvertSpaceToDepth() { } shape_end.insert(shape_end.begin() + 1, C); - auto create_constant = [](std::vector & v) -> std::shared_ptr { + auto create_constant = [](std::vector& v) -> std::shared_ptr { return opset1::Constant::create(element::i64, Shape{v.size()}, v); }; diff --git a/src/common/transformations/src/transformations/op_conversions/convert_subtract.cpp b/src/common/transformations/src/transformations/op_conversions/convert_subtract.cpp index 8d8322ccfc6..83315561093 100644 --- a/src/common/transformations/src/transformations/op_conversions/convert_subtract.cpp +++ 
b/src/common/transformations/src/transformations/op_conversions/convert_subtract.cpp @@ -2,15 +2,15 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "itt.hpp" #include "transformations/op_conversions/convert_subtract.hpp" #include +#include +#include +#include #include -#include -#include -#include +#include "itt.hpp" NGRAPH_RTTI_DEFINITION(ngraph::pass::ConvertSubtract, "ConvertSubtract", 0); @@ -42,18 +42,19 @@ ngraph::pass::ConvertSubtract::ConvertSubtract() { ov::is_type(child) || ov::is_type(child) || ov::is_type(child) || - (ov::is_type(child) && - (child->output(0).get_target_inputs().size() == 1ul) && - (ov::is_type(child->output(0).get_target_inputs().begin()->get_node()->shared_from_this()) || - ov::is_type(child->output(0).get_target_inputs().begin() - ->get_node()->shared_from_this())))) { + (ov::is_type(child) && (child->output(0).get_target_inputs().size() == 1ul) && + (ov::is_type( + child->output(0).get_target_inputs().begin()->get_node()->shared_from_this()) || + ov::is_type( + child->output(0).get_target_inputs().begin()->get_node()->shared_from_this())))) { const auto input1Type = sub->input(0).get_element_type(); const auto input2Type = sub->input(1).get_element_type(); if (((input1Type == element::u8) && (input2Type == element::u8)) || ((input1Type == element::i8) && (input2Type == element::i8))) { // we should not execute transformation by reasons: // 1. LPT asymmetric quantization pattern has to be keep as is - // 2. Subtract operation has unsigned/signed integer value which is not safe to multiply by -1 + // 2. Subtract operation has unsigned/signed integer value which is not safe to multiply by + // -1 return false; } } @@ -61,8 +62,9 @@ ngraph::pass::ConvertSubtract::ConvertSubtract() { } } - auto neg = std::make_shared(sub->input(1).get_source_output(), - opset1::Constant::create(sub->get_input_element_type(1), Shape{}, {-1})); + auto neg = std::make_shared( + sub->input(1).get_source_output(), + opset1::Constant::create(sub->get_input_element_type(1), Shape{}, {-1})); auto add = std::make_shared(sub->input(0).get_source_output(), neg); diff --git a/src/common/transformations/src/transformations/op_conversions/convert_ti_to_sequences.cpp b/src/common/transformations/src/transformations/op_conversions/convert_ti_to_sequences.cpp index 662efdcb73f..611b0632366 100644 --- a/src/common/transformations/src/transformations/op_conversions/convert_ti_to_sequences.cpp +++ b/src/common/transformations/src/transformations/op_conversions/convert_ti_to_sequences.cpp @@ -2,20 +2,20 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "itt.hpp" #include "transformations/op_conversions/convert_ti_to_sequences.hpp" -#include "transformations/utils/utils.hpp" #include +#include +#include +#include +#include +#include +#include +#include #include -#include -#include -#include -#include -#include -#include -#include +#include "itt.hpp" +#include "transformations/utils/utils.hpp" NGRAPH_RTTI_DEFINITION(ngraph::pass::ConvertTensorIteratorToLSTMSequence, "ConvertTensorIteratorToLSTMSequence", 0); NGRAPH_RTTI_DEFINITION(ngraph::pass::ConvertTensorIteratorToRNNSequence, "ConvertTensorIteratorToRNNSequence", 0); @@ -23,27 +23,28 @@ NGRAPH_RTTI_DEFINITION(ngraph::pass::ConvertTensorIteratorToGRUSequence, "Conver NGRAPH_RTTI_DEFINITION(ngraph::pass::ConvertTensorIteratorToSequence, "ConvertTensorIteratorToSequence", 0); namespace { -bool convertTensorIteratorToSequence( - const std::shared_ptr& ti, - const std::shared_ptr& found_cell, - const ngraph::Output& data, - const 
ngraph::Output& h_pattern, - const ngraph::Output& c_pattern, - const ngraph::Output& w_pattern, - const ngraph::Output& r_pattern, - const ngraph::Output& b_pattern, - const ngraph::Output& unsqueeze_after_cell) { +bool convertTensorIteratorToSequence(const std::shared_ptr& ti, + const std::shared_ptr& found_cell, + const ngraph::Output& data, + const ngraph::Output& h_pattern, + const ngraph::Output& c_pattern, + const ngraph::Output& w_pattern, + const ngraph::Output& r_pattern, + const ngraph::Output& b_pattern, + const ngraph::Output& unsqueeze_after_cell) { const auto& func = ti->get_function(); const auto& params = func->get_parameters(); std::vector> ordered_in_descs(3); int64_t stride = 0, slice_axis = 0; - // Remember the order of the X and initial_hidden_state (+ initial_cell_state in case of LSTM) in the TensorIterator params + // Remember the order of the X and initial_hidden_state (+ initial_cell_state in case of LSTM) in the TensorIterator + // params for (const auto& input_desc : ti->get_input_descriptions()) { auto param = params[input_desc->m_body_parameter_index]; if (param == data.get_node_shared_ptr()) { - auto slice_input = std::dynamic_pointer_cast(input_desc); + auto slice_input = + std::dynamic_pointer_cast(input_desc); if (!slice_input) return false; @@ -70,7 +71,8 @@ bool convertTensorIteratorToSequence( for (const auto& output_desc : ti->get_output_descriptions()) { std::shared_ptr res = results[output_desc->m_body_value_index]; if (res->input_value(0) == unsqueeze_after_cell) { - auto concat_output = std::dynamic_pointer_cast(output_desc); + auto concat_output = + std::dynamic_pointer_cast(output_desc); if (!concat_output) return false; @@ -92,21 +94,24 @@ bool convertTensorIteratorToSequence( X = std::make_shared(ti_inputs[ordered_in_descs[0]->m_input_index], order); } - // We must prepare cell inputs to sequence creation: insert num_directions elem via unsqueeze where needed (please, see specification) + // We must prepare cell inputs to sequence creation: insert num_directions elem via unsqueeze where needed (please, + // see specification) auto axis_1 = ngraph::opset5::Constant::create(ngraph::element::i64, ngraph::Shape{1}, {1}); - auto initial_hidden_state = std::make_shared(ti_inputs[ordered_in_descs[1]->m_input_index], axis_1); + auto initial_hidden_state = + std::make_shared(ti_inputs[ordered_in_descs[1]->m_input_index], axis_1); // LSTM case - std::shared_ptr initial_cell_state = c_pattern.get_node_shared_ptr() == nullptr ? - nullptr : - std::make_shared(ti_inputs[ordered_in_descs[2]->m_input_index], axis_1); + std::shared_ptr initial_cell_state = + c_pattern.get_node_shared_ptr() == nullptr + ? nullptr + : std::make_shared(ti_inputs[ordered_in_descs[2]->m_input_index], axis_1); const size_t batch_dim = slice_axis == 0 ? 
1 : 0; auto batch_dimension = ngraph::op::util::node_to_get_shape_value_of_indices_from_shape_source( ti_inputs[ordered_in_descs[0]->m_input_index], {batch_dim}); - auto seq_lengths_scalar = ngraph::opset5::Constant::create(ngraph::element::i32, {}, { ti->get_num_iterations() }); + auto seq_lengths_scalar = ngraph::opset5::Constant::create(ngraph::element::i32, {}, {ti->get_num_iterations()}); auto seq_lengths = ngraph::op::util::make_try_fold(seq_lengths_scalar, batch_dimension); auto axis_0 = ngraph::opset5::Constant::create(ngraph::element::i64, ngraph::Shape{1}, {0}); @@ -115,51 +120,55 @@ bool convertTensorIteratorToSequence( auto B = ngraph::op::util::make_try_fold(b_pattern, axis_0); std::shared_ptr sequence; - if (ngraph::is_type(found_cell) || ngraph::is_type(found_cell)) { - sequence = std::make_shared( - X, - initial_hidden_state, - initial_cell_state, - seq_lengths, - W, - R, - B, - found_cell->get_hidden_size(), - stride > 0 ? ngraph::op::RecurrentSequenceDirection::FORWARD : ngraph::op::RecurrentSequenceDirection::REVERSE, - found_cell->get_activations_alpha(), - found_cell->get_activations_beta(), - found_cell->get_activations(), - found_cell->get_clip()); + if (ngraph::is_type(found_cell) || + ngraph::is_type(found_cell)) { + sequence = + std::make_shared(X, + initial_hidden_state, + initial_cell_state, + seq_lengths, + W, + R, + B, + found_cell->get_hidden_size(), + stride > 0 ? ngraph::op::RecurrentSequenceDirection::FORWARD + : ngraph::op::RecurrentSequenceDirection::REVERSE, + found_cell->get_activations_alpha(), + found_cell->get_activations_beta(), + found_cell->get_activations(), + found_cell->get_clip()); } else if (ngraph::is_type(found_cell)) { - sequence = std::make_shared( - X, - initial_hidden_state, - seq_lengths, - W, - R, - B, - found_cell->get_hidden_size(), - stride > 0 ? ngraph::op::RecurrentSequenceDirection::FORWARD : ngraph::op::RecurrentSequenceDirection::REVERSE, - found_cell->get_activations(), - found_cell->get_activations_alpha(), - found_cell->get_activations_beta(), - found_cell->get_clip()); + sequence = + std::make_shared(X, + initial_hidden_state, + seq_lengths, + W, + R, + B, + found_cell->get_hidden_size(), + stride > 0 ? ngraph::op::RecurrentSequenceDirection::FORWARD + : ngraph::op::RecurrentSequenceDirection::REVERSE, + found_cell->get_activations(), + found_cell->get_activations_alpha(), + found_cell->get_activations_beta(), + found_cell->get_clip()); } else if (ngraph::is_type(found_cell)) { const auto gru_cell = ngraph::as_type_ptr(found_cell); - sequence = std::make_shared( - X, - initial_hidden_state, - seq_lengths, - W, - R, - B, - gru_cell->get_hidden_size(), - stride > 0 ? ngraph::op::RecurrentSequenceDirection::FORWARD : ngraph::op::RecurrentSequenceDirection::REVERSE, - gru_cell->get_activations(), - gru_cell->get_activations_alpha(), - gru_cell->get_activations_beta(), - gru_cell->get_clip(), - gru_cell->get_linear_before_reset()); + sequence = + std::make_shared(X, + initial_hidden_state, + seq_lengths, + W, + R, + B, + gru_cell->get_hidden_size(), + stride > 0 ? 
ngraph::op::RecurrentSequenceDirection::FORWARD + : ngraph::op::RecurrentSequenceDirection::REVERSE, + gru_cell->get_activations(), + gru_cell->get_activations_alpha(), + gru_cell->get_activations_beta(), + gru_cell->get_clip(), + gru_cell->get_linear_before_reset()); } else { throw ngraph::ngraph_error("Unsupported sequence type"); } @@ -172,7 +181,7 @@ bool convertTensorIteratorToSequence( ngraph::NodeVector outputs; // We must remove num_directions dimension that was added before sequence creation - auto axis_out = ngraph::opset5::Constant::create(ngraph::element::i64, ngraph::Shape{ 1 }, { 1 }); + auto axis_out = ngraph::opset5::Constant::create(ngraph::element::i64, ngraph::Shape{1}, {1}); auto out_0 = std::make_shared(out, axis_out); auto out_1 = std::make_shared(sequence->output(1), axis_out); out_0->set_friendly_name(ti->get_friendly_name() + ".0"); @@ -188,11 +197,12 @@ bool convertTensorIteratorToSequence( for (size_t i = 0; i < ordered_out_descs.size(); ++i) { if (ordered_out_descs[i]) { - for (const auto &input : ti->output(ordered_out_descs[i]->m_output_index).get_target_inputs()) { + for (const auto& input : ti->output(ordered_out_descs[i]->m_output_index).get_target_inputs()) { input.replace_source_output(outputs[i]->output(0)); } NGRAPH_SUPPRESS_DEPRECATED_START - outputs[i]->get_output_tensor(0).set_name(ngraph::op::util::create_ie_output_name(ti->output(ordered_out_descs[i]->m_output_index))); + outputs[i]->get_output_tensor(0).set_name( + ngraph::op::util::create_ie_output_name(ti->output(ordered_out_descs[i]->m_output_index))); NGRAPH_SUPPRESS_DEPRECATED_END } } @@ -221,13 +231,13 @@ bool convertTensorIteratorToSequence( copy_runtime_info(ti, new_nodes); return true; } -} // namespace +} // namespace ngraph::pass::ConvertTensorIteratorToLSTMSequence::ConvertTensorIteratorToLSTMSequence() { MATCHER_SCOPE(ConvertTensorIteratorToLSTMSequence); auto tensor_iterator = pattern::wrap_type(); - ngraph::matcher_pass_callback callback = [this](pattern::Matcher &m) { + ngraph::matcher_pass_callback callback = [this](pattern::Matcher& m) { auto ti = std::dynamic_pointer_cast(m.get_match_root()); if (!ti || transformation_callback(ti)) return false; @@ -247,7 +257,7 @@ ngraph::pass::ConvertTensorIteratorToLSTMSequence::ConvertTensorIteratorToLSTMSe auto cell = ngraph::pattern::wrap_type(cell_inputs); auto pattern_2 = ngraph::pattern::wrap_type(ngraph::pattern::rank_equals(1)); - auto unsqueeze = ngraph::pattern::wrap_type({ cell, pattern_2 }); + auto unsqueeze = ngraph::pattern::wrap_type({cell, pattern_2}); ngraph::pattern::Matcher matcher(unsqueeze); bool match = false; @@ -268,14 +278,15 @@ ngraph::pass::ConvertTensorIteratorToLSTMSequence::ConvertTensorIteratorToLSTMSe if (lstm_cell == nullptr) return false; - return convertTensorIteratorToSequence(ti, lstm_cell, - pattern_map.at(data), - pattern_map.at(input_H_state), - pattern_map.at(input_C_state), - pattern_map.at(input_W), - pattern_map.at(input_R), - pattern_map.at(input_B), - pattern_map.at(unsqueeze)); + return convertTensorIteratorToSequence(ti, + lstm_cell, + pattern_map.at(data), + pattern_map.at(input_H_state), + pattern_map.at(input_C_state), + pattern_map.at(input_W), + pattern_map.at(input_R), + pattern_map.at(input_B), + pattern_map.at(unsqueeze)); }; auto m = std::make_shared(tensor_iterator, matcher_name); @@ -286,7 +297,7 @@ ngraph::pass::ConvertTensorIteratorToRNNSequence::ConvertTensorIteratorToRNNSequ MATCHER_SCOPE(ConvertTensorIteratorToRNNSequence); auto tensor_iterator = pattern::wrap_type(); - 
ngraph::matcher_pass_callback callback = [this](pattern::Matcher &m) { + ngraph::matcher_pass_callback callback = [this](pattern::Matcher& m) { auto ti = std::dynamic_pointer_cast(m.get_match_root()); if (!ti || transformation_callback(ti)) return false; @@ -305,7 +316,7 @@ ngraph::pass::ConvertTensorIteratorToRNNSequence::ConvertTensorIteratorToRNNSequ auto cell = ngraph::pattern::wrap_type(cell_inputs); auto pattern_2 = ngraph::pattern::wrap_type(ngraph::pattern::rank_equals(1)); - auto unsqueeze = ngraph::pattern::wrap_type({ cell, pattern_2 }); + auto unsqueeze = ngraph::pattern::wrap_type({cell, pattern_2}); ngraph::pattern::Matcher matcher(unsqueeze); bool match = false; @@ -321,18 +332,20 @@ ngraph::pass::ConvertTensorIteratorToRNNSequence::ConvertTensorIteratorToRNNSequ return false; const auto& pattern_map = matcher.get_pattern_value_map(); - const auto& rnn_cell = std::dynamic_pointer_cast(pattern_map.at(cell).get_node_shared_ptr()); + const auto& rnn_cell = + std::dynamic_pointer_cast(pattern_map.at(cell).get_node_shared_ptr()); if (rnn_cell == nullptr) return false; - return convertTensorIteratorToSequence(ti, rnn_cell, - pattern_map.at(data), - pattern_map.at(input_H_state), - ngraph::Output(), - pattern_map.at(input_W), - pattern_map.at(input_R), - pattern_map.at(input_B), - pattern_map.at(unsqueeze)); + return convertTensorIteratorToSequence(ti, + rnn_cell, + pattern_map.at(data), + pattern_map.at(input_H_state), + ngraph::Output(), + pattern_map.at(input_W), + pattern_map.at(input_R), + pattern_map.at(input_B), + pattern_map.at(unsqueeze)); }; auto m = std::make_shared(tensor_iterator, matcher_name); @@ -362,7 +375,7 @@ ngraph::pass::ConvertTensorIteratorToGRUSequence::ConvertTensorIteratorToGRUSequ auto cell = ngraph::pattern::wrap_type(cell_inputs); auto pattern_2 = ngraph::pattern::wrap_type(ngraph::pattern::rank_equals(1)); - auto unsqueeze = ngraph::pattern::wrap_type({ cell, pattern_2 }); + auto unsqueeze = ngraph::pattern::wrap_type({cell, pattern_2}); ngraph::pattern::Matcher matcher(unsqueeze); bool match = false; @@ -378,18 +391,20 @@ ngraph::pass::ConvertTensorIteratorToGRUSequence::ConvertTensorIteratorToGRUSequ return false; const auto& pattern_map = matcher.get_pattern_value_map(); - const auto& gru_cell = std::dynamic_pointer_cast(pattern_map.at(cell).get_node_shared_ptr()); + const auto& gru_cell = + std::dynamic_pointer_cast(pattern_map.at(cell).get_node_shared_ptr()); if (gru_cell == nullptr) return false; - return convertTensorIteratorToSequence(ti, gru_cell, - pattern_map.at(data), - pattern_map.at(input_H_state), - ngraph::Output(), - pattern_map.at(input_W), - pattern_map.at(input_R), - pattern_map.at(input_B), - pattern_map.at(unsqueeze)); + return convertTensorIteratorToSequence(ti, + gru_cell, + pattern_map.at(data), + pattern_map.at(input_H_state), + ngraph::Output(), + pattern_map.at(input_W), + pattern_map.at(input_R), + pattern_map.at(input_B), + pattern_map.at(unsqueeze)); }; auto m = std::make_shared(tensor_iterator, matcher_name); diff --git a/src/common/transformations/src/transformations/op_conversions/convert_topk3.cpp b/src/common/transformations/src/transformations/op_conversions/convert_topk3.cpp index 223fda0d0c2..0cb93dc9535 100644 --- a/src/common/transformations/src/transformations/op_conversions/convert_topk3.cpp +++ b/src/common/transformations/src/transformations/op_conversions/convert_topk3.cpp @@ -2,18 +2,17 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "itt.hpp" #include 
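
Note: the TensorIterator-to-Sequence conversions above replace a per-time-step cell that the TensorIterator runs in a loop with a single sequence operation over the whole input. A minimal NumPy sketch of that equivalence for the RNN case, assuming the default tanh activation; the function names and shapes are illustrative only, not part of the patch.

import numpy as np

def rnn_cell(x_t, h, W, R, B):
    # Ht = tanh(Xt*(W^T) + Ht-1*(R^T) + B)
    return np.tanh(x_t @ W.T + h @ R.T + B)

def rnn_sequence(X, h0, W, R, B):
    # what the sequence op computes in one shot: the cell applied along the sequence axis
    h, outputs = h0, []
    for t in range(X.shape[1]):          # X: [batch, seq_len, input_size]
        h = rnn_cell(X[:, t], h, W, R, B)
        outputs.append(h)
    return np.stack(outputs, axis=1), h  # Y: [batch, seq_len, hidden], Ho: [batch, hidden]

batch, seq_len, input_size, hidden = 2, 5, 3, 4
X = np.random.rand(batch, seq_len, input_size)
W = np.random.rand(hidden, input_size)
R = np.random.rand(hidden, hidden)
B = np.random.rand(hidden)
Y, Ho = rnn_sequence(X, np.zeros((batch, hidden)), W, R, B)
assert Y.shape == (batch, seq_len, hidden) and np.allclose(Y[:, -1], Ho)
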
"transformations/op_conversions/convert_topk3.hpp" #include -#include - #include #include #include -#include - #include +#include +#include + +#include "itt.hpp" NGRAPH_RTTI_DEFINITION(ngraph::pass::ConvertTopK3, "ConvertTopK3", 0); @@ -22,7 +21,7 @@ ngraph::pass::ConvertTopK3::ConvertTopK3() { auto topk = pattern::wrap_type(); ngraph::matcher_pass_callback callback = [](pattern::Matcher& m) { - auto topk = std::dynamic_pointer_cast (m.get_match_root()); + auto topk = std::dynamic_pointer_cast(m.get_match_root()); if (!topk) { return false; } @@ -30,8 +29,12 @@ ngraph::pass::ConvertTopK3::ConvertTopK3() { Output last1; ngraph::NodeVector new_ops; - auto new_topk = std::make_shared(topk->input_value(0), topk->input_value(1), - topk->get_axis(), topk->get_mode(), topk->get_sort_type(), element::i32); + auto new_topk = std::make_shared(topk->input_value(0), + topk->input_value(1), + topk->get_axis(), + topk->get_mode(), + topk->get_sort_type(), + element::i32); new_ops.push_back(new_topk); // if the output is the i32 or output #1 has no consumers // then it matches behavior of the v1::TopK otherwise need to insert Convert diff --git a/src/common/transformations/src/transformations/op_conversions/detection_output_upgrade.cpp b/src/common/transformations/src/transformations/op_conversions/detection_output_upgrade.cpp index 43211b17a36..c86a591bd9b 100644 --- a/src/common/transformations/src/transformations/op_conversions/detection_output_upgrade.cpp +++ b/src/common/transformations/src/transformations/op_conversions/detection_output_upgrade.cpp @@ -48,19 +48,17 @@ pass::ConvertDetectionOutput1ToDetectionOutput8::ConvertDetectionOutput1ToDetect std::shared_ptr detection_output_v8_node = nullptr; if (detection_output_v1_node->get_input_size() == 3) { - detection_output_v8_node = - make_shared(detection_output_v1_node->input_value(0), - detection_output_v1_node->input_value(1), - detection_output_v1_node->input_value(2), - attributes_v8); + detection_output_v8_node = make_shared(detection_output_v1_node->input_value(0), + detection_output_v1_node->input_value(1), + detection_output_v1_node->input_value(2), + attributes_v8); } else if (detection_output_v1_node->get_input_size() == 5) { - detection_output_v8_node = - make_shared(detection_output_v1_node->input_value(0), - detection_output_v1_node->input_value(1), - detection_output_v1_node->input_value(2), - detection_output_v1_node->input_value(3), - detection_output_v1_node->input_value(4), - attributes_v8); + detection_output_v8_node = make_shared(detection_output_v1_node->input_value(0), + detection_output_v1_node->input_value(1), + detection_output_v1_node->input_value(2), + detection_output_v1_node->input_value(3), + detection_output_v1_node->input_value(4), + attributes_v8); } if (!detection_output_v8_node) return false; diff --git a/src/common/transformations/src/transformations/op_conversions/einsum_decomposition.cpp b/src/common/transformations/src/transformations/op_conversions/einsum_decomposition.cpp index 4c51af566b4..7bc0de7fbe1 100644 --- a/src/common/transformations/src/transformations/op_conversions/einsum_decomposition.cpp +++ b/src/common/transformations/src/transformations/op_conversions/einsum_decomposition.cpp @@ -57,12 +57,15 @@ std::vector> compute_einsum_path(std::shared_ptr& input_subscripts, const std::string& output_subscript, - const std::string label_to_check, const std::vector& excluded_indices) { +bool is_dimension_reduced(const std::vector& input_subscripts, + const std::string& output_subscript, + const std::string 
label_to_check, + const std::vector& excluded_indices) { for (size_t input_ind = 0; input_ind < input_subscripts.size(); ++input_ind) { const auto& input_subscript = input_subscripts[input_ind]; // the subscript is checked only if its index is not in excluded indices list - bool check_subscript = (std::find(excluded_indices.begin(), excluded_indices.end(), input_ind) == excluded_indices.end()); + bool check_subscript = + (std::find(excluded_indices.begin(), excluded_indices.end(), input_ind) == excluded_indices.end()); if (check_subscript && input_subscript.find(label_to_check) != std::string::npos) { return false; } @@ -76,7 +79,7 @@ bool is_dimension_reduced(const std::vector& input_subscripts, cons /// /// \return true - the input vector is a range [0; n]; false - otherwise /// -bool is_range_0_to_n(const std::vector &labels_inds) { +bool is_range_0_to_n(const std::vector& labels_inds) { int64_t check_index = 0; for (auto index : labels_inds) { if (check_index != index) { @@ -99,8 +102,10 @@ bool is_range_0_to_n(const std::vector &labels_inds) { /// /// \return An input subscript for grouping dimensions /// -std::string generate_grouping_subscript(const std::string& input_subscript, const std::vector& common_labels_inds, - const std::vector& separate_labels_inds, const std::vector& reduced_labels_inds, +std::string generate_grouping_subscript(const std::string& input_subscript, + const std::vector& common_labels_inds, + const std::vector& separate_labels_inds, + const std::vector& reduced_labels_inds, bool& is_separate_first) { // transpose is not needed if common labels, reduced labels // and separate labels indices go concurrently @@ -142,8 +147,12 @@ std::string generate_grouping_subscript(const std::string& input_subscript, cons /// \param new_node New input node to be inserted in the tail /// \param new_subscript New input subscript to be inserted in the tail /// -void update_operands(ngraph::OutputVector& input_nodes, std::vector& input_subscripts, size_t input_ind1, size_t input_ind2, - const ngraph::Output& new_node, const std::string& new_subscript) { +void update_operands(ngraph::OutputVector& input_nodes, + std::vector& input_subscripts, + size_t input_ind1, + size_t input_ind2, + const ngraph::Output& new_node, + const std::string& new_subscript) { NGRAPH_CHECK(input_ind1 < input_ind2); NGRAPH_CHECK(input_ind2 < input_nodes.size()); NGRAPH_CHECK(input_ind2 < input_subscripts.size()); @@ -167,7 +176,10 @@ void update_operands(ngraph::OutputVector& input_nodes, std::vector /// \return A vector of input nodes that can be empty (if s_end <= s_begin) /// or contains just one input node with sub-shape or its product /// -ngraph::OutputVector compute_sub_shape(const ngraph::Output& data_shape, size_t s_begin, size_t s_end, ngraph::NodeVector& subgraph_nodes, +ngraph::OutputVector compute_sub_shape(const ngraph::Output& data_shape, + size_t s_begin, + size_t s_end, + ngraph::NodeVector& subgraph_nodes, bool is_product = false) { int64_t begin = static_cast(s_begin); int64_t end = static_cast(s_end); @@ -177,14 +189,16 @@ ngraph::OutputVector compute_sub_shape(const ngraph::Output& data_ } std::vector begin_mask(1, 0); std::vector end_mask(1, 0); - auto begin_const = ngraph::opset7::Constant::create(ngraph::element::Type_t::i64, ngraph::Shape {1}, {begin}); - auto end_const = ngraph::opset7::Constant::create(ngraph::element::Type_t::i64, ngraph::Shape {1}, {end}); - auto stride_const = ngraph::opset7::Constant::create(ngraph::element::Type_t::i64, ngraph::Shape {1}, {1}); - auto 
sub_shape = std::make_shared(data_shape, begin_const, end_const, begin_mask, end_mask); + auto begin_const = ngraph::opset7::Constant::create(ngraph::element::Type_t::i64, ngraph::Shape{1}, {begin}); + auto end_const = ngraph::opset7::Constant::create(ngraph::element::Type_t::i64, ngraph::Shape{1}, {end}); + auto stride_const = ngraph::opset7::Constant::create(ngraph::element::Type_t::i64, ngraph::Shape{1}, {1}); + auto sub_shape = + std::make_shared(data_shape, begin_const, end_const, begin_mask, end_mask); if (is_product) { - auto reduce_axis_const = ngraph::opset7::Constant::create(ngraph::element::Type_t::i64, ngraph::Shape {1}, {0}); - auto separate_shape_prod = std::make_shared(sub_shape->output(0), reduce_axis_const, true); + auto reduce_axis_const = ngraph::opset7::Constant::create(ngraph::element::Type_t::i64, ngraph::Shape{1}, {0}); + auto separate_shape_prod = + std::make_shared(sub_shape->output(0), reduce_axis_const, true); sub_shape_vector.push_back(separate_shape_prod->output(0)); subgraph_nodes.insert(subgraph_nodes.end(), {reduce_axis_const, separate_shape_prod}); } else { @@ -205,12 +219,15 @@ ngraph::OutputVector compute_sub_shape(const ngraph::Output& data_ /// \return Unsqueezed input node if a vector of unsqueezing dimensions is not empty, /// otherwise, the original input node /// -ngraph::Output unsqueeze_input(const ngraph::Output& input_node, const std::vector& unsqueeze_axes, +ngraph::Output unsqueeze_input(const ngraph::Output& input_node, + const std::vector& unsqueeze_axes, ngraph::NodeVector& subgraph_nodes) { if (unsqueeze_axes.empty()) { return input_node; } - auto unsqueeze_axes_const = ngraph::opset7::Constant::create(ngraph::element::Type_t::i64, ngraph::Shape {unsqueeze_axes.size()}, unsqueeze_axes); + auto unsqueeze_axes_const = ngraph::opset7::Constant::create(ngraph::element::Type_t::i64, + ngraph::Shape{unsqueeze_axes.size()}, + unsqueeze_axes); auto unsqueeze = std::make_shared(input_node, unsqueeze_axes_const); subgraph_nodes.insert(subgraph_nodes.end(), {unsqueeze_axes_const, unsqueeze}); return unsqueeze->output(0); @@ -230,9 +247,12 @@ ngraph::Output unsqueeze_input(const ngraph::Output& /// /// \return Reshaped input node /// -ngraph::Output reshape_input_for_matmul(const ngraph::Output& input_node, const ngraph::OutputVector& common_sub_shape, - const ngraph::OutputVector& separate_sub_shape, const ngraph::OutputVector& reduced_sub_shape_prod, - bool is_separate_first, ngraph::NodeVector& subgraph_nodes) { +ngraph::Output reshape_input_for_matmul(const ngraph::Output& input_node, + const ngraph::OutputVector& common_sub_shape, + const ngraph::OutputVector& separate_sub_shape, + const ngraph::OutputVector& reduced_sub_shape_prod, + bool is_separate_first, + ngraph::NodeVector& subgraph_nodes) { ngraph::OutputVector new_shape_parts; new_shape_parts.insert(new_shape_parts.end(), common_sub_shape.begin(), common_sub_shape.end()); @@ -242,14 +262,15 @@ ngraph::Output reshape_input_for_matmul(const ngraph::Output 0) { // in this case compute a product of separate dimension sizes since they must be // presented with just one dimension for MatMul - auto reduce_axis_const = ngraph::opset7::Constant::create(ngraph::element::Type_t::i64, ngraph::Shape {1}, {0}); - auto separate_shape_prod = std::make_shared(separate_sub_shape[0], reduce_axis_const, true); + auto reduce_axis_const = ngraph::opset7::Constant::create(ngraph::element::Type_t::i64, ngraph::Shape{1}, {0}); + auto separate_shape_prod = + std::make_shared(separate_sub_shape[0], 
reduce_axis_const, true); separate_parts.push_back(separate_shape_prod->output(0)); subgraph_nodes.insert(subgraph_nodes.end(), {reduce_axis_const, separate_shape_prod}); } @@ -297,7 +318,10 @@ ngraph::Output reshape_input_for_matmul(const ngraph::Output& input_subscripts, const std::string& required_subscript, size_t input_ind, +void transpose_input(ngraph::OutputVector& input_nodes, + std::vector& input_subscripts, + const std::string& required_subscript, + size_t input_ind, ngraph::NodeVector& subgraph_nodes) { // perform sanity check for arguments auto num_inputs = input_nodes.size(); @@ -328,7 +352,8 @@ void transpose_input(ngraph::OutputVector& input_nodes, std::vector // create a sub-graph for transposing into the required layout const auto& input_node = input_nodes[input_ind]; - auto permutation_const = ngraph::opset7::Constant::create(ngraph::element::Type_t::i64, ngraph::Shape {permutation.size()}, permutation); + auto permutation_const = + ngraph::opset7::Constant::create(ngraph::element::Type_t::i64, ngraph::Shape{permutation.size()}, permutation); auto transpose = std::make_shared(input_node, permutation_const); // update a vector of inputs and input subscripts @@ -351,9 +376,12 @@ void transpose_input(ngraph::OutputVector& input_nodes, std::vector /// \param subgraph_nodes A vector of operation nodes that is included into /// a sub-graph decomposing Einsum that is needed for copy_runtime_info /// -void reduce_input(ngraph::pass::EinsumDecomposition *einsum_decompose_ptr, - ngraph::OutputVector& input_nodes, std::vector& input_subscripts, - const std::string& output_subscript, size_t input_ind, ngraph::NodeVector& subgraph_nodes) { +void reduce_input(ngraph::pass::EinsumDecomposition* einsum_decompose_ptr, + ngraph::OutputVector& input_nodes, + std::vector& input_subscripts, + const std::string& output_subscript, + size_t input_ind, + ngraph::NodeVector& subgraph_nodes) { // perform sanity check for arguments auto num_inputs = input_nodes.size(); NGRAPH_CHECK(num_inputs == input_subscripts.size(), "Each input must have own subscript."); @@ -384,7 +412,9 @@ void reduce_input(ngraph::pass::EinsumDecomposition *einsum_decompose_ptr, // reduce by summed up elements along dimension for which label is met just once const auto& input_node = input_nodes[input_ind]; - auto axes_const = ngraph::opset7::Constant::create(ngraph::element::Type_t::i64, ngraph::Shape {reduced_axes.size()}, reduced_axes); + auto axes_const = ngraph::opset7::Constant::create(ngraph::element::Type_t::i64, + ngraph::Shape{reduced_axes.size()}, + reduced_axes); auto reduce_sum = einsum_decompose_ptr->register_new_node(input_node, axes_const, false); // update a vector of inputs and input subscripts @@ -410,9 +440,12 @@ void reduce_input(ngraph::pass::EinsumDecomposition *einsum_decompose_ptr, /// sub-graph decomposing Einsum that is needed for copy_runtime_info /// void contract_two_inputs(ngraph::pass::EinsumDecomposition* einsum_decompose_ptr, - ngraph::OutputVector& input_nodes, std::vector& input_subscripts, - const std::string& output_subscript, size_t input_ind1, - size_t input_ind2, ngraph::NodeVector& subgraph_nodes) { + ngraph::OutputVector& input_nodes, + std::vector& input_subscripts, + const std::string& output_subscript, + size_t input_ind1, + size_t input_ind2, + ngraph::NodeVector& subgraph_nodes) { // assume that input_ind1 < input_ind2 without loss of generality, otherwise, just swap them if (input_ind2 < input_ind1) { std::swap(input_ind1, input_ind2); @@ -453,7 +486,8 @@ void 
contract_two_inputs(ngraph::pass::EinsumDecomposition* einsum_decompose_ptr const auto& label = labels1[label_ind]; auto iter = std::find(labels2.begin(), labels2.end(), label); if (iter != labels2.end()) { - bool is_dim_reduced = is_dimension_reduced(input_subscripts, output_subscript, label, {input_ind1, input_ind2}); + bool is_dim_reduced = + is_dimension_reduced(input_subscripts, output_subscript, label, {input_ind1, input_ind2}); common_part += label; if (is_dim_reduced) { reduced_labels_inds1.push_back(static_cast(label_ind)); @@ -500,7 +534,9 @@ void contract_two_inputs(ngraph::pass::EinsumDecomposition* einsum_decompose_ptr auto unsqueeze_output2 = unsqueeze_input(input_node2, unsqueeze_axis2, subgraph_nodes); // multiply both operands with broadcasting - auto mul = std::make_shared(unsqueeze_output1, unsqueeze_output2, ngraph::op::AutoBroadcastType::NUMPY); + auto mul = std::make_shared(unsqueeze_output1, + unsqueeze_output2, + ngraph::op::AutoBroadcastType::NUMPY); // update input operand and input subscript for Einsum operation update_operands(input_nodes, input_subscripts, input_ind1, input_ind2, mul->output(0), resultant_subscript); @@ -514,12 +550,18 @@ void contract_two_inputs(ngraph::pass::EinsumDecomposition* einsum_decompose_ptr // step 1. transpose both operands so that common labels, separated and reduced labels // are grouped for both operands bool is_separate_first1 = false; - auto int_subscript1 = generate_grouping_subscript(input_subscript1, common_labels_inds1, separate_labels_inds1, - reduced_labels_inds1, is_separate_first1); + auto int_subscript1 = generate_grouping_subscript(input_subscript1, + common_labels_inds1, + separate_labels_inds1, + reduced_labels_inds1, + is_separate_first1); transpose_input(input_nodes, input_subscripts, int_subscript1, input_ind1, subgraph_nodes); bool is_separate_first2 = false; - auto int_subscript2 = generate_grouping_subscript(input_subscript2, common_labels_inds2, separate_labels_inds2, - reduced_labels_inds2, is_separate_first2); + auto int_subscript2 = generate_grouping_subscript(input_subscript2, + common_labels_inds2, + separate_labels_inds2, + reduced_labels_inds2, + is_separate_first2); transpose_input(input_nodes, input_subscripts, int_subscript2, input_ind2, subgraph_nodes); // step 2. reshape both operands so that separate labels and reduced labels are represented @@ -530,22 +572,22 @@ void contract_two_inputs(ngraph::pass::EinsumDecomposition* einsum_decompose_ptr // this step is not needed for the operand if it satisfies to one of the requirements: // 1. there is just one separate dimension and just one reduced dimension // 2. 
there is no separate dimension, no common dimensions, and just one reduced dimension - bool no_reshape_for_matmul1 = (reduced_labels_inds1.size() == 1 && separate_labels_inds1.size() == 1) || - (reduced_labels_inds1.size() == 1 && common_labels_inds1.size() == 0 - && separate_labels_inds1.size() == 0); - bool no_reshape_for_matmul2 = (reduced_labels_inds2.size() == 1 && separate_labels_inds2.size() == 1) || - (reduced_labels_inds2.size() == 1 && common_labels_inds2.size() == 0 - && separate_labels_inds2.size() == 0); + bool no_reshape_for_matmul1 = + (reduced_labels_inds1.size() == 1 && separate_labels_inds1.size() == 1) || + (reduced_labels_inds1.size() == 1 && common_labels_inds1.size() == 0 && separate_labels_inds1.size() == 0); + bool no_reshape_for_matmul2 = + (reduced_labels_inds2.size() == 1 && separate_labels_inds2.size() == 1) || + (reduced_labels_inds2.size() == 1 && common_labels_inds2.size() == 0 && separate_labels_inds2.size() == 0); // reshape back after MatMul is not needed if one of two requrements satisfies for both operands: // 1. there is just one separate dimension // 2. there is no separate dimension and no common dimensions present. // If there is no separate dimension and common dimensions present, reshape is needed // because auxiliary separate dimension has been added by Unsqueeze operation // in the purpose for MatMul - bool no_reshape_back1 = (separate_labels_inds1.size() == 1) || - (common_labels_inds1.size() == 0 && separate_labels_inds1.size() == 0); - bool no_reshape_back2 = (separate_labels_inds2.size() == 1) || - (common_labels_inds2.size() == 0 && separate_labels_inds2.size() == 0); + bool no_reshape_back1 = + (separate_labels_inds1.size() == 1) || (common_labels_inds1.size() == 0 && separate_labels_inds1.size() == 0); + bool no_reshape_back2 = + (separate_labels_inds2.size() == 1) || (common_labels_inds2.size() == 0 && separate_labels_inds2.size() == 0); bool no_reshape_after_matmul = no_reshape_back1 && no_reshape_back2; auto matmul_operand1 = input_node1; @@ -556,25 +598,41 @@ void contract_two_inputs(ngraph::pass::EinsumDecomposition* einsum_decompose_ptr if (no_reshape_for_matmul1 == false || no_reshape_for_matmul2 == false) { auto data_shape1 = std::make_shared(input_node1); common_sub_shape = compute_sub_shape(data_shape1, common_dims_begin, common_dims_end, subgraph_nodes); - int64_t reduced_dims_begin = (is_separate_first1 ? common_labels_inds1.size() + separate_labels_inds1.size() : common_labels_inds1.size()); + int64_t reduced_dims_begin = (is_separate_first1 ? common_labels_inds1.size() + separate_labels_inds1.size() + : common_labels_inds1.size()); int64_t reduced_dims_end = reduced_dims_begin + reduced_labels_inds1.size(); - auto reduced_sub_shape_prod = compute_sub_shape(data_shape1, reduced_dims_begin, reduced_dims_end, subgraph_nodes, true); + auto reduced_sub_shape_prod = + compute_sub_shape(data_shape1, reduced_dims_begin, reduced_dims_end, subgraph_nodes, true); if (no_reshape_for_matmul1 == false || no_reshape_after_matmul == false) { - int64_t separate1_dims_begin = (is_separate_first1 ? common_labels_inds1.size() : common_labels_inds1.size() + reduced_labels_inds1.size()); + int64_t separate1_dims_begin = + (is_separate_first1 ? 
common_labels_inds1.size() + : common_labels_inds1.size() + reduced_labels_inds1.size()); int64_t separate1_dims_end = separate1_dims_begin + separate_labels_inds1.size(); - separate1_sub_shape = compute_sub_shape(data_shape1, separate1_dims_begin, separate1_dims_end, subgraph_nodes); - matmul_operand1 = reshape_input_for_matmul(input_node1, common_sub_shape, separate1_sub_shape, - reduced_sub_shape_prod, is_separate_first1, subgraph_nodes); + separate1_sub_shape = + compute_sub_shape(data_shape1, separate1_dims_begin, separate1_dims_end, subgraph_nodes); + matmul_operand1 = reshape_input_for_matmul(input_node1, + common_sub_shape, + separate1_sub_shape, + reduced_sub_shape_prod, + is_separate_first1, + subgraph_nodes); } if (no_reshape_for_matmul2 == false || no_reshape_after_matmul == false) { auto data_shape2 = std::make_shared(input_node2); - int64_t separate2_dims_begin = (is_separate_first2 ? common_labels_inds2.size() : common_labels_inds2.size() + reduced_labels_inds2.size()); + int64_t separate2_dims_begin = + (is_separate_first2 ? common_labels_inds2.size() + : common_labels_inds2.size() + reduced_labels_inds2.size()); int64_t separate2_dims_end = separate2_dims_begin + separate_labels_inds2.size(); - separate2_sub_shape = compute_sub_shape(data_shape2, separate2_dims_begin, separate2_dims_end, subgraph_nodes); - matmul_operand2 = reshape_input_for_matmul(input_node2, common_sub_shape, separate2_sub_shape, - reduced_sub_shape_prod, is_separate_first2, subgraph_nodes); + separate2_sub_shape = + compute_sub_shape(data_shape2, separate2_dims_begin, separate2_dims_end, subgraph_nodes); + matmul_operand2 = reshape_input_for_matmul(input_node2, + common_sub_shape, + separate2_sub_shape, + reduced_sub_shape_prod, + is_separate_first2, + subgraph_nodes); subgraph_nodes.insert(subgraph_nodes.end(), {data_shape2}); } subgraph_nodes.insert(subgraph_nodes.end(), {data_shape1}); @@ -587,7 +645,8 @@ void contract_two_inputs(ngraph::pass::EinsumDecomposition* einsum_decompose_ptr // step 4. 
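
Note: a simplified NumPy sketch of the pairwise Einsum contraction described here: each operand's dimensions are grouped into common (batch), separate (kept) and reduced (summed) labels, the separate and reduced groups are collapsed into single dimensions, and the contraction becomes a batched MatMul. It assumes both operands have already been transposed into [common..., separate..., reduced...] layout; the helper name and shapes are illustrative, not part of the patch.

import numpy as np

def contract_pair(lhs, rhs, n_common, n_reduced):
    common = lhs.shape[:n_common]
    sep1 = lhs.shape[n_common:lhs.ndim - n_reduced]
    sep2 = rhs.shape[n_common:rhs.ndim - n_reduced]
    red = lhs.shape[lhs.ndim - n_reduced:]
    a = lhs.reshape(common + (int(np.prod(sep1, dtype=int)), int(np.prod(red, dtype=int))))
    b = rhs.reshape(common + (int(np.prod(sep2, dtype=int)), int(np.prod(red, dtype=int))))
    out = np.matmul(a, np.swapaxes(b, -1, -2))   # MatMul sums over the collapsed reduced group
    return out.reshape(common + sep1 + sep2)     # unroll the separate groups back

x = np.random.rand(2, 3, 4, 5)   # labels b, i, j, k  (j and k are reduced)
y = np.random.rand(2, 6, 4, 5)   # labels b, m, j, k
assert np.allclose(contract_pair(x, y, n_common=1, n_reduced=2),
                   np.einsum('bijk,bmjk->bim', x, y))
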
reshape back by unrolling dimensions corresponding to separate labels if needed // now dimensions corresponding to reduced labels are reduced by the MatMul operation - std::string resultant_subscript = input_subscript1.substr(common_dims_begin, common_dims_end) + separate_part1 + separate_part2; + std::string resultant_subscript = + input_subscript1.substr(common_dims_begin, common_dims_end) + separate_part1 + separate_part2; if (no_reshape_after_matmul) { // this is a case when Reshape is not needed after MatMul operation // since there are no collapsed (or auxiliary added) separated dimensions @@ -611,7 +670,12 @@ void contract_two_inputs(ngraph::pass::EinsumDecomposition* einsum_decompose_ptr } // update input operand and input subscript for Einsum operation - update_operands(input_nodes, input_subscripts, input_ind1, input_ind2, result_op->output(0), resultant_subscript); + update_operands(input_nodes, + input_subscripts, + input_ind1, + input_ind2, + result_op->output(0), + resultant_subscript); subgraph_nodes.insert(subgraph_nodes.end(), {result_op}); } @@ -659,7 +723,13 @@ ngraph::pass::EinsumDecomposition::EinsumDecomposition() { // contract inputs by Einsum until just one is remained for (auto const& inds_pair : einsum_path) { - contract_two_inputs(this, input_nodes, input_subscripts, output_subscript, inds_pair.first, inds_pair.second, subgraph_nodes); + contract_two_inputs(this, + input_nodes, + input_subscripts, + output_subscript, + inds_pair.first, + inds_pair.second, + subgraph_nodes); } // reduce dimensions for the remained input node diff --git a/src/common/transformations/src/transformations/op_conversions/fq_decomposition.cpp b/src/common/transformations/src/transformations/op_conversions/fq_decomposition.cpp index 0b5cde9c989..f638b1098c2 100644 --- a/src/common/transformations/src/transformations/op_conversions/fq_decomposition.cpp +++ b/src/common/transformations/src/transformations/op_conversions/fq_decomposition.cpp @@ -2,22 +2,22 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "itt.hpp" #include "transformations/op_conversions/fq_decomposition.hpp" +#include #include #include -#include #include -#include - +#include #include +#include "itt.hpp" + NGRAPH_RTTI_DEFINITION(ngraph::pass::FakeQuantizeDecomposition, "FakeQuantizeDecomposition", 0); namespace { -bool isValidRangesInputs(const std::shared_ptr &fq) { +bool isValidRangesInputs(const std::shared_ptr& fq) { auto il = fq->input_value(1); auto ih = fq->input_value(2); auto greater_equal = std::make_shared(il, ih); @@ -30,10 +30,12 @@ bool isValidRangesInputs(const std::shared_ptr &fq const std::vector comp_result = res_node->cast_vector(); - return !std::any_of(comp_result.begin(), comp_result.end(), [](const bool value) { return value; }); + return !std::any_of(comp_result.begin(), comp_result.end(), [](const bool value) { + return value; + }); } -} // namespace +} // namespace ngraph::pass::FakeQuantizeDecomposition::FakeQuantizeDecomposition() { MATCHER_SCOPE(FakeQuantizeDecomposition); @@ -44,11 +46,13 @@ ngraph::pass::FakeQuantizeDecomposition::FakeQuantizeDecomposition() { auto oh = ngraph::pattern::wrap_type(); auto fake_quantize = ngraph::pattern::wrap_type({data, il, ih, ol, oh}); - ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher &m) { - auto &pattern_to_output = m.get_pattern_value_map(); - const auto fake_quantize_node = std::dynamic_pointer_cast(pattern_to_output.at(fake_quantize).get_node_shared_ptr()); + ngraph::matcher_pass_callback callback = 
[=](ngraph::pattern::Matcher& m) { + auto& pattern_to_output = m.get_pattern_value_map(); + const auto fake_quantize_node = std::dynamic_pointer_cast( + pattern_to_output.at(fake_quantize).get_node_shared_ptr()); - if (fake_quantize_node == nullptr || transformation_callback(fake_quantize_node) || !isValidRangesInputs(fake_quantize_node)) { + if (fake_quantize_node == nullptr || transformation_callback(fake_quantize_node) || + !isValidRangesInputs(fake_quantize_node)) { return false; } @@ -74,7 +78,8 @@ ngraph::pass::FakeQuantizeDecomposition::FakeQuantizeDecomposition() { decomp_ops.push_back(min); // (levels-1) - const auto levels_minus_one = std::make_shared(input_type, Shape{}, fake_quantize_node->get_levels() - 1); + const auto levels_minus_one = + std::make_shared(input_type, Shape{}, fake_quantize_node->get_levels() - 1); decomp_ops.push_back(levels_minus_one); // (input_high - input_low) const auto subInHighLow = std::make_shared(input_high, input_low); @@ -94,7 +99,8 @@ ngraph::pass::FakeQuantizeDecomposition::FakeQuantizeDecomposition() { decomp_ops.push_back(after_ish_apply); // round(x * (levels-1) / (input_high - input_low) - input_low * (levels-1) / (input_high - input_low)) - const auto round = std::make_shared(after_ish_apply, ngraph::opset5::Round::RoundMode::HALF_TO_EVEN); + const auto round = + std::make_shared(after_ish_apply, ngraph::opset5::Round::RoundMode::HALF_TO_EVEN); decomp_ops.push_back(round); // (output_high - output_low) @@ -104,10 +110,11 @@ ngraph::pass::FakeQuantizeDecomposition::FakeQuantizeDecomposition() { decomp_ops.push_back(sub_out_high_low); decomp_ops.push_back(osc); - // round(x * (levels-1) / (input_high - input_low) - input_low * (levels-1) / (input_high - input_low)) * (output_high - output_low) / (levels-1) + // round(x * (levels-1) / (input_high - input_low) - input_low * (levels-1) / (input_high - input_low)) * + // (output_high - output_low) / (levels-1) const auto after_osc_apply = std::make_shared(round, osc); - // round(x * (levels-1) / (input_high - input_low) - input_low * (levels-1) / (input_high - input_low)) * (output_high - output_low) / (levels-1) + - // output_low + // round(x * (levels-1) / (input_high - input_low) - input_low * (levels-1) / (input_high - input_low)) * + // (output_high - output_low) / (levels-1) + output_low std::shared_ptr result = std::make_shared(after_osc_apply, output_low); decomp_ops.push_back(after_osc_apply); decomp_ops.push_back(result); diff --git a/src/common/transformations/src/transformations/op_conversions/gather_normalize_negative_indices.cpp b/src/common/transformations/src/transformations/op_conversions/gather_normalize_negative_indices.cpp index b0c9905746d..df0621a3bed 100644 --- a/src/common/transformations/src/transformations/op_conversions/gather_normalize_negative_indices.cpp +++ b/src/common/transformations/src/transformations/op_conversions/gather_normalize_negative_indices.cpp @@ -5,11 +5,11 @@ #include "transformations/op_conversions/gather_normalize_negative_indices.hpp" #include - #include -#include -#include #include +#include +#include + #include "itt.hpp" NGRAPH_RTTI_DEFINITION(ngraph::pass::GatherNegativeConstIndicesNormalize, "GatherNegativeConstIndicesNormalize", 0); @@ -25,8 +25,10 @@ ngraph::pass::GatherNegativeConstIndicesNormalize::GatherNegativeConstIndicesNor auto& pattern_to_output = m.get_pattern_value_map(); auto gather = pattern_to_output.at(gather_node).get_node_shared_ptr(); auto data = pattern_to_output.at(data_input); - auto axis_constant = 
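
Note: an algebraically equivalent NumPy sketch of the decomposed FakeQuantize formula above (clamp to the input range, quantize with round-half-to-even, then rescale to the output range). The helper name and sample values are illustrative, not from the patch.

import numpy as np

def fake_quantize(x, in_low, in_high, out_low, out_high, levels):
    x = np.minimum(np.maximum(x, in_low), in_high)   # clamp to the input range
    isc = (levels - 1) / (in_high - in_low)          # input scale: (levels-1)/(ih-il)
    osc = (out_high - out_low) / (levels - 1)        # output scale: (oh-ol)/(levels-1)
    q = np.round((x - in_low) * isc)                 # np.round rounds half to even
    return q * osc + out_low

x = np.linspace(-1.5, 1.5, 7)
print(fake_quantize(x, in_low=-1.0, in_high=1.0, out_low=-1.0, out_high=1.0, levels=256))
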
std::dynamic_pointer_cast(pattern_to_output.at(axis_input).get_node_shared_ptr()); - auto indices_constant = std::dynamic_pointer_cast(pattern_to_output.at(indices_input).get_node_shared_ptr()); + auto axis_constant = + std::dynamic_pointer_cast(pattern_to_output.at(axis_input).get_node_shared_ptr()); + auto indices_constant = std::dynamic_pointer_cast( + pattern_to_output.at(indices_input).get_node_shared_ptr()); if (!gather || !axis_constant || !indices_constant) { return false; @@ -60,8 +62,10 @@ ngraph::pass::GatherNegativeConstIndicesNormalize::GatherNegativeConstIndicesNor auto input_type = indices_constant->get_element_type(); auto shape_of = std::make_shared(data, input_type); - auto input_gather = std::make_shared(shape_of, - ngraph::opset7::Constant::create(input_type, Shape{}, {axis_value}), ngraph::opset7::Constant::create(input_type, Shape{}, {0})); + auto input_gather = std::make_shared( + shape_of, + ngraph::opset7::Constant::create(input_type, Shape{}, {axis_value}), + ngraph::opset7::Constant::create(input_type, Shape{}, {0})); std::shared_ptr add = std::make_shared(input_gather, indices_constant); if (auto folded_const = ngraph::get_constant_from_source(add)) diff --git a/src/common/transformations/src/transformations/op_conversions/gelu7_downgrade.cpp b/src/common/transformations/src/transformations/op_conversions/gelu7_downgrade.cpp index 1751c14d573..def19bea8b4 100644 --- a/src/common/transformations/src/transformations/op_conversions/gelu7_downgrade.cpp +++ b/src/common/transformations/src/transformations/op_conversions/gelu7_downgrade.cpp @@ -5,11 +5,11 @@ #include "transformations/op_conversions/gelu7_downgrade.hpp" #include - #include #include -#include #include +#include + #include "itt.hpp" NGRAPH_RTTI_DEFINITION(ngraph::pass::Gelu7Downgrade, "Gelu7Downgrade", 0); @@ -20,7 +20,8 @@ ngraph::pass::Gelu7Downgrade::Gelu7Downgrade() { ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher& m) { auto& pattern_to_output = m.get_pattern_value_map(); - auto gelu_node = std::dynamic_pointer_cast(pattern_to_output.at(gelu).get_node_shared_ptr()); + auto gelu_node = + std::dynamic_pointer_cast(pattern_to_output.at(gelu).get_node_shared_ptr()); if (gelu_node == nullptr || transformation_callback(gelu_node)) { return false; diff --git a/src/common/transformations/src/transformations/op_conversions/gru_cell_decomposition.cpp b/src/common/transformations/src/transformations/op_conversions/gru_cell_decomposition.cpp index e9f6a48ec99..3286d98d525 100644 --- a/src/common/transformations/src/transformations/op_conversions/gru_cell_decomposition.cpp +++ b/src/common/transformations/src/transformations/op_conversions/gru_cell_decomposition.cpp @@ -2,16 +2,16 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "itt.hpp" #include "transformations/op_conversions/gru_cell_decomposition.hpp" #include +#include +#include +#include +#include #include -#include -#include -#include -#include +#include "itt.hpp" NGRAPH_RTTI_DEFINITION(ngraph::pass::GRUCellDecomposition, "GRUCellDecomposition", 0); @@ -19,7 +19,7 @@ ngraph::pass::GRUCellDecomposition::GRUCellDecomposition() { MATCHER_SCOPE(GRUCellDecomposition); auto gru_cell = ngraph::pattern::wrap_type(); ngraph::matcher_pass_callback callback = [this](ngraph::pattern::Matcher& m) { - auto gru_cell = std::dynamic_pointer_cast (m.get_match_root()); + auto gru_cell = std::dynamic_pointer_cast(m.get_match_root()); if (!gru_cell || transformation_callback(gru_cell)) { return false; } @@ -65,7 +65,7 @@ 
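
Note: GatherNegativeConstIndicesNormalize above rewrites a constant negative Gather index into dimension_size + index, taking the dimension size from ShapeOf(data), so downstream consumers see a non-negative index. A tiny NumPy check of that identity; names and values are illustrative, not part of the patch.

import numpy as np

def normalize_negative_index(data_shape, axis, index):
    # ShapeOf(data)[axis] + index, applied only when the constant index is negative
    return data_shape[axis] + index if index < 0 else index

data = np.arange(12).reshape(3, 4)
norm = normalize_negative_index(data.shape, axis=1, index=-1)
assert np.array_equal(np.take(data, -1, axis=1), np.take(data, norm, axis=1))
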
ngraph::pass::GRUCellDecomposition::GRUCellDecomposition() { // rt = f(Xt*(Wr^T) + Ht-1*(Rr^T) + Wbr + Rbr) auto r_t = ngraph::op::util::activation(gru_cell->get_activations()[0], clamp_r); - std::shared_ptr _h; + std::shared_ptr _h; if (gru_cell->get_linear_before_reset()) { // _h = Xt*(Wh^T) + (rt (.) (Ht-1*(Rh^T) + Rbh)) + Wbh auto Ht_Rh_Rbh = std::make_shared(Ht_R_zrh->output(2), biases_zrh->output(3)); @@ -97,8 +97,24 @@ ngraph::pass::GRUCellDecomposition::GRUCellDecomposition() { auto out_H = std::make_shared(mul_1, mul_2); out_H->set_friendly_name(gru_cell->get_friendly_name()); - ngraph::copy_runtime_info(gru_cell, {Xt_W, Ht_R, axis_0, Xt_W_zrh, R_zrh, Ht_R_zrh, biases_zrh, - add_z_1, add_z_2, add_r_1, add_r_2, h_t, one, sub, mul_1, mul_2, out_H}); + ngraph::copy_runtime_info(gru_cell, + {Xt_W, + Ht_R, + axis_0, + Xt_W_zrh, + R_zrh, + Ht_R_zrh, + biases_zrh, + add_z_1, + add_z_2, + add_r_1, + add_r_2, + h_t, + one, + sub, + mul_1, + mul_2, + out_H}); ngraph::replace_node(gru_cell, out_H); return true; }; diff --git a/src/common/transformations/src/transformations/op_conversions/hsigmoid_decomposition.cpp b/src/common/transformations/src/transformations/op_conversions/hsigmoid_decomposition.cpp index 04c3fb741d1..40387499655 100644 --- a/src/common/transformations/src/transformations/op_conversions/hsigmoid_decomposition.cpp +++ b/src/common/transformations/src/transformations/op_conversions/hsigmoid_decomposition.cpp @@ -2,14 +2,14 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "itt.hpp" #include "transformations/op_conversions/hsigmoid_decomposition.hpp" #include - #include -#include #include +#include + +#include "itt.hpp" NGRAPH_RTTI_DEFINITION(ngraph::pass::HSigmoidDecomposition, "HSigmoidDecomposition", 0); @@ -18,8 +18,8 @@ ngraph::pass::HSigmoidDecomposition::HSigmoidDecomposition() { // Decomposes HSigmoid(x) op into sub-graph (min(Relu(x + 3), 6) * const(1/6) auto hsigmoid = ngraph::pattern::wrap_type(); - ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher &m) { - auto &pattern_to_output = m.get_pattern_value_map(); + ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher& m) { + auto& pattern_to_output = m.get_pattern_value_map(); auto hsigmoid_node = pattern_to_output.at(hsigmoid).get_node_shared_ptr(); if (transformation_callback(hsigmoid_node)) { @@ -32,12 +32,11 @@ ngraph::pass::HSigmoidDecomposition::HSigmoidDecomposition() { auto relu = std::make_shared(add); auto min_constant = ngraph::opset5::Constant::create(input_type, ngraph::Shape{}, {6.0}); auto min = register_new_node(relu, min_constant); - auto mul_constant = ngraph::opset5::Constant::create(input_type, ngraph::Shape{}, {(1.0/6.0)}); // const(1/6) + auto mul_constant = ngraph::opset5::Constant::create(input_type, ngraph::Shape{}, {(1.0 / 6.0)}); // const(1/6) auto mul = std::make_shared(min, mul_constant); mul->set_friendly_name(m.get_match_root()->get_friendly_name()); - ngraph::copy_runtime_info(hsigmoid_node, - {add_constant, add, relu, min_constant, min, min_constant, mul}); + ngraph::copy_runtime_info(hsigmoid_node, {add_constant, add, relu, min_constant, min, min_constant, mul}); ngraph::replace_node(m.get_match_root(), mul); return true; }; diff --git a/src/common/transformations/src/transformations/op_conversions/hswish_decomposition.cpp b/src/common/transformations/src/transformations/op_conversions/hswish_decomposition.cpp index a45b2d35f76..7db29c5fe46 100644 --- a/src/common/transformations/src/transformations/op_conversions/hswish_decomposition.cpp +++ 
b/src/common/transformations/src/transformations/op_conversions/hswish_decomposition.cpp @@ -2,14 +2,14 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "itt.hpp" #include "transformations/op_conversions/hswish_decomposition.hpp" #include - #include -#include #include +#include + +#include "itt.hpp" NGRAPH_RTTI_DEFINITION(ngraph::pass::HSwishDecomposition, "HSwishDecomposition", 0); @@ -18,8 +18,8 @@ ngraph::pass::HSwishDecomposition::HSwishDecomposition() { // Decomposes HSwish(x) op into sub-graph x * (min(Relu(x + 3), 6) * const(1/6) auto hswish = ngraph::pattern::wrap_type(); - ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher &m) { - auto &pattern_to_output = m.get_pattern_value_map(); + ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher& m) { + auto& pattern_to_output = m.get_pattern_value_map(); auto hswish_node = pattern_to_output.at(hswish).get_node_shared_ptr(); if (transformation_callback(hswish_node)) { @@ -33,7 +33,7 @@ ngraph::pass::HSwishDecomposition::HSwishDecomposition() { auto min_constant = ngraph::opset4::Constant::create(input_type, ngraph::Shape{}, {6.0}); auto min = register_new_node(relu, min_constant); auto mul_first = std::make_shared(hswish_node->input_value(0), min); - auto mul_constant = ngraph::opset4::Constant::create(input_type, ngraph::Shape{}, {(1.0/6.0)}); // const(1/6) + auto mul_constant = ngraph::opset4::Constant::create(input_type, ngraph::Shape{}, {(1.0 / 6.0)}); // const(1/6) auto mul_second = std::make_shared(mul_first, mul_constant); mul_second->set_friendly_name(m.get_match_root()->get_friendly_name()); diff --git a/src/common/transformations/src/transformations/op_conversions/log_softmax_decomposition.cpp b/src/common/transformations/src/transformations/op_conversions/log_softmax_decomposition.cpp index 08e7875260b..5778dbdd7e6 100644 --- a/src/common/transformations/src/transformations/op_conversions/log_softmax_decomposition.cpp +++ b/src/common/transformations/src/transformations/op_conversions/log_softmax_decomposition.cpp @@ -2,14 +2,14 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "itt.hpp" #include "transformations/op_conversions/log_softmax_decomposition.hpp" #include - #include -#include #include +#include + +#include "itt.hpp" NGRAPH_RTTI_DEFINITION(ngraph::pass::LogSoftmaxDecomposition, "LogSoftmaxDecomposition", 0); @@ -20,14 +20,17 @@ ngraph::pass::LogSoftmaxDecomposition::LogSoftmaxDecomposition() { ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher& m) { auto& pattern_to_output = m.get_pattern_value_map(); - auto log_softmax_node = std::dynamic_pointer_cast(pattern_to_output.at(log_softmax).get_node_shared_ptr()); + auto log_softmax_node = std::dynamic_pointer_cast( + pattern_to_output.at(log_softmax).get_node_shared_ptr()); if (log_softmax_node == nullptr || transformation_callback(log_softmax_node)) { return false; } - auto axis1 = ngraph::opset5::Constant::create(element::Type_t::i64, ngraph::Shape{1}, { log_softmax_node->get_axis() }); - auto axis2 = ngraph::opset5::Constant::create(element::Type_t::i64, ngraph::Shape{1}, { log_softmax_node->get_axis() }); + auto axis1 = + ngraph::opset5::Constant::create(element::Type_t::i64, ngraph::Shape{1}, {log_softmax_node->get_axis()}); + auto axis2 = + ngraph::opset5::Constant::create(element::Type_t::i64, ngraph::Shape{1}, {log_softmax_node->get_axis()}); auto max = std::make_shared(log_softmax_node->input_value(0), axis1, true); auto sub = std::make_shared(log_softmax_node->input_value(0), max); auto exp = 
std::make_shared(sub); @@ -36,7 +39,7 @@ ngraph::pass::LogSoftmaxDecomposition::LogSoftmaxDecomposition() { auto sub_end = std::make_shared(sub, log); sub_end->set_friendly_name(m.get_match_root()->get_friendly_name()); - ngraph::copy_runtime_info(log_softmax_node, { axis1, axis2, max, sub, exp, sum, log, sub_end }); + ngraph::copy_runtime_info(log_softmax_node, {axis1, axis2, max, sub, exp, sum, log, sub_end}); ngraph::replace_node(m.get_match_root(), sub_end); return true; }; diff --git a/src/common/transformations/src/transformations/op_conversions/lstm_cell_decomposition.cpp b/src/common/transformations/src/transformations/op_conversions/lstm_cell_decomposition.cpp index 492dff379c2..9c8b1dfffd6 100644 --- a/src/common/transformations/src/transformations/op_conversions/lstm_cell_decomposition.cpp +++ b/src/common/transformations/src/transformations/op_conversions/lstm_cell_decomposition.cpp @@ -2,17 +2,17 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "itt.hpp" #include "transformations/op_conversions/lstm_cell_decomposition.hpp" #include +#include +#include +#include +#include +#include #include -#include -#include -#include -#include -#include +#include "itt.hpp" NGRAPH_RTTI_DEFINITION(ngraph::pass::LSTMCellDecomposition, "LSTMCellDecomposition", 0); @@ -78,10 +78,11 @@ ngraph::pass::LSTMCellDecomposition::LSTMCellDecomposition() { auto hC = ngraph::op::util::activation(lstm_cell->get_activations()[2], out_C); auto out_H = std::make_shared(o_t, hC); - out_H->set_friendly_name(lstm_cell->get_friendly_name()+".0"); - out_C->set_friendly_name(lstm_cell->get_friendly_name()+".1"); - ngraph::copy_runtime_info(lstm_cell, {Xt_W, Ht_R, add, split, mul1, mul2, out_H, hC, out_C, axis_node, XHB, - f_t, i_t, c_t, o_t}); + out_H->set_friendly_name(lstm_cell->get_friendly_name() + ".0"); + out_C->set_friendly_name(lstm_cell->get_friendly_name() + ".1"); + ngraph::copy_runtime_info( + lstm_cell, + {Xt_W, Ht_R, add, split, mul1, mul2, out_H, hC, out_C, axis_node, XHB, f_t, i_t, c_t, o_t}); ngraph::replace_node(lstm_cell, {out_H->output(0), out_C->output(0)}); return true; }; diff --git a/src/common/transformations/src/transformations/op_conversions/mvn6_decomposition.cpp b/src/common/transformations/src/transformations/op_conversions/mvn6_decomposition.cpp index 88688c9bcfa..0890291b9fd 100644 --- a/src/common/transformations/src/transformations/op_conversions/mvn6_decomposition.cpp +++ b/src/common/transformations/src/transformations/op_conversions/mvn6_decomposition.cpp @@ -5,10 +5,10 @@ #include "transformations/op_conversions/mvn6_decomposition.hpp" #include - #include -#include #include +#include + #include "itt.hpp" NGRAPH_RTTI_DEFINITION(ngraph::pass::MVN6Decomposition, "MVN6Decomposition", 0); @@ -37,17 +37,17 @@ ngraph::pass::MVN6Decomposition::MVN6Decomposition() { if (!mvn_node->get_normalize_variance()) { mean_normalization->set_friendly_name(mvn_node->get_friendly_name()); - ngraph::copy_runtime_info(mvn_node, { mean, mean_normalization }); + ngraph::copy_runtime_info(mvn_node, {mean, mean_normalization}); ngraph::replace_node(mvn_node, mean_normalization); } else { // (x - ReduceMean(x, axes)) ^ 2 - auto sqr_const = ngraph::opset6::Constant::create(data.get_element_type(), ngraph::Shape{ 1 }, { 2 }); + auto sqr_const = ngraph::opset6::Constant::create(data.get_element_type(), ngraph::Shape{1}, {2}); auto sqr = std::make_shared(mean_normalization, sqr_const); // ReduceMean((x - ReduceMean(x, axes)) ^ 2) auto mean2 = std::make_shared(sqr, axes, true); auto eps = 
mvn_node->get_eps(); - auto eps_node = ngraph::opset6::Constant::create(data.get_element_type(), ngraph::Shape{ 1 }, { eps }); + auto eps_node = ngraph::opset6::Constant::create(data.get_element_type(), ngraph::Shape{1}, {eps}); auto eps_mode = mvn_node->get_eps_mode(); std::shared_ptr eps_add; @@ -71,7 +71,7 @@ ngraph::pass::MVN6Decomposition::MVN6Decomposition() { } div->set_friendly_name(mvn_node->get_friendly_name()); - ngraph::copy_runtime_info(mvn_node, { mean, mean_normalization, sqr, mean2, eps_node, eps_add, sqrt, div }); + ngraph::copy_runtime_info(mvn_node, {mean, mean_normalization, sqr, mean2, eps_node, eps_add, sqrt, div}); ngraph::replace_node(mvn_node, div); } return true; diff --git a/src/common/transformations/src/transformations/op_conversions/normalize_l2_decomposition.cpp b/src/common/transformations/src/transformations/op_conversions/normalize_l2_decomposition.cpp index 832f4d2f940..bd1cbb24cd2 100644 --- a/src/common/transformations/src/transformations/op_conversions/normalize_l2_decomposition.cpp +++ b/src/common/transformations/src/transformations/op_conversions/normalize_l2_decomposition.cpp @@ -2,14 +2,14 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "itt.hpp" #include "transformations/op_conversions/normalize_l2_decomposition.hpp" #include - #include -#include #include +#include + +#include "itt.hpp" NGRAPH_RTTI_DEFINITION(ngraph::pass::NormalizeL2Decomposition, "NormalizeL2Decomposition", 0); @@ -17,28 +17,30 @@ ngraph::pass::NormalizeL2Decomposition::NormalizeL2Decomposition() { MATCHER_SCOPE(NormalizeL2Decomposition); auto normalize_l2_pattern = ngraph::pattern::wrap_type(); - ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher &m) { + ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher& m) { auto normalize_l2 = std::dynamic_pointer_cast(m.get_match_root()); if (!normalize_l2 || transformation_callback(normalize_l2)) { return false; } - auto power = std::make_shared(normalize_l2->input_value(0), - opset8::Constant::create(normalize_l2->get_input_element_type(0), Shape{}, {2.0})); + auto power = std::make_shared( + normalize_l2->input_value(0), + opset8::Constant::create(normalize_l2->get_input_element_type(0), Shape{}, {2.0})); auto reduce_sum = std::make_shared(power, normalize_l2->input_value(1), true); std::shared_ptr eps_node; - auto eps_const_node = opset8::Constant::create(normalize_l2->get_input_element_type(0), Shape{}, {normalize_l2->get_eps()}); + auto eps_const_node = + opset8::Constant::create(normalize_l2->get_input_element_type(0), Shape{}, {normalize_l2->get_eps()}); switch (normalize_l2->get_eps_mode()) { - case op::EpsMode::ADD: - eps_node = std::make_shared(reduce_sum, eps_const_node); - break; - case op::EpsMode::MAX: - eps_node = std::make_shared(reduce_sum, eps_const_node); - break; - default: - return false; + case op::EpsMode::ADD: + eps_node = std::make_shared(reduce_sum, eps_const_node); + break; + case op::EpsMode::MAX: + eps_node = std::make_shared(reduce_sum, eps_const_node); + break; + default: + return false; } auto sqrt = std::make_shared(eps_node); @@ -53,4 +55,3 @@ ngraph::pass::NormalizeL2Decomposition::NormalizeL2Decomposition() { auto m = std::make_shared(normalize_l2_pattern, matcher_name); register_matcher(m, callback); } - diff --git a/src/common/transformations/src/transformations/op_conversions/reduce_l1_decomposition.cpp b/src/common/transformations/src/transformations/op_conversions/reduce_l1_decomposition.cpp index de21c3f9a7d..30aef9e95a9 100644 --- 
a/src/common/transformations/src/transformations/op_conversions/reduce_l1_decomposition.cpp +++ b/src/common/transformations/src/transformations/op_conversions/reduce_l1_decomposition.cpp @@ -2,14 +2,14 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "itt.hpp" #include "transformations/op_conversions/reduce_l1_decomposition.hpp" #include - #include -#include #include +#include + +#include "itt.hpp" NGRAPH_RTTI_DEFINITION(ngraph::pass::ReduceL1Decomposition, "ReduceL1Decomposition", 0); @@ -18,20 +18,22 @@ ngraph::pass::ReduceL1Decomposition::ReduceL1Decomposition() { // decomposes ReduceL1 operations into ReduceSum(abs(x)) auto reduce_l1 = ngraph::pattern::wrap_type(); - ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher &m) { - auto &pattern_to_output = m.get_pattern_value_map(); - auto reduce_l1_node = std::dynamic_pointer_cast(pattern_to_output.at(reduce_l1).get_node_shared_ptr()); + ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher& m) { + auto& pattern_to_output = m.get_pattern_value_map(); + auto reduce_l1_node = + std::dynamic_pointer_cast(pattern_to_output.at(reduce_l1).get_node_shared_ptr()); if (reduce_l1_node == nullptr || transformation_callback(reduce_l1_node)) { return false; } auto abs = std::make_shared(reduce_l1_node->input_value(0)); - auto reduce_sum = register_new_node(abs, reduce_l1_node->input_value(1), reduce_l1_node->get_keep_dims()); + auto reduce_sum = register_new_node(abs, + reduce_l1_node->input_value(1), + reduce_l1_node->get_keep_dims()); reduce_sum->set_friendly_name(m.get_match_root()->get_friendly_name()); - ngraph::copy_runtime_info(reduce_l1_node, - {abs, reduce_sum}); + ngraph::copy_runtime_info(reduce_l1_node, {abs, reduce_sum}); ngraph::replace_node(m.get_match_root(), reduce_sum); return true; }; @@ -39,4 +41,3 @@ ngraph::pass::ReduceL1Decomposition::ReduceL1Decomposition() { auto m = std::make_shared(reduce_l1, matcher_name); register_matcher(m, callback); } - diff --git a/src/common/transformations/src/transformations/op_conversions/reduce_l2_decomposition.cpp b/src/common/transformations/src/transformations/op_conversions/reduce_l2_decomposition.cpp index c9dbaa161bd..2447fcd4f8c 100644 --- a/src/common/transformations/src/transformations/op_conversions/reduce_l2_decomposition.cpp +++ b/src/common/transformations/src/transformations/op_conversions/reduce_l2_decomposition.cpp @@ -2,14 +2,14 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "itt.hpp" #include "transformations/op_conversions/reduce_l2_decomposition.hpp" #include - #include -#include #include +#include + +#include "itt.hpp" NGRAPH_RTTI_DEFINITION(ngraph::pass::ReduceL2Decomposition, "ReduceL2Decomposition", 0); @@ -18,21 +18,24 @@ ngraph::pass::ReduceL2Decomposition::ReduceL2Decomposition() { // decomposes ReduceL2 operations into sqrt(ReduceSum(x * x)) auto reduce_l2 = ngraph::pattern::wrap_type(); - ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher &m) { - auto &pattern_to_output = m.get_pattern_value_map(); - auto reduce_l2_node = std::dynamic_pointer_cast(pattern_to_output.at(reduce_l2).get_node_shared_ptr()); + ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher& m) { + auto& pattern_to_output = m.get_pattern_value_map(); + auto reduce_l2_node = + std::dynamic_pointer_cast(pattern_to_output.at(reduce_l2).get_node_shared_ptr()); if (reduce_l2_node == nullptr || transformation_callback(reduce_l2_node)) { return false; } - auto const_2 = 
ngraph::opset4::Constant::create(reduce_l2_node->input_value(0).get_element_type(), Shape{}, {2.0f}); + auto const_2 = + ngraph::opset4::Constant::create(reduce_l2_node->input_value(0).get_element_type(), Shape{}, {2.0f}); auto square = std::make_shared(reduce_l2_node->input_value(0), const_2); - auto reduce_sum = register_new_node(square, reduce_l2_node->input_value(1), reduce_l2_node->get_keep_dims()); + auto reduce_sum = register_new_node(square, + reduce_l2_node->input_value(1), + reduce_l2_node->get_keep_dims()); auto sqrt = std::make_shared(reduce_sum); sqrt->set_friendly_name(m.get_match_root()->get_friendly_name()); - ngraph::copy_runtime_info(reduce_l2_node, - {sqrt, reduce_sum, square, const_2}); + ngraph::copy_runtime_info(reduce_l2_node, {sqrt, reduce_sum, square, const_2}); ngraph::replace_node(m.get_match_root(), sqrt); return true; }; @@ -40,4 +43,3 @@ ngraph::pass::ReduceL2Decomposition::ReduceL2Decomposition() { auto m = std::make_shared(reduce_l2, matcher_name); register_matcher(m, callback); } - diff --git a/src/common/transformations/src/transformations/op_conversions/rnn_cell_decomposition.cpp b/src/common/transformations/src/transformations/op_conversions/rnn_cell_decomposition.cpp index 38418614b4d..2e39b7a36fd 100644 --- a/src/common/transformations/src/transformations/op_conversions/rnn_cell_decomposition.cpp +++ b/src/common/transformations/src/transformations/op_conversions/rnn_cell_decomposition.cpp @@ -2,16 +2,16 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "itt.hpp" #include "transformations/op_conversions/rnn_cell_decomposition.hpp" #include +#include +#include +#include +#include #include -#include -#include -#include -#include +#include "itt.hpp" NGRAPH_RTTI_DEFINITION(ngraph::pass::RNNCellDecomposition, "RNNCellDecomposition", 0); @@ -19,7 +19,7 @@ ngraph::pass::RNNCellDecomposition::RNNCellDecomposition() { MATCHER_SCOPE(RNNCellDecomposition); auto rnn_cell = ngraph::pattern::wrap_type(); ngraph::matcher_pass_callback callback = [this](ngraph::pattern::Matcher& m) { - auto rnn_cell = std::dynamic_pointer_cast (m.get_match_root()); + auto rnn_cell = std::dynamic_pointer_cast(m.get_match_root()); if (!rnn_cell || transformation_callback(rnn_cell)) { return false; } diff --git a/src/common/transformations/src/transformations/op_conversions/simplify_ctc_greedy_decoder_seq_len.cpp b/src/common/transformations/src/transformations/op_conversions/simplify_ctc_greedy_decoder_seq_len.cpp index 1b6eb12b1c9..94e04a3e825 100644 --- a/src/common/transformations/src/transformations/op_conversions/simplify_ctc_greedy_decoder_seq_len.cpp +++ b/src/common/transformations/src/transformations/op_conversions/simplify_ctc_greedy_decoder_seq_len.cpp @@ -2,14 +2,13 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "itt.hpp" - #include "transformations/op_conversions/simplify_ctc_greedy_decoder_seq_len.hpp" #include +#include #include -#include +#include "itt.hpp" NGRAPH_RTTI_DEFINITION(ngraph::pass::SimplifyCTCGreedyDecoderSeqLen, "SimplifyCTCGreedyDecoder", 0); @@ -18,19 +17,20 @@ ngraph::pass::SimplifyCTCGreedyDecoderSeqLen::SimplifyCTCGreedyDecoderSeqLen() { auto decoder = pattern::wrap_type(); ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher& m) { - auto decoder_seq_len = std::dynamic_pointer_cast (m.get_match_root()); + auto decoder_seq_len = std::dynamic_pointer_cast(m.get_match_root()); if (!decoder_seq_len) { return false; } if (decoder_seq_len->get_input_size() > 2) { const auto data_pshape = 
decoder_seq_len->get_input_partial_shape(0); - auto blank_index = std::dynamic_pointer_cast(decoder_seq_len->input_value(2).get_node_shared_ptr()); + auto blank_index = std::dynamic_pointer_cast( + decoder_seq_len->input_value(2).get_node_shared_ptr()); if (!blank_index || data_pshape.rank().is_dynamic() || data_pshape[2].is_dynamic()) { return false; } - const std::vector &blank_index_values = blank_index->cast_vector(); + const std::vector& blank_index_values = blank_index->cast_vector(); const auto num_classes = decoder_seq_len->get_input_partial_shape(0)[2].get_length(); if (blank_index_values[0] != (num_classes - 1)) { return false; @@ -40,9 +40,9 @@ ngraph::pass::SimplifyCTCGreedyDecoderSeqLen::SimplifyCTCGreedyDecoderSeqLen() { element::Type data_type = decoder_seq_len->input_value(0).get_element_type(); element::Type seq_len_type = decoder_seq_len->input_value(1).get_element_type(); // Transposing input data channels from [N, T, C] to [T, N, C]. Need for compatible with CTCGreedyDecoder v1 - auto transpose = std::make_shared(decoder_seq_len->input_value(0), - ngraph::opset6::Constant::create(element::i32, - Shape({3}), {1, 0, 2})); + auto transpose = std::make_shared( + decoder_seq_len->input_value(0), + ngraph::opset6::Constant::create(element::i32, Shape({3}), {1, 0, 2})); // Receive time and batch dimensions and concatenate to [T, N] tensor shapes auto data_shape = std::make_shared(decoder_seq_len->input_value(0)); auto axisT = ngraph::opset6::Constant::create(seq_len_type, Shape{}, {0}); @@ -61,26 +61,25 @@ ngraph::pass::SimplifyCTCGreedyDecoderSeqLen::SimplifyCTCGreedyDecoderSeqLen() { auto plusT_scalar = std::make_shared(plusT, const_plusT); auto range1T = std::make_shared(start, plusT_scalar, step, seq_len_type); - auto mask_shape = std::make_shared( - OutputVector{T->output(0), N->output(0)}, 0); + auto mask_shape = std::make_shared(OutputVector{T->output(0), N->output(0)}, 0); // Generate 2D tensor [T, N] for seq mask - auto upper_bounds = std::make_shared( - decoder_seq_len->input_value(1), mask_shape->output(0)); - auto transpose_upper_bounds = std::make_shared(upper_bounds->output(0), - ngraph::opset6::Constant::create(seq_len_type, - Shape({2}), {1, 0})); + auto upper_bounds = + std::make_shared(decoder_seq_len->input_value(1), mask_shape->output(0)); + auto transpose_upper_bounds = std::make_shared( + upper_bounds->output(0), + ngraph::opset6::Constant::create(seq_len_type, Shape({2}), {1, 0})); // Compute boolean sequence mask - auto bool_seq_mask = std::make_shared(transpose_upper_bounds->output(0), - range1T->output(0)); + auto bool_seq_mask = + std::make_shared(transpose_upper_bounds->output(0), range1T->output(0)); // Generate resulted seq mask auto mask_val_true = ngraph::opset6::Constant::create(seq_len_type, Shape{}, {1}); auto mask_val_false = ngraph::opset6::Constant::create(seq_len_type, Shape{}, {0}); auto seq_mask = std::make_shared(bool_seq_mask, mask_val_true, mask_val_false); - auto transpose_seq_mask = std::make_shared(seq_mask->output(0), - ngraph::opset6::Constant::create(seq_len_type, - Shape({2}), {1, 0})); + auto transpose_seq_mask = std::make_shared( + seq_mask->output(0), + ngraph::opset6::Constant::create(seq_len_type, Shape({2}), {1, 0})); auto transpose_seq_mask_f = std::make_shared(transpose_seq_mask->output(0), data_type); // Create CTCGreedyDecoder with original merge_repeated attribute and connect data and resulted seq_mask auto decoder = std::make_shared(transpose, @@ -96,8 +95,8 @@ 
ngraph::pass::SimplifyCTCGreedyDecoderSeqLen::SimplifyCTCGreedyDecoderSeqLen() { element::Type ci_type = decoder_seq_len->get_classes_index_type(); element::Type sl_type = decoder_seq_len->get_sequence_length_type(); - // CTCGreedyDecoder return floating point output. For Normalize output we need to convert output to classes_index_type - // Receive the first output with correct classes_index_type + // CTCGreedyDecoder return floating point output. For Normalize output we need to convert output to + // classes_index_type Receive the first output with correct classes_index_type auto output_i = std::make_shared(squeeze1_output_f->output(0), ci_type); auto minus1 = opset6::Constant::create(ci_type, Shape{}, {-1}); // Get to know where equal -1 @@ -106,18 +105,39 @@ ngraph::pass::SimplifyCTCGreedyDecoderSeqLen::SimplifyCTCGreedyDecoderSeqLen() { // Compute output seq mask auto seq_mask_const0 = opset6::Constant::create(ci_type, Shape{1}, {0}); auto seq_mask_const1 = opset6::Constant::create(ci_type, Shape{1}, {1}); - auto output_seq_mask = std::make_shared(where_equal_minus1, seq_mask_const0, seq_mask_const1); + auto output_seq_mask = + std::make_shared(where_equal_minus1, seq_mask_const0, seq_mask_const1); auto seq_mask_axis = opset6::Constant::create(ci_type, Shape{1}, {1}); // Receive the second output auto output_seq_len = std::make_shared(output_seq_mask, seq_mask_axis); // Receive the second output with correct seq_len_type auto output_seq_len_i = std::make_shared(output_seq_len->output(0), sl_type); - ngraph::copy_runtime_info(decoder_seq_len, {transpose, decoder, data_shape, T, N, plusT, plusT_scalar, range1T, mask_shape, upper_bounds, - squeeze2_output_f, squeeze1_output_f, transpose_upper_bounds, bool_seq_mask, seq_mask, transpose_seq_mask, - transpose_seq_mask_f, output_i, where_equal_minus1, output_seq_mask, output_seq_len, output_seq_len_i}); + ngraph::copy_runtime_info(decoder_seq_len, + {transpose, + decoder, + data_shape, + T, + N, + plusT, + plusT_scalar, + range1T, + mask_shape, + upper_bounds, + squeeze2_output_f, + squeeze1_output_f, + transpose_upper_bounds, + bool_seq_mask, + seq_mask, + transpose_seq_mask, + transpose_seq_mask_f, + output_i, + where_equal_minus1, + output_seq_mask, + output_seq_len, + output_seq_len_i}); - output_i->set_friendly_name(decoder_seq_len->get_friendly_name()+".0"); - output_seq_len_i->set_friendly_name(decoder_seq_len->get_friendly_name()+".1"); + output_i->set_friendly_name(decoder_seq_len->get_friendly_name() + ".0"); + output_seq_len_i->set_friendly_name(decoder_seq_len->get_friendly_name() + ".1"); ngraph::replace_node(decoder_seq_len, {output_i->output(0), output_seq_len_i->output(0)}); return true; diff --git a/src/common/transformations/src/transformations/op_conversions/softmax_decomposition.cpp b/src/common/transformations/src/transformations/op_conversions/softmax_decomposition.cpp index 569c5b60a84..f26c33012bf 100644 --- a/src/common/transformations/src/transformations/op_conversions/softmax_decomposition.cpp +++ b/src/common/transformations/src/transformations/op_conversions/softmax_decomposition.cpp @@ -2,17 +2,16 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "itt.hpp" -#include - #include +#include +#include +#include +#include +#include +#include #include -#include -#include -#include -#include -#include +#include "itt.hpp" NGRAPH_RTTI_DEFINITION(ngraph::pass::SoftmaxDecomposition, "SoftmaxDecomposition", 0); diff --git a/src/common/transformations/src/transformations/op_conversions/softplus_decomposition.cpp 
b/src/common/transformations/src/transformations/op_conversions/softplus_decomposition.cpp index 66aef8cdb98..113bda734f5 100644 --- a/src/common/transformations/src/transformations/op_conversions/softplus_decomposition.cpp +++ b/src/common/transformations/src/transformations/op_conversions/softplus_decomposition.cpp @@ -2,15 +2,15 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "itt.hpp" #include "transformations/op_conversions/softplus_decomposition.hpp" #include +#include +#include +#include #include -#include -#include -#include +#include "itt.hpp" NGRAPH_RTTI_DEFINITION(ngraph::pass::SoftPlusDecomposition, "SoftPlusDecomposition", 0); @@ -21,7 +21,7 @@ ngraph::pass::SoftPlusDecomposition::SoftPlusDecomposition() { auto softplus = std::make_shared(input); ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher& m) { - auto &pattern_to_output = m.get_pattern_value_map(); + auto& pattern_to_output = m.get_pattern_value_map(); auto softplus_input = pattern_to_output.at(input); auto softplus_node = pattern_to_output.at(softplus).get_node_shared_ptr(); @@ -30,7 +30,8 @@ ngraph::pass::SoftPlusDecomposition::SoftPlusDecomposition() { } auto exp = std::make_shared(softplus_input); - auto add = std::make_shared(exp, + auto add = std::make_shared( + exp, opset4::Constant::create(softplus_input.get_element_type(), ngraph::Shape{1}, {1.0})); auto log = std::make_shared(add); diff --git a/src/common/transformations/src/transformations/opset_conversions/convert_opset2_to_opset1.cpp b/src/common/transformations/src/transformations/opset_conversions/convert_opset2_to_opset1.cpp index 15fffe231d1..6886b74b0ef 100644 --- a/src/common/transformations/src/transformations/opset_conversions/convert_opset2_to_opset1.cpp +++ b/src/common/transformations/src/transformations/opset_conversions/convert_opset2_to_opset1.cpp @@ -4,14 +4,13 @@ #include "transformations/opset_conversions/convert_opset2_to_opset1.hpp" -#include "transformations/op_conversions/convert_batch_to_space.hpp" -#include "transformations/op_conversions/convert_space_to_batch.hpp" -#include "itt.hpp" - #include +#include #include -#include +#include "itt.hpp" +#include "transformations/op_conversions/convert_batch_to_space.hpp" +#include "transformations/op_conversions/convert_space_to_batch.hpp" NGRAPH_RTTI_DEFINITION(ngraph::pass::ConvertOpSet2ToOpSet1, "ConvertOpSet2ToOpSet1", 0); diff --git a/src/common/transformations/src/transformations/opset_conversions/convert_opset3_to_opset2.cpp b/src/common/transformations/src/transformations/opset_conversions/convert_opset3_to_opset2.cpp index fb9b36cd6a2..2a910dae4ad 100644 --- a/src/common/transformations/src/transformations/opset_conversions/convert_opset3_to_opset2.cpp +++ b/src/common/transformations/src/transformations/opset_conversions/convert_opset3_to_opset2.cpp @@ -4,17 +4,16 @@ #include "transformations/opset_conversions/convert_opset3_to_opset2.hpp" +#include +#include +#include + +#include "itt.hpp" #include "transformations/op_conversions/convert_broadcast3.hpp" #include "transformations/op_conversions/convert_shapeof3.hpp" #include "transformations/op_conversions/convert_shuffle_channels3.hpp" #include "transformations/op_conversions/convert_topk3.hpp" #include "transformations/op_conversions/softplus_decomposition.hpp" -#include "itt.hpp" - -#include -#include - -#include NGRAPH_RTTI_DEFINITION(ngraph::pass::ConvertOpSet3ToOpSet2, "ConvertOpSet3ToOpSet2", 0); diff --git a/src/common/transformations/src/transformations/resolve_names_collisions.cpp 
b/src/common/transformations/src/transformations/resolve_names_collisions.cpp index fda4d38e6dd..a1066b3e0be 100644 --- a/src/common/transformations/src/transformations/resolve_names_collisions.cpp +++ b/src/common/transformations/src/transformations/resolve_names_collisions.cpp @@ -2,11 +2,12 @@ // SPDX-License-Identifier: Apache-2.0 // +#include "transformations/resolve_names_collisions.hpp" + #include #include #include -#include "transformations/resolve_names_collisions.hpp" #include "openvino/op/parameter.hpp" #include "openvino/op/result.hpp" #include "openvino/op/sink.hpp" @@ -27,7 +28,7 @@ bool ov::pass::ResolveNameCollisions::run_on_model(const std::shared_ptr(node) || - dynamic_cast(node)) { + if (dynamic_cast(node) || dynamic_cast(node)) { // Resolve names for public ops with autogenerated name if (node->m_friendly_name.empty()) nodes_with_conflicts.emplace_back(node); @@ -78,4 +78,3 @@ bool ov::pass::ResolveNameCollisions::run_on_model(const std::shared_ptrget_rt_info().emplace(NmsSelectedIndices::get_type_info_static(), NmsSelectedIndices{}); } -bool ov::has_nms_selected_indices(const Node * node) { +bool ov::has_nms_selected_indices(const Node* node) { return node->get_rt_info().count(NmsSelectedIndices::get_type_info_static()); } diff --git a/src/common/transformations/src/transformations/smart_reshape/broadcast_const_range_replacement.cpp b/src/common/transformations/src/transformations/smart_reshape/broadcast_const_range_replacement.cpp index bc987f6e40f..6b5202fd5bf 100644 --- a/src/common/transformations/src/transformations/smart_reshape/broadcast_const_range_replacement.cpp +++ b/src/common/transformations/src/transformations/smart_reshape/broadcast_const_range_replacement.cpp @@ -2,19 +2,16 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "transformations/utils/utils.hpp" - #include -#include - #include -#include #include +#include #include #include +#include #include "itt.hpp" - +#include "transformations/utils/utils.hpp" NGRAPH_RTTI_DEFINITION(ngraph::pass::BroadcastConstRangeReplacement, "BroadcastConstRangeReplacement", 0); @@ -35,7 +32,8 @@ ngraph::pass::BroadcastConstRangeReplacement::BroadcastConstRangeReplacement() { const auto data_const_out = broadcast->get_input_source_output(0); const auto target_shape_out = broadcast->get_input_source_output(1); - const auto const_node = std::dynamic_pointer_cast(data_const_out.get_node_shared_ptr()); + const auto const_node = + std::dynamic_pointer_cast(data_const_out.get_node_shared_ptr()); if (!const_node || !const_node->get_element_type().is_integral_number()) return false; @@ -52,19 +50,23 @@ ngraph::pass::BroadcastConstRangeReplacement::BroadcastConstRangeReplacement() { std::vector sequence_pattern(elem_count); std::iota(sequence_pattern.begin(), sequence_pattern.end(), 0); - const auto &const_values = const_node->cast_vector(); + const auto& const_values = const_node->cast_vector(); // Check if the value sequence is contiguous if (const_values != sequence_pattern) return false; const auto data_elem_type = data_const_out.get_element_type(); - const auto target_dim_index = std::distance(const_node_shape.cbegin(), std::find(const_node_shape.cbegin(), const_node_shape.cend(), elem_count)); + const auto target_dim_index = + std::distance(const_node_shape.cbegin(), + std::find(const_node_shape.cbegin(), const_node_shape.cend(), elem_count)); const int64_t target_dim_neg_index = target_dim_index - const_node_shape.size(); const auto axis_node = ngraph::opset8::Constant::create(ngraph::element::i32, {}, {0}); - const 
auto target_dim_index_node = ngraph::opset8::Constant::create(ngraph::element::i64, {}, {target_dim_neg_index}); - const auto gather_dim = std::make_shared(target_shape_out, target_dim_index_node, axis_node); + const auto target_dim_index_node = + ngraph::opset8::Constant::create(ngraph::element::i64, {}, {target_dim_neg_index}); + const auto gather_dim = + std::make_shared(target_shape_out, target_dim_index_node, axis_node); // If the corresponding target dim is 1, use the original end of range const auto one_dim_const = ngraph::opset8::Constant::create(target_shape_out.get_element_type(), {}, {1}); @@ -77,17 +79,31 @@ ngraph::pass::BroadcastConstRangeReplacement::BroadcastConstRangeReplacement() { const auto select_end = std::make_shared(dim_check_one, original_end, cast_gather_dim); const auto default_range_step = ngraph::opset8::Constant::create(data_elem_type, {}, {1}); - const auto range = std::make_shared(start, select_end, default_range_step, data_elem_type); + const auto range = + std::make_shared(start, select_end, default_range_step, data_elem_type); // Unsqueeze the output of the Range op to the original shape of data input std::vector final_shape_axes(const_node_shape.size()); std::iota(final_shape_axes.begin(), final_shape_axes.end(), 0); final_shape_axes.erase(final_shape_axes.begin() + target_dim_index); - const auto axes_to_unsqueeze = ngraph::opset8::Constant::create(ngraph::element::i64, {final_shape_axes.size()}, final_shape_axes); + const auto axes_to_unsqueeze = + ngraph::opset8::Constant::create(ngraph::element::i64, {final_shape_axes.size()}, final_shape_axes); const auto unsqueeze_range = std::make_shared(range, axes_to_unsqueeze); - copy_runtime_info(const_node, {axis_node, target_dim_index_node, gather_dim, cast_gather_dim, one_dim_const, dim_check_one, - start, original_end, select_end, default_range_step, range, axes_to_unsqueeze, unsqueeze_range}); + copy_runtime_info(const_node, + {axis_node, + target_dim_index_node, + gather_dim, + cast_gather_dim, + one_dim_const, + dim_check_one, + start, + original_end, + select_end, + default_range_step, + range, + axes_to_unsqueeze, + unsqueeze_range}); broadcast->input(0).replace_source_output(unsqueeze_range); return false; }; diff --git a/src/common/transformations/src/transformations/smart_reshape/proposal_scales_stridedslice.cpp b/src/common/transformations/src/transformations/smart_reshape/proposal_scales_stridedslice.cpp index c6353c71246..b95fb129b39 100644 --- a/src/common/transformations/src/transformations/smart_reshape/proposal_scales_stridedslice.cpp +++ b/src/common/transformations/src/transformations/smart_reshape/proposal_scales_stridedslice.cpp @@ -48,8 +48,8 @@ ngraph::pass::Proposal1Scales::Proposal1Scales() { (shape[1].get_length() == 3 || shape[1].get_length() == 4); }); auto convert_label = ngraph::pattern::wrap_type({parameter_label}); - auto param_or_convert = std::make_shared(ngraph::OutputVector{parameter_label, - convert_label}); + auto param_or_convert = + std::make_shared(ngraph::OutputVector{parameter_label, convert_label}); auto reshape_label = ngraph::pattern::wrap_type( {param_or_convert, ngraph::pattern::wrap_type()}, [](const Output& output) { @@ -76,8 +76,8 @@ ngraph::pass::Proposal4Scales::Proposal4Scales() { (shape[1].get_length() == 3 || shape[1].get_length() == 4); }); auto convert_label = ngraph::pattern::wrap_type({parameter_label}); - auto param_or_convert = std::make_shared(ngraph::OutputVector{parameter_label, - convert_label}); + auto param_or_convert = + 
std::make_shared(ngraph::OutputVector{parameter_label, convert_label}); auto reshape_label = ngraph::pattern::wrap_type( {param_or_convert, ngraph::pattern::wrap_type()}, [](const Output& output) { diff --git a/src/common/transformations/src/transformations/smart_reshape/strided_slice_squeeze.cpp b/src/common/transformations/src/transformations/smart_reshape/strided_slice_squeeze.cpp index 789e6e689d5..a440e4e37ec 100644 --- a/src/common/transformations/src/transformations/smart_reshape/strided_slice_squeeze.cpp +++ b/src/common/transformations/src/transformations/smart_reshape/strided_slice_squeeze.cpp @@ -106,8 +106,7 @@ ngraph::pass::StridedSliceSqueeze::StridedSliceSqueeze() { shrink_axis_mask, ellipsis_mask); - return replace_output_update_name(squeeze->output(0), - new_slice->output(squeeze->input_value(0).get_index())); + return replace_output_update_name(squeeze->output(0), new_slice->output(squeeze->input_value(0).get_index())); }; auto m = std::make_shared(squeeze_label /*, matcher_name */); register_matcher(m, callback); @@ -125,11 +124,13 @@ ngraph::pass::SqueezeStridedSlice::SqueezeStridedSlice() { matcher_pass_callback callback = [](pattern::Matcher& m) -> bool { auto slice = std::dynamic_pointer_cast(m.get_match_root()); - if (!slice) return false; + if (!slice) + return false; auto squeeze = slice->get_input_node_shared_ptr(0); const auto& const_axes = std::dynamic_pointer_cast(squeeze->get_input_node_shared_ptr(1)); - if (!const_axes) return false; + if (!const_axes) + return false; auto begin = std::dynamic_pointer_cast(slice->input_value(1).get_node_shared_ptr()); auto end = std::dynamic_pointer_cast(slice->input_value(2).get_node_shared_ptr()); diff --git a/src/common/transformations/src/transformations/utils/utils.cpp b/src/common/transformations/src/transformations/utils/utils.cpp index b0a844e27bc..c5164fa3d21 100644 --- a/src/common/transformations/src/transformations/utils/utils.cpp +++ b/src/common/transformations/src/transformations/utils/utils.cpp @@ -8,12 +8,12 @@ #include #include -#include #include #include -#include #include +#include #include +#include namespace ngraph { namespace op { @@ -50,8 +50,7 @@ bool get_single_value(const std::shared_ptr& const_node, float& va } } -std::shared_ptr normalize_constant(const std::shared_ptr& constant, - const PartialShape& shape) { +std::shared_ptr normalize_constant(const std::shared_ptr& constant, const PartialShape& shape) { auto const_shape = constant->get_shape(); if (static_cast(const_shape.size()) == shape.rank().get_length()) { return constant; @@ -65,11 +64,14 @@ std::shared_ptr normalize_constant(const std::shared_ptr& co } std::shared_ptr broadcastTo(const Output& input, const ngraph::Shape& shape) { - return std::make_shared(input, op::Constant::create(ngraph::element::i64, Shape {shape.size()}, shape)); + return std::make_shared(input, + op::Constant::create(ngraph::element::i64, Shape{shape.size()}, shape)); } -std::shared_ptr reshapeTo(const Output & input, const Shape& shape) { - return std::make_shared(input, op::Constant::create(element::i64, Shape{shape.size()}, shape), true); +std::shared_ptr reshapeTo(const Output& input, const Shape& shape) { + return std::make_shared(input, + op::Constant::create(element::i64, Shape{shape.size()}, shape), + true); } bool constantIsEqualTo(const std::shared_ptr& const_node, float value, float eps) { @@ -81,16 +83,17 @@ bool constantIsEqualTo(const std::shared_ptr& const_node, return std::abs(res - value) < eps; } -bool has_f16_constants(const std::shared_ptr 
&function) { - for (auto & layer : function->get_ops()) { - if (std::dynamic_pointer_cast(layer) && layer->output(0).get_element_type() == ngraph::element::f16) { +bool has_f16_constants(const std::shared_ptr& function) { + for (auto& layer : function->get_ops()) { + if (std::dynamic_pointer_cast(layer) && + layer->output(0).get_element_type() == ngraph::element::f16) { return true; } } return false; } -bool check_for_broadcast(const ngraph::PartialShape &ref_shape, const ngraph::PartialShape &other_shape) { +bool check_for_broadcast(const ngraph::PartialShape& ref_shape, const ngraph::PartialShape& other_shape) { // Check that other_shape doesn't broadcast ref_shape if (ref_shape.rank().is_dynamic() || other_shape.rank().is_dynamic() || other_shape.size() > ref_shape.size()) { return true; @@ -111,7 +114,8 @@ bool check_for_broadcast(const ngraph::PartialShape &ref_shape, const ngraph::Pa return false; } -std::shared_ptr activation(const std::string& activation_name, const ngraph::Output& apply_to) { +std::shared_ptr activation(const std::string& activation_name, + const ngraph::Output& apply_to) { if (activation_name == "relu") { return std::make_shared(apply_to); } else if (activation_name == "sigmoid") { @@ -123,9 +127,9 @@ std::shared_ptr activation(const std::string& activation_name, con } } -bool is_seq_len_provided(const std::shared_ptr &seq_len_input, int64_t max_seq_len) { - if (const auto &seq_len_const = std::dynamic_pointer_cast(seq_len_input)) { - const auto &seq_len_values = seq_len_const->cast_vector(); +bool is_seq_len_provided(const std::shared_ptr& seq_len_input, int64_t max_seq_len) { + if (const auto& seq_len_const = std::dynamic_pointer_cast(seq_len_input)) { + const auto& seq_len_values = seq_len_const->cast_vector(); return std::any_of(seq_len_values.begin(), seq_len_values.end(), [max_seq_len](const int64_t val) { return val != max_seq_len; }); @@ -155,21 +159,23 @@ std::vector> get_node_target_inputs(const std::shared_ptr& nod return result; } -std::shared_ptr node_to_get_shape_value_of_indices_from_shape_node(const std::shared_ptr& shape_node, - const std::vector& indices) { - return make_try_fold( - shape_node, - v0::Constant::create(ngraph::element::i64, {indices.size()}, indices), - v0::Constant::create(ngraph::element::i64, {}, {0})); +std::shared_ptr node_to_get_shape_value_of_indices_from_shape_node( + const std::shared_ptr& shape_node, + const std::vector& indices) { + return make_try_fold(shape_node, + v0::Constant::create(ngraph::element::i64, {indices.size()}, indices), + v0::Constant::create(ngraph::element::i64, {}, {0})); } -std::shared_ptr node_to_get_shape_value_of_indices_from_shape_source(const ngraph::Output& shape_source, +std::shared_ptr node_to_get_shape_value_of_indices_from_shape_source( + const ngraph::Output& shape_source, const std::vector& indices) { const auto& shape_node = make_try_fold(shape_source); return node_to_get_shape_value_of_indices_from_shape_node(shape_node, indices); } -bool shapes_equal_except_dynamic_expected_batch(const ngraph::PartialShape& expected, const ngraph::PartialShape& actual) { +bool shapes_equal_except_dynamic_expected_batch(const ngraph::PartialShape& expected, + const ngraph::PartialShape& actual) { if (expected[0].is_static()) { return actual == expected; } else { @@ -179,9 +185,7 @@ bool shapes_equal_except_dynamic_expected_batch(const ngraph::PartialShape& expe } } -void visit_shape_path(Node * node, - std::unordered_set& visited, - std::function func) { +void visit_shape_path(Node* node, 
std::unordered_set& visited, std::function func) { if (!node) return; visited.insert(node); @@ -190,7 +194,8 @@ void visit_shape_path(Node * node, auto curr_node = nodes.front(); nodes.pop_front(); // Do not check if already visited - if (ngraph::is_type(curr_node) || ngraph::is_type(curr_node)) { + if (ngraph::is_type(curr_node) || + ngraph::is_type(curr_node)) { continue; } @@ -198,7 +203,8 @@ void visit_shape_path(Node * node, for (auto& input_value : curr_node->input_values()) { // continue searching const auto& input_node = input_value.get_node(); - if (visited.count(input_node)) continue; + if (visited.count(input_node)) + continue; nodes.push_front(input_node); visited.insert(input_node); } @@ -238,10 +244,10 @@ bool is_dequantization_subgraph(const Output& node) { return input_type.is_integral() && output_type.is_real(); } -bool can_eliminate_eltwise_node(const std::shared_ptr& eltwise, const Output& constant, const Output& non_constant_input) { - if (!is_type(eltwise) && - !is_type(eltwise) && - !is_type(eltwise) && +bool can_eliminate_eltwise_node(const std::shared_ptr& eltwise, + const Output& constant, + const Output& non_constant_input) { + if (!is_type(eltwise) && !is_type(eltwise) && !is_type(eltwise) && !is_type(eltwise)) { return false; } @@ -261,42 +267,41 @@ bool can_eliminate_eltwise_node(const std::shared_ptr& eltwise, const Outp float actual_const = 0; const void* data_ptr = constant_ptr->get_data_ptr(); switch (constant_ptr->get_element_type()) { - case element::f32: - actual_const = reinterpret_cast(data_ptr)[0]; - break; - case element::i32: - actual_const = static_cast(reinterpret_cast(data_ptr)[0]); - break; - case element::u32: - actual_const = static_cast(reinterpret_cast(data_ptr)[0]); - break; - case element::i64: - actual_const = static_cast(reinterpret_cast(data_ptr)[0]); - break; - case element::u64: - actual_const = static_cast(reinterpret_cast(data_ptr)[0]); - break; - case element::i8: - actual_const = static_cast(reinterpret_cast(data_ptr)[0]); - break; - case element::u8: - actual_const = static_cast(reinterpret_cast(data_ptr)[0]); - break; - case element::i16: - actual_const = static_cast(reinterpret_cast(data_ptr)[0]); - break; - case element::u16: - actual_const = static_cast(reinterpret_cast(data_ptr)[0]); - break; - case element::f64: - actual_const = static_cast(reinterpret_cast(data_ptr)[0]); - break; - default: - return false; + case element::f32: + actual_const = reinterpret_cast(data_ptr)[0]; + break; + case element::i32: + actual_const = static_cast(reinterpret_cast(data_ptr)[0]); + break; + case element::u32: + actual_const = static_cast(reinterpret_cast(data_ptr)[0]); + break; + case element::i64: + actual_const = static_cast(reinterpret_cast(data_ptr)[0]); + break; + case element::u64: + actual_const = static_cast(reinterpret_cast(data_ptr)[0]); + break; + case element::i8: + actual_const = static_cast(reinterpret_cast(data_ptr)[0]); + break; + case element::u8: + actual_const = static_cast(reinterpret_cast(data_ptr)[0]); + break; + case element::i16: + actual_const = static_cast(reinterpret_cast(data_ptr)[0]); + break; + case element::u16: + actual_const = static_cast(reinterpret_cast(data_ptr)[0]); + break; + case element::f64: + actual_const = static_cast(reinterpret_cast(data_ptr)[0]); + break; + default: + return false; } float expected_const = 0; - if (is_type(eltwise) || - is_type(eltwise)) { + if (is_type(eltwise) || is_type(eltwise)) { expected_const = 1; } if (actual_const != expected_const) { From 
32edd596e3a48c01a7a507da75d86dadff669bcb Mon Sep 17 00:00:00 2001 From: Irina Efode Date: Fri, 4 Mar 2022 14:42:16 +0300 Subject: [PATCH 182/310] [IE TESTS] Functional test review: Part 4 (#10772) * [IE TESTS] Move specific import_export_tests to gna and myriad * add --- .../base}/import_export_base.cpp | 2 +- .../base}/import_export_base.hpp | 0 .../import_export_batch_size.cpp | 2 +- .../import_export_multi_inputs.cpp | 2 +- .../import_reshape_permute_conv.cpp | 156 ++++++++++++++++++ .../import_reshape_permute_conv.cpp | 117 ------------- .../myriad/import_export/import_nonzero.cpp | 116 +++++++++++++ .../import_export_tests/import_nonzero.cpp | 41 ----- .../import_export_tests/import_nonzero.hpp | 16 -- .../import_reshape_permute_conv.hpp | 16 -- .../import_export_tests/import_nonzero.cpp | 27 --- .../import_reshape_permute_conv.cpp | 44 ----- 12 files changed, 275 insertions(+), 264 deletions(-) rename src/tests/functional/plugin/{shared/src/base/import_export_base => gna/Import_export_tests/base}/import_export_base.cpp (98%) rename src/tests/functional/plugin/{shared/include/base/import_export_base => gna/Import_export_tests/base}/import_export_base.hpp (100%) rename src/tests/functional/plugin/gna/{shared_tests_instances/import_export_tests => Import_export_tests}/import_export_multi_inputs.cpp (98%) create mode 100644 src/tests/functional/plugin/gna/Import_export_tests/import_reshape_permute_conv.cpp delete mode 100644 src/tests/functional/plugin/gna/shared_tests_instances/import_export_tests/import_reshape_permute_conv.cpp create mode 100644 src/tests/functional/plugin/myriad/import_export/import_nonzero.cpp delete mode 100644 src/tests/functional/plugin/myriad/shared_tests_instances/import_export_tests/import_nonzero.cpp delete mode 100644 src/tests/functional/plugin/shared/include/import_export_tests/import_nonzero.hpp delete mode 100644 src/tests/functional/plugin/shared/include/import_export_tests/import_reshape_permute_conv.hpp delete mode 100644 src/tests/functional/plugin/shared/src/import_export_tests/import_nonzero.cpp delete mode 100644 src/tests/functional/plugin/shared/src/import_export_tests/import_reshape_permute_conv.cpp diff --git a/src/tests/functional/plugin/shared/src/base/import_export_base/import_export_base.cpp b/src/tests/functional/plugin/gna/Import_export_tests/base/import_export_base.cpp similarity index 98% rename from src/tests/functional/plugin/shared/src/base/import_export_base/import_export_base.cpp rename to src/tests/functional/plugin/gna/Import_export_tests/base/import_export_base.cpp index 1607d2cf3e1..3c371158ff0 100644 --- a/src/tests/functional/plugin/shared/src/base/import_export_base/import_export_base.cpp +++ b/src/tests/functional/plugin/gna/Import_export_tests/base/import_export_base.cpp @@ -2,7 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "base/import_export_base/import_export_base.hpp" +#include "import_export_base.hpp" #include diff --git a/src/tests/functional/plugin/shared/include/base/import_export_base/import_export_base.hpp b/src/tests/functional/plugin/gna/Import_export_tests/base/import_export_base.hpp similarity index 100% rename from src/tests/functional/plugin/shared/include/base/import_export_base/import_export_base.hpp rename to src/tests/functional/plugin/gna/Import_export_tests/base/import_export_base.hpp diff --git a/src/tests/functional/plugin/gna/Import_export_tests/import_export_batch_size.cpp b/src/tests/functional/plugin/gna/Import_export_tests/import_export_batch_size.cpp index 
2907da9338a..d916b55f0bc 100644 --- a/src/tests/functional/plugin/gna/Import_export_tests/import_export_batch_size.cpp +++ b/src/tests/functional/plugin/gna/Import_export_tests/import_export_batch_size.cpp @@ -13,7 +13,7 @@ #include #include "ngraph_functions/builders.hpp" -#include "base/import_export_base/import_export_base.hpp" +#include "base/import_export_base.hpp" namespace LayerTestDefinitions { diff --git a/src/tests/functional/plugin/gna/shared_tests_instances/import_export_tests/import_export_multi_inputs.cpp b/src/tests/functional/plugin/gna/Import_export_tests/import_export_multi_inputs.cpp similarity index 98% rename from src/tests/functional/plugin/gna/shared_tests_instances/import_export_tests/import_export_multi_inputs.cpp rename to src/tests/functional/plugin/gna/Import_export_tests/import_export_multi_inputs.cpp index bf465027f81..bfe4f6f269b 100644 --- a/src/tests/functional/plugin/gna/shared_tests_instances/import_export_tests/import_export_multi_inputs.cpp +++ b/src/tests/functional/plugin/gna/Import_export_tests/import_export_multi_inputs.cpp @@ -10,7 +10,7 @@ #include #include "ngraph_functions/builders.hpp" -#include "base/import_export_base/import_export_base.hpp" +#include "base/import_export_base.hpp" namespace LayerTestsDefinitions { diff --git a/src/tests/functional/plugin/gna/Import_export_tests/import_reshape_permute_conv.cpp b/src/tests/functional/plugin/gna/Import_export_tests/import_reshape_permute_conv.cpp new file mode 100644 index 00000000000..d5a6e201bae --- /dev/null +++ b/src/tests/functional/plugin/gna/Import_export_tests/import_reshape_permute_conv.cpp @@ -0,0 +1,156 @@ +// Copyright (C) 2018-2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "base/import_export_base.hpp" +#include "ngraph_functions/builders.hpp" + +namespace LayerTestsDefinitions { + +class ImportReshapePermuteConv : public FuncTestUtils::ImportNetworkTestBase { +protected: + void SetUp() override { + std::vector inputShape; + InferenceEngine::Precision netPrecision; + std::tie(inputShape, netPrecision, targetDevice, exportConfiguration, importConfiguration, applicationHeader) = this->GetParam(); + auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); + + auto params = ngraph::builder::makeParams(ngPrc, { inputShape }); + + std::vector outFormShapes1 = { 1, 1, 168, 2 }; + auto pattern1 = std::make_shared(ngraph::element::Type_t::i64, ngraph::Shape{ 4 }, outFormShapes1); + auto reshape1 = std::make_shared(params[0], pattern1, false); + + auto permute1 = std::make_shared(reshape1, + ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{ 4 }, { 0, 3, 1, 2 })); + + auto conv1 = ngraph::builder::makeConvolution(permute1, ngPrc, { 1, 8 }, { 1, 1 }, { 0, 0 }, { 0, 0 }, { 1, 1 }, + ngraph::op::PadType::VALID, 12); + + auto permute2 = std::make_shared(conv1, + ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{ 4 }, { 0, 2, 3, 1 })); + + std::vector outFormShapes2 = { 1, 1932 }; + auto pattern2 = std::make_shared(ngraph::element::Type_t::i64, ngraph::Shape{ 2 }, outFormShapes2); + auto reshape2 = std::make_shared(permute2, pattern2, false); + + ngraph::ResultVector results{ std::make_shared(reshape2) }; + function = std::make_shared(results, params, "ExportImportNetwork"); + }; +}; + +TEST_P(ImportReshapePermuteConv, CompareWithRefImpl) { + Run(); +}; + +} // namespace LayerTestsDefinitions + +using namespace LayerTestsDefinitions; + +namespace { + +class ImportExportGNAModelUnchanged : public 
ImportReshapePermuteConv { +private: + void exportImportNetwork() override { + { + std::ofstream out(fileName); + out.write(applicationHeader.c_str(), applicationHeader.size()); + executableNetwork.Export(out); + } + { + std::string appHeader(applicationHeader.size(), ' '); + std::fstream inputStream(fileName, std::ios_base::in | std::ios_base::binary); + if (inputStream.fail()) { + FAIL() << "Cannot open file to import model: " << fileName; + } + inputStream.read(&appHeader[0], applicationHeader.size()); + ASSERT_EQ(appHeader, applicationHeader); + executableNetwork = core->ImportNetwork(inputStream, targetDevice, configuration); + } + } + +protected: + void TearDown() override { + if (remove(fileName.c_str()) != 0) { + FAIL() << "Error: could not delete file " << fileName; + } + } + +private: + std::string fileName = "exported_model.blob"; +}; + +class ImportExportGNAModelChanged : public ImportExportGNAModelUnchanged {}; + +TEST_P(ImportExportGNAModelUnchanged, ReshapePermuteConv) { + TestRun(false); +}; + +TEST_P(ImportExportGNAModelChanged, ReshapePermuteConv) { + TestRun(true); +}; + +const std::vector> inputShapes = { + {1, 336} +}; + +const std::vector netPrecisions = { + InferenceEngine::Precision::FP32, + InferenceEngine::Precision::FP16 +}; + +const std::vector> exportConfigs = { + { + {"GNA_DEVICE_MODE", "GNA_SW_EXACT"}, + {"GNA_SCALE_FACTOR_0", "327.67"} + } +}; + +const std::vector> importConfigsChanged = { + { + {"GNA_DEVICE_MODE", "GNA_SW_EXACT"}, + {"GNA_SCALE_FACTOR_0", "32767"} + } +}; + +const std::vector> importConfigsUnchanged = { + { + {"GNA_DEVICE_MODE", "GNA_SW_EXACT"}, + {"GNA_SCALE_FACTOR_0", "327.67"} + }, + { + {"GNA_DEVICE_MODE", "GNA_SW_EXACT"}, + {"GNA_SCALE_FACTOR_0", "1"} + }, + { + {"GNA_DEVICE_MODE", "GNA_SW_EXACT"} + } +}; + +const std::vector appHeaders = { + "", + "APPLICATION_HEADER" +}; + +INSTANTIATE_TEST_SUITE_P(smoke_ImportNetworkGNA, ImportExportGNAModelUnchanged, + ::testing::Combine( + ::testing::ValuesIn(inputShapes), + ::testing::ValuesIn(netPrecisions), + ::testing::Values(CommonTestUtils::DEVICE_GNA), + ::testing::ValuesIn(exportConfigs), + ::testing::ValuesIn(importConfigsUnchanged), + ::testing::ValuesIn(appHeaders)), + ImportExportGNAModelUnchanged::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(smoke_ImportNetworkGNA, ImportExportGNAModelChanged, + ::testing::Combine( + ::testing::ValuesIn(inputShapes), + ::testing::ValuesIn(netPrecisions), + ::testing::Values(CommonTestUtils::DEVICE_GNA), + ::testing::ValuesIn(exportConfigs), + ::testing::ValuesIn(importConfigsChanged), + ::testing::ValuesIn(appHeaders)), + ImportExportGNAModelChanged::getTestCaseName); + +} // namespace + diff --git a/src/tests/functional/plugin/gna/shared_tests_instances/import_export_tests/import_reshape_permute_conv.cpp b/src/tests/functional/plugin/gna/shared_tests_instances/import_export_tests/import_reshape_permute_conv.cpp deleted file mode 100644 index 9c1d515244d..00000000000 --- a/src/tests/functional/plugin/gna/shared_tests_instances/import_export_tests/import_reshape_permute_conv.cpp +++ /dev/null @@ -1,117 +0,0 @@ -// Copyright (C) 2018-2022 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "import_export_tests/import_reshape_permute_conv.hpp" - -#include -#include - -using namespace LayerTestsDefinitions; - -namespace { - -class ImportExportGNAModelUnchanged : public ImportReshapePermuteConv { -private: - void exportImportNetwork() override { - { - std::ofstream out(fileName); - out.write(applicationHeader.c_str(), 
applicationHeader.size()); - executableNetwork.Export(out); - } - { - std::string appHeader(applicationHeader.size(), ' '); - std::fstream inputStream(fileName, std::ios_base::in | std::ios_base::binary); - if (inputStream.fail()) { - FAIL() << "Cannot open file to import model: " << fileName; - } - inputStream.read(&appHeader[0], applicationHeader.size()); - ASSERT_EQ(appHeader, applicationHeader); - executableNetwork = core->ImportNetwork(inputStream, targetDevice, configuration); - } - } - -protected: - void TearDown() override { - if (remove(fileName.c_str()) != 0) { - FAIL() << "Error: could not delete file " << fileName; - } - } - -private: - std::string fileName = "exported_model.blob"; -}; - -class ImportExportGNAModelChanged : public ImportExportGNAModelUnchanged {}; - -TEST_P(ImportExportGNAModelUnchanged, ReshapePermuteConv) { - TestRun(false); -}; - -TEST_P(ImportExportGNAModelChanged, ReshapePermuteConv) { - TestRun(true); -}; - -const std::vector> inputShapes = { - {1, 336} -}; - -const std::vector netPrecisions = { - InferenceEngine::Precision::FP32, - InferenceEngine::Precision::FP16 -}; - -const std::vector> exportConfigs = { - { - {"GNA_DEVICE_MODE", "GNA_SW_EXACT"}, - {"GNA_SCALE_FACTOR_0", "327.67"} - } -}; - -const std::vector> importConfigsChanged = { - { - {"GNA_DEVICE_MODE", "GNA_SW_EXACT"}, - {"GNA_SCALE_FACTOR_0", "32767"} - } -}; - -const std::vector> importConfigsUnchanged = { - { - {"GNA_DEVICE_MODE", "GNA_SW_EXACT"}, - {"GNA_SCALE_FACTOR_0", "327.67"} - }, - { - {"GNA_DEVICE_MODE", "GNA_SW_EXACT"}, - {"GNA_SCALE_FACTOR_0", "1"} - }, - { - {"GNA_DEVICE_MODE", "GNA_SW_EXACT"} - } -}; - -const std::vector appHeaders = { - "", - "APPLICATION_HEADER" -}; - -INSTANTIATE_TEST_SUITE_P(smoke_ImportNetworkGNA, ImportExportGNAModelUnchanged, - ::testing::Combine( - ::testing::ValuesIn(inputShapes), - ::testing::ValuesIn(netPrecisions), - ::testing::Values(CommonTestUtils::DEVICE_GNA), - ::testing::ValuesIn(exportConfigs), - ::testing::ValuesIn(importConfigsUnchanged), - ::testing::ValuesIn(appHeaders)), - ImportExportGNAModelUnchanged::getTestCaseName); - -INSTANTIATE_TEST_SUITE_P(smoke_ImportNetworkGNA, ImportExportGNAModelChanged, - ::testing::Combine( - ::testing::ValuesIn(inputShapes), - ::testing::ValuesIn(netPrecisions), - ::testing::Values(CommonTestUtils::DEVICE_GNA), - ::testing::ValuesIn(exportConfigs), - ::testing::ValuesIn(importConfigsChanged), - ::testing::ValuesIn(appHeaders)), - ImportExportGNAModelChanged::getTestCaseName); - -} // namespace diff --git a/src/tests/functional/plugin/myriad/import_export/import_nonzero.cpp b/src/tests/functional/plugin/myriad/import_export/import_nonzero.cpp new file mode 100644 index 00000000000..14d3aace585 --- /dev/null +++ b/src/tests/functional/plugin/myriad/import_export/import_nonzero.cpp @@ -0,0 +1,116 @@ +// Copyright (C) 2018-2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "ngraph/opsets/opset5.hpp" +#include "shared_test_classes/base/layer_test_utils.hpp" + +namespace LayerTestsDefinitions { + +typedef std::tuple< + std::vector, // Input Shape + InferenceEngine::Precision, // Network Precision + std::string, // Target Device + std::string // Application Header +> exportImportNetworkParams; + +class ImportNonZero : public testing::WithParamInterface, + virtual public LayerTestsUtils::LayerTestsCommon { +protected: + void SetUp() override { + InferenceEngine::Precision netPrecision; + ngraph::Shape inputShape; + std::tie(inputShape, netPrecision, targetDevice, applicationHeader) = 
this->GetParam(); + const auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); + + const auto parameter = std::make_shared(ngPrc, inputShape); + const auto nonZero = std::make_shared(parameter); + + function = std::make_shared(nonZero->outputs(), ngraph::ParameterVector{parameter}, "ExportImportNetwork"); + functionRefs = ngraph::clone_function(*function); + } + + void exportImportNetwork() { + std::stringstream strm; + strm.write(applicationHeader.c_str(), applicationHeader.size()); + executableNetwork.Export(strm); + + strm.seekg(0, strm.beg); + std::string appHeader(applicationHeader.size(), ' '); + strm.read(&appHeader[0], applicationHeader.size()); + ASSERT_EQ(appHeader, applicationHeader); + executableNetwork = core->ImportNetwork(strm, targetDevice, configuration); + } + + void Run() override { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + functionRefs = ngraph::clone_function(*function); + // load export configuration and save outputs + LoadNetwork(); + GenerateInputs(); + Infer(); + auto actualOutputs = GetOutputs(); + + auto referenceOutputs = CalculateRefs(); + Compare(referenceOutputs, actualOutputs); + + const auto compiledExecNetwork = executableNetwork; + exportImportNetwork(); + const auto importedExecNetwork = executableNetwork; + + GenerateInputs(); + Infer(); + + ASSERT_EQ(importedExecNetwork.GetInputsInfo().size(), compiledExecNetwork.GetInputsInfo().size()); + ASSERT_EQ(importedExecNetwork.GetOutputsInfo().size(), compiledExecNetwork.GetOutputsInfo().size()); + + for (const auto& next_input : importedExecNetwork.GetInputsInfo()) { + ASSERT_NO_THROW(compiledExecNetwork.GetInputsInfo()[next_input.first]); + Compare(next_input.second->getTensorDesc(), compiledExecNetwork.GetInputsInfo()[next_input.first]->getTensorDesc()); + } + for (const auto& next_output : importedExecNetwork.GetOutputsInfo()) { + ASSERT_NO_THROW(compiledExecNetwork.GetOutputsInfo()[next_output.first]); + } + auto importedOutputs = GetOutputs(); + + ASSERT_EQ(actualOutputs.size(), importedOutputs.size()); + + for (size_t i = 0; i < actualOutputs.size(); i++) { + Compare(actualOutputs[i]->getTensorDesc(), importedOutputs[i]->getTensorDesc()); + Compare(actualOutputs[i], importedOutputs[i]); + } + } + + + std::string applicationHeader; +}; + +TEST_P(ImportNonZero, CompareWithRefImpl) { + Run(); +}; + +} // namespace LayerTestsDefinitions + +using namespace LayerTestsDefinitions; + +namespace { + +const std::vector netPrecisions = { + InferenceEngine::Precision::FP32, +}; + +const std::vector appHeaders = { + "", + "APPLICATION_HEADER" +}; + +std::vector inputShape = ngraph::Shape{1000}; + +INSTANTIATE_TEST_SUITE_P(smoke_ImportNetworkCase, ImportNonZero, + ::testing::Combine( + ::testing::Values(inputShape), + ::testing::ValuesIn(netPrecisions), + ::testing::Values(CommonTestUtils::DEVICE_MYRIAD), + ::testing::ValuesIn(appHeaders))); + +} // namespace diff --git a/src/tests/functional/plugin/myriad/shared_tests_instances/import_export_tests/import_nonzero.cpp b/src/tests/functional/plugin/myriad/shared_tests_instances/import_export_tests/import_nonzero.cpp deleted file mode 100644 index 0a415c98a3a..00000000000 --- a/src/tests/functional/plugin/myriad/shared_tests_instances/import_export_tests/import_nonzero.cpp +++ /dev/null @@ -1,41 +0,0 @@ -// Copyright (C) 2018-2022 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "import_export_tests/import_nonzero.hpp" -#include "vpu/private_plugin_config.hpp" - -using namespace LayerTestsDefinitions; - -namespace { - 
-const std::vector netPrecisions = { - InferenceEngine::Precision::FP32, -}; - -const std::vector> exportConfigs = { - {} -}; - -const std::vector> importConfigs = { - {} -}; - -const std::vector appHeaders = { - "", - "APPLICATION_HEADER" -}; - -std::vector inputShape = ngraph::Shape{1000}; - -INSTANTIATE_TEST_SUITE_P(smoke_ImportNetworkCase, ImportNonZero, - ::testing::Combine( - ::testing::Values(inputShape), - ::testing::ValuesIn(netPrecisions), - ::testing::Values(CommonTestUtils::DEVICE_MYRIAD), - ::testing::ValuesIn(exportConfigs), - ::testing::ValuesIn(importConfigs), - ::testing::ValuesIn(appHeaders)), - ImportNonZero::getTestCaseName); - -} // namespace diff --git a/src/tests/functional/plugin/shared/include/import_export_tests/import_nonzero.hpp b/src/tests/functional/plugin/shared/include/import_export_tests/import_nonzero.hpp deleted file mode 100644 index d40dcae248c..00000000000 --- a/src/tests/functional/plugin/shared/include/import_export_tests/import_nonzero.hpp +++ /dev/null @@ -1,16 +0,0 @@ -// Copyright (C) 2018-2022 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include "base/import_export_base/import_export_base.hpp" - -namespace LayerTestsDefinitions { - -class ImportNonZero : public FuncTestUtils::ImportNetworkTestBase { -protected: - void SetUp() override; -}; - -} // namespace LayerTestsDefinitions diff --git a/src/tests/functional/plugin/shared/include/import_export_tests/import_reshape_permute_conv.hpp b/src/tests/functional/plugin/shared/include/import_export_tests/import_reshape_permute_conv.hpp deleted file mode 100644 index d95efdaea02..00000000000 --- a/src/tests/functional/plugin/shared/include/import_export_tests/import_reshape_permute_conv.hpp +++ /dev/null @@ -1,16 +0,0 @@ -// Copyright (C) 2018-2022 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include "base/import_export_base/import_export_base.hpp" - -namespace LayerTestsDefinitions { - -class ImportReshapePermuteConv : public FuncTestUtils::ImportNetworkTestBase { -protected: - void SetUp() override; -}; - -} // namespace LayerTestsDefinitions diff --git a/src/tests/functional/plugin/shared/src/import_export_tests/import_nonzero.cpp b/src/tests/functional/plugin/shared/src/import_export_tests/import_nonzero.cpp deleted file mode 100644 index d365d365c1f..00000000000 --- a/src/tests/functional/plugin/shared/src/import_export_tests/import_nonzero.cpp +++ /dev/null @@ -1,27 +0,0 @@ -// Copyright (C) 2018-2022 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "import_export_tests/import_nonzero.hpp" - -#include "ngraph/opsets/opset5.hpp" - -namespace LayerTestsDefinitions { - -void ImportNonZero::SetUp() { - InferenceEngine::Precision netPrecision; - ngraph::Shape inputShape; - std::tie(inputShape, netPrecision, targetDevice, exportConfiguration, importConfiguration, applicationHeader) = this->GetParam(); - const auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); - - const auto parameter = std::make_shared(ngPrc, inputShape); - const auto nonZero = std::make_shared(parameter); - - function = std::make_shared(nonZero->outputs(), ngraph::ParameterVector{parameter}, "ExportImportNetwork"); -} - -TEST_P(ImportNonZero, CompareWithRefImpl) { - Run(); -}; - -} // namespace LayerTestsDefinitions diff --git a/src/tests/functional/plugin/shared/src/import_export_tests/import_reshape_permute_conv.cpp b/src/tests/functional/plugin/shared/src/import_export_tests/import_reshape_permute_conv.cpp 
deleted file mode 100644 index 6d6bbf223fa..00000000000 --- a/src/tests/functional/plugin/shared/src/import_export_tests/import_reshape_permute_conv.cpp +++ /dev/null @@ -1,44 +0,0 @@ -// Copyright (C) 2018-2022 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "import_export_tests/import_reshape_permute_conv.hpp" - -#include "ngraph_functions/builders.hpp" - -namespace LayerTestsDefinitions { - -void ImportReshapePermuteConv::SetUp() { - std::vector inputShape; - InferenceEngine::Precision netPrecision; - std::tie(inputShape, netPrecision, targetDevice, exportConfiguration, importConfiguration, applicationHeader) = this->GetParam(); - auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); - - auto params = ngraph::builder::makeParams(ngPrc, { inputShape }); - - std::vector outFormShapes1 = { 1, 1, 168, 2 }; - auto pattern1 = std::make_shared(ngraph::element::Type_t::i64, ngraph::Shape{ 4 }, outFormShapes1); - auto reshape1 = std::make_shared(params[0], pattern1, false); - - auto permute1 = std::make_shared(reshape1, - ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{ 4 }, { 0, 3, 1, 2 })); - - auto conv1 = ngraph::builder::makeConvolution(permute1, ngPrc, { 1, 8 }, { 1, 1 }, { 0, 0 }, { 0, 0 }, { 1, 1 }, - ngraph::op::PadType::VALID, 12); - - auto permute2 = std::make_shared(conv1, - ngraph::opset1::Constant::create(ngraph::element::i64, ngraph::Shape{ 4 }, { 0, 2, 3, 1 })); - - std::vector outFormShapes2 = { 1, 1932 }; - auto pattern2 = std::make_shared(ngraph::element::Type_t::i64, ngraph::Shape{ 2 }, outFormShapes2); - auto reshape2 = std::make_shared(permute2, pattern2, false); - - ngraph::ResultVector results{ std::make_shared(reshape2) }; - function = std::make_shared(results, params, "ExportImportNetwork"); -} - -TEST_P(ImportReshapePermuteConv, CompareWithRefImpl) { - Run(); -}; - -} // namespace LayerTestsDefinitions From 69ad9e80e1e69fb740b5f891559a6dfd17fb9552 Mon Sep 17 00:00:00 2001 From: Nikita Malinin Date: Fri, 4 Mar 2022 14:50:44 +0300 Subject: [PATCH 183/310] [POT] Update OverflowCorrection algo for nodes without bias (#10687) * Update OverflowCorrection algo for nodes without bias * Pylint line fix * Update OC with the last add name * Pylint fix --- .../overflow_correction/algorithm.py | 20 +++++++++---------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/tools/pot/openvino/tools/pot/algorithms/quantization/overflow_correction/algorithm.py b/tools/pot/openvino/tools/pot/algorithms/quantization/overflow_correction/algorithm.py index d6f351a35a7..d65e5e42215 100644 --- a/tools/pot/openvino/tools/pot/algorithms/quantization/overflow_correction/algorithm.py +++ b/tools/pot/openvino/tools/pot/algorithms/quantization/overflow_correction/algorithm.py @@ -44,12 +44,11 @@ class OverflowCorrection(Algorithm): weighted_nodes = [n for n in weighted_nodes if nu.node_with_quantized_weights(n)] for weighted_node in weighted_nodes: bias_node = nu.get_bias_for_node(weighted_node) - if bias_node is None: - continue - add_node = nu.get_node_output(bias_node, 0)[0] - add_node_name = add_node.fullname - if add_node_name not in activation_statistics \ - or 'max_per_tensor' not in activation_statistics[add_node_name]: + output_node = weighted_node if bias_node is None else nu.get_node_output(bias_node, 0)[0] + output_node_name = output_node['orig_node_name'] if 'orig_node_name' in output_node \ + else output_node.fullname + if output_node_name not in activation_statistics \ + or 'max_per_tensor' not in 
activation_statistics[output_node_name]: logger.debug('Skipping {}'.format(weighted_node.fullname)) continue logger.debug('Processing {}'.format(weighted_node.fullname)) @@ -57,7 +56,8 @@ class OverflowCorrection(Algorithm): if weight_fq.levels <= np.iinfo(np.uint8).max: logger.debug('Skipping {} due to INT8 weights quantization'.format(weighted_node.fullname)) continue - rescale_value = correct_node_overflow(weighted_node, activation_statistics[add_node_name]['max_per_tensor']) + rescale_value = correct_node_overflow(weighted_node, + activation_statistics[output_node_name]['max_per_tensor']) if rescale_value: logger.debug('Weights and scales for node {} ' 'updated with scale coefficient: {}'.format(weighted_node.fullname, rescale_value)) @@ -69,10 +69,8 @@ class OverflowCorrection(Algorithm): stats_layout = {} for conv_node in conv_nodes: bias_node = nu.get_bias_for_node(conv_node) - if bias_node is None: - continue - add_node = nu.get_node_output(bias_node, 0)[0] - stats_layout[add_node.fullname] = {'max_per_tensor': acf.abs_max_per_tensor} + output_node = conv_node if bias_node is None else nu.get_node_output(bias_node, 0)[0] + stats_layout[output_node.fullname] = {'max_per_tensor': acf.abs_max_per_tensor} quantized_model = deepcopy(model) fqut.insert_fake_quantize_nodes(self._config, quantized_model) layers_mapping = fqut.create_renamed_layers_mapping(quantized_model, stats_layout) From 7e8bbf4968dd7b94000f3bf6fa35672262aaf456 Mon Sep 17 00:00:00 2001 From: Anuj Mittal Date: Fri, 4 Mar 2022 20:41:37 +0800 Subject: [PATCH 184/310] installing-openvino-yocto.md: fix install instructions (#10785) Change _ to : as per the new override syntax. Signed-off-by: Anuj Mittal --- docs/install_guides/installing-openvino-yocto.md | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/docs/install_guides/installing-openvino-yocto.md b/docs/install_guides/installing-openvino-yocto.md index af85024de05..4842a34edd1 100644 --- a/docs/install_guides/installing-openvino-yocto.md +++ b/docs/install_guides/installing-openvino-yocto.md @@ -43,26 +43,26 @@ MACHINE = "intel-skylake-64" # Enable clDNN GPU plugin when needed. # This requires meta-clang and meta-oe layers to be included in bblayers.conf # and is not enabled by default. -PACKAGECONFIG_append_pn-openvino-inference-engine = " opencl" +PACKAGECONFIG:append:pn-openvino-inference-engine = " opencl" # Enable building OpenVINO Python API. # This requires meta-python layer to be included in bblayers.conf. -PACKAGECONFIG_append_pn-openvino-inference-engine = " python3" +PACKAGECONFIG:append:pn-openvino-inference-engine = " python3" # This adds OpenVINO related libraries in the target image. -CORE_IMAGE_EXTRA_INSTALL_append = " openvino-inference-engine" +CORE_IMAGE_EXTRA_INSTALL:append = " openvino-inference-engine" # This adds OpenVINO samples in the target image. -CORE_IMAGE_EXTRA_INSTALL_append = " openvino-inference-engine-samples" +CORE_IMAGE_EXTRA_INSTALL:append = " openvino-inference-engine-samples" # Include OpenVINO Python API package in the target image. -CORE_IMAGE_EXTRA_INSTALL_append = " openvino-inference-engine-python3" +CORE_IMAGE_EXTRA_INSTALL:append = " openvino-inference-engine-python3" # Enable MYRIAD plugin -CORE_IMAGE_EXTRA_INSTALL_append = " openvino-inference-engine-vpu-firmware" +CORE_IMAGE_EXTRA_INSTALL:append = " openvino-inference-engine-vpu-firmware" # Include Model Optimizer in the target image. 
-CORE_IMAGE_EXTRA_INSTALL_append = " openvino-model-optimizer" +CORE_IMAGE_EXTRA_INSTALL:append = " openvino-model-optimizer" ``` ## Step 2: Build a Yocto Image with OpenVINO Packages From c28cebb2a689bb0c119a32234f995bfd963383ef Mon Sep 17 00:00:00 2001 From: Dmitry Pigasin Date: Fri, 4 Mar 2022 15:41:47 +0300 Subject: [PATCH 185/310] [CPP Speech Sample] Fix result saving when batch size is not 1 (#10714) * Fix result saving when batch size is not 1 * Remove useless if statement * Improve processing of scores for models with more than one output * Add a check on the number of model outputs * Improve if statements * Move the fix for models with several outputs to a separate PR Co-authored-by: Maxim Gordeev --- samples/cpp/speech_sample/main.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/samples/cpp/speech_sample/main.cpp b/samples/cpp/speech_sample/main.cpp index 431bddee3e6..7ebc4adde8c 100644 --- a/samples/cpp/speech_sample/main.cpp +++ b/samples/cpp/speech_sample/main.cpp @@ -494,9 +494,9 @@ int main(int argc, char* argv[]) { if (!FLAGS_o.empty()) { /* Prepare output data for save to file in future */ - auto outputFrame = - &vectorPtrScores[next_output].front() + - numScoresPerOutput[next_output] * sizeof(float) * (inferRequest.frameIndex); + auto outputFrame = &vectorPtrScores[next_output].front() + + numScoresPerOutput[next_output] * sizeof(float) * + (inferRequest.frameIndex) / batchSize; ov::Tensor outputBlob = inferRequest.inferRequest.get_tensor(executableNet.output(outputName)); @@ -653,7 +653,7 @@ int main(int argc, char* argv[]) { uttName, &vectorPtrScores[next_output].front(), numFramesFile, - numScoresPerOutput[next_output]); + numScoresPerOutput[next_output] / batchSize); } if (!FLAGS_r.empty()) { // print statistical score error From cb9049076b04a3ea67e5abe3298d73cf6bc65be8 Mon Sep 17 00:00:00 2001 From: Ilya Churaev Date: Fri, 4 Mar 2022 18:40:18 +0300 Subject: [PATCH 186/310] Enabled clang-format for cc and itt libs (#10793) --- .../conditional_compilation/CMakeLists.txt | 2 +- .../include/openvino/cc/factory.h | 70 +++++---- .../include/openvino/cc/pass/itt.hpp | 35 ++--- .../include/openvino/cc/selective_build.h | 142 +++++++++--------- src/common/itt/CMakeLists.txt | 2 +- src/common/itt/src/itt.cpp | 29 ++-- src/common/util/.clang-format | 28 ---- 7 files changed, 136 insertions(+), 172 deletions(-) delete mode 100644 src/common/util/.clang-format diff --git a/src/common/conditional_compilation/CMakeLists.txt b/src/common/conditional_compilation/CMakeLists.txt index 1d0a4a568ab..04aee579926 100644 --- a/src/common/conditional_compilation/CMakeLists.txt +++ b/src/common/conditional_compilation/CMakeLists.txt @@ -49,5 +49,5 @@ endif() ov_install_static_lib(${TARGET_NAME} core) file(GLOB_RECURSE hdrs ${CMAKE_CURRENT_SOURCE_DIR}/include/*.h ${CMAKE_CURRENT_SOURCE_DIR}/include/*.hpp) -add_cpplint_target(${TARGET_NAME}_cpplint FOR_SOURCES ${hdrs}) +add_clang_format_target(${TARGET_NAME}_clang FOR_SOURCES ${hdrs}) openvino_developer_export_targets(COMPONENT core TARGETS openvino::conditional_compilation) diff --git a/src/common/conditional_compilation/include/openvino/cc/factory.h b/src/common/conditional_compilation/include/openvino/cc/factory.h index 822098aa834..532c6916f4e 100644 --- a/src/common/conditional_compilation/include/openvino/cc/factory.h +++ b/src/common/conditional_compilation/include/openvino/cc/factory.h @@ -3,19 +3,20 @@ // #pragma once -#include "selective_build.h" -#include #include +#include #include #include +#include 
"selective_build.h" + namespace openvino { namespace cc { -template +template class Factory; -template +template class Factory { Factory(Factory const&) = delete; Factory& operator=(Factory const&) = delete; @@ -23,27 +24,25 @@ class Factory { public: using builder_t = std::function; - Factory(const std::string & name) - : name(name) {} + Factory(const std::string& name) : name(name) {} #ifdef SELECTIVE_BUILD - #define registerNodeIfRequired(Module, Name, key, Impl) \ - OV_PP_EXPAND(OV_PP_CAT(registerImpl, OV_CC_SCOPE_IS_ENABLED(OV_PP_CAT3(Module, _, Name)))(key)) - #define createNodeIfRegistered(Module, key, ...) createImpl(key, __VA_ARGS__) +# define registerNodeIfRequired(Module, Name, key, Impl) \ + OV_PP_EXPAND(OV_PP_CAT(registerImpl, OV_CC_SCOPE_IS_ENABLED(OV_PP_CAT3(Module, _, Name))) < Impl > (key)) +# define createNodeIfRegistered(Module, key, ...) createImpl(key, __VA_ARGS__) - template - void registerImpl0(const Key &) { - } + template + void registerImpl0(const Key&) {} - template - void registerImpl1(const Key & key) { + template + void registerImpl1(const Key& key) { builders[key] = [](Args... args) -> T { - Impl *impl = new Impl(args...); + Impl* impl = new Impl(args...); return static_cast(impl); }; } - T createImpl(const Key & key, Args... args) { + T createImpl(const Key& key, Args... args) { auto builder = builders.find(key); if (builder != builders.end()) { return builder->second(args...); @@ -52,21 +51,22 @@ public: } #elif defined(SELECTIVE_BUILD_ANALYZER) - #define registerNodeIfRequired(Module, Name, key, Impl) registerImpl(key, OV_PP_TOSTRING(Name)) - #define createNodeIfRegistered(Module, key, ...) createImpl(key, __VA_ARGS__) +# define registerNodeIfRequired(Module, Name, key, Impl) \ + registerImpl(key, OV_PP_TOSTRING(Name)) +# define createNodeIfRegistered(Module, key, ...) createImpl(key, __VA_ARGS__) - template - void registerImpl(const Key & key, const char *typeName) { + template + void registerImpl(const Key& key, const char* typeName) { const std::string task_name = "REG$" + name + "$" + to_string(key) + "$" + typeName; openvino::itt::ScopedTask task(openvino::itt::handle(task_name)); builders[key] = [](Args... args) -> T { - Impl *impl = new Impl(args...); + Impl* impl = new Impl(args...); return static_cast(impl); }; } - template - T createImpl(const Key & key, Args... args) { + template + T createImpl(const Key& key, Args... args) { auto builder = builders.find(key); if (builder != builders.end()) { const std::string task_name = "CREATE$" + name + "$" + to_string(key); @@ -78,18 +78,18 @@ public: #else - #define registerNodeIfRequired(Module, Name, key, Impl) registerImpl(key) - #define createNodeIfRegistered(Module, key, ...) createImpl(key, __VA_ARGS__) +# define registerNodeIfRequired(Module, Name, key, Impl) registerImpl(key) +# define createNodeIfRegistered(Module, key, ...) createImpl(key, __VA_ARGS__) - template - void registerImpl(const Key & key) { + template + void registerImpl(const Key& key) { builders[key] = [](Args... args) -> T { - Impl *impl = new Impl(args...); + Impl* impl = new Impl(args...); return static_cast(impl); }; } - T createImpl(const Key & key, Args... args) { + T createImpl(const Key& key, Args... 
args) { auto builder = builders.find(key); if (builder != builders.end()) { return builder->second(args...); @@ -98,8 +98,8 @@ public: } #endif - template - void foreach(Fn fn) const { + template + void foreach (Fn fn) const { for (auto itm : builders) fn(itm); } @@ -109,18 +109,16 @@ public: } private: - const std::string & to_string(const std::string & str) const noexcept { + const std::string& to_string(const std::string& str) const noexcept { return str; } - template::value, bool>::type = true> + template ::value, bool>::type = true> std::string to_string(V val) const { return std::to_string(static_cast(val)); } - template::value, bool>::type = true> + template ::value, bool>::type = true> std::string to_string(V val) const { return std::to_string(val); } diff --git a/src/common/conditional_compilation/include/openvino/cc/pass/itt.hpp b/src/common/conditional_compilation/include/openvino/cc/pass/itt.hpp index dd3c23f9f5b..4305459b4c0 100644 --- a/src/common/conditional_compilation/include/openvino/cc/pass/itt.hpp +++ b/src/common/conditional_compilation/include/openvino/cc/pass/itt.hpp @@ -5,6 +5,7 @@ #pragma once #include + #include OV_CC_DOMAINS(ov_pass); @@ -16,31 +17,27 @@ OV_CC_DOMAINS(ov_pass); */ #if defined(SELECTIVE_BUILD_ANALYZER) -#define RUN_ON_FUNCTION_SCOPE(region) OV_SCOPE(ov_pass, OV_PP_CAT(region, _run_on_function)) -#define MATCHER_SCOPE(region) const std::string matcher_name(OV_PP_TOSTRING(region)) -#define RUN_ON_MODEL_SCOPE(region) OV_SCOPE(ov_pass, OV_PP_CAT(region, _run_on_model)) +# define RUN_ON_FUNCTION_SCOPE(region) OV_SCOPE(ov_pass, OV_PP_CAT(region, _run_on_function)) +# define MATCHER_SCOPE(region) const std::string matcher_name(OV_PP_TOSTRING(region)) +# define RUN_ON_MODEL_SCOPE(region) OV_SCOPE(ov_pass, OV_PP_CAT(region, _run_on_model)) #elif defined(SELECTIVE_BUILD) -#define MATCHER_SCOPE_(scope, region) \ - if (OV_CC_SCOPE_IS_ENABLED(OV_PP_CAT3(scope, _, region)) == 0) \ - throw ngraph::ngraph_error(std::string(OV_PP_TOSTRING(OV_PP_CAT3(scope, _, region))) + \ - " is disabled!") +# define MATCHER_SCOPE_(scope, region) \ + if (OV_CC_SCOPE_IS_ENABLED(OV_PP_CAT3(scope, _, region)) == 0) \ + throw ngraph::ngraph_error(std::string(OV_PP_TOSTRING(OV_PP_CAT3(scope, _, region))) + " is disabled!") -#define MATCHER_SCOPE(region) \ - const std::string matcher_name(OV_PP_TOSTRING(region)); \ - if (OV_CC_SCOPE_IS_ENABLED(OV_PP_CAT3(ov_pass, _, region)) == 0) \ - return -#define RUN_ON_FUNCTION_SCOPE(region) \ - MATCHER_SCOPE_(ov_pass, OV_PP_CAT(region, _run_on_function)) +# define MATCHER_SCOPE(region) \ + const std::string matcher_name(OV_PP_TOSTRING(region)); \ + if (OV_CC_SCOPE_IS_ENABLED(OV_PP_CAT3(ov_pass, _, region)) == 0) \ + return +# define RUN_ON_FUNCTION_SCOPE(region) MATCHER_SCOPE_(ov_pass, OV_PP_CAT(region, _run_on_function)) -#define RUN_ON_MODEL_SCOPE(region) \ - MATCHER_SCOPE_(ov_pass, OV_PP_CAT(region, _run_on_model)) +# define RUN_ON_MODEL_SCOPE(region) MATCHER_SCOPE_(ov_pass, OV_PP_CAT(region, _run_on_model)) #else -#define MATCHER_SCOPE(region) const std::string matcher_name(OV_PP_TOSTRING(region)) -#define RUN_ON_FUNCTION_SCOPE(region) -#define RUN_ON_MODEL_SCOPE(region) +# define MATCHER_SCOPE(region) const std::string matcher_name(OV_PP_TOSTRING(region)) +# define RUN_ON_FUNCTION_SCOPE(region) +# define RUN_ON_MODEL_SCOPE(region) #endif - diff --git a/src/common/conditional_compilation/include/openvino/cc/selective_build.h b/src/common/conditional_compilation/include/openvino/cc/selective_build.h index 061068174f3..720c9ee1e39 100644 --- 
a/src/common/conditional_compilation/include/openvino/cc/selective_build.h +++ b/src/common/conditional_compilation/include/openvino/cc/selective_build.h @@ -55,19 +55,19 @@ * */ -#include #include +#include -#define OV_CC_EXPAND OV_PP_EXPAND -#define OV_CC_CAT OV_PP_CAT +#define OV_CC_EXPAND OV_PP_EXPAND +#define OV_CC_CAT OV_PP_CAT #define OV_CC_TOSTRING OV_PP_TOSTRING #ifdef SELECTIVE_BUILD_ANALYZER -# include +# include #endif -#include #include +#include namespace openvino { namespace cc { @@ -76,31 +76,29 @@ namespace cc { namespace internal { -template +template struct case_wrapper { using type = T; - const C value {}; + const C value{}; - case_wrapper(C && val) - : value(std::forward(val)) - {} + case_wrapper(C&& val) : value(std::forward(val)) {} }; -template -case_wrapper make_case_wrapper(C && val) { +template +case_wrapper make_case_wrapper(C&& val) { return case_wrapper(std::forward(val)); } -template class Fn, typename Ctx, typename T, typename Case> -bool match(Ctx&& ctx, T&& val, Case && cs) { +template