From 2f07b982517137031f530b6940baf7b4eb745c6b Mon Sep 17 00:00:00 2001
From: Indira Salyahova
Date: Wed, 15 Dec 2021 12:12:54 +0300
Subject: [PATCH 01/27] [POT] Support layout in pot (#9060)

* support layout pot

* pylint
---
 .../pot/configs/simplified_mode_template.json |  1 +
 .../tools/pot/data_loaders/creator.py         |  1 +
 .../tools/pot/data_loaders/image_loader.py    | 29 ++++++++++++++++++-
 .../openvino/tools/pot/data_loaders/utils.py  |  6 ++--
 tools/pot/tests/test_image_loading.py         | 26 +++++++++++++++++
 5 files changed, 59 insertions(+), 4 deletions(-)

diff --git a/tools/pot/configs/simplified_mode_template.json b/tools/pot/configs/simplified_mode_template.json
index 52db5686cd4..2df4aa1e898 100644
--- a/tools/pot/configs/simplified_mode_template.json
+++ b/tools/pot/configs/simplified_mode_template.json
@@ -17,6 +17,7 @@ of all possible parameters can be found in the default_quantization_spec.json */
     "engine": {
         "type": "simplified",
+        "layout": "NCHW",               // Layout of input data. Supported layouts: ["NCHW", "NHWC", "CHW", "HWC"]
         "data_source": "PATH_TO_SOURCE" // You can specify path to directory with images. Also you can
                                         // specify template for file names to filter images to load.
                                         // Templates are unix style (This option valid only in simplified mode)
diff --git a/tools/pot/openvino/tools/pot/data_loaders/creator.py b/tools/pot/openvino/tools/pot/data_loaders/creator.py
index 14e76e92f00..f4cd1e05fa9 100644
--- a/tools/pot/openvino/tools/pot/data_loaders/creator.py
+++ b/tools/pot/openvino/tools/pot/data_loaders/creator.py
@@ -26,6 +26,7 @@ def create_data_loader(config, model):
         if tuple(in_node.shape) != (1, 3):
             data_loader = ImageLoader(config)
             data_loader.shape = in_node.shape
+            data_loader.get_layout(in_node)
             return data_loader
 
     if data_loader is None:
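Taken together with the template above, the loader can now be driven with an explicit layout. Below is a minimal, illustrative sketch, not part of the patch: the `addict` Dict and the import paths are assumed from the test file at the end of this patch, and the model/image paths are placeholders.

    # Illustrative sketch only -- not part of the patch.
    # Imports assumed from tests/test_image_loading.py usage.
    from addict import Dict

    from openvino.tools.pot.data_loaders.creator import create_data_loader
    from openvino.tools.pot.graph import load_model

    engine_config = Dict({
        "device": "CPU",
        "type": "simplified",
        "layout": "NHWC",                       # new key; omit it to let get_layout() infer
        "data_source": "./calibration_images",  # placeholder directory with images
    })

    model_config = Dict({
        "model_name": "model",
        "model": "./model.xml",                 # placeholder IR files
        "weights": "./model.bin",
    })

    model = load_model(model_config)
    data_loader = create_data_loader(engine_config, model)
    print(data_loader[0].shape)                 # e.g. (224, 224, 3) for NHWC
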
diff --git a/tools/pot/openvino/tools/pot/data_loaders/image_loader.py b/tools/pot/openvino/tools/pot/data_loaders/image_loader.py
index 4ba603555e6..d81a5586d4c 100644
--- a/tools/pot/openvino/tools/pot/data_loaders/image_loader.py
+++ b/tools/pot/openvino/tools/pot/data_loaders/image_loader.py
@@ -3,6 +3,7 @@
 
 from cv2 import imread, IMREAD_GRAYSCALE
 
+from openvino.runtime import Layout, Dimension # pylint: disable=E0611,E0401
 from ..api.data_loader import DataLoader
 from ..data_loaders.utils import prepare_image, collect_img_files
 
@@ -14,6 +15,7 @@ class ImageLoader(DataLoader):
         self._img_files = collect_img_files(config.data_source)
         self._shape = None
+        self._layout = config.get('layout', None)
         self._crop_central_fraction = config.get('central_fraction', None)
 
     def __getitem__(self, idx):
@@ -37,4 +39,29 @@ class ImageLoader(DataLoader):
         if image is None:
             raise Exception('Can not read the image: {}'.format(img_path))
 
-        return prepare_image(image, self.shape[-2:], self._crop_central_fraction)
+        return prepare_image(image, self._layout, self.shape[-2:], self._crop_central_fraction)
+
+    def get_layout(self, input_node):
+        if self._layout is not None:
+            if 'C' not in self._layout or 'H' not in self._layout or 'W' not in self._layout:
+                raise ValueError('Unexpected {} layout'.format(self._layout))
+            self._layout = Layout(self._layout)
+            return
+
+        layout_from_ir = input_node.graph.graph.get('layout', None)
+        if layout_from_ir is not None:
+            self._layout = Layout(layout_from_ir)
+            return
+
+        image_colors_dim = (Dimension(3), Dimension(1))
+        num_dims = len(self._shape)
+        if num_dims == 4:
+            if self._shape[1] in image_colors_dim:
+                self._layout = Layout("NCHW")
+            elif self._shape[3] in image_colors_dim:
+                self._layout = Layout("NHWC")
+        elif num_dims == 3:
+            if self._shape[0] in image_colors_dim:
+                self._layout = Layout("CHW")
+            elif self._shape[2] in image_colors_dim:
+                self._layout = Layout("HWC")
diff --git a/tools/pot/openvino/tools/pot/data_loaders/utils.py b/tools/pot/openvino/tools/pot/data_loaders/utils.py
index d60d5b4d1ff..fde14d66ba2 100644
--- a/tools/pot/openvino/tools/pot/data_loaders/utils.py
+++ b/tools/pot/openvino/tools/pot/data_loaders/utils.py
@@ -9,6 +9,7 @@ from pathlib import Path
 
 import numpy as np
 import cv2 as cv
+from openvino.runtime import Layout # pylint: disable=E0611,E0401
 
 from openvino.tools.pot.utils.logger import get_logger
 
 logger = get_logger(__name__)
@@ -34,12 +35,11 @@ def crop(image, central_fraction):
     return image[start_height:start_height + dst_height, start_width:start_width + dst_width]
 
 
-def prepare_image(image, dst_shape, central_fraction=None):
-
+def prepare_image(image, layout, dst_shape, central_fraction=None):
     if central_fraction:
         image = crop(image, central_fraction)
 
-    if image.shape[-1] in [3, 1]:
+    if layout == Layout('NCHW') or layout == Layout('CHW'):
         image = cv.resize(image, dst_shape[::-1])
         return image.transpose(2, 0, 1)
 
diff --git a/tools/pot/tests/test_image_loading.py b/tools/pot/tests/test_image_loading.py
index 0836e3025ff..ff82d73c3d6 100644
--- a/tools/pot/tests/test_image_loading.py
+++ b/tools/pot/tests/test_image_loading.py
@@ -44,3 +44,29 @@ def test_check_image(tmp_path, models, model_name, model_framework):
     num_images_in_dir = len(os.listdir(path_image_data))
 
     assert num_images_from_data_loader == num_images_in_dir
+
+
+TEST_MODELS_LAYOUT = [('mobilenet-v2-pytorch', 'pytorch', 'NCHW', (3, 224, 224)),
+                      ('mobilenet-v2-pytorch', 'pytorch', 'NHWC', (224, 224, 3)),
+                      ('mobilenet-v2-pytorch', 'pytorch', None, (3, 224, 224)),
+                      ('mobilenet-v1-1.0-224-tf', 'tf', None, (224, 224, 3))]
+
+
+@pytest.mark.parametrize(
+    'model_name, model_framework, layout, reference_shape', TEST_MODELS_LAYOUT,
+    ids=['{}_{}_{}'.format(m[0], m[1], m[2]) for m in TEST_MODELS_LAYOUT])
+def test_check_layout(tmp_path, models, model_name, model_framework, layout, reference_shape):
+    test_dir = Path(__file__).parent
+    path_image_data = os.path.join(test_dir, "data/image_data")
+
+    engine_config = Dict({"device": "CPU",
+                          "type": "simplified",
+                          "layout": layout,
+                          "data_source": path_image_data})
+
+    model = models.get(model_name, model_framework, tmp_path)
+    model = load_model(model.model_params)
+
+    data_loader = create_data_loader(engine_config, model)
+    image = data_loader[0]
+
+    assert image.shape == reference_shape
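The shape-based fallback in get_layout() is the subtle part of this patch: any axis equal to 3 (color) or 1 (grayscale) is taken as a candidate channel axis. A self-contained restatement of that heuristic with plain integers instead of openvino.runtime.Dimension (guess_layout is a hypothetical name, used here only for illustration):

    # Illustrative restatement of ImageLoader.get_layout()'s fallback -- not part of the patch.
    def guess_layout(shape):
        channel_dims = (3, 1)          # color or grayscale channel axis
        if len(shape) == 4:
            if shape[1] in channel_dims:
                return "NCHW"
            if shape[3] in channel_dims:
                return "NHWC"
        elif len(shape) == 3:
            if shape[0] in channel_dims:
                return "CHW"
            if shape[2] in channel_dims:
                return "HWC"
        return None                    # ambiguous: the loader keeps the layout unset

    assert guess_layout((1, 3, 224, 224)) == "NCHW"
    assert guess_layout((1, 224, 224, 3)) == "NHWC"
    assert guess_layout((224, 224, 3)) == "HWC"

Note that a shape such as (1, 3, 224, 3) matches the NCHW branch first; the explicit "layout" key exists precisely so users can override an ambiguous guess.
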
From b492b591363fe5493b02656a86ab186c910b00ab Mon Sep 17 00:00:00 2001
From: Ilya Znamenskiy
Date: Wed, 15 Dec 2021 12:17:13 +0300
Subject: [PATCH 02/27] [GPU] Fix for conv/deconv weights calculated in runtime (#8952)

---
 .../convolution/convolution_kernel_base.cpp   |   5 +-
 .../clDNN/src/binary_convolution.cpp          |   2 +-
 .../thirdparty/clDNN/src/convolution.cpp      |  13 +-
 .../thirdparty/clDNN/src/deconvolution.cpp    |  24 +-
 .../src/graph_optimizer/reorder_inputs.cpp    |  33 +-
 .../thirdparty/clDNN/src/layout_optimizer.cpp |  20 +-
 .../thirdparty/clDNN/src/network.cpp          |  18 +-
 .../thirdparty/clDNN/src/program.cpp          |  14 +-
 .../thirdparty/clDNN/src/program_node.cpp     |   3 +-
 .../tests/test_cases/convolution_gpu_test.cpp | 816 ++++++++----------
 .../tests/test_cases/fusings_gpu_test.cpp     |   8 +-
 .../clDNN/tests/test_cases/memory_test.cpp    |  50 +-
 .../intel_gpu/src/plugin/ops/convolution.cpp  |  10 +-
 src/plugins/intel_gpu/src/plugin/program.cpp  |   2 +-
 14 files changed, 474 insertions(+), 544 deletions(-)

diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_base.cpp
index e810a835807..c08ba2d292a 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_base.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_base.cpp
@@ -196,15 +196,16 @@ KernelsData ConvolutionKernelBase::GetCommonKernelsData(const Params& params,
         return {};
     }
 
+    auto preferredWeightsLayout = GetPreferredWeightsLayout(newParams);
     bool succeed = UpdateWeightsParams(newParams,
                                        options,
-                                       GetPreferredWeightsLayout(newParams),
+                                       preferredWeightsLayout,
                                        kd.weightsReorderParams,
                                        GetSupportedKey(),
                                        newParams.groups,
                                        newParams.transposed);
 
-    bool bSupportedWeightsLayout = newParams.weights.GetLayout() == GetPreferredWeightsLayout(newParams);
+    bool bSupportedWeightsLayout = newParams.weights.GetLayout() == preferredWeightsLayout;
     const bool bWeightsOK = bSupportedWeightsLayout || options.allowStaticInputReordering;
 
     if (!succeed || !bWeightsOK) {
diff --git a/inference-engine/thirdparty/clDNN/src/binary_convolution.cpp b/inference-engine/thirdparty/clDNN/src/binary_convolution.cpp
index d10ee575a93..acec60b0e5e 100644
--- a/inference-engine/thirdparty/clDNN/src/binary_convolution.cpp
+++ b/inference-engine/thirdparty/clDNN/src/binary_convolution.cpp
@@ -125,7 +125,7 @@ binary_convolution_inst::typed_primitive_inst(network& network, binary_convoluti
                           "Only one-dimensional batch size are supported");
     CLDNN_ERROR_LESS_THAN(node.id(),
                           "Weights feature maps number",
-                          (input_inst.size.feature[0] + pad.feature[0]) / split,
+                          input_inst.size.feature[0],
                           "input feature maps number",
                           filter_inst.size.feature[0],
                           "Weights/ifm mismatch");
diff --git a/inference-engine/thirdparty/clDNN/src/convolution.cpp b/inference-engine/thirdparty/clDNN/src/convolution.cpp
index 0c478a0da6c..fe1de244f9c 100644
--- a/inference-engine/thirdparty/clDNN/src/convolution.cpp
+++ b/inference-engine/thirdparty/clDNN/src/convolution.cpp
@@ -97,7 +97,7 @@ layout convolution_inst::calc_output_layout(convolution_node const& node) {
         input_layout.format == format::image_2d_weights_winograd_6x3_s1_xfbyb)
         CLDNN_ERROR_MESSAGE(
             node.id(),
-            "Input for convolution should not be in windograd weights format - it is reserved for weights only");
+            "Input for convolution should not be in winograd weights format - it is reserved for weights only");
 
     if (input_layout.format == format::winograd_2x3_s1_data) {
         CLDNN_ERROR_NOT_EQUAL(node.id(),
@@ -369,10 +369,19 @@ convolution_inst::typed_primitive_inst(network& network, convolution_node const&
                               "Only one-dimensional batch size are supported");
         CLDNN_ERROR_LESS_THAN(node.id(),
                               "Weights feature maps number",
-                              (input_inst.size.feature[0] + pad.feature[0]) / split,
+                              input_inst.size.feature[0],
                               "input feature maps number",
                               weights_ifm,
                               "Weights/ifm mismatch");
+
+        if (!argument.grouped_weights_shape && !format::is_grouped(filter_inst.format)) {
+            CLDNN_ERROR_NOT_EQUAL(node.id(),
+                                  "Weights feature maps number",
+                                  input_inst.size.feature[0],
+                                  "input feature maps number",
+                                  weights_ifm,
+                                  "Weights/ifm mismatch");
+        }
     }
 }
 }  // namespace cldnn
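The conv and deconv validation changes share one idea: drop the old split/padding arithmetic and compare feature-map counts directly, requiring an exact match when the weights are not grouped. A condensed restatement of the combined rule (Python stand-in with hypothetical names; CLDNN_ERROR_LESS_THAN fires when the first value is smaller, CLDNN_ERROR_NOT_EQUAL when the values differ):

    # Illustrative restatement of the updated conv/deconv weight checks -- not part of the patch.
    def validate_weights_ifm(input_fm, weights_ifm, grouped):
        # CLDNN_ERROR_LESS_THAN: input must supply at least as many feature maps as the weights expect.
        if input_fm < weights_ifm:
            raise ValueError("Weights/ifm mismatch")
        # New CLDNN_ERROR_NOT_EQUAL: plain (ungrouped) weights must match the input exactly.
        if not grouped and input_fm != weights_ifm:
            raise ValueError("Weights/ifm mismatch")
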
diff --git a/inference-engine/thirdparty/clDNN/src/deconvolution.cpp b/inference-engine/thirdparty/clDNN/src/deconvolution.cpp
index c506c77d058..ab11ef233f4 100644
--- a/inference-engine/thirdparty/clDNN/src/deconvolution.cpp
+++ b/inference-engine/thirdparty/clDNN/src/deconvolution.cpp
@@ -82,11 +82,11 @@ layout deconvolution_inst::calc_output_layout(deconvolution_node const& node) {
     int32_t off_factor = -2;
     size_t spatial_dims = cldnn::format::traits(input_layout.format).spatial_num;
     CLDNN_ERROR_GREATER_THAN(node.id(),
-        "number of spatial dimensions",
-        spatial_dims,
-        "expected number of dimensions",
-        3,
-        "As for now, deconvolutions with more than 3 dimensions are not supported");
+                             "number of spatial dimensions",
+                             spatial_dims,
+                             "expected number of dimensions",
+                             3,
+                             "As for now, deconvolutions with more than 3 dimensions are not supported");
 
     int32_t x = off_factor * pad.spatial[0] + (input_layout.size.spatial[0] - 1) * strd.spatial[0] + filter_size.spatial[0];
     int32_t y = 1;
@@ -208,6 +208,7 @@ deconvolution_inst::typed_primitive_inst(network& network, deconvolution_node co
                                   1,
                                   "Spatial[0] of bias should be 1. Bias isn't 1D vector.");
         }
+
         CLDNN_ERROR_NOT_EQUAL(node.id(),
                               "deconvolution padding filling value",
                               node.get_output_layout().data_padding.filling_value(),
@@ -240,10 +241,19 @@ deconvolution_inst::typed_primitive_inst(network& network, deconvolution_node co
                               "Only one-dimensional features are supported");
         CLDNN_ERROR_LESS_THAN(node.id(),
                               "Weights feature maps number",
-                              (input_inst.size.feature[0] + pad.feature[0]) / split,
+                              input_inst.size.feature[0],
                               "input feature maps number",
                               weights_ifm,
-                              "Weights/ifm mimsmatch");
+                              "Weights/ifm mismatch");
+
+        if (!argument.grouped_weights_shape && !format::is_grouped(filter_inst.format)) {
+            CLDNN_ERROR_NOT_EQUAL(node.id(),
+                                  "Weights feature maps number",
+                                  input_inst.size.feature[0],
+                                  "input feature maps number",
+                                  weights_ifm,
+                                  "Weights/ifm mismatch");
+        }
     }
 }
 }  // namespace cldnn
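The reorder pass below extends the same recovery to conv and deconv weights that are only produced at runtime (so they cannot be statically reordered): when such weights arrive in a non-simple format, a reorder to a plain planar format is inserted, chosen by rank alone. The selection rule, restated (illustrative Python stand-in for the C++ ternary, not part of the patch):

    # Mirrors `dims <= 4 ? format::bfyx : dims == 5 ? format::bfzyx : format::bfwzyx`.
    def preferred_plain_weights_format(dims):
        if dims <= 4:
            return "bfyx"      # 4D and below
        if dims == 5:
            return "bfzyx"     # 5D
        return "bfwzyx"        # 6D

    assert preferred_plain_weights_format(4) == "bfyx"
    assert preferred_plain_weights_format(5) == "bfzyx"
    assert preferred_plain_weights_format(6) == "bfwzyx"
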
diff --git a/inference-engine/thirdparty/clDNN/src/graph_optimizer/reorder_inputs.cpp b/inference-engine/thirdparty/clDNN/src/graph_optimizer/reorder_inputs.cpp
index f0869e06b2a..4e2b1892b60 100644
--- a/inference-engine/thirdparty/clDNN/src/graph_optimizer/reorder_inputs.cpp
+++ b/inference-engine/thirdparty/clDNN/src/graph_optimizer/reorder_inputs.cpp
@@ -536,7 +536,7 @@ void reorder_inputs::run(program& p, layout_optimizer& lo, reorder_factory& rf)
         }
     };
 
-    const auto reorder_input_deconvolution = [&p, &lo, &rf](typed_program_node<deconvolution>& deconv_node) {
+    const auto reorder_input_and_weights_deconvolution = [&p, &lo, &rf](typed_program_node<deconvolution>& deconv_node) {
         auto& input = deconv_node.input();
         auto input_layout = input.get_output_layout();
         auto new_format = lo.get_preferred_format(deconv_node);
@@ -547,14 +547,41 @@ void reorder_inputs::run(program& p, layout_optimizer& lo, reorder_factory& rf)
             p.add_intermediate(reorder.first, deconv_node, 0, !reorder.second);
         }
+
+        auto& weights = deconv_node.weights();
+        auto weights_layout = weights.get_output_layout();
+        if (!format::is_simple_data_format(weights_layout.format) && !weights.is_type<data>() && !weights.is_constant()) {
+            auto dims = weights_layout.format.dimension();
+            auto preferred_format = dims <= 4 ? format::bfyx : dims == 5 ? format::bfzyx : format::bfwzyx;
+            auto reorder = rf.get_reorder(weights.id(), weights_layout,
+                                          layout{ weights_layout.data_type, preferred_format, weights_layout.size });
+            if (reorder.first) {
+                p.add_intermediate(reorder.first, deconv_node, 1, !reorder.second);
+            }
+        }
     };
 
+    const auto reorder_weights_convolution = [&p, &lo, &rf](typed_program_node<convolution>& conv_node) {
+        auto& weights = conv_node.weights();
+        auto weights_layout = weights.get_output_layout();
+        if (!format::is_simple_data_format(weights_layout.format) && !weights.is_type<data>() && !weights.is_constant()) {
+            auto dims = weights_layout.format.dimension();
+            auto preferred_format = dims <= 4 ? format::bfyx : dims == 5 ? format::bfzyx : format::bfwzyx;
+            auto reorder = rf.get_reorder(weights.id(), weights_layout,
+                                          layout{ weights_layout.data_type, preferred_format, weights_layout.size });
+            if (reorder.first) {
+                p.add_intermediate(reorder.first, conv_node, 1, !reorder.second);
+            }
+        }
+    };
+
     for (auto& prim : p.get_processing_order()) {
-        program_helpers::do_for_types<detection_output, binary_convolution, deconvolution>(
+        program_helpers::do_for_types<detection_output, binary_convolution, deconvolution, convolution>(
             *prim,
             reorder_input_detection_output,
             reorder_input_binary_convolution,
-            reorder_input_deconvolution);
+            reorder_input_and_weights_deconvolution,
+            reorder_weights_convolution);
     }
 
     for (auto n : p.get_processing_order()) {
diff --git a/inference-engine/thirdparty/clDNN/src/layout_optimizer.cpp b/inference-engine/thirdparty/clDNN/src/layout_optimizer.cpp
index f2c3ae9c84d..6156ef8e8eb 100644
--- a/inference-engine/thirdparty/clDNN/src/layout_optimizer.cpp
+++ b/inference-engine/thirdparty/clDNN/src/layout_optimizer.cpp
@@ -1320,17 +1320,27 @@ impl_types layout_optimizer::get_preferred_impl_type(program_node& node, format
             impl_candidate = impl_types::ocl;
         }
 
+        size_t eltw_dep = 0;
         for (auto& fo : node.get_fused_primitives()) {
             if (fo.node->is_type<eltwise>()) {
                 auto in_layout = node.get_dependency(fo.dep_start_idx).get_output_layout();
                 auto out_layout = node.get_output_layout();
                 auto in_dt = in_layout.data_type;
                 auto out_dt = out_layout.data_type;
-                if ((out_layout.count() == in_layout.count()) &&
-                    (data_type_traits::is_floating_point(in_dt) || data_type_traits::is_floating_point(out_dt)) && in_dt != out_dt &&
-                    fo.node->as<eltwise>().get_primitive()->needs_onednn_sum_post_op(in_layout)) {
-                    impl_candidate = impl_types::ocl;
-                    break;
+                if (fo.node->as<eltwise>().get_primitive()->needs_onednn_sum_post_op(in_layout)) {
+                    if ((out_layout.count() == in_layout.count()) &&
+                        (data_type_traits::is_floating_point(in_dt) || data_type_traits::is_floating_point(out_dt)) && in_dt != out_dt) {
+                        impl_candidate = impl_types::ocl;
+                        break;
+                    }
+                    if (in_layout.size == out_layout.size && in_layout.format == out_layout.format && in_layout.data_padding == out_layout.data_padding &&
+                        data_type_traits::size_of(in_dt) == data_type_traits::size_of(out_dt)) {
+                        if (eltw_dep > 0) {
+                            impl_candidate = impl_types::ocl;
+                            break;
+                        }
+                        eltw_dep = fo.dep_start_idx;
+                    }
                 }
             } else if (fo.node->is_type<activation>()) {
                 // Some activations aren't implemented in oneDNN
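If we read the new branch correctly, it tracks at most one fusable in-place sum per node: a dtype-changing float sum, or a second byte-compatible sum after one dependency has already been recorded, pushes the node back to the OCL implementation. A condensed sketch of that reading (hypothetical helper, deliberately simplified flags; not part of the patch):

    # Illustrative reading of the layout_optimizer change above -- assumptions, not the real API.
    def pick_impl(fused_sum_posts):
        # fused_sum_posts: (same_count, float_dtype_change, byte_compatible) per fused
        # eltwise that needs a oneDNN sum post-op.
        eltw_dep_seen = False
        for same_count, float_dtype_change, byte_compatible in fused_sum_posts:
            if same_count and float_dtype_change:
                return "ocl"       # oneDNN sum cannot change floating-point dtypes here
            if byte_compatible:
                if eltw_dep_seen:
                    return "ocl"   # only one in-place sum dependency is supported
                eltw_dep_seen = True
        return "onednn"
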
diff --git a/inference-engine/thirdparty/clDNN/src/network.cpp b/inference-engine/thirdparty/clDNN/src/network.cpp
index 98b3a3160a3..febc8d61bce 100644
--- a/inference-engine/thirdparty/clDNN/src/network.cpp
+++ b/inference-engine/thirdparty/clDNN/src/network.cpp
@@ -514,15 +514,17 @@ void network::allocate_primitives() {
                     can_reuse_eltwise_mem = true;
                 }
 
-                if (_primitives.find(eltw_in.id()) != _primitives.end() && _primitives.find(node->id()) != _primitives.end()) {
-                    auto& eltw_inst = _primitives.at(eltw_in.id());
-                    auto& prim_inst = _primitives.at(node->id());
-                    auto eltw_mem_type = eltw_inst->output_memory().get_allocation_type();
-                    auto prim_mem_type = prim_inst->output_memory().get_allocation_type();
+                if (!can_reuse_eltwise_mem) {
+                    if (_primitives.find(eltw_in.id()) != _primitives.end() && _primitives.find(node->id()) != _primitives.end()) {
+                        auto& eltw_inst = _primitives.at(eltw_in.id());
+                        auto& prim_inst = _primitives.at(node->id());
+                        auto eltw_mem_type = eltw_inst->output_memory().get_allocation_type();
+                        auto prim_mem_type = prim_inst->output_memory().get_allocation_type();
 
-                    // Keep lockable memory type for `prim_inst` output if needed
-                    if (eltw_mem_type != prim_mem_type && eltw_mem_type != allocation_type::cl_mem && eltw_mem_type != allocation_type::usm_host)
-                        can_reuse_eltwise_mem = false;
+                        // Keep lockable memory type for `prim_inst` output if needed
+                        if (eltw_mem_type != prim_mem_type && eltw_mem_type != allocation_type::cl_mem && eltw_mem_type != allocation_type::usm_host)
+                            can_reuse_eltwise_mem = false;
+                    }
                 }
 
                 if (fused_op.node->as<eltwise>().get_primitive()->needs_onednn_sum_post_op(eltw_in_layout) && !can_reuse_eltwise_mem) {
diff --git a/inference-engine/thirdparty/clDNN/src/program.cpp b/inference-engine/thirdparty/clDNN/src/program.cpp
index 9d60731d1e2..ebdc3f3920e 100644
--- a/inference-engine/thirdparty/clDNN/src/program.cpp
+++ b/inference-engine/thirdparty/clDNN/src/program.cpp
@@ -741,10 +741,10 @@ program_node& program::get_or_create(std::shared_ptr<primitive> prim) {
 }
 
 void program::add_intermediate(program_node& node,
-    program_node& next,
-    size_t prev_idx,
-    bool connect_int_node_with_old_dep,
-    bool move_usrs_of_prev_to_node) {
+                               program_node& next,
+                               size_t prev_idx,
+                               bool connect_int_node_with_old_dep,
+                               bool move_usrs_of_prev_to_node) {
     if (connect_int_node_with_old_dep && !node.dependencies.empty())
         throw std::invalid_argument(
             "Node which is about to be added in between two other nodes should not have any existing dependencies");
@@ -1112,8 +1112,8 @@ void program::remove_nodes(std::vector<program_node*>& to_remove) {
 
 // TODO: break this function into number of smaller ones + add per-primitive fields (possibly use
 // primitive_inst::to_string?)
void program::dump_program(const char* stage, - bool with_full_info, - std::function const& filter) const { + bool with_full_info, + std::function const& filter) const { std::string path = get_dir_path(options); if (path.empty() || !with_full_info) { return; @@ -1230,7 +1230,7 @@ void program::save_pass_info(std::string pass_name) { } void program::add_optimized_primitive_info(primitive_id optimized_primitive_id, - std::vector replaced_with_ids) { + std::vector replaced_with_ids) { for (auto& e : optimized) { auto it = std::find_if(e.second.begin(), e.second.end(), [&optimized_primitive_id](const primitive_id& id) { return optimized_primitive_id == id; diff --git a/inference-engine/thirdparty/clDNN/src/program_node.cpp b/inference-engine/thirdparty/clDNN/src/program_node.cpp index 30aece05a5b..acc2b143bfd 100644 --- a/inference-engine/thirdparty/clDNN/src/program_node.cpp +++ b/inference-engine/thirdparty/clDNN/src/program_node.cpp @@ -428,7 +428,8 @@ dnnl::post_ops program_node::try_optimize_post_ops(dnnl::post_ops& p_ops, const // Ignore optimized operations for "previous" operation in our operation pair while (type_is_any_optimized(prev_type) && cur_post_op_idx < post_ops_size - 1) { prev_post_op_idx++; - cur_post_op_idx++; + if (prev_post_op_idx == cur_post_op_idx) + cur_post_op_idx++; prev_type = cur_post_ops[prev_post_op_idx].op_type; cur_type = cur_post_ops[cur_post_op_idx].op_type; } diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/convolution_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/convolution_gpu_test.cpp index 607164c7799..6c224fd0e67 100644 --- a/inference-engine/thirdparty/clDNN/tests/test_cases/convolution_gpu_test.cpp +++ b/inference-engine/thirdparty/clDNN/tests/test_cases/convolution_gpu_test.cpp @@ -344,13 +344,13 @@ TEST(deformable_convolution_f32_fw_gpu, basic_deformable_convolution_def_group1_ data("biases", biases), convolution( "conv", - {"input", "trans"}, + { "input", "trans" }, { "weights" }, { "biases" }, 1, 1, { 1, 1, 1, 1 }, - tensor{{ 0, 0, 1, 1 }, 0}, + tensor{ { 0, 0, 1, 1 }, 0 }, { 1, 1, 1, 1 }, { 1, 4, 4, 4 }) ); @@ -475,13 +475,13 @@ TEST(deformable_convolution_f32_fw_gpu, basic_deformable_convolution_def_group1) data("biases", biases), convolution( "conv", - {"input", "trans"}, + { "input", "trans" }, { "weights" }, { "biases" }, 1, 1, { 1, 1, 1, 1 }, - tensor{{ 0, 0, 2, 2 }, 0}, + tensor{ { 0, 0, 2, 2 }, 0 }, { 1, 1, 2, 2 }, { 1, 4, 4, 4 }) ); @@ -638,13 +638,13 @@ TEST(deformable_convolution_f32_fw_gpu, basic_deformable_convolution) { data("biases", biases), convolution( "conv", - {"input", "trans"}, + { "input", "trans" }, { "weights" }, { "biases" }, 1, 2, { 1, 1, 1, 1 }, - tensor{{ 0, 0, 2, 2 }, 0}, + tensor{ { 0, 0, 2, 2 }, 0 }, { 1, 1, 2, 2 }, { 1, 4, 4, 4 }) ); @@ -698,8 +698,8 @@ TEST(convolution_f32_fw_gpu, basic_convolution_no_bias) { auto& engine = get_test_engine(); - auto input = engine.allocate_memory({ data_types::f32,format::yxfb,{ 1, 1, 5, 4 } }); - auto weights = engine.allocate_memory({ data_types::f32,format::bfyx,{ 1, 1, 3, 2 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 1, 1, 5, 4 } }); + auto weights = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 3, 2 } }); set_values(input, { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 2.0f, 2.0f, 3.0f, 4.0f, 6.0f, 3.0f, 3.0f, 3.0f, 5.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f }); set_values(weights, { 1.0f, 2.0f, 1.0f, 2.0f, 1.0f, 2.0f }); @@ -710,7 +710,7 @@ TEST(convolution_f32_fw_gpu, basic_convolution_no_bias) { 
topology topology( input_layout("input", input->get_layout()), data("weights", weights), - convolution("conv", "input", { "weights" }, { 1,1,1,2 })); + convolution("conv", "input", { "weights" }, { 1, 1, 1, 2 })); network network(engine, topology); network.set_input_data("input", input); @@ -769,8 +769,8 @@ TEST(convolution_f32_fw_gpu, basic_convolution_int8_no_bias) { auto& engine = get_test_engine(); - auto input = engine.allocate_memory({ data_types::f32,format::bfyx,{ 1, 1, 5, 4 } }); - auto weights = engine.allocate_memory({ data_types::i8,format::bfyx,{ 1, 1, 3, 2 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 5, 4 } }); + auto weights = engine.allocate_memory({ data_types::i8, format::bfyx, { 1, 1, 3, 2 } }); set_values(input, { 1.1f, 2.4f, 3.5f, 4.5f, 5.8f, 2.9f, 2.3f, 3.5f, 4.4f, 6.6f, @@ -784,10 +784,10 @@ TEST(convolution_f32_fw_gpu, basic_convolution_int8_no_bias) { topology topology( input_layout("input", input->get_layout()), - reorder("to_int","input", { data_types::i8,format::bfyx,{ 1, 1, 5, 4 } }), + reorder("to_int","input", { data_types::i8, format::bfyx, { 1, 1, 5, 4 } }), data("weights", weights), - convolution("conv", "to_int", { "weights" }, { 1,1,1,2 }), - reorder("output", "conv", { data_types::f32,format::bfyx,{ 1, 1, 3, 2 } })); + convolution("conv", "to_int", { "weights" }, { 1, 1, 1, 2 }), + reorder("output", "conv", { data_types::f32, format::bfyx, { 1, 1, 3, 2 } })); network network(engine, topology); network.set_input_data("input", input); @@ -839,7 +839,7 @@ TEST(convolution_f32_fw_gpu, basic_convolution3D_no_bias) { topology topology( input_layout("input", input->get_layout()), data("weights", weights), - convolution("conv", "input", { "weights" }, { 1,1,1,2 })); + convolution("conv", "input", { "weights" }, { 1, 1, 1, 2 })); network network(engine, topology); network.set_input_data("input", input); @@ -921,9 +921,9 @@ TEST(convolution_f32_fw_gpu, basic_convolution3D) { auto& engine = get_test_engine(); - auto input = engine.allocate_memory({ data_types::f32, format::bfzyx,{ 1, 1, 4, 4, 4 } }); - auto weights = engine.allocate_memory({ data_types::f32, format::bfzyx,{ 1, 1, 2, 2, 2 } }); - auto biases = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 1, 1, 1 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfzyx, { 1, 1, 4, 4, 4 } }); + auto weights = engine.allocate_memory({ data_types::f32, format::bfzyx, { 1, 1, 2, 2, 2 } }); + auto biases = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 1, 1, 1 } }); set_values(input, { 1.0f, 0.0f, 1.0f, 0.0f, @@ -1009,10 +1009,10 @@ TEST(convolution_f32_fw_gpu, basic_convolution3D) { } } -TEST(convolution_f32_fw_gpu, basic_convolution3D_split2) { +TEST(convolution_f32_fw_gpu, basic_convolution3D_group2) { // data is similar as in basic_convolution3D auto& engine = get_test_engine(); - auto input = engine.allocate_memory({ data_types::f32, format::bfzyx,{ 1, 2, 4, 4, 4 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfzyx, { 1, 2, 4, 4, 4 } }); auto weights_1 = engine.allocate_memory({ data_types::f32, format::goizyx, tensor(cldnn::group(2), cldnn::batch(1), cldnn::feature(1), cldnn::spatial(2, 2, 2))}); auto biases_1 = engine.allocate_memory({ data_types::f32, format::bfyx, tensor(feature(2)) }); @@ -1078,7 +1078,7 @@ TEST(convolution_f32_fw_gpu, basic_convolution3D_split2) { }, { { 3.0f, 4.0f, 6.0f }, - { 6.0f, 5.0f, 10.0f}, + { 6.0f, 5.0f, 10.0f }, { 9.0f, 4.0f, 1.0f } }, }, @@ -1105,7 +1105,7 @@ 
TEST(convolution_f32_fw_gpu, basic_convolution3D_split2) { input_layout("input", input->get_layout()), data("weights_1", weights_1), data("biases_1", biases_1), - convolution("conv", "input", { "weights_1" }, { "biases_1" }, 2, tensor(1), tensor(0), tensor(1), tensor{1, 2, 3, 3, 3}, data_types::f32, true)); + convolution("conv", "input", { "weights_1" }, { "biases_1" }, 2, tensor(1), tensor(0), tensor(1), tensor{ 1, 2, 3, 3, 3 }, data_types::f32, true)); network network(engine, topology); network.set_input_data("input", input); @@ -1141,138 +1141,6 @@ TEST(convolution_f32_fw_gpu, basic_convolution3D_split2) { } } -TEST(convolution_f32_fw_gpu, basic_convolution3D_group2) { - // data is similar as in basic_convolution3D_split2 - auto& engine = get_test_engine(); - auto input = engine.allocate_memory({ data_types::f32, format::bfzyx,{ 1, 2, 4, 4, 4 } }); - auto weights = engine.allocate_memory({ data_types::f32, format::bfzyx,{ 2, 1, 2, 2, 2 } }); - auto biases = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 2, 1, 1, 1 } }); - - set_values(input, { - 1.0f, 0.0f, 1.0f, 0.0f, - 1.0f, 1.0f, 3.0f, 1.0f, - 1.0f, 1.0f, 0.0f, 2.0f, - 0.0f, 2.0f, 1.0f, 1.0f, - 1.0f, 0.0f, 0.0f, 1.0f, - 2.0f, 0.0f, 1.0f, 2.0f, - 3.0f, 1.0f, 1.0f, 1.0f, - 0.0f, 0.0f, 3.0f, 1.0f, - 2.0f, 0.0f, 1.0f, 1.0f, - 3.0f, 3.0f, 1.0f, 0.0f, - 2.0f, 1.0f, 1.0f, 0.0f, - 3.0f, 2.0f, 1.0f, 2.0f, - 1.0f, 0.0f, 2.0f, 0.0f, - 1.0f, 0.0f, 3.0f, 3.0f, - 3.0f, 1.0f, 0.0f, 0.0f, - 1.0f, 1.0f, 0.0f, 2.0f, - 1.0f, 0.0f, 1.0f, 0.0f, - 1.0f, 1.0f, 3.0f, 1.0f, - 1.0f, 1.0f, 0.0f, 2.0f, - 0.0f, 2.0f, 1.0f, 1.0f, - 1.0f, 0.0f, 0.0f, 1.0f, - 2.0f, 0.0f, 1.0f, 2.0f, - 3.0f, 1.0f, 1.0f, 1.0f, - 0.0f, 0.0f, 3.0f, 1.0f, - 2.0f, 0.0f, 1.0f, 1.0f, - 3.0f, 3.0f, 1.0f, 0.0f, - 2.0f, 1.0f, 1.0f, 0.0f, - 3.0f, 2.0f, 1.0f, 2.0f, - 1.0f, 0.0f, 2.0f, 0.0f, - 1.0f, 0.0f, 3.0f, 3.0f, - 3.0f, 1.0f, 0.0f, 0.0f, - 1.0f, 1.0f, 0.0f, 2.0f, - }); - - set_values(weights, { - 0.0f, 1.0f, - 0.0f, 0.0f, - 2.0f, 1.0f, - 0.0f, 0.0f, - 0.0f, 1.0f, - 0.0f, 0.0f, - 2.0f, 1.0f, - 0.0f, 0.0f, - }); - - set_values(biases, { 1.0f, 2.0f }); - - VVVVF output_vec = { - { - { - { 3.0f, 2.0f, 2.0f }, - { 6.0f, 5.0f, 6.0f }, - { 9.0f, 4.0f, 6.0f } - }, - { - { 5.0f, 2.0f, 5.0f }, - { 10.0f, 9.0f, 5.0f }, - { 7.0f, 5.0f, 4.0f } - }, - { - { 3.0f, 4.0f, 6.0f }, - { 6.0f, 5.0f, 10.0f }, - { 9.0f, 4.0f, 1.0f } - }, - }, - { - { - { 4.0f, 3.0f, 3.0f }, - { 7.0f, 6.0f, 7.0f }, - { 10.0f, 5.0f, 7.0f } - }, - { - { 6.0f, 3.0f, 6.0f }, - { 11.0f, 10.0f, 6.0f }, - { 8.0f, 6.0f, 5.0f } - }, - { - { 4.0f, 5.0f, 7.0f }, - { 7.0f, 6.0f, 11.0f }, - { 10.0f, 5.0f, 2.0f } - }, - } - }; - - topology topology( - input_layout("input", input->get_layout()), - data("weights", weights), - data("biases", biases), - convolution("conv", "input", { "weights" }, { "biases" })); - - network network(engine, topology); - network.set_input_data("input", input); - - auto outputs = network.execute(); - EXPECT_EQ(outputs.size(), size_t(1)); - EXPECT_EQ(outputs.begin()->first, "conv"); - - auto output_memory = outputs.at("conv").get_memory(); - auto output_layout = output_memory->get_layout(); - cldnn::mem_lock output_ptr(output_memory, get_test_stream()); - - int z_size = output_layout.size.spatial[2]; - int y_size = output_layout.size.spatial[1]; - int x_size = output_layout.size.spatial[0]; - int f_size = output_layout.size.feature[0]; - int b_size = output_layout.size.batch[0]; - EXPECT_EQ(output_layout.format, format::bfzyx); - EXPECT_EQ(b_size, 1); - EXPECT_EQ(f_size, 2); - EXPECT_EQ(z_size, 3); - 
EXPECT_EQ(y_size, 3); - EXPECT_EQ(x_size, 3); - for (int f = 0; f < f_size; ++f) { - for (int z = 0; z < z_size; ++z) { - for (int y = 0; y < y_size; ++y) { - for (int x = 0; x < x_size; ++x) { - EXPECT_EQ(output_vec[f][z][y][x], - output_ptr[f * z_size * y_size * x_size + z * y_size * x_size + y * x_size + x]); - } - } - } - } -} - TEST(convolution_f32_fw_gpu, with_output_size_same_input) { auto& engine = get_test_engine(); @@ -1284,8 +1152,8 @@ TEST(convolution_f32_fw_gpu, with_output_size_same_input) { input_layout("input", input->get_layout()), data("weights", weights), data("weights2", weights2), - convolution::create_with_output_size("conv1", "input", { "weights" }, {1, 64, 160, 160}, {1, 1, 2, 2}, {0, 0, -3, -3}), - convolution::create_with_output_size("conv2", "input", { "weights2" }, {1, 64, 320, 320}, {1, 1, 1, 1}, {0, 0, -3, -3}) + convolution::create_with_output_size("conv1", "input", { "weights" }, { 1, 64, 160, 160 }, { 1, 1, 2, 2 }, { 0, 0, -3, -3 }), + convolution::create_with_output_size("conv2", "input", { "weights2" }, { 1, 64, 320, 320 }, { 1, 1, 1, 1 }, { 0, 0, -3, -3 }) ); network network(engine, topology); @@ -1315,8 +1183,8 @@ TEST(convolution_f32_fw_gpu, three_convolutions_same_weights) { auto& engine = get_test_engine(); - auto input = engine.allocate_memory({ data_types::f32, format::bfyx, {1,2,2,2} }); - auto weights = engine.allocate_memory({ data_types::f32, format::bfyx, { 2,2,1,1 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 2, 2, 2 } }); + auto weights = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 1, 1 } }); set_values(input, { 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f }); set_values(weights, { 1.0f, 1.0f, 1.0f, 1.0f }); @@ -1398,7 +1266,7 @@ TEST(convolution_f32_fw_gpu, basic_convolution) { input_layout("input", input->get_layout()), data("weights", weights), data("biases", biases), - convolution( "conv", "input", { "weights" }, { "biases" }, { 0,0,1,2 })); + convolution( "conv", "input", { "weights" }, { "biases" }, { 0, 0, 1, 2 })); network network(engine, topology); network.set_input_data("input", input); @@ -1462,7 +1330,7 @@ TEST(convolution_f32_fw_gpu, basic_convolution_bfyx_weights_as_input_layout) { , { "biases" } , - { 0,0,1,2 } + { 0, 0, 1, 2 } )); cldnn::build_options options; options.set_option(cldnn::build_option::optimize_data(true)); @@ -1552,11 +1420,11 @@ TEST(convolution_f32_fw_gpu, basic_convolution_input_padding) { "input", { "weights" }, { "biases" }, - { 1,1,1,1 }, - tensor{{ 0,0,1,2 }, 0}, + { 1, 1, 1, 1 }, + tensor{ { 0, 0, 1, 2 }, 0 }, { 1, 1, 1, 1 }, "", - padding{ { 0,0,0,0 }, 0 }) + padding{ { 0, 0, 0, 0 }, 0 }) ); network network(engine, topology); @@ -1629,9 +1497,9 @@ TEST(convolution_f32_fw_gpu, basic_convolution_sym_input_padding) { auto& engine = get_test_engine(); - auto input = engine.allocate_memory({ data_types::f32, format::yxfb,{ 1, 1, 4, 3 } }); - auto weights = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 2, 2 } }); - auto biases = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 1, 1 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 1, 1, 4, 3 } }); + auto weights = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 2, 2 } }); + auto biases = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 1, 1 } }); set_values(input, { 1.0f, 2.0f, 3.0f, 4.0f, 2.0f, 2.0f, 3.0f, 4.0f, 3.0f, 3.0f, 3.0f, 5.0f }); set_values(weights, { 1.0f, 1.0f, 1.0f, 1.0f }); @@ -1653,13 +1521,13 @@ 
TEST(convolution_f32_fw_gpu, basic_convolution_sym_input_padding) { "input", { "weights" }, { "biases" }, - { 1,1,1,1 }, - { 0,0,0,0 }, { 1, 1, 1, 1 }, - { 0,0,1,2 }, - { 0,0,1,2 }, + { 0, 0, 0, 0 }, + { 1, 1, 1, 1 }, + { 0, 0, 1, 2 }, + { 0, 0, 1, 2 }, "", - padding{ { 0,0,0,0 }, 0 }) + padding{ { 0, 0, 0, 0 }, 0 }) ); network network(engine, topology); @@ -1726,9 +1594,9 @@ TEST(convolution_f32_fw_gpu, basic_convolution_asym_input_padding) { auto& engine = get_test_engine(); - auto input = engine.allocate_memory({ data_types::f32, format::yxfb,{ 1, 1, 4, 3 } }); - auto weights = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 2, 2 } }); - auto biases = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 1, 1 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 1, 1, 4, 3 } }); + auto weights = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 2, 2 } }); + auto biases = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 1, 1 } }); set_values(input, { 1.0f, 2.0f, 3.0f, 4.0f, 2.0f, 2.0f, 3.0f, 4.0f, 3.0f, 3.0f, 3.0f, 5.0f }); set_values(weights, { 1.0f, 1.0f, 1.0f, 1.0f }); @@ -1749,15 +1617,15 @@ TEST(convolution_f32_fw_gpu, basic_convolution_asym_input_padding) { convolution( "conv", "input", - {"weights"}, - {"biases"}, - {1, 1, 1, 1}, - {0, 0, 0, 0}, - {1, 1, 1, 1}, - tensor{{0, 0, 1, 2}, 0}, - tensor{{0, 0, 2, 3}, 0}, + { "weights" }, + { "biases" }, + { 1, 1, 1, 1 }, + { 0, 0, 0, 0 }, + { 1, 1, 1, 1 }, + tensor{ { 0, 0, 1, 2 }, 0 }, + tensor{ { 0, 0, 2, 3 }, 0 }, "", - padding{{0, 0, 0, 0}, 0})); + padding{ { 0, 0, 0, 0 }, 0 })); network network(engine, topology); network.set_input_data("input", input); @@ -1830,9 +1698,9 @@ TEST(convolution_f32_fw_gpu, basic_convolution_sym_input_padding_with_pad) { auto& engine = get_test_engine(); - auto input = engine.allocate_memory({ data_types::f32, format::yxfb,{ 1, 1, 4, 3 } }); - auto weights = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 2, 2 } }); - auto biases = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 1, 1 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 1, 1, 4, 3 } }); + auto weights = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 2, 2 } }); + auto biases = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 1, 1 } }); set_values(input, { 1.0f, 2.0f, 3.0f, 4.0f, 2.0f, 2.0f, 3.0f, 4.0f, 3.0f, 3.0f, 3.0f, 5.0f }); set_values(weights, { 1.0f, 1.0f, 1.0f, 1.0f }); @@ -1858,13 +1726,13 @@ TEST(convolution_f32_fw_gpu, basic_convolution_sym_input_padding_with_pad) { "input", { "weights" }, { "biases" }, - { 1,1,1,1 }, - { 0,0,1,2 }, { 1, 1, 1, 1 }, - { 0,0,1,2 }, - { 0,0,1,2 }, + { 0, 0, 1, 2 }, + { 1, 1, 1, 1 }, + { 0, 0, 1, 2 }, + { 0, 0, 1, 2 }, "", - padding{ { 0,0,0,0 }, 0 }) + padding{ { 0, 0, 0, 0 }, 0 }) ); network network(engine, topology); @@ -1940,9 +1808,9 @@ TEST(convolution_f32_fw_gpu, basic_convolution_asym_input_padding_with_pad) { auto& engine = get_test_engine(); - auto input = engine.allocate_memory({ data_types::f32, format::yxfb,{ 1, 1, 4, 3 } }); - auto weights = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 2, 2 } }); - auto biases = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 1, 1, 1 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 1, 1, 4, 3 } }); + auto weights = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 2, 2 } }); + auto biases = engine.allocate_memory({ 
data_types::f32, format::bfyx, { 1, 1, 1, 1 } }); set_values(input, { 1.0f, 2.0f, 3.0f, 4.0f, 2.0f, 2.0f, 3.0f, 4.0f, 3.0f, 3.0f, 3.0f, 5.0f }); set_values(weights, { 1.0f, 1.0f, 1.0f, 1.0f }); @@ -1967,15 +1835,15 @@ TEST(convolution_f32_fw_gpu, basic_convolution_asym_input_padding_with_pad) { convolution( "conv", "input", - {"weights"}, - {"biases"}, - {1, 1, 1, 1}, - tensor{{0, 0, 1, 2}, 0}, - {1, 1, 1, 1}, - tensor{{0, 0, 1, 2}, 0}, - tensor{{0, 0, 2, 3}, 0}, + { "weights" }, + { "biases" }, + { 1, 1, 1, 1 }, + tensor{ { 0, 0, 1, 2 }, 0 }, + { 1, 1, 1, 1 }, + tensor{ { 0, 0, 1, 2 }, 0 }, + tensor{ { 0, 0, 2, 3 }, 0 }, "", - padding{{0, 0, 0, 0}, 0})); + padding{ { 0, 0, 0, 0 }, 0 })); network network(engine, topology); network.set_input_data("input", input); @@ -2069,11 +1937,11 @@ TEST(convolution_f32_fw_gpu, basic_convolution_input_and_output_padding) { "input", { "weights" }, { "biases" }, - { 1,1,1,1 }, - tensor{{ 0,0,1,2 }, 0}, + { 1, 1, 1, 1 }, + tensor{ { 0, 0, 1, 2 }, 0 }, { 1, 1, 1, 1 }, "", - padding{ { 0,0,-x_pad,-y_pad }, 0 }) + padding{ { 0, 0, -x_pad, -y_pad }, 0 }) ); network network(engine, topology); @@ -2156,7 +2024,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x1x1_nopad_random) { auto& engine = get_test_engine(); auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 1, 1, 4, 4 } }); - //auto output = memory::allocate({ memory::format::yxfb_f32,{ 1,{ 2, 2 }, 1 } }); + //auto output = memory::allocate({ memory::format::yxfb_f32, { 1, { 2, 2 }, 1 } }); auto weights = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 2, 2 } }); auto biases = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 1, 1 } }); @@ -2168,7 +2036,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x1x1_nopad_random) { input_layout("input", input->get_layout()), data("weights", weights), data("biases", biases), - convolution("conv", "input", {"weights"}, {"biases"}, {1,1,2,2}) + convolution("conv", "input", { "weights" }, { "biases" }, { 1, 1, 2, 2 }) ); network network(engine, topology); @@ -2226,7 +2094,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in2x2x1x2_nopad_random) { auto& engine = get_test_engine(); auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 2, 1, 2, 2 } }); - //auto output = memory::allocate({ memory::format::yxfb_f32,{ 2,{ 1, 1 }, 1 } }); + //auto output = memory::allocate({ memory::format::yxfb_f32, { 2, { 1, 1 }, 1 } }); auto weights = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 2, 2 } }); auto biases = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 1, 1 } }); @@ -2238,7 +2106,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in2x2x1x2_nopad_random) { input_layout("input", input->get_layout()), data("weights", weights), data("biases", biases), - convolution("conv", "input", { "weights" }, { "biases" }, { 1,1,2,2 }) + convolution("conv", "input", { "weights" }, { "biases" }, { 1, 1, 2, 2 }) ); network network(engine, topology); @@ -2284,7 +2152,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x1x1_nopad) { auto& engine = get_test_engine(); auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 1, 1, 4, 4 } }); - //auto output = memory::allocate({ memory::format::yxfb_f32,{ 1,{ 2, 2 }, 1 } }); + //auto output = memory::allocate({ memory::format::yxfb_f32, { 1, { 2, 2 }, 1 } }); auto weights = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 2, 2 } }); auto biases = engine.allocate_memory({ 
data_types::f32, format::bfyx, { 1, 1, 1, 1 } }); @@ -2296,7 +2164,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x1x1_nopad) { input_layout("input", input->get_layout()), data("weights", weights), data("biases", biases), - convolution("conv", "input", { "weights" }, { "biases" }, { 1,1,2,2 }) + convolution("conv", "input", { "weights" }, { "biases" }, { 1, 1, 2, 2 }) ); network network(engine, topology); @@ -2338,7 +2206,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in2x2x1x2_nopad) { auto& engine = get_test_engine(); auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 2, 1, 2, 2 } }); - //auto output = memory::allocate({ memory::format::yxfb_f32,{ 2,{ 1, 1 }, 1 } }); + //auto output = memory::allocate({ memory::format::yxfb_f32, { 2, { 1, 1 }, 1 } }); auto weights = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 2, 2 } }); auto biases = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 1, 1 } }); @@ -2350,7 +2218,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in2x2x1x2_nopad) { input_layout("input", input->get_layout()), data("weights", weights), data("biases", biases), - convolution("conv", "input", { "weights" }, { "biases" }, { 1,1,2,2 } ) + convolution("conv", "input", { "weights" }, { "biases" }, { 1, 1, 2, 2 } ) ); network network(engine, topology); @@ -2390,7 +2258,7 @@ TEST(convolution_f32_fw_gpu, basic_ofm_wsiz2x1x2x1_in1x2x1_nopad) { auto& engine = get_test_engine(); auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 1, 1, 1, 2 } }); - //auto output = memory::allocate({ memory::format::yxfb_f32,{ 1 ,{ 1, 1 }, 2 } }); + //auto output = memory::allocate({ memory::format::yxfb_f32, { 1, { 1, 1 }, 2 } }); auto weights = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 1, 1, 2 } }); auto biases = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 2, 1, 1 } }); @@ -2402,7 +2270,7 @@ TEST(convolution_f32_fw_gpu, basic_ofm_wsiz2x1x2x1_in1x2x1_nopad) { input_layout("input", input->get_layout()), data("weights", weights), data("biases", biases), - convolution("conv", "input", { "weights" }, { "biases" }, { 1,1,5,5 }) + convolution("conv", "input", { "weights" }, { "biases" }, { 1, 1, 5, 5 }) ); network network(engine, topology); @@ -2449,7 +2317,7 @@ TEST(convolution_f32_fw_gpu, basic_ofm_wsiz3x2x2x1_in2x2x1_nopad) { auto& engine = get_test_engine(); auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 1, 2, 1, 2 } }); - //auto output = memory::allocate({ memory::format::yxfb_f32,{ 1 ,{ 1, 1 }, 3 } }); + //auto output = memory::allocate({ memory::format::yxfb_f32, { 1, { 1, 1 }, 3 } }); auto weights = engine.allocate_memory({ data_types::f32, format::bfyx, { 3, 2, 1, 2 } }); auto biases = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 3, 1, 1 } }); @@ -2461,7 +2329,7 @@ TEST(convolution_f32_fw_gpu, basic_ofm_wsiz3x2x2x1_in2x2x1_nopad) { input_layout("input", input->get_layout()), data("weights", weights), data("biases", biases), - convolution("conv", "input", { "weights" }, { "biases" }, { 1,1,5,5 }) + convolution("conv", "input", { "weights" }, { "biases" }, { 1, 1, 5, 5 }) ); network network(engine, topology); @@ -2505,7 +2373,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2x1x3_wstr2x2_in2x2x1x1_nopad) { auto& engine = get_test_engine(); auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 1, 1, 2, 2 } }); - //auto output = memory::allocate({ memory::format::yxfb_f32,{ 1 ,{ 1, 1 }, 3 } }); + //auto output = 
memory::allocate({ memory::format::yxfb_f32, { 1, { 1, 1 }, 3 } }); auto weights = engine.allocate_memory({ data_types::f32, format::bfyx, { 3, 1, 2, 2 } }); auto biases = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 3, 1, 1 } }); @@ -2517,7 +2385,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2x1x3_wstr2x2_in2x2x1x1_nopad) { input_layout("input", input->get_layout()), data("weights", weights), data("biases", biases), - convolution("conv", "input", { "weights" }, { "biases" }, { 1,1,2,2 }) + convolution("conv", "input", { "weights" }, { "biases" }, { 1, 1, 2, 2 }) ); network network(engine, topology); @@ -2561,7 +2429,7 @@ TEST(convolution_f32_fw_gpu, wsiz3x3_wstr2x2_in2x2x1x1_zeropad) { auto& engine = get_test_engine(); auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 1, 1, 2, 2 } }); - //auto output = memory::allocate({ memory::format::yxfb_f32,{ 1,{ 1, 1 }, 1 } }); + //auto output = memory::allocate({ memory::format::yxfb_f32, { 1, { 1, 1 }, 1 } }); auto weights = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 3, 3 } }); auto biases = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 1, 1 } }); @@ -2573,7 +2441,7 @@ TEST(convolution_f32_fw_gpu, wsiz3x3_wstr2x2_in2x2x1x1_zeropad) { input_layout("input", input->get_layout()), data("weights", weights), data("biases", biases), - convolution("conv", "input", { "weights" }, { "biases" }, { 1,1,2,2 }) + convolution("conv", "input", { "weights" }, { "biases" }, { 1, 1, 2, 2 }) ); network network(engine, topology); @@ -2618,7 +2486,7 @@ TEST(convolution_f32_fw_gpu, offsets_wsiz3x3_wstr2x2_in2x2x1x1_zeropad) { auto& engine = get_test_engine(); auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 1, 1, 2, 2 } }); - //auto output = memory::allocate({ memory::format::yxfb_f32,{ 1 ,{ 2, 2 }, 1 } }); + //auto output = memory::allocate({ memory::format::yxfb_f32, { 1, { 2, 2 }, 1 } }); auto weights = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 3, 3 } }); auto biases = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 1, 1 } }); @@ -2635,11 +2503,11 @@ TEST(convolution_f32_fw_gpu, offsets_wsiz3x3_wstr2x2_in2x2x1x1_zeropad) { "input", { "weights" }, { "biases" }, - { 1,1,2,2 }, - tensor{{ 0,0,1,1 }, 0}, + { 1, 1, 2, 2 }, + tensor{ { 0, 0, 1, 1 }, 0 }, { 1, 1, 1, 1 }, "", - padding{ { 0,0,1,1 }, 0 }) + padding{ { 0, 0, 1, 1 }, 0 }) ); network network(engine, topology); @@ -2694,8 +2562,8 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x1_nopad_split2) { auto& engine = get_test_engine(); auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 1, 2, 4, 4 } }); - //auto output = memory::allocate({ memory::format::yxfb_f32,{ 1,{ 2, 2 }, 2 } }); - auto weights1 = engine.allocate_memory({ data_types::f32, format::goiyx, tensor(group(2), batch(1), feature(1), spatial(2,2))}); + //auto output = memory::allocate({ memory::format::yxfb_f32, { 1, { 2, 2 }, 2 } }); + auto weights1 = engine.allocate_memory({ data_types::f32, format::goiyx, tensor(group(2), batch(1), feature(1), spatial(2, 2))}); auto biases1 = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 2, 1, 1 } }); set_values(input, { @@ -2717,9 +2585,9 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x1_nopad_split2) { { "weights1" }, { "biases1" }, 2, - { 0,0,2,2 }, - { 0,0,0,0 }, - { 1,1,1,1 }) + { 0, 0, 2, 2 }, + { 0, 0, 0, 0 }, + { 1, 1, 1, 1 }) ); network network(engine, topology); @@ -2792,8 +2660,8 @@ TEST(convolution_f32_fw_gpu, 
basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_split2) { auto& engine = get_test_engine(); auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 2, 2, 4, 4 } }); - //auto output = memory::allocate({ memory::format::yxfb_f32,{ 2,{ 2, 2 }, 2 } }); - auto weights1 = engine.allocate_memory({ data_types::f32, format::goiyx, tensor(group(2), batch(1), feature(1), spatial(2,2)) }); + //auto output = memory::allocate({ memory::format::yxfb_f32, { 2, { 2, 2 }, 2 } }); + auto weights1 = engine.allocate_memory({ data_types::f32, format::goiyx, tensor(group(2), batch(1), feature(1), spatial(2, 2)) }); auto biases1 = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 2, 1, 1 } }); set_values(input, { @@ -2815,9 +2683,9 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_split2) { { "weights1" }, { "biases1" }, 2, - { 1,1,2,2 }, - { 0,0,0,0 }, - { 1,1,1,1 }) + { 1, 1, 2, 2 }, + { 0, 0, 0, 0 }, + { 1, 1, 1, 1 }) ); network network(engine, topology); @@ -2853,9 +2721,9 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x1_nopad_group2) { // data is similar as in basic_wsiz2x2_wstr2x2_in4x4x2x1_nopad_split2 auto& engine = get_test_engine(); - auto input = engine.allocate_memory({ data_types::f32, format::yxfb,{ 1, 2, 4, 4 } }); - auto weights = engine.allocate_memory({ data_types::f32, format::goiyx ,tensor(group(2), batch(1), feature(1), spatial(2,2)) }); - auto biases = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 2, 1, 1 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 1, 2, 4, 4 } }); + auto weights = engine.allocate_memory({ data_types::f32, format::goiyx, tensor(group(2), batch(1), feature(1), spatial(2, 2)) }); + auto biases = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 2, 1, 1 } }); set_values(input, { -0.5f, 0.5f, 1.0f, 1.5f, 0.5f, 2.3f, 2.0f, -0.4f, @@ -2879,9 +2747,9 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x1_nopad_group2) { { "weights" }, { "biases" }, 2, // number of groups - { 0,0,2,2 }, - { 0,0,0,0 }, - { 1,1,1,1 }) + { 0, 0, 2, 2 }, + { 0, 0, 0, 0 }, + { 1, 1, 1, 1 }) ); network network(engine, topology); @@ -2910,9 +2778,9 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x1_nopad_group2_bfyx) auto& engine = get_test_engine(); - auto input = engine.allocate_memory({ data_types::f32, format::yxfb,{ 1, 2, 4, 4 } }); - auto weights = engine.allocate_memory({ data_types::f32, format::goiyx ,tensor(group(2), batch(1), feature(1), spatial(2,2)) }); - auto biases = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 2, 1, 1 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 1, 2, 4, 4 } }); + auto weights = engine.allocate_memory({ data_types::f32, format::goiyx, tensor(group(2), batch(1), feature(1), spatial(2, 2)) }); + auto biases = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 2, 1, 1 } }); set_values(input, { -0.5f, 0.5f, 1.0f, 1.5f, 0.5f, 2.3f, 2.0f, -0.4f, @@ -2928,7 +2796,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x1_nopad_group2_bfyx) topology topology( input_layout("input", input->get_layout()), - reorder("input_1", "input", { data_types::f32,format::bfyx,{ 1, 2, 4, 4 } }), + reorder("input_1", "input", { data_types::f32, format::bfyx, { 1, 2, 4, 4 } }), data("weights", weights), data("biases", biases), convolution( @@ -2937,9 +2805,9 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x1_nopad_group2_bfyx) { "weights" }, { "biases" }, 2, // number of groups - { 0,0,2,2 
}, - { 0,0,0,0 }, - { 1,1,1,1 }) + { 0, 0, 2, 2 }, + { 0, 0, 0, 0 }, + { 1, 1, 1, 1 }) ); network network(engine, topology); @@ -2968,9 +2836,9 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_group2) { auto& engine = get_test_engine(); - auto input = engine.allocate_memory({ data_types::f32, format::yxfb,{ 2, 2, 4, 4 } }); - auto weights = engine.allocate_memory({ data_types::f32, format::goiyx ,tensor(group(2), batch(1), feature(1), spatial(2,2)) }); - auto biases = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 2, 1, 1 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 2, 2, 4, 4 } }); + auto weights = engine.allocate_memory({ data_types::f32, format::goiyx, tensor(group(2), batch(1), feature(1), spatial(2, 2)) }); + auto biases = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 2, 1, 1 } }); set_values(input, { -0.5f, -0.5f, 0.5f, 0.5f, 1.0f, 1.0f, 1.5f, 1.5f, 0.5f, 0.5f, 2.3f, 2.3f, 2.0f, 2.0f, -0.4f, -0.4f, @@ -2994,9 +2862,9 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_group2) { { "weights" }, { "biases" }, 2, // number of groups - { 1,1,2,2 }, - { 0,0,0,0 }, - { 1,1,1,1 }) + { 1, 1, 2, 2 }, + { 0, 0, 0, 0 }, + { 1, 1, 1, 1 }) ); network network(engine, topology); @@ -3034,7 +2902,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_split2_depthw auto& engine = get_test_engine(); - auto input = engine.allocate_memory({ data_types::f32, format::yxfb,{ 2, 16, 4, 4 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 2, 16, 4, 4 } }); set_values(input, { -0.5f, -0.5f, 0.5f, 0.5f, -0.5f, -0.5f, 0.5f, 0.5f, -0.5f, -0.5f, 0.5f, 0.5f, -0.5f, -0.5f, 0.5f, 0.5f, -0.5f, -0.5f, 0.5f, 0.5f, -0.5f, -0.5f, 0.5f, 0.5f, -0.5f, -0.5f, 0.5f, 0.5f, -0.5f, -0.5f, 0.5f, 0.5f, @@ -3055,8 +2923,8 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_split2_depthw -0.5f, -0.5f, 3.0f, 3.0f, -0.5f, -0.5f, 3.0f, 3.0f, -0.5f, -0.5f, 3.0f, 3.0f, -0.5f, -0.5f, 3.0f, 3.0f, -0.5f, -0.5f, 3.0f, 3.0f, -0.5f, -0.5f, 3.0f, 3.0f, -0.5f, -0.5f, 3.0f, 3.0f, -0.5f, -0.5f, 3.0f, 3.0f, }); - auto weights1 = engine.allocate_memory({ data_types::f32, format::goiyx ,tensor(group(16), batch(1), feature(1), spatial(2,2)) }); - auto biases1 = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 16, 1, 1 } }); + auto weights1 = engine.allocate_memory({ data_types::f32, format::goiyx, tensor(group(16), batch(1), feature(1), spatial(2, 2)) }); + auto biases1 = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 16, 1, 1 } }); set_values(weights1, { -2.0f, 0.5f, 3.5f, 1.5f, -1.2f, 1.5f, 0.5f, -0.5f, -2.0f, 0.5f, 3.5f, 1.5f, -1.2f, 1.5f, 0.5f, -0.5f, @@ -3089,9 +2957,9 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_split2_depthw { weights_id }, { bias_id }, 16, // number of groups - { 1,1,2,2 }, - { 0,0,0,0 }, - { 1,1,1,1 }) + { 1, 1, 2, 2 }, + { 0, 0, 0, 0 }, + { 1, 1, 1, 1 }) ); network network(engine, topology); @@ -3123,7 +2991,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_split2_depthw // data is similar as in basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_split2 but with batch 1 auto& engine = get_test_engine(); - auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 16, 4, 4 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 16, 4, 4 } }); set_values(input, { -0.5f, 1.0f, 0.5f, 2.0f, 1.5f, -0.5f, 0.0f, -1.0f, 0.5f, 0.5f, -1.0f, 1.0f, 0.5f, 2.0f, 1.5f, -0.5f, @@ 
-3144,8 +3012,8 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_split2_depthw 0.5f, 1.5f, 2.3f, -0.4f, 2.0f, -4.0f, 1.0f, 3.0f, 0.5f, 1.5f, 2.3f, -0.4f, 2.0f, -4.0f, 1.0f, 3.0f, }); - auto weights1 = engine.allocate_memory({ data_types::f32, format::goiyx ,tensor(group(16), batch(1), feature(1), spatial(2,2)) }); - auto biases1 = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 16, 1, 1 } }); + auto weights1 = engine.allocate_memory({ data_types::f32, format::goiyx, tensor(group(16), batch(1), feature(1), spatial(2, 2)) }); + auto biases1 = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 16, 1, 1 } }); set_values(weights1, { -2.0f, 0.5f, 3.5f, 1.5f, -1.2f, 1.5f, 0.5f, -0.5f, -2.0f, 0.5f, 3.5f, 1.5f, -1.2f, 1.5f, 0.5f, -0.5f, @@ -3178,9 +3046,9 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_split2_depthw { weights_id }, { bias_id }, 16, // number of groups - { 1,1,2,2 }, - { 0,0,0,0 }, - { 1,1,1,1 }) + { 1, 1, 2, 2 }, + { 0, 0, 0, 0 }, + { 1, 1, 1, 1 }) ); network network(engine, topology); @@ -3217,7 +3085,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_group16) { auto& engine = get_test_engine(); - auto input = engine.allocate_memory({ data_types::f32, format::yxfb,{ 2, 16, 4, 4 } }); + auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 2, 16, 4, 4 } }); set_values(input, { -0.5f, -0.5f, 0.5f, 0.5f, -0.5f, -0.5f, 0.5f, 0.5f, -0.5f, -0.5f, 0.5f, 0.5f, -0.5f, -0.5f, 0.5f, 0.5f, -0.5f, -0.5f, 0.5f, 0.5f, -0.5f, -0.5f, 0.5f, 0.5f, -0.5f, -0.5f, 0.5f, 0.5f, -0.5f, -0.5f, 0.5f, 0.5f, @@ -3240,8 +3108,8 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_group16) { topology topology(input_layout("input", input->get_layout())); - auto weights = engine.allocate_memory({ data_types::f32, format::goiyx ,tensor(group(16), batch(1), feature(1), spatial(2,2)) }); - auto biases = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 16, 1, 1 } }); + auto weights = engine.allocate_memory({ data_types::f32, format::goiyx, tensor(group(16), batch(1), feature(1), spatial(2, 2)) }); + auto biases = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 16, 1, 1 } }); set_values(weights, { @@ -3263,7 +3131,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_group16) { -1.2f, 1.5f, 0.5f, -0.5f } ); - set_values(biases, { 2.0f, -1.0f, 2.0f, -1.0f, 2.0f, -1.0f, 2.0f, -1.0f, 2.0f, -1.0f, 2.0f, -1.0f, 2.0f, -1.0f, 2.0f, -1.0f}); + set_values(biases, { 2.0f, -1.0f, 2.0f, -1.0f, 2.0f, -1.0f, 2.0f, -1.0f, 2.0f, -1.0f, 2.0f, -1.0f, 2.0f, -1.0f, 2.0f, -1.0f }); topology.add( data("weights", weights), @@ -3277,9 +3145,9 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_group16) { { "weights" }, { "bias" }, 16, - { 1,1,2,2 }, - { 0,0,0,0 }, - { 1,1,1,1 }) + { 1, 1, 2, 2 }, + { 0, 0, 0, 0 }, + { 1, 1, 1, 1 }) ); network network(engine, topology); @@ -3311,7 +3179,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_group16_bfyx) // data is similar as in basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_split2_depthwise_sep_opt_bfyx auto& engine = get_test_engine(); - auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 2, 16, 4, 4 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 16, 4, 4 } }); set_values(input, { -0.5f, 1.0f, 0.5f, 2.0f, 1.5f, -0.5f, 0.0f, -1.0f, 0.5f, 0.5f, -1.0f, 1.0f, 0.5f, 2.0f, 1.5f, -0.5f, @@ -3334,8 +3202,8 @@ TEST(convolution_f32_fw_gpu, 
basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_group16_bfyx) topology topology(input_layout("input", input->get_layout())); - auto weights = engine.allocate_memory({ data_types::f32, format::goiyx ,tensor(group(16), batch(1), feature(1), spatial(2,2)) }); - auto biases = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 16, 1, 1 } }); + auto weights = engine.allocate_memory({ data_types::f32, format::goiyx, tensor(group(16), batch(1), feature(1), spatial(2, 2)) }); + auto biases = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 16, 1, 1 } }); set_values(weights, { @@ -3358,7 +3226,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_group16_bfyx) } ); - set_values(biases, { 2.0f, -1.0f, 2.0f, -1.0f, 2.0f, -1.0f, 2.0f, -1.0f, 2.0f, -1.0f, 2.0f, -1.0f, 2.0f, -1.0f, 2.0f, -1.0f}); + set_values(biases, { 2.0f, -1.0f, 2.0f, -1.0f, 2.0f, -1.0f, 2.0f, -1.0f, 2.0f, -1.0f, 2.0f, -1.0f, 2.0f, -1.0f, 2.0f, -1.0f }); topology.add( data("weights", weights), @@ -3372,9 +3240,9 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_group16_bfyx) { "weights" }, { "bias" }, 16, - { 1,1,2,2 }, - { 0,0,0,0 }, - { 1,1,1,1 }) + { 1, 1, 2, 2 }, + { 0, 0, 0, 0 }, + { 1, 1, 1, 1 }) ); network network(engine, topology); @@ -3442,7 +3310,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_group16_bfyx) auto& engine = get_test_engine(); auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 1, 4, 1, 1 } }); - //auto output = memory::allocate({ memory::format::yxfb_f32,{ 1,{ 1, 1 }, 4 } }); + //auto output = memory::allocate({ memory::format::yxfb_f32, { 1, { 1, 1 }, 4 } }); auto weights1 = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 1, 1 } }); auto biases1 = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 2, 1, 1 } }); auto weights2 = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 2, 1, 1 } }); @@ -3467,9 +3335,9 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_group16_bfyx) "input", { "weights1", "weights2" }, { "biases1", "biases2" }, - { 1,1,2,2 }, - { 0,0,0,0 }, - { 1,1,1,1 }) + { 1, 1, 2, 2 }, + { 0, 0, 0, 0 }, + { 1, 1, 1, 1 }) ); network network(engine, topology); @@ -3523,7 +3391,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz1x1_wstr2x2_in1x1x2x1_nopad_split2) { auto& engine = get_test_engine(); auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 1, 2, 1, 1 } }); - //auto output = memory::allocate({ memory::format::yxfb_f32,{ 1,{ 1, 1 }, 4 } }); + //auto output = memory::allocate({ memory::format::yxfb_f32, { 1, { 1, 1 }, 4 } }); auto weights1 = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 1, 1, 1 } }); auto biases1 = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 2, 1, 1 } }); auto weights2 = engine.allocate_memory({ data_types::f32, format::bfyx, { 2, 1, 1, 1 } }); @@ -3548,9 +3416,9 @@ TEST(convolution_f32_fw_gpu, basic_wsiz1x1_wstr2x2_in1x1x2x1_nopad_split2) { "input", { "weights1", "weights2" }, { "biases1", "biases2" }, - { 1,1,2,2 }, - { 0,0,0,0 }, - { 1,1,1,1 }) + { 1, 1, 2, 2 }, + { 0, 0, 0, 0 }, + { 1, 1, 1, 1 }) ); network network(engine, topology); @@ -3610,7 +3478,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz1x1_wstr2x2_in1x1x4x1_filter_1x3x2x1x1_no auto& engine = get_test_engine(); auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 1, 4, 1, 1 } }); - //auto output = memory::allocate({ memory::format::yxfb_f32,{ 1,{ 1, 1 }, 6 } }); + //auto output = memory::allocate({ 
memory::format::yxfb_f32, { 1, { 1, 1 }, 6 } }); auto weights1 = engine.allocate_memory({ data_types::f32, format::bfyx, { 3, 2, 1, 1 } }); auto biases1 = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 3, 1, 1 } }); auto weights2 = engine.allocate_memory({ data_types::f32, format::bfyx, { 3, 2, 1, 1 } }); @@ -3635,9 +3503,9 @@ TEST(convolution_f32_fw_gpu, basic_wsiz1x1_wstr2x2_in1x1x4x1_filter_1x3x2x1x1_no "input", { "weights1", "weights2" }, { "biases1", "biases2" }, - { 1,1,2,2 }, - { 0,0,0,0 }, - { 1,1,1,1 }) + { 1, 1, 2, 2 }, + { 0, 0, 0, 0 }, + { 1, 1, 1, 1 }) ); network network(engine, topology); @@ -3687,7 +3555,7 @@ TEST(convolution_gpu, trivial_convolution_relu) { auto& engine = get_test_engine(); auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 1, 1, 4, 4 } }); - //auto output = memory::allocate({ memory::format::yxfb_f32,{ 1 ,{ 2, 2 }, 1 } }); + //auto output = memory::allocate({ memory::format::yxfb_f32, { 1, { 2, 2 }, 1 } }); auto weights = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 2, 2 } }); auto biases = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 1, 1 } }); @@ -3709,8 +3577,8 @@ TEST(convolution_gpu, trivial_convolution_relu) { "input", { "weights" }, { "biases" }, - { 1,1,2,2 }, - { 0,0,0,0 }, + { 1, 1, 2, 2 }, + { 0, 0, 0, 0 }, { 1, 1, 1, 1 }), activation( "out", @@ -3764,7 +3632,7 @@ TEST(convolution_gpu, relu_with_negative_slope) { auto& engine = get_test_engine(); auto input = engine.allocate_memory({ data_types::f32, format::yxfb, { 1, 1, 4, 4 } }); - //auto output = memory::allocate({ memory::format::yxfb_f32,{ 1 ,{ 2, 2 }, 1 } }); + //auto output = memory::allocate({ memory::format::yxfb_f32, { 1, { 2, 2 }, 1 } }); auto weights = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 2, 2 } }); auto biases = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 1, 1 } }); @@ -3786,14 +3654,14 @@ TEST(convolution_gpu, relu_with_negative_slope) { "input", { "weights" }, { "biases" }, - { 1,1,2,2 }, - { 0,0,0,0 }, + { 1, 1, 2, 2 }, + { 0, 0, 0, 0 }, { 1, 1, 1, 1 }), activation( "out", "conv", activation_func::relu_negative_slope, - {0.1f, 0.0f} + { 0.1f, 0.0f } ) ); @@ -3820,9 +3688,9 @@ TEST(convolution_gpu, DISABLED_two_1x1_kernels_after_each_other) { extern const std::vector conv_1x1_output; - auto input = engine.allocate_memory({ data_types::f32, format::bfyx,{ 16, 8, 16, 16 } }); - auto weights_conv_1 = engine.allocate_memory({ data_types::f32, format::bfyx,{ 8, 8, 1, 1 } }); - auto weights_conv_2 = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 8, 1, 1 } }); + auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 16, 8, 16, 16 } }); + auto weights_conv_1 = engine.allocate_memory({ data_types::f32, format::bfyx, { 8, 8, 1, 1 } }); + auto weights_conv_2 = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 8, 1, 1 } }); set_random_values(input); set_random_values(weights_conv_1); @@ -3909,9 +3777,9 @@ TEST(convolution_gpu, basic_yxfb_4_4_yxfb_2_2_b16_if2_of16_st2_2_p0_sp1_fp32) auto input = engine.allocate_memory({ data_types::f32, input_format, input_size }); auto weights_size = tensor( output_feature_count, input_feature_count, weights_x, weights_y ); auto weights = engine.allocate_memory({ data_types::f32, weights_format, weights_size }); - auto biases = engine.allocate_memory({ data_types::f32, biases_format, {1,output_feature_count,1,1}}); + auto biases = engine.allocate_memory({ data_types::f32, biases_format, { 1, 
output_feature_count, 1, 1 } }); - //auto output = memory::allocate({output_format, {batch_size, {output_x, output_y}, output_feature_count}}); + //auto output = memory::allocate({ output_format, { batch_size, { output_x, output_y }, output_feature_count } }); // input: std::vector input_vals_template { @@ -4014,8 +3882,8 @@ TEST(convolution_gpu, basic_yxfb_4_4_yxfb_2_2_b16_if2_of16_st2_2_p0_sp1_fp32) "input", { "weights" }, { "biases" }, - { 1,1,stride_x,stride_y }, - { 0,0,0,0 }, + { 1, 1, stride_x, stride_y }, + { 0, 0, 0, 0 }, { 1, 1, 1, 1 }), activation( "out", @@ -4060,7 +3928,7 @@ TEST(convolution_gpu, basic_yxfb_4_4_yxfb_2_2_b16_if2_of16_st2_2_p0_sp1_fp32) } void add_primitives(engine& engine, topology& topology) { - auto weights = engine.allocate_memory({ data_types::i8, format::bfyx,{ 2, 1, 3, 2 } }); + auto weights = engine.allocate_memory({ data_types::i8, format::bfyx, { 2, 1, 3, 2 } }); std::vector weights_values = { 1, 2, 1, 2, 1, 2, @@ -4068,7 +3936,7 @@ void add_primitives(engine& engine, topology& topology) { 19, 17, -1, -10, 32, 23 }; set_values(weights, weights_values); - auto biases = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 2, 1, 1 } }); + auto biases = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 2, 1, 1 } }); set_values(biases, { 1.0f, -8.0f }); topology.add( @@ -4108,7 +3976,7 @@ TEST(convolution_f32_fw_gpu, byte_activation) { // Bias: // 1 -8 auto& engine = get_test_engine(); - auto input = engine.allocate_memory({ data_types::i8, format::bfyx,{ 1, 1, 5, 4 } }); + auto input = engine.allocate_memory({ data_types::i8, format::bfyx, { 1, 1, 5, 4 } }); VVVF output_vec = { { @@ -4153,7 +4021,7 @@ TEST(convolution_f32_fw_gpu, byte_activation) { for (int f = 0; f < f_size; f++) for (int y = 0; y < y_size; ++y) { for (int x = 0; x < x_size; ++x) { - EXPECT_NEAR(output_vec[f][y][x], ((float)output_ptr[f*y_size*x_size + y * x_size + x]), 3.0f); + EXPECT_NEAR(output_vec[f][y][x], ((float)output_ptr[f * y_size * x_size + y * x_size + x]), 3.0f); } } } @@ -4161,9 +4029,9 @@ TEST(convolution_f32_fw_gpu, byte_activation) { TEST(convolution_int8_fw_gpu, quantized_convolution_u8s8f32_symmetric) { auto& engine = get_test_engine(); - auto input = engine.allocate_memory({ data_types::u8, format::bfyx,{ 1, 1, 5, 4 } }); - auto weights = engine.allocate_memory({ data_types::i8, format::bfyx,{ 2, 1, 3, 3 } }); - auto biases = engine.allocate_memory({ data_types::f32, format::bfyx,{ 1, 2, 1, 1 } }); + auto input = engine.allocate_memory({ data_types::u8, format::bfyx, { 1, 1, 5, 4 } }); + auto weights = engine.allocate_memory({ data_types::i8, format::bfyx, { 2, 1, 3, 3 } }); + auto biases = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 2, 1, 1 } }); set_values(input, { 1, 2, 3, 4, 5, 2, 2, 3, 4, 6, @@ -4192,7 +4060,7 @@ TEST(convolution_int8_fw_gpu, quantized_convolution_u8s8f32_symmetric) { input_layout("input", input->get_layout()), data("weights", weights), data("biases", biases), - convolution("conv", "input", { "weights" }, { "biases" }, tensor{ 0, 0, 2, 2 }, tensor(0), tensor{1, 1, 1, 1}, tensor{1, 2, 3, 2}), + convolution("conv", "input", { "weights" }, { "biases" }, tensor{ 0, 0, 2, 2 }, tensor(0), tensor{ 1, 1, 1, 1 }, tensor{ 1, 2, 3, 2 }), reorder("out", "conv", format::bfyx, data_types::f32)); build_options opts; @@ -4219,7 +4087,7 @@ TEST(convolution_int8_fw_gpu, quantized_convolution_u8s8f32_symmetric) { for (int f = 0; f < f_size; f++) for (int y = 0; y < y_size; ++y) { for (int x = 0; x < x_size; ++x) { - 
EXPECT_NEAR(output_vec[f][y][x], ((float)output_ptr[f*y_size*x_size + y * x_size + x]), 1e-5f) << + EXPECT_NEAR(output_vec[f][y][x], ((float)output_ptr[f * y_size * x_size + y * x_size + x]), 1e-5f) << " x="<get_layout()), - reorder("cvt_input", "input", {data_types::f16, input_format, input_size}), + reorder("cvt_input", "input", { data_types::f16, input_format, input_size }), data("weights", weights), - reorder("cvt_weights", "weights", {data_types::f16, weights_format, weights_size}), + reorder("cvt_weights", "weights", { data_types::f16, weights_format, weights_size }), data("biases", biases), - reorder("cvt_biases", "biases", {data_types::f16, biases_format, biases_size}), + reorder("cvt_biases", "biases", { data_types::f16, biases_format, biases_size }), convolution( "conv", "cvt_input", { "cvt_weights" }, { "cvt_biases" }, - { 1,1,stride_x,stride_y }), - reorder("output", "conv", {data_types::f32, output_format, output_size}) + { 1, 1, stride_x, stride_y }), + reorder("output", "conv", { data_types::f32, output_format, output_size }) ); network network(engine, topology); @@ -5092,7 +4960,7 @@ TEST_P(convolution_gpu_fs_byx_fsv32, fs_byx_fsv32) topology.add(data("biases_fsv", biases_mem)); auto conv_fsv = convolution("conv_fsv", "input_fsv", { "weights_fsv" }, { "biases_fsv" }, - { 1, 1, stride, stride }, tensor{{ 0, 0, pad, pad }, 0}); + { 1, 1, stride, stride }, tensor{ { 0, 0, pad, pad }, 0 }); conv_fsv.output_padding = padding({ 0, 0, output_padding, output_padding }, 0.f); topology.add(conv_fsv); @@ -5115,7 +4983,7 @@ TEST_P(convolution_gpu_fs_byx_fsv32, fs_byx_fsv32) } auto conv_fsv = convolution("conv_fsv", "input_fsv", { "weights_fsv" }, - { 1, 1, stride, stride }, tensor{{ 0, 0, pad, pad }, 0}); + { 1, 1, stride, stride }, tensor{ { 0, 0, pad, pad }, 0 }); conv_fsv.output_padding = padding({ 0, 0, output_padding, output_padding }, 0.f); topology.add(conv_fsv); @@ -5124,7 +4992,7 @@ TEST_P(convolution_gpu_fs_byx_fsv32, fs_byx_fsv32) build_options options; implementation_desc conv_impl = { format::fs_b_yx_fsv32, "" }; - options.set_option(build_option::force_implementations({ {"conv_fsv", conv_impl} })); + options.set_option(build_option::force_implementations({ { "conv_fsv", conv_impl } })); options.set_option(build_option::optimize_data(true)); network network(engine, topology, options); @@ -5194,7 +5062,7 @@ TEST(convolution_f16_fsv_gpu, convolution_f16_fsv_gpu_padding) { data("weights_fsv", weights_mem)); // add input padding by X and Y - layout w_pad(data_types::f16, format::bfyx, input_size, padding({ 0,0,1,1 }, { 0, 0, 0, 0 })); + layout w_pad(data_types::f16, format::bfyx, input_size, padding({ 0, 0, 1, 1 }, { 0, 0, 0, 0 })); topology.add(reorder("input_fsv", "input", w_pad)); // Generate bias data @@ -5225,7 +5093,7 @@ TEST(convolution_f16_fsv_gpu, convolution_f16_fsv_gpu_padding) { build_options options; implementation_desc conv_impl = { format::fs_b_yx_fsv32, "convolution_gpu_bfyx_to_fs_byx_fsv32" }; - options.set_option(build_option::force_implementations({ {"conv_fsv", conv_impl} })); + options.set_option(build_option::force_implementations({ { "conv_fsv", conv_impl } })); options.set_option(build_option::optimize_data(true)); network network(engine, topology, options); @@ -5383,7 +5251,7 @@ TEST_P(convolution_gpu_fs_byx_fsv32_crop, fs_byx_fsv32_crop) topology.add(data("biases_fsv", biases_mem)); auto conv_fsv = convolution("conv_fsv", "right_crop", { "weights_fsv" }, { "biases_fsv" }, - { 1, 1, stride, stride }, tensor{{ 0, 0, pad, pad }, 0}); + { 1, 1, stride, 
stride }, tensor{ { 0, 0, pad, pad }, 0 }); conv_fsv.output_padding = padding({ 0, 0, output_padding, output_padding }, 0.f); topology.add(conv_fsv); } @@ -5405,7 +5273,7 @@ TEST_P(convolution_gpu_fs_byx_fsv32_crop, fs_byx_fsv32_crop) } auto conv_fsv = convolution("conv_fsv", "right_crop", { "weights_fsv" }, - { 1, 1, stride, stride }, tensor{{ 0, 0, pad, pad }, 0}); + { 1, 1, stride, stride }, tensor{ { 0, 0, pad, pad }, 0 }); conv_fsv.output_padding = padding({ 0, 0, output_padding, output_padding }, 0.f); topology.add(conv_fsv); } @@ -5432,7 +5300,7 @@ TEST_P(convolution_gpu_fs_byx_fsv32_crop, fs_byx_fsv32_crop) build_options options; implementation_desc conv_impl = { format::fs_b_yx_fsv32, "convolution_gpu_bfyx_to_fs_byx_fsv32" }; - options.set_option(build_option::force_implementations({ {"conv_fsv", conv_impl} })); + options.set_option(build_option::force_implementations({ { "conv_fsv", conv_impl } })); options.set_option(build_option::optimize_data(true)); network network(engine, topology, options); @@ -5497,12 +5365,12 @@ TEST(convolution_f32_fw_gpu, convolution_int8_b_fs_yx_fsv4_to_bfyx) { topology topology_ref( input_layout("input", input->get_layout()), - reorder("to_int", "input", {data_types::i8, format::bfyx, {batch_num, input_f, input_size_x, input_size_y}}), + reorder("to_int", "input", { data_types::i8, format::bfyx, { batch_num, input_f, input_size_x, input_size_y } }), data("weights", weights), data("biases", biases), - convolution("conv", "to_int", {"weights"}, {"biases"}, {1, 1, 1, 1}, tensor{{0, 0, 2, 2}, 0}, {1, 1, 1, 1}, "", - padding{{0, 0, output_padding, output_padding}, 0}), - reorder("output", "conv", {data_types::f32, format::bfyx, {batch_num, input_f, input_size_x, input_size_y}})); + convolution("conv", "to_int", { "weights" }, { "biases" }, { 1, 1, 1, 1 }, tensor{ { 0, 0, 2, 2 }, 0 }, { 1, 1, 1, 1 }, "", + padding{ { 0, 0, output_padding, output_padding }, 0 }), + reorder("output", "conv", { data_types::f32, format::bfyx, { batch_num, input_f, input_size_x, input_size_y } })); build_options build_opt; @@ -5519,12 +5387,12 @@ TEST(convolution_f32_fw_gpu, convolution_int8_b_fs_yx_fsv4_to_bfyx) { topology topology_act( input_layout("input", input->get_layout()), - reorder("to_int", "input", { data_types::i8,format::b_fs_yx_fsv4,{ batch_num, input_f, input_size_x, input_size_y } }), + reorder("to_int", "input", { data_types::i8,format::b_fs_yx_fsv4, { batch_num, input_f, input_size_x, input_size_y } }), data("weights", weights), data("biases", biases), - convolution("conv", "to_int", { "weights" }, { "biases" }, { 1, 1, 1, 1 }, tensor{{ 0, 0, 2, 2 }, 0}, { 1, 1, 1, 1 }, "", + convolution("conv", "to_int", { "weights" }, { "biases" }, { 1, 1, 1, 1 }, tensor{ { 0, 0, 2, 2 }, 0 }, { 1, 1, 1, 1 }, "", padding{ { 0, 0, output_padding, output_padding }, 0 }), - reorder("output", "conv", { data_types::f32,format::bfyx,{ batch_num, input_f, input_size_x, input_size_y } })); + reorder("output", "conv", { data_types::f32,format::bfyx, { batch_num, input_f, input_size_x, input_size_y } })); build_options build_opt_act; @@ -5634,7 +5502,7 @@ TEST(convolution_gpu, bfyx_iyxo_5x5_fp16) topology.add(data("biases_fsv", biases_mem)); auto conv_fsv = convolution("conv_fsv", "input", { "weights_fsv" }, { "biases_fsv" }, - { 1, 1, stride, stride }, tensor{{ 0, 0, pad, pad }, 0}); + { 1, 1, stride, stride }, tensor{ { 0, 0, pad, pad }, 0 }); conv_fsv.output_padding = padding({ 0, 0, output_padding, output_padding }, 0.f); topology.add(conv_fsv); @@ -5659,7 +5527,7 @@ 
TEST(convolution_gpu, bfyx_iyxo_5x5_fp16) auto conv_fsv = convolution("conv_fsv", "input", { "weights_fsv" }, - { 1, 1, stride, stride }, tensor{{ 0, 0, pad, pad }, 0}); + { 1, 1, stride, stride }, tensor{ { 0, 0, pad, pad }, 0 }); conv_fsv.output_padding = padding({ 0, 0, output_padding, output_padding }, 0.f); topology.add(conv_fsv); @@ -5737,7 +5605,7 @@ void blockedFormatZeroCheck(cldnn::memory::ptr out_mem) { // skip on new batch if (i % (number_of_zeroes / batch_skip) == 0) zero_ind += to_skip; - if (zero_ind >= (size_t)b*f*spatials) + if (zero_ind >= (size_t) b * f * spatials) return; zero_ind += f_mod; @@ -5862,7 +5730,7 @@ TEST_P(convolution_gpu_block_layout3D, bfzyx_bsv16_fsv16_fp32) topology.add(data("biases", biases_mem)); auto conv_bsv16_fsv16 = convolution("conv_bsv16_fsv16", "input_bsv16_fsv16", { "weights" }, { "biases" }, - { 1, 1, stride, stride }, tensor{{ 0, 0, pad, pad, 0 }, 0}); + { 1, 1, stride, stride }, tensor{ { 0, 0, pad, pad, 0 }, 0 }); conv_bsv16_fsv16.output_padding = padding({ 0, 0, output_padding, output_padding, 0 }, 0.f); topology.add(conv_bsv16_fsv16); @@ -5885,7 +5753,7 @@ TEST_P(convolution_gpu_block_layout3D, bfzyx_bsv16_fsv16_fp32) } auto conv_bsv16_fsv16 = convolution("conv_bsv16_fsv16", "input_bsv16_fsv16", { "weights" }, - { 1, 1, stride, stride }, tensor{{ 0, 0, pad, pad, 0 }, 0}); + { 1, 1, stride, stride }, tensor{ { 0, 0, pad, pad, 0 }, 0 }); conv_bsv16_fsv16.output_padding = padding({ 0, 0, output_padding, output_padding, 0 }, 0.f); topology.add(conv_bsv16_fsv16); @@ -5895,7 +5763,7 @@ TEST_P(convolution_gpu_block_layout3D, bfzyx_bsv16_fsv16_fp32) build_options options; options.set_option(build_option::optimize_data(true)); - options.set_option(build_option::outputs({"conv_bsv16_fsv16", "reorder_bfzyx"})); + options.set_option(build_option::outputs({ "conv_bsv16_fsv16", "reorder_bfzyx" })); network network(engine, topology, options); network.set_input_data("input", input_mem); @@ -5999,7 +5867,7 @@ TEST_P(convolution_gpu_block_layout3D, bfzyx_bsv16_fsv16_fp16) topology.add(data("biases", biases_mem)); auto conv_bsv16_fsv16 = convolution("conv_bsv16_fsv16", "input_bsv16_fsv16", { "weights" }, { "biases" }, - { 1, 1, stride, stride }, tensor{{ 0, 0, pad, pad, 0 }, 0}); + { 1, 1, stride, stride }, tensor{ { 0, 0, pad, pad, 0 }, 0 }); conv_bsv16_fsv16.output_padding = padding({ 0, 0, output_padding, output_padding, 0 }, 0.f); topology.add(conv_bsv16_fsv16); @@ -6022,7 +5890,7 @@ TEST_P(convolution_gpu_block_layout3D, bfzyx_bsv16_fsv16_fp16) } auto conv_bsv16_fsv16 = convolution("conv_bsv16_fsv16", "input_bsv16_fsv16", { "weights" }, - { 1, 1, stride, stride }, tensor{{ 0, 0, pad, pad, 0 }, 0}); + { 1, 1, stride, stride }, tensor{ { 0, 0, pad, pad, 0 }, 0 }); conv_bsv16_fsv16.output_padding = padding({ 0, 0, output_padding, output_padding, 0 }, 0.f); topology.add(conv_bsv16_fsv16); @@ -6032,7 +5900,7 @@ TEST_P(convolution_gpu_block_layout3D, bfzyx_bsv16_fsv16_fp16) build_options options; options.set_option(build_option::optimize_data(true)); - options.set_option(build_option::outputs({"conv_bsv16_fsv16", "reorder_bfzyx"})); + options.set_option(build_option::outputs({ "conv_bsv16_fsv16", "reorder_bfzyx" })); network network(engine, topology, options); network.set_input_data("input", input_mem); @@ -6128,7 +5996,7 @@ TEST_P(convolution_gpu_block_layout3D, bfzyx_bsv16_fsv16_fp32_fused_ops) topology.add(data("biases", biases_mem)); auto conv_bsv16_fsv16 = convolution("conv_bsv16_fsv16", "input_bsv16_fsv16", { "weights" }, { "biases" }, - { 1, 1, 
stride, stride }, tensor{{ 0, 0, pad, pad, 0 }, 0}); + { 1, 1, stride, stride }, tensor{ { 0, 0, pad, pad, 0 }, 0 }); conv_bsv16_fsv16.output_padding = padding({ 0, 0, output_padding, output_padding, 0 }, 0.f); topology.add(conv_bsv16_fsv16); @@ -6151,15 +6019,15 @@ TEST_P(convolution_gpu_block_layout3D, bfzyx_bsv16_fsv16_fp32_fused_ops) } auto conv_bsv16_fsv16 = convolution("conv_bsv16_fsv16", "input_bsv16_fsv16", { "weights" }, - { 1, 1, stride, stride }, tensor{{ 0, 0, pad, pad, 0 }, 0}); + { 1, 1, stride, stride }, tensor{ { 0, 0, pad, pad, 0 }, 0 }); conv_bsv16_fsv16.output_padding = padding({ 0, 0, output_padding, output_padding, 0 }, 0.f); topology.add(conv_bsv16_fsv16); } const float scalar = 5.5f; - auto scale_mem = engine.allocate_memory({ data_types::f32, format::bfzyx, {1, 1, 1, 1, 1} }); - set_values(scale_mem, {scalar}); + auto scale_mem = engine.allocate_memory({ data_types::f32, format::bfzyx, { 1, 1, 1, 1, 1 } }); + set_values(scale_mem, { scalar }); topology.add(data("scalar", scale_mem)); topology.add(scale("scale", "conv_bsv16_fsv16", "scalar")); @@ -6168,7 +6036,7 @@ TEST_P(convolution_gpu_block_layout3D, bfzyx_bsv16_fsv16_fp32_fused_ops) build_options options; options.set_option(build_option::optimize_data(true)); - options.set_option(build_option::outputs({"conv_bsv16_fsv16", "reorder_bfzyx"})); + options.set_option(build_option::outputs({ "conv_bsv16_fsv16", "reorder_bfzyx" })); network network(engine, topology, options); network.set_input_data("input", input_mem); @@ -6296,7 +6164,7 @@ TEST_P(convolution_gpu_block_layout, bfyx_bsv16_fsv16_fp32) topology.add(data("biases", biases_mem)); auto conv_bsv16_fsv16 = convolution("conv_bsv16_fsv16", "input_bsv16_fsv16", { "weights" }, { "biases" }, - { 1, 1, stride, stride }, tensor{{ 0, 0, pad, pad }, 0}); + { 1, 1, stride, stride }, tensor{ { 0, 0, pad, pad }, 0 }); conv_bsv16_fsv16.output_padding = padding({ 0, 0, output_padding, output_padding }, 0.f); topology.add(conv_bsv16_fsv16); @@ -6319,7 +6187,7 @@ TEST_P(convolution_gpu_block_layout, bfyx_bsv16_fsv16_fp32) } auto conv_bsv16_fsv16 = convolution("conv_bsv16_fsv16", "input_bsv16_fsv16", { "weights" }, - { 1, 1, stride, stride }, tensor{{ 0, 0, pad, pad }, 0}); + { 1, 1, stride, stride }, tensor{ { 0, 0, pad, pad }, 0 }); conv_bsv16_fsv16.output_padding = padding({ 0, 0, output_padding, output_padding }, 0.f); topology.add(conv_bsv16_fsv16); @@ -6329,9 +6197,9 @@ TEST_P(convolution_gpu_block_layout, bfyx_bsv16_fsv16_fp32) build_options options; options.set_option(build_option::optimize_data(true)); - options.set_option(build_option::outputs({"conv_bsv16_fsv16", "reorder_bfyx"})); + options.set_option(build_option::outputs({ "conv_bsv16_fsv16", "reorder_bfyx" })); implementation_desc conv_impl = { format::bs_fs_yx_bsv16_fsv16, "" }; - options.set_option(build_option::force_implementations({{"conv_bsv16_fsv16", conv_impl}})); + options.set_option(build_option::force_implementations({ { "conv_bsv16_fsv16", conv_impl } })); network network(engine, topology, options); network.set_input_data("input", input_mem); @@ -6436,7 +6304,7 @@ TEST_P(convolution_gpu_block_layout, bfyx_bsv16_fsv16_fp16) topology.add(data("biases", biases_mem)); auto conv_bsv16_fsv16 = convolution("conv_bsv16_fsv16", "input_bsv16_fsv16", { "weights" }, { "biases" }, - { 1, 1, stride, stride }, tensor{{ 0, 0, pad, pad }, 0}); + { 1, 1, stride, stride }, tensor{ { 0, 0, pad, pad }, 0 }); conv_bsv16_fsv16.output_padding = padding({ 0, 0, output_padding, output_padding, 0 }, 0.f); 
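        // bs_fs_yx_bsv16_fsv16 is a doubly-blocked layout: both the batch and
        // feature dimensions are stored in blocks of 16 elements. It is the
        // format these block_layout tests force and then verify below.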
topology.add(conv_bsv16_fsv16); @@ -6459,7 +6327,7 @@ TEST_P(convolution_gpu_block_layout, bfyx_bsv16_fsv16_fp16) } auto conv_bsv16_fsv16 = convolution("conv_bsv16_fsv16", "input_bsv16_fsv16", { "weights" }, - { 1, 1, stride, stride }, tensor{{ 0, 0, pad, pad }, 0}); + { 1, 1, stride, stride }, tensor{ { 0, 0, pad, pad }, 0 }); conv_bsv16_fsv16.output_padding = padding({ 0, 0, output_padding, output_padding }, 0.f); topology.add(conv_bsv16_fsv16); @@ -6469,9 +6337,9 @@ TEST_P(convolution_gpu_block_layout, bfyx_bsv16_fsv16_fp16) build_options options; options.set_option(build_option::optimize_data(true)); - options.set_option(build_option::outputs({"conv_bsv16_fsv16", "reorder_bfyx"})); + options.set_option(build_option::outputs({ "conv_bsv16_fsv16", "reorder_bfyx" })); implementation_desc conv_impl = { format::bs_fs_yx_bsv16_fsv16, "" }; - options.set_option(build_option::force_implementations({{"conv_bsv16_fsv16", conv_impl}})); + options.set_option(build_option::force_implementations({ { "conv_bsv16_fsv16", conv_impl } })); network network(engine, topology, options); network.set_input_data("input", input_mem); @@ -6567,7 +6435,7 @@ TEST_P(convolution_gpu_block_layout, bfyx_bsv16_fsv16_fp32_fused_ops) topology.add(data("biases", biases_mem)); auto conv_bsv16_fsv16 = convolution("conv_bsv16_fsv16", "input_bsv16_fsv16", { "weights" }, { "biases" }, - { 1, 1, stride, stride }, tensor{{ 0, 0, pad, pad }, 0}); + { 1, 1, stride, stride }, tensor{ { 0, 0, pad, pad }, 0 }); conv_bsv16_fsv16.output_padding = padding({ 0, 0, output_padding, output_padding }, 0.f); topology.add(conv_bsv16_fsv16); @@ -6590,15 +6458,15 @@ TEST_P(convolution_gpu_block_layout, bfyx_bsv16_fsv16_fp32_fused_ops) } auto conv_bsv16_fsv16 = convolution("conv_bsv16_fsv16", "input_bsv16_fsv16", { "weights" }, - { 1, 1, stride, stride }, tensor{ {0, 0, pad, pad}, 0 }); + { 1, 1, stride, stride }, tensor{ { 0, 0, pad, pad }, 0 }); conv_bsv16_fsv16.output_padding = padding({ 0, 0, output_padding, output_padding }, 0.f); topology.add(conv_bsv16_fsv16); } const float scalar = 5.5f; - auto scale_mem = engine.allocate_memory({ data_types::f32, format::bfyx, {1, 1, 1, 1} }); - set_values(scale_mem, {scalar}); + auto scale_mem = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 1, 1 } }); + set_values(scale_mem, { scalar }); topology.add(data("scalar", scale_mem)); topology.add(scale("scale", "conv_bsv16_fsv16", "scalar")); @@ -6607,9 +6475,9 @@ TEST_P(convolution_gpu_block_layout, bfyx_bsv16_fsv16_fp32_fused_ops) build_options options; options.set_option(build_option::optimize_data(true)); - options.set_option(build_option::outputs({"conv_bsv16_fsv16", "reorder_bfyx"})); + options.set_option(build_option::outputs({ "conv_bsv16_fsv16", "reorder_bfyx" })); implementation_desc conv_impl = { format::bs_fs_yx_bsv16_fsv16, "" }; - options.set_option(build_option::force_implementations({{"conv_bsv16_fsv16", conv_impl}})); + options.set_option(build_option::force_implementations({ { "conv_bsv16_fsv16", conv_impl } })); network network(engine, topology, options); network.set_input_data("input", input_mem); @@ -6738,7 +6606,7 @@ TEST_P(convolution_depthwise_gpu, depthwise_conv_fs_b_yx_fsv32) } auto conv_fsv = convolution("conv_fsv", "input_fsv", { "weights_fsv" }, groups, - { 1, 1, stride, stride }, tensor{{ 0, 0, pad_x, pad_y }, 0}); + { 1, 1, stride, stride }, tensor{ { 0, 0, pad_x, pad_y }, 0 }); conv_fsv.output_padding = padding({ 0, 0, output_padding, output_padding }, 0.f); topology.add(conv_fsv); @@ -6746,7 +6614,7 @@ 
TEST_P(convolution_depthwise_gpu, depthwise_conv_fs_b_yx_fsv32) build_options options; options.set_option(build_option::optimize_data(true)); implementation_desc conv_impl = { format::fs_b_yx_fsv32, "" }; - options.set_option(build_option::force_implementations({ {"conv_fsv", conv_impl} })); + options.set_option(build_option::force_implementations({ { "conv_fsv", conv_impl } })); network network(engine, topology, options); network.set_input_data("input", input_mem); @@ -6881,7 +6749,7 @@ TEST_P(convolution_depthwise_gpu_fsv16, depthwise_conv_b_fs_yx_fsv16) } auto conv_fsv = convolution("conv_fsv", "input_fsv", { "weights_fsv" }, groups, - { 1, 1, stride, stride }, tensor{{ 0, 0, pad_x, pad_y }, 0}); + { 1, 1, stride, stride }, tensor{ { 0, 0, pad_x, pad_y }, 0 }); conv_fsv.output_padding = padding({ 0, 0, output_padding, output_padding }, 0.f); topology.add(conv_fsv); @@ -6889,7 +6757,7 @@ TEST_P(convolution_depthwise_gpu_fsv16, depthwise_conv_b_fs_yx_fsv16) build_options options; options.set_option(build_option::optimize_data(true)); implementation_desc conv_impl = { format::b_fs_yx_fsv16, "" }; - options.set_option(build_option::force_implementations({ {"conv_fsv", conv_impl} })); + options.set_option(build_option::force_implementations({ { "conv_fsv", conv_impl } })); network network(engine, topology, options); network.set_input_data("input", input_mem); @@ -6937,15 +6805,15 @@ TEST(convolution_depthwise_gpu_fsv16, depthwise_conv_b_fs_yx_fsv16_in_feature_pa auto stride = tensor{ 1, 1, 1, 1 }; auto pad = tensor{ 0 }; auto dilation = tensor{ 1, 1, 1, 1 }; - auto output_size = tensor{ 1, num_groups, 1, 2}; + auto output_size = tensor{ 1, num_groups, 1, 2 }; auto input_lower_sizes = { 0, 16, 0, 0 }; auto input_upper_sizes = { 0, 64, 0, 0 }; auto& engine = get_test_engine(); auto input = engine.allocate_memory({ data_types::f32, format::bfyx, input_size }); - auto weights = engine.allocate_memory({ data_types::f32, format::goiyx, weights_size}); - auto bias = engine.allocate_memory({ data_types::f32, format::bfyx, bias_size}); + auto weights = engine.allocate_memory({ data_types::f32, format::goiyx, weights_size }); + auto bias = engine.allocate_memory({ data_types::f32, format::bfyx, bias_size }); set_values(input, { 3, -1, -1, -1, 2, -2, 2, 2, 0, 1, -5, 4, -1, 4, 1, 0, @@ -6983,7 +6851,7 @@ TEST(convolution_depthwise_gpu_fsv16, depthwise_conv_b_fs_yx_fsv16_in_feature_pa build_options options; options.set_option(build_option::optimize_data(true)); implementation_desc conv_impl = { format::b_fs_yx_fsv16, "" }; - options.set_option(build_option::force_implementations({ {"conv", conv_impl} })); + options.set_option(build_option::force_implementations({ { "conv", conv_impl } })); network network(engine, topology, options); network.set_input_data("input", input); @@ -7088,7 +6956,7 @@ TEST_P(convolution_depthwise_gpu_bfyx, depthwise_conv_bfyx) } auto conv_fsv = convolution("conv", "input", { "weights" }, groups, - { 1, 1, stride, stride }, tensor{{ 0, 0, pad_x, pad_y }, 0}); + { 1, 1, stride, stride }, tensor{ { 0, 0, pad_x, pad_y }, 0 }); conv_fsv.output_padding = padding({ 0, 0, output_padding, output_padding }, 0.f); topology.add(conv_fsv); @@ -7096,7 +6964,7 @@ TEST_P(convolution_depthwise_gpu_bfyx, depthwise_conv_bfyx) build_options options; options.set_option(build_option::optimize_data(true)); implementation_desc conv_impl = { format::bfyx, "" }; - options.set_option(build_option::force_implementations({ {"conv", conv_impl} })); + options.set_option(build_option::force_implementations({ { 
"conv", conv_impl } })); network network(engine, topology, options); network.set_input_data("input", input_mem); @@ -7385,10 +7253,10 @@ TEST_P(convolution_grouped_gpu, base) { topology topology(input_layout("input", input->get_layout()), data("weights", weights), - reorder("input_fsv", "input", {data_types::i8, input_data_format, input_size}), + reorder("input_fsv", "input", { data_types::i8, input_data_format, input_size }), convolution("conv", "input_fsv", - {"weights"}, + { "weights" }, std::vector(0), weights_zp_prim_name, input_zp_prim_name, @@ -7400,7 +7268,7 @@ TEST_P(convolution_grouped_gpu, base) { tensor(batch(1), feature(1), spatial(1, 1, 1, 1)), ref_conv_out_size, true), - reorder("out", "conv", {data_types::f32, format::bfzyx, ref_conv_out_size})); + reorder("out", "conv", { data_types::f32, format::bfzyx, ref_conv_out_size })); if (has_input_zp) topology.add(data(input_zp_prim_name[0], input_zp)); @@ -7413,9 +7281,9 @@ TEST_P(convolution_grouped_gpu, base) { build_options options; options.set_option(build_option::optimize_data(true)); - options.set_option(build_option::outputs({"conv", "out"})); - implementation_desc conv_impl = {input_data_format, impl_name}; - options.set_option(build_option::force_implementations({{"conv", conv_impl}})); + options.set_option(build_option::outputs({ "conv", "out" })); + implementation_desc conv_impl = { input_data_format, impl_name }; + options.set_option(build_option::force_implementations({ { "conv", conv_impl } })); cldnn::network network(engine, topology, options); network.set_input_data("input", input); @@ -7505,7 +7373,7 @@ TEST_P(convolution_general_gpu, conv_fp16_cases) { auto weights_size = tensor(output_f, input_f, filter_y, filter_x, 1); auto weights_data = generate_random_4d(output_f, input_f, filter_y, filter_x, -1, 1); auto weights_data_bfyx = flatten_4d(format::bfyx, weights_data); - auto weights_mem = engine.allocate_memory({data_types::f16, format::bfyx, weights_size}); + auto weights_mem = engine.allocate_memory({ data_types::f16, format::bfyx, weights_size }); set_values(weights_mem, weights_data_bfyx); // Will be used to store reference values calculated in branches depending on bias @@ -7516,7 +7384,7 @@ TEST_P(convolution_general_gpu, conv_fp16_cases) { if (with_bias) { auto biases_size = tensor(1, output_f, 1, 1); auto biases_data = generate_random_1d(output_f, -1, 1); - auto biases_mem = engine.allocate_memory({data_types::f16, format::bfyx, biases_size}); + auto biases_mem = engine.allocate_memory({ data_types::f16, format::bfyx, biases_size }); set_values(biases_mem, biases_data); for (auto bi = 0; bi < batch_num; ++bi) { @@ -7534,16 +7402,16 @@ TEST_P(convolution_general_gpu, conv_fp16_cases) { topology.add(input_layout("input", input_mem->get_layout()), data("weights_fsv", weights_mem), data("bias", biases_mem), - reorder("input_fsv", "input", {data_types::f16, input_data_format, input_size})); + reorder("input_fsv", "input", { data_types::f16, input_data_format, input_size })); auto conv_fsv = convolution("conv_fsv", "input_fsv", - {"weights_fsv"}, - {"bias"}, + { "weights_fsv" }, + { "bias" }, groups, - {1, 1, stride, stride}, - tensor{{0, 0, pad_x, pad_y}, 0}); - conv_fsv.output_padding = padding({0, 0, output_padding, output_padding}, 0.f); + { 1, 1, stride, stride }, + tensor{ { 0, 0, pad_x, pad_y }, 0 }); + conv_fsv.output_padding = padding({ 0, 0, output_padding, output_padding }, 0.f); topology.add(conv_fsv); } else { @@ -7561,21 +7429,21 @@ TEST_P(convolution_general_gpu, conv_fp16_cases) { 
topology.add(input_layout("input", input_mem->get_layout()), data("weights_fsv", weights_mem), - reorder("input_fsv", "input", {data_types::f16, input_data_format, input_size})); + reorder("input_fsv", "input", { data_types::f16, input_data_format, input_size })); auto conv_fsv = convolution("conv_fsv", "input_fsv", - {"weights_fsv"}, + { "weights_fsv" }, groups, - {1, 1, stride, stride}, - tensor{{0, 0, pad_x, pad_y}, 0}); - conv_fsv.output_padding = padding({0, 0, output_padding, output_padding}, 0.f); + { 1, 1, stride, stride }, + tensor{ { 0, 0, pad_x, pad_y }, 0 }); + conv_fsv.output_padding = padding({ 0, 0, output_padding, output_padding }, 0.f); topology.add(conv_fsv); } build_options options; options.set_option(build_option::optimize_data(true)); - implementation_desc conv_impl = {input_data_format, impl_name}; - options.set_option(build_option::force_implementations({{"conv_fsv", conv_impl}})); + implementation_desc conv_impl = { input_data_format, impl_name }; + options.set_option(build_option::force_implementations({ { "conv_fsv", conv_impl } })); network network(engine, topology, options); network.set_input_data("input", input_mem); @@ -7668,10 +7536,10 @@ TEST_P(convolution_gpu_fsv16_to_bfyx, conv_b_fs_yx_fsv16_to_bfyx_padding) // Add convolution auto input_stride = tensor(1, 1, stride, stride); - auto pad = tensor({0, 0, pad_x, pad_y}, 0); + auto pad = tensor({ 0, 0, pad_x, pad_y }, 0); auto input_dilation = tensor(1, 1, 1, 1); - auto input_padding_before = tensor({0, 0, pad_x, pad_y}, 0); - auto input_padding_after = tensor({0, 0, pad_x, pad_y}, 0); + auto input_padding_before = tensor({ 0, 0, pad_x, pad_y }, 0); + auto input_padding_after = tensor({ 0, 0, pad_x, pad_y }, 0); auto conv_fsv = convolution("conv_fsv", "input_fsv16", { "weights_fsv" }, input_stride, pad, input_dilation, input_padding_before, input_padding_after); conv_fsv.output_padding = padding({ 0, 32, 2, 2 }, 0.f); @@ -7697,7 +7565,7 @@ TEST_P(convolution_gpu_fsv16_to_bfyx, conv_b_fs_yx_fsv16_to_bfyx_padding) // Exec target network (fusing: conv+reorder) build_options options_target; implementation_desc conv_impl = { format::b_fs_yx_fsv16, "convolution_gpu_bfyx_f16" }; - options_target.set_option(build_option::force_implementations({ {"conv_fsv", conv_impl} })); + options_target.set_option(build_option::force_implementations({ { "conv_fsv", conv_impl } })); options_target.set_option(build_option::optimize_data(true)); network network_target(engine, topology, options_target); @@ -7770,9 +7638,9 @@ TEST_P(convolution_gpu_fsv16_to_bfyx, conv_b_fs_yx_fsv16_to_bfyx_different_type) // Add convolution auto input_stride = tensor(1, 1, stride, stride); - auto pad = tensor({0, 0, pad_x, pad_y}, 0); + auto pad = tensor({ 0, 0, pad_x, pad_y }, 0); auto input_dilation = tensor(1, 1, 1, 1); - auto no_padding = tensor({0, 0, pad_x, pad_y}, 0); + auto no_padding = tensor({ 0, 0, pad_x, pad_y }, 0); auto conv_fsv = convolution("conv_fsv", "input_fsv16", { "weights_fsv" }, input_stride, pad, input_dilation, no_padding, no_padding); topology.add(conv_fsv); // format 8 to 8 -> after fusing, format 8 to 3 @@ -7796,7 +7664,7 @@ TEST_P(convolution_gpu_fsv16_to_bfyx, conv_b_fs_yx_fsv16_to_bfyx_different_type) // Exec target network (fusing: conv+reorder) build_options options_target; implementation_desc conv_impl = { format::b_fs_yx_fsv16, "convolution_gpu_bfyx_f16" }; - options_target.set_option(build_option::force_implementations({ {"conv_fsv", conv_impl} })); + options_target.set_option(build_option::force_implementations({ { 
"conv_fsv", conv_impl } })); options_target.set_option(build_option::optimize_data(true)); network network_target(engine, topology, options_target); @@ -7836,7 +7704,7 @@ public: auto wei_mem = engine.allocate_memory(wei_lay); auto weights_flat = flatten_4d(format::bfyx, _weights); set_values(wei_mem, weights_flat); - layout reordered_layout = layout{input_type(), input_format(), input_size(), padding_size()}; + layout reordered_layout = layout{ input_type(), input_format(), input_size(), padding_size() }; auto topo = topology(); topo.add(input_layout("input", input_lay)); topo.add(reorder("input_reorder", "input", reordered_layout)); @@ -7866,7 +7734,7 @@ public: { weights_id }, static_cast(groups()), tensor(batch(0), feature(0), spatial(_stride_x, _stride_y)), - tensor({0, 0, _offset_x, _offset_y}, 0), + tensor({ 0, 0, _offset_x, _offset_y }, 0), tensor(batch(0), feature(0), spatial(_dilation_x, _dilation_y))); conv_prim.output_data_type = output_type(); topo.add(conv_prim); @@ -7882,7 +7750,7 @@ public: { "bias" }, static_cast(groups()), tensor(batch(0), feature(0), spatial(_stride_x, _stride_y)), - tensor({0, 0, _offset_x, _offset_y}, 0), + tensor({ 0, 0, _offset_x, _offset_y }, 0), tensor(batch(0), feature(0), spatial(_dilation_x, _dilation_y))); conv_prim.output_data_type = output_type(); topo.add(conv_prim); @@ -7902,7 +7770,7 @@ public: auto build_opts = build_options( build_option::optimize_data(true), - build_option::force_implementations({ {"conv", {input_format(), ""}} }) + build_option::force_implementations({ { "conv", { input_format(), "" } } }) ); auto prog = program::build_program(engine, topo, build_opts); @@ -8201,7 +8069,7 @@ public: auto wei_mem = engine.allocate_memory(wei_lay); auto wei_flat = flatten_4d(format::bfyx, this->_weights); set_values(wei_mem, wei_flat); - layout reordered_layout = layout{this->input_type(), this->input_format(), this->input_size(), this->padding_size()}; + layout reordered_layout = layout{ this->input_type(), this->input_format(), this->input_size(), this->padding_size() }; auto topo = topology(); topo.add(input_layout("input", input_lay)); topo.add(reorder("input_reorder", "input", reordered_layout)); @@ -8231,7 +8099,7 @@ public: { weights_id }, static_cast(this->groups()), tensor(batch(0), feature(0), spatial(this->_stride_x, this->_stride_y)), - tensor({0,0, this->_offset_x, this->_offset_y}, 0), + tensor({ 0, 0, this->_offset_x, this->_offset_y }, 0), tensor(batch(0), feature(0), spatial(this->_dilation_x, this->_dilation_y))); conv_prim.output_data_type = this->output_type(); topo.add(conv_prim); @@ -8247,7 +8115,7 @@ public: { "bias" }, static_cast(this->groups()), tensor(batch(0), feature(0), spatial(this->_stride_x, this->_stride_y)), - tensor({0,0, this->_offset_x, this->_offset_y}, 0), + tensor({ 0, 0, this->_offset_x, this->_offset_y }, 0), tensor(batch(0), feature(0), spatial(this->_dilation_x, this->_dilation_y))); conv_prim.output_data_type = this->output_type(); topo.add(conv_prim); @@ -8262,7 +8130,7 @@ public: auto build_opts = build_options( build_option::optimize_data(true), - build_option::force_implementations({ {"conv", { this->input_format(), ""}} }) + build_option::force_implementations({ { "conv", { this->input_format(), "" } } }) ); auto prog = program::build_program(engine, topo, build_opts); @@ -8851,9 +8719,9 @@ TEST_P(convolution_test, CONVOLUTION) { } INSTANTIATE_TEST_SUITE_P(DISABLED_CONVOLUTION, - convolution_test, - ::testing::ValuesIn(convolution_test::generate_all_test_params()), - 
tests::generic_test::custom_param_name_functor()); + convolution_test, + ::testing::ValuesIn(convolution_test::generate_all_test_params()), + tests::generic_test::custom_param_name_functor()); #ifdef ENABLE_ONEDNN_FOR_GPU @@ -8952,7 +8820,7 @@ TEST_P(convolution_gpu_onednn, conv_onednn_cases) { auto weights_size = tensor(output_f, input_f, filter_y, filter_x, 1); auto weights_data = generate_random_4d(output_f, input_f, filter_y, filter_x, -1, 1); auto weights_data_bfyx = flatten_4d(format::bfyx, weights_data); - auto weights_mem = engine.allocate_memory({data_types::f16, format::bfyx, weights_size}); + auto weights_mem = engine.allocate_memory({ data_types::f16, format::bfyx, weights_size }); set_values(weights_mem, weights_data_bfyx); // Will be used to store reference values calculated in branches depending on bias @@ -8963,7 +8831,7 @@ TEST_P(convolution_gpu_onednn, conv_onednn_cases) { if (with_bias) { auto biases_size = tensor(1, output_f, 1, 1); auto biases_data = generate_random_1d(output_f, -1, 1); - auto biases_mem = engine.allocate_memory({data_types::f16, format::bfyx, biases_size}); + auto biases_mem = engine.allocate_memory({ data_types::f16, format::bfyx, biases_size }); set_values(biases_mem, biases_data); for (auto bi = 0; bi < batch_num; ++bi) { @@ -8981,16 +8849,16 @@ TEST_P(convolution_gpu_onednn, conv_onednn_cases) { topology.add(input_layout("input", input_mem->get_layout()), data("weights_fsv", weights_mem), data("bias", biases_mem), - reorder("input_fsv", "input", {data_types::f16, input_data_format, input_size})); + reorder("input_fsv", "input", { data_types::f16, input_data_format, input_size })); auto conv_fsv = convolution("conv_fsv", "input_fsv", - {"weights_fsv"}, - {"bias"}, + { "weights_fsv" }, + { "bias" }, groups, - {1, 1, stride, stride}, - {0, 0, 0, 0}); - conv_fsv.output_padding = padding({0, 0, 0, 0}, 0.f); + { 1, 1, stride, stride }, + { 0, 0, 0, 0 }); + conv_fsv.output_padding = padding({ 0, 0, 0, 0 }, 0.f); topology.add(conv_fsv); } else { @@ -9008,21 +8876,21 @@ TEST_P(convolution_gpu_onednn, conv_onednn_cases) { topology.add(input_layout("input", input_mem->get_layout()), data("weights_fsv", weights_mem), - reorder("input_fsv", "input", {data_types::f16, input_data_format, input_size})); + reorder("input_fsv", "input", { data_types::f16, input_data_format, input_size })); auto conv_fsv = convolution("conv_fsv", "input_fsv", - {"weights_fsv"}, + { "weights_fsv" }, groups, - {1, 1, stride, stride}, - {0, 0, 0, 0}); - conv_fsv.output_padding = padding({0, 0, 0, 0}, 0.f); + { 1, 1, stride, stride }, + { 0, 0, 0, 0 }); + conv_fsv.output_padding = padding({ 0, 0, 0, 0 }, 0.f); topology.add(conv_fsv); } build_options options; options.set_option(build_option::optimize_data(true)); - implementation_desc conv_impl = {input_data_format, impl_name, prim_impl_types}; - options.set_option(build_option::force_implementations({{"conv_fsv", conv_impl}})); + implementation_desc conv_impl = { input_data_format, impl_name, prim_impl_types }; + options.set_option(build_option::force_implementations({ { "conv_fsv", conv_impl } })); network network(engine, topology, options); network.set_input_data("input", input_mem); @@ -9032,7 +8900,7 @@ TEST_P(convolution_gpu_onednn, conv_onednn_cases) { std::cerr << p.original_id << " " << p.kernel_id << std::endl; auto out_mem = network.get_output("conv_fsv").get_memory(); - mem_lock out_ptr{out_mem, get_test_stream()}; + mem_lock out_ptr{ out_mem, get_test_stream() }; auto out_lay = out_mem->get_layout(); 
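        // The forced implementation is expected to keep the blocked input
        // format; verify format and dimensions before comparing values.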
ASSERT_EQ(out_mem->get_layout().format, input_data_format); @@ -9075,15 +8943,15 @@ TEST(convolution_gpu_onednn, padding_for_cldnn_kernel_after_onednn) { auto weights_size = tensor(16, 16, 1, 1, 1); auto weights_data = generate_random_4d(output_f, input_f, 1, 1, -1, 1); auto weights_data_bfyx = flatten_4d(format::bfyx, weights_data); - auto weights_mem = engine.allocate_memory({data_types::f16, format::bfyx, weights_size}); + auto weights_mem = engine.allocate_memory({ data_types::f16, format::bfyx, weights_size }); set_values(weights_mem, weights_data_bfyx); auto input = input_layout("input", input_mem->get_layout()); auto weights = data("weights", weights_mem); - auto input_reorder = reorder("input_fsv", "input", {data_types::f16, format::b_fs_yx_fsv16, input_size}); + auto input_reorder = reorder("input_fsv", "input", { data_types::f16, format::b_fs_yx_fsv16, input_size }); auto conv1 = convolution("conv1", "input_fsv", { "weights" }); - auto conv2 = convolution("conv2", "conv1", { "weights" }, {1, 1, 1, 1}, {0, 0, -1, -1}, {1, 1, 1, 1}, {output_b, output_f, output_x, output_x}); - auto output_reorder = reorder("reorder", "conv2", {data_types::f32, format::bfyx, {output_b, output_f, output_x, output_x}}); + auto conv2 = convolution("conv2", "conv1", { "weights" }, { 1, 1, 1, 1 }, { 0, 0, -1, -1 }, { 1, 1, 1, 1 }, { output_b, output_f, output_x, output_x }); + auto output_reorder = reorder("reorder", "conv2", { data_types::f32, format::bfyx, { output_b, output_f, output_x, output_x } }); topology topology_test(input, weights, input_reorder, conv1, conv2, output_reorder); topology topology_ref(input, weights, input_reorder, conv1, conv2, output_reorder); @@ -9091,13 +8959,13 @@ TEST(convolution_gpu_onednn, padding_for_cldnn_kernel_after_onednn) { build_options options_test; implementation_desc conv1_impl_test = { format::b_fs_yx_fsv16, "", impl_types::onednn }; implementation_desc conv2_impl_test = { format::b_fs_yx_fsv16, "convolution_gpu_bfyx_f16", impl_types::ocl }; - options_test.set_option(build_option::force_implementations({ {"conv1", conv1_impl_test}, {"conv2", conv2_impl_test} })); + options_test.set_option(build_option::force_implementations({ { "conv1", conv1_impl_test }, { "conv2", conv2_impl_test } })); options_test.set_option(build_option::optimize_data(true)); build_options options_ref; implementation_desc conv1_impl_ref = { format::bfyx, "", impl_types::ocl }; implementation_desc conv2_impl_ref = { format::bfyx, "", impl_types::ocl }; - options_ref.set_option(build_option::force_implementations({ {"conv1", conv1_impl_ref}, {"conv2", conv2_impl_ref} })); + options_ref.set_option(build_option::force_implementations({ { "conv1", conv1_impl_ref }, { "conv2", conv2_impl_ref } })); options_ref.set_option(build_option::optimize_data(true)); network network_test(engine, topology_test, options_test); diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/fusings_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/fusings_gpu_test.cpp index 957ca159a0a..962759bdc7c 100644 --- a/inference-engine/thirdparty/clDNN/tests/test_cases/fusings_gpu_test.cpp +++ b/inference-engine/thirdparty/clDNN/tests/test_cases/fusings_gpu_test.cpp @@ -681,7 +681,7 @@ TEST_P(conv_fp32_reorder_fsv16_to_bfyx_conv, basic) { reorder("reorder_fsv16", "input", format::b_fs_yx_fsv16, data_types::f32), convolution("conv_prim", "reorder_fsv16", { "weights" }, p.groups, p.stride, p.pad, p.dilation), reorder("reorder_bfyx", "conv_prim", format::bfyx, data_types::f32), - 
convolution("conv_output", "reorder_bfyx", { "weights_dw" }, 1, dw_stride, p.pad, p.dilation), + convolution("conv_output", "reorder_bfyx", { "weights_dw" }, p.out_shape.feature[0], dw_stride, p.pad, p.dilation), activation("activation", "conv_output", activation_func::abs), reorder("reorder_output", "activation", p.default_format, data_types::f32) ); @@ -10059,7 +10059,7 @@ TEST_P(conv_fp32_reorder_bfyx_to_fsv32_conv_subtract, have_subtract_per_feature) data("weights_dw", get_mem(dw_weights_layout, -127, 127)), convolution("conv_prim", "input", { "weights" }, p.groups, p.stride, p.pad, p.dilation), reorder("reorder_fsv32", "conv_prim", format::fs_b_yx_fsv32, data_types::f32, values_to_subtract), - convolution("conv_output", "reorder_fsv32", { "weights_dw" }, 1, dw_stride, p.pad, p.dilation), + convolution("conv_output", "reorder_fsv32", { "weights_dw" }, p.out_shape.feature[0], dw_stride, p.pad, p.dilation), activation("activation", "conv_output", activation_func::abs) ); @@ -10088,7 +10088,7 @@ TEST_P(conv_fp32_reorder_bfyx_to_fsv32_conv_fused_activation, have_fused_activat convolution("conv_prim", "input", { "weights" }, p.groups, p.stride, p.pad, p.dilation), reorder("reorder_fsv32", "conv_prim", format::fs_b_yx_fsv32, data_types::f32), activation("activation_quantize", "reorder_fsv32", activation_func::relu), - convolution("conv_output", "activation_quantize", { "weights_dw" }, 1, dw_stride, p.pad, p.dilation), + convolution("conv_output", "activation_quantize", { "weights_dw" }, p.out_shape.feature[0], dw_stride, p.pad, p.dilation), activation("activation", "conv_output", activation_func::abs) ); @@ -10116,7 +10116,7 @@ TEST_P(conv_fp32_reorder_bfyx_to_fsv32_conv_data_padding, have_data_padding) { data("weights_dw", get_mem(dw_weights_layout, -127, 127)), convolution("conv_prim", "input", { "weights" }, p.groups, p.stride, p.pad, p.dilation), reorder("reorder_fsv32", "conv_prim", layout(data_types::f32, format::fs_b_yx_fsv32, dw_tensor, padding{ {0, 0, 1, 1}, 0 })), - convolution("conv_output", "reorder_fsv32", { "weights_dw" }, 1, dw_stride, p.pad, p.dilation), + convolution("conv_output", "reorder_fsv32", { "weights_dw" }, p.out_shape.feature[0], dw_stride, p.pad, p.dilation), activation("activation", "conv_output", activation_func::abs), activation("activation2", "conv_prim", activation_func::abs), eltwise("add_bias", { "activation", "activation2" }, eltwise_mode::sum) diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/memory_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/memory_test.cpp index d1c76a316c4..642fb65f603 100644 --- a/inference-engine/thirdparty/clDNN/tests/test_cases/memory_test.cpp +++ b/inference-engine/thirdparty/clDNN/tests/test_cases/memory_test.cpp @@ -43,7 +43,7 @@ TEST(memory_tests, DISABLED_network_creation_loop) { engine eng; - memory in = memory::allocate(eng, layout{ data_types::f32, format::bfyx,{ 1, 1, 1000, 1000 } }); + memory in = memory::allocate(eng, layout{ data_types::f32, format::bfyx, { 1, 1, 1000, 1000 } }); topology tpl{ input_layout("in", in->get_layout()), @@ -66,7 +66,7 @@ TEST(memory_pool, basic_non_padded_relu_pipe) { auto x_size = 1; auto y_size = 1; - auto input = engine->allocate_memory({ data_types::f32, format::bfyx,{ tensor(spatial(x_size, y_size), feature(feature_num), batch(batch_num)) } }); + auto input = engine->allocate_memory({ data_types::f32, format::bfyx, { tensor(spatial(x_size, y_size), feature(feature_num), batch(batch_num)) } }); topology topology; topology.add(input_layout("input", 
input->get_layout())); @@ -86,7 +86,7 @@ TEST(memory_pool, basic_non_padded_relu_pipe) { network.set_input_data("input", input); auto outputs = network.execute(); - EXPECT_EQ(engine->get_max_used_device_memory(), (uint64_t) 64); + EXPECT_EQ(engine->get_max_used_device_memory(), (uint64_t)64); } TEST(memory_pool, basic_non_padded_relu_and_pooling_pipe) { @@ -99,13 +99,13 @@ TEST(memory_pool, basic_non_padded_relu_and_pooling_pipe) { auto x_size = 4; auto y_size = 4; - auto input = engine->allocate_memory({ data_types::f32, format::bfyx,{ tensor(spatial(x_size, y_size), feature(feature_num), batch(batch_num)) } }); + auto input = engine->allocate_memory({ data_types::f32, format::bfyx, { tensor(spatial(x_size, y_size), feature(feature_num), batch(batch_num)) } }); topology topology; topology.add(input_layout("input", input->get_layout())); topology.add(activation("relu", "input", activation_func::relu)); topology.add(activation("relu1", "relu", activation_func::relu)); - topology.add(pooling("pool1", "relu1",pooling_mode::max, { 1,1,3,3 }, { 1,1,2,2 })); + topology.add(pooling("pool1", "relu1", pooling_mode::max, { 1, 1, 3, 3 }, { 1, 1, 2, 2 })); topology.add(activation("relu2", "pool1", activation_func::relu)); topology.add(activation("relu3", "relu2", activation_func::relu)); topology.add(activation("relu4", "relu3", activation_func::relu)); @@ -133,7 +133,7 @@ TEST(memory_pool, multi_outputs_network) { auto x_size = 4; auto y_size = 4; - auto input = engine->allocate_memory({ data_types::f32, format::bfyx,{ tensor(spatial(x_size, y_size), feature(feature_num), batch(batch_num)) } }); + auto input = engine->allocate_memory({ data_types::f32, format::bfyx, { tensor(spatial(x_size, y_size), feature(feature_num), batch(batch_num)) } }); topology topology; topology.add(input_layout("input", input->get_layout())); @@ -153,7 +153,7 @@ TEST(memory_pool, multi_outputs_network) { network.set_input_data("input", input); auto outputs = network.execute(); - EXPECT_EQ(engine->get_max_used_device_memory(), (uint64_t)1536); + EXPECT_EQ(engine->get_max_used_device_memory(), (uint64_t) 1536); } TEST(memory_pool, oooq) { @@ -171,14 +171,14 @@ TEST(memory_pool, oooq) { auto x_size = 4; auto y_size = 4; - auto input = engine->allocate_memory({ data_types::f32, format::bfyx,{ tensor(spatial(x_size, y_size), feature(feature_num), batch(batch_num)) } }); + auto input = engine->allocate_memory({ data_types::f32, format::bfyx, { tensor(spatial(x_size, y_size), feature(feature_num), batch(batch_num)) } }); topology topology; topology.add(input_layout("input", input->get_layout())); topology.add(activation("relu1", "input", activation_func::relu)); topology.add(activation("relu2", "input", activation_func::relu)); topology.add(activation("relu3", "input", activation_func::relu)); - topology.add(concatenation("concat1", { "relu1", "relu2"},concatenation::along_f)); + topology.add(concatenation("concat1", { "relu1", "relu2" },concatenation::along_f)); topology.add(activation("relu4", "concat1", activation_func::relu)); topology.add(activation("relu5", "relu3", activation_func::relu)); topology.add(concatenation("concat2", { "relu4", "relu5" }, concatenation::along_f)); @@ -209,7 +209,7 @@ TEST(memory_pool, DISABLED_shared_mem_pool_same_topology_twice) { auto inp_x_size = 4; auto inp_y_size = 4; - auto input = engine->allocate_memory({ data_types::f32, format::bfyx,{ tensor(spatial(inp_x_size, inp_y_size), feature(feature_num), batch(batch_num)) } }); + auto input = engine->allocate_memory({ data_types::f32, 
format::bfyx, { tensor(spatial(inp_x_size, inp_y_size), feature(feature_num), batch(batch_num)) } }); set_values(input, { 1.0f, 2.5f, 3.0f, 4.0f, 5.0f, 2.0f, 2.0f, 3.0f, 6.1f, 4.7f, 1.0f, 1.0f, 8.2f, 1.0f, 2.0f, 1.0f, @@ -227,7 +227,7 @@ TEST(memory_pool, DISABLED_shared_mem_pool_same_topology_twice) { topology.add(activation("relu4", "concat1", activation_func::relu)); topology.add(activation("relu5", "relu3", activation_func::relu)); topology.add(concatenation("concat2", { "relu4", "relu5" }, concatenation::along_f)); - topology.add(activation("relu6", "concat2", activation_func::linear, {1.0f, 0.5f})); + topology.add(activation("relu6", "concat2", activation_func::linear, { 1.0f, 0.5f })); build_options bo; bo.set_option(build_option::optimize_data(true)); @@ -286,8 +286,8 @@ TEST(memory_pool, DISABLED_shared_mem_pool_same_topology_twice_weights) { auto inp_x_size = 4; auto inp_y_size = 4; - auto input= engine->allocate_memory({ data_types::f32, format::bfyx,{ tensor(spatial(inp_x_size, inp_y_size), feature(feature_num), batch(batch_num)) } }); - auto weights = engine->allocate_memory({ data_types::f32,format::bfyx,{ 1, 1, 3, 2 } }); + auto input= engine->allocate_memory({ data_types::f32, format::bfyx, { tensor(spatial(inp_x_size, inp_y_size), feature(feature_num), batch(batch_num)) } }); + auto weights = engine->allocate_memory({ data_types::f32, format::bfyx, { 1, 1, 3, 2 } }); std::vector dummy_input_data_1 = { /*f0 xy*/ 0.8f, 0.65f, 0.1f, 1.0f, 1.0f, 0.5f, 0.11f, 0.33f, 0.66f, 0.11f, 0.22f, 0.33f, 0.99f, 0.8f, 0.7f, 0.5f, @@ -373,10 +373,10 @@ TEST(memory_pool, shared_mem_pool_diff_batches) { layout lay_batch_8 = { dt, fmt, { tensor(spatial(inp_x_size, inp_y_size), feature(feature_num), batch(batch_8)) }}; auto input_1 = engine->allocate_memory(lay_batch_1); auto input_8 = engine->allocate_memory(lay_batch_8); - auto weights = engine->allocate_memory({ dt, fmt, { 1, 1, 3, 2 } }); + auto weights = engine->allocate_memory({ dt, fmt, { 1, 3, 3, 2 } }); - std::vector dummy_input_data_1 = generate_random_1d(batch_1*feature_num*inp_x_size*inp_y_size, 0, 1); - std::vector dummy_input_data_8 = generate_random_1d(batch_8*feature_num*inp_x_size*inp_y_size, 0, 1); + std::vector dummy_input_data_1 = generate_random_1d(batch_1 * feature_num * inp_x_size * inp_y_size, 0, 1); + std::vector dummy_input_data_8 = generate_random_1d(batch_8 * feature_num * inp_x_size * inp_y_size, 0, 1); set_values(input_1, dummy_input_data_1); set_values(input_8, dummy_input_data_8); @@ -396,14 +396,14 @@ TEST(memory_pool, shared_mem_pool_diff_batches) { auto outputs = network_first.execute(); auto dev_info = engine->get_device_info(); - EXPECT_EQ(engine->get_max_used_device_memory(), (uint64_t)3928); + EXPECT_EQ(engine->get_max_used_device_memory(), (uint64_t) 4744); topo.change_input_layout("input", input_1->get_layout());//change input layout to batch=1 network network_second(*engine, topo, bo); network_second.set_input_data("input", input_1); auto outputs_second = network_second.execute(); - EXPECT_EQ(engine->get_max_used_device_memory(), (uint64_t)4328); + EXPECT_EQ(engine->get_max_used_device_memory(), (uint64_t) 5912); } TEST(memory_pool, shared_dep_two_output) { @@ -459,20 +459,20 @@ TEST(memory_pool, non_opt_intermidate_opt_after) { auto input_memory1 = engine.allocate_memory(input_layout1); auto input_memory2 = engine.allocate_memory(input_layout2); - auto scale_memory = engine.allocate_memory(layout(cldnn::data_types::f32, cldnn::format::bfyx, { 1,1,1,1 })); + auto scale_memory = 
engine.allocate_memory(layout(cldnn::data_types::f32, cldnn::format::bfyx, { 1, 1, 1, 1 })); auto data_memory = cldnn::data("scale_mem", scale_memory); set_values(input_memory1, { 1.0f, 2.0f, 3.0f, 4.0f }); set_values(input_memory2, { 5.0f, 6.0f, 7.0f, 8.0f }); - set_values(scale_memory, { 1.0f}); + set_values(scale_memory, { 1.0f }); auto reshape_tensor = cldnn::tensor(8, 1, 1, 1); auto input = cldnn::input_layout("input1", input_layout1); auto input2 = cldnn::input_layout("input2", input_layout2); auto concat = cldnn::concatenation("concat", { "input1", "input2" }, cldnn::concatenation::along_b); auto reshape = cldnn::reshape("reshape", "concat", reshape_tensor); - auto crop1 = cldnn::crop("crop1", "reshape", { 1,1,1,1 }, { 0, 0, 0, 0 }); - auto crop2 = cldnn::crop("crop2", "reshape", { 1,1,1,1 }, { 1, 0, 0, 0 }); + auto crop1 = cldnn::crop("crop1", "reshape", { 1, 1, 1, 1 }, { 0, 0, 0, 0 }); + auto crop2 = cldnn::crop("crop2", "reshape", { 1, 1, 1, 1 }, { 1, 0, 0, 0 }); auto eltwise1 = cldnn::scale("elt1", "crop1", "scale_mem"); auto eltwise2 = cldnn::scale("elt2", "crop2", "scale_mem"); @@ -508,7 +508,7 @@ TEST(memory_pool, add_mem_dep_test) { auto input_layout1 = layout(cldnn::data_types::f32, cldnn::format::bfyx, { 1, 2, 2, 2 }); auto input_memory1 = engine.allocate_memory(input_layout1); - auto scale_memory = engine.allocate_memory(layout(cldnn::data_types::f32, cldnn::format::bfyx, { 1,1,1,1 })); + auto scale_memory = engine.allocate_memory(layout(cldnn::data_types::f32, cldnn::format::bfyx, { 1, 1, 1, 1 })); auto data_memory = cldnn::data("scale_mem", scale_memory); set_values(input_memory1, { 1.0f, 2.0f, 3.0f, 4.0f, @@ -518,8 +518,8 @@ TEST(memory_pool, add_mem_dep_test) { auto input = cldnn::input_layout("input1", input_layout1); auto actv1 = cldnn::activation("input_activ1", "input1", activation_func::abs); auto actv2 = cldnn::activation("input_activ2", "input1", activation_func::abs); - auto crop1 = cldnn::crop("crop1", "input_activ1", { 1,1,2,2 }, { 0, 0, 0, 0 }); - auto crop2 = cldnn::crop("crop2", "input_activ2", { 1,1,2,2 }, { 0, 1, 0, 0 }); + auto crop1 = cldnn::crop("crop1", "input_activ1", { 1, 1, 2, 2 }, { 0, 0, 0, 0 }); + auto crop2 = cldnn::crop("crop2", "input_activ2", { 1, 1, 2, 2 }, { 0, 1, 0, 0 }); auto eltwise1 = cldnn::scale("elt1", "crop1", "scale_mem"); auto eltwise2 = cldnn::scale("elt2", "crop2", "scale_mem"); auto actv3 = cldnn::activation("out3", "elt1", activation_func::abs); diff --git a/src/plugins/intel_gpu/src/plugin/ops/convolution.cpp b/src/plugins/intel_gpu/src/plugin/ops/convolution.cpp index e04e2a390db..248a0d3758b 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/convolution.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/convolution.cpp @@ -137,11 +137,12 @@ static void CreateConvolutionBackpropDataOp(Program& p, const std::shared_ptr<ngraph::op::v1::ConvolutionBackpropData>& op) { auto weightsName = inputs[1]; auto weights_node = op->get_input_node_shared_ptr(1); - // WA: For the cases like Const(weights)->Sub(zp)->Deconv. + bool hasConstantWeights = IsNodeOnConstPath(weights_node); + // WA: For the cases like Const(weights)->Sub(zp)->Deconv. And also for the cases with real runtime weights. // Dimensions order of weights blob is IOYX, but // the selected format is OIYX by default.
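// (Illustrative only, with assumed channel counts: a deconv weights blob with I = 2, O = 8 and a
// 3x3 kernel arrives as IOYX { 2, 8, 3, 3 }, while OIYX expects { 8, 2, 3, 3 }; for weights_rank == 4
// the permute below therefore amounts to permute_order = { 1, 0, 2, 3 }, i.e. only dims 0 and 1 trade places.)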
So we need to swap (and transpose) I and O dimensions to match the format // For Constant node on input transpose is not needed, because the data is transposed on const node creation - if (IsNodeOnConstPath(weights_node) && std::dynamic_pointer_cast<ngraph::op::Constant>(weights_node) == nullptr) { + if ((hasConstantWeights && std::dynamic_pointer_cast<ngraph::op::Constant>(weights_node) == nullptr) || !hasConstantWeights) { std::string permuteName = layerName + "_cldnn_weights_permute"; auto weights_rank = op->get_input_shape(1).size(); std::vector<uint16_t> permute_order(weights_rank); @@ -195,11 +196,12 @@ static void CreateGroupConvolutionBackpropDataOp(Program& p, const std::shared_ptr<ngraph::op::v1::GroupConvolutionBackpropData>& op) { auto weightsName = inputs[1]; auto weights_node = op->get_input_node_shared_ptr(1); - // WA: For the cases like Const(weights)->Sub(zp)->Deconv. + bool hasConstWeights = IsNodeOnConstPath(weights_node); + // WA: For the cases like Const(weights)->Sub(zp)->Deconv. And also for the cases with real runtime weights. // Dimensions order of weights blob is IOYX, but // the selected format is OIYX by default. So we need to swap I and O dimensions to match the format. // For Constant node on input transpose is not needed, because the data is transposed on const node creation - if (IsNodeOnConstPath(weights_node) && std::dynamic_pointer_cast<ngraph::op::Constant>(weights_node) == nullptr) { + if ((hasConstWeights && std::dynamic_pointer_cast<ngraph::op::Constant>(weights_node) == nullptr) || !hasConstWeights) { std::string permuteName = layerName + "_cldnn_weights_permute"; auto weights_rank = op->get_input_shape(1).size(); std::vector<uint16_t> permute_order(weights_rank); diff --git a/src/plugins/intel_gpu/src/plugin/program.cpp b/src/plugins/intel_gpu/src/plugin/program.cpp index c2cc3875497..cc3fb1a6e10 100644 --- a/src/plugins/intel_gpu/src/plugin/program.cpp +++ b/src/plugins/intel_gpu/src/plugin/program.cpp @@ -346,7 +346,7 @@ bool IsNodeOnConstPath(const std::shared_ptr<ngraph::Node>& node) { std::function<bool(const std::shared_ptr<ngraph::Node>&)> is_const_node = [&nodes_processed, &is_const_node](const std::shared_ptr<ngraph::Node>& node) { if (nodes_processed.count(node)) return true; nodes_processed.insert(node); - // If input is constant, then drop if from the processing list + // If input is constant, then drop it from the processing list if (std::dynamic_pointer_cast<ngraph::op::Constant>(node) != nullptr) return true; // If the node doesn't have any parents and it's not a constant, then we deal with dynamic path From 1177d2b282a7699cc398717ccd8a35dd7a375bf0 Mon Sep 17 00:00:00 2001 From: Sergey Shlyapnikov Date: Wed, 15 Dec 2021 13:15:13 +0300 Subject: [PATCH 03/27] [GPU] Change FQ output for first Convolution (#9200) * update onednn_gpu * [GPU] Add bs_fs_yx_bsv8_fsv4 format Co-authored-by: Kim,SungEun --- .../clDNN/api/intel_gpu/runtime/tensor.hpp | 2 + .../kernel_selector/common/tensor_type.cpp | 6 +++ .../kernel_selector/common/tensor_type.h | 1 + .../include/batch_headers/fetch_data.cl | 33 ++++++++++++++ .../kernel_selector/core/common/jitter.cpp | 2 + .../core/kernel_selector_common.cpp | 1 + .../clDNN/src/impls/ocl/convolution.cpp | 5 +++ .../clDNN/src/impls/ocl/eltwise.cpp | 7 +++ .../src/impls/onednn/concatenation_onednn.cpp | 5 +++ .../src/impls/onednn/convolution_onednn.cpp | 5 +++ .../src/impls/onednn/deconvolution_onednn.cpp | 5 +++ .../clDNN/src/impls/onednn/utils.cpp | 1 + .../clDNN/src/include/to_string_utils.h | 2 + .../clDNN/src/kernel_selector_helper.cpp | 4 ++ .../thirdparty/clDNN/src/layout_optimizer.cpp | 6 ++- .../thirdparty/clDNN/src/program_helpers.cpp | 43 ++++++++----------- 16 files changed, 102 insertions(+), 26 deletions(-) diff --git
a/inference-engine/thirdparty/clDNN/api/intel_gpu/runtime/tensor.hpp b/inference-engine/thirdparty/clDNN/api/intel_gpu/runtime/tensor.hpp index 846cf6e4bf6..aeea86c190e 100644 --- a/inference-engine/thirdparty/clDNN/api/intel_gpu/runtime/tensor.hpp +++ b/inference-engine/thirdparty/clDNN/api/intel_gpu/runtime/tensor.hpp @@ -85,6 +85,7 @@ struct format { bs_fs_zyx_bsv16_fsv16, ///< format used for 3D blocked convolution (batch and features blocked by 16) bs_fs_yx_bsv16_fsv16, ///< format used for 2D blocked convolution (batch and features blocked by 16) bs_fs_yx_bsv4_fsv4, ///< format used for 2D blocked convolution (batch and features blocked by 4) + bs_fs_yx_bsv8_fsv4, ///< format used for 2D blocked convolution (batch and features blocked by 8 and 4) bs_fs_yx_bsv4_fsv2, ///< format used for 2D blocked convolution (batch blocked by 4, features blocked by 2) bs_fs_zyx_bsv4_fsv4, ///< format used for 3D blocked convolution (batch and features blocked by 4) bs_fs_zyx_bsv4_fsv2, ///< format used for 3D blocked convolution (batch blocked by 4, features blocked by 2) @@ -255,6 +256,7 @@ struct format { { bs_fs_zyx_bsv16_fsv16, { 1, 1, 3, 0, "bfzyx", "bfxyz", {{0, 16 }, {1, 16}}}}, { bs_fs_yx_bsv16_fsv16, { 1, 1, 2, 0, "bfyx", "bfxy?", {{0, 16 }, {1, 16}}}}, { bs_fs_yx_bsv4_fsv4, { 1, 1, 2, 0, "bfyx", "bfxy?", {{0, 4 }, {1, 4}}}}, + { bs_fs_yx_bsv8_fsv4, { 1, 1, 2, 0, "bfyx", "bfxy?", {{0, 8 }, {1, 4}}}}, { bs_fs_yx_bsv4_fsv2, { 1, 1, 2, 0, "bfyx", "bfxy?", {{0, 4 }, {1, 2}}}}, { bs_fs_zyx_bsv4_fsv4, { 1, 1, 3, 0, "bfzyx", "bfxyz", {{0, 4 }, {1, 4}}}}, { bs_fs_zyx_bsv4_fsv2, { 1, 1, 3, 0, "bfzyx", "bfxyz", {{0, 4 }, {1, 2}}}}, diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/common/tensor_type.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/common/tensor_type.cpp index 97d6c7da91b..ce7ec16ad4e 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/common/tensor_type.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/common/tensor_type.cpp @@ -29,6 +29,7 @@ DataTensor::DataChannelArray DataTensor::dataChannelArray {{ { DataLayout::bs_fs_zyx_bsv16_fsv16, { 0, 1, 2, -1, 3, 4 } }, { DataLayout::bs_fs_yx_bsv16_fsv16, { 0, 1, -1, -1, 2, 3 } }, { DataLayout::bs_fs_yx_bsv4_fsv4, { 0, 1, -1, -1, 2, 3 } }, + { DataLayout::bs_fs_yx_bsv8_fsv4, { 0, 1, -1, -1, 2, 3 } }, { DataLayout::bs_fs_yx_bsv4_fsv2, { 0, 1, -1, -1, 2, 3 } }, { DataLayout::bs_fs_yx_bsv32_fsv32, { 0, 1, -1, -1, 2, 3 } }, { DataLayout::bs_fs_yx_bsv32_fsv16, { 0, 1, -1, -1, 2, 3 } }, @@ -206,6 +207,11 @@ NDims DataTensor::GetSimpleDims(const std::vector& d, DataLayout l) { newDims[2] = RoundUp(newDims[2], 4); newDims[3] = RoundUp(newDims[3], 4); break; + case bs_fs_yx_bsv8_fsv4: + assert(newDims.size() == 4); + newDims[2] = RoundUp(newDims[2], 4); + newDims[3] = RoundUp(newDims[3], 8); + break; case bs_fs_yx_bsv4_fsv2: assert(newDims.size() == 4); newDims[2] = RoundUp(newDims[2], 2); diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/common/tensor_type.h b/inference-engine/thirdparty/clDNN/kernel_selector/common/tensor_type.h index 7ed87ec644b..fb57e4592dc 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/common/tensor_type.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/common/tensor_type.h @@ -39,6 +39,7 @@ enum DataLayout { bs_fs_yx_bsv16_fsv16, // batch, feature, 2D spatial. Blocks of 16 batch and channels bs_fs_zyx_bsv16_fsv16, // batch, feature, 3D spatial. Blocks of 16 batch and channels bs_fs_yx_bsv4_fsv4, // batch, feature, 2D spatial. 
Blocks of 4 batch and 4 channels + bs_fs_yx_bsv8_fsv4, // batch, feature, 2D spatial. Blocks of 8 batch and 4 channels bs_fs_yx_bsv4_fsv2, // batch, feature, 2D spatial. Blocks of 4 batch and 2 channels bs_fs_yx_bsv32_fsv32, // batch, feature, 2D spatial. Blocks of 32 batch and 32 channels bs_fs_yx_bsv32_fsv16, // batch, feature, 2D spatial. Blocks of 32 batch and 16 channels diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/include/batch_headers/fetch_data.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/include/batch_headers/fetch_data.cl index b35522168b5..5af9d161ce3 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/include/batch_headers/fetch_data.cl +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/include/batch_headers/fetch_data.cl @@ -506,6 +506,22 @@ inline uint get_bs_fs_zyx_bsv_fsv_index(uint b, uint f, uint z, uint y, uint x, CAT(prefix, _PAD_BEFORE_SIZE_X), \ CAT(prefix, _PAD_AFTER_SIZE_X), 4, 4) +#define GET_DATA_BS_FS_YX_BSV8_FSV4_INDEX(prefix, b, f, y, x) \ + get_bs_fs_zyx_bsv_fsv_index( \ + b, f, 0, y, x, \ + CAT(prefix, _SIZE_X), \ + CAT(prefix, _SIZE_Y), \ + CAT(prefix, _SIZE_Z), \ + CAT(prefix, _FEATURE_NUM), \ + CAT(prefix, _PAD_BEFORE_FEATURE_NUM), \ + CAT(prefix, _PAD_AFTER_FEATURE_NUM), \ + CAT(prefix, _PAD_BEFORE_SIZE_Z), \ + CAT(prefix, _PAD_AFTER_SIZE_Z), \ + CAT(prefix, _PAD_BEFORE_SIZE_Y), \ + CAT(prefix, _PAD_AFTER_SIZE_Y), \ + CAT(prefix, _PAD_BEFORE_SIZE_X), \ + CAT(prefix, _PAD_AFTER_SIZE_X), 8, 4) + #define GET_DATA_BS_FS_YX_BSV4_FSV2_INDEX(prefix, b, f, y, x) \ get_bs_fs_zyx_bsv_fsv_index( \ b, f, 0, y, x, \ @@ -605,6 +621,23 @@ inline uint get_bs_fs_zyx_bsv_fsv_index(uint b, uint f, uint z, uint y, uint x, CAT(prefix, _PAD_BEFORE_SIZE_X), \ CAT(prefix, _PAD_AFTER_SIZE_X), 4, 4) +#define GET_DATA_BS_FS_YX_BSV8_FSV4_INDEX_SAFE(prefix, b, f, y, x) \ + get_bs_fs_zyx_bsv_fsv_index_safe( \ + b, f, 0, y, x, \ + CAT(prefix, _SIZE_X), \ + CAT(prefix, _SIZE_Y), \ + CAT(prefix, _SIZE_Z), \ + CAT(prefix, _FEATURE_NUM), \ + CAT(prefix, _BATCH_NUM), \ + CAT(prefix, _PAD_BEFORE_FEATURE_NUM), \ + CAT(prefix, _PAD_AFTER_FEATURE_NUM), \ + CAT(prefix, _PAD_BEFORE_SIZE_Z), \ + CAT(prefix, _PAD_AFTER_SIZE_Z), \ + CAT(prefix, _PAD_BEFORE_SIZE_Y), \ + CAT(prefix, _PAD_AFTER_SIZE_Y), \ + CAT(prefix, _PAD_BEFORE_SIZE_X), \ + CAT(prefix, _PAD_AFTER_SIZE_X), 8, 4) + #define GET_DATA_BS_FS_YX_BSV4_FSV2_INDEX_SAFE(prefix, b, f, y, x) \ get_bs_fs_zyx_bsv_fsv_index_safe( \ b, f, 0, y, x, \ diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/common/jitter.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/common/jitter.cpp index e5927422532..73f164b3659 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/common/jitter.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/common/jitter.cpp @@ -334,6 +334,7 @@ JitDefinitions DataTensorJitConstant::GetDefinitions() const { layout == DataLayout::fs_b_yx_fsv32 || layout == DataLayout::bs_fs_yx_bsv16_fsv16 || layout == DataLayout::bs_fs_yx_bsv4_fsv4 || + layout == DataLayout::bs_fs_yx_bsv8_fsv4 || layout == DataLayout::bs_fs_yx_bsv4_fsv2 || layout == DataLayout::bs_fs_yx_bsv32_fsv16 || layout == DataLayout::bs_fs_yx_bsv32_fsv32) { @@ -346,6 +347,7 @@ JitDefinitions DataTensorJitConstant::GetDefinitions() const { layout == DataLayout::bs_fs_yx_bsv32_fsv32 || layout == DataLayout::bs_fs_yx_bsv32_fsv16 || layout == DataLayout::bs_fs_yx_bsv4_fsv4 || + layout == DataLayout::bs_fs_yx_bsv8_fsv4 
|| layout == DataLayout::bs_fs_yx_bsv4_fsv2 || layout == DataLayout::bs_fs_yx_bsv16_fsv16) safe_index_func_val = "GET_DATA_" + layout_str + "_INDEX_SAFE(" + _name + ", b, f, y, x)"; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/kernel_selector_common.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/kernel_selector_common.cpp index 3491e475e07..75349b31f3e 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/kernel_selector_common.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/kernel_selector_common.cpp @@ -105,6 +105,7 @@ std::string toString(DataLayout l) { case kernel_selector::DataLayout::bs_fs_yx_bsv16_fsv16: return "BS_FS_YX_BSV16_FSV16"; case kernel_selector::DataLayout::bs_fs_zyx_bsv16_fsv16: return "BS_FS_ZYX_BSV16_FSV16"; case kernel_selector::DataLayout::bs_fs_yx_bsv4_fsv4: return "BS_FS_YX_BSV4_FSV4"; + case kernel_selector::DataLayout::bs_fs_yx_bsv8_fsv4: return "BS_FS_YX_BSV8_FSV4"; case kernel_selector::DataLayout::bs_fs_yx_bsv4_fsv2: return "BS_FS_YX_BSV4_FSV2"; case kernel_selector::DataLayout::bs_fs_yx_bsv32_fsv32: return "BS_FS_YX_BSV32_FSV32"; case kernel_selector::DataLayout::bs_fs_yx_bsv32_fsv16: return "BS_FS_YX_BSV32_FSV16"; diff --git a/inference-engine/thirdparty/clDNN/src/impls/ocl/convolution.cpp b/inference-engine/thirdparty/clDNN/src/impls/ocl/convolution.cpp index a0c8a0874a1..69d79e22315 100644 --- a/inference-engine/thirdparty/clDNN/src/impls/ocl/convolution.cpp +++ b/inference-engine/thirdparty/clDNN/src/impls/ocl/convolution.cpp @@ -225,6 +225,11 @@ attach_convolution_impl::attach_convolution_impl() { std::make_tuple(data_types::u8, format::bs_fs_yx_bsv4_fsv4), std::make_tuple(data_types::i8, format::bs_fs_yx_bsv4_fsv4), + std::make_tuple(data_types::f32, format::bs_fs_yx_bsv8_fsv4), + std::make_tuple(data_types::f16, format::bs_fs_yx_bsv8_fsv4), + std::make_tuple(data_types::u8, format::bs_fs_yx_bsv8_fsv4), + std::make_tuple(data_types::i8, format::bs_fs_yx_bsv8_fsv4), + std::make_tuple(data_types::f32, format::bs_fs_yx_bsv4_fsv2), std::make_tuple(data_types::f16, format::bs_fs_yx_bsv4_fsv2), std::make_tuple(data_types::u8, format::bs_fs_yx_bsv4_fsv2), diff --git a/inference-engine/thirdparty/clDNN/src/impls/ocl/eltwise.cpp b/inference-engine/thirdparty/clDNN/src/impls/ocl/eltwise.cpp index 3e8c233e126..b15c473fb89 100644 --- a/inference-engine/thirdparty/clDNN/src/impls/ocl/eltwise.cpp +++ b/inference-engine/thirdparty/clDNN/src/impls/ocl/eltwise.cpp @@ -214,6 +214,13 @@ attach_eltwise_impl::attach_eltwise_impl() { std::make_tuple(data_types::i32, format::bs_fs_yx_bsv4_fsv4), std::make_tuple(data_types::i64, format::bs_fs_yx_bsv4_fsv4), + std::make_tuple(data_types::f32, format::bs_fs_yx_bsv8_fsv4), + std::make_tuple(data_types::f16, format::bs_fs_yx_bsv8_fsv4), + std::make_tuple(data_types::i8, format::bs_fs_yx_bsv8_fsv4), + std::make_tuple(data_types::u8, format::bs_fs_yx_bsv8_fsv4), + std::make_tuple(data_types::i32, format::bs_fs_yx_bsv8_fsv4), + std::make_tuple(data_types::i64, format::bs_fs_yx_bsv8_fsv4), + std::make_tuple(data_types::f32, format::bs_fs_yx_bsv4_fsv2), std::make_tuple(data_types::f16, format::bs_fs_yx_bsv4_fsv2), std::make_tuple(data_types::i8, format::bs_fs_yx_bsv4_fsv2), diff --git a/inference-engine/thirdparty/clDNN/src/impls/onednn/concatenation_onednn.cpp b/inference-engine/thirdparty/clDNN/src/impls/onednn/concatenation_onednn.cpp index 2367674d762..c9e337a6466 100644 --- a/inference-engine/thirdparty/clDNN/src/impls/onednn/concatenation_onednn.cpp +++ 
b/inference-engine/thirdparty/clDNN/src/impls/onednn/concatenation_onednn.cpp @@ -119,6 +119,11 @@ attach_concatenation_onednn::attach_concatenation_onednn() { std::make_tuple(data_types::f16, format::bs_fs_yx_bsv4_fsv4), std::make_tuple(data_types::u8, format::bs_fs_yx_bsv4_fsv4), std::make_tuple(data_types::i8, format::bs_fs_yx_bsv4_fsv4), + + std::make_tuple(data_types::f32, format::bs_fs_yx_bsv8_fsv4), + std::make_tuple(data_types::f16, format::bs_fs_yx_bsv8_fsv4), + std::make_tuple(data_types::u8, format::bs_fs_yx_bsv8_fsv4), + std::make_tuple(data_types::i8, format::bs_fs_yx_bsv8_fsv4), }); } diff --git a/inference-engine/thirdparty/clDNN/src/impls/onednn/convolution_onednn.cpp b/inference-engine/thirdparty/clDNN/src/impls/onednn/convolution_onednn.cpp index c10ea0d5b5d..54e0328fdc9 100644 --- a/inference-engine/thirdparty/clDNN/src/impls/onednn/convolution_onednn.cpp +++ b/inference-engine/thirdparty/clDNN/src/impls/onednn/convolution_onednn.cpp @@ -256,6 +256,11 @@ attach_convolution_onednn::attach_convolution_onednn() { std::make_tuple(data_types::u8, format::bs_fs_yx_bsv4_fsv4), std::make_tuple(data_types::i8, format::bs_fs_yx_bsv4_fsv4), + std::make_tuple(data_types::f32, format::bs_fs_yx_bsv8_fsv4), + std::make_tuple(data_types::f16, format::bs_fs_yx_bsv8_fsv4), + std::make_tuple(data_types::u8, format::bs_fs_yx_bsv8_fsv4), + std::make_tuple(data_types::i8, format::bs_fs_yx_bsv8_fsv4), + std::make_tuple(data_types::f32, format::bs_fs_yx_bsv4_fsv2), std::make_tuple(data_types::f16, format::bs_fs_yx_bsv4_fsv2), std::make_tuple(data_types::u8, format::bs_fs_yx_bsv4_fsv2), diff --git a/inference-engine/thirdparty/clDNN/src/impls/onednn/deconvolution_onednn.cpp b/inference-engine/thirdparty/clDNN/src/impls/onednn/deconvolution_onednn.cpp index bce13ce1698..6b65c181acd 100644 --- a/inference-engine/thirdparty/clDNN/src/impls/onednn/deconvolution_onednn.cpp +++ b/inference-engine/thirdparty/clDNN/src/impls/onednn/deconvolution_onednn.cpp @@ -199,6 +199,11 @@ attach_deconvolution_onednn::attach_deconvolution_onednn() { std::make_tuple(data_types::u8, format::bs_fs_yx_bsv4_fsv4), std::make_tuple(data_types::i8, format::bs_fs_yx_bsv4_fsv4), + std::make_tuple(data_types::f32, format::bs_fs_yx_bsv8_fsv4), + std::make_tuple(data_types::f16, format::bs_fs_yx_bsv8_fsv4), + std::make_tuple(data_types::u8, format::bs_fs_yx_bsv8_fsv4), + std::make_tuple(data_types::i8, format::bs_fs_yx_bsv8_fsv4), + std::make_tuple(data_types::f32, format::bs_fs_yx_bsv4_fsv2), std::make_tuple(data_types::f16, format::bs_fs_yx_bsv4_fsv2), std::make_tuple(data_types::u8, format::bs_fs_yx_bsv4_fsv2), diff --git a/inference-engine/thirdparty/clDNN/src/impls/onednn/utils.cpp b/inference-engine/thirdparty/clDNN/src/impls/onednn/utils.cpp index a9fd1206e43..72e2effc0e1 100644 --- a/inference-engine/thirdparty/clDNN/src/impls/onednn/utils.cpp +++ b/inference-engine/thirdparty/clDNN/src/impls/onednn/utils.cpp @@ -91,6 +91,7 @@ dnnl::memory::format_tag convert_data_format(cldnn::format fmt) { case cldnn::format::bs_fs_yx_bsv16_fsv16: return dnnl::memory::format_tag::NChw16n16c; case cldnn::format::bs_fs_yx_bsv32_fsv32: return dnnl::memory::format_tag::NChw32n32c; case cldnn::format::bs_fs_yx_bsv4_fsv4: return dnnl::memory::format_tag::ABcd4a4b; + case cldnn::format::bs_fs_yx_bsv8_fsv4: return dnnl::memory::format_tag::ABcd8a4b; case cldnn::format::bs_fs_yx_bsv4_fsv2: return dnnl::memory::format_tag::ABcd4a2b; case cldnn::format::bs_fs_yx_bsv32_fsv16: return dnnl::memory::format_tag::NChw32n16c; case 
cldnn::format::bs_fs_zyx_bsv16_fsv16: return dnnl::memory::format_tag::NCdhw16n16c; diff --git a/inference-engine/thirdparty/clDNN/src/include/to_string_utils.h b/inference-engine/thirdparty/clDNN/src/include/to_string_utils.h index 66975629a08..801895c275c 100644 --- a/inference-engine/thirdparty/clDNN/src/include/to_string_utils.h +++ b/inference-engine/thirdparty/clDNN/src/include/to_string_utils.h @@ -97,6 +97,8 @@ inline std::string fmt_to_str(format fmt) { return "bs_fs_yx_bsv4_fsv2"; case format::bs_fs_yx_bsv4_fsv4: return "bs_fs_yx_bsv4_fsv4"; + case format::bs_fs_yx_bsv8_fsv4: + return "bs_fs_yx_bsv8_fsv4"; case format::bs_fs_yx_bsv32_fsv32: return "bs_fs_yx_bsv32_fsv32"; case format::b_fs_zyx_fsv16: diff --git a/inference-engine/thirdparty/clDNN/src/kernel_selector_helper.cpp b/inference-engine/thirdparty/clDNN/src/kernel_selector_helper.cpp index ac577c70f22..540e84a81ea 100644 --- a/inference-engine/thirdparty/clDNN/src/kernel_selector_helper.cpp +++ b/inference-engine/thirdparty/clDNN/src/kernel_selector_helper.cpp @@ -136,6 +136,8 @@ kernel_selector::data_layout to_data_layout(format f) { return kernel_selector::data_layout::bs_fs_yx_bsv32_fsv16; case format::bs_fs_yx_bsv4_fsv4: return kernel_selector::data_layout::bs_fs_yx_bsv4_fsv4; + case format::bs_fs_yx_bsv8_fsv4: + return kernel_selector::data_layout::bs_fs_yx_bsv8_fsv4; case format::bs_fs_yx_bsv4_fsv2: return kernel_selector::data_layout::bs_fs_yx_bsv4_fsv2; case format::bs_fs_yx_bsv32_fsv32: @@ -193,6 +195,8 @@ cldnn::format from_data_layout(kernel_selector::data_layout l) { return cldnn::format::bs_fs_yx_bsv4_fsv2; case kernel_selector::data_layout::bs_fs_yx_bsv4_fsv4: return cldnn::format::bs_fs_yx_bsv4_fsv4; + case kernel_selector::data_layout::bs_fs_yx_bsv8_fsv4: + return cldnn::format::bs_fs_yx_bsv8_fsv4; case kernel_selector::data_layout::bs_fs_yx_bsv32_fsv32: return cldnn::format::bs_fs_yx_bsv32_fsv32; case kernel_selector::data_layout::nv12: diff --git a/inference-engine/thirdparty/clDNN/src/layout_optimizer.cpp b/inference-engine/thirdparty/clDNN/src/layout_optimizer.cpp index 6156ef8e8eb..1c4518b1654 100644 --- a/inference-engine/thirdparty/clDNN/src/layout_optimizer.cpp +++ b/inference-engine/thirdparty/clDNN/src/layout_optimizer.cpp @@ -284,10 +284,11 @@ bool layout_optimizer::can_fuse_reorder(program_node& prev, program_node& next, return true; if (next.is_type() && - (fmt_prev == format::b_fs_yx_fsv4 || fmt_prev == format::bs_fs_yx_bsv4_fsv4) && + (fmt_prev == format::b_fs_yx_fsv4 || fmt_prev == format::bs_fs_yx_bsv4_fsv4 || fmt_prev == format::bs_fs_yx_bsv8_fsv4) && ((fmt_next == format::b_fs_yx_fsv32 && (prev_output_layout.size.feature[0] == 3 || prev_output_layout.size.feature[0] == 4)) || (fmt_next == format::bs_fs_yx_bsv32_fsv32 && (prev_output_layout.size.feature[0] == 3 || prev_output_layout.size.feature[0] == 4)) || (fmt_next == format::bs_fs_yx_bsv4_fsv4 && (prev_output_layout.size.feature[0] == 3 || prev_output_layout.size.feature[0] == 4)) || + (fmt_next == format::bs_fs_yx_bsv8_fsv4 && (prev_output_layout.size.feature[0] == 3 || prev_output_layout.size.feature[0] == 4)) || (fmt_next == format::b_fs_yx_fsv16 && next_output_layout.size.feature[0] >= 16 && (prev_output_layout.size.feature[0] == 3 || (prev_output_layout.size.feature[0] == 4 && (prev_dt == data_types::u8 || prev_dt == data_types::i8)))))) return true; @@ -1269,6 +1270,7 @@ impl_types layout_optimizer::get_preferred_impl_type(program_node& node, format format::bs_fs_yx_bsv32_fsv16, format::bs_fs_yx_bsv32_fsv32, 
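// For orientation, bs_fs_yx_bsv8_fsv4 tiles batch by 8 and feature by 4, storing each 32-element
// tile contiguously. A minimal sketch of the offset it implies, assuming no padding (the general,
// padded form lives in get_bs_fs_zyx_bsv_fsv_index in fetch_data.cl):
//   size_t offset(size_t b, size_t f, size_t y, size_t x, size_t F, size_t Y, size_t X) {
//       const size_t bsv = 8, fsv = 4, f_blocks = (F + fsv - 1) / fsv;
//       return ((((b / bsv) * f_blocks + f / fsv) * Y + y) * X + x) * (bsv * fsv)
//              + (b % bsv) * fsv + (f % fsv);
//   }
// which lines up with the oneDNN tag ABcd8a4b this format is mapped to in utils.cpp above.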
format::bs_fs_yx_bsv4_fsv4, + format::bs_fs_yx_bsv8_fsv4, format::bs_fs_yx_bsv4_fsv2, format::bs_fs_zyx_bsv4_fsv4, format::bs_fs_zyx_bsv4_fsv2, @@ -1463,7 +1465,7 @@ format layout_optimizer::get_preferred_format(program_node& node) { if (data_type_traits::is_floating_point(conv.get_output_layout().data_type) || ws.spatial[0] != 7 || conv.get_primitive()->groups > 1) expected = format::bfyx; else - expected = format::bs_fs_yx_bsv4_fsv4; + expected = format::bs_fs_yx_bsv8_fsv4; auto conv_output_layout = conv.get_output_layout(); auto weights_layout = conv.weights(0).get_output_layout(); diff --git a/inference-engine/thirdparty/clDNN/src/program_helpers.cpp b/inference-engine/thirdparty/clDNN/src/program_helpers.cpp index 09e3fbf6c99..bddd611cf8a 100644 --- a/inference-engine/thirdparty/clDNN/src/program_helpers.cpp +++ b/inference-engine/thirdparty/clDNN/src/program_helpers.cpp @@ -139,30 +139,25 @@ std::pair program_helpers::are_layouts_identical(layout const& l1, l return {false, false}; if (l1.get_linear_size() != l2.get_linear_size()) return {false, false}; - if ((l1.format == format::b_fs_yx_fsv4 && l2.format != format::b_fs_yx_fsv4) || - (l2.format == format::b_fs_yx_fsv4 && l1.format != format::b_fs_yx_fsv4) || - (l1.format == format::fs_b_yx_fsv32 && l2.format != format::fs_b_yx_fsv32) || - (l2.format == format::fs_b_yx_fsv32 && l1.format != format::fs_b_yx_fsv32) || - (l1.format == format::b_fs_yx_fsv16 && l2.format != format::b_fs_yx_fsv16) || - (l2.format == format::b_fs_yx_fsv16 && l1.format != format::b_fs_yx_fsv16) || - (l1.format == format::b_fs_yx_fsv32 && l2.format != format::b_fs_yx_fsv32) || - (l2.format == format::b_fs_yx_fsv32 && l1.format != format::b_fs_yx_fsv32) || - (l1.format == format::b_fs_zyx_fsv32 && l2.format != format::b_fs_zyx_fsv32) || - (l2.format == format::b_fs_zyx_fsv32 && l1.format != format::b_fs_zyx_fsv32) || - (l1.format == format::b_fs_zyx_fsv16 && l2.format != format::b_fs_zyx_fsv16) || - (l2.format == format::b_fs_zyx_fsv16 && l1.format != format::b_fs_zyx_fsv16) || - (l1.format == format::bs_fs_yx_bsv4_fsv4 && l2.format != format::bs_fs_yx_bsv4_fsv4) || - (l2.format == format::bs_fs_yx_bsv4_fsv4 && l1.format != format::bs_fs_yx_bsv4_fsv4) || - (l1.format == format::bs_fs_yx_bsv4_fsv2 && l2.format != format::bs_fs_yx_bsv4_fsv2) || - (l2.format == format::bs_fs_yx_bsv4_fsv2 && l1.format != format::bs_fs_yx_bsv4_fsv2) || - (l1.format == format::bs_fs_yx_bsv32_fsv16 && l2.format != format::bs_fs_yx_bsv32_fsv16) || - (l2.format == format::bs_fs_yx_bsv32_fsv16 && l1.format != format::bs_fs_yx_bsv32_fsv16) || - (l1.format == format::bs_fs_yx_bsv32_fsv32 && l2.format != format::bs_fs_yx_bsv32_fsv32) || - (l2.format == format::bs_fs_yx_bsv32_fsv32 && l1.format != format::bs_fs_yx_bsv32_fsv32) || - (l1.format == format::bs_fs_yx_bsv16_fsv16 && l2.format != format::bs_fs_yx_bsv16_fsv16) || - (l2.format == format::bs_fs_yx_bsv16_fsv16 && l1.format != format::bs_fs_yx_bsv16_fsv16) || - (l1.format == format::bs_fs_zyx_bsv16_fsv16 && l2.format != format::bs_fs_zyx_bsv16_fsv16) || - (l2.format == format::bs_fs_zyx_bsv16_fsv16 && l1.format != format::bs_fs_zyx_bsv16_fsv16)) + + auto check_format = [&l1, &l2](cldnn::format format) { + return (l1.format == format && l2.format != format) || + (l2.format == format && l1.format != format); + }; + + if (check_format(format::b_fs_yx_fsv4) || + check_format(format::fs_b_yx_fsv32) || + check_format(format::b_fs_yx_fsv16) || + check_format(format::b_fs_yx_fsv32) || + check_format(format::b_fs_zyx_fsv32) || + 
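// check_format(f) is true exactly when one of l1/l2 uses the blocked format f and the other
// does not; any such mismatch means the two layouts cannot be treated as identical, even
// though their linear sizes already matched above.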
check_format(format::b_fs_zyx_fsv16) || + check_format(format::bs_fs_yx_bsv4_fsv4) || + check_format(format::bs_fs_yx_bsv8_fsv4) || + check_format(format::bs_fs_yx_bsv4_fsv2) || + check_format(format::bs_fs_yx_bsv32_fsv16) || + check_format(format::bs_fs_yx_bsv32_fsv32) || + check_format(format::bs_fs_yx_bsv16_fsv16) || + check_format(format::bs_fs_zyx_bsv16_fsv16)) return {false, false}; auto l1_pitch = l1.get_pitches(); From 9aedece39896dd3a40f651f7623e278240f322e0 Mon Sep 17 00:00:00 2001 From: Ilya Churaev Date: Wed, 15 Dec 2021 14:32:43 +0300 Subject: [PATCH 04/27] Fixed cpu tests location (#9224) --- .../cpu/shape_inference_test/gather_elements_shape_inference.cpp | 0 .../unit/cpu/shape_inference_test/gather_shape_inference.cpp | 0 .../unit/cpu/shape_inference_test/gather_tree_shape_inference.cpp | 0 .../unit/cpu/shape_inference_test/one_hot_shape_inference.cpp | 0 4 files changed, 0 insertions(+), 0 deletions(-) rename {inference-engine => src}/tests/unit/cpu/shape_inference_test/gather_elements_shape_inference.cpp (100%) rename {inference-engine => src}/tests/unit/cpu/shape_inference_test/gather_shape_inference.cpp (100%) rename {inference-engine => src}/tests/unit/cpu/shape_inference_test/gather_tree_shape_inference.cpp (100%) rename {inference-engine => src}/tests/unit/cpu/shape_inference_test/one_hot_shape_inference.cpp (100%) diff --git a/inference-engine/tests/unit/cpu/shape_inference_test/gather_elements_shape_inference.cpp b/src/tests/unit/cpu/shape_inference_test/gather_elements_shape_inference.cpp similarity index 100% rename from inference-engine/tests/unit/cpu/shape_inference_test/gather_elements_shape_inference.cpp rename to src/tests/unit/cpu/shape_inference_test/gather_elements_shape_inference.cpp diff --git a/inference-engine/tests/unit/cpu/shape_inference_test/gather_shape_inference.cpp b/src/tests/unit/cpu/shape_inference_test/gather_shape_inference.cpp similarity index 100% rename from inference-engine/tests/unit/cpu/shape_inference_test/gather_shape_inference.cpp rename to src/tests/unit/cpu/shape_inference_test/gather_shape_inference.cpp diff --git a/inference-engine/tests/unit/cpu/shape_inference_test/gather_tree_shape_inference.cpp b/src/tests/unit/cpu/shape_inference_test/gather_tree_shape_inference.cpp similarity index 100% rename from inference-engine/tests/unit/cpu/shape_inference_test/gather_tree_shape_inference.cpp rename to src/tests/unit/cpu/shape_inference_test/gather_tree_shape_inference.cpp diff --git a/inference-engine/tests/unit/cpu/shape_inference_test/one_hot_shape_inference.cpp b/src/tests/unit/cpu/shape_inference_test/one_hot_shape_inference.cpp similarity index 100% rename from inference-engine/tests/unit/cpu/shape_inference_test/one_hot_shape_inference.cpp rename to src/tests/unit/cpu/shape_inference_test/one_hot_shape_inference.cpp From aa457268d44242142b3461d75092a4a07c2c76d6 Mon Sep 17 00:00:00 2001 From: Vladimir Dudnik Date: Wed, 15 Dec 2021 17:58:06 +0300 Subject: [PATCH 05/27] [IE Samples] make coverity happy (#9203) * make coverity happy * apply code style --- .../c/common/opencv_c_wrapper/bmp_reader.c | 103 +++++++++++++++--- 1 file changed, 86 insertions(+), 17 deletions(-) diff --git a/samples/c/common/opencv_c_wrapper/bmp_reader.c b/samples/c/common/opencv_c_wrapper/bmp_reader.c index e4362277f67..05ec4861901 100644 --- a/samples/c/common/opencv_c_wrapper/bmp_reader.c +++ b/samples/c/common/opencv_c_wrapper/bmp_reader.c @@ -1,31 +1,72 @@ #include "bmp_reader.h" +#include #include #include int readBmpImage(const char* fileName, 
BitMap* image) { - FILE* input = fopen(fileName, "rb"); + size_t cnt; + int status = 0; + FILE* input = 0; - if (input == NULL) { - printf("[BMP] file %s is not opened\n", fileName); - return 1; + if (NULL == fileName || NULL == image) { + printf("[BMP] bad arguments\n"); + status = -1; + goto Exit; } - fread(&image->header.type, 2, 1, input); + memset(image, 0, sizeof(BitMap)); + + input = fopen(fileName, "rb"); + if (input == NULL) { + printf("[BMP] file %s is not opened\n", fileName); + status = 1; + goto Exit; + } + + cnt = fread(&image->header.type, sizeof(image->header.type), sizeof(unsigned char), input); + if (cnt != sizeof(image->header.type)) { + printf("[BMP] file read error\n"); + status = 2; + goto Exit; + } if (image->header.type != 'M' * 256 + 'B') { printf("[BMP] file is not bmp type\n"); - return 2; + status = 2; + goto Exit; } - fread(&image->header.size, 4, 1, input); - fread(&image->header.reserved, 4, 1, input); - fread(&image->header.offset, 4, 1, input); + cnt = fread(&image->header.size, sizeof(image->header.size), sizeof(unsigned char), input); + if (cnt != sizeof(image->header.size)) { + printf("[BMP] file read error\n"); + status = 2; + goto Exit; + } - fread(&image->infoHeader, sizeof(BmpInfoHeader), 1, input); + cnt = fread(&image->header.reserved, sizeof(image->header.reserved), sizeof(unsigned char), input); + if (cnt != sizeof(image->header.reserved)) { + printf("[BMP] file read error\n"); + status = 2; + goto Exit; + } + + cnt = fread(&image->header.offset, sizeof(image->header.offset), sizeof(unsigned char), input); + if (cnt != sizeof(image->header.offset)) { + printf("[BMP] file read error\n"); + status = 2; + goto Exit; + } + + cnt = fread(&image->infoHeader, sizeof(BmpInfoHeader), sizeof(unsigned char), input); + if (cnt != sizeof(image->header.offset)) { + printf("[BMP] file read error\n"); + status = 2; + goto Exit; + } image->width = image->infoHeader.width; - image->height = image->infoHeader.height; + image->height = abs(image->infoHeader.height); if (image->infoHeader.bits != 24) { printf("[BMP] 24bpp only supported. But input has: %d\n", image->infoHeader.bits); @@ -38,21 +79,49 @@ int readBmpImage(const char* fileName, BitMap* image) { } int padSize = image->width & 3; + size_t row_size = (size_t)image->width * 3; char pad[3]; - size_t size = image->width * image->height * 3; + size_t size = row_size * image->height; image->data = malloc(sizeof(char) * size); + if (NULL == image->data) { + printf("[BMP] memory allocation failed\n"); + return 5; + } - fseek(input, image->header.offset, 0); + if (0 != fseek(input, image->header.offset, SEEK_SET)) { + printf("[BMP] file seek error\n"); + status = 2; + goto Exit; + } // reading by rows in invert vertically int i; for (i = 0; i < image->height; i++) { unsigned int storeAt = image->infoHeader.height < 0 ? 
i : (unsigned int)image->height - 1 - i; - fread(image->data + image->width * 3 * storeAt, image->width * 3, 1, input); - fread(pad, padSize, 1, input); + cnt = fread(image->data + row_size * storeAt, row_size, sizeof(unsigned char), input); + if (cnt != row_size) { + printf("[BMP] file read error\n"); + status = 2; + goto Exit; + } + + cnt = fread(pad, padSize, sizeof(unsigned char), input); + if (cnt != padSize) { + printf("[BMP] file read error\n"); + status = 2; + goto Exit; + } } - fclose(input); - return 0; +Exit: + if (0 != status && NULL != image && NULL != image->data) { + free(image->data); + } + + if (NULL != input) { + fclose(input); + } + + return status; } From e6d08aef5b871d4370b5a294e792a1ee43e4695e Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Wed, 15 Dec 2021 21:21:46 +0300 Subject: [PATCH 06/27] Don't use EXCLUDE_FROM_ALL with samples targets (#9237) --- docs/template_plugin/backend/CMakeLists.txt | 15 ++++----------- docs/template_plugin/src/CMakeLists.txt | 1 - samples/c/common/opencv_c_wrapper/CMakeLists.txt | 2 +- samples/cpp/common/format_reader/CMakeLists.txt | 2 +- src/core/tests/frontend/shared/CMakeLists.txt | 2 +- src/tests/unit/vpu/CMakeLists.txt | 1 - 6 files changed, 7 insertions(+), 16 deletions(-) diff --git a/docs/template_plugin/backend/CMakeLists.txt b/docs/template_plugin/backend/CMakeLists.txt index e075a555099..df959ed86e5 100644 --- a/docs/template_plugin/backend/CMakeLists.txt +++ b/docs/template_plugin/backend/CMakeLists.txt @@ -38,17 +38,10 @@ target_include_directories(interpreter_backend PUBLIC $ Date: Thu, 16 Dec 2021 03:42:24 +0900 Subject: [PATCH 07/27] Create TopK-3 (#9106) --- .../tests/functional/op_reference/topk.cpp | 481 ++++++++++++------ 1 file changed, 321 insertions(+), 160 deletions(-) diff --git a/docs/template_plugin/tests/functional/op_reference/topk.cpp b/docs/template_plugin/tests/functional/op_reference/topk.cpp index 40907722f27..f571096dabb 100644 --- a/docs/template_plugin/tests/functional/op_reference/topk.cpp +++ b/docs/template_plugin/tests/functional/op_reference/topk.cpp @@ -4,8 +4,8 @@ #include -#include "openvino/op/topk.hpp" -#include "openvino/op/constant.hpp" +#include "openvino/opsets/opset3.hpp" +#include "openvino/opsets/opset1.hpp" #include "base_reference_test.hpp" using namespace reference_tests; @@ -15,7 +15,7 @@ namespace { struct TopKParams { TopKParams( const Tensor& A, const Tensor& k, const int64_t axis, - const op::v1::TopK::Mode mode, const op::v1::TopK::SortType sort, + const opset1::TopK::Mode mode, const opset1::TopK::SortType sort, const Tensor& result0, const Tensor& result1, const size_t outIdx, const std::string& testcaseName = "") : A(A), k(k), axis(axis), mode(mode), sort(sort), @@ -25,8 +25,8 @@ struct TopKParams { Tensor A; Tensor k; int64_t axis; - op::v1::TopK::Mode mode; - op::v1::TopK::SortType sort; + opset1::TopK::Mode mode; + opset1::TopK::SortType sort; Tensor result0; Tensor result1; size_t outIdx; @@ -71,7 +71,6 @@ struct TopKParamsResnet50 { std::string testcaseName; }; - class ReferenceTopKTestResnet50 : public testing::TestWithParam, public CommonReferenceTest { public: void SetUp() override { @@ -101,18 +100,18 @@ public: private: static std::shared_ptr CreateFunction(const TopKParamsResnet50& params) { - const auto A = std::make_shared(params.A.type, + const auto A = std::make_shared(params.A.type, params.A.shape); - const auto B = std::make_shared(A, - op::v0::Constant::create(element::i64, {}, {5}), + const auto B = std::make_shared(A, + 
opset1::Constant::create(element::i64, {}, {5}), 1, - op::v1::TopK::Mode::MAX, - op::v1::TopK::SortType::SORT_VALUES); - const auto C = std::make_shared(A, - op::v0::Constant::create(element::i64, {}, {1}), + opset1::TopK::Mode::MAX, + opset1::TopK::SortType::SORT_VALUES); + const auto C = std::make_shared(A, + opset1::Constant::create(element::i64, {}, {1}), 1, - op::v1::TopK::Mode::MAX, - op::v1::TopK::SortType::SORT_VALUES); + opset1::TopK::Mode::MAX, + opset1::TopK::SortType::SORT_VALUES); const auto out5_value = B->output(0); const auto out5_index = B->output(1); @@ -220,12 +219,12 @@ public: private: static std::shared_ptr CreateFunction(const TopKParams& params) { - const auto A = std::make_shared(params.A.type, + const auto A = std::make_shared(params.A.type, params.A.shape); - const auto k = op::v0::Constant::create(params.k.type, + const auto k = opset1::Constant::create(params.k.type, params.k.shape, params.k.data.data()); - const auto B = std::make_shared(A, k, params.axis, params.mode, params.sort); + const auto B = std::make_shared(A, k, params.axis, params.mode, params.sort); const auto f = std::make_shared(B->outputs(), ParameterVector{A}); return f; } @@ -253,8 +252,8 @@ std::vector generateParamsMaxMinSort() { }({128, 1000})), Tensor(ET2, {}, std::vector{5}), 1, - op::v1::TopK::Mode::MAX, - op::v1::TopK::SortType::NONE, + opset1::TopK::Mode::MAX, + opset1::TopK::SortType::NONE, Tensor(ET, {128, 5}, [](std::vector rshape, std::vector shape) -> std::vector{ std::vector expected_value; for (size_t i = 0; i < rshape[0]; i++) { @@ -292,8 +291,8 @@ std::vector generateParamsMaxMinSort() { }({128, 1000})), Tensor(ET2, {}, std::vector{5}), 1, - op::v1::TopK::Mode::MIN, - op::v1::TopK::SortType::NONE, + opset1::TopK::Mode::MIN, + opset1::TopK::SortType::NONE, Tensor(ET, {128, 5}, [](std::vector rshape) -> std::vector{ std::vector expected_value; for (size_t i = 0; i < rshape[0]; i++) { @@ -331,8 +330,8 @@ std::vector generateParamsMaxMinSort() { }({128, 1000})), Tensor(ET2, {}, std::vector{5}), 1, - op::v1::TopK::Mode::MAX, - op::v1::TopK::SortType::SORT_VALUES, + opset1::TopK::Mode::MAX, + opset1::TopK::SortType::SORT_VALUES, Tensor(ET, {128, 5}, [](std::vector rshape, std::vector shape) -> std::vector{ std::vector expected_value; for (size_t i = 0; i < rshape[0]; i++) { @@ -366,8 +365,8 @@ std::vector generateParamsMaxMinSort() { }({128, 1000})), Tensor(ET2, {}, std::vector{5}), 1, - op::v1::TopK::Mode::MIN, - op::v1::TopK::SortType::SORT_VALUES, + opset1::TopK::Mode::MIN, + opset1::TopK::SortType::SORT_VALUES, Tensor(ET, {128, 5}, [](std::vector rshape) -> std::vector{ std::vector expected_value; for (size_t i = 0; i < rshape[0]; i++) { @@ -401,8 +400,8 @@ std::vector generateParamsMaxMinSort() { }({128, 1000})), Tensor(ET2, {}, std::vector{5}), 1, - op::v1::TopK::Mode::MAX, - op::v1::TopK::SortType::SORT_INDICES, + opset1::TopK::Mode::MAX, + opset1::TopK::SortType::SORT_INDICES, Tensor(ET, {128, 5}, [](std::vector rshape, std::vector shape) -> std::vector{ std::vector expected_value; for (size_t i = 0; i < rshape[0]; i++) { @@ -440,8 +439,8 @@ std::vector generateParamsMaxMinSort() { }({128, 1000})), Tensor(ET2, {}, std::vector{5}), 1, - op::v1::TopK::Mode::MIN, - op::v1::TopK::SortType::SORT_INDICES, + opset1::TopK::Mode::MIN, + opset1::TopK::SortType::SORT_INDICES, Tensor(ET, {128, 5}, [](std::vector rshape) -> std::vector{ std::vector expected_value; for (size_t i = 0; i < rshape[0]; i++) { @@ -467,8 +466,8 @@ std::vector generateParamsMaxMinSort() { Tensor(ET, {5}, 
std::vector{3, 1, 2, 5, 4}), Tensor(ET2, {}, std::vector{3}), 0, - op::v1::TopK::Mode::MAX, - op::v1::TopK::SortType::SORT_VALUES, + opset1::TopK::Mode::MAX, + opset1::TopK::SortType::SORT_VALUES, Tensor(ET, {3}, std::vector{5, 4, 3}), Tensor(ET_OUT, {3}, std::vector{3, 4, 0}), 0, @@ -478,8 +477,8 @@ std::vector generateParamsMaxMinSort() { Tensor(ET, {5}, std::vector{3, 1, 2, 5, 4}), Tensor(ET2, {}, std::vector{3}), 0, - op::v1::TopK::Mode::MAX, - op::v1::TopK::SortType::SORT_INDICES, + opset1::TopK::Mode::MAX, + opset1::TopK::SortType::SORT_INDICES, Tensor(ET, {3}, std::vector{3, 5, 4}), Tensor(ET_OUT, {3}, std::vector{0, 3, 4}), 0, @@ -489,8 +488,8 @@ std::vector generateParamsMaxMinSort() { Tensor(ET, {5}, std::vector{3, 1, 2, 5, 4}), Tensor(ET2, {}, std::vector{3}), 0, - op::v1::TopK::Mode::MIN, - op::v1::TopK::SortType::SORT_VALUES, + opset1::TopK::Mode::MIN, + opset1::TopK::SortType::SORT_VALUES, Tensor(ET, {3}, std::vector{1, 2, 3}), Tensor(ET_OUT, {3}, std::vector{1, 2, 0}), 0, @@ -500,8 +499,8 @@ std::vector generateParamsMaxMinSort() { Tensor(ET, {5}, std::vector{3, 1, 2, 5, 4}), Tensor(ET2, {}, std::vector{3}), 0, - op::v1::TopK::Mode::MIN, - op::v1::TopK::SortType::SORT_INDICES, + opset1::TopK::Mode::MIN, + opset1::TopK::SortType::SORT_INDICES, Tensor(ET, {3}, std::vector{3, 1, 2}), Tensor(ET_OUT, {3}, std::vector{0, 1, 2}), 0, @@ -536,7 +535,7 @@ std::vector generateCombinedParamsMaxMinSort() { INSTANTIATE_TEST_SUITE_P(smoke_TopK_With_Hardcoded_Refs, ReferenceTopKTestMaxMinSort, testing::ValuesIn(generateCombinedParamsMaxMinSort()), ReferenceTopKTest::getTestCaseName); -class ReferenceTopKTestV3 : public ReferenceTopKTest { +class ReferenceTopKTestBackend : public ReferenceTopKTest { public: void SetUp() override { auto params = GetParam(); @@ -547,18 +546,18 @@ public: private: static std::shared_ptr CreateFunction(const TopKParams& params) { - const auto A = std::make_shared(params.A.type, + const auto A = std::make_shared(params.A.type, params.A.shape); - const auto k = op::v0::Constant::create(params.k.type, + const auto k = opset1::Constant::create(params.k.type, params.k.shape, params.k.data.data()); - const auto B = std::make_shared(A, k, params.axis, params.mode, params.sort); + const auto B = std::make_shared(A, k, params.axis, params.mode, params.sort); const auto f = std::make_shared(B->outputs(), ParameterVector{A}); return f; } }; -TEST_P(ReferenceTopKTestV3, CompareWithRefs) { +TEST_P(ReferenceTopKTestBackend, CompareWithRefs) { Exec(); } @@ -572,8 +571,8 @@ std::vector generateParamsV3() { Tensor(ET, {5}, std::vector{3, 1, 2, 5, 4}), Tensor(ET2, {}, std::vector{3}), 0, - op::v1::TopK::Mode::MAX, - op::v1::TopK::SortType::SORT_VALUES, + opset1::TopK::Mode::MAX, + opset1::TopK::SortType::SORT_VALUES, Tensor(ET, {3}, std::vector{5, 4, 3}), Tensor(ET_OUT, {3}, std::vector{3, 4, 0}), 0, @@ -583,8 +582,8 @@ std::vector generateParamsV3() { Tensor(ET, {5}, std::vector{3, 1, 2, 5, 4}), Tensor(ET2, {}, std::vector{3}), 0, - op::v1::TopK::Mode::MAX, - op::v1::TopK::SortType::SORT_INDICES, + opset1::TopK::Mode::MAX, + opset1::TopK::SortType::SORT_INDICES, Tensor(ET, {3}, std::vector{3, 5, 4}), Tensor(ET_OUT, {3}, std::vector{0, 3, 4}), 0, @@ -594,8 +593,8 @@ std::vector generateParamsV3() { Tensor(ET, {5}, std::vector{3, 1, 2, 5, 4}), Tensor(ET2, {}, std::vector{3}), 0, - op::v1::TopK::Mode::MIN, - op::v1::TopK::SortType::SORT_VALUES, + opset1::TopK::Mode::MIN, + opset1::TopK::SortType::SORT_VALUES, Tensor(ET, {3}, std::vector{1, 2, 3}), Tensor(ET_OUT, {3}, std::vector{1, 
2, 0}), 0, @@ -605,8 +604,8 @@ std::vector generateParamsV3() { Tensor(ET, {5}, std::vector{3, 1, 2, 5, 4}), Tensor(ET2, {}, std::vector{3}), 0, - op::v1::TopK::Mode::MIN, - op::v1::TopK::SortType::SORT_INDICES, + opset1::TopK::Mode::MIN, + opset1::TopK::SortType::SORT_INDICES, Tensor(ET, {3}, std::vector{3, 1, 2}), Tensor(ET_OUT, {3}, std::vector{0, 1, 2}), 0, @@ -615,7 +614,7 @@ std::vector generateParamsV3() { return params; } -std::vector generateCombinedParamsV3() { +std::vector generateCombinedParamsBackend() { const std::vector> generatedParams { generateParamsMaxMinSort(), generateParamsMaxMinSort(), @@ -638,8 +637,8 @@ std::vector generateCombinedParamsV3() { return combinedParams; } -INSTANTIATE_TEST_SUITE_P(smoke_TopK_With_Hardcoded_Refs, ReferenceTopKTestV3, - testing::ValuesIn(generateCombinedParamsV3()), ReferenceTopKTest::getTestCaseName); +INSTANTIATE_TEST_SUITE_P(smoke_TopK_With_Hardcoded_Refs, ReferenceTopKTestBackend, + testing::ValuesIn(generateCombinedParamsBackend()), ReferenceTopKTest::getTestCaseName); class ReferenceTopKTest1dMaxMin : public ReferenceTopKTest { public: @@ -673,12 +672,12 @@ public: private: static std::shared_ptr CreateFunction(const TopKParams& params, size_t out_idx) { - const auto A = std::make_shared(params.A.type, + const auto A = std::make_shared(params.A.type, params.A.shape); - const auto k = op::v0::Constant::create(params.k.type, + const auto k = opset1::Constant::create(params.k.type, params.k.shape, params.k.data.data()); - const auto B = std::make_shared(A, k, params.axis, params.mode, params.sort); + const auto B = std::make_shared(A, k, params.axis, params.mode, params.sort); const auto f = std::make_shared(OutputVector{B->output(out_idx)}, ParameterVector{A}); return f; } @@ -698,8 +697,8 @@ std::vector generateParams1dMaxMin() { Tensor(ET, {6}, std::vector{1, 2, 3, 4, 5, 6}), Tensor(ET2, {}, std::vector{6}), 0, - op::v1::TopK::Mode::MAX, - op::v1::TopK::SortType::SORT_VALUES, + opset1::TopK::Mode::MAX, + opset1::TopK::SortType::SORT_VALUES, Tensor(ET, {6}, std::vector{6, 5, 4, 3, 2, 1}), Tensor(ET_OUT, {6}, std::vector{5, 4, 3, 2, 1, 0}), 0, @@ -709,8 +708,8 @@ std::vector generateParams1dMaxMin() { Tensor(ET, {6}, std::vector{1, 2, 3, 4, 5, 6}), Tensor(ET2, {}, std::vector{6}), 0, - op::v1::TopK::Mode::MAX, - op::v1::TopK::SortType::SORT_VALUES, + opset1::TopK::Mode::MAX, + opset1::TopK::SortType::SORT_VALUES, Tensor(ET, {6}, std::vector{6, 5, 4, 3, 2, 1}), Tensor(ET_OUT, {6}, std::vector{5, 4, 3, 2, 1, 0}), 1, @@ -720,8 +719,8 @@ std::vector generateParams1dMaxMin() { Tensor(ET, {6}, std::vector{1, 2, 3, 4, 5, 6}), Tensor(ET2, {}, std::vector{3}), 0, - op::v1::TopK::Mode::MAX, - op::v1::TopK::SortType::SORT_VALUES, + opset1::TopK::Mode::MAX, + opset1::TopK::SortType::SORT_VALUES, Tensor(ET, {3}, std::vector{6, 5, 4}), Tensor(ET_OUT, {3}, std::vector{5, 4, 3}), 0, @@ -731,8 +730,8 @@ std::vector generateParams1dMaxMin() { Tensor(ET, {6}, std::vector{1, 2, 3, 4, 5, 6}), Tensor(ET2, {}, std::vector{3}), 0, - op::v1::TopK::Mode::MAX, - op::v1::TopK::SortType::SORT_VALUES, + opset1::TopK::Mode::MAX, + opset1::TopK::SortType::SORT_VALUES, Tensor(ET, {3}, std::vector{6, 5, 4}), Tensor(ET_OUT, {3}, std::vector{5, 4, 3}), 1, @@ -742,8 +741,8 @@ std::vector generateParams1dMaxMin() { Tensor(ET, {6}, std::vector{1, 2, 3, 4, 5, 6}), Tensor(ET2, {}, std::vector{1}), 0, - op::v1::TopK::Mode::MAX, - op::v1::TopK::SortType::SORT_VALUES, + opset1::TopK::Mode::MAX, + opset1::TopK::SortType::SORT_VALUES, Tensor(ET, {1}, std::vector{6}), 
Tensor(ET_OUT, {1}, std::vector{5}), 0, @@ -753,8 +752,8 @@ std::vector generateParams1dMaxMin() { Tensor(ET, {6}, std::vector{1, 2, 3, 4, 5, 6}), Tensor(ET2, {}, std::vector{1}), 0, - op::v1::TopK::Mode::MAX, - op::v1::TopK::SortType::SORT_VALUES, + opset1::TopK::Mode::MAX, + opset1::TopK::SortType::SORT_VALUES, Tensor(ET, {1}, std::vector{6}), Tensor(ET_OUT, {1}, std::vector{5}), 1, @@ -764,8 +763,8 @@ std::vector generateParams1dMaxMin() { Tensor(ET, {6}, std::vector{6, 5, 4, 3, 2, 1}), Tensor(ET2, {}, std::vector{6}), 0, - op::v1::TopK::Mode::MIN, - op::v1::TopK::SortType::SORT_VALUES, + opset1::TopK::Mode::MIN, + opset1::TopK::SortType::SORT_VALUES, Tensor(ET, {6}, std::vector{1, 2, 3, 4, 5, 6}), Tensor(ET_OUT, {6}, std::vector{5, 4, 3, 2, 1, 0}), 0, @@ -775,8 +774,8 @@ std::vector generateParams1dMaxMin() { Tensor(ET, {6}, std::vector{6, 5, 4, 3, 2, 1}), Tensor(ET2, {}, std::vector{6}), 0, - op::v1::TopK::Mode::MIN, - op::v1::TopK::SortType::SORT_VALUES, + opset1::TopK::Mode::MIN, + opset1::TopK::SortType::SORT_VALUES, Tensor(ET, {6}, std::vector{1, 2, 3, 4, 5, 6}), Tensor(ET_OUT, {6}, std::vector{5, 4, 3, 2, 1, 0}), 1, @@ -786,8 +785,8 @@ std::vector generateParams1dMaxMin() { Tensor(ET, {6}, std::vector{6, 5, 4, 3, 2, 1}), Tensor(ET2, {}, std::vector{3}), 0, - op::v1::TopK::Mode::MIN, - op::v1::TopK::SortType::SORT_VALUES, + opset1::TopK::Mode::MIN, + opset1::TopK::SortType::SORT_VALUES, Tensor(ET, {3}, std::vector{1, 2, 3}), Tensor(ET_OUT, {3}, std::vector{5, 4, 3}), 0, @@ -797,8 +796,8 @@ std::vector generateParams1dMaxMin() { Tensor(ET, {6}, std::vector{6, 5, 4, 3, 2, 1}), Tensor(ET2, {}, std::vector{3}), 0, - op::v1::TopK::Mode::MIN, - op::v1::TopK::SortType::SORT_VALUES, + opset1::TopK::Mode::MIN, + opset1::TopK::SortType::SORT_VALUES, Tensor(ET, {3}, std::vector{1, 2, 3}), Tensor(ET_OUT, {3}, std::vector{5, 4, 3}), 1, @@ -808,8 +807,8 @@ std::vector generateParams1dMaxMin() { Tensor(ET, {6}, std::vector{6, 5, 4, 3, 2, 1}), Tensor(ET2, {}, std::vector{1}), 0, - op::v1::TopK::Mode::MIN, - op::v1::TopK::SortType::SORT_VALUES, + opset1::TopK::Mode::MIN, + opset1::TopK::SortType::SORT_VALUES, Tensor(ET, {1}, std::vector{1}), Tensor(ET_OUT, {1}, std::vector{5}), 0, @@ -819,8 +818,8 @@ std::vector generateParams1dMaxMin() { Tensor(ET, {6}, std::vector{6, 5, 4, 3, 2, 1}), Tensor(ET2, {}, std::vector{1}), 0, - op::v1::TopK::Mode::MIN, - op::v1::TopK::SortType::SORT_VALUES, + opset1::TopK::Mode::MIN, + opset1::TopK::SortType::SORT_VALUES, Tensor(ET, {1}, std::vector{1}), Tensor(ET_OUT, {1}, std::vector{5}), 1, @@ -832,8 +831,8 @@ std::vector generateParams1dMaxMin() { }), Tensor(ET2, {}, std::vector{3}), 1, - op::v1::TopK::Mode::MAX, - op::v1::TopK::SortType::SORT_VALUES, + opset1::TopK::Mode::MAX, + opset1::TopK::SortType::SORT_VALUES, Tensor(ET, {2, 3, 2}, std::vector{ 10, 12, 9, 4, 8, 2, 11, 7, 6, 3, 5, 1 }), @@ -849,8 +848,8 @@ std::vector generateParams1dMaxMin() { }), Tensor(ET2, {}, std::vector{3}), 1, - op::v1::TopK::Mode::MAX, - op::v1::TopK::SortType::SORT_VALUES, + opset1::TopK::Mode::MAX, + opset1::TopK::SortType::SORT_VALUES, Tensor(ET, {2, 3, 2}, std::vector{ 10, 12, 9, 4, 8, 2, 11, 7, 6, 3, 5, 1 }), @@ -882,8 +881,8 @@ std::vector generateParams1dMaxMin() { }), Tensor(ET2, {}, std::vector{2}), 1, - op::v1::TopK::Mode::MAX, - op::v1::TopK::SortType::SORT_VALUES, + opset1::TopK::Mode::MAX, + opset1::TopK::SortType::SORT_VALUES, Tensor(ET, {2, 2, 3, 2, 4}, std::vector{ 169, 241, 177, 249, 185, 233, 170, 242, 178, 250, 186, 258, 171, 243, 179, 251, 187, 259, 172, 224, 180, 
252, 188, 260, 149, 221, 157, 229, 165, 113, 150, 222, @@ -923,8 +922,8 @@ std::vector generateParams1dMaxMin() { }), Tensor(ET2, {}, std::vector{2}), 1, - op::v1::TopK::Mode::MAX, - op::v1::TopK::SortType::SORT_VALUES, + opset1::TopK::Mode::MAX, + opset1::TopK::SortType::SORT_VALUES, Tensor(ET, {2, 2, 3, 2, 4}, std::vector{ 169, 241, 177, 249, 185, 233, 170, 242, 178, 250, 186, 258, 171, 243, 179, 251, 187, 259, 172, 224, 180, 252, 188, 260, 149, 221, 157, 229, 165, 113, 150, 222, @@ -948,8 +947,8 @@ std::vector generateParams1dMaxMin() { }), Tensor(ET2, {}, std::vector{2}), 1, - op::v1::TopK::Mode::MAX, - op::v1::TopK::SortType::SORT_VALUES, + opset1::TopK::Mode::MAX, + opset1::TopK::SortType::SORT_VALUES, Tensor(ET, {2, 2, 2}, std::vector{ 10, 12, 9, 4, 11, 7, 6, 3 }), @@ -965,8 +964,8 @@ std::vector generateParams1dMaxMin() { }), Tensor(ET2, {}, std::vector{2}), 1, - op::v1::TopK::Mode::MAX, - op::v1::TopK::SortType::SORT_VALUES, + opset1::TopK::Mode::MAX, + opset1::TopK::SortType::SORT_VALUES, Tensor(ET, {2, 2, 2}, std::vector{ 10, 12, 9, 4, 11, 7, 6, 3 }), @@ -982,8 +981,8 @@ std::vector generateParams1dMaxMin() { }), Tensor(ET2, {}, std::vector{1}), 1, - op::v1::TopK::Mode::MAX, - op::v1::TopK::SortType::SORT_VALUES, + opset1::TopK::Mode::MAX, + opset1::TopK::SortType::SORT_VALUES, Tensor(ET, {2, 1, 2}, std::vector{ 10, 12, 11, 7 }), @@ -999,8 +998,8 @@ std::vector generateParams1dMaxMin() { }), Tensor(ET2, {}, std::vector{1}), 1, - op::v1::TopK::Mode::MAX, - op::v1::TopK::SortType::SORT_VALUES, + opset1::TopK::Mode::MAX, + opset1::TopK::SortType::SORT_VALUES, Tensor(ET, {2, 1, 2}, std::vector{ 10, 12, 11, 7 }), @@ -1016,8 +1015,8 @@ std::vector generateParams1dMaxMin() { }), Tensor(ET2, {}, std::vector{3}), 1, - op::v1::TopK::Mode::MIN, - op::v1::TopK::SortType::SORT_VALUES, + opset1::TopK::Mode::MIN, + opset1::TopK::SortType::SORT_VALUES, Tensor(ET, {2, 3, 2}, std::vector{ 8, 2, 10, 4, 12, 9, 5, 1, 6, 3, 11, 7 }), @@ -1033,8 +1032,8 @@ std::vector generateParams1dMaxMin() { }), Tensor(ET2, {}, std::vector{3}), 1, - op::v1::TopK::Mode::MIN, - op::v1::TopK::SortType::SORT_VALUES, + opset1::TopK::Mode::MIN, + opset1::TopK::SortType::SORT_VALUES, Tensor(ET, {2, 3, 2}, std::vector{ 8, 2, 10, 4, 12, 9, 5, 1, 6, 3, 11, 7 }), @@ -1050,8 +1049,8 @@ std::vector generateParams1dMaxMin() { }), Tensor(ET2, {}, std::vector{2}), 1, - op::v1::TopK::Mode::MIN, - op::v1::TopK::SortType::SORT_VALUES, + opset1::TopK::Mode::MIN, + opset1::TopK::SortType::SORT_VALUES, Tensor(ET, {2, 2, 2}, std::vector{ 8, 2, 10, 4, 5, 1, 6, 3 }), @@ -1067,8 +1066,8 @@ std::vector generateParams1dMaxMin() { }), Tensor(ET2, {}, std::vector{2}), 1, - op::v1::TopK::Mode::MIN, - op::v1::TopK::SortType::SORT_VALUES, + opset1::TopK::Mode::MIN, + opset1::TopK::SortType::SORT_VALUES, Tensor(ET, {2, 2, 2}, std::vector{ 8, 2, 10, 4, 5, 1, 6, 3 }), @@ -1084,8 +1083,8 @@ std::vector generateParams1dMaxMin() { }), Tensor(ET2, {}, std::vector{1}), 1, - op::v1::TopK::Mode::MIN, - op::v1::TopK::SortType::SORT_VALUES, + opset1::TopK::Mode::MIN, + opset1::TopK::SortType::SORT_VALUES, Tensor(ET, {2, 1, 2}, std::vector{ 8, 2, 5, 1 }), @@ -1101,8 +1100,8 @@ std::vector generateParams1dMaxMin() { }), Tensor(ET2, {}, std::vector{1}), 1, - op::v1::TopK::Mode::MIN, - op::v1::TopK::SortType::SORT_VALUES, + opset1::TopK::Mode::MIN, + opset1::TopK::SortType::SORT_VALUES, Tensor(ET, {2, 1, 2}, std::vector{ 8, 2, 5, 1 }), @@ -1118,8 +1117,8 @@ std::vector generateParams1dMaxMin() { }), Tensor(ET2, {}, std::vector{4}), 0, - op::v1::TopK::Mode::MAX, - 
op::v1::TopK::SortType::SORT_VALUES, + opset1::TopK::Mode::MAX, + opset1::TopK::SortType::SORT_VALUES, Tensor(ET, {4, 3}, std::vector{ 12, 11, 10, 9, 8, 7, 6, 2, 5, 3, 1, 4 }), @@ -1135,8 +1134,8 @@ std::vector generateParams1dMaxMin() { }), Tensor(ET2, {}, std::vector{4}), 0, - op::v1::TopK::Mode::MAX, - op::v1::TopK::SortType::SORT_VALUES, + opset1::TopK::Mode::MAX, + opset1::TopK::SortType::SORT_VALUES, Tensor(ET, {4, 3}, std::vector{ 12, 11, 10, 9, 8, 7, 6, 2, 5, 3, 1, 4 }), @@ -1152,8 +1151,8 @@ std::vector generateParams1dMaxMin() { }), Tensor(ET2, {}, std::vector{2}), 0, - op::v1::TopK::Mode::MAX, - op::v1::TopK::SortType::SORT_VALUES, + opset1::TopK::Mode::MAX, + opset1::TopK::SortType::SORT_VALUES, Tensor(ET, {2, 3}, std::vector{ 12, 11, 10, 9, 8, 7 }), @@ -1169,8 +1168,8 @@ std::vector generateParams1dMaxMin() { }), Tensor(ET2, {}, std::vector{2}), 0, - op::v1::TopK::Mode::MAX, - op::v1::TopK::SortType::SORT_VALUES, + opset1::TopK::Mode::MAX, + opset1::TopK::SortType::SORT_VALUES, Tensor(ET, {2, 3}, std::vector{ 12, 11, 10, 9, 8, 7 }), @@ -1186,8 +1185,8 @@ std::vector generateParams1dMaxMin() { }), Tensor(ET2, {}, std::vector{1}), 0, - op::v1::TopK::Mode::MAX, - op::v1::TopK::SortType::SORT_VALUES, + opset1::TopK::Mode::MAX, + opset1::TopK::SortType::SORT_VALUES, Tensor(ET, {1, 3}, std::vector{ 12, 11, 10 }), @@ -1203,8 +1202,8 @@ std::vector generateParams1dMaxMin() { }), Tensor(ET2, {}, std::vector{1}), 0, - op::v1::TopK::Mode::MAX, - op::v1::TopK::SortType::SORT_VALUES, + opset1::TopK::Mode::MAX, + opset1::TopK::SortType::SORT_VALUES, Tensor(ET, {1, 3}, std::vector{ 12, 11, 10 }), @@ -1220,8 +1219,8 @@ std::vector generateParams1dMaxMin() { }), Tensor(ET2, {}, std::vector{1}), 1, - op::v1::TopK::Mode::MAX, - op::v1::TopK::SortType::SORT_VALUES, + opset1::TopK::Mode::MAX, + opset1::TopK::SortType::SORT_VALUES, Tensor(ET, {2, 1}, std::vector{ 4, 3 }), @@ -1237,8 +1236,8 @@ std::vector generateParams1dMaxMin() { }), Tensor(ET2, {}, std::vector{1}), 1, - op::v1::TopK::Mode::MAX, - op::v1::TopK::SortType::SORT_VALUES, + opset1::TopK::Mode::MAX, + opset1::TopK::SortType::SORT_VALUES, Tensor(ET, {2, 1}, std::vector{ 4, 3 }), @@ -1254,8 +1253,8 @@ std::vector generateParams1dMaxMin() { }), Tensor(ET2, {}, std::vector{4}), 0, - op::v1::TopK::Mode::MIN, - op::v1::TopK::SortType::SORT_VALUES, + opset1::TopK::Mode::MIN, + opset1::TopK::SortType::SORT_VALUES, Tensor(ET, {4, 3}, std::vector{ 3, 1, 4, 6, 2, 5, 9, 8, 7, 12, 11, 10 }), @@ -1271,8 +1270,8 @@ std::vector generateParams1dMaxMin() { }), Tensor(ET2, {}, std::vector{4}), 0, - op::v1::TopK::Mode::MIN, - op::v1::TopK::SortType::SORT_VALUES, + opset1::TopK::Mode::MIN, + opset1::TopK::SortType::SORT_VALUES, Tensor(ET, {4, 3}, std::vector{ 3, 1, 4, 6, 2, 5, 9, 8, 7, 12, 11, 10 }), @@ -1288,8 +1287,8 @@ std::vector generateParams1dMaxMin() { }), Tensor(ET2, {}, std::vector{2}), 0, - op::v1::TopK::Mode::MIN, - op::v1::TopK::SortType::SORT_VALUES, + opset1::TopK::Mode::MIN, + opset1::TopK::SortType::SORT_VALUES, Tensor(ET, {2, 3}, std::vector{ 3, 1, 4, 6, 2, 5 }), @@ -1305,8 +1304,8 @@ std::vector generateParams1dMaxMin() { }), Tensor(ET2, {}, std::vector{2}), 0, - op::v1::TopK::Mode::MIN, - op::v1::TopK::SortType::SORT_VALUES, + opset1::TopK::Mode::MIN, + opset1::TopK::SortType::SORT_VALUES, Tensor(ET, {2, 3}, std::vector{ 3, 1, 4, 6, 2, 5 }), @@ -1322,8 +1321,8 @@ std::vector generateParams1dMaxMin() { }), Tensor(ET2, {}, std::vector{1}), 0, - op::v1::TopK::Mode::MIN, - op::v1::TopK::SortType::NONE, + opset1::TopK::Mode::MIN, + 
opset1::TopK::SortType::NONE, Tensor(ET, {1, 3}, std::vector{ 3, 1, 4 }), @@ -1339,8 +1338,8 @@ std::vector generateParams1dMaxMin() { }), Tensor(ET2, {}, std::vector{1}), 0, - op::v1::TopK::Mode::MIN, - op::v1::TopK::SortType::NONE, + opset1::TopK::Mode::MIN, + opset1::TopK::SortType::NONE, Tensor(ET, {1, 3}, std::vector{ 3, 1, 4 }), @@ -1380,12 +1379,12 @@ INSTANTIATE_TEST_SUITE_P(smoke_TopK_With_Hardcoded_Refs, ReferenceTopKTest1dMaxM class ReferenceTopKTestInt64 : public ReferenceTopKTest1dMaxMin { private: static std::shared_ptr CreateFunction(const TopKParams& params, size_t out_idx) { - const auto A = std::make_shared(params.A.type, + const auto A = std::make_shared(params.A.type, params.A.shape); - const auto k = op::v0::Constant::create(params.k.type, + const auto k = opset1::Constant::create(params.k.type, params.k.shape, params.k.data.data()); - const auto B = std::make_shared(A, + const auto B = std::make_shared(A, k, params.axis, params.mode, @@ -1412,8 +1411,8 @@ std::vector generateParamsInt64() { }), Tensor(ET2, {}, std::vector{3}), 1, - op::v1::TopK::Mode::MAX, - op::v1::TopK::SortType::SORT_VALUES, + opset1::TopK::Mode::MAX, + opset1::TopK::SortType::SORT_VALUES, Tensor(ET, {2, 3, 2}, std::vector{ 10, 12, 9, 4, 8, 2, 11, 7, 6, 3, 5, 1 }), @@ -1428,8 +1427,8 @@ std::vector generateParamsInt64() { }), Tensor(ET2, {}, std::vector{3}), 1, - op::v1::TopK::Mode::MAX, - op::v1::TopK::SortType::SORT_VALUES, + opset1::TopK::Mode::MAX, + opset1::TopK::SortType::SORT_VALUES, Tensor(ET, {2, 3, 2}, std::vector{ 10, 12, 9, 4, 8, 2, 11, 7, 6, 3, 5, 1 }), @@ -1468,12 +1467,12 @@ public: private: static std::shared_ptr CreateFunction(const TopKParams& params) { - const auto A = std::make_shared(params.A.type, + const auto A = std::make_shared(params.A.type, params.A.shape); - const auto k = op::v0::Constant::create(params.k.type, + const auto k = opset1::Constant::create(params.k.type, params.k.shape, params.k.data.data()); - const auto B = std::make_shared(A, k, params.axis, params.mode, params.sort); + const auto B = std::make_shared(A, k, params.axis, params.mode, params.sort); const auto f = std::make_shared(OutputVector{B->output(1)}, ParameterVector{A}); return f; } @@ -1493,8 +1492,8 @@ std::vector generateParamsSingleOutput() { Tensor(ET, {2, 3, 2}, std::vector{12, 2, 10, 9, 8, 4, 6, 1, 5, 3, 11, 7}), Tensor(ET2, {}, std::vector{2}), 1, - op::v1::TopK::Mode::MIN, - op::v1::TopK::SortType::SORT_VALUES, + opset1::TopK::Mode::MIN, + opset1::TopK::SortType::SORT_VALUES, Tensor(ET, {2, 2, 2}, std::vector{}), Tensor(ET_OUT, {2, 2, 2}, std::vector{2, 0, 1, 2, 1, 0, 0, 1}), 0, @@ -1530,19 +1529,181 @@ INSTANTIATE_TEST_SUITE_P(smoke_TopK_With_Hardcoded_Refs, ReferenceTopKTestSingle testing::ValuesIn(generateCombinedParamsSingleOutput()), ReferenceTopKTest::getTestCaseName); TEST(ReferenceTopKTestInvalid, topk_v1_invalid_strings) { - const auto data = std::make_shared(element::f32, Shape{1, 2, 3}); - const auto k = op::v0::Constant::create(element::i64, Shape{}, {1}); - EXPECT_THROW(op::v1::TopK(data, k, 0, "max", "invalid_mode"), ngraph::CheckFailure); - EXPECT_THROW(op::v1::TopK(data, k, 0, "invalid_sort", "index"), ngraph::CheckFailure); + const auto data = std::make_shared(element::f32, Shape{1, 2, 3}); + const auto k = opset1::Constant::create(element::i64, Shape{}, {1}); + EXPECT_THROW(opset1::TopK(data, k, 0, "max", "invalid_mode"), ngraph::CheckFailure); + EXPECT_THROW(opset1::TopK(data, k, 0, "invalid_sort", "index"), ngraph::CheckFailure); } TEST(ReferenceTopKTestInvalid, 
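// [Editor's note] The two Invalid tests around this point deliberately go through
// the string-taking TopK constructor: "max" and "index" are converted to the Mode
// and SortType enums at construction time, and unrecognized strings such as
// "invalid_mode"/"invalid_sort" trip the ngraph::CheckFailure these EXPECT_THROWs
// assert on. A minimal sketch of the equivalent enum-based spelling the rest of
// this file prefers (assuming the opset1 aliases already in scope here):
//
//   const auto data = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 2, 3});
//   const auto k = opset1::Constant::create(element::i64, Shape{}, {1});
//   const auto topk = std::make_shared<opset1::TopK>(
//       data, k, /*axis=*/0, opset1::TopK::Mode::MAX, opset1::TopK::SortType::SORT_VALUES);
//   // topk->output(0): the k largest values along the axis; output(1): their indices.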
topk_v1_invalid_k) { - const auto data = std::make_shared(element::f32, Shape{1, 2, 3}); - const auto k_non_scalar = op::v0::Constant::create(element::i64, Shape{2}, {1, 2}); - EXPECT_THROW(op::v1::TopK(data, k_non_scalar, 0, "max", "index"), ngraph::NodeValidationFailure); - const auto k_float = op::v0::Constant::create(element::f32, Shape{}, {1.0f}); - EXPECT_THROW(op::v1::TopK(data, k_float, 0, "max", "index"), ngraph::NodeValidationFailure); - const auto k_negative = op::v0::Constant::create(element::i8, Shape{}, {-1}); - EXPECT_THROW(op::v1::TopK(data, k_negative, 0, "max", "index"), ngraph::NodeValidationFailure); + const auto data = std::make_shared(element::f32, Shape{1, 2, 3}); + const auto k_non_scalar = opset1::Constant::create(element::i64, Shape{2}, {1, 2}); + EXPECT_THROW(opset1::TopK(data, k_non_scalar, 0, "max", "index"), ngraph::NodeValidationFailure); + const auto k_float = opset1::Constant::create(element::f32, Shape{}, {1.0f}); + EXPECT_THROW(opset1::TopK(data, k_float, 0, "max", "index"), ngraph::NodeValidationFailure); + const auto k_negative = opset1::Constant::create(element::i8, Shape{}, {-1}); + EXPECT_THROW(opset1::TopK(data, k_negative, 0, "max", "index"), ngraph::NodeValidationFailure); +} + + + + +class ReferenceTopKTestResnet50V3 : public ReferenceTopKTestResnet50 { +private: + static std::shared_ptr CreateFunction(const TopKParamsResnet50& params) { + const auto A = std::make_shared(params.A.type, + params.A.shape); + const auto B = std::make_shared(A, + opset1::Constant::create(element::i64, {}, {5}), + 1, + opset1::TopK::Mode::MAX, + opset1::TopK::SortType::SORT_VALUES); + const auto C = std::make_shared(A, + opset1::Constant::create(element::i64, {}, {1}), + 1, + opset1::TopK::Mode::MAX, + opset1::TopK::SortType::SORT_VALUES); + + const auto out5_value = B->output(0); + const auto out5_index = B->output(1); + const auto out1_value = C->output(0); + const auto out1_index = C->output(1); + const auto f = std::make_shared(OutputVector{out5_value, out5_index, out1_value, out1_index}, ParameterVector{A}); + return f; + } +}; + +TEST_P(ReferenceTopKTestResnet50V3, CompareWithRefs) { + Exec(); +} + +INSTANTIATE_TEST_SUITE_P(smoke_TopK_With_Hardcoded_Refs, ReferenceTopKTestResnet50V3, + testing::ValuesIn(generateCombinedParamsResnet50()), ReferenceTopKTestResnet50V3::getTestCaseName); + +class ReferenceTopKTestMaxMinSortV3 : public ReferenceTopKTestMaxMinSort { +private: + static std::shared_ptr CreateFunction(const TopKParams& params) { + const auto A = std::make_shared(params.A.type, + params.A.shape); + const auto k = opset1::Constant::create(params.k.type, + params.k.shape, + params.k.data.data()); + const auto B = std::make_shared(A, k, params.axis, params.mode, params.sort); + const auto f = std::make_shared(B->outputs(), ParameterVector{A}); + return f; + } +}; + +TEST_P(ReferenceTopKTestMaxMinSortV3, CompareWithRefs) { + Exec(); +} + +INSTANTIATE_TEST_SUITE_P(smoke_TopK_With_Hardcoded_Refs, ReferenceTopKTestMaxMinSortV3, + testing::ValuesIn(generateCombinedParamsMaxMinSort()), ReferenceTopKTestMaxMinSortV3::getTestCaseName); + +class ReferenceTopKTestBackendV3 : public ReferenceTopKTestBackend { +private: + static std::shared_ptr CreateFunction(const TopKParams& params) { + const auto A = std::make_shared(params.A.type, + params.A.shape); + const auto k = opset1::Constant::create(params.k.type, + params.k.shape, + params.k.data.data()); + const auto B = std::make_shared(A, k, params.axis, params.mode, params.sort); + const auto f = 
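// [Editor's note] ReferenceTopKTestResnet50V3 above is the classification-shaped
// case: one {batch, classes} tensor is fed through two TopK nodes, k=5 and k=1 on
// axis 1, so a single inference returns top-5 and top-1 scores together with their
// class indices. A sketch of the same readout under hypothetical shapes (parameter
// and TopK types follow the opset aliases used throughout this file):
//
//   const auto logits = std::make_shared<opset1::Parameter>(element::f32, Shape{8, 1000});
//   const auto top1 = std::make_shared<opset3::TopK>(
//       logits, opset1::Constant::create(element::i64, {}, {1}), 1,
//       opset1::TopK::Mode::MAX, opset1::TopK::SortType::SORT_VALUES);
//   // top1->output(0): best score per row; top1->output(1): the predicted class id.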
std::make_shared(B->outputs(), ParameterVector{A}); + return f; + } +}; + +TEST_P(ReferenceTopKTestBackendV3, CompareWithRefs) { + Exec(); +} + +INSTANTIATE_TEST_SUITE_P(smoke_TopK_With_Hardcoded_Refs, ReferenceTopKTestBackendV3, + testing::ValuesIn(generateCombinedParamsBackend()), ReferenceTopKTestBackendV3::getTestCaseName); + +class ReferenceTopKTest1dMaxMinV3 : public ReferenceTopKTest1dMaxMin { +private: + static std::shared_ptr CreateFunction(const TopKParams& params, size_t out_idx) { + const auto A = std::make_shared(params.A.type, + params.A.shape); + const auto k = opset1::Constant::create(params.k.type, + params.k.shape, + params.k.data.data()); + const auto B = std::make_shared(A, k, params.axis, params.mode, params.sort); + const auto f = std::make_shared(OutputVector{B->output(out_idx)}, ParameterVector{A}); + return f; + } +}; + +TEST_P(ReferenceTopKTest1dMaxMinV3, CompareWithRefs) { + Exec(); +} + +INSTANTIATE_TEST_SUITE_P(smoke_TopK_With_Hardcoded_Refs, ReferenceTopKTest1dMaxMinV3, + testing::ValuesIn(generateCombinedParams1dMaxMin()), ReferenceTopKTest1dMaxMinV3::getTestCaseName); + +class ReferenceTopKTestInt64V3 : public ReferenceTopKTestInt64 { +private: + static std::shared_ptr CreateFunction(const TopKParams& params, size_t out_idx) { + const auto A = std::make_shared(params.A.type, + params.A.shape); + const auto k = opset1::Constant::create(params.k.type, + params.k.shape, + params.k.data.data()); + const auto B = std::make_shared(A, + k, + params.axis, + params.mode, + params.sort, + element::i64); + const auto f = std::make_shared(OutputVector{B->output(out_idx)}, ParameterVector{A}); + return f; + } +}; + +TEST_P(ReferenceTopKTestInt64V3, CompareWithRefs) { + Exec(); +} + +INSTANTIATE_TEST_SUITE_P(smoke_TopK_With_Hardcoded_Refs, ReferenceTopKTestInt64V3, + testing::ValuesIn(generateCombinedParamsInt64()), ReferenceTopKTestInt64V3::getTestCaseName); + +class ReferenceTopKTestSingleOutputV3 : public ReferenceTopKTestSingleOutput { +private: + static std::shared_ptr CreateFunction(const TopKParams& params) { + const auto A = std::make_shared(params.A.type, + params.A.shape); + const auto k = opset1::Constant::create(params.k.type, + params.k.shape, + params.k.data.data()); + const auto B = std::make_shared(A, k, params.axis, params.mode, params.sort); + const auto f = std::make_shared(OutputVector{B->output(1)}, ParameterVector{A}); + return f; + } +}; + +TEST_P(ReferenceTopKTestSingleOutputV3, CompareWithRefs) { + Exec(); +} + +INSTANTIATE_TEST_SUITE_P(smoke_TopK_With_Hardcoded_Refs, ReferenceTopKTestSingleOutputV3, + testing::ValuesIn(generateCombinedParamsSingleOutput()), ReferenceTopKTestSingleOutputV3::getTestCaseName); + +TEST(ReferenceTopKTestInvalidV3, topk_v3_invalid_strings) { + const auto data = std::make_shared(element::f32, Shape{1, 2, 3}); + const auto k = opset1::Constant::create(element::i64, Shape{}, {1}); + EXPECT_THROW(opset3::TopK(data, k, 0, "max", "invalid_mode"), ngraph::CheckFailure); + EXPECT_THROW(opset3::TopK(data, k, 0, "invalid_sort", "index"), ngraph::CheckFailure); +} + +TEST(ReferenceTopKTestInvalidV3, topk_v3_invalid_k) { + const auto data = std::make_shared(element::f32, Shape{1, 2, 3}); + const auto k_non_scalar = opset1::Constant::create(element::i64, Shape{2}, {1, 2}); + EXPECT_THROW(opset3::TopK(data, k_non_scalar, 0, "max", "index"), ngraph::NodeValidationFailure); + const auto k_float = opset1::Constant::create(element::f32, Shape{}, {1.0f}); + EXPECT_THROW(opset3::TopK(data, k_float, 0, "max", "index"), 
ngraph::NodeValidationFailure); + const auto k_negative = opset1::Constant::create(element::i8, Shape{}, {-1}); + EXPECT_THROW(opset3::TopK(data, k_negative, 0, "max", "index"), ngraph::NodeValidationFailure); } } // namespace From 93698483b5b023fd2d54ac16f0eab5dd6a94b637 Mon Sep 17 00:00:00 2001 From: Steve Yoo Date: Thu, 16 Dec 2021 03:45:18 +0900 Subject: [PATCH 08/27] Migrate ExtractImagePatches-3 (#8981) --- .../op_reference/extract_image_patches.cpp | 246 ++++++++++++++++++ 1 file changed, 246 insertions(+) create mode 100644 docs/template_plugin/tests/functional/op_reference/extract_image_patches.cpp diff --git a/docs/template_plugin/tests/functional/op_reference/extract_image_patches.cpp b/docs/template_plugin/tests/functional/op_reference/extract_image_patches.cpp new file mode 100644 index 00000000000..95f5571cc41 --- /dev/null +++ b/docs/template_plugin/tests/functional/op_reference/extract_image_patches.cpp @@ -0,0 +1,246 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "openvino/opsets/opset3.hpp" +#include "openvino/opsets/opset1.hpp" +#include "base_reference_test.hpp" + +using namespace reference_tests; +using namespace ov; + +namespace { +struct ExtractImagePatchesParams { + Tensor data; + Shape sizes; + Strides strides; + Shape rates; + op::PadType autoPad; + Tensor expectedResult; + std::string testcaseName; +}; + +struct Builder : ParamsBuilder { + REFERENCE_TESTS_ADD_SET_PARAM(Builder, data); + REFERENCE_TESTS_ADD_SET_PARAM(Builder, sizes); + REFERENCE_TESTS_ADD_SET_PARAM(Builder, strides); + REFERENCE_TESTS_ADD_SET_PARAM(Builder, rates); + REFERENCE_TESTS_ADD_SET_PARAM(Builder, autoPad); + REFERENCE_TESTS_ADD_SET_PARAM(Builder, expectedResult); + REFERENCE_TESTS_ADD_SET_PARAM(Builder, testcaseName); +}; + +class ReferenceExtractImagePatchesTest : public testing::TestWithParam, public CommonReferenceTest { +public: + void SetUp() override { + auto params = GetParam(); + function = CreateFunction(params); + inputData = {params.data.data}; + refOutData = {params.expectedResult.data}; + } + + static std::string getTestCaseName(const testing::TestParamInfo& obj) { + auto param = obj.param; + std::ostringstream result; + result << "dType=" << param.data.type; + result << "_dShape=" << param.data.shape; + result << "_sizes=" << param.sizes; + result << "_strides=" << param.strides; + result << "_rates=" << param.rates; + result << "_autoPad=" << param.autoPad; + result << "_eType=" << param.expectedResult.type; + result << "_eShape=" << param.expectedResult.shape; + if (param.testcaseName != "") { + result << "_=" << param.testcaseName; + } + return result.str(); + } + +private: + static std::shared_ptr CreateFunction(const ExtractImagePatchesParams& params) { + const auto data = std::make_shared(params.data.type, params.data.shape); + const auto extrace_image_patches = std::make_shared(data, + params.sizes, + params.strides, + params.rates, + params.autoPad); + const auto f = std::make_shared(extrace_image_patches, ParameterVector{data}); + return f; + } +}; + +TEST_P(ReferenceExtractImagePatchesTest, CompareWithRefs) { + Exec(); +} + +template +std::vector generateParams() { + using T = typename element_type_traits::value_type; + std::vector params { + Builder {} + .data({ET, {1, 1, 10, 10}, std::vector{ + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, + 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, + 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, + 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, + 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, + 51, 52, 
53, 54, 55, 56, 57, 58, 59, 60, + 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, + 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, + 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, + 91, 92, 93, 94, 95, 96, 97, 98, 99, 100}}) + .sizes({3, 3}) + .strides({5, 5}) + .rates({1, 1}) + .autoPad(op::PadType::VALID) + .expectedResult({ET, {1, 9, 2, 2}, std::vector{ + 1, 6, 51, 56, + 2, 7, 52, 57, + 3, 8, 53, 58, + 11, 16, 61, 66, + 12, 17, 62, 67, + 13, 18, 63, 68, + 21, 26, 71, 76, + 22, 27, 72, 77, + 23, 28, 73, 78}}), + + Builder {} + .data({ET, {1, 1, 10, 10}, std::vector{ + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, + 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, + 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, + 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, + 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, + 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, + 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, + 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, + 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, + 91, 92, 93, 94, 95, 96, 97, 98, 99, 100}}) + .sizes({4, 4}) + .strides({8, 8}) + .rates({1, 1}) + .autoPad(op::PadType::VALID) + .expectedResult({ET, {1, 16, 1, 1}, std::vector{ + 1, 2, 3, 4, + 11, 12, 13, 14, + 21, 22, 23, 24, + 31, 32, 33, 34}}), + + Builder {} + .data({ET, {1, 1, 10, 10}, std::vector{ + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, + 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, + 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, + 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, + 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, + 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, + 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, + 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, + 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, + 91, 92, 93, 94, 95, 96, 97, 98, 99, 100}}) + .sizes({4, 4}) + .strides({9, 9}) + .rates({1, 1}) + .autoPad(op::PadType::SAME_UPPER) + .expectedResult({ET, {1, 16, 2, 2}, std::vector{ + 0, 0, 0, 89, + 0, 0, 81, 90, + 0, 0, 82, 0, + 0, 0, 83, 0, + 0, 9, 0, 99, + 1, 10, 91, 100, + 2, 0, 92, 0, + 3, 0, 93, 0, + 0, 19, 0, 0, + 11, 20, 0, 0, + 12, 0, 0, 0, + 13, 0, 0, 0, + 0, 29, 0, 0, + 21, 30, 0, 0, + 22, 0, 0, 0, + 23, 0, 0, 0}}), + + Builder {} + .data({ET, {1, 1, 10, 10}, std::vector{ + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, + 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, + 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, + 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, + 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, + 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, + 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, + 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, + 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, + 91, 92, 93, 94, 95, 96, 97, 98, 99, 100}}) + .sizes({3, 3}) + .strides({5, 5}) + .rates({2, 2}) + .autoPad(op::PadType::VALID) + .expectedResult({ET, {1, 9, 2, 2}, std::vector{ + 1, 6, 51, 56, + 3, 8, 53, 58, + 5, 10, 55, 60, + 21, 26, 71, 76, + 23, 28, 73, 78, + 25, 30, 75, 80, + 41, 46, 91, 96, + 43, 48, 93, 98, + 45, 50, 95, 100}}), + + Builder {} + .data({ET, {1, 2, 5, 5}, std::vector{ + 1, 2, 3, 4, 5, + 6, 7, 8, 9, 10, + 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, + 21, 22, 23, 24, 25, + 26, 27, 28, 29, 30, + 31, 32, 33, 34, 35, + 36, 37, 38, 39, 40, + 41, 42, 43, 44, 45, + 46, 47, 48, 49, 50}}) + .sizes({2, 2}) + .strides({3, 3}) + .rates({1, 1}) + .autoPad(op::PadType::VALID) + .expectedResult({ET, {1, 8, 2, 2}, std::vector{ + 1, 4, 16, 19, + 26, 29, 41, 44, + 2, 5, 17, 20, + 27, 30, 42, 45, + 6, 9, 21, 24, + 31, 34, 46, 49, + 7, 10, 22, 25, + 32, 35, 47, 50}}), + }; + return params; +} + +std::vector generateCombinedParams() { + const std::vector> generatedParams { + generateParams(), + generateParams(), + generateParams(), + generateParams(), + generateParams(), + 
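// [Editor's note] The hard-coded output shapes in the cases above all follow the
// usual patch-extraction arithmetic. With VALID padding, each spatial axis gives
//   out_dim = (in_dim - (size - 1) * rate - 1) / stride + 1   (integer division)
// and the channel count becomes depth * size_h * size_w. First case: a 10x10 input
// with sizes {3,3}, strides {5,5}, rates {1,1} yields (10 - 2 - 1)/5 + 1 = 2 per
// axis and 1*3*3 = 9 channels, hence {1, 9, 2, 2}. With SAME_UPPER the spatial
// dims are ceil(in_dim / stride) instead (ceil(10/9) = 2 in the {4,4}/{9,9} case),
// with the zero padding biased to the bottom/right, which is where the zeros in
// that case's expected tensor come from.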
generateParams(), + generateParams(), + generateParams(), + generateParams(), + generateParams(), + generateParams(), + generateParams(), + }; + std::vector combinedParams; + + for (const auto& params : generatedParams) { + combinedParams.insert(combinedParams.end(), params.begin(), params.end()); + } + return combinedParams; +} + +INSTANTIATE_TEST_SUITE_P(smoke_ExtractImagePatches_With_Hardcoded_Refs, ReferenceExtractImagePatchesTest, + testing::ValuesIn(generateCombinedParams()), ReferenceExtractImagePatchesTest::getTestCaseName); +} // namespace \ No newline at end of file From d9ecb108f115c5f84556768be42e6dd9db2b831b Mon Sep 17 00:00:00 2001 From: Steve Yoo Date: Thu, 16 Dec 2021 03:46:10 +0900 Subject: [PATCH 09/27] Create Einsum-7 (#8982) --- .../tests/functional/op_reference/einsum.cpp | 182 ++++++++++++++++++ 1 file changed, 182 insertions(+) create mode 100644 docs/template_plugin/tests/functional/op_reference/einsum.cpp diff --git a/docs/template_plugin/tests/functional/op_reference/einsum.cpp b/docs/template_plugin/tests/functional/op_reference/einsum.cpp new file mode 100644 index 00000000000..31460fd2ccf --- /dev/null +++ b/docs/template_plugin/tests/functional/op_reference/einsum.cpp @@ -0,0 +1,182 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "openvino/opsets/opset7.hpp" +#include "openvino/opsets/opset1.hpp" +#include "base_reference_test.hpp" + +using namespace reference_tests; +using namespace ov; + +namespace { +struct EinsumParams { + std::vector inputs; + std::string equation; + Tensor expectedResult; + std::string testcaseName; +}; + +struct Builder : ParamsBuilder { + REFERENCE_TESTS_ADD_SET_PARAM(Builder, inputs); + REFERENCE_TESTS_ADD_SET_PARAM(Builder, equation); + REFERENCE_TESTS_ADD_SET_PARAM(Builder, expectedResult); + REFERENCE_TESTS_ADD_SET_PARAM(Builder, testcaseName); +}; + +class ReferenceEinsumTest : public testing::TestWithParam, public CommonReferenceTest { +public: + void SetUp() override { + auto params = GetParam(); + function = CreateFunction(params); + for (const auto& input_tensor : params.inputs) { + inputData.push_back(input_tensor.data); + } + refOutData = {params.expectedResult.data}; + } + + static std::string getTestCaseName(const testing::TestParamInfo& obj) { + auto param = obj.param; + std::ostringstream result; + result << "iType=" << param.inputs[0].type; + result << "_iShape=" << param.inputs[0].shape; + result << "_equation=" << param.equation; + result << "_eType=" << param.expectedResult.type; + result << "_eShape=" << param.expectedResult.shape; + if (param.testcaseName != "") { + result << "_=" << param.testcaseName; + } + return result.str(); + } + +private: + static std::shared_ptr CreateFunction(const EinsumParams& params) { + OutputVector output_vector; + ParameterVector param_vector; + for (const auto& input_tensor : params.inputs) { + auto param = std::make_shared(input_tensor.type, input_tensor.shape); + output_vector.push_back(param); + param_vector.push_back(param); + } + const auto einsum = std::make_shared(output_vector, params.equation); + const auto f = std::make_shared(OutputVector{einsum}, param_vector); + return f; + } +}; + +TEST_P(ReferenceEinsumTest, CompareWithRefs) { + Exec(); +} + +template +std::vector generateParams() { + using T = typename element_type_traits::value_type; + std::vector params { + Builder {} + .inputs({{ET, {1, 2}, std::vector{1, 2}}, + {ET, {3, 4}, std::vector{3, 4, 5, 6, + 7, 8, 9, 10, + 11, 12, 13, 14}}}) + 
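// [Editor's note] In every case below, the equation string drives the computation:
// each letter labels one axis of the corresponding input, letters missing from the
// part after "->" are summed over, and "..." stands for any remaining axes. So
// "ab,cd->abcd" is an outer product,
//   out[a][b][c][d] = lhs[a][b] * rhs[c][d],
// while "ab,bc->ac" is plain matrix multiplication,
//   out[a][c] = sum over b of lhs[a][b] * rhs[b][c],
// which is what the einsum_matrix_multiplication case encodes: for the 2x3 and
// 3x2 inputs {1..6}, out = {22, 28, 49, 64}.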
.equation("ab,cd->abcd") + .expectedResult({ET, {1, 2, 3, 4}, std::vector{3, 4, 5, 6, 7, 8, 9, 10, + 11, 12, 13, 14, 6, 8, 10, 12, + 14, 16, 18, 20, 22, 24, 26, 28}}) + .testcaseName("einsum_no_reduction"), + Builder {} + .inputs({{ET, {1, 2, 3}, std::vector{1, 2, 3, 4, 5, 6}}}) + .equation("ijk->kij") + .expectedResult({ET, {3, 1, 2}, std::vector{1, 4, 2, 5, 3, 6}}) + .testcaseName("einsum_transpose"), + + Builder {} + .inputs({{ET, {2, 3}, std::vector{1, 2, 3, 4, 5, 6}}}) + .equation("ab->a") + .expectedResult({ET, {2}, std::vector{6, 15}}) + .testcaseName("einsum_reduce"), + + Builder {} + .inputs({{ET, {2, 3}, std::vector{1, 2, 3, 4, 5, 6}}, + {ET, {3, 2}, std::vector{1, 2, 3, 4, 5, 6}}}) + .equation("ab,bc->ac") + .expectedResult({ET, {2, 2}, std::vector{22, 28, 49, 64}}) + .testcaseName("einsum_matrix_multiplication"), + + Builder {} + .inputs({{ET, {2, 4}, std::vector{1, 3, 2, 7, 5, 6, 0, 1}}, + {ET, {4, 3, 1}, std::vector{1, 2, 3, 4, 5, 6, 5, 7, 3, 7, 9, 1}}, + {ET, {4, 3}, std::vector{4, 3, 1, 6, 4, 2, 2, 5, 3, 1, 9, 4}}}) + .equation("ab,bcd,bc->ca") + .expectedResult({ET, {3, 2}, std::vector{145, 171, 703, 231, 85, 91}}) + .testcaseName("einsum_multiple_multiplication"), + + Builder {} + .inputs({{ET, {2, 2, 3}, std::vector{1, 3, 2, 7, 5, 6, 3, 5, 2, 1, 0, 7}}}) + .equation("a...->...") + .expectedResult({ET, {2, 3}, std::vector{4, 8, 4, 8, 5, 13}}) + .testcaseName("einsum_ellipsis_one_input_reduction"), + + Builder {} + .inputs({{ET, {2, 2, 3}, std::vector{1, 3, 2, 7, 5, 6, 3, 5, 2, 1, 0, 7}}}) + .equation("a...->...a") + .expectedResult({ET, {2, 3, 2}, std::vector{1, 3, 3, 5, 2, 2, 7, 1, 5, 0, 6, 7}}) + .testcaseName("einsum_ellipsis_one_input_transpose"), + + Builder {} + .inputs({{ET, {2, 2, 3}, std::vector{1, 3, 2, 7, 5, 6, 3, 5, 2, 1, 0, 7}}, + {ET, {1}, std::vector{2}}}) + .equation("ab...,...->ab...") + .expectedResult({ET, {2, 2, 3}, std::vector{2, 6, 4, 14, 10, 12, 6, 10, 4, 2, 0, 14}}) + .testcaseName("einsum_ellipsis_mul_by_1dscalar"), + + Builder {} + .inputs({{ET, {1, 1, 4, 3}, std::vector{1, 3, 2, 7, 5, 6, 3, 5, 2, 1, 0, 7}}, + {ET, {3, 4, 2, 1}, std::vector{3, 1, 6, 2, 3, 10, 9, 8, 2, 9, 3, 2, + 4, 2, 3, 1, 9, 1, 11, 4, 7, 2, 3, 1}}}) + .equation("a...j,j...->a...") + .expectedResult({ET, {1, 4, 2, 4}, std::vector{27, 85, 37, 66, 30, 58, 50, 8, + 37, 123, 55, 83, 16, 48, 24, 30, + 29, 83, 43, 52, 20, 92, 44, 24, + 24, 96, 48, 30, 13, 67, 31, 15}}) + .testcaseName("einsum_ellipsis_complex_mul"), + + Builder {} + .inputs({{ET, {1, 3, 3}, std::vector{1, 2, 3, 4, 5, 6, 7, 8, 9}}}) + .equation("kii->ki") + .expectedResult({ET, {1, 3}, std::vector{1, 5, 9}}) + .testcaseName("einsum_diagonal"), + + Builder {} + .inputs({{ET, {2, 3, 3, 2, 4}, std::vector{4, 2, 5, 4, 5, 5, 1, 1, 3, 3, 1, 1, 2, 2, 4, 1, 3, 4, + 4, 5, 1, 3, 1, 3, 1, 4, 3, 5, 4, 4, 5, 4, 4, 5, 4, 2, + 2, 2, 3, 3, 1, 1, 4, 3, 4, 2, 2, 1, 1, 2, 3, 1, 1, 4, + 2, 3, 1, 3, 4, 2, 5, 5, 3, 4, 3, 4, 5, 4, 4, 5, 1, 3, + 4, 4, 5, 3, 1, 3, 2, 5, 3, 2, 5, 4, 4, 2, 4, 4, 1, 4, + 4, 5, 4, 4, 4, 2, 3, 3, 4, 2, 4, 2, 5, 1, 3, 2, 4, 3, + 5, 1, 2, 3, 1, 1, 2, 5, 1, 1, 2, 1, 4, 5, 3, 4, 1, 3, + 3, 1, 3, 2, 4, 5, 1, 1, 5, 4, 5, 2, 2, 3, 3, 1, 2, 4}}, + {ET, {3, 2, 1}, std::vector{1, 4, 4, 5, 3, 3}}}) + .equation("abbac,bad->ad") + .expectedResult({ET, {2, 1}, std::vector{123, 129}}) + .testcaseName("einsum_diagonal_with_matmul"), + }; + return params; +} + +std::vector generateCombinedParams() { + const std::vector> generatedParams { + generateParams(), + generateParams(), + }; + std::vector combinedParams; + + for (const auto& 
params : generatedParams) {
+        combinedParams.insert(combinedParams.end(), params.begin(), params.end());
+    }
+    return combinedParams;
+}
+
+INSTANTIATE_TEST_SUITE_P(smoke_Einsum_With_Hardcoded_Refs, ReferenceEinsumTest,
+    testing::ValuesIn(generateCombinedParams()), ReferenceEinsumTest::getTestCaseName);
+} // namespace
\ No newline at end of file

From 38bbc30a29680f7690fc086c4b08d642927aa4ce Mon Sep 17 00:00:00 2001
From: Mateusz Tabaka
Date: Wed, 15 Dec 2021 21:24:35 +0100
Subject: [PATCH 10/27] [ONNX] Fix memleak caused by shared_ptr cyclic dependency (#9236)

ONNXFrameworkNode had its own copy of the Graph shared_ptr so that, during the
convert phase, it could be used to produce real ngraph nodes (by
graph->make_ng_nodes(..)). But Graph also keeps ONNXFrameworkNodes in its cache
and, in consequence, holds its own shared_ptr to them, which caused a dependency
cycle. This change removes the shared_ptr from the ONNXFrameworkNode class and
moves it to the decoded function's runtime info, so the Graph is now owned in a
single place and its lifetime ends when the decoded function is destroyed.
---
 .../tests/test_frontend/test_frontend_onnx.py | 50 ++++++++++++++++---
 src/core/tests/onnx/onnx_import.in.cpp        |  6 +--
 .../onnx/frontend/src/core/graph.cpp          | 16 +++---
 .../onnx/frontend/src/core/graph.hpp          |  2 +
 .../onnx/frontend/src/onnx_framework_node.cpp |  6 ++-
 .../onnx/frontend/src/onnx_framework_node.hpp | 42 +++++++---------
 .../onnx/frontend/src/utils/onnx_internal.cpp |  9 +++-
 7 files changed, 89 insertions(+), 42 deletions(-)

diff --git a/src/bindings/python/tests/test_frontend/test_frontend_onnx.py b/src/bindings/python/tests/test_frontend/test_frontend_onnx.py
index 7c36999c9cd..f691e55bdb6 100644
--- a/src/bindings/python/tests/test_frontend/test_frontend_onnx.py
+++ b/src/bindings/python/tests/test_frontend/test_frontend_onnx.py
@@ -26,6 +26,32 @@ def create_onnx_model():
     return make_model(graph, producer_name="ngraph ONNX Importer")
 
 
+def create_onnx_model_with_subgraphs():
+    A = onnx.helper.make_tensor_value_info("A", onnx.TensorProto.FLOAT, [3])
+    B = onnx.helper.make_tensor_value_info("B", onnx.TensorProto.FLOAT, [3])
+    add_out = onnx.helper.make_tensor_value_info("add_out", onnx.TensorProto.FLOAT, [3])
+    sub_out = onnx.helper.make_tensor_value_info("sub_out", onnx.TensorProto.FLOAT, [3])
+
+    add = onnx.helper.make_node("Add", inputs=["A", "B"], outputs=["add_out"])
+    sub = onnx.helper.make_node("Sub", inputs=["A", "B"], outputs=["sub_out"])
+
+    then_body = make_graph([add], "then_body", [], [add_out])
+    else_body = make_graph([sub], "else_body", [], [sub_out])
+
+    if_node = onnx.helper.make_node(
+        "If",
+        inputs=["cond"],
+        outputs=["res"],
+        then_branch=then_body,
+        else_branch=else_body
+    )
+    cond = onnx.helper.make_tensor_value_info("cond", onnx.TensorProto.BOOL, [])
+    res = onnx.helper.make_tensor_value_info("res", onnx.TensorProto.FLOAT, [3])
+
+    graph = make_graph([if_node], "graph", [cond, A, B], [res])
+    return make_model(graph, producer_name="ngraph ONNX Importer")
+
+
 def run_function(function, *inputs, expected):
     runtime = get_runtime()
     computation = runtime.computation(function)
@@ -37,15 +63,18 @@ def run_function(function, *inputs, expected):
 
 fem = FrontEndManager()
 onnx_model_filename = "model.onnx"
+onnx_model_with_subgraphs_filename = "model_subgraphs.onnx"
 ONNX_FRONTEND_NAME = "onnx"
 
 
 def setup_module():
     onnx.save_model(create_onnx_model(), onnx_model_filename)
+    onnx.save_model(create_onnx_model_with_subgraphs(), onnx_model_with_subgraphs_filename)
 
 
 def teardown_module():
     os.remove(onnx_model_filename)
+
os.remove(onnx_model_with_subgraphs_filename) def skip_if_onnx_frontend_is_disabled(): @@ -72,17 +101,29 @@ def test_convert(): run_function(function, a, b, expected=[expected]) -def test_decode_and_convert(): +@pytest.mark.parametrize("model_filename, inputs, expected", [ + [onnx_model_filename, + [np.array([[1, 2], [3, 4]], dtype=np.float32), + np.array([[2, 3], [4, 5]], dtype=np.float32)], + np.array([[1.5, 5], [10.5, 18]], dtype=np.float32)], + [onnx_model_with_subgraphs_filename, + [np.array(False, dtype=bool), + np.array([1, 2, 3], dtype=np.float32), + np.array([2, 3, 5], dtype=np.float32)], + np.array([-1, -1, -2], dtype=np.float32)], +]) +def test_decode_and_convert(model_filename, inputs, expected): skip_if_onnx_frontend_is_disabled() fe = fem.load_by_framework(framework=ONNX_FRONTEND_NAME) assert fe - model = fe.load(onnx_model_filename) + model = fe.load(model_filename) assert model decoded_function = fe.decode(model) assert decoded_function + for op in decoded_function.get_ordered_ops(): assert op.get_type_name() in ["Parameter", "Constant", "ONNXFrameworkNode", "ONNXSubgraphFrameworkNode", "Result"] @@ -92,10 +133,7 @@ def test_decode_and_convert(): for op in decoded_function.get_ordered_ops(): assert op.get_type_name() not in ["ONNXFrameworkNode", "ONNXSubgraphFrameworkNode"] - a = np.array([[1, 2], [3, 4]], dtype=np.float32) - b = np.array([[2, 3], [4, 5]], dtype=np.float32) - expected = np.array([[1.5, 5], [10.5, 18]], dtype=np.float32) - run_function(decoded_function, a, b, expected=[expected]) + run_function(decoded_function, *inputs, expected=[expected]) def test_load_by_model(): diff --git a/src/core/tests/onnx/onnx_import.in.cpp b/src/core/tests/onnx/onnx_import.in.cpp index efac638d743..100c8a2d720 100644 --- a/src/core/tests/onnx/onnx_import.in.cpp +++ b/src/core/tests/onnx/onnx_import.in.cpp @@ -380,7 +380,7 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_initializer_wo_input) { test_case.run(); } -NGRAPH_TEST(onnx_${BACKEND_NAME}, onnx_expand_function) { +NGRAPH_TEST(${BACKEND_NAME}, onnx_expand_function) { const auto function = onnx_import::import_onnx_model( file_util::path_join(SERIALIZED_ZOO, "onnx/quantization/dynamicquantizelinear.onnx")); @@ -392,7 +392,7 @@ NGRAPH_TEST(onnx_${BACKEND_NAME}, onnx_expand_function) { test_case.run(); } -NGRAPH_TEST(onnx_${BACKEND_NAME}, onnx_expand_function_dependency_to_created_subgraph) { +NGRAPH_TEST(${BACKEND_NAME}, onnx_expand_function_dependency_to_created_subgraph) { const auto function = onnx_import::import_onnx_model( file_util::path_join(SERIALIZED_ZOO, "onnx/transformations/greater_or_equal.onnx")); @@ -403,7 +403,7 @@ NGRAPH_TEST(onnx_${BACKEND_NAME}, onnx_expand_function_dependency_to_created_sub test_case.run(); } -NGRAPH_TEST(onnx_${BACKEND_NAME}, onnx_expand_context_dependent_function) { +NGRAPH_TEST(${BACKEND_NAME}, onnx_expand_context_dependent_function) { auto function = onnx_import::import_onnx_model( file_util::path_join(SERIALIZED_ZOO, "onnx/transformations/softmax_crossentropy_consumed.onnx")); diff --git a/src/frontends/onnx/frontend/src/core/graph.cpp b/src/frontends/onnx/frontend/src/core/graph.cpp index 2c76c62d377..212766fc550 100644 --- a/src/frontends/onnx/frontend/src/core/graph.cpp +++ b/src/frontends/onnx/frontend/src/core/graph.cpp @@ -199,9 +199,10 @@ void Graph::decode_to_framework_nodes() { if (node.has_subgraphs()) { const auto& subgraphs = node.get_subgraphs(); auto inputs = node.get_ng_inputs(); + std::vector> functions; for (const auto& kv : subgraphs) { auto& subgraph = kv.second; - 
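// [Editor's note] This hunk is the behavioural half of the memleak fix described
// in the commit message: each subgraph's decode() result is now collected into the
// local `functions` vector (the '+' lines around this point) and handed to
// ONNXSubgraphFrameworkNode up front, instead of the node calling back into the
// Graph later. Combined with the get_inputs_from_parent() loop that follows, this
// is exactly what the new Python If-model test exercises: both branches read A and
// B from the enclosing graph, so with cond=False the Sub branch computes
// [1, 2, 3] - [2, 3, 5] == [-1, -1, -2], the expected value asserted there.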
subgraph->decode(); + functions.push_back(subgraph->decode()); for (const auto& input : subgraph->get_inputs_from_parent()) { const auto& name = input.get_node()->get_friendly_name(); if (std::find_if(inputs.begin(), inputs.end(), [&name](const Output& n) -> bool { @@ -211,10 +212,9 @@ void Graph::decode_to_framework_nodes() { } } } - framework_node = - std::make_shared(shared_from_this(), node, inputs); + framework_node = std::make_shared(node, functions, inputs); } else { - framework_node = std::make_shared(shared_from_this(), node); + framework_node = std::make_shared(node); } OutputVector ng_nodes{framework_node->outputs()}; set_friendly_names(node, ng_nodes); @@ -240,7 +240,10 @@ std::shared_ptr Graph::create_function() { std::shared_ptr Graph::decode() { decode_to_framework_nodes(); - return create_function(); + auto function = create_function(); + auto& rt_info = function->get_rt_info(); + rt_info[ONNX_GRAPH_RT_ATTRIBUTE] = shared_from_this(); + return function; } bool Graph::is_ng_node_in_cache(const std::string& name) const { @@ -399,7 +402,8 @@ void Subgraph::find_inputs_from_parent() { for (const auto& out_name : node_proto.output()) { if (m_cache->contains(out_name)) { auto node_to_replace_input = m_cache->get_node(out_name).get_node(); - if (!dynamic_cast(node_to_replace_input)) + if (!ov::is_type(node_to_replace_input) && + !ov::is_type(node_to_replace_input)) continue; auto inputs = node_to_replace_input->input_values(); for (size_t i = 0; i < inputs.size(); i++) { diff --git a/src/frontends/onnx/frontend/src/core/graph.hpp b/src/frontends/onnx/frontend/src/core/graph.hpp index a7a983b038a..0e5c2378d32 100644 --- a/src/frontends/onnx/frontend/src/core/graph.hpp +++ b/src/frontends/onnx/frontend/src/core/graph.hpp @@ -121,6 +121,8 @@ inline std::ostream& operator<<(std::ostream& outs, const Graph& graph) { return (outs << ""); } +static const char* const ONNX_GRAPH_RT_ATTRIBUTE = "onnx_graph"; + } // namespace onnx_import } // namespace ngraph diff --git a/src/frontends/onnx/frontend/src/onnx_framework_node.cpp b/src/frontends/onnx/frontend/src/onnx_framework_node.cpp index bac360586d3..22eb9b56299 100644 --- a/src/frontends/onnx/frontend/src/onnx_framework_node.cpp +++ b/src/frontends/onnx/frontend/src/onnx_framework_node.cpp @@ -21,10 +21,14 @@ namespace frontend { NGRAPH_RTTI_DEFINITION(ONNXFrameworkNode, "ONNXFrameworkNode", 1); std::shared_ptr ONNXFrameworkNode::clone_with_new_inputs(const OutputVector& inputs) const { - return std::make_shared(m_graph, m_node, inputs); + return std::make_shared(m_node, inputs); } NGRAPH_RTTI_DEFINITION(ONNXSubgraphFrameworkNode, "ONNXSubgraphFrameworkNode", 1); +std::shared_ptr ONNXSubgraphFrameworkNode::clone_with_new_inputs(const OutputVector& inputs) const { + return std::make_shared(m_node, m_functions, inputs); +} + } // namespace frontend } // namespace ngraph diff --git a/src/frontends/onnx/frontend/src/onnx_framework_node.hpp b/src/frontends/onnx/frontend/src/onnx_framework_node.hpp index 852a3f07b09..8e52dd3dd18 100644 --- a/src/frontends/onnx/frontend/src/onnx_framework_node.hpp +++ b/src/frontends/onnx/frontend/src/onnx_framework_node.hpp @@ -38,20 +38,16 @@ class ONNXFrameworkNode : public ov::op::util::FrameworkNode { public: NGRAPH_RTTI_DECLARATION; - ONNXFrameworkNode(std::shared_ptr graph, const onnx_import::Node& node) + ONNXFrameworkNode(const onnx_import::Node& node) : ov::op::util::FrameworkNode(node.get_ng_inputs(), node.get_outputs_size()), - m_node(node), - m_graph(graph) {} + m_node(node) {} - 
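// [Editor's note] The m_graph member deleted in this hunk is the root cause from
// the commit message: Graph kept shared_ptr references to its ONNXFrameworkNodes
// in m_cache while every node kept a shared_ptr to the Graph back, so neither
// refcount could ever drop to zero. A toy sketch of the cycle and of the
// replacement ownership (abbreviated names, not the literal OpenVINO types):
//
//   struct Node;
//   struct Graph { std::vector<std::shared_ptr<Node>> cache; };
//   struct Node  { std::shared_ptr<Graph> graph; };   // cycle: nothing is freed
//
//   // after this patch only the decoded Function owns the Graph:
//   //   rt_info[ONNX_GRAPH_RT_ATTRIBUTE] = shared_from_this();   (in Graph::decode)
//   // and convert_decoded_function() erases that entry when it finishes, ending
//   // the Graph's lifetime together with the conversion.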
ONNXFrameworkNode(std::shared_ptr graph, - const onnx_import::Node& node, - const OutputVector& inputs) + ONNXFrameworkNode(const onnx_import::Node& node, const OutputVector& inputs) : ov::op::util::FrameworkNode(inputs, node.get_outputs_size()), - m_node(node), - m_graph(graph) {} + m_node(node) {} - OutputVector get_ng_nodes() const { - OutputVector ng_nodes{m_graph->make_ng_nodes(m_node)}; + OutputVector get_ng_nodes(const std::shared_ptr& graph) const { + OutputVector ng_nodes{graph->make_ng_nodes(m_node)}; if (ng_nodes.size() > get_output_size()) { ng_nodes.resize(get_output_size()); } @@ -71,35 +67,31 @@ public: protected: onnx_import::Node m_node; - -private: - std::shared_ptr m_graph; }; class ONNXSubgraphFrameworkNode : public ONNXFrameworkNode { public: NGRAPH_RTTI_DECLARATION; - ONNXSubgraphFrameworkNode(std::shared_ptr graph, - const onnx_import::Node& node, + ONNXSubgraphFrameworkNode(const onnx_import::Node& node, + const std::vector>& functions, const OutputVector& inputs) - : ONNXFrameworkNode(graph, node, inputs) {} + : ONNXFrameworkNode(node, inputs), + m_functions(functions) {} void infer_inputs_from_parent() { for (auto& subgraph : m_node.get_subgraphs()) subgraph.second->infer_inputs_from_parent(); } - std::vector> get_subgraph_functions() const { - std::vector> ret; - for (const auto& kv : m_node.get_subgraphs()) { - auto& subgraph = kv.second; - ret.push_back(std::make_shared(subgraph->get_ng_outputs(), - subgraph->get_ng_parameters(), - subgraph->get_name())); - } - return ret; + const std::vector>& get_subgraph_functions() const { + return m_functions; } + + virtual std::shared_ptr clone_with_new_inputs(const OutputVector& inputs) const override; + +private: + std::vector> m_functions; }; } // namespace frontend diff --git a/src/frontends/onnx/frontend/src/utils/onnx_internal.cpp b/src/frontends/onnx/frontend/src/utils/onnx_internal.cpp index 930c8fab619..aff727c9fa8 100644 --- a/src/frontends/onnx/frontend/src/utils/onnx_internal.cpp +++ b/src/frontends/onnx/frontend/src/utils/onnx_internal.cpp @@ -60,6 +60,12 @@ void apply_transformations(ONNX_NAMESPACE::ModelProto& model_proto, const std::s } // namespace void convert_decoded_function(std::shared_ptr function) { + auto& rt_info = function->get_rt_info(); + auto it = rt_info.find(ONNX_GRAPH_RT_ATTRIBUTE); + OPENVINO_ASSERT(it != rt_info.end(), + "Could not find '" + std::string(ONNX_GRAPH_RT_ATTRIBUTE) + + "' attribute in decoded model. 
Model probably wasn't created by FrontEnd::decode function."); + auto onnx_graph = it->second.as>(); for (const auto& node : function->get_ordered_ops()) { if (auto raw_node = std::dynamic_pointer_cast(node)) { if (auto subgraph_node = std::dynamic_pointer_cast(node)) { @@ -68,7 +74,7 @@ void convert_decoded_function(std::shared_ptr function) { convert_decoded_function(function); } } - auto ng_nodes = raw_node->get_ng_nodes(); + auto ng_nodes = raw_node->get_ng_nodes(onnx_graph); replace_node(raw_node, ng_nodes); } else { // Have to revalidate node because new intpus can affect shape/type @@ -76,6 +82,7 @@ void convert_decoded_function(std::shared_ptr function) { node->revalidate_and_infer_types(); } } + rt_info.erase(it); detail::remove_dangling_parameters(function); detail::remove_dangling_results(function); } From b64329430071ab8af0cb7de75e025859b517b463 Mon Sep 17 00:00:00 2001 From: Andrey Noskov Date: Wed, 15 Dec 2021 23:39:31 +0300 Subject: [PATCH 11/27] [GNA] Added import/export test (#8769) * [GNA] Added import/export test - still need fixes * Fixed inputs and const vals * Parametrized input shape for import base test * Deleted commented code * Fixed input shape in test * Fixed SF for import --- .../import_export_batch_size.cpp | 91 +++++++++++++++++++ .../import_export_multi_inputs.cpp | 11 ++- .../import_reshape_permute_conv.cpp | 6 ++ .../import_export_tests/import_nonzero.cpp | 3 + .../import_export_base/import_export_base.hpp | 1 + .../import_export_base/import_export_base.cpp | 4 +- .../import_export_tests/import_nonzero.cpp | 5 +- .../import_reshape_permute_conv.cpp | 5 +- 8 files changed, 119 insertions(+), 7 deletions(-) create mode 100644 src/tests/functional/plugin/gna/Import_export_tests/import_export_batch_size.cpp diff --git a/src/tests/functional/plugin/gna/Import_export_tests/import_export_batch_size.cpp b/src/tests/functional/plugin/gna/Import_export_tests/import_export_batch_size.cpp new file mode 100644 index 00000000000..da0e3c88a12 --- /dev/null +++ b/src/tests/functional/plugin/gna/Import_export_tests/import_export_batch_size.cpp @@ -0,0 +1,91 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "ngraph_functions/builders.hpp" +#include "base/import_export_base/import_export_base.hpp" + +namespace LayerTestDefinitions { + +class ImportBatchTest : public FuncTestUtils::ImportNetworkTestBase { +protected: + InferenceEngine::Blob::Ptr GenerateInput(const InferenceEngine::InputInfo& info) const override { + return FuncTestUtils::createAndFillBlob(info.getTensorDesc(), 0.2f, -0.1f); + } + + void SetUp() override { + InferenceEngine::Precision netPrecision; + std::vector inputShape; + std::string _; + std::tie(inputShape, netPrecision, targetDevice, exportConfiguration, importConfiguration, _) = this->GetParam(); + auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); + + auto params = ngraph::builder::makeParams(ngPrc, {inputShape}); + + auto mul_const_1 = ngraph::builder::makeConstant(ngPrc, { inputShape[1], 2048 }, + CommonTestUtils::generate_float_numbers(2048 * inputShape[1], -0.1f, 0.1f), false); + + auto matmul_1 = std::make_shared(params[0], mul_const_1); + auto sigmoid_1 = std::make_shared(matmul_1); + + auto mul_const_2 = ngraph::builder::makeConstant(ngPrc, { 2048, 3425 }, + CommonTestUtils::generate_float_numbers(2048 * 3425, -0.1f, 0.1f), false); + + auto matmul_2 = std::make_shared(sigmoid_1, 
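// [Editor's note] The network this fixture builds is a deliberately plain
// MatMul -> Sigmoid -> MatMul chain sized by inputShape; the point of the test is
// what the ImportNetworkTestBase base class does with it: compile for GNA under
// the export config defined below (GNA_SW_EXACT quantizes, with GNA_SCALE_FACTOR_0
// fixing the input scale), serialize the compiled blob, re-import it under the
// import config, and compare outputs. A hedged sketch of that round trip with the
// 2021-era InferenceEngine API (error handling omitted):
//
//   InferenceEngine::Core ie;
//   std::stringstream blob;
//   ie.LoadNetwork(cnnNetwork, "GNA", exportConfig).Export(blob);
//   auto imported = ie.ImportNetwork(blob, "GNA", importConfig);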
mul_const_2); + + function = std::make_shared(matmul_2, params, "ExportImportNetwork"); + } +}; + +TEST_P(ImportBatchTest, CompareWithRefImpl) { + Run(); +}; + +const std::vector> inputShapes = { + {1, 440}, + {2, 440}, + {4, 128} +}; + +const std::vector netPrecisions = { + InferenceEngine::Precision::FP32, + InferenceEngine::Precision::FP16 +}; + +const std::vector> exportConfigs = { + { + {"GNA_DEVICE_MODE", "GNA_SW_EXACT"}, + {"GNA_SCALE_FACTOR_0", "327.67"} + } +}; + +const std::vector> importConfigs = { + { + {"GNA_DEVICE_MODE", "GNA_SW_EXACT"} + } +}; + +const std::vector appHeader = { + "" +}; + +INSTANTIATE_TEST_SUITE_P(smoke_ImportNetworkBatchCase, ImportBatchTest, + ::testing::Combine( + ::testing::ValuesIn(inputShapes), + ::testing::ValuesIn(netPrecisions), + ::testing::Values(CommonTestUtils::DEVICE_GNA), + ::testing::ValuesIn(exportConfigs), + ::testing::ValuesIn(importConfigs), + ::testing::ValuesIn(appHeader)), + ImportBatchTest::getTestCaseName); +} // namespace LayerTestDefinitions diff --git a/src/tests/functional/plugin/gna/shared_tests_instances/import_export_tests/import_export_multi_inputs.cpp b/src/tests/functional/plugin/gna/shared_tests_instances/import_export_tests/import_export_multi_inputs.cpp index c2e381b5542..487dcf3f4e1 100644 --- a/src/tests/functional/plugin/gna/shared_tests_instances/import_export_tests/import_export_multi_inputs.cpp +++ b/src/tests/functional/plugin/gna/shared_tests_instances/import_export_tests/import_export_multi_inputs.cpp @@ -17,11 +17,12 @@ namespace LayerTestsDefinitions { class ImportMultiInput : public FuncTestUtils::ImportNetworkTestBase { protected: void SetUp() override { + std::vector inputShape; InferenceEngine::Precision netPrecision; - std::tie(netPrecision, targetDevice, exportConfiguration, importConfiguration, applicationHeader) = this->GetParam(); + std::tie(inputShape, netPrecision, targetDevice, exportConfiguration, importConfiguration, applicationHeader) = this->GetParam(); auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); - auto input = ngraph::builder::makeParams(ngPrc, {{1, 10}, {1, 10}}); + auto input = ngraph::builder::makeParams(ngPrc, {inputShape, inputShape}); auto mul1 = ngraph::builder::makeEltwise(input[0], input[1], ngraph::helpers::EltwiseTypes::ADD); auto result = std::make_shared(mul1); @@ -40,6 +41,10 @@ TEST_P(ImportMultiInputChanged, CompareWithRefImpl) { TestRun(true); }; +const std::vector> inputShape = { + {1, 10} +}; + const std::vector netPrecisions = { InferenceEngine::Precision::FP32 }; @@ -98,6 +103,7 @@ const std::vector> importConfigsUnchanged = { INSTANTIATE_TEST_SUITE_P(smoke_ImportNetworkGNA, ImportMultiInputUnchanged, ::testing::Combine( + ::testing::ValuesIn(inputShape), ::testing::ValuesIn(netPrecisions), ::testing::Values(CommonTestUtils::DEVICE_GNA), ::testing::ValuesIn(exportConfigs), @@ -107,6 +113,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_ImportNetworkGNA, ImportMultiInputUnchanged, INSTANTIATE_TEST_SUITE_P(smoke_ImportNetworkGNA, ImportMultiInputChanged, ::testing::Combine( + ::testing::ValuesIn(inputShape), ::testing::ValuesIn(netPrecisions), ::testing::Values(CommonTestUtils::DEVICE_GNA), ::testing::ValuesIn(exportConfigs), diff --git a/src/tests/functional/plugin/gna/shared_tests_instances/import_export_tests/import_reshape_permute_conv.cpp b/src/tests/functional/plugin/gna/shared_tests_instances/import_export_tests/import_reshape_permute_conv.cpp index 110b0fd66fe..0f8bd3d1fc9 100644 --- 
a/src/tests/functional/plugin/gna/shared_tests_instances/import_export_tests/import_reshape_permute_conv.cpp +++ b/src/tests/functional/plugin/gna/shared_tests_instances/import_export_tests/import_reshape_permute_conv.cpp @@ -52,6 +52,10 @@ TEST_P(ImportExportGNAModelChanged, ReshapePermuteConv) { TestRun(true); }; +const std::vector> inputShapes = { + {1, 336} +}; + const std::vector netPrecisions = { InferenceEngine::Precision::FP32, InferenceEngine::Precision::FP16 @@ -92,6 +96,7 @@ const std::vector appHeaders = { INSTANTIATE_TEST_SUITE_P(smoke_ImportNetworkGNA, ImportExportGNAModelUnchanged, ::testing::Combine( + ::testing::ValuesIn(inputShapes), ::testing::ValuesIn(netPrecisions), ::testing::Values(CommonTestUtils::DEVICE_GNA), ::testing::ValuesIn(exportConfigs), @@ -101,6 +106,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_ImportNetworkGNA, ImportExportGNAModelUnchanged, INSTANTIATE_TEST_SUITE_P(smoke_ImportNetworkGNA, ImportExportGNAModelChanged, ::testing::Combine( + ::testing::ValuesIn(inputShapes), ::testing::ValuesIn(netPrecisions), ::testing::Values(CommonTestUtils::DEVICE_GNA), ::testing::ValuesIn(exportConfigs), diff --git a/src/tests/functional/plugin/myriad/shared_tests_instances/import_export_tests/import_nonzero.cpp b/src/tests/functional/plugin/myriad/shared_tests_instances/import_export_tests/import_nonzero.cpp index 4555ac1ec9d..888ac6eb712 100644 --- a/src/tests/functional/plugin/myriad/shared_tests_instances/import_export_tests/import_nonzero.cpp +++ b/src/tests/functional/plugin/myriad/shared_tests_instances/import_export_tests/import_nonzero.cpp @@ -26,8 +26,11 @@ const std::vector appHeaders = { "APPLICATION_HEADER" }; +std::vector inputShape = ngraph::Shape{1000}; + INSTANTIATE_TEST_SUITE_P(smoke_ImportNetworkCase, ImportNonZero, ::testing::Combine( + ::testing::Values(inputShape), ::testing::ValuesIn(netPrecisions), ::testing::Values(CommonTestUtils::DEVICE_MYRIAD), ::testing::ValuesIn(exportConfigs), diff --git a/src/tests/functional/plugin/shared/include/base/import_export_base/import_export_base.hpp b/src/tests/functional/plugin/shared/include/base/import_export_base/import_export_base.hpp index 8795dced06f..e6ce7a46165 100644 --- a/src/tests/functional/plugin/shared/include/base/import_export_base/import_export_base.hpp +++ b/src/tests/functional/plugin/shared/include/base/import_export_base/import_export_base.hpp @@ -9,6 +9,7 @@ #include typedef std::tuple< + std::vector, // Input Shape InferenceEngine::Precision, // Network Precision std::string, // Target Device std::map, // Export Configuration diff --git a/src/tests/functional/plugin/shared/src/base/import_export_base/import_export_base.cpp b/src/tests/functional/plugin/shared/src/base/import_export_base/import_export_base.cpp index 3f7e6e0f149..be782eb6687 100644 --- a/src/tests/functional/plugin/shared/src/base/import_export_base/import_export_base.cpp +++ b/src/tests/functional/plugin/shared/src/base/import_export_base/import_export_base.cpp @@ -9,14 +9,16 @@ namespace FuncTestUtils { std::string ImportNetworkTestBase::getTestCaseName(testing::TestParamInfo obj) { + std::vector inputShape; InferenceEngine::Precision netPrecision; std::string targetDevice; std::map exportConfiguration; std::map importConfiguration; std::string appHeader; - std::tie(netPrecision, targetDevice, exportConfiguration, importConfiguration, appHeader) = obj.param; + std::tie(inputShape, netPrecision, targetDevice, exportConfiguration, importConfiguration, appHeader) = obj.param; std::ostringstream result; + result << "IS=" << 
CommonTestUtils::vec2str(inputShape) << "_"; result << "netPRC=" << netPrecision.name() << "_"; result << "targetDevice=" << targetDevice << "_"; for (auto const& configItem : exportConfiguration) { diff --git a/src/tests/functional/plugin/shared/src/import_export_tests/import_nonzero.cpp b/src/tests/functional/plugin/shared/src/import_export_tests/import_nonzero.cpp index 44ed3eff75c..64d3ad1192f 100644 --- a/src/tests/functional/plugin/shared/src/import_export_tests/import_nonzero.cpp +++ b/src/tests/functional/plugin/shared/src/import_export_tests/import_nonzero.cpp @@ -10,10 +10,11 @@ namespace LayerTestsDefinitions { void ImportNonZero::SetUp() { InferenceEngine::Precision netPrecision; - std::tie(netPrecision, targetDevice, exportConfiguration, importConfiguration, applicationHeader) = this->GetParam(); + ngraph::Shape inputShape; + std::tie(inputShape, netPrecision, targetDevice, exportConfiguration, importConfiguration, applicationHeader) = this->GetParam(); const auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); - const auto parameter = std::make_shared(ngPrc, ngraph::Shape{1000}); + const auto parameter = std::make_shared(ngPrc, inputShape); const auto nonZero = std::make_shared(parameter); function = std::make_shared(nonZero->outputs(), ngraph::ParameterVector{parameter}, "ExportImportNetwork"); diff --git a/src/tests/functional/plugin/shared/src/import_export_tests/import_reshape_permute_conv.cpp b/src/tests/functional/plugin/shared/src/import_export_tests/import_reshape_permute_conv.cpp index 5a6cb6b6ba6..664aa444854 100644 --- a/src/tests/functional/plugin/shared/src/import_export_tests/import_reshape_permute_conv.cpp +++ b/src/tests/functional/plugin/shared/src/import_export_tests/import_reshape_permute_conv.cpp @@ -9,11 +9,12 @@ namespace LayerTestsDefinitions { void ImportReshapePermuteConv::SetUp() { + std::vector inputShape; InferenceEngine::Precision netPrecision; - std::tie(netPrecision, targetDevice, exportConfiguration, importConfiguration, applicationHeader) = this->GetParam(); + std::tie(inputShape, netPrecision, targetDevice, exportConfiguration, importConfiguration, applicationHeader) = this->GetParam(); auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); - auto params = ngraph::builder::makeParams(ngPrc, { {1, 336} }); + auto params = ngraph::builder::makeParams(ngPrc, { inputShape }); std::vector outFormShapes1 = { 1, 1, 168, 2 }; auto pattern1 = std::make_shared(ngraph::element::Type_t::i64, ngraph::Shape{ 4 }, outFormShapes1); From 0b9158c2b82f5ec44eeaef76599614a2b29f8991 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dawid=20Ko=C5=BCykowski?= Date: Wed, 15 Dec 2021 21:40:43 +0100 Subject: [PATCH 12/27] Extend ONNX FE for operation Softmax-8 (#9189) --- .../onnx/softmax_axis_1_opset11.prototxt | 56 ++++++ .../softmax_axis_negative_1_opset11.prototxt | 56 ++++++ .../softmax_axis_negative_1_opset13.prototxt | 56 ++++++ src/core/tests/onnx/onnx_import.in.cpp | 162 ++++++++++++++---- .../onnx/frontend/src/op/softmax.cpp | 25 +-- 5 files changed, 296 insertions(+), 59 deletions(-) create mode 100644 src/core/tests/models/onnx/softmax_axis_1_opset11.prototxt create mode 100644 src/core/tests/models/onnx/softmax_axis_negative_1_opset11.prototxt create mode 100644 src/core/tests/models/onnx/softmax_axis_negative_1_opset13.prototxt diff --git a/src/core/tests/models/onnx/softmax_axis_1_opset11.prototxt b/src/core/tests/models/onnx/softmax_axis_1_opset11.prototxt new file mode 100644 index 00000000000..947b381db0b --- 
/dev/null +++ b/src/core/tests/models/onnx/softmax_axis_1_opset11.prototxt @@ -0,0 +1,56 @@ +ir_version: 3 +producer_name: "nGraph ONNX Importer" +graph { + node { + input: "x" + output: "y" + op_type: "Softmax" + attribute { + name: "axis" + i: 1 + type: INT + } + } + name: "test_softmax_axis_1" + input { + name: "x" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 3 + } + dim { + dim_value: 4 + } + dim { + dim_value: 5 + } + } + } + } + } + output { + name: "y" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 3 + } + dim { + dim_value: 4 + } + dim { + dim_value: 5 + } + } + } + } + } +} +opset_import { + version: 11 +} diff --git a/src/core/tests/models/onnx/softmax_axis_negative_1_opset11.prototxt b/src/core/tests/models/onnx/softmax_axis_negative_1_opset11.prototxt new file mode 100644 index 00000000000..ad9a4b72603 --- /dev/null +++ b/src/core/tests/models/onnx/softmax_axis_negative_1_opset11.prototxt @@ -0,0 +1,56 @@ +ir_version: 3 +producer_name: "nGraph ONNX Importer" +graph { + node { + input: "x" + output: "y" + op_type: "Softmax" + attribute { + name: "axis" + i: -1 + type: INT + } + } + name: "test_softmax_axis_0" + input { + name: "x" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 3 + } + dim { + dim_value: 4 + } + dim { + dim_value: 5 + } + } + } + } + } + output { + name: "y" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 3 + } + dim { + dim_value: 4 + } + dim { + dim_value: 5 + } + } + } + } + } +} +opset_import { + version: 11 +} diff --git a/src/core/tests/models/onnx/softmax_axis_negative_1_opset13.prototxt b/src/core/tests/models/onnx/softmax_axis_negative_1_opset13.prototxt new file mode 100644 index 00000000000..aff3afc2c52 --- /dev/null +++ b/src/core/tests/models/onnx/softmax_axis_negative_1_opset13.prototxt @@ -0,0 +1,56 @@ +ir_version: 3 +producer_name: "nGraph ONNX Importer" +graph { + node { + input: "x" + output: "y" + op_type: "Softmax" + attribute { + name: "axis" + i: -1 + type: INT + } + } + name: "test_softmax_axis_0" + input { + name: "x" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 3 + } + dim { + dim_value: 4 + } + dim { + dim_value: 5 + } + } + } + } + } + output { + name: "y" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 3 + } + dim { + dim_value: 4 + } + dim { + dim_value: 5 + } + } + } + } + } +} +opset_import { + version: 13 +} diff --git a/src/core/tests/onnx/onnx_import.in.cpp b/src/core/tests/onnx/onnx_import.in.cpp index 100c8a2d720..73f02233e0f 100644 --- a/src/core/tests/onnx/onnx_import.in.cpp +++ b/src/core/tests/onnx/onnx_import.in.cpp @@ -690,19 +690,24 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_softmax_1D) { } namespace { // common input for all Softmax 3D test cases (Shape = {3,4,5}) +// clang-format off const std::vector SOFTMAX_INPUT = { - 2.75793882, -0.50841322, 0.82013929, -0.62409912, -0.96136118, 0.21004745, 1.38337255, - 1.19030397, 2.0940445, -0.03551657, -0.78686039, 1.992782, 0.04300319, -0.29230777, - -0.56797112, -1.26732165, -0.61935399, 0.57670432, 0.92844898, 2.82469233, + 2.75793882, -0.50841322, 0.82013929, -0.62409912, -0.96136118, + 0.21004745, 1.38337255, 1.19030397, 2.0940445, -0.03551657, + -0.78686039, 1.992782, 0.04300319, -0.29230777, -0.56797112, + -1.26732165, -0.61935399, 0.57670432, 0.92844898, 2.82469233, - 0.98721677, -0.05100663, -1.21178917, -0.17530157, 1.40051805, -0.13259761, -1.14313018, - 0.2673723, -0.87996154, 1.29053106, 1.55, 0.8396538, 1.20729817, 
0.23727845, - -0.89113606, -1.70909842, 0.26460363, -0.70566808, 2.383518, 1.07024615, + 0.98721677, -0.05100663, -1.21178917, -0.17530157, 1.40051805, + -0.13259761, -1.14313018, 0.2673723, -0.87996154, 1.29053106, + 1.55, 0.8396538, 1.20729817, 0.23727845, -0.89113606, + -1.70909842, 0.26460363, -0.70566808, 2.383518, 1.07024615, - -1.21722605, 0.82919357, 0.55765697, 0.12657686, 0.63432172, 0.75425957, -2.43721014, - -1.24478184, 2.65316853, 1.19509542, -0.95523998, 0.5149006, -0.01151649, 0.68327026, - -0.4589638, -0.46554745, 0.21055324, 0.39266729, 2.05098086, 1.83207919}; + -1.21722605, 0.82919357, 0.55765697, 0.12657686, 0.63432172, + 0.75425957, -2.43721014, -1.24478184, 2.65316853, 1.19509542, + -0.95523998, 0.5149006, -0.01151649, 0.68327026, -0.4589638, + -0.46554745, 0.21055324, 0.39266729, 2.05098086, 1.83207919}; } // namespace +// clang-format on NGRAPH_TEST(${BACKEND_NAME}, onnx_model_softmax_axis_0) { auto function = onnx_import::import_onnx_model(file_util::path_join(SERIALIZED_ZOO, "onnx/softmax_axis_0.onnx")); @@ -710,19 +715,24 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_softmax_axis_0) { auto test_case = test::TestCase(function, s_device); test_case.add_input(SOFTMAX_INPUT); + // clang-format off test_case.add_expected_output( Shape{3, 4, 5}, - {0.09683057, 0.00369363, 0.01394559, 0.00329012, 0.00234823, 0.00757665, 0.02449322, - 0.02019284, 0.04985249, 0.00592694, 0.00279593, 0.04505148, 0.00641108, 0.00458466, - 0.00348007, 0.00172928, 0.00330577, 0.01093237, 0.01554086, 0.10351497, + {0.09683057, 0.00369363, 0.01394559, 0.00329012, 0.00234823, + 0.00757665, 0.02449322, 0.02019284, 0.04985249, 0.00592694, + 0.00279593, 0.04505148, 0.00641108, 0.00458466, 0.00348007, + 0.00172928, 0.00330577, 0.01093237, 0.01554086, 0.10351497, - 0.01648154, 0.00583583, 0.00182802, 0.00515374, 0.02491679, 0.00537859, 0.00195794, - 0.00802367, 0.00254737, 0.0223216, 0.02893419, 0.0142204, 0.02053893, 0.00778581, - 0.00251907, 0.00111174, 0.00800149, 0.0030324, 0.06658917, 0.0179084, + 0.01648154, 0.00583583, 0.00182802, 0.00515374, 0.02491679, + 0.00537859, 0.00195794, 0.00802367, 0.00254737, 0.0223216, + 0.02893419, 0.0142204, 0.02053893, 0.00778581, 0.00251907, + 0.00111174, 0.00800149, 0.0030324, 0.06658917, 0.0179084, - 0.00181811, 0.01407243, 0.01072611, 0.0069699, 0.01158077, 0.01305647, 0.00053677, - 0.0017687, 0.08719896, 0.02028982, 0.00236265, 0.01027717, 0.0060709, 0.01216173, - 0.00388087, 0.00385541, 0.00758048, 0.00909469, 0.04775123, 0.03836337}); + 0.00181811, 0.01407243, 0.01072611, 0.0069699, 0.01158077, + 0.01305647, 0.00053677, 0.0017687, 0.08719896, 0.02028982, + 0.00236265, 0.01027717, 0.0060709, 0.01216173, 0.00388087, + 0.00385541, 0.00758048, 0.00909469, 0.04775123, 0.03836337}); + // clang-format on test_case.run(6); } @@ -733,35 +743,113 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_softmax_axis_1) { auto test_case = test::TestCase(function, s_device); test_case.add_input(SOFTMAX_INPUT); + // clang-format off test_case.add_expected_output( Shape{3, 4, 5}, - {0.22757064, 0.00868076, 0.03277484, 0.00773243, 0.0055188, 0.0178066, 0.05756383, - 0.04745709, 0.11716303, 0.01392945, 0.00657097, 0.10587974, 0.01506727, 0.01077484, - 0.00817884, 0.00406413, 0.00776921, 0.0256932, 0.03652405, 0.24328028, + {0.22757064, 0.00868076, 0.03277484, 0.00773243, 0.0055188, + 0.0178066, 0.05756383, 0.04745709, 0.11716303, 0.01392945, + 0.00657097, 0.10587974, 0.01506727, 0.01077484, 0.00817884, + 0.00406413, 0.00776921, 0.0256932, 0.03652405, 0.24328028, - 0.06217413, 0.02201481, 
0.00689594, 0.01944171, 0.09399488, 0.02028993, 0.00738604, - 0.03026811, 0.00960958, 0.08420492, 0.10914991, 0.05364435, 0.07748005, 0.02937079, - 0.0095028, 0.00419387, 0.03018442, 0.01143929, 0.2511977, 0.06755678, + 0.06217413, 0.02201481, 0.00689594, 0.01944171, 0.09399488, + 0.02028993, 0.00738604, 0.03026811, 0.00960958, 0.08420492, + 0.10914991, 0.05364435, 0.07748005, 0.02937079, 0.0095028, + 0.00419387, 0.03018442, 0.01143929, 0.2511977, 0.06755678, - 0.00587593, 0.04548053, 0.0346656, 0.02252594, 0.03742775, 0.04219705, 0.00173478, - 0.00571623, 0.2818174, 0.06557446, 0.00763582, 0.03321466, 0.01962049, 0.03930537, - 0.01254255, 0.01246025, 0.02449929, 0.02939305, 0.15432668, 0.12398617}); + 0.00587593, 0.04548053, 0.0346656, 0.02252594, 0.03742775, + 0.04219705, 0.00173478, 0.00571623, 0.2818174, 0.06557446, + 0.00763582, 0.03321466, 0.01962049, 0.03930537, 0.01254255, + 0.01246025, 0.02449929, 0.02939305, 0.15432668, 0.12398617}); + // clang-format on test_case.run(4); } -NGRAPH_TEST(${BACKEND_NAME}, onnx_model_softmax_invalid_axis_1D) { - ASSERT_THROW( - onnx_import::import_onnx_model(file_util::path_join(SERIALIZED_ZOO, "onnx/softmax_invalid_axis_1D.onnx")), - ngraph::ngraph_error) - << "Softmax model with invalid axis was successfully imported while it should have thrown."; +NGRAPH_TEST(${BACKEND_NAME}, onnx_model_softmax_axis_1_opset11) { + auto function = + onnx_import::import_onnx_model(file_util::path_join(SERIALIZED_ZOO, "onnx/softmax_axis_1_opset11.onnx")); + + auto test_case = test::TestCase(function, s_device); + test_case.add_input(SOFTMAX_INPUT); + + // clang-format off + test_case.add_expected_output( + Shape{3, 4, 5}, + {0.88890495, 0.04825497, 0.27088348, 0.04490523, 0.02037154, + 0.06955369, 0.31998834, 0.39223197, 0.68041159, 0.05141776, + 0.02566661, 0.5885689, 0.12453075, 0.06257374, 0.03019055, + 0.01587475, 0.0431878, 0.21235381, 0.21210944, 0.89802015, + + 0.31752626, 0.19442629, 0.0546935, 0.06279221, 0.36823282, + 0.10362164, 0.06523066, 0.24006419, 0.03103672, 0.32987983, + 0.55743381, 0.473766, 0.61451431, 0.09486084, 0.03722801, + 0.02141829, 0.26657706, 0.090728, 0.81131024, 0.26465935, + + 0.08619648, 0.43343993, 0.3877785, 0.04523505, 0.15625437, + 0.61900597, 0.01653285, 0.06394322, 0.56592636, 0.27376196, + 0.11201305, 0.31654337, 0.21947994, 0.07893034, 0.05236297, + 0.18278451, 0.23348385, 0.32879834, 0.30990825, 0.5176207}); + // clang-format on + + test_case.run(4); } -NGRAPH_TEST(${BACKEND_NAME}, onnx_model_softmax_invalid_axis_3D) { - ASSERT_THROW( - onnx_import::import_onnx_model(file_util::path_join(SERIALIZED_ZOO, "onnx/softmax_invalid_axis_3D.onnx")), - ngraph::ngraph_error) - << "Softmax model with invalid axis was successfully imported while it should have thrown."; +NGRAPH_TEST(${BACKEND_NAME}, onnx_model_softmax_axis_negative_1_opset11) { + auto function = onnx_import::import_onnx_model( + file_util::path_join(SERIALIZED_ZOO, "onnx/softmax_axis_negative_1_opset11.onnx")); + + auto test_case = test::TestCase(function); + test_case.add_input(SOFTMAX_INPUT); + + // clang-format off + test_case.add_expected_output( + Shape{3, 4, 5}, + {0.88890495, 0.04825497, 0.27088348, 0.04490523, 0.02037154, + 0.06955369, 0.31998834, 0.39223197, 0.68041159, 0.05141776, + 0.02566661, 0.5885689, 0.12453075, 0.06257374, 0.03019055, + 0.01587475, 0.0431878, 0.21235381, 0.21210944, 0.89802015, + + 0.31752626, 0.19442629, 0.0546935, 0.06279221, 0.36823282, + 0.10362164, 0.06523066, 0.24006419, 0.03103672, 0.32987983, + 0.55743381, 0.473766, 0.61451431, 
0.09486084, 0.03722801, + 0.02141829, 0.26657706, 0.090728, 0.81131024, 0.26465935, + + 0.08619648, 0.43343993, 0.3877785, 0.04523505, 0.15625437, + 0.61900597, 0.01653285, 0.06394322, 0.56592636, 0.27376196, + 0.11201305, 0.31654337, 0.21947994, 0.07893034, 0.05236297, + 0.18278451, 0.23348385, 0.32879834, 0.30990825, 0.5176207}); + // clang-format on + + test_case.run(6); +} + +NGRAPH_TEST(${BACKEND_NAME}, onnx_model_softmax_axis_negative_1_opset13) { + auto function = onnx_import::import_onnx_model( + file_util::path_join(SERIALIZED_ZOO, "onnx/softmax_axis_negative_1_opset13.onnx")); + + auto test_case = test::TestCase(function); + test_case.add_input(SOFTMAX_INPUT); + + // clang-format off + test_case.add_expected_output( + Shape{3, 4, 5}, + {0.88890495, 0.04825497, 0.27088348, 0.04490523, 0.02037154, + 0.06955369, 0.31998834, 0.39223197, 0.68041159, 0.05141776, + 0.02566661, 0.5885689, 0.12453075, 0.06257374, 0.03019055, + 0.01587475, 0.0431878, 0.21235381, 0.21210944, 0.89802015, + + 0.31752626, 0.19442629, 0.0546935, 0.06279221, 0.36823282, + 0.10362164, 0.06523066, 0.24006419, 0.03103672, 0.32987983, + 0.55743381, 0.473766, 0.61451431, 0.09486084, 0.03722801, + 0.02141829, 0.26657706, 0.090728, 0.81131024, 0.26465935, + + 0.08619648, 0.43343993, 0.3877785, 0.04523505, 0.15625437, + 0.61900597, 0.01653285, 0.06394322, 0.56592636, 0.27376196, + 0.11201305, 0.31654337, 0.21947994, 0.07893034, 0.05236297, + 0.18278451, 0.23348385, 0.32879834, 0.30990825, 0.5176207}); + // clang-format on + + test_case.run(6); } NGRAPH_TEST(${BACKEND_NAME}, onnx_model_sub) { diff --git a/src/frontends/onnx/frontend/src/op/softmax.cpp b/src/frontends/onnx/frontend/src/op/softmax.cpp index 3aa517f3c12..ce609a52e44 100644 --- a/src/frontends/onnx/frontend/src/op/softmax.cpp +++ b/src/frontends/onnx/frontend/src/op/softmax.cpp @@ -37,17 +37,8 @@ OutputVector softmax(const Node& node) { result = default_opset::Constant::create(data.get_element_type(), Shape{}, {1}); break; } - case 1: { - // checks if the axis belongs to the allowed values set (-1 and 0 for 1D) - ngraph::normalize_axis(node.get_description(), axis, data.get_partial_shape().rank()); - result = std::make_shared(data, 0); - break; - } default: { - const auto normalized_axis = - ngraph::normalize_axis(node.get_description(), axis, data.get_partial_shape().rank()); - - result = onnx_softmax(data, normalized_axis); + result = onnx_softmax(data, axis); break; } } @@ -69,17 +60,8 @@ OutputVector softmax(const Node& node) { result = default_opset::Constant::create(data.get_element_type(), Shape{}, {1}); break; } - case 1: { - // checks if the axis belongs to the allowed values set (-1 and 0 for 1D) - ngraph::normalize_axis(node.get_description(), axis, data.get_partial_shape().rank()); - result = std::make_shared(data, 0); - break; - } default: { - const auto normalized_axis = - ngraph::normalize_axis(node.get_description(), axis, data.get_partial_shape().rank()); - - result = std::make_shared(data, normalized_axis); + result = std::make_shared(data, axis); break; } } @@ -92,9 +74,8 @@ OutputVector softmax(const Node& node) { const auto data = node.get_ng_inputs().at(0); const auto axis = node.get_attribute_value("axis", -1); - const auto normalized_axis = ngraph::normalize_axis(node.get_description(), axis, data.get_partial_shape().rank()); - return {std::make_shared(data, normalized_axis)}; + return {std::make_shared(data, axis)}; } } // namespace set_13 } // namespace op From d1e54d996112cb0eb425b65dc08904aa1ded7855 Mon Sep 17 00:00:00 2001 From: 
Irina Efode Date: Wed, 15 Dec 2021 23:43:24 +0300 Subject: [PATCH 13/27] [IE TESTS] Fix filters in report (#9232) --- .../layer_tests_summary/template/filters.js | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/tests/ie_test_utils/functional_test_utils/layer_tests_summary/template/filters.js b/src/tests/ie_test_utils/functional_test_utils/layer_tests_summary/template/filters.js index 6005190e7cc..beb86e1b65f 100644 --- a/src/tests/ie_test_utils/functional_test_utils/layer_tests_summary/template/filters.js +++ b/src/tests/ie_test_utils/functional_test_utils/layer_tests_summary/template/filters.js @@ -99,15 +99,15 @@ function filterTable() { if (implementation != 0) { if (implementation == 'ni') { $("#report #data tr:not(:hidden)").filter(function () { - $(this).toggle($(this).find('td').hasClass("not_impl")) + $(this).toggle($(this).find('td').hasClass("value " + device + " not_impl")) }); } else if (implementation == 'i') { $("#report #data tr:not(:hidden)").filter(function () { - $(this).toggle($(this).find('td').hasClass("impl")); + $(this).toggle($(this).find('td').hasClass("value " + device + " impl")); }); } else { $("#report #data tr:not(:hidden)").filter(function () { - $(this).toggle(!$(this).find('td').hasClass("not_impl") && !$(this).find('td').hasClass("impl")); + $(this).toggle(!$(this).find('td').hasClass("value")); }); } } @@ -116,19 +116,19 @@ function filterTable() { selector = []; select.forEach(item => { if (item == '100p') { - selector.push('.value:visible[crashed="0"][failed="0"][skipped="0"]'); + selector.push('.value:visible[crashed="0"][failed="0"][skipped="0"][value!="---"]'); } if (item == '100f') { - selector.push('.value:visible[passed="0"]'); + selector.push('.value:visible[passed="0"][value!="---"]'); } if (item == 'p') { - selector.push('.value:visible[passed!="0"]'); + selector.push('.value:visible[passed!="0"][value!="---"]'); } if (item == 'f') { - selector.push('.value:visible[failed!="0"]'); + selector.push('.value:visible[failed!="0"][value!="---"]'); } if (item == 'c') { - selector.push('.value:visible[crashed!="0"]'); + selector.push('.value:visible[crashed!="0"][value!="---"]'); } if (item == 's') { selector.push('.value:visible[value!="---"][skipped!="0"]'); From 40f668140ec6536b4d47e05f5185eb8ac5accf7e Mon Sep 17 00:00:00 2001 From: Mateusz Tabaka Date: Wed, 15 Dec 2021 23:52:26 +0100 Subject: [PATCH 14/27] Fix compilation error in template_plugin tests (#9248) --- .../tests/functional/op_reference/einsum.cpp | 8 ++++---- .../functional/op_reference/extract_image_patches.cpp | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/docs/template_plugin/tests/functional/op_reference/einsum.cpp b/docs/template_plugin/tests/functional/op_reference/einsum.cpp index 31460fd2ccf..625521dc42b 100644 --- a/docs/template_plugin/tests/functional/op_reference/einsum.cpp +++ b/docs/template_plugin/tests/functional/op_reference/einsum.cpp @@ -30,7 +30,7 @@ class ReferenceEinsumTest : public testing::TestWithParam, public public: void SetUp() override { auto params = GetParam(); - function = CreateFunction(params); + function = CreateModel(params); for (const auto& input_tensor : params.inputs) { inputData.push_back(input_tensor.data); } @@ -52,7 +52,7 @@ public: } private: - static std::shared_ptr CreateFunction(const EinsumParams& params) { + static std::shared_ptr CreateModel(const EinsumParams& params) { OutputVector output_vector; ParameterVector param_vector; for (const auto& input_tensor : 
params.inputs) { @@ -61,7 +61,7 @@ private: param_vector.push_back(param); } const auto einsum = std::make_shared(output_vector, params.equation); - const auto f = std::make_shared(OutputVector{einsum}, param_vector); + const auto f = std::make_shared(OutputVector{einsum}, param_vector); return f; } }; @@ -179,4 +179,4 @@ std::vector generateCombinedParams() { INSTANTIATE_TEST_SUITE_P(smoke_Einsum_With_Hardcoded_Refs, ReferenceEinsumTest, testing::ValuesIn(generateCombinedParams()), ReferenceEinsumTest::getTestCaseName); -} // namespace \ No newline at end of file +} // namespace diff --git a/docs/template_plugin/tests/functional/op_reference/extract_image_patches.cpp b/docs/template_plugin/tests/functional/op_reference/extract_image_patches.cpp index 95f5571cc41..503880ce8ac 100644 --- a/docs/template_plugin/tests/functional/op_reference/extract_image_patches.cpp +++ b/docs/template_plugin/tests/functional/op_reference/extract_image_patches.cpp @@ -36,7 +36,7 @@ class ReferenceExtractImagePatchesTest : public testing::TestWithParam CreateFunction(const ExtractImagePatchesParams& params) { + static std::shared_ptr CreateModel(const ExtractImagePatchesParams& params) { const auto data = std::make_shared(params.data.type, params.data.shape); const auto extrace_image_patches = std::make_shared(data, params.sizes, params.strides, params.rates, params.autoPad); - const auto f = std::make_shared(extrace_image_patches, ParameterVector{data}); + const auto f = std::make_shared(extrace_image_patches, ParameterVector{data}); return f; } }; @@ -243,4 +243,4 @@ std::vector generateCombinedParams() { INSTANTIATE_TEST_SUITE_P(smoke_ExtractImagePatches_With_Hardcoded_Refs, ReferenceExtractImagePatchesTest, testing::ValuesIn(generateCombinedParams()), ReferenceExtractImagePatchesTest::getTestCaseName); -} // namespace \ No newline at end of file +} // namespace From 9b71a5fb704acc7f3360c1c7b80b1905737cada2 Mon Sep 17 00:00:00 2001 From: Ilya Churaev Date: Thu, 16 Dec 2021 07:52:59 +0300 Subject: [PATCH 15/27] Fixed python tests (#9238) --- src/bindings/python/tests/test_utils/test_utils.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/bindings/python/tests/test_utils/test_utils.py b/src/bindings/python/tests/test_utils/test_utils.py index b312689aaa7..d8364635813 100644 --- a/src/bindings/python/tests/test_utils/test_utils.py +++ b/src/bindings/python/tests/test_utils/test_utils.py @@ -1,9 +1,9 @@ # Copyright (C) 2021 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -from openvino.runtime import Function -from openvino.runtime.impl import Shape, Type -from openvino.runtime.impl.op import Parameter +from openvino.runtime import Model +from openvino.runtime import Shape, Type +from openvino.runtime.op import Parameter import openvino.runtime.opset8 as ops @@ -11,7 +11,7 @@ def get_test_function(): element_type = Type.f32 param = Parameter(element_type, Shape([1, 3, 22, 22])) relu = ops.relu(param) - func = Function([relu], [param], "test") + func = Model([relu], [param], "test") assert func is not None return func From ea3f34c3516775724d61f926d44eeea49ecc6421 Mon Sep 17 00:00:00 2001 From: Indira Salyahova Date: Thu, 16 Dec 2021 08:51:24 +0300 Subject: [PATCH 16/27] Temporary revert test layout (#9242) * Update test_image_loading.py * Update test_image_loading.py --- tools/pot/tests/test_image_loading.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/tools/pot/tests/test_image_loading.py b/tools/pot/tests/test_image_loading.py index 
ff82d73c3d6..fc520c19d48 100644 --- a/tools/pot/tests/test_image_loading.py +++ b/tools/pot/tests/test_image_loading.py @@ -46,15 +46,17 @@ def test_check_image(tmp_path, models, model_name, model_framework): assert num_images_from_data_loader == num_images_in_dir -TEST_MODELS_LAYOUT = [('mobilenet-v2-pytorch', 'pytorch', 'NCHW', (3, 224, 224)), - ('mobilenet-v2-pytorch', 'pytorch', 'NHWC', (224, 224, 3)), - ('mobilenet-v2-pytorch', 'pytorch', None, (3, 224, 224)), - ('mobilenet-v1-1.0-224-tf', 'tf', None, (224, 224, 3))] +TEST_MODELS_LAYOUT = [ + #('mobilenet-v2-pytorch', 'pytorch', 'NCHW', (3, 224, 224)), + #('mobilenet-v2-pytorch', 'pytorch', 'NHWC', (224, 224, 3)), + #('mobilenet-v2-pytorch', 'pytorch', None, (3, 224, 224)), + #('mobilenet-v1-1.0-224-tf', 'tf', None, (224, 224, 3)) +] @pytest.mark.parametrize( - 'model_name, model_framework, layout, reference_shape', TEST_MODELS, - ids=['{}_{}'.format(m[0], m[1]) for m in TEST_MODELS]) + 'model_name, model_framework, layout, reference_shape', TEST_MODELS_LAYOUT, + ids=['{}_{}_{}_{}'.format(m[0], m[1], m[2], m[3]) for m in TEST_MODELS_LAYOUT]) def test_check_layout(tmp_path, models, model_name, model_framework, layout, reference_shape): test_dir = Path(__file__).parent path_image_data = os.path.join(test_dir, "data/image_data") From d5f84ad783bbd7b1478c98ce691401dea2afa4e7 Mon Sep 17 00:00:00 2001 From: Maxim Andronov Date: Thu, 16 Dec 2021 09:53:14 +0300 Subject: [PATCH 17/27] [CPU] Deconvolution dynamism support (#8512) --- .../src/mkldnn_plugin/mkldnn_graph.cpp | 1 - .../src/mkldnn_plugin/mkldnn_node.cpp | 6 +- .../src/mkldnn_plugin/mkldnn_node.h | 10 +- .../src/mkldnn_plugin/mkldnn_primitive.h | 1 - .../mkldnn_plugin/nodes/mkldnn_conv_node.cpp | 2 +- .../nodes/mkldnn_deconv_node.cpp | 611 +++++++++++--- .../mkldnn_plugin/nodes/mkldnn_deconv_node.h | 113 ++- .../nodes/mkldnn_fullyconnected_node.cpp | 2 +- .../nodes/mkldnn_matmul_node.cpp | 2 +- .../skip_tests_config.cpp | 4 + .../convolution_backprop_data.cpp | 679 +++++++++++----- .../group_convolution_backprop_data.cpp | 744 +++++++++++++----- .../src/base/ov_subgraph.cpp | 2 + .../src/convolution_backprop_data.cpp | 8 +- .../src/group_convolution_backprop_data.cpp | 8 +- .../unit/cpu/mkldnn_memory_desc_test.cpp | 2 +- 16 files changed, 1693 insertions(+), 502 deletions(-) diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp index e4a5aa18f2f..6765a1c0b67 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp @@ -319,7 +319,6 @@ void MKLDNNGraph::InitGraph() { SortTopologically(); InitDescriptors(); - RemoveDroppedEdges(); InitOptimalPrimitiveDescriptors(); diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp index 44ea9e933d2..a0a10a93ec4 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp @@ -717,7 +717,7 @@ void MKLDNNNode::initDescriptor(const NodeConfig& config) { selectedPD->setConfig(rightConfig); } -void MKLDNNNode::prepareMemory(const NodeDesc *selected_pd, mkldnn::primitive_desc_iterator& itpd) { +void MKLDNNNode::prepareMemory(mkldnn::primitive_desc_iterator& itpd) { for (size_t i = 0; i < getChildEdges().size(); i++) { auto &dstMemPtr = getChildEdgeAt(i)->getMemoryPtr(); if (!dstMemPtr || !dstMemPtr->GetPrimitivePtr()) @@ -1049,7 +1049,9 @@ void MKLDNNNode::setDynamicBatchLim(int lim) { } } -void 
MKLDNNNode::appendPostOpArgs(const mkldnn::primitive_attr& attr) { +void MKLDNNNode::appendPostOpArgs(const mkldnn::primitive_attr& attr, + std::unordered_map& primArgs, + const std::vector& binaryPostOpsArgs) { auto post_ops = attr.get_post_ops(); int idx = 0; for (int i = 0; i < post_ops.len(); i++) { diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_node.h b/inference-engine/src/mkldnn_plugin/mkldnn_node.h index aee4f876806..97517d54fbc 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_node.h +++ b/inference-engine/src/mkldnn_plugin/mkldnn_node.h @@ -208,7 +208,9 @@ public: return 1; } - void appendPostOpArgs(const mkldnn::primitive_attr& attr); + static void appendPostOpArgs(const mkldnn::primitive_attr& attr, + std::unordered_map& primArgs, + const std::vector& binaryPostOpsArgs); bool isFusedWith(Type type) const; @@ -425,7 +427,7 @@ public: if (impl_type == selected_pd->getImplementationType() && descsCompatible(srcDescs, selected_pd->getConfig().inConfs) && descsCompatible(dstDescs, selected_pd->getConfig().outConfs)) { - prepareMemory(selected_pd, itpd); + prepareMemory(itpd); PD prim_desc = createPd(desc); return {itpd.get()}; } @@ -722,6 +724,8 @@ protected: supportedPrimitiveDescriptors.push_back({config, implType}); } + void prepareMemory(mkldnn::primitive_desc_iterator& itpd); + bool isDynamic = false; bool inputShapesDefined() const; @@ -746,6 +750,7 @@ protected: } std::vector lastInputDims = {}; + std::shared_ptr opToShapeInfer; private: @@ -788,7 +793,6 @@ private: return PD(*selected_desc_ptr, engine); } - void prepareMemory(const NodeDesc *selected_pd, mkldnn::primitive_desc_iterator& itpd); enum LOOK { LOOK_UP = 1, LOOK_DOWN = 2 }; ConstantType checkConstant(LOOK look, std::vector& checkNodes); diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_primitive.h b/inference-engine/src/mkldnn_plugin/mkldnn_primitive.h index ffd43ee8dbd..d7e9c05a6fe 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_primitive.h +++ b/inference-engine/src/mkldnn_plugin/mkldnn_primitive.h @@ -18,7 +18,6 @@ public: operator bool() const; MKLDNNPrimitive& operator=(const std::shared_ptr& primitive); mkldnn::primitive operator*(); - void reset(mkldnn::primitive* primitive); private: diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.cpp index e2f01e85cef..03f7700e7af 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.cpp @@ -979,7 +979,7 @@ void MKLDNNConvolutionNode::prepareParams() { primArgs[DNNL_ARG_BIAS] = getBias(); } - appendPostOpArgs(*pAttrLocal); + appendPostOpArgs(*pAttrLocal, primArgs, binaryPostOpsArgs); } void MKLDNNConvolutionNode::executeDynamicImpl(dnnl::stream strm) { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.cpp index 6a2c6332e38..f81e4601eab 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.cpp @@ -13,34 +13,38 @@ #include #include "ie_parallel.hpp" #include "utils/general_utils.h" -#include #include #include #include #include "memory_desc/dnnl_blocked_memory_desc.h" #include "utils/cpu_utils.hpp" +#include +#include +#include +#include +#include "convolution_shape_inference.hpp" + using namespace mkldnn; using namespace MKLDNNPlugin; using namespace InferenceEngine; bool 
MKLDNNDeconvolutionNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { - if (isDynamicNgraphNode(op)) { - errorMessage = "Doesn't support op with dynamic shapes"; - return false; - } - if (std::dynamic_pointer_cast(op) == nullptr && std::dynamic_pointer_cast(op) == nullptr) { errorMessage = "Only opset1 ConvolutionBackpropData and GroupConvolutionBackpropData operations are supported"; return false; } - size_t ndims = op->get_input_shape(0).size(); + size_t ndims = op->get_input_partial_shape(0).rank().get_length(); if ((ndims < 3) || (ndims > 5)) { errorMessage = "Only 3D, 4D and 5D blobs are supported as input"; return false; } + if (op->get_input_partial_shape(1).is_dynamic() || (op->get_input_size() > 2 && op->get_input_partial_shape(2).is_dynamic())) { + errorMessage = "Doesn't support dynamic shapes for 'weights' and 'output_shape' inputs"; + return false; + } } catch (...) { return false; } @@ -58,15 +62,14 @@ MKLDNNDeconvolutionNode::MKLDNNDeconvolutionNode(const std::shared_ptr(op); auto groupConvBackprop = std::dynamic_pointer_cast(op); - const auto dataShape = op->get_input_shape(0); - weightDims = op->get_input_shape(1); - const auto outShape = op->get_shape(); - OC = outShape[1]; - IC = dataShape[1]; + const auto& weightDims = getWeightDims(); if (convBackprop) { algorithm = DeconvolutionCommon; + IC = weightDims[0]; + OC = weightDims[1]; + groupNum = 1; withGroups = false; @@ -78,10 +81,17 @@ MKLDNNDeconvolutionNode::MKLDNNDeconvolutionNode(const std::shared_ptrget_pads_begin(); paddingR = convBackprop->get_pads_end(); + + outputPadding = convBackprop->get_output_padding(); + + autoPad = one_of(convBackprop->get_auto_pad(), ov::op::PadType::SAME_LOWER, ov::op::PadType::SAME_UPPER); } else if (groupConvBackprop) { algorithm = DeconvolutionGrouped; groupNum = weightDims[0]; + IC = groupNum * weightDims[1]; + OC = groupNum * weightDims[2]; + withGroups = groupNum > 1; isDW = withGroups && groupNum == OC && groupNum == IC; @@ -93,10 +103,26 @@ MKLDNNDeconvolutionNode::MKLDNNDeconvolutionNode(const std::shared_ptrget_pads_begin(); paddingR = groupConvBackprop->get_pads_end(); + + outputPadding = groupConvBackprop->get_output_padding(); + + autoPad = one_of(groupConvBackprop->get_auto_pad(), ov::op::PadType::SAME_LOWER, ov::op::PadType::SAME_UPPER); } for (int i = 0; i < dilation.size(); i++) { kernel.push_back(weightDims[withGroups + 2 + i]); } + + externOutShape = inputShapes.size() == 3; + if (externOutShape && isDynamicNode()) { + bool isConstOutShape = ngraph::is_type(op->get_input_node_shared_ptr(2)); + if (isConstOutShape) { + lastOutputSpatialDims = ov::as_type(op->get_input_node_ptr(2))->cast_vector(); + } + const auto spDimsNum = getInputShapeAtPort(0).getRank() - 2; + if (getInputShapeAtPort(2).getStaticDims()[0] != spDimsNum || (isConstOutShape && lastOutputSpatialDims.size() != spDimsNum)) { + IE_THROW() << "'output_shape' input has incorrect number of elements. Expected = " << spDimsNum; + } + } } else { IE_THROW(NotImplemented) << errorMessage; } @@ -113,14 +139,6 @@ InferenceEngine::Blob::Ptr MKLDNNDeconvolutionNode::createWeiBlobAsIO(InferenceE auto const blbSize = blb->GetSize(); // WA: In int8 case, we are processing weights using internal blob. - // So we disconnect constant node containing weights from the graph and then don't use it. 
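/* A minimal sketch (not part of this patch) of the spatial-size arithmetic that
 * makeDummyInOutShape() below inverts. It assumes this file's oneDNN-style
 * convention where the stored dilation is (d - 1):
 *
 *     int64_t effKernel = (k - 1) * (dilation + 1) + 1;
 *     // forward deconvolution output size per spatial axis:
 *     int64_t out = stride * (in - 1) + effKernel - padBegin - padEnd + outPad;
 *     // inverse, used to derive a dummy input for a requested output size:
 *     int64_t in  = (out - effKernel + padBegin + padEnd - outPad) / stride + 1;
 *
 * For example, in = 5, k = 3, stride = 2, dilation = 0 and zero pads give
 * out = 2 * 4 + 3 = 11, and the inverse recovers in = 5.
 */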
- if (getParentEdges().size() == 3) { - removeEdge(getParentEdgeAt(2)); - inputShapes.erase(inputShapes.begin() + 2); - } - removeEdge(getParentEdgeAt(1)); - inputShapes.erase(inputShapes.begin() + 1); - InferenceEngine::SizeVector dimsForBlockedDesc{dims}; std::swap(dimsForBlockedDesc[withGroups + 0], dimsForBlockedDesc[withGroups + 1]); @@ -160,13 +178,16 @@ bool MKLDNNDeconvolutionNode::canBeExecutedInInt8() const { if (!withGroups && stride.back() > 3) return false; if (!impl::cpu::x64::mayiuse(impl::cpu::x64::avx512_common)) { - auto inDims = getOutputShapeAtPort(0).getStaticDims(); + const auto& inMaxDims = getOutputShapeAtPort(0).getMaxDims(); + if (std::any_of(inMaxDims.begin(), inMaxDims.end(), [](Dim dim) { return dim == Shape::UNDEFINED_DIM; })) { + return false; + } // heuristicConst = 2^26 // heuristicParam = IC^2 * SP auto heuristicConst = 67108864; auto heuristicParam = IC * IC; - for (int i = 2; i < inDims.size(); i++) - heuristicParam *= inDims[i]; + for (int i = 2; i < inMaxDims.size(); i++) + heuristicParam *= inMaxDims[i]; if (heuristicParam > heuristicConst) return false; } @@ -203,10 +224,65 @@ bool MKLDNNDeconvolutionNode::canFuse(const MKLDNNNodePtr& node) const { return (fusedWith.empty() && node->canBePerformedAsScaleShift(this)); } -void MKLDNNDeconvolutionNode::getSupportedDescriptors() { - if (!descs_fwd.empty() && !descs_bwd.empty()) - return; +void MKLDNNDeconvolutionNode::initPadding(std::shared_ptr op, const Shape &inDims, const std::vector& outSpDims) { + std::vector input_shapes{inDims.getStaticDims(), getWeightDims()}; + ov::StaticShape output_shape_input; + if (externOutShape) { + IE_ASSERT(outSpDims.size() == getInputShapeAtPort(2).getStaticDims()[0]); + input_shapes.push_back({outSpDims.size()}); + for (size_t i = 0; i < outSpDims.size(); i++) { + output_shape_input.push_back(outSpDims[i]); + } + } + if (getAlgorithm() == DeconvolutionCommon) { + auto deconv = ngraph::as_type_ptr(op); + IE_ASSERT(ov::op::v1::resolve_auto_pad_for_shape_back_prop(deconv.get(), paddingL, paddingR, input_shapes, output_shape_input, 2, 2)); + } else if (getAlgorithm() == DeconvolutionGrouped) { + auto deconv = ngraph::as_type_ptr(op); + IE_ASSERT(ov::op::v1::resolve_auto_pad_for_shape_back_prop(deconv.get(), paddingL, paddingR, input_shapes, output_shape_input, 2, 3)); + } +} + +std::pair MKLDNNDeconvolutionNode::makeDummyInOutShape() { + auto inShape = MemoryDescUtils::makeDummyShape(getInputShapeAtPort(0)); + auto outShape = getOutputShapeAtPort(0); + + if (isDynamicNode()) { + if (externOutShape) { + if (lastOutputSpatialDims.empty()) { + const auto& shape = getOutputShapeAtPort(0); + lastOutputSpatialDims.resize(shape.getRank() - 2); + + const auto& minDims = shape.getMinDims(); + const auto& maxDims = shape.getMaxDims(); + const auto& dims = shape.getDims(); + for (size_t i = 0; i < dims.size() - 2; ++i) { + lastOutputSpatialDims[i] = dims[i + 2] == Shape::UNDEFINED_DIM ? std::min(maxDims[i + 2], + std::max(minDims[i + 2], static_cast(64))) : dims[i + 2]; + } + } + ov::CoordinateDiff pb = autoPad ? ov::CoordinateDiff(paddingL.size(), 0) : paddingL; + ov::CoordinateDiff pe = autoPad ? ov::CoordinateDiff(paddingR.size(), 0) : paddingR; + + auto inputDims = inShape.getStaticDims(); + const auto& weightDims = getWeightDims(); + const size_t wghOffset = getAlgorithm() == DeconvolutionGrouped ? 
1 : 0; + for (size_t i = 0; i < inputDims.size() - 2; i++) { + inputDims[2 + i] = ((lastOutputSpatialDims[i] - (dilation[i] + 1) * + (weightDims[wghOffset + 2 + i] - 1) - 1 + pb[i] + pe[i] - outputPadding[i])) / + stride[i] + 1; + } + + inShape = Shape(inputDims); + } + initPadding(opToShapeInfer, inShape, lastOutputSpatialDims); + outShape = Shape(shapeInferInternal(inShape.getStaticDims(), lastOutputSpatialDims)); + } + return {inShape.getStaticDims(), outShape.getStaticDims()}; +} + +void MKLDNNDeconvolutionNode::getSupportedDescriptors() { isInt8 = canBeExecutedInInt8(); InferenceEngine::Precision inPrecision = getOriginalInputPrecisionAtPort(0); @@ -236,21 +312,17 @@ void MKLDNNDeconvolutionNode::getSupportedDescriptors() { if (getChildEdges().empty()) IE_THROW() << errorPrefix << " has incorrect number of output edges"; - for (int i = 0; i < paddingR.size(); i++) { - int with_group = getAlgorithm() == DeconvolutionGrouped ? 1 : 0; - int krn = weightDims[with_group + 2 + i]; - int src = getOutputShapeAtPort(0).getStaticDims()[2 + i]; - int dst = getInputShapeAtPort(0).getStaticDims()[2 + i]; - - krn = (krn - 1)*(dilation[i] + 1) + 1; - int calc_dst = (src - krn + paddingL[i]) / stride[i] + 1; - paddingR[i] = (dst - calc_dst) * stride[i]; - } + VectorDims inDims, outDims; + std::tie(inDims, outDims) = makeDummyInOutShape(); + inShape = Shape(inDims); + Shape outShape(outDims); + initPaddingR(inShape, outShape); if (isInt8) { + int8WeightDims = getWeightDims(); // WA: if int8 deconvolution is supported, we create internal weights blob in IO format - std::swap(weightDims[withGroups + 0], weightDims[withGroups + 1]); - internalBlobs.push_back(createWeiBlobAsIO(weightDims)); + std::swap(int8WeightDims[withGroups + 0], int8WeightDims[withGroups + 1]); + internalBlobs.push_back(createWeiBlobAsIO(int8WeightDims)); auto format = getInputShapeAtPort(0).getRank() == 5 ? dnnl::memory::format_tag::ndhwc : dnnl::memory::format_tag::nhwc; MemoryDescPtr in_candidate = std::make_shared(getInputShapeAtPort(0), inputDataType, format); MemoryDescPtr out_candidate = std::make_shared(getOutputShapeAtPort(0), outputDataType, format); @@ -262,18 +334,31 @@ void MKLDNNDeconvolutionNode::getSupportedDescriptors() { createDescriptor({in_candidate}, {out_candidate}); } } - setPostOps(attr); + setPostOps(attr, outShape.getStaticDims()); } -void MKLDNNDeconvolutionNode::setPostOps(mkldnn::primitive_attr &attr) { +void MKLDNNDeconvolutionNode::initPaddingR(const Shape &inShape, const Shape &outShape) { + for (int i = 0; i < paddingR.size(); i++) { + int with_group = getAlgorithm() == DeconvolutionGrouped ? 
1 : 0; + const auto& weightDims = getWeightDims(); + int krn = weightDims[with_group + 2 + i]; + int src = outShape.getStaticDims()[2 + i]; + int dst = inShape.getStaticDims()[2 + i]; + + krn = (krn - 1)*(dilation[i] + 1) + 1; + int calc_dst = (src - krn + paddingL[i]) / stride[i] + 1; + paddingR[i] = (dst - calc_dst) * stride[i]; + } +} + +void MKLDNNDeconvolutionNode::setPostOps(mkldnn::primitive_attr &attr, const VectorDims &dims) { mkldnn::post_ops ops; auto getBinPostOpShape = [&](){ - const auto outShape = getOutputShapeAtPort(0).getStaticDims(); const auto outShapeRank = getOutputShapeAtPort(0).getRank(); const auto chIdx = getFusingAxis(); std::vector binaryShape(outShapeRank, 1); - binaryShape[chIdx] = outShape[chIdx]; + binaryShape[chIdx] = dims[chIdx]; return binaryShape; }; @@ -282,7 +367,7 @@ void MKLDNNDeconvolutionNode::setPostOps(mkldnn::primitive_attr &attr) { // TODO [DS]: change to shape from memory constexpr int align = 16; // use legacy depthwise since backprop convolution does not support binary post ops - eltwiseNode->appendPostOps(ops, getOutputShapeAtPort(0).getStaticDims(), align); + eltwiseNode->appendPostOps(ops, dims, align); continue; } if (auto* fakeQuantizeNode = dynamic_cast(node.get())) { @@ -339,80 +424,277 @@ bool MKLDNNDeconvolutionNode::created() const { return getType() == Deconvolution; } -void MKLDNNDeconvolutionNode::createPrimitive() { - if (prim) - return; - - if (isInt8) { - auto prim_desc = createPrimitiveDescriptor(attr); - - prim.reset(new deconvolution_forward(prim_desc)); - - auto src = getParentEdgesAtPort(0)[0]->getMemoryPtr()->GetPrimitive(); - auto dst = getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPrimitive(); - primArgs = {{DNNL_ARG_SRC, src}, {DNNL_ARG_WEIGHTS, internalBlobMemory[0]->GetPrimitive()}, {DNNL_ARG_DST, dst}}; - } else { - auto prim_desc = createPrimitiveDescriptor(attr); - - prim.reset(new convolution_backward_data(prim_desc)); - - auto src = getParentEdgesAtPort(0)[0]->getMemoryPtr()->GetPrimitive(); - auto weights = getParentEdgeAt(1)->getMemory().GetPrimitive(); - auto dst = getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPrimitive(); - primArgs = {{DNNL_ARG_DIFF_DST, src}, {DNNL_ARG_WEIGHTS, weights}, {DNNL_ARG_DIFF_SRC, dst}}; +bool MKLDNNDeconvolutionNode::needShapeInfer() const { + if (inputShapesModified()) { + return true; + } + if (externOutShape) { + if (lastOutputSpatialDims != readOutputSpatialDims()) { + return true; + } } - appendPostOpArgs(attr); + return false; } -void MKLDNNDeconvolutionNode::createDescriptor(const std::vector &inputDesc, - const std::vector &outputDesc) { - const auto in_candidate = MemoryDescUtils::convertToDnnlBlockedMemoryDesc(*inputDesc[0]); - const auto out_candidate = MemoryDescUtils::convertToDnnlBlockedMemoryDesc(*outputDesc[0]); +std::vector MKLDNNDeconvolutionNode::shapeInfer() const { + const auto &dataMemPtr = getParentEdgesAtPort(0)[0]->getMemoryPtr(); + std::vector outSpDims; + if (externOutShape) { + outSpDims = readOutputSpatialDims(); + } + return {shapeInferInternal(dataMemPtr->getStaticDims(), outSpDims)}; +} - // grouping and autoblicking is not compatible - if ((withGroups && !isDW) && (in_candidate.blocksExtended() || out_candidate.blocksExtended())) - return; +VectorDims MKLDNNDeconvolutionNode::shapeInferInternal(const VectorDims &inDims, std::vector outSpDims) const { + std::vector inputShapes = { + inDims, + getWeightDims() + }; + std::map> inputValues; + + if (externOutShape) { + if (outSpDims.size() != getInputShapeAtPort(2).getStaticDims()[0]) { + IE_THROW() 
<< "Can't compute output shape for node with name: " << getName() + << ", because the node has 'output_shape' input, but provided output spatial dims number is incorrect"; + } + inputShapes.push_back({outSpDims.size()}); + inputValues.insert({2, std::make_shared(ngraph::element::Type_t::i32, + inputShapes.back().to_shape(), + outSpDims.data())}); + } + + std::vector outputShapes(1); + shape_inference(opToShapeInfer.get(), inputShapes, outputShapes, inputValues); + + return outputShapes.back().to_shape(); +} + +void MKLDNNDeconvolutionNode::execute(mkldnn::stream strm) { + if (!execPtr) { + IE_THROW() << "Can't execute Deconvolution node with name: " << getName() << ", because executor is not compiled"; + } + execPtr->exec(strm); + + if (externOutShape) { + lastOutputSpatialDims = readOutputSpatialDims(); + } +} + +std::shared_ptr MKLDNNDeconvolutionNode::createDefaultMkldnnDeconvDesc(const mkldnn::memory::desc& srcDesc, + const mkldnn::memory::desc& wghDesc, + const mkldnn::memory::desc& dstDesc, + bool isWinograd) const { + mkldnn::algorithm alg = isWinograd ? mkldnn::algorithm::convolution_winograd : mkldnn::algorithm::convolution_direct; + std::shared_ptr deconv_desc; + std::shared_ptr fwd_conv_pd; + std::tie(deconv_desc, fwd_conv_pd) = createDescriptorInternalDefault(srcDesc, wghDesc, dstDesc, alg); + if (fwd_conv_pd->get(true) == nullptr) { + IE_THROW() << "Forward convolution primitive descriptor is nullable for node with name: " << getName(); + } + return std::make_shared(deconv_desc, fwd_conv_pd); +} + +std::shared_ptr MKLDNNDeconvolutionNode::createInt8MkldnnDeconvDesc(const mkldnn::memory::desc& srcDesc, + const mkldnn::memory::desc& wghDesc, + const mkldnn::memory::desc& dstDesc) const { + return std::make_shared(createDescriptorInternalInt8(srcDesc, wghDesc, dstDesc)); +} + +void MKLDNNDeconvolutionNode::createDeconvPrim(std::shared_ptr desc, + MKLDNNMemoryPtr srcMemPtr, + MKLDNNMemoryPtr wghMemPtr, + MKLDNNMemoryPtr dstMemPtr, + AttrPtr attr, + impl_desc_type selectedImpl) { + auto itpd = desc->createPrimitiveDescriptorIterator(getEngine(), *attr); + + while (static_cast(itpd)) { + impl_desc_type impl_type = parse_impl_name(itpd.impl_info_str()); + + if (impl_type == selectedImpl) { + if (isInt8) { + if (internalBlobMemory.empty()) { + prepareMemory(itpd); + } + auto prim_desc = deconvolution_forward::primitive_desc(itpd.get()); + execPtr = std::make_shared(prim_desc, srcMemPtr, internalBlobMemory.front(), dstMemPtr, *attr, + binaryPostOpsArgs, getEngine()); + } else { + auto prim_desc = convolution_backward_data::primitive_desc(itpd.get()); + execPtr = std::make_shared(prim_desc, srcMemPtr, wghMemPtr, dstMemPtr, *attr, + binaryPostOpsArgs, getEngine()); + } + return; + } + + if (!itpd.next_impl()) { + auto inDesc = mkldnn::memory::desc(MKLDNNExtensionUtils::convertToDnnlDims(srcMemPtr->getStaticDims()), + memory::data_type::f32, + memory::format_tag::any); + auto wghDesc = mkldnn::memory::desc(MKLDNNExtensionUtils::convertToDnnlDims(wghMemPtr->getStaticDims()), + memory::data_type::f32, + memory::format_tag::any); + auto outDesc = mkldnn::memory::desc(MKLDNNExtensionUtils::convertToDnnlDims(dstMemPtr->getStaticDims()), + memory::data_type::f32, + memory::format_tag::any); + + std::shared_ptr anyDeconvDesc = createDefaultMkldnnDeconvDesc(inDesc, wghDesc, outDesc, false); + auto anyDeconvItpd = anyDeconvDesc->createPrimitiveDescriptorIterator(getEngine(), *attr); + if (static_cast(anyDeconvItpd)) { + auto prim_desc = 
convolution_backward_data::primitive_desc(anyDeconvItpd.get()); + execPtr = std::make_shared(prim_desc, srcMemPtr, wghMemPtr, dstMemPtr, *attr, + binaryPostOpsArgs, getEngine()); + return; + } + } + } + IE_THROW() << "Primitive descriptor was not found for node " << getName() << "."; +} + +void MKLDNNDeconvolutionNode::prepareParams() { + auto srcMemPtr = getParentEdgesAtPort(0)[0]->getMemoryPtr(); + auto dstMemPtr = getChildEdgesAtPort(0)[0]->getMemoryPtr(); + if (!dstMemPtr || !dstMemPtr->GetPrimitivePtr()) + IE_THROW() << "Destination memory didn't allocate."; + if (!srcMemPtr || !srcMemPtr->GetPrimitivePtr()) + IE_THROW() << "Input memory didn't allocate."; + const NodeDesc *selected_pd = getSelectedPrimitiveDescriptor(); + if (selected_pd == nullptr) + IE_THROW() << "Preferable primitive descriptor is not set for node " << getName() << "."; + + auto inMemoryDesc = getParentEdgesAtPort(0).front()->getMemory().GetDescWithType(); + auto outMemoryDesc = getChildEdgesAtPort(0).front()->getMemory().GetDescWithType(); + + auto initPrimitiveAttr = [&]() { + mkldnn::primitive_attr attr; + setPostOps(attr, dstMemPtr->getStaticDims()); + return std::make_shared(std::move(attr)); + }; + + AttrPtr pAttrLocal; + + if (isDynamicNode()) { + if (!pAttr) { + pAttr = initPrimitiveAttr(); + } + pAttrLocal = pAttr; + if (autoPad || externOutShape) { + initPadding(opToShapeInfer, inMemoryDesc->getShape(), externOutShape ? readOutputSpatialDims() : std::vector{}); + } + initPaddingR(inMemoryDesc->getShape(), outMemoryDesc->getShape()); + } else { + pAttrLocal = initPrimitiveAttr(); + } + + const auto in_candidate = inMemoryDesc->getDnnlDesc(); + const auto out_candidate = outMemoryDesc->getDnnlDesc(); + + mkldnn::memory::desc wgh_candidate; + if (isInt8) { + if (internalBlobMemory.empty()) { + wgh_candidate = mkldnn::memory::desc(MKLDNNExtensionUtils::convertToDnnlDims(int8WeightDims), memory::data_type::s8, memory::format_tag::any); + } else { + wgh_candidate = internalBlobMemory.front()->GetDescWithType()->getDnnlDesc(); + } + } else { + wgh_candidate = getParentEdgesAtPort(1).front()->getMemory().GetDescWithType()->getDnnlDesc(); + } + + std::shared_ptr desc; + if (isInt8) { + desc = createInt8MkldnnDeconvDesc(in_candidate, wgh_candidate, out_candidate); + } else { + desc = createDefaultMkldnnDeconvDesc(in_candidate, wgh_candidate, out_candidate, + selected_pd->getImplementationType() == MKLDNNPlugin::impl_desc_type::jit_avx512_winograd); + } + + createDeconvPrim(desc, srcMemPtr, getParentEdgesAtPort(1)[0]->getMemoryPtr(), dstMemPtr, pAttrLocal, selected_pd->getImplementationType()); +} + +void MKLDNNDeconvolutionNode::createPrimitive() { + if (inputShapesDefined()) { + if (needPrepareParams()) + prepareParams(); + updateLastInputDims(); + } +} + +MKLDNNDeconvolutionNode::DefaultDeconvDescs MKLDNNDeconvolutionNode::createDescriptorInternalDefault(const mkldnn::memory::desc& in_candidate, + const mkldnn::memory::desc& wgh_candidate, + const mkldnn::memory::desc& out_candidate, + mkldnn::algorithm alg) const { auto convertDims = [] (const std::vector& orig_dims) { return memory::dims(orig_dims.begin(), orig_dims.end()); }; + std::shared_ptr conv_desc; + conv_desc = std::make_shared(prop_kind::forward_inference, alg, + out_candidate, wgh_candidate, in_candidate, + convertDims(stride), + convertDims(dilation), + convertDims(paddingL), + convertDims(paddingR)); + + std::shared_ptr deconv_desc; + deconv_desc = std::make_shared(alg, out_candidate, wgh_candidate, + in_candidate, + convertDims(stride), + 
convertDims(dilation), + convertDims(paddingL), + convertDims(paddingR)); + + auto fwd_conv_pd = std::make_shared(*conv_desc, getEngine(), true); + + return {deconv_desc, fwd_conv_pd}; +} + +MKLDNNDeconvolutionNode::Int8DeconvDesc MKLDNNDeconvolutionNode::createDescriptorInternalInt8(const mkldnn::memory::desc& in_candidate, + const mkldnn::memory::desc& wgh_candidate, + const mkldnn::memory::desc& out_candidate) const { + auto convertDims = [] (const std::vector& orig_dims) { + return memory::dims(orig_dims.begin(), orig_dims.end()); + }; + + MKLDNNDeconvolutionNode::Int8DeconvDesc deconv_desc; + deconv_desc = std::make_shared(prop_kind::forward_inference, mkldnn::algorithm::deconvolution_direct, + in_candidate, wgh_candidate, out_candidate, + convertDims(stride), convertDims(dilation), + convertDims(paddingL), convertDims(paddingR)); + return deconv_desc; +} + +void MKLDNNDeconvolutionNode::createDescriptor(const std::vector &inputDesc, + const std::vector &outputDesc) { + auto inDesc = inputDesc[0]->isDefined() ? inputDesc[0] : inputDesc[0]->cloneWithNewDims(inShape.getStaticDims()); + auto dnnlInDesc = MemoryDescUtils::convertToDnnlBlockedMemoryDesc(*inDesc); + auto in_candidate = dnnlInDesc.getDnnlDesc(); + + auto outDesc = outputDesc[0]; + if (!outDesc->isDefined()) { + const auto outShape = shapeInferInternal(inDesc->getShape().getStaticDims(), lastOutputSpatialDims); + outDesc = outDesc->cloneWithNewDims(outShape); + } + auto dnnlOutDesc = MemoryDescUtils::convertToDnnlBlockedMemoryDesc(*outDesc); + auto out_candidate = dnnlOutDesc.getDnnlDesc(); + + // grouping and autoblocking is not compatible + if ((withGroups && !isDW) && (dnnlInDesc.blocksExtended() || dnnlOutDesc.blocksExtended())) + return; + if (isInt8) { - mkldnn::memory::desc wgh_candidate(MKLDNNExtensionUtils::convertToDnnlDims(weightDims), memory::data_type::s8, memory::format_tag::any); - std::shared_ptr deconv_desc; - deconv_desc.reset(new deconvolution_forward::desc(prop_kind::forward_inference, mkldnn::algorithm::deconvolution_direct, - in_candidate.getDnnlDesc(), wgh_candidate, out_candidate.getDnnlDesc(), - convertDims(stride), convertDims(dilation), - convertDims(paddingL), convertDims(paddingR))); - descs.emplace_back(deconv_desc); + mkldnn::memory::desc wgh_candidate(MKLDNNExtensionUtils::convertToDnnlDims(int8WeightDims), memory::data_type::s8, memory::format_tag::any); + descs.emplace_back(createDescriptorInternalInt8(in_candidate, wgh_candidate, out_candidate)); } else { - mkldnn::memory::desc wgh_candidate(MKLDNNExtensionUtils::convertToDnnlDims(weightDims), in_candidate.getDataType(), memory::format_tag::any); + mkldnn::memory::desc wgh_candidate(MKLDNNExtensionUtils::convertToDnnlDims(getWeightDims()), + dnnlInDesc.getDataType(), memory::format_tag::any); for (auto alg : {mkldnn::algorithm::convolution_winograd, mkldnn::algorithm::convolution_direct}) { - std::shared_ptr conv_desc; - conv_desc.reset(new convolution_forward::desc(prop_kind::forward_inference, alg, - out_candidate.getDnnlDesc(), wgh_candidate, in_candidate.getDnnlDesc(), - convertDims(stride), - convertDims(dilation), - convertDims(paddingL), - convertDims(paddingR))); - - std::shared_ptr deconv_desc; - deconv_desc.reset(new convolution_backward_data::desc(alg, out_candidate.getDnnlDesc(), wgh_candidate, - in_candidate.getDnnlDesc(), - convertDims(stride), - convertDims(dilation), - convertDims(paddingL), - convertDims(paddingR))); - descs_fwd.push_back(conv_desc); - descs_bwd.push_back(deconv_desc); - - auto fwd_conv_pd = 
std::make_shared(*conv_desc, getEngine(), true); + std::shared_ptr deconv_desc; + std::shared_ptr fwd_conv_pd; + std::tie(deconv_desc, fwd_conv_pd) = createDescriptorInternalDefault(in_candidate, wgh_candidate, out_candidate, alg); if (fwd_conv_pd->get(true) == nullptr) continue; - descs.emplace_back(deconv_desc, fwd_conv_pd); } } @@ -420,15 +702,25 @@ void MKLDNNDeconvolutionNode::createDescriptor(const std::vector std::shared_ptr MKLDNNDeconvolutionNode::getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) { if (idx == 2) { - return std::make_shared(getOriginalInputPrecisionAtPort(2), Shape(getInputShapeAtPort(2).getStaticDims())); + return std::make_shared(InferenceEngine::Precision::I32, Shape(getInputShapeAtPort(2).getStaticDims())); + } else if (idx > 0 && isInt8) { + // we need to store 'weight' input as edge, + // because at this moment we can't simple replace internal blob with input, since we need to save weight data as is, but with different order + return std::make_shared(getOriginalInputPrecisionAtPort(idx), Shape(getInputShapeAtPort(idx).getStaticDims())); } auto desc = idx > 0 ? primitive_desc_it.weights_desc(idx - 1) : isInt8 ? primitive_desc_it.src_desc(idx) : primitive_desc_it.diff_dst_desc(idx); + if (getInputShapeAtPort(idx).isDynamic()) { + return MKLDNNExtensionUtils::makeUndefinedDesc(desc, getInputShapeAtPort(idx)); + } return MKLDNNExtensionUtils::makeDescriptor(desc); } std::shared_ptr MKLDNNDeconvolutionNode::getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) { auto desc = isInt8 ? primitive_desc_it.dst_desc(idx) : primitive_desc_it.diff_src_desc(idx); + if (getOutputShapeAtPort(idx).isDynamic()) { + return MKLDNNExtensionUtils::makeUndefinedDesc(desc, getOutputShapeAtPort(idx)); + } return MKLDNNExtensionUtils::makeDescriptor(desc); } @@ -446,4 +738,117 @@ InferenceEngine::Precision MKLDNNDeconvolutionNode::getRuntimePrecision() const return getMaxPrecision(inputPrecisions); } +MKLDNNDeconvolutionNode::DeconvExecutor::IntermReorder::IntermReorder(MKLDNNMemoryPtr memFrom, + const mkldnn::memory::desc& descTo, + const mkldnn::engine& engine) : m_memFrom(memFrom) { + m_memTo = std::make_shared(engine); + m_memTo->Create(MKLDNNExtensionUtils::makeDescriptor(descTo)); + m_reorder = mkldnn::reorder(m_memFrom->GetPrimitive(), m_memTo->GetPrimitive()); +} + +MKLDNNDeconvolutionNode::DeconvExecutor::IntermReorder::IntermReorder(const mkldnn::memory::desc& descFrom, + MKLDNNMemoryPtr memTo, + const mkldnn::engine& engine) : m_memTo(memTo) { + m_memFrom = std::make_shared(engine); + m_memFrom->Create(MKLDNNExtensionUtils::makeDescriptor(descFrom)); + m_reorder = mkldnn::reorder(m_memFrom->GetPrimitive(), m_memTo->GetPrimitive()); +} + +void MKLDNNDeconvolutionNode::DeconvExecutor::IntermReorder::exec(mkldnn::stream strm) { + auto src = m_memFrom->GetPrimitive(); + auto dst = m_memTo->GetPrimitive(); + m_reorder.execute(strm, src, dst); +} + +void MKLDNNDeconvolutionNode::DeconvExecutor::exec(mkldnn::stream strm) { + for (auto &inReorder : inputReorders) { + inReorder.exec(strm); + } + (*execPrim).execute(strm, primArgs); + for (auto &outReorder : outputReorders) { + outReorder.exec(strm); + } +} + +MKLDNNDeconvolutionNode::DeconvExecutorDefault::DeconvExecutorDefault(const mkldnn::convolution_backward_data::primitive_desc& pd, + MKLDNNMemoryPtr inMem, + MKLDNNMemoryPtr weightMem, + MKLDNNMemoryPtr outMem, + const mkldnn::primitive_attr &attr, + const std::vector& binPostOpsArgs, + const mkldnn::engine& engine) { + 
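/* A hedged summary (not patch code) of the pattern this constructor sets up:
 * any argument whose runtime memory descriptor differs from the one the chosen
 * primitive descriptor expects gets an IntermReorder, so exec() runs
 *
 *     reorder(src)     -> pd.diff_dst_desc()   (only if layouts differ)
 *     reorder(weights) -> pd.weights_desc()    (only if layouts differ)
 *     convolution_backward_data(primArgs)
 *     reorder to dst from pd.diff_src_desc()   (only if layouts differ)
 *
 * so the primitive always consumes and produces exactly the layouts it was
 * compiled for, independent of the layouts held by the graph edges.
 */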
execPrim.reset(new mkldnn::convolution_backward_data(pd)); + + if (inMem->GetPrimitive().get_desc() != pd.diff_dst_desc()) { + inputReorders.push_back(IntermReorder(inMem, pd.diff_dst_desc(), engine)); + primArgs[DNNL_ARG_DIFF_DST] = inputReorders.back().getToMem()->GetPrimitive(); + } else { + primArgs[DNNL_ARG_DIFF_DST] = inMem->GetPrimitive(); + } + + if (weightMem->GetPrimitive().get_desc() != pd.weights_desc()) { + inputReorders.push_back(IntermReorder(weightMem, pd.weights_desc(), engine)); + primArgs[DNNL_ARG_WEIGHTS] = inputReorders.back().getToMem()->GetPrimitive(); + } else { + primArgs[DNNL_ARG_WEIGHTS] = weightMem->GetPrimitive(); + } + + if (outMem->GetPrimitive().get_desc() != pd.diff_src_desc()) { + outputReorders.push_back(IntermReorder(pd.diff_src_desc(), outMem, engine)); + primArgs[DNNL_ARG_DIFF_SRC] = outputReorders.back().getFromMem()->GetPrimitive(); + } else { + primArgs[DNNL_ARG_DIFF_SRC] = outMem->GetPrimitive(); + } + MKLDNNNode::appendPostOpArgs(attr, primArgs, binPostOpsArgs); +} + +MKLDNNDeconvolutionNode::DeconvExecutorInt8::DeconvExecutorInt8(const mkldnn::deconvolution_forward::primitive_desc& pd, + MKLDNNMemoryPtr inMem, + MKLDNNMemoryPtr weightMem, + MKLDNNMemoryPtr outMem, + const mkldnn::primitive_attr &attr, + const std::vector& binPostOpsArgs, + const mkldnn::engine& engine) { + execPrim.reset(new mkldnn::deconvolution_forward(pd)); + + if (inMem->GetPrimitive().get_desc() != pd.src_desc()) { + inputReorders.push_back(IntermReorder(inMem, pd.src_desc(), engine)); + primArgs[DNNL_ARG_SRC] = inputReorders.back().getToMem()->GetPrimitive(); + } else { + primArgs[DNNL_ARG_SRC] = inMem->GetPrimitive(); + } + + if (weightMem->GetPrimitive().get_desc() != pd.weights_desc()) { + inputReorders.push_back(IntermReorder(weightMem, pd.weights_desc(), engine)); + primArgs[DNNL_ARG_WEIGHTS] = inputReorders.back().getToMem()->GetPrimitive(); + } else { + primArgs[DNNL_ARG_WEIGHTS] = weightMem->GetPrimitive(); + } + + if (outMem->GetPrimitive().get_desc() != pd.dst_desc()) { + outputReorders.push_back(IntermReorder(pd.dst_desc(), outMem, engine)); + primArgs[DNNL_ARG_DST] = outputReorders.back().getFromMem()->GetPrimitive(); + } else { + primArgs[DNNL_ARG_DST] = outMem->GetPrimitive(); + } + MKLDNNNode::appendPostOpArgs(attr, primArgs, binPostOpsArgs); +} + +std::vector MKLDNNDeconvolutionNode::readOutputSpatialDims() const { + if (getParentEdges().size() < 3) { + IE_THROW() << "Can't get output spatial dims. 
Inputs number = " << getParentEdges().size(); + } + const auto &shapeMemPtr = getParentEdgesAtPort(2)[0]->getMemoryPtr(); + if (!shapeMemPtr || !shapeMemPtr->GetPrimitivePtr()) { + IE_THROW() << "'output_shape' input memory is not allocated."; + } + const auto spDimsNum = getInputShapeAtPort(0).getRank() - 2; + if (shapeMemPtr->getStaticDims()[0] != spDimsNum) { + IE_THROW() << "Can't read output spatial dims, beause 'output_shape' input has incorrect number of elements"; + } + const int32_t *outShapePtr = reinterpret_cast(shapeMemPtr->GetPtr()); + std::vector outSpDims(outShapePtr, outShapePtr + shapeMemPtr->getStaticDims()[0]); + return outSpDims; +} + REG_MKLDNN_PRIM_FOR(MKLDNNDeconvolutionNode, Deconvolution); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.h index 32837b4d59c..6af4a3d35ed 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.h @@ -13,6 +13,10 @@ namespace MKLDNNPlugin { class MKLDNNDeconvolutionNode : public MKLDNNNode { + using DefaultDeconvDescs = std::pair, + std::shared_ptr>; + using Int8DeconvDesc = std::shared_ptr; + public: MKLDNNDeconvolutionNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); @@ -39,27 +43,120 @@ public: static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; bool canFuse(const MKLDNNNodePtr& node) const override; - const InferenceEngine::SizeVector& getWeightDims() { return weightDims; } - const std::vector& getStride() { return stride; } + const VectorDims& getWeightDims() const { return getInputShapeAtPort(1).getStaticDims(); } + const std::vector& getStride() const { return stride; } + + void prepareParams() override; + void execute(mkldnn::stream strm) override; + void executeDynamicImpl(mkldnn::stream strm) override { execute(strm); } + bool needShapeInfer() const override; + std::vector shapeInfer() const override; private: + class DeconvExecutor { + protected: + class IntermReorder { + public: + IntermReorder(MKLDNNMemoryPtr memFrom, const mkldnn::memory::desc& descTo, const mkldnn::engine& engine); + IntermReorder(const mkldnn::memory::desc& descFrom, MKLDNNMemoryPtr memTo, const mkldnn::engine& engine); + MKLDNNMemoryPtr getFromMem() const { return m_memFrom; } + MKLDNNMemoryPtr getToMem() const { return m_memTo; } + void exec(mkldnn::stream strm); + + private: + MKLDNNMemoryPtr m_memFrom; + MKLDNNMemoryPtr m_memTo; + mkldnn::reorder m_reorder; + }; + + public: + void exec(mkldnn::stream strm); + virtual ~DeconvExecutor() = default; + + protected: + DeconvExecutor() = default; + std::vector inputReorders; + MKLDNNPrimitive execPrim; + std::vector outputReorders; + std::unordered_map primArgs; + }; + + using executorPtr = std::shared_ptr; + executorPtr execPtr = nullptr; + + class DeconvExecutorDefault : public DeconvExecutor { + public: + DeconvExecutorDefault(const mkldnn::convolution_backward_data::primitive_desc& pd, + MKLDNNMemoryPtr inMem, + MKLDNNMemoryPtr weightMem, + MKLDNNMemoryPtr outMem, + const mkldnn::primitive_attr &attr, + const std::vector& binPostOpsArgs, + const mkldnn::engine& engine); + }; + + class DeconvExecutorInt8 : public DeconvExecutor { + public: + DeconvExecutorInt8(const mkldnn::deconvolution_forward::primitive_desc& pd, + MKLDNNMemoryPtr inMem, + MKLDNNMemoryPtr weightMem, + MKLDNNMemoryPtr outMem, + const mkldnn::primitive_attr &attr, + const 
std::vector& binPostOpsArgs, + const mkldnn::engine& engine); + }; + bool withGroups = false; bool isDW = false; bool isInt8 = false; + bool autoPad = false; + bool externOutShape = false; size_t groupNum = 1; size_t IC; size_t OC; std::vector kernel; std::vector stride; std::vector dilation; - std::vector paddingL; - std::vector paddingR; - InferenceEngine::SizeVector weightDims; - std::vector> descs_fwd; - std::vector> descs_bwd; + ov::CoordinateDiff paddingL; + ov::CoordinateDiff paddingR; + ov::CoordinateDiff outputPadding; + std::vector lastOutputSpatialDims; + VectorDims int8WeightDims; + + Shape inShape; + + AttrPtr pAttr; mkldnn::primitive_attr attr; - void setPostOps(mkldnn::primitive_attr &attr); + void setPostOps(mkldnn::primitive_attr &attr, const VectorDims &dims); + + VectorDims shapeInferInternal(const VectorDims &inDims, std::vector outSpDims) const; + void initPadding(std::shared_ptr op, const Shape &inShape, const std::vector& outSpDims); + void initPaddingR(const Shape &inShape, const Shape &outShape); + std::vector readOutputSpatialDims() const; + std::pair makeDummyInOutShape(); + + DefaultDeconvDescs createDescriptorInternalDefault(const mkldnn::memory::desc& in_candidate, + const mkldnn::memory::desc& wgh_candidate, + const mkldnn::memory::desc& out_candidate, + mkldnn::algorithm alg) const; + Int8DeconvDesc createDescriptorInternalInt8(const mkldnn::memory::desc& in_candidate, + const mkldnn::memory::desc& wgh_candidate, + const mkldnn::memory::desc& out_candidate) const; + std::shared_ptr createDefaultMkldnnDeconvDesc(const mkldnn::memory::desc& srcDesc, + const mkldnn::memory::desc& wghDesc, + const mkldnn::memory::desc& dstDesc, + bool isWinograd) const; + std::shared_ptr createInt8MkldnnDeconvDesc(const mkldnn::memory::desc& srcDesc, + const mkldnn::memory::desc& wghDesc, + const mkldnn::memory::desc& dstDesc) const; + + void createDeconvPrim(std::shared_ptr desc, + MKLDNNMemoryPtr srcMemPtr, + MKLDNNMemoryPtr wghMemPtr, + MKLDNNMemoryPtr dstMemPtr, + AttrPtr attr, + impl_desc_type selectedImpl); std::string errorPrefix; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fullyconnected_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fullyconnected_node.cpp index 8eaea33af95..97c509083a3 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fullyconnected_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fullyconnected_node.cpp @@ -147,7 +147,7 @@ void MKLDNNFullyConnectedNode::createPrimitive() { else primArgs = {{DNNL_ARG_SRC, src}, {DNNL_ARG_WEIGHTS, getParentEdgeAt(WEIGHTS_ID)->getMemory().GetPrimitive()}, {DNNL_ARG_DST, dst}}; - appendPostOpArgs(*attr); + appendPostOpArgs(*attr, primArgs, binaryPostOpsArgs); } void MKLDNNFullyConnectedNode::execute(mkldnn::stream strm) { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_matmul_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_matmul_node.cpp index 944f65ff5f0..c443eedf2c2 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_matmul_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_matmul_node.cpp @@ -421,7 +421,7 @@ void MKLDNNMatMulNode::prepareParams() { if (withBiases) primArgs[DNNL_ARG_BIAS] = getParentEdgeAt(2)->getMemoryPtr()->GetPrimitive(); - appendPostOpArgs(*attr); + appendPostOpArgs(*attr, primArgs, binaryPostOpsArgs); } void MKLDNNMatMulNode::executeDynamicImpl(dnnl::stream strm) { diff --git a/src/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp 
b/src/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp index 55772b5938a..e9d0e22bc4e 100644 --- a/src/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp +++ b/src/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp @@ -152,6 +152,7 @@ std::vector disabledTestPatterns() { // bad accuracy R"(.*smoke_FakeQuantizeLayerCPUTest_Decompos. *IS=_TS=\(\(4\.5\.6\.7\)\)_RS=\(\(1\.1\.6\.1\)\)_\(\(1\.5\.6\.1\)\)_\(\(1\.1\.1\.1\)\)_\(\(1\.1\.6\.1\)\).*)", + // Issue: 71121 R"(.*smoke_Proposal*.*TS=\(2.*)", // TODO : CVS-69533 @@ -165,6 +166,9 @@ std::vector disabledTestPatterns() { // Failure happened on win and macos for current seeds. R"(.*CTCLossLayerTest.*CMR=1.*)", R"(.*CTCLossLayerCPUTest.*ctcMergeRepeated=1.*)", + // Issue: 71756 + R"(.*Deconv_.*D_(Blocked|DW|1x1)_.*DeconvolutionLayerCPUTest\.CompareWithRefs.*inFmts=(nChw16c|nCdhw16c)_outFmts=(nChw16c|nCdhw16c)_primitive=jit_avx512_.*Fused=Multiply\(PerChannel\)\.Add\(PerChannel\).*)", + R"(.*smoke_GroupDeconv_(2|3)D_Blocked_BF16.*S=(\(2\.2\)|\(2\.2\.2\))_PB=(\(0\.0\)|\(0\.0\.0\))_PE=(\(0\.0\)|\(0\.0\.0\))_D=(\(1\.1\)|\(1\.1\.1\))_.*_O=64_G=4.*)" }; #define FIX_62820 0 diff --git a/src/tests/functional/plugin/cpu/single_layer_tests/convolution_backprop_data.cpp b/src/tests/functional/plugin/cpu/single_layer_tests/convolution_backprop_data.cpp index d4a2f3d414f..52d50c6f07e 100755 --- a/src/tests/functional/plugin/cpu/single_layer_tests/convolution_backprop_data.cpp +++ b/src/tests/functional/plugin/cpu/single_layer_tests/convolution_backprop_data.cpp @@ -5,38 +5,78 @@ #include "test_utils/cpu_test_utils.hpp" #include "test_utils/convolution_params.hpp" #include "test_utils/fusing_test_utils.hpp" -#include "shared_test_classes/base/layer_test_utils.hpp" -#include "ngraph_functions/utils/ngraph_helpers.hpp" +#include "shared_test_classes/base/ov_subgraph.hpp" +#include "functional_test_utils/ov_tensor_utils.hpp" #include "ngraph_functions/builders.hpp" #include +#include "openvino/core/preprocess/pre_post_process.hpp" - -using namespace InferenceEngine; using namespace CPUTestUtils; +using namespace ov::test; namespace CPULayerTestsDefinitions { -using LayerTestsDefinitions::convBackpropDataSpecificParams; -using LayerTestsDefinitions::convBackpropDataLayerTestParamsSet; -typedef std::tuple< - convBackpropDataLayerTestParamsSet, - CPUSpecificParams, - fusingSpecificParams, - std::map > deconvLayerCPUTestParamsSet; +using DeconvSpecParams = LayerTestsDefinitions::convBackpropDataSpecificParams; -class DeconvolutionLayerCPUTest : public testing::WithParamInterface, - virtual public LayerTestsUtils::LayerTestsCommon, public CpuTestWithFusing { +using DeconvInputData = std::tuple>>; // values for 'output_shape' + +using DeconvLayerCPUTestParamsSet = std::tuple>; + +class DeconvolutionLayerCPUTest : public testing::WithParamInterface, + virtual public SubgraphBaseTest, public CpuTestWithFusing { public: - static std::string getTestCaseName(testing::TestParamInfo obj) { - convBackpropDataLayerTestParamsSet basicParamsSet; - CPUSpecificParams cpuParams; + static std::string getTestCaseName(testing::TestParamInfo obj) { + DeconvSpecParams basicParamsSet; + DeconvInputData inputData; + ElementType prec; fusingSpecificParams fusingParams; + CPUSpecificParams cpuParams; std::map additionalConfig; - std::tie(basicParamsSet, cpuParams, fusingParams, additionalConfig) = obj.param; + std::tie(basicParamsSet, inputData, prec, fusingParams, cpuParams, additionalConfig) = obj.param; + + 
ngraph::op::PadType padType; + InferenceEngine::SizeVector kernel, stride, dilation; + std::vector padBegin, padEnd, outPadding; + size_t convOutChannels; + std::tie(kernel, stride, padBegin, padEnd, dilation, convOutChannels, padType, outPadding) = basicParamsSet; + + InputShape inputShape; + ngraph::helpers::InputLayerType outShapeType; + std::vector> outShapeData; + std::tie(inputShape, outShapeType, outShapeData) = inputData; std::ostringstream result; - result << LayerTestsDefinitions::ConvolutionBackpropDataLayerTest::getTestCaseName(testing::TestParamInfo( - basicParamsSet, 0)); + result << "IS="; + result << CommonTestUtils::partialShape2str({inputShape.first}) << "_"; + result << "TS="; + for (const auto& shape : inputShape.second) { + result << "("; + result << CommonTestUtils::vec2str(shape); + result << ")_"; + } + result << "PRC=" << prec << "_"; + result << "K=" << CommonTestUtils::vec2str(kernel) << "_"; + result << "S=" << CommonTestUtils::vec2str(stride) << "_"; + result << "PB=" << CommonTestUtils::vec2str(padBegin) << "_"; + result << "PE=" << CommonTestUtils::vec2str(padEnd) << "_"; + result << "D=" << CommonTestUtils::vec2str(dilation) << "_"; + result << "OP=" << CommonTestUtils::vec2str(outPadding) << "_"; + result << "O=" << convOutChannels << "_"; + result << "AP=" << padType << "_"; + result << "OUT_SH=" << outShapeType << "_"; + result << "OUT_D="; + for (const auto& data : outShapeData) { + result << "("; + result << CommonTestUtils::vec2str(data); + result << ")_"; + } result << CPUTestsBase::getTestCaseName(cpuParams); result << CpuTestWithFusing::getTestCaseName(fusingParams); @@ -50,53 +90,159 @@ public: return result.str(); } + + void generate_inputs(const std::vector& targetInputStaticShapes) override { + inputs.clear(); + const auto& funcInputs = function->inputs(); + for (int i = 0; i < funcInputs.size(); ++i) { + const auto& funcInput = funcInputs[i]; + ov::runtime::Tensor tensor; + + if (i == 1) { + tensor = ov::runtime::Tensor(funcInput.get_element_type(), targetInputStaticShapes[i], outShapeData[inferRequestNum].data()); + } else { + tensor = ov::test::utils::create_and_fill_tensor(funcInput.get_element_type(), targetInputStaticShapes[i], 2560, 0, 256); + } + + inputs.insert({funcInput.get_node_shared_ptr(), tensor}); + } + inferRequestNum++; + } + + void init_ref_function(std::shared_ptr &funcRef, const std::vector& targetInputStaticShapes) override { + if (function->get_parameters().size() == 1) { + ngraph::helpers::resize_function(funcRef, targetInputStaticShapes); + } else { + // WA: output_shape depends on 3rd deconvolution input data + // but the reference implementation doesn't implement shape inference + // so we need to build a new ngraph function and replace the 3rd input parameter with a constant + // to get valid output shapes + funcRef = createGraph({targetInputStaticShapes[0]}, ngraph::helpers::InputLayerType::CONSTANT); + } + } + + void validate() override { + if (function->get_parameters().size() == 2) { + auto pos = std::find_if(inputs.begin(), inputs.end(), + [](const std::pair, ov::runtime::Tensor> ¶ms) { + return params.first->get_friendly_name() == "param_1"; + }); + IE_ASSERT(pos != inputs.end()); + inputs.erase(pos); + } + SubgraphBaseTest::validate(); + } + + void configure_model() override { + ov::preprocess::PrePostProcessor p(function); + { + auto& params = function->get_parameters(); + for (size_t i = 0; i < params.size(); i++) { + if (i > 0) { + continue; + } + if (inType != ov::element::Type_t::undefined) { + 
p.input(i).tensor().set_element_type(inType); + } + } + } + { + auto results = function->get_results(); + for (size_t i = 0; i < results.size(); i++) { + if (outType != ov::element::Type_t::undefined) { + p.output(i).tensor().set_element_type(outType); + } + } + } + function = p.build(); + } + + std::shared_ptr createGraph(const std::vector& inShapes, ngraph::helpers::InputLayerType outShapeType) { + auto params = ngraph::builder::makeDynamicParams(prec, {inShapes.front()}); + std::shared_ptr outShapeNode; + if (!outShapeData.empty()) { + if (outShapeType == ngraph::helpers::InputLayerType::PARAMETER) { + IE_ASSERT(inputDynamicShapes.size() == 2); + auto outShapeParam = std::make_shared(ngraph::element::i32, inputDynamicShapes.back()); + params.push_back(outShapeParam); + outShapeNode = outShapeParam; + } else { + outShapeNode = ngraph::opset8::Constant::create(ngraph::element::i32, {outShapeData[inferRequestNum].size()}, outShapeData[inferRequestNum]); + } + } + + for (size_t i = 0; i < params.size(); i++) { + params[i]->set_friendly_name(std::string("param_") + std::to_string(i)); + } + + std::shared_ptr deconv; + if (!outShapeData.empty()) { + IE_ASSERT(outShapeNode != nullptr); + deconv = ngraph::builder::makeConvolutionBackpropData(params[0], outShapeNode, prec, kernel, stride, padBegin, + padEnd, dilation, padType, convOutChannels); + } else { + deconv = ngraph::builder::makeConvolutionBackpropData(params[0], prec, kernel, stride, padBegin, + padEnd, dilation, padType, convOutChannels, false, outPadding); + } + + return makeNgraphFunction(prec, params, deconv, "DeconvCPU"); + } + protected: InferenceEngine::SizeVector kernel, stride; + void SetUp() override { - convBackpropDataLayerTestParamsSet basicParamsSet; - CPUSpecificParams cpuParams; + rel_threshold = 1e-4f; + + targetDevice = CommonTestUtils::DEVICE_CPU; + + DeconvSpecParams basicParamsSet; + DeconvInputData inputData; fusingSpecificParams fusingParams; + CPUSpecificParams cpuParams; std::map additionalConfig; - std::tie(basicParamsSet, cpuParams, fusingParams, additionalConfig) = this->GetParam(); + std::tie(basicParamsSet, inputData, prec, fusingParams, cpuParams, additionalConfig) = this->GetParam(); + + InputShape inputShape; + ngraph::helpers::InputLayerType outShapeType; + std::tie(inputShape, outShapeType, outShapeData) = inputData; configuration.insert(additionalConfig.begin(), additionalConfig.end()); - std::tie(inFmts, outFmts, priority, selectedType) = cpuParams; std::tie(postOpMgrPtr, fusedOps) = fusingParams; - convBackpropDataSpecificParams convParams; - std::vector inputShape; - std::vector outputShape; - auto netPrecision = InferenceEngine::Precision::UNSPECIFIED; - std::tie(convParams, netPrecision, inPrc, outPrc, inLayout, outLayout, inputShape, outputShape, targetDevice) = basicParamsSet; + std::tie(kernel, stride, padBegin, padEnd, dilation, convOutChannels, padType, outPadding) = basicParamsSet; - if (inPrc == Precision::UNSPECIFIED) { - selectedType += std::string("_") + Precision(Precision::FP32).name(); + if (additionalConfig[InferenceEngine::PluginConfigParams::KEY_ENFORCE_BF16] == InferenceEngine::PluginConfigParams::YES) { + inType = outType = prec = ElementType::bf16; + rel_threshold = 1e-2f; } else { - selectedType += std::string("_") + inPrc.name(); + inType = outType = prec; } - ngraph::op::PadType padType; - InferenceEngine::SizeVector dilation; - std::vector padBegin, padEnd, outPadding; - size_t convOutChannels; - std::tie(kernel, stride, padBegin, padEnd, dilation, convOutChannels, padType, 
outPadding) = convParams; - auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); + selectedType = makeSelectedTypeStr(selectedType, prec); - auto inputParams = ngraph::builder::makeParams(ngraph::element::f32, { inputShape }); - auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes(inputParams)); - - auto deconvolutionNode = ngraph::builder::makeConvolutionBackpropData(paramOuts.front(), ngPrc, kernel, stride, padBegin, - padEnd, dilation, padType, convOutChannels, false, outPadding); - - if (!outputShape.empty()) { - auto outShape = ngraph::opset3::Constant::create(ngraph::element::i64, {outputShape.size()}, outputShape); - deconvolutionNode = ngraph::builder::makeConvolutionBackpropData(paramOuts.front(), outShape, ngPrc, kernel, stride, padBegin, - padEnd, dilation, padType, convOutChannels); + std::vector paramsShapes; + paramsShapes.push_back(inputShape); + if (!outShapeData.empty() && outShapeType == ngraph::helpers::InputLayerType::PARAMETER) { + const auto outShapeDims = ov::Shape{outShapeData.front().size()}; + paramsShapes.push_back(InputShape{outShapeDims, std::vector(inputShape.second.size(), outShapeDims)}); } - function = makeNgraphFunction(ngPrc, inputParams, deconvolutionNode, "convolutionBackpropData"); + init_input_shapes(paramsShapes); + + function = createGraph(inputDynamicShapes, outShapeType); } + +private: + ElementType prec; + ngraph::op::PadType padType; + InferenceEngine::SizeVector dilation; + std::vector padBegin, padEnd, outPadding; + size_t convOutChannels; + ngraph::helpers::InputLayerType outShapeType; + std::vector> outShapeData; + size_t inferRequestNum = 0; }; TEST_P(DeconvolutionLayerCPUTest, CompareWithRefs) { @@ -113,7 +259,7 @@ TEST_P(DeconvolutionLayerCPUTest, CompareWithRefs) { } } - Run(); + run(); CheckPluginRelatedResults(executableNetwork, "Deconvolution"); } @@ -126,29 +272,29 @@ const std::vector fusingParamsSet{ }; const std::map cpuEmptyPluginConfig; -const std::map cpuBF16PluginConfig = { { PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::YES } }; -const std::vector emptyOutputShape = { {} }; +const std::mapcpuBF16PluginConfig = { { InferenceEngine::PluginConfigParams::KEY_ENFORCE_BF16, + InferenceEngine::PluginConfigParams::YES } }; const std::vector> emptyOutputPadding = { {} }; /* ============= Deconvolution params (planar layout) ============= */ -const SizeVector numOutChannels_Planar = { 6 }; +const InferenceEngine::SizeVector numOutChannels_Planar = { 6 }; /* ============= Deconvolution params (blocked layout) ============= */ -const SizeVector numOutChannels_Blocked = { 64 }; +const InferenceEngine::SizeVector numOutChannels_Blocked = { 64 }; /* ============= Deconvolution params (2D) ============= */ -const std::vector kernels2d = { {3, 3}, {1, 1} }; -const std::vector strides2d = { {1, 1}, {2, 2} }; +const std::vector kernels2d = { {3, 3}, {1, 1} }; +const std::vector strides2d = { {1, 1}, {2, 2} }; const std::vector> padBegins2d = { {0, 0} }; const std::vector> padEnds2d = { {0, 0} }; -const std::vector dilations2d = { {1, 1} }; +const std::vector dilations2d = { {1, 1} }; /* ============= Deconvolution params (3D) ============= */ -const std::vector kernels3d = { {3, 3, 3}, {1, 1, 1} }; -const std::vector strides3d = { {1, 1, 1}, {2, 2, 2} }; +const std::vector kernels3d = { {3, 3, 3}, {1, 1, 1} }; +const std::vector strides3d = { {1, 1, 1}, {2, 2, 2} }; const std::vector> padBegins3d = { {0, 0, 0} }; const std::vector> padEnds3d = { {0, 0, 0} }; -const 
std::vector dilations3d = { {1, 1, 1} }; +const std::vector dilations3d = { {1, 1, 1} }; /* ============= */ /* INSTANCES */ @@ -164,41 +310,99 @@ const auto convParams_ExplicitPadding_Planar_2D = ::testing::Combine( ::testing::ValuesIn(emptyOutputPadding) ); +const std::vector Planar_2D_inputs_smoke = { + DeconvInputData{ + InputShape{{}, {{ 2, 12, 7, 7 }}}, + ngraph::helpers::InputLayerType::CONSTANT, + {} + }, + DeconvInputData{ + InputShape{{-1, 12, -1, -1}, {{ 2, 12, 7, 7}, { 2, 12, 5, 7}, { 1, 12, 9, 4}}}, + ngraph::helpers::InputLayerType::PARAMETER, + {{15, 15}, {9, 10}, {9, 9}} + } +}; + +const std::vector Planar_2D_inputs_nightly = { + DeconvInputData{ + InputShape{{-1, 12, -1, -1}, {{ 2, 12, 7, 7}, { 2, 12, 5, 7}, { 1, 12, 9, 4}}}, + ngraph::helpers::InputLayerType::CONSTANT, + {} + }, + DeconvInputData{ + InputShape{{-1, 12, -1, -1}, {{ 2, 12, 7, 7}, { 2, 12, 5, 7}, { 1, 12, 9, 4}}}, + ngraph::helpers::InputLayerType::CONSTANT, + {{15, 15}} + } +}; + INSTANTIATE_TEST_SUITE_P(smoke_Deconv_2D_Planar_FP32, DeconvolutionLayerCPUTest, ::testing::Combine( - ::testing::Combine( - convParams_ExplicitPadding_Planar_2D, - ::testing::Values(Precision::FP32), - ::testing::Values(Precision::UNSPECIFIED), - ::testing::Values(Precision::UNSPECIFIED), - ::testing::Values(Layout::ANY), - ::testing::Values(Layout::ANY), - ::testing::Values(std::vector({ 2, 12, 7, 7 })), - ::testing::ValuesIn(emptyOutputShape), - ::testing::Values(CommonTestUtils::DEVICE_CPU)), - ::testing::ValuesIn(filterCPUInfoForDevice({conv_gemm_2D})), + convParams_ExplicitPadding_Planar_2D, + ::testing::ValuesIn(Planar_2D_inputs_smoke), + ::testing::Values(ElementType::f32), ::testing::ValuesIn(fusingParamsSet), + ::testing::ValuesIn(filterCPUInfoForDevice({conv_gemm_2D})), ::testing::Values(cpuEmptyPluginConfig)), DeconvolutionLayerCPUTest::getTestCaseName); INSTANTIATE_TEST_SUITE_P(smoke_Deconv_2D_Planar_BF16, DeconvolutionLayerCPUTest, ::testing::Combine( - ::testing::Combine( - convParams_ExplicitPadding_Planar_2D, - ::testing::Values(Precision::FP32), - ::testing::Values(Precision::BF16), - ::testing::Values(Precision::BF16), - ::testing::Values(Layout::ANY), - ::testing::Values(Layout::ANY), - ::testing::Values(std::vector({ 2, 12, 7, 7 })), - ::testing::ValuesIn(emptyOutputShape), - ::testing::Values(CommonTestUtils::DEVICE_CPU)), - ::testing::ValuesIn(filterCPUInfoForDevice({conv_gemm_2D})), + convParams_ExplicitPadding_Planar_2D, + ::testing::ValuesIn(Planar_2D_inputs_smoke), + ::testing::Values(ElementType::f32), ::testing::ValuesIn(fusingParamsSet), + ::testing::ValuesIn(filterCPUInfoForDevice({conv_gemm_2D})), ::testing::Values(cpuBF16PluginConfig)), DeconvolutionLayerCPUTest::getTestCaseName); -/* ============= GroupDeconvolution (Planar 3D) ============= */ +INSTANTIATE_TEST_SUITE_P(nightly_Deconv_2D_Planar_FP32, DeconvolutionLayerCPUTest, + ::testing::Combine( + convParams_ExplicitPadding_Planar_2D, + ::testing::ValuesIn(Planar_2D_inputs_nightly), + ::testing::Values(ElementType::f32), + ::testing::ValuesIn(fusingParamsSet), + ::testing::ValuesIn(filterCPUInfoForDevice({conv_gemm_2D})), + ::testing::Values(cpuEmptyPluginConfig)), + DeconvolutionLayerCPUTest::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(nightly_Deconv_2D_Planar_BF16, DeconvolutionLayerCPUTest, + ::testing::Combine( + convParams_ExplicitPadding_Planar_2D, + ::testing::ValuesIn(Planar_2D_inputs_nightly), + ::testing::Values(ElementType::f32), + ::testing::ValuesIn(fusingParamsSet), + ::testing::ValuesIn(filterCPUInfoForDevice({conv_gemm_2D})), + 
::testing::Values(cpuBF16PluginConfig)), + DeconvolutionLayerCPUTest::getTestCaseName); + +/* ============= Deconvolution (Planar 3D) ============= */ +const std::vector Planar_3D_inputs_smoke = { + DeconvInputData{ + InputShape{{}, {{ 2, 12, 7, 7, 7 }}}, + ngraph::helpers::InputLayerType::CONSTANT, + {} + }, + DeconvInputData{ + InputShape{{-1, 12, -1, -1, -1}, {{ 2, 12, 7, 7, 7}, { 2, 12, 5, 7, 7}, { 1, 12, 9, 4, 9}}}, + ngraph::helpers::InputLayerType::PARAMETER, + {{15, 15, 15}, {9, 10, 10}, {9, 9, 9}} + } +}; + +const std::vector Planar_3D_inputs_nightly = { + DeconvInputData{ + InputShape{{-1, 12, -1, -1, -1}, {{ 2, 12, 7, 7, 7}, { 2, 12, 5, 7, 7}, { 1, 12, 9, 4, 9}}}, + ngraph::helpers::InputLayerType::CONSTANT, + {} + }, + DeconvInputData{ + InputShape{{-1, 12, -1, -1, -1}, {{ 2, 12, 7, 7, 7}, { 2, 12, 5, 7, 7}, { 1, 12, 9, 4, 9}}}, + ngraph::helpers::InputLayerType::CONSTANT, + {{15, 15, 15}} + } +}; + const auto convParams_ExplicitPadding_Planar_3D = ::testing::Combine( ::testing::ValuesIn(kernels3d), ::testing::ValuesIn(strides3d), @@ -212,39 +416,71 @@ const auto convParams_ExplicitPadding_Planar_3D = ::testing::Combine( INSTANTIATE_TEST_SUITE_P(smoke_Deconv_3D_Planar_FP32, DeconvolutionLayerCPUTest, ::testing::Combine( - ::testing::Combine( - convParams_ExplicitPadding_Planar_3D, - ::testing::Values(Precision::FP32), - ::testing::Values(Precision::UNSPECIFIED), - ::testing::Values(Precision::UNSPECIFIED), - ::testing::Values(Layout::ANY), - ::testing::Values(Layout::ANY), - ::testing::Values(std::vector({ 2, 12, 7, 7, 7 })), - ::testing::ValuesIn(emptyOutputShape), - ::testing::Values(CommonTestUtils::DEVICE_CPU)), - ::testing::ValuesIn(filterCPUInfoForDevice({conv_gemm_3D})), + convParams_ExplicitPadding_Planar_3D, + ::testing::ValuesIn(Planar_3D_inputs_smoke), + ::testing::Values(ElementType::f32), ::testing::ValuesIn(fusingParamsSet), + ::testing::ValuesIn(filterCPUInfoForDevice({conv_gemm_3D})), ::testing::Values(cpuEmptyPluginConfig)), DeconvolutionLayerCPUTest::getTestCaseName); INSTANTIATE_TEST_SUITE_P(smoke_Deconv_3D_Planar_BF16, DeconvolutionLayerCPUTest, ::testing::Combine( - ::testing::Combine( - convParams_ExplicitPadding_Planar_3D, - ::testing::Values(Precision::FP32), - ::testing::Values(Precision::BF16), - ::testing::Values(Precision::BF16), - ::testing::Values(Layout::ANY), - ::testing::Values(Layout::ANY), - ::testing::Values(std::vector({ 2, 12, 7, 7, 7 })), - ::testing::ValuesIn(emptyOutputShape), - ::testing::Values(CommonTestUtils::DEVICE_CPU)), - ::testing::ValuesIn(filterCPUInfoForDevice({conv_gemm_3D})), + convParams_ExplicitPadding_Planar_3D, + ::testing::ValuesIn(Planar_3D_inputs_smoke), + ::testing::Values(ElementType::f32), ::testing::ValuesIn(fusingParamsSet), + ::testing::ValuesIn(filterCPUInfoForDevice({conv_gemm_3D})), ::testing::Values(cpuBF16PluginConfig)), DeconvolutionLayerCPUTest::getTestCaseName); -/* ============= GroupDeconvolution (Blocked 2D) ============= */ +INSTANTIATE_TEST_SUITE_P(nightly_Deconv_3D_Planar_FP32, DeconvolutionLayerCPUTest, + ::testing::Combine( + convParams_ExplicitPadding_Planar_3D, + ::testing::ValuesIn(Planar_3D_inputs_nightly), + ::testing::Values(ElementType::f32), + ::testing::ValuesIn(fusingParamsSet), + ::testing::ValuesIn(filterCPUInfoForDevice({conv_gemm_3D})), + ::testing::Values(cpuEmptyPluginConfig)), + DeconvolutionLayerCPUTest::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(nightly_Deconv_3D_Planar_BF16, DeconvolutionLayerCPUTest, + ::testing::Combine( + convParams_ExplicitPadding_Planar_3D, + 
::testing::ValuesIn(Planar_3D_inputs_nightly), + ::testing::Values(ElementType::f32), + ::testing::ValuesIn(fusingParamsSet), + ::testing::ValuesIn(filterCPUInfoForDevice({conv_gemm_3D})), + ::testing::Values(cpuBF16PluginConfig)), + DeconvolutionLayerCPUTest::getTestCaseName); + +/* ============= Deconvolution (Blocked 2D) ============= */ +const std::vector Blocked_2D_inputs_smoke = { + DeconvInputData{ + InputShape{{}, {{ 2, 67, 7, 7 }}}, + ngraph::helpers::InputLayerType::CONSTANT, + {} + }, + DeconvInputData{ + InputShape{{-1, 67, -1, -1}, {{ 2, 67, 7, 7}, { 2, 67, 5, 7}, { 1, 67, 9, 4}}}, + ngraph::helpers::InputLayerType::PARAMETER, + {{15, 15}, {9, 10}, {9, 9}} + } +}; + +const std::vector Blocked_2D_inputs_nightly = { + DeconvInputData{ + InputShape{{-1, 67, -1, -1}, {{ 2, 67, 7, 7}, { 2, 67, 5, 7}, { 1, 67, 9, 4}}}, + ngraph::helpers::InputLayerType::CONSTANT, + {} + }, + DeconvInputData{ + InputShape{{-1, 67, -1, -1}, {{ 2, 67, 7, 7}, { 2, 67, 5, 7}, { 1, 67, 9, 4}}}, + ngraph::helpers::InputLayerType::CONSTANT, + {{15, 15}} + } +}; + const auto convParams_ExplicitPadding_Blocked_2D = ::testing::Combine( ::testing::ValuesIn(kernels2d), ::testing::ValuesIn(strides2d), @@ -258,92 +494,129 @@ const auto convParams_ExplicitPadding_Blocked_2D = ::testing::Combine( INSTANTIATE_TEST_SUITE_P(smoke_Deconv_2D_Blocked_FP32, DeconvolutionLayerCPUTest, ::testing::Combine( - ::testing::Combine( - convParams_ExplicitPadding_Blocked_2D, - ::testing::Values(Precision::FP32), - ::testing::Values(Precision::UNSPECIFIED), - ::testing::Values(Precision::UNSPECIFIED), - ::testing::Values(Layout::ANY), - ::testing::Values(Layout::ANY), - ::testing::Values(std::vector({ 2, 67, 7, 7 })), - ::testing::ValuesIn(emptyOutputShape), - ::testing::Values(CommonTestUtils::DEVICE_CPU)), - ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_2D})), + convParams_ExplicitPadding_Blocked_2D, + ::testing::ValuesIn(Blocked_2D_inputs_smoke), + ::testing::Values(ElementType::f32), ::testing::ValuesIn(fusingParamsSet), + ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_2D})), ::testing::Values(cpuEmptyPluginConfig)), DeconvolutionLayerCPUTest::getTestCaseName); INSTANTIATE_TEST_SUITE_P(smoke_Deconv_2D_Blocked_BF16, DeconvolutionLayerCPUTest, ::testing::Combine( - ::testing::Combine( - convParams_ExplicitPadding_Blocked_2D, - ::testing::Values(Precision::FP32), - ::testing::Values(Precision::BF16), - ::testing::Values(Precision::BF16), - ::testing::Values(Layout::ANY), - ::testing::Values(Layout::ANY), - ::testing::Values(std::vector({ 2, 67, 7, 7 })), - ::testing::ValuesIn(emptyOutputShape), - ::testing::Values(CommonTestUtils::DEVICE_CPU)), - ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_2D})), + convParams_ExplicitPadding_Blocked_2D, + ::testing::ValuesIn(Blocked_2D_inputs_smoke), + ::testing::Values(ElementType::f32), ::testing::ValuesIn(fusingParamsSet), + ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_2D})), ::testing::Values(cpuBF16PluginConfig)), DeconvolutionLayerCPUTest::getTestCaseName); -/* ============= GroupDeconvolution (Blocked 3D) ============= */ +INSTANTIATE_TEST_SUITE_P(nightly_Deconv_2D_Blocked_FP32, DeconvolutionLayerCPUTest, + ::testing::Combine( + convParams_ExplicitPadding_Blocked_2D, + ::testing::ValuesIn(Blocked_2D_inputs_nightly), + ::testing::Values(ElementType::f32), + ::testing::ValuesIn(fusingParamsSet), + ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_2D})), + ::testing::Values(cpuEmptyPluginConfig)), + DeconvolutionLayerCPUTest::getTestCaseName); + 
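[Editorial note, not part of the patch] The test vectors above pair each dynamic input shape with explicit 'output_shape' values, which the node has to reconcile with the standard ConvolutionBackpropData size arithmetic. Below is a minimal standalone C++ sketch of that arithmetic for the case where no explicit 'output_shape' input is given; all names in it are illustrative and do not come from the plugin sources.

#include <cstddef>
#include <vector>

// Expected deconvolution (ConvolutionBackpropData) output spatial dims:
// the inverse of the convolution output-size formula.
std::vector<std::size_t> deconvOutSpatialDims(const std::vector<std::size_t>& in,
                                              const std::vector<std::size_t>& kernel,
                                              const std::vector<std::size_t>& stride,
                                              const std::vector<std::size_t>& dilation,
                                              const std::vector<std::ptrdiff_t>& padBegin,
                                              const std::vector<std::ptrdiff_t>& padEnd,
                                              const std::vector<std::ptrdiff_t>& outPadding) {
    std::vector<std::size_t> out(in.size());
    for (std::size_t i = 0; i < in.size(); ++i) {
        // out = stride * (in - 1) + dilation * (kernel - 1) + 1 - padBegin - padEnd + outputPadding
        const std::ptrdiff_t dim =
            static_cast<std::ptrdiff_t>(stride[i] * (in[i] - 1) + dilation[i] * (kernel[i] - 1) + 1)
            - padBegin[i] - padEnd[i] + outPadding[i];
        out[i] = static_cast<std::size_t>(dim);
    }
    return out;
}

Sanity check against the data above: a 7x7 input with kernel {3, 3}, stride {2, 2}, zero pads, and no output padding gives 2 * (7 - 1) + (3 - 1) + 1 = 15 per axis, i.e. the {15, 15} entries; when 'output_shape' is supplied explicitly, the spatial dims are taken from it and the node derives the paddings instead.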
+INSTANTIATE_TEST_SUITE_P(nightly_Deconv_2D_Blocked_BF16, DeconvolutionLayerCPUTest, + ::testing::Combine( + convParams_ExplicitPadding_Blocked_2D, + ::testing::ValuesIn(Blocked_2D_inputs_nightly), + ::testing::Values(ElementType::f32), + ::testing::ValuesIn(fusingParamsSet), + ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_2D})), + ::testing::Values(cpuBF16PluginConfig)), + DeconvolutionLayerCPUTest::getTestCaseName); + +/* ============= Deconvolution (Blocked 3D) ============= */ +const std::vector Blocked_3D_inputs_smoke = { + DeconvInputData{ + InputShape{{}, {{ 2, 35, 7, 7, 7 }}}, + ngraph::helpers::InputLayerType::CONSTANT, + {} + }, + DeconvInputData{ + InputShape{{-1, 35, -1, -1, -1}, {{ 1, 35, 5, 5, 5}, { 2, 35, 5, 7, 5}}}, + ngraph::helpers::InputLayerType::PARAMETER, + {{7, 7, 7}, {7, 9, 7}} + } +}; + +const std::vector Blocked_3D_inputs_nightly = { + DeconvInputData{ + InputShape{{-1, 35, -1, -1, -1}, {{ 1, 35, 5, 5, 5}, { 2, 35, 5, 7, 5}}}, + ngraph::helpers::InputLayerType::CONSTANT, + {} + }, + DeconvInputData{ + InputShape{{-1, 35, -1, -1, -1}, {{ 1, 35, 5, 5, 5}, { 2, 35, 5, 7, 5}}}, + ngraph::helpers::InputLayerType::CONSTANT, + {{7, 7, 7}} + } +}; + const auto convParams_ExplicitPadding_Blocked_3D = ::testing::Combine( ::testing::ValuesIn(kernels3d), ::testing::ValuesIn(strides3d), ::testing::ValuesIn(padBegins3d), ::testing::ValuesIn(padEnds3d), ::testing::ValuesIn(dilations3d), - ::testing::ValuesIn(numOutChannels_Blocked), + ::testing::Values(32), ::testing::Values(ngraph::op::PadType::EXPLICIT), ::testing::ValuesIn(emptyOutputPadding) ); INSTANTIATE_TEST_SUITE_P(smoke_Deconv_3D_Blocked_FP32, DeconvolutionLayerCPUTest, ::testing::Combine( - ::testing::Combine( - convParams_ExplicitPadding_Blocked_3D, - ::testing::Values(Precision::FP32), - ::testing::Values(Precision::UNSPECIFIED), - ::testing::Values(Precision::UNSPECIFIED), - ::testing::Values(Layout::ANY), - ::testing::Values(Layout::ANY), - ::testing::Values(std::vector({ 2, 67, 7, 7, 7 })), - ::testing::ValuesIn(emptyOutputShape), - ::testing::Values(CommonTestUtils::DEVICE_CPU)), - ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_3D})), + convParams_ExplicitPadding_Blocked_3D, + ::testing::ValuesIn(Blocked_3D_inputs_smoke), + ::testing::Values(ElementType::f32), ::testing::ValuesIn(fusingParamsSet), + ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_3D})), ::testing::Values(cpuEmptyPluginConfig)), DeconvolutionLayerCPUTest::getTestCaseName); INSTANTIATE_TEST_SUITE_P(smoke_Deconv_3D_Blocked_BF16, DeconvolutionLayerCPUTest, ::testing::Combine( - ::testing::Combine( - convParams_ExplicitPadding_Blocked_3D, - ::testing::Values(Precision::FP32), - ::testing::Values(Precision::BF16), - ::testing::Values(Precision::BF16), - ::testing::Values(Layout::ANY), - ::testing::Values(Layout::ANY), - ::testing::Values(std::vector({ 2, 67, 7, 7, 7 })), - ::testing::ValuesIn(emptyOutputShape), - ::testing::Values(CommonTestUtils::DEVICE_CPU)), - ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_3D})), + convParams_ExplicitPadding_Blocked_3D, + ::testing::ValuesIn(Blocked_3D_inputs_smoke), + ::testing::Values(ElementType::f32), ::testing::ValuesIn(fusingParamsSet), + ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_3D})), + ::testing::Values(cpuBF16PluginConfig)), + DeconvolutionLayerCPUTest::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(nightly_Deconv_3D_Blocked_FP32, DeconvolutionLayerCPUTest, + ::testing::Combine( + convParams_ExplicitPadding_Blocked_3D, + 
::testing::ValuesIn(Blocked_3D_inputs_nightly), + ::testing::Values(ElementType::f32), + ::testing::ValuesIn(fusingParamsSet), + ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_3D})), + ::testing::Values(cpuEmptyPluginConfig)), + DeconvolutionLayerCPUTest::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(nightly_Deconv_3D_Blocked_BF16, DeconvolutionLayerCPUTest, + ::testing::Combine( + convParams_ExplicitPadding_Blocked_3D, + ::testing::ValuesIn(Blocked_3D_inputs_nightly), + ::testing::Values(ElementType::f32), + ::testing::ValuesIn(fusingParamsSet), + ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_3D})), ::testing::Values(cpuBF16PluginConfig)), DeconvolutionLayerCPUTest::getTestCaseName); /* ============= Kernel_1x1 (2D) ============= */ - const auto convParams_ExplicitPadding_1x1_2D = ::testing::Combine( - ::testing::Values(SizeVector({1, 1})), - ::testing::Values(SizeVector({1, 1})), + ::testing::Values(InferenceEngine::SizeVector({1, 1})), + ::testing::Values(InferenceEngine::SizeVector({1, 1})), ::testing::Values(std::vector({0, 0})), ::testing::Values(std::vector({0, 0})), - ::testing::Values(SizeVector({1, 1})), + ::testing::Values(InferenceEngine::SizeVector({1, 1})), ::testing::ValuesIn(numOutChannels_Blocked), ::testing::Values(ngraph::op::PadType::EXPLICIT), ::testing::ValuesIn(emptyOutputPadding) @@ -351,39 +624,89 @@ const auto convParams_ExplicitPadding_1x1_2D = ::testing::Combine( INSTANTIATE_TEST_SUITE_P(smoke_Deconv_2D_1x1_FP32, DeconvolutionLayerCPUTest, ::testing::Combine( - ::testing::Combine( - convParams_ExplicitPadding_1x1_2D, - ::testing::Values(Precision::FP32), - ::testing::Values(Precision::UNSPECIFIED), - ::testing::Values(Precision::UNSPECIFIED), - ::testing::Values(Layout::ANY), - ::testing::Values(Layout::ANY), - ::testing::Values(std::vector({ 2, 67, 7, 7 })), - ::testing::ValuesIn(emptyOutputShape), - ::testing::Values(CommonTestUtils::DEVICE_CPU)), - ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_2D_1x1})), + convParams_ExplicitPadding_1x1_2D, + ::testing::ValuesIn(Blocked_2D_inputs_smoke), + ::testing::Values(ElementType::f32), ::testing::ValuesIn(fusingParamsSet), + ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_2D_1x1})), ::testing::Values(cpuEmptyPluginConfig)), DeconvolutionLayerCPUTest::getTestCaseName); INSTANTIATE_TEST_SUITE_P(smoke_Deconv_2D_1x1_BF16, DeconvolutionLayerCPUTest, ::testing::Combine( - ::testing::Combine( - convParams_ExplicitPadding_1x1_2D, - ::testing::Values(Precision::FP32), - ::testing::Values(Precision::BF16), - ::testing::Values(Precision::BF16), - ::testing::Values(Layout::ANY), - ::testing::Values(Layout::ANY), - ::testing::Values(std::vector({ 2, 67, 7, 7 })), - ::testing::ValuesIn(emptyOutputShape), - ::testing::Values(CommonTestUtils::DEVICE_CPU)), - ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_2D_1x1})), + convParams_ExplicitPadding_1x1_2D, + ::testing::ValuesIn(Blocked_2D_inputs_smoke), + ::testing::Values(ElementType::f32), ::testing::ValuesIn(fusingParamsSet), + ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_2D_1x1})), ::testing::Values(cpuBF16PluginConfig)), DeconvolutionLayerCPUTest::getTestCaseName); -/* ========= */ +/* ============= Reorder + Deconvolution ============= */ +INSTANTIATE_TEST_SUITE_P(smoke_reorder_Deconv_2D, DeconvolutionLayerCPUTest, + ::testing::Combine( + ::testing::Combine(::testing::ValuesIn(kernels2d), + ::testing::Values(InferenceEngine::SizeVector{1, 1}), + ::testing::ValuesIn(padBegins2d), + ::testing::ValuesIn(padEnds2d), + 
::testing::ValuesIn(dilations2d), + ::testing::ValuesIn(numOutChannels_Blocked), + ::testing::Values(ngraph::op::PadType::EXPLICIT), + ::testing::ValuesIn(emptyOutputPadding)), + ::testing::Values(DeconvInputData{InputShape{{-1, 67, -1, -1}, {{ 2, 67, 7, 7}, { 2, 67, 5, 7}, { 1, 67, 9, 4}}}, + ngraph::helpers::InputLayerType::PARAMETER, + {{15, 15}, {9, 10}, {9, 9}}}), + ::testing::Values(ElementType::f32), + ::testing::Values(emptyFusingSpec), + ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_2D})), + ::testing::Values(cpuEmptyPluginConfig)), + DeconvolutionLayerCPUTest::getTestCaseName); + +/* ============= Deconvolution auto padding tests ============= */ +const std::vector inputs_2D_AutoPadding = { + DeconvInputData{ + InputShape{{}, {{ 2, 67, 7, 7 }}}, + ngraph::helpers::InputLayerType::CONSTANT, + {} + }, + DeconvInputData{ + InputShape{{-1, 67, -1, -1}, {{ 2, 67, 7, 7}, { 2, 67, 5, 7}, { 1, 67, 9, 4}}}, + ngraph::helpers::InputLayerType::CONSTANT, + {} + }, + DeconvInputData{ + InputShape{{-1, 67, -1, -1}, {{ 2, 67, 7, 7}, { 2, 67, 5, 7}, { 1, 67, 9, 4}}}, + ngraph::helpers::InputLayerType::CONSTANT, + {{15, 15}} + }, + DeconvInputData{ + InputShape{{-1, 67, -1, -1}, {{ 2, 67, 7, 7}, { 2, 67, 5, 7}, { 1, 67, 9, 4}}}, + ngraph::helpers::InputLayerType::PARAMETER, + {{15, 15}, {9, 10}, {9, 9}} + } +}; + +const auto deconvParams_AutoPadding_2D = ::testing::Combine( + ::testing::ValuesIn(kernels2d), + ::testing::ValuesIn(strides2d), + ::testing::ValuesIn(padBegins2d), + ::testing::ValuesIn(padEnds2d), + ::testing::ValuesIn(dilations2d), + ::testing::ValuesIn(numOutChannels_Blocked), + ::testing::Values(ngraph::op::PadType::SAME_UPPER, ngraph::op::PadType::SAME_LOWER), + ::testing::ValuesIn(emptyOutputPadding) +); + +INSTANTIATE_TEST_SUITE_P(smoke_Deconv_2D_AutoPadding_FP32, DeconvolutionLayerCPUTest, + ::testing::Combine( + deconvParams_AutoPadding_2D, + ::testing::ValuesIn(inputs_2D_AutoPadding), + ::testing::Values(ElementType::f32), + ::testing::Values(emptyFusingSpec), + ::testing::ValuesIn(filterCPUInfoForDevice({conv_gemm_2D, conv_avx512_2D})), + ::testing::Values(cpuEmptyPluginConfig)), + DeconvolutionLayerCPUTest::getTestCaseName); } // namespace + } // namespace CPULayerTestsDefinitions diff --git a/src/tests/functional/plugin/cpu/single_layer_tests/group_convolution_backprop_data.cpp b/src/tests/functional/plugin/cpu/single_layer_tests/group_convolution_backprop_data.cpp index fb13870bc17..5ddfa950376 100755 --- a/src/tests/functional/plugin/cpu/single_layer_tests/group_convolution_backprop_data.cpp +++ b/src/tests/functional/plugin/cpu/single_layer_tests/group_convolution_backprop_data.cpp @@ -2,38 +2,82 @@ // SPDX-License-Identifier: Apache-2.0 // -#include #include "test_utils/cpu_test_utils.hpp" #include "test_utils/convolution_params.hpp" #include "test_utils/fusing_test_utils.hpp" +#include "shared_test_classes/base/ov_subgraph.hpp" +#include "functional_test_utils/ov_tensor_utils.hpp" +#include "ngraph_functions/builders.hpp" +#include +#include "openvino/core/preprocess/pre_post_process.hpp" -using namespace InferenceEngine; using namespace CPUTestUtils; +using namespace ov::test; namespace CPULayerTestsDefinitions { -using groupConvBackpropDataLayerTestParamsSet = LayerTestsDefinitions::groupConvBackpropLayerTestParamsSet; -using groupConvBackpropDataSpecificParams = LayerTestsDefinitions::groupConvBackpropSpecificParams; +using GroupDeconvSpecParams = LayerTestsDefinitions::groupConvBackpropSpecificParams; -typedef std::tuple< - 
groupConvBackpropDataLayerTestParamsSet, - CPUSpecificParams, - fusingSpecificParams, - std::map> groupDeconvLayerCPUTestParamsSet; +using DeconvInputData = std::tuple>>; // values for 'output_shape' -class GroupDeconvolutionLayerCPUTest : public testing::WithParamInterface, - virtual public LayerTestsUtils::LayerTestsCommon, public CpuTestWithFusing { +using GroupDeconvLayerCPUTestParamsSet = std::tuple>; + +class GroupDeconvolutionLayerCPUTest : public testing::WithParamInterface, + virtual public SubgraphBaseTest, public CpuTestWithFusing { public: - static std::string getTestCaseName(testing::TestParamInfo obj) { - groupConvBackpropDataLayerTestParamsSet basicParamsSet; + static std::string getTestCaseName(testing::TestParamInfo obj) { + GroupDeconvSpecParams basicParamsSet; + DeconvInputData inputData; + ElementType prec; CPUSpecificParams cpuParams; fusingSpecificParams fusingParams; std::map additionalConfig; - std::tie(basicParamsSet, cpuParams, fusingParams, additionalConfig) = obj.param; + std::tie(basicParamsSet, inputData, prec, fusingParams, cpuParams, additionalConfig) = obj.param; + + ngraph::op::PadType padType; + InferenceEngine::SizeVector kernel, stride, dilation; + std::vector padBegin, padEnd, outPadding; + size_t convOutChannels, groupNum; + std::tie(kernel, stride, padBegin, padEnd, dilation, convOutChannels, groupNum, padType, outPadding) = basicParamsSet; + + InputShape inputShape; + ngraph::helpers::InputLayerType outShapeType; + std::vector> outShapeData; + std::tie(inputShape, outShapeType, outShapeData) = inputData; std::ostringstream result; - result << LayerTestsDefinitions::GroupConvBackpropLayerTest::getTestCaseName(testing::TestParamInfo( - basicParamsSet, 0)); + result << "IS="; + result << CommonTestUtils::partialShape2str({inputShape.first}) << "_"; + result << "TS="; + for (const auto& shape : inputShape.second) { + result << "("; + result << CommonTestUtils::vec2str(shape); + result << ")_"; + } + result << "PRC=" << prec << "_"; + result << "K=" << CommonTestUtils::vec2str(kernel) << "_"; + result << "S=" << CommonTestUtils::vec2str(stride) << "_"; + result << "PB=" << CommonTestUtils::vec2str(padBegin) << "_"; + result << "PE=" << CommonTestUtils::vec2str(padEnd) << "_"; + result << "D=" << CommonTestUtils::vec2str(dilation) << "_"; + result << "OP=" << CommonTestUtils::vec2str(outPadding) << "_"; + result << "O=" << convOutChannels << "_"; + result << "G=" << groupNum << "_"; + result << "AP=" << padType << "_"; + result << "OUT_SH=" << outShapeType << "_"; + result << "OUT_D="; + for (const auto& data : outShapeData) { + result << "("; + result << CommonTestUtils::vec2str(data); + result << ")_"; + } result << CPUTestsBase::getTestCaseName(cpuParams); result << CpuTestWithFusing::getTestCaseName(fusingParams); @@ -48,54 +92,159 @@ public: return result.str(); } + void generate_inputs(const std::vector& targetInputStaticShapes) override { + inputs.clear(); + const auto& funcInputs = function->inputs(); + for (int i = 0; i < funcInputs.size(); ++i) { + const auto& funcInput = funcInputs[i]; + ov::runtime::Tensor tensor; + + if (i == 1) { + tensor = ov::runtime::Tensor(funcInput.get_element_type(), targetInputStaticShapes[i], outShapeData[inferRequestNum].data()); + } else { + tensor = ov::test::utils::create_and_fill_tensor(funcInput.get_element_type(), targetInputStaticShapes[i], 2560, 0, 256); + } + + inputs.insert({funcInput.get_node_shared_ptr(), tensor}); + } + inferRequestNum++; + } + + void init_ref_function(std::shared_ptr &funcRef, const 
std::vector& targetInputStaticShapes) override { + if (function->get_parameters().size() == 1) { + ngraph::helpers::resize_function(funcRef, targetInputStaticShapes); + } else { + // WA: output_shape depends on 3rd deconvolution input data + // but the reference implementation doesn't implement shape inference + // so we need to build a new ngraph function and replace the 3rd input parameter with a constant + // to get valid output shapes + funcRef = createGraph({targetInputStaticShapes[0]}, ngraph::helpers::InputLayerType::CONSTANT); + } + } + + void validate() override { + if (function->get_parameters().size() == 2) { + auto pos = std::find_if(inputs.begin(), inputs.end(), + [](const std::pair, ov::runtime::Tensor> ¶ms) { + return params.first->get_friendly_name() == "param_1"; + }); + IE_ASSERT(pos != inputs.end()); + inputs.erase(pos); + } + SubgraphBaseTest::validate(); + } + + void configure_model() override { + ov::preprocess::PrePostProcessor p(function); + { + auto& params = function->get_parameters(); + for (size_t i = 0; i < params.size(); i++) { + if (i > 0) { + continue; + } + if (inType != ov::element::Type_t::undefined) { + p.input(i).tensor().set_element_type(inType); + } + } + } + { + auto results = function->get_results(); + for (size_t i = 0; i < results.size(); i++) { + if (outType != ov::element::Type_t::undefined) { + p.output(i).tensor().set_element_type(outType); + } + } + } + function = p.build(); + } + + std::shared_ptr createGraph(const std::vector& inShapes, ngraph::helpers::InputLayerType outShapeType) { + auto params = ngraph::builder::makeDynamicParams(prec, {inShapes.front()}); + std::shared_ptr outShapeNode; + if (!outShapeData.empty()) { + if (outShapeType == ngraph::helpers::InputLayerType::PARAMETER) { + IE_ASSERT(inputDynamicShapes.size() == 2); + auto outShapeParam = std::make_shared(ngraph::element::i32, inputDynamicShapes.back()); + params.push_back(outShapeParam); + outShapeNode = outShapeParam; + } else { + outShapeNode = ngraph::opset8::Constant::create(ngraph::element::i32, {outShapeData[inferRequestNum].size()}, outShapeData[inferRequestNum]); + } + } + + for (size_t i = 0; i < params.size(); i++) { + params[i]->set_friendly_name(std::string("param_") + std::to_string(i)); + } + + std::shared_ptr deconv; + if (!outShapeData.empty()) { + IE_ASSERT(outShapeNode != nullptr); + deconv = ngraph::builder::makeGroupConvolutionBackpropData(params[0], outShapeNode, prec, kernel, stride, padBegin, + padEnd, dilation, padType, convOutChannels, groupNum); + } else { + deconv = ngraph::builder::makeGroupConvolutionBackpropData(params[0], prec, kernel, stride, padBegin, + padEnd, dilation, padType, convOutChannels, groupNum, false, outPadding); + } + + return makeNgraphFunction(prec, params, deconv, "GroupDeconvCPU"); + } + protected: InferenceEngine::SizeVector kernel, stride; + void SetUp() override { - groupConvBackpropDataLayerTestParamsSet basicParamsSet; + rel_threshold = 1e-4f; + + targetDevice = CommonTestUtils::DEVICE_CPU; + + GroupDeconvSpecParams basicParamsSet; + DeconvInputData inputData; CPUSpecificParams cpuParams; fusingSpecificParams fusingParams; std::map additionalConfig; - std::tie(basicParamsSet, cpuParams, fusingParams, additionalConfig) = this->GetParam(); + std::tie(basicParamsSet, inputData, prec, fusingParams, cpuParams, additionalConfig) = this->GetParam(); configuration.insert(additionalConfig.begin(), additionalConfig.end()); - - std::tie(inFmts, outFmts, priority, selectedType) = cpuParams; std::tie(postOpMgrPtr, fusedOps) = 
fusingParams; - groupConvBackpropDataSpecificParams groupConvParams; - std::vector inputShape, outputShape; - auto netPrecision = InferenceEngine::Precision::UNSPECIFIED; - std::tie(groupConvParams, netPrecision, inPrc, outPrc, inLayout, outLayout, inputShape, outputShape, targetDevice) = basicParamsSet; + std::tie(kernel, stride, padBegin, padEnd, dilation, convOutChannels, groupNum, padType, outPadding) = basicParamsSet; - if (inPrc == Precision::UNSPECIFIED) { - selectedType += std::string("_") + Precision(Precision::FP32).name(); + InputShape inputShape; + ngraph::helpers::InputLayerType outShapeType; + std::tie(inputShape, outShapeType, outShapeData) = inputData; + + std::tie(inFmts, outFmts, priority, selectedType) = cpuParams; + + if (additionalConfig[InferenceEngine::PluginConfigParams::KEY_ENFORCE_BF16] == InferenceEngine::PluginConfigParams::YES) { + inType = outType = prec = ElementType::bf16; + rel_threshold = 1e-2f; } else { - selectedType += std::string("_") + inPrc.name(); + inType = outType = prec; } - ngraph::op::PadType padType; - InferenceEngine::SizeVector dilation; - std::vector padBegin, padEnd, outputPadding; - size_t convOutChannels, numGroups; - std::tie(kernel, stride, padBegin, padEnd, dilation, convOutChannels, numGroups, padType, outputPadding) = groupConvParams; + selectedType = makeSelectedTypeStr(selectedType, prec); - auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); - auto params = ngraph::builder::makeParams(ngPrc, {inputShape}); - auto paramOuts = ngraph::helpers::convert2OutputVector( - ngraph::helpers::castOps2Nodes(params)); - std::shared_ptr groupConv; - if (!outputShape.empty()) { - auto outShape = ngraph::opset3::Constant::create(ngraph::element::i64, {outputShape.size()}, outputShape); - groupConv = std::dynamic_pointer_cast( - ngraph::builder::makeGroupConvolutionBackpropData(paramOuts[0], outShape, ngPrc, kernel, stride, padBegin, - padEnd, dilation, padType, convOutChannels, numGroups, false, outputPadding)); - } else { - groupConv = std::dynamic_pointer_cast( - ngraph::builder::makeGroupConvolutionBackpropData(paramOuts[0], ngPrc, kernel, stride, padBegin, - padEnd, dilation, padType, convOutChannels, numGroups, false, outputPadding)); + std::vector paramsShapes; + paramsShapes.push_back(inputShape); + if (!outShapeData.empty() && outShapeType == ngraph::helpers::InputLayerType::PARAMETER) { + const auto outShapeDims = ov::Shape{outShapeData.front().size()}; + paramsShapes.push_back(InputShape{outShapeDims, std::vector(inputShape.second.size(), outShapeDims)}); } - function = makeNgraphFunction(ngPrc, params, groupConv, "groupConvolutionBackpropData"); + + init_input_shapes(paramsShapes); + + function = createGraph(inputDynamicShapes, outShapeType); } + +private: + ElementType prec; + ngraph::op::PadType padType; + InferenceEngine::SizeVector dilation; + std::vector padBegin, padEnd, outPadding; + size_t convOutChannels, groupNum; + ngraph::helpers::InputLayerType outShapeType; + std::vector> outShapeData; + size_t inferRequestNum = 0; }; TEST_P(GroupDeconvolutionLayerCPUTest, CompareWithRefs) { @@ -112,75 +261,80 @@ TEST_P(GroupDeconvolutionLayerCPUTest, CompareWithRefs) { } } - Run(); + run(); CheckPluginRelatedResults(executableNetwork, "Deconvolution"); } namespace { -/* GROUP CONV TEST UTILS */ -std::vector filterParamsSetForDevice(std::vector paramsSet) { - std::vector resParamsSet; - const int cpuParamsIndex = 1; - const int selectedTypeIndex = 3; - - for (auto param : paramsSet) { - auto cpuParams = 
std::get(param); - auto selectedTypeStr = std::get(cpuParams); - - if (selectedTypeStr.find("jit") != std::string::npos && !with_cpu_x86_sse42()) - continue; - if (selectedTypeStr.find("avx512") != std::string::npos && !with_cpu_x86_avx512f()) - continue; - - resParamsSet.push_back(param); - } - - return resParamsSet; -} -/* ===================== */ - /* COMMON PARAMS */ std::vector fusingParamsSet { emptyFusingSpec, fusingScaleShift, }; const std::map cpuEmptyPluginConfig; -const std::map cpuBF16PluginConfig = { { PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::YES } }; +const std::map cpuBF16PluginConfig = { { InferenceEngine::PluginConfigParams::KEY_ENFORCE_BF16, + InferenceEngine::PluginConfigParams::YES } }; const std::vector> emptyOutputShape = {{}}; const std::vector> emptyOutputPadding = {{}}; /* ============= GroupConvolution params (planar layout) ============= */ -const SizeVector numOutChannels_Planar = {6}; -const SizeVector numGroups_Planar = {2, 3}; +const InferenceEngine::SizeVector numOutChannels_Planar = {6}; +const InferenceEngine::SizeVector numGroups_Planar = {2, 3}; /* ============= GroupConvolution params (blocked layout) ============= */ -const SizeVector numOutChannels_Blocked = {64}; -const SizeVector numGroups_Blocked = {2, 4}; +const InferenceEngine::SizeVector numOutChannels_Blocked = {64}; +const InferenceEngine::SizeVector numGroups_Blocked = {2, 4}; /* ============= GroupConvolution params (DW) ============= */ -const SizeVector numOutChannels_DW = {32}; -const SizeVector numGroups_DW = {32}; +const InferenceEngine::SizeVector numOutChannels_DW = {32}; +const InferenceEngine::SizeVector numGroups_DW = {32}; /* ============= GroupConvolution params (2D) ============= */ -const std::vector kernels2d = {{3, 3}, {1, 1}}; -const std::vector strides2d = {{1, 1}, {2, 2}}; +const std::vector kernels2d = {{3, 3}, {1, 1}}; +const std::vector strides2d = {{1, 1}, {2, 2}}; const std::vector> padBegins2d = {{0, 0}}; const std::vector> padEnds2d = {{0, 0}}; -const std::vector dilations2d = {{1, 1}}; +const std::vector dilations2d = {{1, 1}}; /* ============= GroupConvolution params (3D) ============= */ -const std::vector kernels3d = {{3, 3, 3}, {1, 1, 1}}; -const std::vector strides3d = {{1, 1, 1}, {2, 2, 2}}; +const std::vector kernels3d = {{3, 3, 3}, {1, 1, 1}}; +const std::vector strides3d = {{1, 1, 1}, {2, 2, 2}}; const std::vector> padBegins3d = {{0, 0, 0}}; const std::vector> padEnds3d = {{0, 0, 0}}; -const std::vector dilations3d = {{1, 1, 1}}; +const std::vector dilations3d = {{1, 1, 1}}; /* ============= */ /* INSTANCES */ /* ============= GroupConvolution (Planar 2D) ============= */ +const std::vector Planar_2D_inputs_smoke = { + DeconvInputData{ + InputShape{{}, {{ 2, 12, 7, 7 }}}, + ngraph::helpers::InputLayerType::CONSTANT, + {} + }, + DeconvInputData{ + InputShape{{-1, 12, -1, -1}, {{ 2, 12, 7, 7}, { 2, 12, 5, 7}, { 1, 12, 9, 4}}}, + ngraph::helpers::InputLayerType::PARAMETER, + {{15, 15}, {9, 10}, {9, 9}} + } +}; + +const std::vector Planar_2D_inputs_nightly = { + DeconvInputData{ + InputShape{{-1, 12, -1, -1}, {{ 2, 12, 7, 7}, { 2, 12, 5, 7}, { 1, 12, 9, 4}}}, + ngraph::helpers::InputLayerType::CONSTANT, + {} + }, + DeconvInputData{ + InputShape{{-1, 12, -1, -1}, {{ 2, 12, 7, 7}, { 2, 12, 5, 7}, { 1, 12, 9, 4}}}, + ngraph::helpers::InputLayerType::CONSTANT, + {{15, 15}} + } +}; + const auto groupConvParams_ExplicitPadding_Planar_2D = ::testing::Combine( ::testing::ValuesIn(kernels2d), ::testing::ValuesIn(strides2d), @@ -195,39 +349,71 @@ const 
auto groupConvParams_ExplicitPadding_Planar_2D = ::testing::Combine( INSTANTIATE_TEST_SUITE_P(smoke_GroupDeconv_2D_Planar_FP32, GroupDeconvolutionLayerCPUTest, ::testing::Combine( - ::testing::Combine( - groupConvParams_ExplicitPadding_Planar_2D, - ::testing::Values(Precision::FP32), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({ 2, 12, 7, 7 })), - ::testing::ValuesIn(emptyOutputShape), - ::testing::Values(CommonTestUtils::DEVICE_CPU)), - ::testing::ValuesIn(filterCPUInfoForDevice({conv_gemm_2D})), + groupConvParams_ExplicitPadding_Planar_2D, + ::testing::ValuesIn(Planar_2D_inputs_smoke), + ::testing::Values(ElementType::f32), ::testing::ValuesIn(fusingParamsSet), + ::testing::ValuesIn(filterCPUInfoForDevice({conv_gemm_2D})), ::testing::Values(cpuEmptyPluginConfig)), GroupDeconvolutionLayerCPUTest::getTestCaseName); INSTANTIATE_TEST_SUITE_P(smoke_GroupDeconv_2D_Planar_BF16, GroupDeconvolutionLayerCPUTest, ::testing::Combine( - ::testing::Combine( - groupConvParams_ExplicitPadding_Planar_2D, - ::testing::Values(Precision::FP32), - ::testing::Values(InferenceEngine::Precision::BF16), - ::testing::Values(InferenceEngine::Precision::BF16), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({ 2, 12, 7, 7 })), - ::testing::ValuesIn(emptyOutputShape), - ::testing::Values(CommonTestUtils::DEVICE_CPU)), - ::testing::ValuesIn(filterCPUInfoForDevice({conv_gemm_2D})), + groupConvParams_ExplicitPadding_Planar_2D, + ::testing::ValuesIn(Planar_2D_inputs_smoke), + ::testing::Values(ElementType::f32), ::testing::ValuesIn(fusingParamsSet), + ::testing::ValuesIn(filterCPUInfoForDevice({conv_gemm_2D})), + ::testing::Values(cpuBF16PluginConfig)), + GroupDeconvolutionLayerCPUTest::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(nightly_GroupDeconv_2D_Planar_FP32, GroupDeconvolutionLayerCPUTest, + ::testing::Combine( + groupConvParams_ExplicitPadding_Planar_2D, + ::testing::ValuesIn(Planar_2D_inputs_nightly), + ::testing::Values(ElementType::f32), + ::testing::ValuesIn(fusingParamsSet), + ::testing::ValuesIn(filterCPUInfoForDevice({conv_gemm_2D})), + ::testing::Values(cpuEmptyPluginConfig)), + GroupDeconvolutionLayerCPUTest::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(nightly_GroupDeconv_2D_Planar_BF16, GroupDeconvolutionLayerCPUTest, + ::testing::Combine( + groupConvParams_ExplicitPadding_Planar_2D, + ::testing::ValuesIn(Planar_2D_inputs_nightly), + ::testing::Values(ElementType::f32), + ::testing::ValuesIn(fusingParamsSet), + ::testing::ValuesIn(filterCPUInfoForDevice({conv_gemm_2D})), ::testing::Values(cpuBF16PluginConfig)), GroupDeconvolutionLayerCPUTest::getTestCaseName); /* ============= GroupConvolution (Planar 3D) ============= */ +const std::vector Planar_3D_inputs_smoke = { + DeconvInputData{ + InputShape{{}, {{ 2, 12, 7, 7, 7 }}}, + ngraph::helpers::InputLayerType::CONSTANT, + {} + }, + DeconvInputData{ + InputShape{{-1, 12, -1, -1, -1}, {{ 2, 12, 7, 7, 7}, { 2, 12, 5, 7, 7}, { 1, 12, 9, 4, 9}}}, + ngraph::helpers::InputLayerType::PARAMETER, + {{15, 15, 15}, {9, 10, 10}, {9, 9, 9}} + } +}; + +const std::vector Planar_3D_inputs_nightly = { + DeconvInputData{ + InputShape{{-1, 12, -1, -1, -1}, {{ 2, 12, 7, 7, 7}, { 2, 12, 5, 7, 7}, { 1, 12, 9, 4, 9}}}, + ngraph::helpers::InputLayerType::CONSTANT, + {} + }, + 
DeconvInputData{ + InputShape{{-1, 12, -1, -1, -1}, {{ 2, 12, 7, 7, 7}, { 2, 12, 5, 7, 7}, { 1, 12, 9, 4, 9}}}, + ngraph::helpers::InputLayerType::CONSTANT, + {{15, 15, 15}} + } +}; + const auto groupConvParams_ExplicitPadding_Planar_3D = ::testing::Combine( ::testing::ValuesIn(kernels3d), ::testing::ValuesIn(strides3d), @@ -242,39 +428,71 @@ const auto groupConvParams_ExplicitPadding_Planar_3D = ::testing::Combine( INSTANTIATE_TEST_SUITE_P(smoke_GroupDeconv_3D_Planar_FP32, GroupDeconvolutionLayerCPUTest, ::testing::Combine( - ::testing::Combine( - groupConvParams_ExplicitPadding_Planar_3D, - ::testing::Values(Precision::FP32), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({ 2, 12, 7, 7, 7 })), - ::testing::ValuesIn(emptyOutputShape), - ::testing::Values(CommonTestUtils::DEVICE_CPU)), - ::testing::ValuesIn(filterCPUInfoForDevice({conv_gemm_3D})), + groupConvParams_ExplicitPadding_Planar_3D, + ::testing::ValuesIn(Planar_3D_inputs_smoke), + ::testing::Values(ElementType::f32), ::testing::ValuesIn(fusingParamsSet), + ::testing::ValuesIn(filterCPUInfoForDevice({conv_gemm_3D})), ::testing::Values(cpuEmptyPluginConfig)), GroupDeconvolutionLayerCPUTest::getTestCaseName); INSTANTIATE_TEST_SUITE_P(smoke_GroupDeconv_3D_Planar_BF16, GroupDeconvolutionLayerCPUTest, ::testing::Combine( - ::testing::Combine( - groupConvParams_ExplicitPadding_Planar_3D, - ::testing::Values(Precision::FP32), - ::testing::Values(InferenceEngine::Precision::BF16), - ::testing::Values(InferenceEngine::Precision::BF16), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({ 2, 12, 7, 7, 7 })), - ::testing::ValuesIn(emptyOutputShape), - ::testing::Values(CommonTestUtils::DEVICE_CPU)), - ::testing::ValuesIn(filterCPUInfoForDevice({conv_gemm_3D})), + groupConvParams_ExplicitPadding_Planar_3D, + ::testing::ValuesIn(Planar_3D_inputs_smoke), + ::testing::Values(ElementType::f32), ::testing::ValuesIn(fusingParamsSet), + ::testing::ValuesIn(filterCPUInfoForDevice({conv_gemm_3D})), + ::testing::Values(cpuBF16PluginConfig)), + GroupDeconvolutionLayerCPUTest::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(nightly_GroupDeconv_3D_Planar_FP32, GroupDeconvolutionLayerCPUTest, + ::testing::Combine( + groupConvParams_ExplicitPadding_Planar_3D, + ::testing::ValuesIn(Planar_3D_inputs_nightly), + ::testing::Values(ElementType::f32), + ::testing::ValuesIn(fusingParamsSet), + ::testing::ValuesIn(filterCPUInfoForDevice({conv_gemm_3D})), + ::testing::Values(cpuEmptyPluginConfig)), + GroupDeconvolutionLayerCPUTest::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(nightly_GroupDeconv_3D_Planar_BF16, GroupDeconvolutionLayerCPUTest, + ::testing::Combine( + groupConvParams_ExplicitPadding_Planar_3D, + ::testing::ValuesIn(Planar_3D_inputs_nightly), + ::testing::Values(ElementType::f32), + ::testing::ValuesIn(fusingParamsSet), + ::testing::ValuesIn(filterCPUInfoForDevice({conv_gemm_3D})), ::testing::Values(cpuBF16PluginConfig)), GroupDeconvolutionLayerCPUTest::getTestCaseName); /* ============= GroupConvolution (Blocked 2D) ============= */ +const std::vector Blocked_2D_inputs_smoke = { + DeconvInputData{ + InputShape{{}, {{ 2, 64, 7, 7 }}}, + ngraph::helpers::InputLayerType::CONSTANT, + {} + }, + DeconvInputData{ + InputShape{{-1, 64, -1, -1}, {{ 2, 64, 7, 7}, { 2, 64, 5, 
7}, { 1, 64, 9, 5}}}, + ngraph::helpers::InputLayerType::PARAMETER, + {{15, 15}, {9, 10}, {19, 9}} + } +}; + +const std::vector Blocked_2D_inputs_nightly = { + DeconvInputData{ + InputShape{{-1, 64, -1, -1}, {{ 2, 64, 7, 7}, { 2, 64, 5, 7}, { 1, 64, 9, 4}}}, + ngraph::helpers::InputLayerType::CONSTANT, + {} + }, + DeconvInputData{ + InputShape{{-1, 64, -1, -1}, {{ 2, 64, 7, 7}, { 2, 64, 5, 7}, { 1, 64, 9, 4}}}, + ngraph::helpers::InputLayerType::CONSTANT, + {{15, 15}} + } +}; + const auto groupConvParams_ExplicitPadding_Blocked_2D = ::testing::Combine( ::testing::ValuesIn(kernels2d), ::testing::ValuesIn(strides2d), @@ -289,39 +507,71 @@ const auto groupConvParams_ExplicitPadding_Blocked_2D = ::testing::Combine( INSTANTIATE_TEST_SUITE_P(smoke_GroupDeconv_2D_Blocked_FP32, GroupDeconvolutionLayerCPUTest, ::testing::Combine( - ::testing::Combine( - groupConvParams_ExplicitPadding_Blocked_2D, - ::testing::Values(Precision::FP32), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({ 2, 64, 7, 7 })), - ::testing::ValuesIn(emptyOutputShape), - ::testing::Values(CommonTestUtils::DEVICE_CPU)), - ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_2D})), + groupConvParams_ExplicitPadding_Blocked_2D, + ::testing::ValuesIn(Blocked_2D_inputs_smoke), + ::testing::Values(ElementType::f32), ::testing::ValuesIn(fusingParamsSet), + ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_2D})), ::testing::Values(cpuEmptyPluginConfig)), GroupDeconvolutionLayerCPUTest::getTestCaseName); INSTANTIATE_TEST_SUITE_P(smoke_GroupDeconv_2D_Blocked_BF16, GroupDeconvolutionLayerCPUTest, ::testing::Combine( - ::testing::Combine( - groupConvParams_ExplicitPadding_Blocked_2D, - ::testing::Values(Precision::FP32), - ::testing::Values(InferenceEngine::Precision::BF16), - ::testing::Values(InferenceEngine::Precision::BF16), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({ 2, 64, 7, 7 })), - ::testing::ValuesIn(emptyOutputShape), - ::testing::Values(CommonTestUtils::DEVICE_CPU)), - ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_2D})), + groupConvParams_ExplicitPadding_Blocked_2D, + ::testing::ValuesIn(Blocked_2D_inputs_smoke), + ::testing::Values(ElementType::f32), ::testing::ValuesIn(fusingParamsSet), + ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_2D})), + ::testing::Values(cpuBF16PluginConfig)), + GroupDeconvolutionLayerCPUTest::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(nightly_GroupDeconv_2D_Blocked_FP32, GroupDeconvolutionLayerCPUTest, + ::testing::Combine( + groupConvParams_ExplicitPadding_Blocked_2D, + ::testing::ValuesIn(Blocked_2D_inputs_nightly), + ::testing::Values(ElementType::f32), + ::testing::ValuesIn(fusingParamsSet), + ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_2D})), + ::testing::Values(cpuEmptyPluginConfig)), + GroupDeconvolutionLayerCPUTest::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(nightly_GroupDeconv_2D_Blocked_BF16, GroupDeconvolutionLayerCPUTest, + ::testing::Combine( + groupConvParams_ExplicitPadding_Blocked_2D, + ::testing::ValuesIn(Blocked_2D_inputs_nightly), + ::testing::Values(ElementType::f32), + ::testing::ValuesIn(fusingParamsSet), + ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_2D})), ::testing::Values(cpuBF16PluginConfig)), 
GroupDeconvolutionLayerCPUTest::getTestCaseName); /* ============= GroupConvolution (Blocked 3D) ============= */ +const std::vector Blocked_3D_inputs_smoke = { + DeconvInputData{ + InputShape{{}, {{ 2, 64, 7, 7, 7 }}}, + ngraph::helpers::InputLayerType::CONSTANT, + {} + }, + DeconvInputData{ + InputShape{{-1, 64, -1, -1, -1}, {{ 1, 64, 5, 5, 5}, { 2, 64, 5, 7, 5}}}, + ngraph::helpers::InputLayerType::PARAMETER, + {{7, 7, 7}, {7, 9, 7}} + } +}; + +const std::vector Blocked_3D_inputs_nightly = { + DeconvInputData{ + InputShape{{-1, 64, -1, -1, -1}, {{ 1, 64, 5, 5, 5}, { 2, 64, 5, 7, 5}}}, + ngraph::helpers::InputLayerType::CONSTANT, + {} + }, + DeconvInputData{ + InputShape{{-1, 64, -1, -1, -1}, {{ 1, 64, 5, 5, 5}, { 2, 64, 5, 7, 5}}}, + ngraph::helpers::InputLayerType::CONSTANT, + {{7, 7, 7}} + } +}; + const auto groupConvParams_ExplicitPadding_Blocked_3D = ::testing::Combine( ::testing::ValuesIn(kernels3d), ::testing::ValuesIn(strides3d), @@ -336,39 +586,71 @@ const auto groupConvParams_ExplicitPadding_Blocked_3D = ::testing::Combine( INSTANTIATE_TEST_SUITE_P(smoke_GroupDeconv_3D_Blocked_FP32, GroupDeconvolutionLayerCPUTest, ::testing::Combine( - ::testing::Combine( - groupConvParams_ExplicitPadding_Blocked_3D, - ::testing::Values(Precision::FP32), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({ 2, 64, 7, 7, 7 })), - ::testing::ValuesIn(emptyOutputShape), - ::testing::Values(CommonTestUtils::DEVICE_CPU)), - ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_3D})), + groupConvParams_ExplicitPadding_Blocked_3D, + ::testing::ValuesIn(Blocked_3D_inputs_smoke), + ::testing::Values(ElementType::f32), ::testing::ValuesIn(fusingParamsSet), + ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_3D})), ::testing::Values(cpuEmptyPluginConfig)), GroupDeconvolutionLayerCPUTest::getTestCaseName); INSTANTIATE_TEST_SUITE_P(smoke_GroupDeconv_3D_Blocked_BF16, GroupDeconvolutionLayerCPUTest, ::testing::Combine( - ::testing::Combine( - groupConvParams_ExplicitPadding_Blocked_3D, - ::testing::Values(Precision::FP32), - ::testing::Values(InferenceEngine::Precision::BF16), - ::testing::Values(InferenceEngine::Precision::BF16), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({ 2, 64, 7, 7, 7 })), - ::testing::ValuesIn(emptyOutputShape), - ::testing::Values(CommonTestUtils::DEVICE_CPU)), - ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_3D})), + groupConvParams_ExplicitPadding_Blocked_3D, + ::testing::ValuesIn(Blocked_3D_inputs_smoke), + ::testing::Values(ElementType::f32), ::testing::ValuesIn(fusingParamsSet), + ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_3D})), + ::testing::Values(cpuBF16PluginConfig)), + GroupDeconvolutionLayerCPUTest::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(nightly_GroupDeconv_3D_Blocked_FP32, GroupDeconvolutionLayerCPUTest, + ::testing::Combine( + groupConvParams_ExplicitPadding_Blocked_3D, + ::testing::ValuesIn(Blocked_3D_inputs_nightly), + ::testing::Values(ElementType::f32), + ::testing::ValuesIn(fusingParamsSet), + ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_3D})), + ::testing::Values(cpuEmptyPluginConfig)), + GroupDeconvolutionLayerCPUTest::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(nightly_GroupDeconv_3D_Blocked_BF16, 
GroupDeconvolutionLayerCPUTest, + ::testing::Combine( + groupConvParams_ExplicitPadding_Blocked_3D, + ::testing::ValuesIn(Blocked_3D_inputs_nightly), + ::testing::Values(ElementType::f32), + ::testing::ValuesIn(fusingParamsSet), + ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_3D})), ::testing::Values(cpuBF16PluginConfig)), GroupDeconvolutionLayerCPUTest::getTestCaseName); /* ============= GroupConvolution (DW 2D) ============= */ +const std::vector dw_2D_inputs_smoke = { + DeconvInputData{ + InputShape{{}, {{ 2, 32, 7, 7 }}}, + ngraph::helpers::InputLayerType::CONSTANT, + {} + }, + DeconvInputData{ + InputShape{{-1, 32, -1, -1}, {{ 1, 32, 5, 5}, { 2, 32, 5, 7}}}, + ngraph::helpers::InputLayerType::PARAMETER, + {{7, 7}, {7, 9}} + } +}; + +const std::vector dw_2D_inputs_nightly = { + DeconvInputData{ + InputShape{{-1, 32, -1, -1}, {{ 1, 32, 5, 5}, { 2, 32, 5, 7}}}, + ngraph::helpers::InputLayerType::CONSTANT, + {} + }, + DeconvInputData{ + InputShape{{-1, 32, -1, -1}, {{ 1, 32, 5, 5}, { 2, 32, 5, 7}}}, + ngraph::helpers::InputLayerType::CONSTANT, + {{7, 7}} + } +}; + const auto groupConvParams_ExplicitPadding_DW_2D = ::testing::Combine( ::testing::ValuesIn(kernels2d), ::testing::ValuesIn(strides2d), @@ -383,37 +665,111 @@ const auto groupConvParams_ExplicitPadding_DW_2D = ::testing::Combine( INSTANTIATE_TEST_SUITE_P(smoke_GroupDeconv_2D_DW_FP32, GroupDeconvolutionLayerCPUTest, ::testing::Combine( - ::testing::Combine( - groupConvParams_ExplicitPadding_DW_2D, - ::testing::Values(Precision::FP32), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Precision::UNSPECIFIED), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({ 2, 32, 7, 7 })), - ::testing::ValuesIn(emptyOutputShape), - ::testing::Values(CommonTestUtils::DEVICE_CPU)), - ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_dw_2D})), + groupConvParams_ExplicitPadding_DW_2D, + ::testing::ValuesIn(dw_2D_inputs_smoke), + ::testing::Values(ElementType::f32), ::testing::ValuesIn(fusingParamsSet), + ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_dw_2D})), ::testing::Values(cpuEmptyPluginConfig)), GroupDeconvolutionLayerCPUTest::getTestCaseName); INSTANTIATE_TEST_SUITE_P(smoke_GroupDeconv_2D_DW_BF16, GroupDeconvolutionLayerCPUTest, ::testing::Combine( - ::testing::Combine( - groupConvParams_ExplicitPadding_DW_2D, - ::testing::Values(Precision::FP32), - ::testing::Values(InferenceEngine::Precision::BF16), - ::testing::Values(InferenceEngine::Precision::BF16), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(InferenceEngine::Layout::ANY), - ::testing::Values(std::vector({ 2, 32, 7, 7 })), - ::testing::ValuesIn(emptyOutputShape), - ::testing::Values(CommonTestUtils::DEVICE_CPU)), - ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_dw_2D})), + groupConvParams_ExplicitPadding_DW_2D, + ::testing::ValuesIn(dw_2D_inputs_smoke), + ::testing::Values(ElementType::f32), ::testing::ValuesIn(fusingParamsSet), + ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_dw_2D})), ::testing::Values(cpuBF16PluginConfig)), GroupDeconvolutionLayerCPUTest::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(nightly_GroupDeconv_2D_DW_FP32, GroupDeconvolutionLayerCPUTest, + ::testing::Combine( + groupConvParams_ExplicitPadding_DW_2D, + ::testing::ValuesIn(dw_2D_inputs_nightly), + ::testing::Values(ElementType::f32), + ::testing::ValuesIn(fusingParamsSet), + 
::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_dw_2D})), + ::testing::Values(cpuEmptyPluginConfig)), + GroupDeconvolutionLayerCPUTest::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(nightly_GroupDeconv_2D_DW_BF16, GroupDeconvolutionLayerCPUTest, + ::testing::Combine( + groupConvParams_ExplicitPadding_DW_2D, + ::testing::ValuesIn(dw_2D_inputs_nightly), + ::testing::Values(ElementType::f32), + ::testing::ValuesIn(fusingParamsSet), + ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_dw_2D})), + ::testing::Values(cpuBF16PluginConfig)), + GroupDeconvolutionLayerCPUTest::getTestCaseName); + +/* ============= Reorder + GroupDeconvolution ============= */ +INSTANTIATE_TEST_SUITE_P(smoke_reorder_GroupDeconv_2D, GroupDeconvolutionLayerCPUTest, + ::testing::Combine( + ::testing::Combine(::testing::ValuesIn(kernels2d), + ::testing::Values(InferenceEngine::SizeVector{1, 1}), + ::testing::ValuesIn(padBegins2d), + ::testing::ValuesIn(padEnds2d), + ::testing::ValuesIn(dilations2d), + ::testing::ValuesIn(numOutChannels_Blocked), + ::testing::ValuesIn(numGroups_Blocked), + ::testing::Values(ngraph::op::PadType::EXPLICIT), + ::testing::ValuesIn(emptyOutputPadding)), + ::testing::Values(DeconvInputData{InputShape{{-1, 64, -1, -1}, {{ 2, 64, 7, 7}, { 2, 64, 5, 7}, { 1, 64, 9, 4}}}, + ngraph::helpers::InputLayerType::PARAMETER, + {{15, 15}, {9, 10}, {9, 9}}}), + ::testing::Values(ElementType::f32), + ::testing::Values(emptyFusingSpec), + ::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_2D})), + ::testing::Values(cpuEmptyPluginConfig)), + GroupDeconvolutionLayerCPUTest::getTestCaseName); + +/* ============= GroupDeconvolution auto padding tests ============= */ +const std::vector inputs_2D_AutoPadding = { + DeconvInputData{ + InputShape{{}, {{ 2, 64, 7, 7 }}}, + ngraph::helpers::InputLayerType::CONSTANT, + {} + }, + DeconvInputData{ + InputShape{{-1, 64, -1, -1}, {{ 2, 64, 7, 7}, { 2, 64, 5, 7}, { 1, 64, 9, 4}}}, + ngraph::helpers::InputLayerType::CONSTANT, + {} + }, + DeconvInputData{ + InputShape{{-1, 64, -1, -1}, {{ 2, 64, 7, 7}, { 2, 64, 5, 7}, { 1, 64, 9, 4}}}, + ngraph::helpers::InputLayerType::CONSTANT, + {{15, 15}} + }, + DeconvInputData{ + InputShape{{-1, 64, -1, -1}, {{ 2, 64, 7, 7}, { 2, 64, 5, 7}, { 1, 64, 9, 5}}}, + ngraph::helpers::InputLayerType::PARAMETER, + {{15, 15}, {9, 10}, {19, 9}} + } +}; + +const auto groupDeconvParams_AutoPadding_2D = ::testing::Combine( + ::testing::ValuesIn(kernels2d), + ::testing::ValuesIn(strides2d), + ::testing::ValuesIn(padBegins2d), + ::testing::ValuesIn(padEnds2d), + ::testing::ValuesIn(dilations2d), + ::testing::ValuesIn(numOutChannels_Blocked), + ::testing::ValuesIn(numGroups_Blocked), + ::testing::Values(ngraph::op::PadType::SAME_UPPER, ngraph::op::PadType::SAME_LOWER), + ::testing::ValuesIn(emptyOutputPadding) +); + +INSTANTIATE_TEST_SUITE_P(smoke_GroupDeconv_2D_AutoPadding_FP32, GroupDeconvolutionLayerCPUTest, + ::testing::Combine( + groupDeconvParams_AutoPadding_2D, + ::testing::ValuesIn(inputs_2D_AutoPadding), + ::testing::Values(ElementType::f32), + ::testing::Values(emptyFusingSpec), + ::testing::ValuesIn(filterCPUInfoForDevice({conv_gemm_2D, conv_avx512_2D})), + ::testing::Values(cpuEmptyPluginConfig)), + GroupDeconvolutionLayerCPUTest::getTestCaseName); + } // namespace -} // namespace CPULayerTestsDefinitions +} // namespace CPULayerTestsDefinitions \ No newline at end of file diff --git a/src/tests/functional/shared_test_classes/src/base/ov_subgraph.cpp b/src/tests/functional/shared_test_classes/src/base/ov_subgraph.cpp index 
851a2dad329..d6cf3cfd997 100644 --- a/src/tests/functional/shared_test_classes/src/base/ov_subgraph.cpp +++ b/src/tests/functional/shared_test_classes/src/base/ov_subgraph.cpp @@ -55,6 +55,8 @@ void SubgraphBaseTest::run() { try { if (!inputDynamicShapes.empty()) { // resize ngraph function according new target shape + // Note: output shapes of some nodes depend on the input data + // so for some tests we need to override this function and replace parameter with constant node to get correct output shapes init_ref_function(functionRefs, targetStaticShapeVec); } generate_inputs(targetStaticShapeVec); diff --git a/src/tests/ngraph_helpers/ngraph_functions/src/convolution_backprop_data.cpp b/src/tests/ngraph_helpers/ngraph_functions/src/convolution_backprop_data.cpp index 91d6c0fc085..0280895ba88 100644 --- a/src/tests/ngraph_helpers/ngraph_functions/src/convolution_backprop_data.cpp +++ b/src/tests/ngraph_helpers/ngraph_functions/src/convolution_backprop_data.cpp @@ -24,8 +24,8 @@ std::shared_ptr makeConvolutionBackpropData(const ngraph::Output &in const std::vector &filterWeights, const std::vector &biasesWeights) { bool randomFilterWeights = filterWeights.empty(); - auto shape = in.get_shape(); - std::vector filterWeightsShape = {shape[1], numOutChannels}; + auto shape = in.get_partial_shape(); + std::vector filterWeightsShape = {static_cast(shape[1].get_length()), numOutChannels}; filterWeightsShape.insert(filterWeightsShape.end(), filterSize.begin(), filterSize.end()); auto filterWeightsNode = makeConstant(type, filterWeightsShape, filterWeights, randomFilterWeights); @@ -74,8 +74,8 @@ std::shared_ptr makeConvolutionBackpropData(const ngraph::Output &in const std::vector &filterWeights, const std::vector &biasesWeights) { bool randomFilterWeights = filterWeights.empty(); - auto shape = in.get_shape(); - std::vector filterWeightsShape = {shape[1], numOutChannels}; + auto shape = in.get_partial_shape(); + std::vector filterWeightsShape = {static_cast(shape[1].get_length()), numOutChannels}; filterWeightsShape.insert(filterWeightsShape.end(), filterSize.begin(), filterSize.end()); auto filterWeightsNode = makeConstant(type, filterWeightsShape, filterWeights, randomFilterWeights); diff --git a/src/tests/ngraph_helpers/ngraph_functions/src/group_convolution_backprop_data.cpp b/src/tests/ngraph_helpers/ngraph_functions/src/group_convolution_backprop_data.cpp index 84c141f8a20..e7642633e92 100644 --- a/src/tests/ngraph_helpers/ngraph_functions/src/group_convolution_backprop_data.cpp +++ b/src/tests/ngraph_helpers/ngraph_functions/src/group_convolution_backprop_data.cpp @@ -25,8 +25,8 @@ std::shared_ptr makeGroupConvolutionBackpropData(const ngraph::Output &filterWeights, const std::vector &biasesWeights) { bool randomFilterWeights = filterWeights.empty(); - auto shape = in.get_shape(); - std::vector filterWeightsShape = {shape[1], numOutChannels}; + auto shape = in.get_partial_shape(); + std::vector filterWeightsShape = {static_cast(shape[1].get_length()), numOutChannels}; if (filterWeightsShape[0] % numGroups || filterWeightsShape[1] % numGroups) throw std::runtime_error("incorrect shape for GroupConvolutionBackpropData"); filterWeightsShape[0] /= numGroups; @@ -81,8 +81,8 @@ std::shared_ptr makeGroupConvolutionBackpropData(const ngraph::Output &filterWeights, const std::vector &biasesWeights) { bool randomFilterWeights = filterWeights.empty(); - auto shape = in.get_shape(); - std::vector filterWeightsShape = {shape[1], numOutChannels}; + auto shape = in.get_partial_shape(); + std::vector 
filterWeightsShape = {static_cast(shape[1].get_length()), numOutChannels}; if (filterWeightsShape[0] % numGroups || filterWeightsShape[1] % numGroups) throw std::runtime_error("incorrect shape for GroupConvolutionBackpropData"); filterWeightsShape[0] /= numGroups; diff --git a/src/tests/unit/cpu/mkldnn_memory_desc_test.cpp b/src/tests/unit/cpu/mkldnn_memory_desc_test.cpp index c4ff9feb0cb..4e25879f191 100644 --- a/src/tests/unit/cpu/mkldnn_memory_desc_test.cpp +++ b/src/tests/unit/cpu/mkldnn_memory_desc_test.cpp @@ -541,7 +541,7 @@ TEST(cloneWithParamsChange, UndefinedAndDefaultParams) { } } -TEST(makeDummyDesc, LowerBoundMoreThenDummyValie) { +TEST(makeDummyDesc, LowerBoundMoreThanDummyValue) { Shape shape(ngraph::PartialShape{1, 3, 85, {144, 1444}}); auto desc = std::make_shared(shape, mkldnn::memory::data_type::f32, mkldnn::memory::format_tag::nchw); ASSERT_FALSE(desc->isDefined()); From 58be795970b3a32107c027d14a77185f0adc10ac Mon Sep 17 00:00:00 2001 From: Alexandra Sidorova Date: Thu, 16 Dec 2021 10:24:57 +0300 Subject: [PATCH 18/27] [CPU] Added BF16 support for CumSum as well (#9030) --- .../nodes/mkldnn_cum_sum_node.cpp | 48 ++--- .../mkldnn_plugin/nodes/mkldnn_cum_sum_node.h | 7 + .../plugin/cpu/single_layer_tests/cum_sum.cpp | 183 ++++++------------ 3 files changed, 75 insertions(+), 163 deletions(-) diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_cum_sum_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_cum_sum_node.cpp index 68fd40d9a56..a99e30a1db0 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_cum_sum_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_cum_sum_node.cpp @@ -12,6 +12,7 @@ #include "ie_precision.hpp" #include #include "mkldnn_cum_sum_node.h" +#include "utils/bfloat16.hpp" using namespace MKLDNNPlugin; using namespace InferenceEngine; @@ -70,8 +71,7 @@ void MKLDNNCumSumNode::initSupportedPrimitiveDescriptors() { return; dataPrecision = getOriginalInputPrecisionAtPort(CUM_SUM_DATA); - if (dataPrecision != Precision::I8 && dataPrecision != Precision::U8 && dataPrecision != Precision::I16 && dataPrecision != Precision::I32 && - dataPrecision != Precision::FP32 && dataPrecision != Precision::I64 && dataPrecision != Precision::U64 && dataPrecision != Precision::BF16) + if (!one_of(dataPrecision, Precision::I8, Precision::U8, Precision::I16, Precision::BF16, Precision::I32, Precision::FP32, Precision::I64, Precision::U64)) IE_THROW() << errorPrefix << " has unsupported 'data' input precision: " << dataPrecision.name(); if (inputShapes.size() == numOfInputs) { @@ -95,43 +95,17 @@ void MKLDNNCumSumNode::execute(mkldnn::stream strm) { if (inputShapes.size() == numOfInputs) axis = getAxis(getParentEdgeAt(AXIS)->getMemory(), getParentEdgeAt(CUM_SUM_DATA)->getMemory()); - switch (dataPrecision) { - case Precision::I8 : { - exec(); - break; - } - case Precision::U8 : { - exec(); - break; - } - case Precision::I16 : { - exec(); - break; - } - case Precision::I32 : { - exec(); - break; - } - case Precision::FP32 : { - exec(); - break; - } - case Precision::I64 : { - exec(); - break; - } - case Precision::U64 : { - exec(); - break; - } - default : { - std::string errorMsg = errorPrefix + " has unsupported 'data' input precision: " + dataPrecision.name(); - IE_THROW() << errorMsg; - } - } + OV_SWITCH(MKLDNNPlugin, CumSumExecute, this, dataPrecision, + OV_CASE(Precision::I8, int8_t), + OV_CASE(Precision::U8, uint8_t), + OV_CASE(Precision::I16, int16_t), + OV_CASE(Precision::BF16, bfloat16_t), + OV_CASE(Precision::I32, int32_t), + 
OV_CASE(Precision::FP32, float), + OV_CASE(Precision::I64, int64_t), + OV_CASE(Precision::U64, uint64_t)) } - template void MKLDNNCumSumNode::exec() { const auto *input = reinterpret_cast(getParentEdgeAt(CUM_SUM_DATA)->getMemoryPtr()->GetPtr()); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_cum_sum_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_cum_sum_node.h index 2e5ebfaf7d8..f917a53ef34 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_cum_sum_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_cum_sum_node.h @@ -47,6 +47,13 @@ private: InferenceEngine::Precision dataPrecision; std::string errorPrefix; + + template + struct CumSumExecute { + void operator()(MKLDNNCumSumNode* node) { + node->exec(); + } + }; }; } // namespace MKLDNNPlugin diff --git a/src/tests/functional/plugin/cpu/single_layer_tests/cum_sum.cpp b/src/tests/functional/plugin/cpu/single_layer_tests/cum_sum.cpp index 90a1d12b5df..bd84c142971 100644 --- a/src/tests/functional/plugin/cpu/single_layer_tests/cum_sum.cpp +++ b/src/tests/functional/plugin/cpu/single_layer_tests/cum_sum.cpp @@ -9,57 +9,55 @@ using namespace ngraph; using namespace InferenceEngine; using namespace CPUTestUtils; +using namespace ov; +using namespace test; namespace CPULayerTestsDefinitions { -using cumSumShape = std::pair, std::vector>>; using cumSumParams = std::tuple< ngraph::element::Type, // data precision - cumSumShape, // input shape + InputShape, // input shape std::int64_t, // axis bool, // exclusive bool>; // reverse -class CumSumLayerCPUTest : public testing::WithParamInterface, public ov::test::SubgraphBaseTest, public CPUTestsBase { +class CumSumLayerCPUTest : public testing::WithParamInterface, + public SubgraphBaseTest, public CPUTestsBase { public: static std::string getTestCaseName(testing::TestParamInfo obj) { ngraph::element::Type inputPrecision; - std::pair, std::vector>> shapes; + InputShape shapes; std::int64_t axis; bool exclusive; bool reverse; std::tie(inputPrecision, shapes, axis, exclusive, reverse) = obj.param; - std::ostringstream result; - result << inputPrecision << "_" << "IS=" << CommonTestUtils::partialShape2str(shapes.first) << "_" << "TS="; - for (const auto& shape : shapes.second) { - result << "("; - for (const auto& item : shape) { - result << CommonTestUtils::vec2str(item) << "_"; - } - result << ")_"; + std::ostringstream results; + results << "IS=" << CommonTestUtils::partialShape2str({shapes.first}) << "_"; + results << "TS="; + for (const auto& item : shapes.second) { + results << CommonTestUtils::vec2str(item) << "_"; } - - result << "Axis=" << axis << "_" << (exclusive ? "exclusive" : "") << "_" << (reverse ? "reverse" : ""); - return result.str(); + results << "Prc=" << inputPrecision << "_"; + results << "Axis=" << axis << "_" << (exclusive ? "exclusive" : "") << "_" << (reverse ? 
"reverse" : ""); + return results.str(); } protected: void SetUp() override { targetDevice = CommonTestUtils::DEVICE_CPU; - ngraph::element::Type inputPrecision; - std::pair, std::vector>> shapes; + InputShape shapes; std::int64_t axis; bool exclusive; bool reverse; - std::tie(inputPrecision, shapes, axis, exclusive, reverse) = this->GetParam(); + std::tie(inType, shapes, axis, exclusive, reverse) = this->GetParam(); + if (inType == ElementType::bf16) + rel_threshold = 0.05f; - for (size_t i = 0; i < shapes.second.size(); i++) { - targetStaticShapes.push_back(shapes.second[i]); - } - inputDynamicShapes = shapes.first; + selectedType = makeSelectedTypeStr("ref_any", inType); + init_input_shapes({shapes}); - auto params = ngraph::builder::makeDynamicParams(inputPrecision, { inputDynamicShapes.front() }); + auto params = ngraph::builder::makeDynamicParams(inType, inputDynamicShapes); auto axisNode = ngraph::opset1::Constant::create(ngraph::element::i32, ngraph::Shape{}, std::vector{axis})->output(0); auto cumSum = ngraph::builder::makeCumSum(params[0], axisNode, exclusive, reverse); @@ -72,15 +70,12 @@ TEST_P(CumSumLayerCPUTest, CompareWithRefs) { SKIP_IF_CURRENT_TEST_IS_DISABLED() run(); - // TODO: Should be uncommented after updating the CheckPluginRelatedResults() method - //CheckPluginRelatedResults(executableNetwork, "CumSum"); + CheckPluginRelatedResults(executableNetwork, "CumSum"); } const ngraph::element::TypeVector inputPrecision = { ngraph::element::i8, - ngraph::element::u8, - ngraph::element::i16, - ngraph::element::i32, + ngraph::element::bf16, ngraph::element::f32 }; @@ -90,97 +85,33 @@ const std::vector negativeAxes = { -1, -2, -3, -4, -5, -6 }; const std::vector exclusive = { true, false }; const std::vector reverse = { true, false }; -const std::vector inShapes = { - { - // dynamic - { - {-1} - }, - // target - { - {{16}, {18}, {12}} - } - }, - { - // dynamic - { - {-1, -1} - }, - // target - { - {{9, 15}, {18, 12}, {12, 12}} - } - }, - { - // dynamic - { - {-1, -1, -1} - }, - // target - { - {{16, 10, 12}, {18, 12, 10}, {12, 18, 10}} - } - }, - { - // dynamic - { - {-1, -1, -1, -1} - }, - // target - { - {{18, 20, 14, 12}, {19, 20, 14, 12}, {20, 22, 23, 25}} - } - }, - { - // dynamic - { - {-1, -1, -1, -1, -1} - }, - // target - { - {{2, 4, 6, 2, 4}, {3, 5, 6, 3, 5}, {1, 4, 2, 6, 8}} - } - }, - { - // dynamic - { - {-1, -1, -1, -1, -1, -1} - }, - // target - { - {{2, 4, 6, 2, 4, 2}, {3, 5, 6, 3, 5, 3}, {1, 4, 2, 6, 8, 1}} - } - }, - { - // dynamic - { - {-1, -1, -1, -1, -1, -1, -1} - }, - // target - { - {{2, 4, 6, 2, 4, 2, 4}, {3, 5, 6, 3, 5, 3, 5}, {1, 4, 2, 6, 8, 1, 4}} - } - }, - { - // dynamic - { - {{2, 5}, {3, 7}, {4, 8}, {5, 7}, {2, 5}, {3, 7}, {1, 2}} - }, - // target - { - {{2, 4, 6, 5, 4, 3, 1}, {3, 5, 6, 6, 5, 3, 1}, {5, 7, 4, 6, 3, 7, 2}} - } - }, - { - // dynamic - { - {{2, 5}, -1, {4, 8}, -1, -1, {3, 7}, -1} - }, - // target - { - {{2, 4, 6, 5, 4, 3, 1}, {3, 5, 6, 6, 5, 3, 1}, {5, 7, 4, 6, 3, 7, 2}} - } - }, +const std::vector inShapes = { + {{-1}, + {{16}, {18}, {12}}}, + + {{-1, -1}, + {{9, 15}, {18, 12}, {12, 12}}}, + + {{-1, -1, -1}, + {{16, 10, 12}, {18, 12, 10}, {12, 18, 10}}}, + + {{-1, -1, -1, -1}, + {{18, 20, 14, 12}, {19, 20, 14, 12}, {20, 22, 23, 25}}}, + + {{-1, -1, -1, -1, -1}, + {{2, 4, 6, 2, 4}, {3, 5, 6, 3, 5}, {1, 4, 2, 6, 8}}}, + + {{-1, -1, -1, -1, -1, -1}, + {{2, 4, 6, 2, 4, 2}, {3, 5, 6, 3, 5, 3}, {1, 4, 2, 6, 8, 1}}}, + + {{{-1, -1, -1, -1, -1, -1, -1}}, + {{2, 4, 6, 2, 4, 2, 4}, {3, 5, 6, 3, 5, 3, 5}, {1, 4, 2, 6, 8, 1, 4}}}, + + {{{2, 
5}, {3, 7}, {4, 8}, {5, 7}, {2, 5}, {3, 7}, {1, 2}}, + {{2, 4, 6, 5, 4, 3, 1}, {3, 5, 6, 6, 5, 3, 1}, {5, 7, 4, 6, 3, 7, 2}}}, + + {{{2, 5}, -1, {4, 8}, -1, -1, {3, 7}, -1}, + {{2, 4, 6, 5, 4, 3, 1}, {3, 5, 6, 6, 5, 3, 1}, {5, 7, 4, 6, 3, 7, 2}}} }; const auto testCasesAxis_0 = ::testing::Combine( @@ -193,7 +124,7 @@ const auto testCasesAxis_0 = ::testing::Combine( const auto testCasesAxis_1 = ::testing::Combine( ::testing::ValuesIn(inputPrecision), - ::testing::ValuesIn(std::vector(inShapes.begin() + 1, inShapes.end())), + ::testing::ValuesIn(std::vector(inShapes.begin() + 1, inShapes.end())), ::testing::Values(axes[1]), ::testing::ValuesIn(exclusive), ::testing::ValuesIn(reverse) @@ -201,7 +132,7 @@ const auto testCasesAxis_1 = ::testing::Combine( const auto testCasesAxis_2 = ::testing::Combine( ::testing::ValuesIn(inputPrecision), - ::testing::ValuesIn(std::vector(inShapes.begin() + 2, inShapes.end())), + ::testing::ValuesIn(std::vector(inShapes.begin() + 2, inShapes.end())), ::testing::Values(axes[2]), ::testing::ValuesIn(exclusive), ::testing::ValuesIn(reverse) @@ -209,7 +140,7 @@ const auto testCasesAxis_2 = ::testing::Combine( const auto testCasesAxis_3 = ::testing::Combine( ::testing::ValuesIn(inputPrecision), - ::testing::ValuesIn(std::vector(inShapes.begin() + 3, inShapes.end())), + ::testing::ValuesIn(std::vector(inShapes.begin() + 3, inShapes.end())), ::testing::Values(axes[3]), ::testing::ValuesIn(exclusive), ::testing::ValuesIn(reverse) @@ -217,7 +148,7 @@ const auto testCasesAxis_3 = ::testing::Combine( const auto testCasesAxis_4 = ::testing::Combine( ::testing::ValuesIn(inputPrecision), - ::testing::ValuesIn(std::vector(inShapes.begin() + 4, inShapes.end())), + ::testing::ValuesIn(std::vector(inShapes.begin() + 4, inShapes.end())), ::testing::Values(axes[4]), ::testing::ValuesIn(exclusive), ::testing::ValuesIn(reverse) @@ -225,7 +156,7 @@ const auto testCasesAxis_4 = ::testing::Combine( const auto testCasesAxis_5 = ::testing::Combine( ::testing::ValuesIn(inputPrecision), - ::testing::ValuesIn(std::vector(inShapes.begin() + 5, inShapes.end())), + ::testing::ValuesIn(std::vector(inShapes.begin() + 5, inShapes.end())), ::testing::Values(axes[5]), ::testing::ValuesIn(exclusive), ::testing::ValuesIn(reverse) @@ -233,7 +164,7 @@ const auto testCasesAxis_5 = ::testing::Combine( const auto testCasesAxis_6 = ::testing::Combine( ::testing::ValuesIn(inputPrecision), - ::testing::ValuesIn(std::vector(inShapes.begin() + 6, inShapes.end())), + ::testing::ValuesIn(std::vector(inShapes.begin() + 6, inShapes.end())), ::testing::Values(axes[6]), ::testing::ValuesIn(exclusive), ::testing::ValuesIn(reverse) @@ -241,7 +172,7 @@ const auto testCasesAxis_6 = ::testing::Combine( const auto testCasesAxis_negative = ::testing::Combine( ::testing::ValuesIn(inputPrecision), - ::testing::ValuesIn(std::vector(inShapes.begin() + 6, inShapes.end())), + ::testing::ValuesIn(std::vector(inShapes.begin() + 6, inShapes.end())), ::testing::ValuesIn(negativeAxes), ::testing::ValuesIn(exclusive), ::testing::ValuesIn(reverse) From 2068d5838ba5a851d3d95ab0c79927d6dcbebe43 Mon Sep 17 00:00:00 2001 From: Roman Lyamin Date: Thu, 16 Dec 2021 10:27:27 +0300 Subject: [PATCH 19/27] [GPU] Add I420toRGB/I420toBGR operations (#9204) --- .../convert_color_kernel_base.cpp | 3 + .../core/cl_kernels/convert_color_ref.cl | 50 ++-- .../test_cases/convert_color_gpu_test.cpp | 226 ++++++++++++++++-- src/core/include/openvino/op/i420_to_bgr.hpp | 2 + src/core/include/openvino/op/i420_to_rgb.hpp | 2 + src/core/src/op/i420_to_bgr.cpp | 
2 + src/core/src/op/i420_to_rgb.cpp | 2 + .../intel_gpu/plugin/primitives_list.hpp | 2 + .../src/plugin/ops/convert_color.cpp | 16 +- .../intel_gpu/src/plugin/ops/parameter.cpp | 4 +- .../intel_gpu/src/plugin/ops/result.cpp | 4 +- .../single_layer_tests/convert_color_i420.cpp | 58 +++++ 12 files changed, 327 insertions(+), 44 deletions(-) create mode 100644 src/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/convert_color_i420.cpp diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convert_color/convert_color_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convert_color/convert_color_kernel_base.cpp index 8cbd44375d2..88648ba1809 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convert_color/convert_color_kernel_base.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convert_color/convert_color_kernel_base.cpp @@ -43,6 +43,9 @@ JitConstants ConvertColorKernelBase::GetJitConstants(const convert_color_params& case color_format::NV12: jit.AddConstant(MakeJitConstant("CONVERT_FROM_NV12", "")); break; + case color_format::I420: + jit.AddConstant(MakeJitConstant("CONVERT_FROM_I420", "")); + break; default: IE_THROW() << "Not supported input color format"; } diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convert_color_ref.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convert_color_ref.cl index 24703d15d90..2e24deb4236 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convert_color_ref.cl +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convert_color_ref.cl @@ -5,11 +5,14 @@ #include "include/batch_headers/fetch_data.cl" #include "include/batch_headers/data_types.cl" -#ifdef CONVERT_FROM_NV12 +#if defined(CONVERT_FROM_NV12) || defined(CONVERT_FROM_I420) #ifdef BUFFER_MEM -KERNEL(convert_color_ref)(const __global INPUT0_TYPE* input_y, -#if INPUTS_COUNT == 2 - const __global INPUT1_TYPE* input_uv, +KERNEL(convert_color_ref)(const __global INPUT0_TYPE* input1, +#if INPUTS_COUNT > 1 + const __global INPUT1_TYPE* input2, +#if INPUTS_COUNT == 3 + const __global INPUT2_TYPE* input3, +#endif #endif __global OUTPUT_TYPE* output) { @@ -17,16 +20,19 @@ KERNEL(convert_color_ref)(const __global INPUT0_TYPE* input_y, const uint y = get_global_id(1); const uint x = get_global_id(2); - float Y = input_y[GET_DATA_INDEX(INPUT0, b, 0, y, x)]; + float Y = input1[GET_DATA_INDEX(INPUT0, b, 0, y, x)]; -#if INPUTS_COUNT == 2 - float U = input_uv[GET_DATA_INDEX(INPUT1, b, 0, y / 2, x / 2)]; - float V = input_uv[GET_DATA_INDEX(INPUT1, b, 1, y / 2, x / 2)]; +#if INPUTS_COUNT == 3 + float U = input2[GET_DATA_INDEX(INPUT1, b, 0, y / 2, x / 2)]; + float V = input3[GET_DATA_INDEX(INPUT2, b, 0, y / 2, x / 2)]; +#elif INPUTS_COUNT == 2 + float U = input2[GET_DATA_INDEX(INPUT1, b, 0, y / 2, x / 2)]; + float V = input2[GET_DATA_INDEX(INPUT1, b, 1, y / 2, x / 2)]; #else // Single plane uint input_uv_offset = INPUT0_SIZE_X * INPUT0_SIZE_Y / 3 * 2; - float U = input_y[GET_DATA_INDEX(INPUT0, b, 0, y / 2, (x / 2) * 2) + input_uv_offset]; - float V = input_y[GET_DATA_INDEX(INPUT0, b, 1, y / 2, (x / 2) * 2) + input_uv_offset]; + float U = input1[GET_DATA_INDEX(INPUT0, b, 0, y / 2, (x / 2) * 2) + input_uv_offset]; + float V = input1[GET_DATA_INDEX(INPUT0, b, 1, y / 2, (x / 2) * 2) + input_uv_offset]; #endif float Ycomponent = mad(Y, 1.164f, -18.624f); @@ -57,9 +63,12 @@ 
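/* Summary of the plane layouts these kernels assume (a reading aid matching the
   index math above and below, not part of the kernel interface):
   - NV12, two inputs: a full-resolution Y plane plus one half-resolution plane
     with interleaved UV samples.
   - I420, three inputs: a full-resolution Y plane plus separate quarter-size U
     and V planes, each sampled at (x / 2, y / 2).
   - Single plane: the chroma rows are appended after the Y rows in one buffer,
     hence the INPUT0_SIZE_X * INPUT0_SIZE_Y / 3 * 2 offset. */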
KERNEL(convert_color_ref)(const __global INPUT0_TYPE* input_y, #ifdef SURFACE_MEM -KERNEL(convert_color_ref)(read_only image2d_t input_y, -#if INPUTS_COUNT == 2 - read_only image2d_t input_uv, +KERNEL(convert_color_ref)(read_only image2d_t input1, +#if INPUTS_COUNT > 1 + read_only image2d_t input2, +#if INPUTS_COUNT == 3 + read_only image2d_t input3, +#endif #endif __global OUTPUT_TYPE* output) { @@ -67,17 +76,22 @@ KERNEL(convert_color_ref)(read_only image2d_t input_y, const uint y = get_global_id(1); const uint x = get_global_id(2); - float4 Y = read_imagef(input_y, (int2)(x, y)); + float4 Y = read_imagef(input1, (int2)(x, y)); float Ycomponent = mad(Y.x, 296.82f, -18.624f); -#if INPUTS_COUNT == 2 - float4 UV = read_imagef(input_uv, (int2)(x / 2, y / 2)); +#if INPUTS_COUNT == 3 + float4 U = read_imagef(input2, (int2)(x / 2, y / 2)); + float4 V = read_imagef(input3, (int2)(x / 2, y / 2)); + float Ucomponent = mad(U.x, 255.0f, -128.f); + float Vcomponent = mad(V.x, 255.0f, -128.f); +#elif INPUTS_COUNT == 2 + float4 UV = read_imagef(input2, (int2)(x / 2, y / 2)); float Ucomponent = mad(UV.x, 255.0f, -128.f); float Vcomponent = mad(UV.y, 255.0f, -128.f); #else // Single plane uint input_y_offset = INPUT0_SIZE_Y / 3 * 2; - float4 U = read_imagef(input_y, (int2)((x / 2) * 2, y / 2 + input_y_offset)); - float4 V = read_imagef(input_y, (int2)((x / 2) * 2 + 1, y / 2 + input_y_offset)); + float4 U = read_imagef(input1, (int2)((x / 2) * 2, y / 2 + input_y_offset)); + float4 V = read_imagef(input1, (int2)((x / 2) * 2 + 1, y / 2 + input_y_offset)); float Ucomponent = mad(U.x, 255.0f, -128.f); float Vcomponent = mad(V.x, 255.0f, -128.f); #endif diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/convert_color_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/convert_color_gpu_test.cpp index 7bb8947b1b8..f207235b761 100644 --- a/inference-engine/thirdparty/clDNN/tests/test_cases/convert_color_gpu_test.cpp +++ b/inference-engine/thirdparty/clDNN/tests/test_cases/convert_color_gpu_test.cpp @@ -16,15 +16,15 @@ using namespace cldnn; using namespace ::tests; template -void createReferenceData(const T* arg_y, const T* arg_uv, U* out_ptr, - size_t batch_size, size_t image_h, size_t image_w, - size_t stride_y, size_t stride_uv, bool to_rgb) { - for (int batch = 0; batch < batch_size; batch++) { +void createReferenceDataNV12(const T* arg_y, const T* arg_uv, U* out_ptr, + size_t batch_size, size_t image_h, size_t image_w, + size_t stride_y, size_t stride_uv, bool to_rgb) { + for (size_t batch = 0; batch < batch_size; ++batch) { U* out = out_ptr + batch * image_w * image_h * 3; auto y_ptr = arg_y + batch * stride_y; auto uv_ptr = arg_uv + batch * stride_uv; - for (int h = 0; h < image_h; h++) { - for (int w = 0; w < image_w; w++) { + for (size_t h = 0; h < image_h; ++h) { + for (size_t w = 0; w < image_w; ++w) { auto y_index = h * image_w + w; auto y_val = static_cast(y_ptr[y_index]); auto uv_index = (h / 2) * image_w + (w / 2) * 2; @@ -87,8 +87,8 @@ TEST(convert_color, nv12_to_rgb_two_planes_buffer_fp32) { auto outputs = network.execute(); std::vector ref_res(width * height * 3); - createReferenceData(input_y_data.data(), input_uv_data.data(), ref_res.data(), - 1, height, width, height * width, height * width / 2, true); + createReferenceDataNV12(input_y_data.data(), input_uv_data.data(), ref_res.data(), + 1, height, width, height * width, height * width / 2, true); auto output = outputs.at("convert_color").get_memory(); cldnn::mem_lock output_ptr(output, get_test_stream()); @@ 
-126,8 +126,8 @@ TEST(convert_color, nv12_to_bgr_two_planes_buffer_fp32) { auto outputs = network.execute(); std::vector ref_res(width * height * 3); - createReferenceData(input_y_data.data(), input_uv_data.data(), ref_res.data(), - 1, height, width, height * width, height * width / 2, false); + createReferenceDataNV12(input_y_data.data(), input_uv_data.data(), ref_res.data(), + 1, height, width, height * width, height * width / 2, false); auto output = outputs.at("convert_color").get_memory(); cldnn::mem_lock output_ptr(output, get_test_stream()); @@ -166,8 +166,8 @@ TEST(convert_color, nv12_to_rgb_two_planes_buffer_u8) { auto outputs = network.execute(); std::vector ref_res(width * height * 3); - createReferenceData(input_y_data.data(), input_uv_data.data(), ref_res.data(), - 1, height, width, height * width, height * width / 2, true); + createReferenceDataNV12(input_y_data.data(), input_uv_data.data(), ref_res.data(), + 1, height, width, height * width, height * width / 2, true); auto output = outputs.at("convert_color").get_memory(); cldnn::mem_lock output_ptr(output, get_test_stream()); @@ -206,8 +206,8 @@ TEST(convert_color, nv12_to_rgb_two_planes_buffer_fp16) { auto outputs = network.execute(); std::vector ref_res(width * height * 3); - createReferenceData(input_y_data.data(), input_uv_data.data(), ref_res.data(), - 1, height, width, height * width, height * width / 2, true); + createReferenceDataNV12(input_y_data.data(), input_uv_data.data(), ref_res.data(), + 1, height, width, height * width, height * width / 2, true); auto output = outputs.at("convert_color").get_memory(); cldnn::mem_lock output_ptr(output, get_test_stream()); @@ -243,8 +243,8 @@ TEST(convert_color, nv12_to_rgb_single_plane_buffer_fp32) { auto outputs = network.execute(); std::vector ref_res(width * height * 3); - createReferenceData(input_data.data(), input_data.data() + height * width, ref_res.data(), - 1, height, width, input_height * width, input_height * width, true); + createReferenceDataNV12(input_data.data(), input_data.data() + height * width, ref_res.data(), + 1, height, width, input_height * width, input_height * width, true); auto output = outputs.at("convert_color").get_memory(); cldnn::mem_lock output_ptr(output, get_test_stream()); @@ -279,8 +279,8 @@ TEST(convert_color, nv12_to_rgb_single_plane_buffer_u8) { auto outputs = network.execute(); std::vector ref_res(width * height * 3); - createReferenceData(input_data.data(), input_data.data() + height * width, ref_res.data(), - 1, height, width, input_height * width, input_height * width, true); + createReferenceDataNV12(input_data.data(), input_data.data() + height * width, ref_res.data(), + 1, height, width, input_height * width, input_height * width, true); auto output = outputs.at("convert_color").get_memory(); cldnn::mem_lock output_ptr(output, get_test_stream()); @@ -356,8 +356,8 @@ TEST(convert_color, nv12_to_rgb_two_planes_surface_u8) { auto outputs = network.execute(); std::vector reference_results(width * height * 3); - createReferenceData(data.data(), data.data() + height * width, reference_results.data(), - 1, height, width, height * width, height * width / 2, true); + createReferenceDataNV12(data.data(), data.data() + height * width, reference_results.data(), + 1, height, width, height * width, height * width / 2, true); auto output_prim = outputs.begin()->second.get_memory(); cldnn::mem_lock output_ptr(output_prim, get_test_stream()); @@ -419,8 +419,8 @@ TEST(convert_color, nv12_to_rgb_single_plane_surface_u8) { auto outputs = 
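// A worked check of yuv_pixel_to_rgb above (illustrative arithmetic only), for
// the classic "red" test pixel Y = 81, U = 90, V = 240:
//   c = 81 - 16 = 65,  d = 90 - 128 = -38,  e = 240 - 128 = 112
//   r = 1.164f * 65 + 1.596f * 112                  =  254.4 -> 254 after clipping
//   g = 1.164f * 65 - 0.391f * (-38) - 0.813f * 112 =   -0.5 ->   0
//   b = 1.164f * 65 + 2.018f * (-38)                =   -1.0 ->   0
// i.e. an almost pure red; createReferenceDataI420 then writes the triple in
// RGB or BGR order depending on rgb_color_format.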
network.execute(); std::vector reference_results(width * height * 3); - createReferenceData(input_data.data(), input_data.data() + height * width, reference_results.data(), - 1, height, width, input_height * width, input_height * width, true); + createReferenceDataNV12(input_data.data(), input_data.data() + height * width, reference_results.data(), + 1, height, width, input_height * width, input_height * width, true); auto output_prim = outputs.begin()->second.get_memory(); cldnn::mem_lock output_ptr(output_prim, get_test_stream()); @@ -429,3 +429,185 @@ TEST(convert_color, nv12_to_rgb_single_plane_surface_u8) { } checkStatus(clReleaseMemObject(nv12_image), "clReleaseMemObject"); } + +template +std::tuple yuv_pixel_to_rgb(float y_val, float u_val, float v_val) { + auto c = y_val - 16.f; + auto d = u_val - 128.f; + auto e = v_val - 128.f; + auto clip = [](float a) -> T { + if (std::is_integral()) { + return static_cast(std::min(std::max(std::round(a), 0.f), 255.f)); + } else { + return static_cast(std::min(std::max(a, 0.f), 255.f)); + } + }; + auto b = clip(1.164f * c + 2.018f * d); + auto g = clip(1.164f * c - 0.391f * d - 0.813f * e); + auto r = clip(1.164f * c + 1.596f * e); + return std::tuple{r, g, b}; +} + +template +void createReferenceDataI420(const T* arg_y, const T* arg_u, const T* arg_v, U* out_ptr, + size_t batch_size, size_t image_h, size_t image_w, + size_t stride_y, size_t stride_uv, bool rgb_color_format) { + for (size_t batch = 0; batch < batch_size; ++batch) { + U* out = out_ptr + batch * image_w * image_h * 3; + auto y_ptr = arg_y + batch * stride_y; + auto u_ptr = arg_u + batch * stride_uv; + auto v_ptr = arg_v + batch * stride_uv; + for (size_t h = 0; h < image_h; ++h) { + for (size_t w = 0; w < image_w; ++w) { + auto y_index = h * image_w + w; + auto y_val = static_cast(y_ptr[y_index]); + auto uv_index = (h / 2) * (image_w / 2) + (w / 2); + auto u_val = static_cast(u_ptr[uv_index]); + auto v_val = static_cast(v_ptr[uv_index]); + T r, g, b; + std::tie(r, g, b) = yuv_pixel_to_rgb(y_val, u_val, v_val); + if (rgb_color_format) { + out[y_index * 3] = r; + out[y_index * 3 + 1] = g; + out[y_index * 3 + 2] = b; + } else { + out[y_index * 3] = b; + out[y_index * 3 + 1] = g; + out[y_index * 3 + 2] = r; + } + } + } + } +} + +TEST(convert_color, i420_to_rgb_three_planes_buffer_fp32) { + auto& engine = get_test_engine(); + int width = 224; + int height = 448; + + auto input_y = engine.allocate_memory({ data_types::f32, format::byxf, { 1, 1, width, height } }); + auto input_u = engine.allocate_memory({ data_types::f32, format::byxf, { 1, 1, width / 2 , height / 2 } }); + auto input_v = engine.allocate_memory({ data_types::f32, format::byxf, { 1, 1, width / 2 , height / 2 } }); + + std::vector input_y_data = generate_random_1d(width * height, 0, 255); + std::vector input_u_data = generate_random_1d(width * height / 4, 0, 255); + std::vector input_v_data = generate_random_1d(width * height / 4, 0, 255); + + set_values(input_y, input_y_data); + set_values(input_u, input_u_data); + set_values(input_v, input_v_data); + + layout output_layout(data_types::f32, cldnn::format::byxf, { 1, 3, width, height }); + + topology topology; + topology.add(input_layout("input_y", input_y->get_layout())); + topology.add(input_layout("input_u", input_u->get_layout())); + topology.add(input_layout("input_v", input_v->get_layout())); + topology.add(convert_color("convert_color", { "input_y", "input_u", "input_v" }, cldnn::convert_color::color_format::I420, cldnn::convert_color::color_format::RGB, + 
cldnn::convert_color::memory_type::buffer, output_layout)); + + network network(engine, topology); + network.set_input_data("input_y", input_y); + network.set_input_data("input_u", input_u); + network.set_input_data("input_v", input_v); + + auto outputs = network.execute(); + + std::vector ref_res(width * height * 3); + createReferenceDataI420(input_y_data.data(), input_u_data.data(), input_v_data.data(), ref_res.data(), + 1, height, width, height * width, height * width / 2, true); + auto output = outputs.at("convert_color").get_memory(); + cldnn::mem_lock output_ptr(output, get_test_stream()); + + for (size_t i = 0; i < ref_res.size(); ++i) { + EXPECT_NEAR(ref_res[i], output_ptr[i], 1.001f); + } +} + +TEST(convert_color, i420_to_rgb_three_planes_surface_u8) { + int width = 224; + int height = 448; + + auto ocl_instance = std::make_shared(); + device_query query(engine_types::ocl, runtime_types::ocl, static_cast(ocl_instance->_context.get())); + auto devices = query.get_available_devices(); + + auto engine_config = cldnn::engine_configuration(); + auto engine = engine::create(engine_types::ocl, runtime_types::ocl, devices.begin()->second, engine_config); + + if (!engine->get_device_info().supports_image) { + GTEST_SKIP() << "Device doesn't support images"; + } + + int data_size = width * (height + height / 2); + std::vector data = generate_random_1d(data_size, 0, 255); + + cl_int err; + cl_image_format image_format; + image_format.image_channel_order = CL_R; + image_format.image_channel_data_type = CL_UNORM_INT8; + cl_image_desc image_desc = { CL_MEM_OBJECT_IMAGE2D, (size_t)width, (size_t)height, 0, + 0, 0, 0, 0, 0, { nullptr } }; + + cl_mem i420_image_plane_y = clCreateImage(ocl_instance->_context.get(), CL_MEM_READ_WRITE, &image_format, &image_desc, nullptr, &err); + checkStatus(err, "Creating i420 image plane_y failed"); + + image_desc.image_width = width / 2; + image_desc.image_height = height / 2; + + cl_mem i420_image_plane_u = clCreateImage(ocl_instance->_context.get(), CL_MEM_READ_WRITE, &image_format, &image_desc, nullptr, &err); + checkStatus(err, "Creating i420 image plane_u failed"); + + cl_mem i420_image_plane_v = clCreateImage(ocl_instance->_context.get(), CL_MEM_READ_WRITE, &image_format, &image_desc, nullptr, &err); + checkStatus(err, "Creating i420 image plane_v failed"); + + size_t origin[3] = { 0, 0, 0 }; + size_t y_region[3] = { (size_t)width, (size_t)height, 1 }; + size_t uv_region[3] = { (size_t)width / 2, (size_t)height / 2, 1 }; + + err = clEnqueueWriteImage(ocl_instance->_queue.get(), i420_image_plane_y, true, origin, y_region, 0, 0, &data[0], 0, nullptr, nullptr); + checkStatus(err, "Writing i420 image plane_y failed"); + + err = clEnqueueWriteImage(ocl_instance->_queue.get(), i420_image_plane_u, true, origin, uv_region, 0, 0, &data[width * height], 0, nullptr, nullptr); + checkStatus(err, "Writing i420 image plane_u failed"); + + err = clEnqueueWriteImage(ocl_instance->_queue.get(), i420_image_plane_v, true, origin, uv_region, 0, 0, &data[width * (height + height / 4)], 0, nullptr, nullptr); + checkStatus(err, "Writing i420 image plane_v failed"); + + auto input = input_layout("input", { data_types::u8, format::nv12, { 1, 1, width, height } }); + auto input2 = input_layout("input2", { data_types::u8, format::nv12, { 1, 1, width / 2, height / 2 } }); + auto input3 = input_layout("input3", { data_types::u8, format::nv12, { 1, 1, width / 2, height / 2 } }); + auto output_format = cldnn::format::byxf; + layout output_layout(data_types::f32, output_format, { 1, 3, 
width, height }); + + auto input_memory = engine->share_image(input.layout, i420_image_plane_y); + auto input_memory2 = engine->share_image(input2.layout, i420_image_plane_u); + auto input_memory3 = engine->share_image(input3.layout, i420_image_plane_v); + + topology topology; + topology.add(input); + topology.add(input2); + topology.add(input3); + topology.add(convert_color("convert_color", { "input", "input2", "input3" }, cldnn::convert_color::color_format::I420, cldnn::convert_color::color_format::RGB, + cldnn::convert_color::memory_type::image, output_layout)); + + network network(*engine, topology); + network.set_input_data("input", input_memory); + network.set_input_data("input2", input_memory2); + network.set_input_data("input3", input_memory3); + + auto outputs = network.execute(); + + std::vector reference_results(width * height * 3); + createReferenceDataI420(data.data(), data.data() + height * width, data.data() + width * (height + height / 4), reference_results.data(), + 1, height, width, height * width, height * width / 2, true); + + auto output_prim = outputs.begin()->second.get_memory(); + cldnn::mem_lock output_ptr(output_prim, get_test_stream()); + for (auto i = 0; i < reference_results.size(); i++) { + EXPECT_NEAR(reference_results[i], output_ptr[i], 1.001f); + } + checkStatus(clReleaseMemObject(i420_image_plane_y), "clReleaseMemObject"); + checkStatus(clReleaseMemObject(i420_image_plane_u), "clReleaseMemObject"); + checkStatus(clReleaseMemObject(i420_image_plane_v), "clReleaseMemObject"); +} diff --git a/src/core/include/openvino/op/i420_to_bgr.hpp b/src/core/include/openvino/op/i420_to_bgr.hpp index 59f1e661fd2..473a8e73fd7 100644 --- a/src/core/include/openvino/op/i420_to_bgr.hpp +++ b/src/core/include/openvino/op/i420_to_bgr.hpp @@ -34,6 +34,8 @@ class OPENVINO_API I420toBGR : public util::ConvertColorI420Base { public: OPENVINO_OP("I420toBGR", "opset8", util::ConvertColorI420Base); + BWDCMP_RTTI_DECLARATION; + I420toBGR() = default; /// \brief Constructs a conversion operation from input image in I420 format diff --git a/src/core/include/openvino/op/i420_to_rgb.hpp b/src/core/include/openvino/op/i420_to_rgb.hpp index c95f5de6b5c..cc0d1752de1 100644 --- a/src/core/include/openvino/op/i420_to_rgb.hpp +++ b/src/core/include/openvino/op/i420_to_rgb.hpp @@ -34,6 +34,8 @@ class OPENVINO_API I420toRGB : public util::ConvertColorI420Base { public: OPENVINO_OP("I420toRGB", "opset8", util::ConvertColorI420Base); + BWDCMP_RTTI_DECLARATION; + I420toRGB() = default; /// \brief Constructs a conversion operation from input image in I420 format diff --git a/src/core/src/op/i420_to_bgr.cpp b/src/core/src/op/i420_to_bgr.cpp index b6eac7b3c35..4e38a102100 100644 --- a/src/core/src/op/i420_to_bgr.cpp +++ b/src/core/src/op/i420_to_bgr.cpp @@ -6,6 +6,8 @@ #include "itt.hpp" +BWDCMP_RTTI_DEFINITION(ov::op::v8::I420toBGR); + ov::op::v8::I420toBGR::I420toBGR(const Output& arg) : util::ConvertColorI420Base(arg, util::ConvertColorI420Base::ColorConversion::I420_TO_BGR) { constructor_validate_and_infer_types(); diff --git a/src/core/src/op/i420_to_rgb.cpp b/src/core/src/op/i420_to_rgb.cpp index 9375c6eb921..e9a293de2b4 100644 --- a/src/core/src/op/i420_to_rgb.cpp +++ b/src/core/src/op/i420_to_rgb.cpp @@ -6,6 +6,8 @@ #include "itt.hpp" +BWDCMP_RTTI_DEFINITION(ov::op::v8::I420toRGB); + ov::op::v8::I420toRGB::I420toRGB(const Output& arg) : util::ConvertColorI420Base(arg, util::ConvertColorI420Base::ColorConversion::I420_TO_RGB) { constructor_validate_and_infer_types(); diff --git 
a/src/plugins/intel_gpu/include/intel_gpu/plugin/primitives_list.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/primitives_list.hpp index 9285b5e7032..7ec7a460615 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/primitives_list.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/primitives_list.hpp @@ -216,6 +216,8 @@ REGISTER_FACTORY(v8, GatherND); REGISTER_FACTORY(v8, DeformableConvolution); REGISTER_FACTORY(v8, NV12toRGB); REGISTER_FACTORY(v8, NV12toBGR); +REGISTER_FACTORY(v8, I420toRGB); +REGISTER_FACTORY(v8, I420toBGR); // --------------------------- Supported internal ops --------------------------- // REGISTER_FACTORY(internal, NonMaxSuppressionIEInternal); diff --git a/src/plugins/intel_gpu/src/plugin/ops/convert_color.cpp b/src/plugins/intel_gpu/src/plugin/ops/convert_color.cpp index a3ba0ee20a8..c867b58fe38 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/convert_color.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/convert_color.cpp @@ -44,16 +44,28 @@ static void CreateCommonConvertColorOp(Program& p, const std::shared_ptr& op) { p.ValidateInputs(op, {1, 2}); - CreateCommonConvertColorOp(p, op, cldnn::convert_color::color_format::NV12, cldnn::convert_color::color_format::RGB); + CreateCommonConvertColorOp(p, op, cldnn::convert_color::color_format::NV12, cldnn::convert_color::color_format::RGB); } static void CreateNV12toBGROp(Program& p, const std::shared_ptr& op) { p.ValidateInputs(op, {1, 2}); - CreateCommonConvertColorOp(p, op, cldnn::convert_color::color_format::NV12, cldnn::convert_color::color_format::BGR); + CreateCommonConvertColorOp(p, op, cldnn::convert_color::color_format::NV12, cldnn::convert_color::color_format::BGR); +} + +static void CreateI420toRGBOp(Program& p, const std::shared_ptr& op) { + p.ValidateInputs(op, {1, 3}); + CreateCommonConvertColorOp(p, op, cldnn::convert_color::color_format::I420, cldnn::convert_color::color_format::RGB); +} + +static void CreateI420toBGROp(Program& p, const std::shared_ptr& op) { + p.ValidateInputs(op, {1, 3}); + CreateCommonConvertColorOp(p, op, cldnn::convert_color::color_format::I420, cldnn::convert_color::color_format::BGR); } REGISTER_FACTORY_IMPL(v8, NV12toRGB); REGISTER_FACTORY_IMPL(v8, NV12toBGR); +REGISTER_FACTORY_IMPL(v8, I420toRGB); +REGISTER_FACTORY_IMPL(v8, I420toBGR); } // namespace intel_gpu } // namespace runtime diff --git a/src/plugins/intel_gpu/src/plugin/ops/parameter.cpp b/src/plugins/intel_gpu/src/plugin/ops/parameter.cpp index 1e68ba52db9..53e83826b82 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/parameter.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/parameter.cpp @@ -181,7 +181,9 @@ static void CreateParameterOp(Program& p, const std::shared_ptrget_users()) { is_convert_color_input |= ngraph::is_type(node) || - ngraph::is_type(node); + ngraph::is_type(node) || + ngraph::is_type(node) || + ngraph::is_type(node); } if (is_convert_color_input) { diff --git a/src/plugins/intel_gpu/src/plugin/ops/result.cpp b/src/plugins/intel_gpu/src/plugin/ops/result.cpp index ed1f30dd482..dbe2737ffd9 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/result.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/result.cpp @@ -41,7 +41,9 @@ static void CreateResultOp(Program& p, const std::shared_ptr(prev) || - ngraph::is_type(prev)) { + ngraph::is_type(prev) || + ngraph::is_type(prev) || + ngraph::is_type(prev)) { outputlayout = NHWC; } diff --git a/src/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/convert_color_i420.cpp 
b/src/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/convert_color_i420.cpp new file mode 100644 index 00000000000..a74aed3f77d --- /dev/null +++ b/src/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/convert_color_i420.cpp @@ -0,0 +1,58 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "single_layer_tests/convert_color_i420.hpp" +#include "common_test_utils/test_constants.hpp" + +using namespace LayerTestsDefinitions; + +namespace { + +const std::vector<ov::Shape> inShapes_nhwc = { + {1, 10, 10, 1} +}; + +const std::vector<ov::element::Type> inTypes = { + ov::element::u8, ov::element::f32 +}; + +const auto testCase_values = ::testing::Combine( + ::testing::ValuesIn(inShapes_nhwc), + ::testing::ValuesIn(inTypes), + ::testing::Bool(), + ::testing::Bool(), + ::testing::Values(CommonTestUtils::DEVICE_GPU) ); + +INSTANTIATE_TEST_SUITE_P(smoke_TestsConvertColorI420, ConvertColorI420LayerTest, testCase_values, ConvertColorI420LayerTest::getTestCaseName); + +const auto testCase_accuracy_values = ::testing::Combine( + ::testing::Values(ov::Shape{1, 16*6, 16, 1}), + ::testing::Values(ov::element::u8), + ::testing::Bool(), + ::testing::Bool(), + ::testing::Values(CommonTestUtils::DEVICE_GPU) ); + +INSTANTIATE_TEST_SUITE_P(smoke_TestsConvertColorI420_acc, + ConvertColorI420AccuracyTest, + testCase_accuracy_values, + ConvertColorI420LayerTest::getTestCaseName); + +const auto testCase_accuracy_values_nightly = ::testing::Combine( + ::testing::Values(ov::Shape{1, 256*256, 256, 1}), + ::testing::Values(ov::element::u8), + ::testing::Values(false), + ::testing::Values(true), + ::testing::Values(CommonTestUtils::DEVICE_GPU) ); + +INSTANTIATE_TEST_SUITE_P(nightly_TestsConvertColorI420_acc, + ConvertColorI420AccuracyTest, + testCase_accuracy_values_nightly, + ConvertColorI420LayerTest::getTestCaseName); + +} // namespace From 4dab2662aa7ab4b71127b1cb5ae5a24ee4172e01 Mon Sep 17 00:00:00 2001 From: Yuan Hu Date: Thu, 16 Dec 2021 15:48:04 +0800 Subject: [PATCH 20/27] [Auto plugin] add KEY_NETWORK_PRIORITY (#8146) * add KEY_NETWORK_PRIORITY Signed-off-by: Hu, Yuan2 * use uniqueName instead of fullDeviceName use lambda instead of macro Signed-off-by: Hu, Yuan2 * test draft Signed-off-by: Hu, Yuan2 * fix compile issue Signed-off-by: Hu, Yuan2 * add test config Signed-off-by: Hu, Yuan2 * add origin select test Signed-off-by: Hu, Yuan2 * perf Counter as separate member Signed-off-by: Hu, Yuan2 * add null device test case for select device Signed-off-by: Hu, Yuan2 * fix cache test failed issue Signed-off-by: Hu, Yuan2 * use a function to register priority Signed-off-by: Hu, Yuan2 * add const Signed-off-by: Hu, Yuan2 * add some comments Signed-off-by: Hu, Yuan2 * initialize AutoContext in define Signed-off-by: Hu, Yuan2 * use lock guard Signed-off-by: Hu, Yuan2 * change get() to wait() Signed-off-by: Hu, Yuan2 * add unregister in load failed Signed-off-by: Hu, Yuan2 * add const on unique name Signed-off-by: Hu, Yuan2 * fix compile issue Signed-off-by: Hu, Yuan2 * add test for ParseMetaDevices and fix logic issue Signed-off-by: Hu, Yuan2 * change key name to AUTO_NETWORK_PRIORITY Signed-off-by: Hu, Yuan2 * fix compile issue caused by API change in master Signed-off-by: Hu, Yuan2 * fix key incorrect test in ci Signed-off-by: Hu, Yuan2 * go back to origin set config code Signed-off-by: Hu, Yuan2 * fix mock test issue after Parameter changed to Any Signed-off-by: Hu, Yuan2 * fix macos compile issue Signed-off-by: Hu, Yuan2 * remove question 
comments Signed-off-by: Hu, Yuan2 * remove duplicate config Signed-off-by: Hu, Yuan2 * update test case from the hello_query_device CPU FULL_DEVICE_NAME : Intel(R) Core(TM) i7-6700 CPU @ 3.40GHz GPU FULL_DEVICE_NAME : Intel(R) Gen9 HD Graphics (iGPU) MYRIAD.3.1-ma2480 FULL_DEVICE_NAME : Intel Movidius Myriad X VPU MYRIAD.3.2-ma2480 FULL_DEVICE_NAME : Intel Movidius Myriad X VPU currently GetMetric("MYRIAD","FULL_DEVICE_NAME") will throw No KEY_DEVICE_ID Signed-off-by: Hu, Yuan2 * only GPU uses the fullDeviceName Signed-off-by: Hu, Yuan2 * fix rebase issue Signed-off-by: Hu, Yuan2 * change directory after ie test moved Signed-off-by: Hu, Yuan2 * fix windows compile unit test issue Signed-off-by: Hu, Yuan2 --- .../ie/multi-device/multi_device_config.hpp | 8 + src/plugins/auto/executable_network.cpp | 72 +++--- src/plugins/auto/executable_network.hpp | 8 + src/plugins/auto/plugin.cpp | 238 ++++++++++------- src/plugins/auto/plugin.hpp | 10 +- .../behavior/plugin/configuration_tests.cpp | 36 ++- .../behavior/plugin/configuration_tests.cpp | 29 ++- .../auto/auto_select_device_failed_test.cpp | 8 +- .../unit/auto/exec_network_get_metrics.cpp | 4 +- .../unit/auto/key_network_priority_test.cpp | 241 ++++++++++++++++++ .../unit/auto/parse_meta_device_test.cpp | 172 +++++++++++++ .../auto/plugin/mock_auto_device_plugin.hpp | 2 +- src/tests/unit/auto/select_device_test.cpp | 214 ++++++++++++++++ 13 files changed, 891 insertions(+), 151 deletions(-) create mode 100644 src/tests/unit/auto/key_network_priority_test.cpp create mode 100644 src/tests/unit/auto/parse_meta_device_test.cpp create mode 100644 src/tests/unit/auto/select_device_test.cpp diff --git a/src/inference/include/ie/multi-device/multi_device_config.hpp b/src/inference/include/ie/multi-device/multi_device_config.hpp index 0f4f2dd829b..2426ec11d6f 100644 --- a/src/inference/include/ie/multi-device/multi_device_config.hpp +++ b/src/inference/include/ie/multi-device/multi_device_config.hpp @@ -34,5 +34,13 @@ namespace MultiDeviceConfigParams { */ DECLARE_MULTI_CONFIG_KEY(DEVICE_PRIORITIES); +/** + * @brief Network priority config option. The value ranges from 0 to the maximum + * integer; when there are multiple devices, a smaller value means a higher priority, + * and 0 is the highest. The AUTO plugin dispatches the network to a device + * according to this priority value; when all devices are free, the network is + * dispatched to the strongest device even if its priority value is not 0. 
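+ * + * An illustrative sketch of passing the key at load time (assuming the usual + * DECLARE_CONFIG_KEY expansion to KEY_AUTO_NETWORK_PRIORITY; the call itself is + * not part of this header): + *     ie.LoadNetwork(network, "AUTO", + *                    {{MultiDeviceConfigParams::KEY_AUTO_NETWORK_PRIORITY, "0"}}); 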
diff --git a/src/plugins/auto/executable_network.cpp b/src/plugins/auto/executable_network.cpp
index d3375ef1405..fc23320f30d 100644
--- a/src/plugins/auto/executable_network.cpp
+++ b/src/plugins/auto/executable_network.cpp
@@ -149,11 +149,13 @@ MultiDeviceExecutableNetwork::MultiDeviceExecutableNetwork(const std::string&
                                                            const std::vector<DeviceInformation>& metaDevices,
                                                            const std::string& strDevices,
                                                            MultiDeviceInferencePlugin* plugin,
-                                                           const bool needPerfCounters)
+                                                           const AutoContext& context,
+                                                           const bool needPerfCounters)
    : _devicePriorities{metaDevices}
    , _devicePrioritiesInitial{metaDevices}
    , _needPerfCounters(needPerfCounters)
    , _multiPlugin(plugin)
+    , _context(context)
    , _workModeIsAUTO(true) {
    if (_multiPlugin->GetCore() == nullptr) {
        IE_THROW() << "Please, work with MULTI device via InferencEngine::Core object";
@@ -173,7 +175,8 @@ MultiDeviceExecutableNetwork::MultiDeviceExecutableNetwork(const std::string&
    _loadContext[ACTUALDEVICE].isEnabled = true;
    _loadContext[ACTUALDEVICE].networkPrecision = GetNetworkPrecision(network);
    _loadContext[ACTUALDEVICE].metaDevices = metaDevices;
-    _loadContext[ACTUALDEVICE].deviceInfo = _multiPlugin->SelectDevice(metaDevices, _loadContext[ACTUALDEVICE].networkPrecision);
+    _loadContext[ACTUALDEVICE].deviceInfo = _multiPlugin->SelectDevice(metaDevices,
+            _loadContext[ACTUALDEVICE].networkPrecision, _context.modelPriority);
    LOG_INFO("[AUTOPLUGIN]:select device:%s", _loadContext[ACTUALDEVICE].deviceInfo.deviceName.c_str());
    bool isActualDevCPU =
        _loadContext[ACTUALDEVICE].deviceInfo.deviceName.find("CPU") != std::string::npos;
@@ -292,6 +295,13 @@ void MultiDeviceExecutableNetwork::TryToLoadNetWork(AutoLoadContext& context,
        return;
    }

+    // The network must be reloaded, so unregister its priority first. Note a
+    // possible corner case: with dGPU, VPUX and iGPU present, if the user
+    // loads network 0 on dGPU and network 1 on VPUX and the dGPU load fails,
+    // the outcome is not deterministic: both networks may end up on VPUX, or
+    // network 0 on VPUX and network 1 on iGPU.
+    _multiPlugin->UnregisterPriority(_context.modelPriority, context.deviceInfo.uniqueName);
    // remove the current device from deviceList
    auto eraseDevice = std::find_if(deviceList.begin(), deviceList.end(),
            [device](DeviceInformation& d){
@@ -305,7 +315,8 @@ void MultiDeviceExecutableNetwork::TryToLoadNetWork(AutoLoadContext& context,
    // select next candidate device
    try {
-        context.deviceInfo = _multiPlugin->SelectDevice(deviceList, context.networkPrecision);
+        context.deviceInfo = _multiPlugin->SelectDevice(deviceList,
+                context.networkPrecision, _context.modelPriority);
    }
    catch (const std::exception& e) {
        return;
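Condensed, the fallback that starts above is: unregister the failed device's priority slot, drop the device from the candidate list, reselect, and retry. A compilable sketch of that shape with stand-in types (not the plugin's real API):

#include <algorithm>
#include <stdexcept>
#include <string>
#include <vector>

struct Device { std::string uniqueName; };

bool tryLoadOn(const Device&) { return false; }          // stand-in load attempt
Device select(const std::vector<Device>& candidates) {   // stand-in SelectDevice
    if (candidates.empty()) throw std::runtime_error("no device left");
    return candidates.front();
}

void loadWithFallback(Device current, std::vector<Device> candidates) {
    while (!tryLoadOn(current)) {
        // the failed device releases its slot so it stops blocking
        // lower-priority networks, then leaves the candidate list
        candidates.erase(std::remove_if(candidates.begin(), candidates.end(),
                [&](const Device& d) { return d.uniqueName == current.uniqueName; }),
            candidates.end());
        try {
            current = select(candidates);
        } catch (const std::exception&) {
            return;  // every candidate exhausted; give up, as the plugin does
        }
    }
}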
@@ -382,7 +393,7 @@ void MultiDeviceExecutableNetwork::WaitActualNetworkReady() const {
    // for every MultiDeviceExecutableNetwork instance
    std::call_once(_oc, [this] () {
       if (_loadContext[ACTUALDEVICE].future.valid()) {
-          _loadContext[ACTUALDEVICE].future.get();
+          _loadContext[ACTUALDEVICE].future.wait();
       }
       // if _loadContext[ACTUALDEVICE] load failed, fall back to _loadContext[CPU]
       if (!_loadContext[ACTUALDEVICE].isAlready) {
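The get()-to-wait() switch above matters because the two calls treat a stored exception differently: wait() only blocks, while get() rethrows it and invalidates the future. A minimal, runnable demonstration:

#include <future>
#include <iostream>
#include <stdexcept>

int main() {
    auto f = std::async(std::launch::async, [] {
        throw std::runtime_error("device load failed");
    });
    f.wait();      // blocks until done, does NOT throw
    try {
        f.get();   // rethrows the stored exception exactly once
    } catch (const std::exception& e) {
        std::cout << "fallback path: " << e.what() << "\n";
    }
}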
@@ -460,13 +471,17 @@ void MultiDeviceExecutableNetwork::run(Task inferPipelineTask) {
}

MultiDeviceExecutableNetwork::~MultiDeviceExecutableNetwork() {
-    // this is necessary to guarantee member destroyed after getting future
-    if (_workModeIsAUTO && _loadContext[CPU].isEnabled) {
-        _loadContext[CPU].future.get();
-        WaitActualNetworkReady();
-        // it's necessary to wait the loading network threads to stop here.
-        InferenceEngine::ExecutorManager::getInstance()->clear("AutoDeviceAsyncLoad");
-        _executor.reset();
+    if (_workModeIsAUTO) {
+        // this is necessary to guarantee member destroyed after getting future
+        if (_loadContext[CPU].isEnabled) {
+            _loadContext[CPU].future.wait();
+            WaitActualNetworkReady();
+            // it's necessary to wait the loading network threads to stop here.
+            InferenceEngine::ExecutorManager::getInstance()->clear("AutoDeviceAsyncLoad");
+            _executor.reset();
+        }
+        _multiPlugin->UnregisterPriority(_context.modelPriority,
+                _loadContext[ACTUALDEVICE].deviceInfo.uniqueName);
    }
    {
        std::lock_guard<std::mutex> lock(_mutex);
@@ -615,33 +630,32 @@ void MultiDeviceExecutableNetwork::SetConfig(const std::map<std::string, InferenceEngine::Parameter>& config) {
+            std::lock_guard<std::mutex> lockConf(_confMutex);
            _config[MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES] = priorities->second;
-            _confMutex.unlock();
        }
    }
}

InferenceEngine::Parameter MultiDeviceExecutableNetwork::GetConfig(const std::string &name) const {
-    _confMutex.lock();
-    auto it = _config.find(name);
-    if (it != _config.end()) {
-        _confMutex.unlock();
-        return it->second;
-    } else {
-        _confMutex.unlock();
-        // find config key among networks config keys
-        for (const auto& desc : _networksPerDevice) {
-            const auto& execNetwork = desc.second;
-            auto param = execNetwork->GetMetric(METRIC_KEY(SUPPORTED_CONFIG_KEYS));
-            for (auto &&configKey : param.as<std::vector<std::string>>()) {
-                if (configKey == name) {
-                    return execNetwork->GetConfig(configKey);
-                }
+    {
+        std::lock_guard<std::mutex> lock(_confMutex);
+        auto it = _config.find(name);
+        if (it != _config.end()) {
+            return it->second;
+        }
+    }
+
+    // find config key among networks config keys
+    for (const auto& desc : _networksPerDevice) {
+        const auto& execNetwork = desc.second;
+        auto param = execNetwork->GetMetric(METRIC_KEY(SUPPORTED_CONFIG_KEYS));
+        for (auto &&configKey : param.as<std::vector<std::string>>()) {
+            if (configKey == name) {
+                return execNetwork->GetConfig(configKey);
            }
        }
-        IE_THROW(NotFound) << name <<" not found in the ExecutableNetwork config";
    }
+    IE_THROW(NotFound) << name << " not found in the ExecutableNetwork config";
}

InferenceEngine::Parameter MultiDeviceExecutableNetwork::GetMetric(const std::string &name) const {

diff --git a/src/plugins/auto/executable_network.hpp b/src/plugins/auto/executable_network.hpp
index 4034569211b..45efc0450c1 100644
--- a/src/plugins/auto/executable_network.hpp
+++ b/src/plugins/auto/executable_network.hpp
@@ -43,6 +43,12 @@ struct DeviceInformation {
    std::map<std::string, std::string> config;
    int numRequestsPerDevices;
    std::string defaultDeviceID;
+    DeviceName uniqueName;
+};
+
+struct AutoContext {
+    bool needPerfCounters = {false};
+    unsigned int modelPriority = 0;
};

struct AutoLoadContext {
@@ -153,6 +159,7 @@ public:
                                const std::vector<DeviceInformation>& metaDevices,
                                const std::string& strDevices,
                                MultiDeviceInferencePlugin* plugin,
+                                const AutoContext& context,
                                const bool needPerfCounters = false);

    void SetConfig(const std::map<std::string, InferenceEngine::Parameter> &config) override;
@@ -202,6 +209,7 @@ private:
    std::shared_ptr<InferenceEngine::ICore> _core;
    InferenceEngine::IStreamsExecutor::Ptr _executor;
    MultiDeviceInferencePlugin* _multiPlugin;
+    AutoContext _context;
    bool _workModeIsAUTO = {false};
    mutable std::once_flag _oc;
    std::once_flag _firstLoadOC;
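AutoContext above is the small value object carried from config parsing down to device selection. A sketch of how it plausibly gets filled: the PERF_COUNT handling mirrors CheckConfig in this patch, while the priority parsing is an assumption (that hunk is truncated below), inferred from the behavior tests that reject "-1" and "should be int":

#include <map>
#include <stdexcept>
#include <string>

struct AutoContext {
    bool needPerfCounters = false;
    unsigned int modelPriority = 0;
};

AutoContext makeContext(const std::map<std::string, std::string>& config) {
    AutoContext ctx;
    auto perf = config.find("PERF_COUNT");
    if (perf != config.end())
        ctx.needPerfCounters = (perf->second == "YES");
    auto prio = config.find("AUTO_NETWORK_PRIORITY");
    if (prio != config.end()) {
        int value = std::stoi(prio->second);  // "should be int" -> std::invalid_argument
        if (value < 0)
            throw std::out_of_range("priority must be >= 0");  // rejects "-1"
        ctx.modelPriority = static_cast<unsigned int>(value);
    }
    return ctx;
}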
diff --git a/src/plugins/auto/plugin.cpp b/src/plugins/auto/plugin.cpp
index 4416d707e62..7e388cc7ff9 100644
--- a/src/plugins/auto/plugin.cpp
+++ b/src/plugins/auto/plugin.cpp
@@ -60,10 +60,15 @@ namespace {
    res.push_back(CONFIG_KEY_INTERNAL(MULTI_WORK_MODE_AS_AUTO));
    res.push_back(PluginConfigParams::KEY_PERF_COUNT);
    res.push_back(PluginConfigParams::KEY_EXCLUSIVE_ASYNC_REQUESTS);
+    res.push_back(MultiDeviceConfigParams::KEY_AUTO_NETWORK_PRIORITY);
    return res;
    }();
}  // namespace

+
+std::mutex MultiDeviceInferencePlugin::_mtx;
+std::map<unsigned int, std::list<std::string>> MultiDeviceInferencePlugin::_priorityMap;
+
std::map<std::string, std::string> MultiDeviceInferencePlugin::GetSupportedConfig(
    const std::map<std::string, std::string> & config, const std::string & deviceName) const {
    std::vector<std::string> supportedConfigKeys = GetCore()->GetMetric(deviceName, METRIC_KEY(SUPPORTED_CONFIG_KEYS));
@@ -137,11 +142,31 @@ std::vector<DeviceInformation> MultiDeviceInferencePlugin::ParseMetaDevices(cons
        std::string defaultDeviceID = "";
        DeviceIDParser parsed{deviceName};
-        if (parsed.getDeviceID().empty())
+        std::string deviceid = parsed.getDeviceID();
+        if (deviceid.empty()) {
            defaultDeviceID = getDefaultDeviceID(deviceName);
+            deviceid = defaultDeviceID;
+        }
+
+        std::string fullDeviceName = "";
+        std::string uniqueName = "";
+        if (parsed.getDeviceName() == "GPU") {
+            std::vector<std::string> supportedMetrics = GetCore()->GetMetric(deviceName, METRIC_KEY(SUPPORTED_METRICS));
+            if (std::find(supportedMetrics.begin(), supportedMetrics.end(), METRIC_KEY(FULL_DEVICE_NAME)) != supportedMetrics.end()) {
+                fullDeviceName = GetCore()->GetMetric(deviceName, METRIC_KEY(FULL_DEVICE_NAME)).as<std::string>();
+            }
+        }
+
+        if (fullDeviceName.empty()) {
+            uniqueName = parsed.getDeviceName() + "_" + deviceid;
+        } else {
+            uniqueName = fullDeviceName + "_" + deviceid;
+        }
+
+        LOG_DEBUG("deviceName:%s, defaultDeviceID:%s, uniqueName:%s",
+                deviceName.c_str(), defaultDeviceID.c_str(), uniqueName.c_str());
        // create meta device
-        metaDevices.push_back({ deviceName, getDeviceConfig(deviceName), numRequests, defaultDeviceID });
+        metaDevices.push_back({ deviceName, getDeviceConfig(deviceName), numRequests, defaultDeviceID, uniqueName});
    }

    return metaDevices;
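The uniqueName rule above is easiest to see with concrete values: GPUs use the FULL_DEVICE_NAME metric when available, everything else falls back to "<device>_<id>". A small sketch whose expected outputs mirror parse_meta_device_test.cpp later in this patch:

#include <iostream>
#include <string>

std::string makeUniqueName(const std::string& device, const std::string& id,
                           const std::string& fullName) {  // fullName empty if unavailable
    return (fullName.empty() ? device : fullName) + "_" + id;
}

int main() {
    std::cout << makeUniqueName("CPU", "", "") << "\n";  // "CPU_"
    std::cout << makeUniqueName("GPU", "01", "Intel(R) Gen9 HD Graphics (iGPU)") << "\n";
    // "Intel(R) Gen9 HD Graphics (iGPU)_01"; two GPUs with the same index but
    // different full names therefore stay distinguishable
}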
"" : ","; } - return std::make_shared(modelPath, network, supportDevices, strDevices, this, needPerfCounters); + return std::make_shared(modelPath, network, supportDevices, strDevices, this, context, context.needPerfCounters); } OV_ITT_SCOPED_TASK(itt::domains::MULTIPlugin, "MultiDeviceInferencePlugin::LoadNetworkImpl:MultiMode"); if (priorities == fullConfig.end()) { @@ -377,20 +402,18 @@ QueryNetworkResult MultiDeviceInferencePlugin::QueryNetwork(const CNNNetwork& return queryResult; } -DeviceInformation MultiDeviceInferencePlugin::SelectDevice(const std::vector& metaDevices, const std::string& networkPrecision) { +DeviceInformation MultiDeviceInferencePlugin::SelectDevice(const std::vector& metaDevices, + const std::string& networkPrecision, unsigned int priority) { OV_ITT_SCOPED_TASK(itt::domains::MULTIPlugin, "MultiDeviceInferencePlugin::SelectDevice"); if (metaDevices.empty()) { IE_THROW(NotFound) << "No available device to select in " << GetName() << " plugin"; } - if (metaDevices.size() == 1) { - return metaDevices.at(0); - } - std::vector CPU; - std::vector dGPU; - std::vector iGPU; - std::vector MYRIAD; - std::vector VPUX; + std::list CPU; + std::list dGPU; + std::list iGPU; + std::list MYRIAD; + std::list VPUX; for (auto& item : metaDevices) { if (item.deviceName.find("CPU") == 0) { @@ -406,96 +429,103 @@ DeviceInformation MultiDeviceInferencePlugin::SelectDevice(const std::vectorGetMetric(item.deviceName, METRIC_KEY(FULL_DEVICE_NAME)).as(); - if (gpuFullDeviceName.find("iGPU") != std::string::npos) { + auto& gpuUniqueName = item.uniqueName; + if (gpuUniqueName.find("iGPU") != std::string::npos) { iGPU.push_back(item); - } else if (gpuFullDeviceName.find("dGPU") != std::string::npos) { + } else if (gpuUniqueName.find("dGPU") != std::string::npos) { dGPU.push_back(item); } continue; } } - if (CPU.empty() && dGPU.empty() && iGPU.empty() && MYRIAD.empty() && VPUX.empty()) { - IE_THROW(NotFound) << "No available device found"; - } - // Priority of selecting device: dGPU > VPUX > iGPU > MYRIAD > CPU - if (!dGPU.empty()) { - for (auto&& item : dGPU) { - std::vector capability = GetCore()->GetMetric(item.deviceName, METRIC_KEY(OPTIMIZATION_CAPABILITIES)); - auto supportNetwork = std::find(capability.begin(), capability.end(), networkPrecision); - if (supportNetwork != capability.end()) { - return item; - } - } - } else if (!VPUX.empty()) { - for (auto&& item : VPUX) { - std::vector capability = GetCore()->GetMetric(item.deviceName, METRIC_KEY(OPTIMIZATION_CAPABILITIES)); - auto supportNetwork = std::find(capability.begin(), capability.end(), networkPrecision); - if (supportNetwork != capability.end()) { - return item; - } - } - } else if (!iGPU.empty()) { - for (auto&& item : iGPU) { - std::vector capability = GetCore()->GetMetric(item.deviceName, METRIC_KEY(OPTIMIZATION_CAPABILITIES)); - auto supportNetwork = std::find(capability.begin(), capability.end(), networkPrecision); - if (supportNetwork != capability.end()) { - return item; - } - } - } else if (!MYRIAD.empty()) { - for (auto&& item : MYRIAD) { - std::vector capability = GetCore()->GetMetric(item.deviceName, METRIC_KEY(OPTIMIZATION_CAPABILITIES)); - auto supportNetwork = std::find(capability.begin(), capability.end(), networkPrecision); - if (supportNetwork != capability.end()) { - return item; - } - } - } + std::list devices; + devices.splice(devices.end(), dGPU); + devices.splice(devices.end(), VPUX); + devices.splice(devices.end(), iGPU); + devices.splice(devices.end(), MYRIAD); - // If network is FP32 but there is no 
+    std::list<DeviceInformation> validDevices;
+
+    auto selectSupportDev = [this, &devices, &validDevices](const std::string& networkPrecision) {
+        for (auto iter = devices.begin(); iter != devices.end();) {
+            std::vector<std::string> capability = GetCore()->GetMetric(iter->deviceName, METRIC_KEY(OPTIMIZATION_CAPABILITIES));
+            auto supportNetwork = std::find(capability.begin(), capability.end(), (networkPrecision));
+            if (supportNetwork != capability.end()) {
+                validDevices.push_back(std::move(*iter));
+                devices.erase(iter++);
+                continue;
+            }
+            iter++;
+        }
+    };
+    selectSupportDev(networkPrecision);
+    // If the network is FP32, also collect the devices that support FP16 but not FP32.
    if (networkPrecision == "FP32") {
-        if (!dGPU.empty()) {
-            for (auto&& item : dGPU) {
-                std::vector<std::string> capability = GetCore()->GetMetric(item.deviceName, METRIC_KEY(OPTIMIZATION_CAPABILITIES));
-                auto supportNetwork = std::find(capability.begin(), capability.end(), "FP16");
-                if (supportNetwork != capability.end()) {
-                    return item;
-                }
-            }
-        } else if (!VPUX.empty()) {
-            for (auto&& item : VPUX) {
-                std::vector<std::string> capability = GetCore()->GetMetric(item.deviceName, METRIC_KEY(OPTIMIZATION_CAPABILITIES));
-                auto supportNetwork = std::find(capability.begin(), capability.end(), "FP16");
-                if (supportNetwork != capability.end()) {
-                    return item;
-                }
-            }
-        } else if (!iGPU.empty()) {
-            for (auto&& item : iGPU) {
-                std::vector<std::string> capability = GetCore()->GetMetric(item.deviceName, METRIC_KEY(OPTIMIZATION_CAPABILITIES));
-                auto supportNetwork = std::find(capability.begin(), capability.end(), "FP16");
-                if (supportNetwork != capability.end()) {
-                    return item;
-                }
-            }
-        } else if (!MYRIAD.empty()) {
-            for (auto&& item : MYRIAD) {
-                std::vector<std::string> capability = GetCore()->GetMetric(item.deviceName, METRIC_KEY(OPTIMIZATION_CAPABILITIES));
-                auto supportNetwork = std::find(capability.begin(), capability.end(), "FP16");
-                if (supportNetwork != capability.end()) {
-                    return item;
-                }
+        const std::string f16 = "FP16";
+        selectSupportDev(f16);
+    }
+    // add CPU devices if any exist
+    validDevices.splice(validDevices.end(), CPU);
+
+    if (validDevices.empty()) {
+        IE_THROW() << "Cannot select any device";
+    }
+    // all available devices are now in validDevices;
+    // devices already taken by higher priorities must be removed,
+    // so remember the last device first
+    DeviceInformation lastDevice = validDevices.back();
+    {
+        // begin to filter devices
+        std::lock_guard<std::mutex> lck(_mtx);
+        for (auto && kvp : _priorityMap) {
+            if (kvp.first >= priority) {
+                continue;
            }
+            auto& filterDevices = kvp.second;
+            auto sd = std::remove_if(validDevices.begin(), validDevices.end(), [&filterDevices](DeviceInformation device) {
+                    auto iter = std::find_if(filterDevices.begin(), filterDevices.end(), [&device](std::string uniqueName) {
+                            return (uniqueName == device.uniqueName);
+                            });
+                    return iter != filterDevices.end() ? true : false;
+                    });
+            validDevices.erase(sd, validDevices.end());
        }
    }

-    if (CPU.empty()) {
-        IE_THROW() << "Cannot select any device";
+    DeviceInformation* ptrSelectDevice = NULL;
+    if (validDevices.empty()) {
+        // nothing is left after removing the devices held by higher priorities,
+        // so fall back to the last device of all available devices
+        ptrSelectDevice = &lastDevice;
+    } else {
+        // select the first device among the remaining candidates
+        ptrSelectDevice = &validDevices.front();
+    }
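An illustration of the _priorityMap bookkeeping the filter above consults; the types follow the patch, the snapshot contents are invented:

#include <list>
#include <map>
#include <string>

int main() {
    // priority level -> unique names of devices already serving that level
    std::map<unsigned int, std::list<std::string>> priorityMap = {
        {0, {"dGPU_01"}},  // a priority-0 network already occupies the dGPU
    };
    // A request with priority 1 removes every device registered under a
    // numerically smaller key from its candidates, so it skips "dGPU_01" and
    // lands on the next one (e.g. "iGPU_01"), which is the pattern asserted
    // in key_network_priority_test.cpp below.
    (void)priorityMap;
}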
-    return CPU[0];
+    // record the device priority
+    RegisterPriority(priority, ptrSelectDevice->uniqueName);
+    return *ptrSelectDevice;
+}
+
+void MultiDeviceInferencePlugin::UnregisterPriority(const unsigned int& priority,
+        const std::string& deviceName) {
+    std::lock_guard<std::mutex> lck(_mtx);
+    auto& priorityDevices = _priorityMap[priority];
+    for (auto iter = priorityDevices.begin(); iter != priorityDevices.end();) {
+        if (*iter == deviceName) {
+            priorityDevices.erase(iter);
+            break;
+        }
+        iter++;
+    }
+}
+
+void MultiDeviceInferencePlugin::RegisterPriority(const unsigned int& priority,
+        const std::string& deviceName) {
+    std::lock_guard<std::mutex> lck(_mtx);
+    auto& priorityDevices = _priorityMap[priority];
+    priorityDevices.push_back(deviceName);
}

std::string MultiDeviceInferencePlugin::GetDeviceList(const std::map<std::string, std::string>& config) const {
@@ -520,19 +550,17 @@ std::string MultiDeviceInferencePlugin::GetDeviceList(const std::map<std::string, std::string>& config) const {
void MultiDeviceInferencePlugin::CheckConfig(const std::map<std::string, std::string>& config,
-                                bool& needPerfCounters, std::map<std::string, std::string>& filterConfig) {
+                                AutoContext& context, std::map<std::string, std::string>& filterConfig) {
    // TODO need to optimize this code, too much duplicated code
    const auto perf_hints_configs = PerfHintsConfig::SupportedKeys();
    for (auto&& kvp : config) {
-        if (kvp.first.find("AUTO_") == 0) {
-            continue;
-        } else if (kvp.first == PluginConfigParams::KEY_PERF_COUNT) {
+        if (kvp.first == PluginConfigParams::KEY_PERF_COUNT) {
            if (kvp.second == PluginConfigParams::YES) {
-                needPerfCounters = true;
+                context.needPerfCounters = true;
                filterConfig.insert({kvp.first, kvp.second});
            } else if (kvp.second == PluginConfigParams::NO) {
-                needPerfCounters = false;
+                context.needPerfCounters = false;
            } else {
                IE_THROW() << "Unsupported config value: " << kvp.second
                           << " for key: " << kvp.first;
@@ -551,10 +579,24 @@ void MultiDeviceInferencePlugin::CheckConfig(const std::map

diff --git a/src/plugins/auto/plugin.hpp b/src/plugins/auto/plugin.hpp
--- a/src/plugins/auto/plugin.hpp
+++ b/src/plugins/auto/plugin.hpp
@@ -7,6 +7,7 @@
 #include
 #include
+#include <mutex>
 #include
 #include
@@ -45,7 +46,10 @@ public:
    const std::map<std::string, std::string> & config) const;

    std::string GetDeviceList(const std::map<std::string, std::string>& config) const;

-    MOCKTESTMACRO DeviceInformation SelectDevice(const std::vector<DeviceInformation>& metaDevices, const std::string& networkPrecision = METRIC_VALUE(FP32));
+    MOCKTESTMACRO DeviceInformation SelectDevice(const std::vector<DeviceInformation>& metaDevices,
+            const std::string& networkPrecision = METRIC_VALUE(FP32), unsigned int priority = 0);
+    void UnregisterPriority(const unsigned int& priority, const std::string& deviceName);
+    void RegisterPriority(const unsigned int& priority, const std::string& deviceName);

protected:
    std::map<std::string, std::string> GetSupportedConfig(const std::map<std::string, std::string>& config,
@@ -56,10 +60,12 @@ private:
                               InferenceEngine::CNNNetwork network,
                               const std::map<std::string, std::string>& config,
                               const std::string &networkPrecision = METRIC_VALUE(FP32));
-    static void CheckConfig(const std::map<std::string, std::string>& config, bool& needPerfCounters,
+    static void CheckConfig(const std::map<std::string, std::string>& config, AutoContext& context,
                            std::map<std::string, std::string>& filterConfig);
    std::vector<DeviceInformation> FilterDevice(const std::vector<DeviceInformation>& metaDevices,
                                                const std::map<std::string, std::string>& config);
+    static std::mutex _mtx;
+    static std::map<unsigned int, std::list<std::string>> _priorityMap;
};

}  // namespace MultiDevicePlugin

diff --git a/src/tests/functional/plugin/cpu/shared_tests_instances/behavior/plugin/configuration_tests.cpp b/src/tests/functional/plugin/cpu/shared_tests_instances/behavior/plugin/configuration_tests.cpp
index ff240aa56ed..1becc193cf0 100644
--- a/src/tests/functional/plugin/cpu/shared_tests_instances/behavior/plugin/configuration_tests.cpp
+++ b/src/tests/functional/plugin/cpu/shared_tests_instances/behavior/plugin/configuration_tests.cpp
@@ -96,7 +96,13 @@ namespace {
{{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES , CommonTestUtils::DEVICE_CPU}, {InferenceEngine::PluginConfigParams::KEY_LOG_LEVEL, InferenceEngine::PluginConfigParams::LOG_DEBUG}}, {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES , CommonTestUtils::DEVICE_CPU}, - {InferenceEngine::PluginConfigParams::KEY_LOG_LEVEL, InferenceEngine::PluginConfigParams::LOG_TRACE}} + {InferenceEngine::PluginConfigParams::KEY_LOG_LEVEL, InferenceEngine::PluginConfigParams::LOG_TRACE}}, + {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES , CommonTestUtils::DEVICE_CPU}, + {InferenceEngine::MultiDeviceConfigParams::KEY_AUTO_NETWORK_PRIORITY, "0"}}, + {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES , CommonTestUtils::DEVICE_CPU}, + {InferenceEngine::MultiDeviceConfigParams::KEY_AUTO_NETWORK_PRIORITY, "1"}}, + {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES , CommonTestUtils::DEVICE_CPU}, + {InferenceEngine::MultiDeviceConfigParams::KEY_AUTO_NETWORK_PRIORITY, "2"}} }; INSTANTIATE_TEST_SUITE_P(smoke_BehaviorTests, CorrectConfigTests, @@ -149,17 +155,6 @@ namespace { {InferenceEngine::PluginConfigParams::KEY_DYN_BATCH_LIMIT, "NAN"}} }; - const std::vector> multiconf = { - {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES , CommonTestUtils::DEVICE_CPU}, - {InferenceEngine::PluginConfigParams::KEY_PERFORMANCE_HINT, InferenceEngine::PluginConfigParams::THROUGHPUT}}, - {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES , CommonTestUtils::DEVICE_CPU}, - {InferenceEngine::PluginConfigParams::KEY_PERFORMANCE_HINT, InferenceEngine::PluginConfigParams::LATENCY}}, - {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES , CommonTestUtils::DEVICE_CPU}, - {InferenceEngine::PluginConfigParams::KEY_PERFORMANCE_HINT, InferenceEngine::PluginConfigParams::LATENCY}, - {InferenceEngine::PluginConfigParams::KEY_PERFORMANCE_HINT_NUM_REQUESTS, "1"}}, - {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES , CommonTestUtils::DEVICE_CPU}} - }; - const std::vector> autoinconfigs = { {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES , CommonTestUtils::DEVICE_CPU}, {InferenceEngine::PluginConfigParams::KEY_PERFORMANCE_HINT, "DOESN'T EXIST"}}, @@ -176,7 +171,22 @@ namespace { {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES , CommonTestUtils::DEVICE_CPU}, {InferenceEngine::PluginConfigParams::KEY_DYN_BATCH_LIMIT, "NAN"}}, {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES , CommonTestUtils::DEVICE_CPU}, - {InferenceEngine::PluginConfigParams::KEY_LOG_LEVEL, "NAN"}} + {InferenceEngine::MultiDeviceConfigParams::KEY_AUTO_NETWORK_PRIORITY, "-1"}}, + {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES , CommonTestUtils::DEVICE_CPU}, + {InferenceEngine::MultiDeviceConfigParams::KEY_AUTO_NETWORK_PRIORITY, "should be int"}}, + {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES , CommonTestUtils::DEVICE_CPU}, + {InferenceEngine::PluginConfigParams::KEY_LOG_LEVEL, "NAN"}} + }; + + const std::vector> multiconf = { + {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES , CommonTestUtils::DEVICE_CPU}, + {InferenceEngine::PluginConfigParams::KEY_PERFORMANCE_HINT, InferenceEngine::PluginConfigParams::THROUGHPUT}}, + {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES , CommonTestUtils::DEVICE_CPU}, + 
{InferenceEngine::PluginConfigParams::KEY_PERFORMANCE_HINT, InferenceEngine::PluginConfigParams::LATENCY}}, + {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES , CommonTestUtils::DEVICE_CPU}, + {InferenceEngine::PluginConfigParams::KEY_PERFORMANCE_HINT, InferenceEngine::PluginConfigParams::LATENCY}, + {InferenceEngine::PluginConfigParams::KEY_PERFORMANCE_HINT_NUM_REQUESTS, "1"}}, + {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES , CommonTestUtils::DEVICE_CPU}} }; INSTANTIATE_TEST_SUITE_P(smoke_BehaviorTests, IncorrectConfigTests, diff --git a/src/tests/functional/plugin/gpu/shared_tests_instances/behavior/plugin/configuration_tests.cpp b/src/tests/functional/plugin/gpu/shared_tests_instances/behavior/plugin/configuration_tests.cpp index 1cb9fc7a567..826d3f1fc47 100644 --- a/src/tests/functional/plugin/gpu/shared_tests_instances/behavior/plugin/configuration_tests.cpp +++ b/src/tests/functional/plugin/gpu/shared_tests_instances/behavior/plugin/configuration_tests.cpp @@ -68,6 +68,10 @@ namespace { {InferenceEngine::PluginConfigParams::KEY_DEVICE_ID, "DEVICE_UNKNOWN"}}, {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES , CommonTestUtils::DEVICE_CPU}, {InferenceEngine::PluginConfigParams::KEY_LOG_LEVEL, "NAN"}}, + {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES , CommonTestUtils::DEVICE_GPU}, + {InferenceEngine::MultiDeviceConfigParams::KEY_AUTO_NETWORK_PRIORITY, "-1"}}, + {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES , CommonTestUtils::DEVICE_GPU}, + {InferenceEngine::MultiDeviceConfigParams::KEY_AUTO_NETWORK_PRIORITY, "should be int"}}, {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES , CommonTestUtils::DEVICE_GPU + std::string(",") + CommonTestUtils::DEVICE_CPU}, {InferenceEngine::PluginConfigParams::KEY_PERFORMANCE_HINT, "DOESN'T EXIST"}}, @@ -92,7 +96,13 @@ namespace { {InferenceEngine::PluginConfigParams::KEY_DEVICE_ID, "DEVICE_UNKNOWN"}}, {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES , CommonTestUtils::DEVICE_GPU + std::string(",") + CommonTestUtils::DEVICE_CPU}, - {InferenceEngine::PluginConfigParams::KEY_LOG_LEVEL, "NAN"}} + {InferenceEngine::PluginConfigParams::KEY_LOG_LEVEL, "NAN"}}, + {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES , + CommonTestUtils::DEVICE_GPU + std::string(",") + CommonTestUtils::DEVICE_CPU}, + {InferenceEngine::MultiDeviceConfigParams::KEY_AUTO_NETWORK_PRIORITY, "-1"}}, + {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES , + CommonTestUtils::DEVICE_GPU + std::string(",") + CommonTestUtils::DEVICE_CPU}, + {InferenceEngine::MultiDeviceConfigParams::KEY_AUTO_NETWORK_PRIORITY, "should be int"}} }; IE_SUPPRESS_DEPRECATED_END @@ -177,6 +187,12 @@ namespace { {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES , CommonTestUtils::DEVICE_GPU}, {InferenceEngine::PluginConfigParams::KEY_PERFORMANCE_HINT, InferenceEngine::PluginConfigParams::LATENCY}, {InferenceEngine::PluginConfigParams::KEY_PERFORMANCE_HINT_NUM_REQUESTS, "1"}}, + {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES , CommonTestUtils::DEVICE_GPU}, + {InferenceEngine::MultiDeviceConfigParams::KEY_AUTO_NETWORK_PRIORITY, "0"}}, + {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES , CommonTestUtils::DEVICE_GPU}, + {InferenceEngine::MultiDeviceConfigParams::KEY_AUTO_NETWORK_PRIORITY, "1"}}, + 
{{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES , CommonTestUtils::DEVICE_GPU},
+         {InferenceEngine::MultiDeviceConfigParams::KEY_AUTO_NETWORK_PRIORITY, "2"}},
        {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES ,
          CommonTestUtils::DEVICE_GPU + std::string(",") + CommonTestUtils::DEVICE_CPU},
         {InferenceEngine::PluginConfigParams::KEY_PERFORMANCE_HINT, InferenceEngine::PluginConfigParams::THROUGHPUT}},
@@ -204,7 +220,16 @@
            {InferenceEngine::PluginConfigParams::KEY_LOG_LEVEL, InferenceEngine::PluginConfigParams::LOG_DEBUG}},
        {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES ,
          CommonTestUtils::DEVICE_GPU + std::string(",") + CommonTestUtils::DEVICE_CPU},
-         {InferenceEngine::PluginConfigParams::KEY_LOG_LEVEL, InferenceEngine::PluginConfigParams::LOG_TRACE}}
+         {InferenceEngine::PluginConfigParams::KEY_LOG_LEVEL, InferenceEngine::PluginConfigParams::LOG_TRACE}},
+        {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES ,
+          CommonTestUtils::DEVICE_GPU + std::string(",") + CommonTestUtils::DEVICE_CPU},
+         {InferenceEngine::MultiDeviceConfigParams::KEY_AUTO_NETWORK_PRIORITY, "0"}},
+        {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES ,
+          CommonTestUtils::DEVICE_GPU + std::string(",") + CommonTestUtils::DEVICE_CPU},
+         {InferenceEngine::MultiDeviceConfigParams::KEY_AUTO_NETWORK_PRIORITY, "1"}},
+        {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES ,
+          CommonTestUtils::DEVICE_GPU + std::string(",") + CommonTestUtils::DEVICE_CPU},
+         {InferenceEngine::MultiDeviceConfigParams::KEY_AUTO_NETWORK_PRIORITY, "2"}}
    };

INSTANTIATE_TEST_SUITE_P(smoke_BehaviorTests, DefaultValuesConfigTests,

diff --git a/src/tests/unit/auto/auto_select_device_failed_test.cpp b/src/tests/unit/auto/auto_select_device_failed_test.cpp
index 8925c63c301..51ade26194a 100644
--- a/src/tests/unit/auto/auto_select_device_failed_test.cpp
+++ b/src/tests/unit/auto/auto_select_device_failed_test.cpp
@@ -207,7 +207,7 @@ TEST_P(AutoLoadFailedTest, LoadCNNetWork) {
        // set the return value of SelectDevice
        // for example, if there are three devices, it will return GPU on the first call,
        // then MYRIAD, and CPU last
-        ON_CALL(*plugin, SelectDevice(Property(&std::vector<DeviceInformation>::size, Eq(selDevsSize)), _))
+        ON_CALL(*plugin, SelectDevice(Property(&std::vector<DeviceInformation>::size, Eq(selDevsSize)), _, _))
            .WillByDefault(Return(metaDevices[deviceConfigs.size() - selDevsSize]));
        devicesStr += deviceName;
        devicesStr += ((++iter) == deviceConfigs.end()) ?
"" : ","; @@ -219,16 +219,16 @@ TEST_P(AutoLoadFailedTest, LoadCNNetWork) { if (thrExcWheSelect) { selDevsSize = deviceConfigs.size(); if (selDevsSize > 1) { - ON_CALL(*plugin, SelectDevice(Property(&std::vector::size, Eq(selDevsSize - 1)), _)) + ON_CALL(*plugin, SelectDevice(Property(&std::vector::size, Eq(selDevsSize - 1)), _, _)) .WillByDefault(Throw(InferenceEngine::GeneralError{""})); } else { - ON_CALL(*plugin, SelectDevice(Property(&std::vector::size, Eq(1)), _)) + ON_CALL(*plugin, SelectDevice(Property(&std::vector::size, Eq(1)), _, _)) .WillByDefault(Throw(InferenceEngine::GeneralError{""})); } } EXPECT_CALL(*plugin, ParseMetaDevices(_, _)).Times(AtLeast(1)); - EXPECT_CALL(*plugin, SelectDevice(_, _)).Times(selectCount); + EXPECT_CALL(*plugin, SelectDevice(_, _, _)).Times(selectCount); EXPECT_CALL(*core, LoadNetwork(::testing::Matcher(_), ::testing::Matcher(_), ::testing::Matcher(_))).Times(loadCount); diff --git a/src/tests/unit/auto/exec_network_get_metrics.cpp b/src/tests/unit/auto/exec_network_get_metrics.cpp index 2916e6d9e1c..6a29fd508c1 100644 --- a/src/tests/unit/auto/exec_network_get_metrics.cpp +++ b/src/tests/unit/auto/exec_network_get_metrics.cpp @@ -173,10 +173,10 @@ TEST_P(ExecNetworkGetMetric, OPTIMAL_NUMBER_OF_INFER_REQUESTS) { metaDevices.push_back({CommonTestUtils::DEVICE_CPU, {}, cpuCustomerNum, ""}); metaDevices.push_back({CommonTestUtils::DEVICE_GPU, {}, gpuCustomerNum, ""}); - ON_CALL(*plugin, SelectDevice(_, _)).WillByDefault(Return(metaDevices[1])); + ON_CALL(*plugin, SelectDevice(_, _, _)).WillByDefault(Return(metaDevices[1])); ON_CALL(*plugin, ParseMetaDevices(_, _)).WillByDefault(Return(metaDevices)); EXPECT_CALL(*plugin, ParseMetaDevices(_, _)).Times(1); - EXPECT_CALL(*plugin, SelectDevice(_, _)).Times(1); + EXPECT_CALL(*plugin, SelectDevice(_, _, _)).Times(1); if (cpuSleep) { ON_CALL(*core, LoadNetwork(::testing::Matcher(_), diff --git a/src/tests/unit/auto/key_network_priority_test.cpp b/src/tests/unit/auto/key_network_priority_test.cpp new file mode 100644 index 00000000000..2410ba52d72 --- /dev/null +++ b/src/tests/unit/auto/key_network_priority_test.cpp @@ -0,0 +1,241 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include +#include "unit_test_utils/mocks/cpp_interfaces/interface/mock_icore.hpp" +#include "unit_test_utils/mocks/mock_iinfer_request.hpp" +#include "unit_test_utils/mocks/cpp_interfaces/impl/mock_inference_plugin_internal.hpp" +#include "unit_test_utils/mocks/cpp_interfaces/interface/mock_iexecutable_network_internal.hpp" +#include "unit_test_utils/mocks/cpp_interfaces/interface/mock_ivariable_state_internal.hpp" +#include "unit_test_utils/mocks/cpp_interfaces/interface/mock_iinference_plugin.hpp" +#include +#include +#include +#include +#include +#include "plugin/mock_auto_device_plugin.hpp" +#include "cpp/ie_plugin.hpp" +#include "mock_common.hpp" + +using ::testing::MatcherCast; +using ::testing::AllOf; +using ::testing::Throw; +using ::testing::Matches; +using ::testing::_; +using ::testing::StrEq; +using ::testing::Return; +using ::testing::Property; +using ::testing::Eq; +using ::testing::ReturnRef; +using ::testing::AtLeast; +using ::testing::InvokeWithoutArgs; +using Config = std::map; +using namespace MockMultiDevice; + +using PriorityParams = std::tuple; //{priority, deviceUniquName} + +using ConfigParams = std::tuple< + std::string, // netPrecision + std::vector // {{priority, expect device uniqueName}} + >; +class KeyNetworkPriorityTest : public 
::testing::TestWithParam { +public: + std::shared_ptr core; + std::shared_ptr plugin; + std::vector metaDevices; + +public: + static std::string getTestCaseName(testing::TestParamInfo obj) { + std::string netPrecision; + std::vector PriorityConfigs; + std::tie(netPrecision, PriorityConfigs) = obj.param; + std::ostringstream result; + for (auto& item : PriorityConfigs) { + result << "_priority_" << std::get<0>(item); + result << "_return_" << std::get<1>(item); + } + result << "netPrecision_" << netPrecision; + return result.str(); + } + + void TearDown() override { + core.reset(); + plugin.reset(); + metaDevices.clear(); + } + + void SetUp() override { + // prepare mockicore and cnnNetwork for loading + core = std::shared_ptr(new MockICore()); + auto* origin_plugin = new MockMultiDeviceInferencePlugin(); + plugin = std::shared_ptr(origin_plugin); + // replace core with mock Icore + plugin->SetCore(core); + metaDevices = {{CommonTestUtils::DEVICE_CPU, {}, 2, "", "CPU_01"}, + {CommonTestUtils::DEVICE_GPU, {}, 2, "01", "iGPU_01"}, + {CommonTestUtils::DEVICE_GPU, {}, 2, "01", "dGPU_01"}, + {CommonTestUtils::DEVICE_MYRIAD, {}, 2, "01", "MYRIAD_01" }, + {CommonTestUtils::DEVICE_KEEMBAY, {}, 2, "01", "VPUX_01"}}; + IE_SET_METRIC(OPTIMIZATION_CAPABILITIES, cpuCability, {"FP32", "FP16", "INT8", "BIN"}); + IE_SET_METRIC(OPTIMIZATION_CAPABILITIES, gpuCability, {"FP32", "FP16", "BATCHED_BLOB", "BIN"}); + IE_SET_METRIC(OPTIMIZATION_CAPABILITIES, myriadCability, {"FP16"}); + IE_SET_METRIC(OPTIMIZATION_CAPABILITIES, vpuxCability, {"INT8"}); + ON_CALL(*core, GetMetric(StrEq(CommonTestUtils::DEVICE_CPU), + StrEq(METRIC_KEY(OPTIMIZATION_CAPABILITIES)), _)).WillByDefault(RETURN_MOCK_VALUE(cpuCability)); + ON_CALL(*core, GetMetric(StrEq(CommonTestUtils::DEVICE_GPU), + StrEq(METRIC_KEY(OPTIMIZATION_CAPABILITIES)), _)).WillByDefault(RETURN_MOCK_VALUE(gpuCability)); + ON_CALL(*core, GetMetric(StrEq(CommonTestUtils::DEVICE_MYRIAD), + StrEq(METRIC_KEY(OPTIMIZATION_CAPABILITIES)), _)).WillByDefault(RETURN_MOCK_VALUE(myriadCability)); + ON_CALL(*core, GetMetric(StrEq(CommonTestUtils::DEVICE_KEEMBAY), + StrEq(METRIC_KEY(OPTIMIZATION_CAPABILITIES)), _)).WillByDefault(RETURN_MOCK_VALUE(vpuxCability)); + ON_CALL(*plugin, SelectDevice).WillByDefault([this](const std::vector& metaDevices, + const std::string& netPrecision, unsigned int Priority) { + return plugin->MultiDeviceInferencePlugin::SelectDevice(metaDevices, netPrecision, Priority); + }); + } +}; + +TEST_P(KeyNetworkPriorityTest, SelectDevice) { + // get Parameter + std::string netPrecision; + std::vector PriorityConfigs; + std::tie(netPrecision, PriorityConfigs) = this->GetParam(); + std::vector resDevInfo; + + EXPECT_CALL(*plugin, SelectDevice(_, _, _)).Times(PriorityConfigs.size()); + EXPECT_CALL(*core, GetMetric(_, _, _)).Times(AtLeast(PriorityConfigs.size() * 4)); + + for (auto& item : PriorityConfigs) { + resDevInfo.push_back(plugin->SelectDevice(metaDevices, netPrecision, std::get<0>(item))); + } + for (unsigned int i = 0; i < PriorityConfigs.size(); i++) { + EXPECT_EQ(resDevInfo[i].uniqueName, std::get<1>(PriorityConfigs[i])); + plugin->UnregisterPriority(std::get<0>(PriorityConfigs[i]), std::get<1>(PriorityConfigs[i])); + } +} + +TEST_P(KeyNetworkPriorityTest, MultiThreadsSelectDevice) { + // get Parameter + std::string netPrecision; + std::vector PriorityConfigs; + std::tie(netPrecision, PriorityConfigs) = this->GetParam(); + std::vector resDevInfo; + std::vector> futureVect; + + EXPECT_CALL(*plugin, SelectDevice(_, _, _)).Times(PriorityConfigs.size() * 
2); + EXPECT_CALL(*core, GetMetric(_, _, _)).Times(AtLeast(PriorityConfigs.size() * 4 * 2)); + // selectdevice in multi threads, and UnregisterPriority them all, should not affect the + // Priority Map + for (auto& item : PriorityConfigs) { + unsigned int priority = std::get<0>(item); + auto future = std::async(std::launch::async, [this, &netPrecision, priority] { + auto deviceInfo = plugin->SelectDevice(metaDevices, netPrecision, priority); + plugin->UnregisterPriority(priority, deviceInfo.uniqueName); + }); + futureVect.push_back(std::move(future)); + } + + for (auto& item : futureVect) { + item.get(); + } + + for (auto& item : PriorityConfigs) { + resDevInfo.push_back(plugin->SelectDevice(metaDevices, netPrecision, std::get<0>(item))); + } + for (unsigned int i = 0; i < PriorityConfigs.size(); i++) { + EXPECT_EQ(resDevInfo[i].uniqueName, std::get<1>(PriorityConfigs[i])); + plugin->UnregisterPriority(std::get<0>(PriorityConfigs[i]), std::get<1>(PriorityConfigs[i])); + } +} + + +// ConfigParams details +// example +// ConfigParams {"FP32", {PriorityParams {0, "dGPU_01"}, +// PriorityParams {1, "iGPU_01"}, +// PriorityParams {2, "MYRIAD_01"}, +// PriorityParams {2, "MYRIAD_01"}}}, +// {netPrecision, PriorityParamsVector{{priority, expect device uniqueName}}} + +const std::vector testConfigs = { + ConfigParams {"FP32", {PriorityParams {0, "dGPU_01"}, + PriorityParams {1, "iGPU_01"}, + PriorityParams {2, "MYRIAD_01"}, + PriorityParams {2, "MYRIAD_01"}}}, + ConfigParams {"FP32", {PriorityParams {2, "dGPU_01"}, + PriorityParams {3, "iGPU_01"}, + PriorityParams {4, "MYRIAD_01"}, + PriorityParams {5, "CPU_01"}}}, + ConfigParams {"FP32", {PriorityParams {2, "dGPU_01"}, + PriorityParams {0, "dGPU_01"}, + PriorityParams {2, "iGPU_01"}, + PriorityParams {2, "iGPU_01"}}}, + ConfigParams {"FP32", {PriorityParams {2, "dGPU_01"}, + PriorityParams {0, "dGPU_01"}, + PriorityParams {2, "iGPU_01"}, + PriorityParams {3, "MYRIAD_01"}}}, + ConfigParams {"FP32", {PriorityParams {0, "dGPU_01"}, + PriorityParams {1, "iGPU_01"}, + PriorityParams {2, "MYRIAD_01"}, + PriorityParams {3, "CPU_01"}, + PriorityParams {0, "dGPU_01"}, + PriorityParams {1, "iGPU_01"}, + PriorityParams {2, "MYRIAD_01"}, + PriorityParams {3, "CPU_01"}}}, + ConfigParams {"INT8", {PriorityParams {0, "VPUX_01"}, + PriorityParams {1, "CPU_01"}, + PriorityParams {2, "CPU_01"}, + PriorityParams {2, "CPU_01"}}}, + ConfigParams {"INT8", {PriorityParams {2, "VPUX_01"}, + PriorityParams {3, "CPU_01"}, + PriorityParams {4, "CPU_01"}, + PriorityParams {5, "CPU_01"}}}, + ConfigParams {"INT8", {PriorityParams {2, "VPUX_01"}, + PriorityParams {0, "VPUX_01"}, + PriorityParams {2, "CPU_01"}, + PriorityParams {2, "CPU_01"}}}, + ConfigParams {"INT8", {PriorityParams {2, "VPUX_01"}, + PriorityParams {0, "VPUX_01"}, + PriorityParams {2, "CPU_01"}, + PriorityParams {3, "CPU_01"}}}, + ConfigParams {"INT8", {PriorityParams {0, "VPUX_01"}, + PriorityParams {1, "CPU_01"}, + PriorityParams {2, "CPU_01"}, + PriorityParams {3, "CPU_01"}, + PriorityParams {0, "VPUX_01"}, + PriorityParams {1, "CPU_01"}, + PriorityParams {2, "CPU_01"}, + PriorityParams {3, "CPU_01"}}}, + ConfigParams {"BIN", {PriorityParams {0, "dGPU_01"}, + PriorityParams {1, "iGPU_01"}, + PriorityParams {2, "CPU_01"}, + PriorityParams {2, "CPU_01"}}}, + ConfigParams {"BIN", {PriorityParams {2, "dGPU_01"}, + PriorityParams {3, "iGPU_01"}, + PriorityParams {4, "CPU_01"}, + PriorityParams {5, "CPU_01"}}}, + ConfigParams {"BIN", {PriorityParams {2, "dGPU_01"}, + PriorityParams {0, "dGPU_01"}, + 
PriorityParams {2, "iGPU_01"}, + PriorityParams {2, "iGPU_01"}}}, + ConfigParams {"BIN", {PriorityParams {2, "dGPU_01"}, + PriorityParams {0, "dGPU_01"}, + PriorityParams {2, "iGPU_01"}, + PriorityParams {3, "CPU_01"}}}, + ConfigParams {"BIN", {PriorityParams {0, "dGPU_01"}, + PriorityParams {1, "iGPU_01"}, + PriorityParams {2, "CPU_01"}, + PriorityParams {3, "CPU_01"}, + PriorityParams {0, "dGPU_01"}, + PriorityParams {1, "iGPU_01"}, + PriorityParams {2, "CPU_01"}, + PriorityParams {3, "CPU_01"}}} + }; + + +INSTANTIATE_TEST_SUITE_P(smoke_Auto_BehaviorTests, KeyNetworkPriorityTest, + ::testing::ValuesIn(testConfigs), + KeyNetworkPriorityTest::getTestCaseName); + diff --git a/src/tests/unit/auto/parse_meta_device_test.cpp b/src/tests/unit/auto/parse_meta_device_test.cpp new file mode 100644 index 00000000000..83d62f3e6a7 --- /dev/null +++ b/src/tests/unit/auto/parse_meta_device_test.cpp @@ -0,0 +1,172 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include +#include "unit_test_utils/mocks/cpp_interfaces/interface/mock_icore.hpp" +#include "unit_test_utils/mocks/mock_iinfer_request.hpp" +#include "unit_test_utils/mocks/cpp_interfaces/impl/mock_inference_plugin_internal.hpp" +#include "unit_test_utils/mocks/cpp_interfaces/interface/mock_iexecutable_network_internal.hpp" +#include "unit_test_utils/mocks/cpp_interfaces/interface/mock_ivariable_state_internal.hpp" +#include "unit_test_utils/mocks/cpp_interfaces/interface/mock_iinference_plugin.hpp" +#include +#include +#include +#include +#include +#include "plugin/mock_auto_device_plugin.hpp" +#include "cpp/ie_plugin.hpp" +#include "mock_common.hpp" + +using ::testing::MatcherCast; +using ::testing::HasSubstr; +using ::testing::AllOf; +using ::testing::Throw; +using ::testing::Matches; +using ::testing::_; +using ::testing::StrEq; +using ::testing::StrNe; +using ::testing::Return; +using ::testing::Property; +using ::testing::Eq; +using ::testing::AnyNumber; +using ::testing::ReturnRef; +using ::testing::AtLeast; +using ::testing::InvokeWithoutArgs; +using Config = std::map; +using namespace MockMultiDevice; + +const char cpuFullDeviceName[] = "Intel(R) Core(TM) i7-6700 CPU @ 3.40GHz"; +const char igpuFullDeviceName[] = "Intel(R) Gen9 HD Graphics (iGPU)"; +// const char dgpuFullDeviceName[] = "Intel(R) Iris(R) Xe MAX Graphics (dGPU)"; +const char myriadFullDeviceName[] = "Intel Movidius Myriad X VPU"; +const char vpuxFullDeviceName[] = ""; +using ConfigParams = std::tuple< + std::string, // Priority devices + std::vector, // expect metaDevices + bool // if throw exception + >; +class ParseMetaDeviceTest : public ::testing::TestWithParam { +public: + std::shared_ptr core; + std::shared_ptr plugin; + +public: + static std::string getTestCaseName(testing::TestParamInfo obj) { + std::string priorityDevices; + std::vector metaDevices; + bool throwException; + std::tie(priorityDevices, metaDevices, throwException) = obj.param; + std::ostringstream result; + result << "priorityDevices_" << priorityDevices; + if (throwException) { + result << "_throwException_true"; + } else { + result << "_throwException_false"; + } + return result.str(); + } + + void TearDown() override { + core.reset(); + plugin.reset(); + } + + void SetUp() override { + // prepare mockicore and cnnNetwork for loading + core = std::shared_ptr(new MockICore()); + auto* origin_plugin = new MockMultiDeviceInferencePlugin(); + plugin = std::shared_ptr(origin_plugin); + // replace core with mock Icore + plugin->SetCore(core); + + 
IE_SET_METRIC(SUPPORTED_METRICS, metrics, {METRIC_KEY(SUPPORTED_CONFIG_KEYS), METRIC_KEY(FULL_DEVICE_NAME)}); + ON_CALL(*core, GetMetric(_, StrEq(METRIC_KEY(SUPPORTED_METRICS)), _)) + .WillByDefault(RETURN_MOCK_VALUE(metrics)); + + ON_CALL(*core, GetMetric(HasSubstr(CommonTestUtils::DEVICE_CPU), + StrEq(METRIC_KEY(FULL_DEVICE_NAME)), _)).WillByDefault(Return(cpuFullDeviceName)); + ON_CALL(*core, GetMetric(HasSubstr(CommonTestUtils::DEVICE_GPU), + StrEq(METRIC_KEY(FULL_DEVICE_NAME)), _)).WillByDefault(Return(igpuFullDeviceName)); + ON_CALL(*core, GetMetric(HasSubstr(CommonTestUtils::DEVICE_MYRIAD), + StrEq(METRIC_KEY(FULL_DEVICE_NAME)), _)).WillByDefault(Return(myriadFullDeviceName)); + ON_CALL(*core, GetMetric(HasSubstr(CommonTestUtils::DEVICE_KEEMBAY), + StrEq(METRIC_KEY(FULL_DEVICE_NAME)), _)).WillByDefault(Return(vpuxFullDeviceName)); + IE_SET_METRIC(SUPPORTED_CONFIG_KEYS, otherConfigKeys, {CONFIG_KEY(DEVICE_ID)}); + IE_SET_METRIC(SUPPORTED_CONFIG_KEYS, cpuConfigKeys, {}); + ON_CALL(*core, GetMetric(HasSubstr(CommonTestUtils::DEVICE_CPU), + StrEq(METRIC_KEY(SUPPORTED_CONFIG_KEYS)), _)).WillByDefault(RETURN_MOCK_VALUE(cpuConfigKeys)); + ON_CALL(*core, GetMetric(Not(HasSubstr(CommonTestUtils::DEVICE_CPU)), + StrEq(METRIC_KEY(SUPPORTED_CONFIG_KEYS)), _)).WillByDefault(RETURN_MOCK_VALUE(otherConfigKeys)); + ON_CALL(*core, GetConfig(_, StrEq(CONFIG_KEY(DEVICE_ID)))) + .WillByDefault(InvokeWithoutArgs([](){return "01";})); + + ON_CALL(*plugin, ParseMetaDevices).WillByDefault([this](const std::string& priorityDevices, + const std::map& config) { + return plugin->MultiDeviceInferencePlugin::ParseMetaDevices(priorityDevices, config); + }); + } + + void compare(std::vector& result, std::vector& expect) { + EXPECT_EQ(result.size(), expect.size()); + if (result.size() == expect.size()) { + for (unsigned int i = 0 ; i < result.size(); i++) { + EXPECT_EQ(result[i].deviceName, expect[i].deviceName); + EXPECT_EQ(result[i].uniqueName, expect[i].uniqueName); + EXPECT_EQ(result[i].numRequestsPerDevices, expect[i].numRequestsPerDevices); + EXPECT_EQ(result[i].defaultDeviceID, expect[i].defaultDeviceID); + } + } + } +}; + +TEST_P(ParseMetaDeviceTest, ParseMetaDevices) { + // get Parameter + std::string priorityDevices; + std::vector metaDevices; + bool throwException; + std::tie(priorityDevices, metaDevices, throwException) = this->GetParam(); + + EXPECT_CALL(*plugin, ParseMetaDevices(_, _)).Times(1); + EXPECT_CALL(*core, GetMetric(_, _, _)).Times(AnyNumber()); + EXPECT_CALL(*core, GetConfig(_, _)).Times(AnyNumber()); + if (throwException) { + ASSERT_ANY_THROW(plugin->ParseMetaDevices(priorityDevices, {})); + } else { + auto result = plugin->ParseMetaDevices(priorityDevices, {}); + compare(result, metaDevices); + } +} + +// ConfigParams details +// example +// ConfigParams {devicePriority, expect metaDevices, ifThrowException} + +const std::vector testConfigs = { + ConfigParams {"CPU,GPU,MYRIAD,VPUX", + {{"CPU", {}, -1, "", "CPU_"}, + {"GPU", {}, -1, "01", std::string(igpuFullDeviceName) + "_01"}, + {"MYRIAD", {}, -1, "01", "MYRIAD_01"}, + {"VPUX", {}, -1, "01", "VPUX_01"}}, false}, + ConfigParams {"CPU(1),GPU(2),MYRIAD(3),VPUX(4)", + {{"CPU", {}, 1, "", "CPU_"}, + {"GPU", {}, 2, "01", std::string(igpuFullDeviceName) + "_01"}, + {"MYRIAD", {}, 3, "01", "MYRIAD_01"}, + {"VPUX", {}, 4, "01", "VPUX_01"}}, false}, + ConfigParams {"CPU(-1),GPU,MYRIAD,VPUX", {}, true}, + ConfigParams {"CPU(NA),GPU,MYRIAD,VPUX", {}, true}, + ConfigParams {"CPU.02(3),GPU.03,MYRIAD.04,VPUX.05", + {{"CPU.02", {}, 3, "", "CPU_02"}, + 
{"GPU.03", {}, -1, "", std::string(igpuFullDeviceName) + "_03"}, + {"MYRIAD.04", {}, -1, "", "MYRIAD_04"}, + {"VPUX.05", {}, -1, "", "VPUX_05"}}, false} + }; + + +INSTANTIATE_TEST_SUITE_P(smoke_Auto_BehaviorTests, ParseMetaDeviceTest, + ::testing::ValuesIn(testConfigs), + ParseMetaDeviceTest::getTestCaseName); + +//toDo need add test for ParseMetaDevices(_, config) to check device config of +//return metaDevices diff --git a/src/tests/unit/auto/plugin/mock_auto_device_plugin.hpp b/src/tests/unit/auto/plugin/mock_auto_device_plugin.hpp index 883a5f581a8..f92bb52b187 100644 --- a/src/tests/unit/auto/plugin/mock_auto_device_plugin.hpp +++ b/src/tests/unit/auto/plugin/mock_auto_device_plugin.hpp @@ -14,7 +14,7 @@ namespace MockMultiDevice { class MockMultiDeviceInferencePlugin : public MultiDeviceInferencePlugin { public: MOCK_METHOD(DeviceInformation, SelectDevice, ((const std::vector&), - const std::string&), (override)); + const std::string&, unsigned int), (override)); MOCK_METHOD((std::vector), ParseMetaDevices, (const std::string&, (const std::map&)), (const, override)); }; diff --git a/src/tests/unit/auto/select_device_test.cpp b/src/tests/unit/auto/select_device_test.cpp new file mode 100644 index 00000000000..83d4cba279f --- /dev/null +++ b/src/tests/unit/auto/select_device_test.cpp @@ -0,0 +1,214 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include +#include "unit_test_utils/mocks/cpp_interfaces/interface/mock_icore.hpp" +#include "unit_test_utils/mocks/mock_iinfer_request.hpp" +#include "unit_test_utils/mocks/cpp_interfaces/impl/mock_inference_plugin_internal.hpp" +#include "unit_test_utils/mocks/cpp_interfaces/interface/mock_iexecutable_network_internal.hpp" +#include "unit_test_utils/mocks/cpp_interfaces/interface/mock_ivariable_state_internal.hpp" +#include "unit_test_utils/mocks/cpp_interfaces/interface/mock_iinference_plugin.hpp" +#include +#include +#include +#include +#include +#include "plugin/mock_auto_device_plugin.hpp" +#include "cpp/ie_plugin.hpp" +#include "mock_common.hpp" + +using ::testing::MatcherCast; +using ::testing::AllOf; +using ::testing::Throw; +using ::testing::Matches; +using ::testing::_; +using ::testing::StrEq; +using ::testing::Return; +using ::testing::Property; +using ::testing::Eq; +using ::testing::ReturnRef; +using ::testing::AtLeast; +using ::testing::InvokeWithoutArgs; +using Config = std::map; +using namespace MockMultiDevice; + +using ConfigParams = std::tuple< + std::string, // netPrecision + std::vector, // metaDevices for select + DeviceInformation, // expect DeviceInformation + bool // throw exception + >; + +const DeviceInformation CPU_INFO = {CommonTestUtils::DEVICE_CPU, {}, 2, "01", "CPU_01"}; +const DeviceInformation IGPU_INFO = {CommonTestUtils::DEVICE_GPU, {}, 2, "01", "iGPU_01"}; +const DeviceInformation DGPU_INFO = {CommonTestUtils::DEVICE_GPU, {}, 2, "01", "dGPU_01"}; +const DeviceInformation MYRIAD_INFO = {CommonTestUtils::DEVICE_MYRIAD, {}, 2, "01", "MYRIAD_01" }; +const DeviceInformation KEEMBAY_INFO = {CommonTestUtils::DEVICE_KEEMBAY, {}, 2, "01", "VPUX_01" }; +const std::vector fp32DeviceVector = {DGPU_INFO, IGPU_INFO, MYRIAD_INFO, CPU_INFO}; +const std::vector fp16DeviceVector = {DGPU_INFO, IGPU_INFO, MYRIAD_INFO, CPU_INFO}; +const std::vector int8DeviceVector = {KEEMBAY_INFO, CPU_INFO}; +const std::vector binDeviceVector = {DGPU_INFO, IGPU_INFO, CPU_INFO}; +// if CPU support BATCHED_BLOB ? 
+// currently, if CPU is among the metaDevices, SelectDevice falls back to CPU when no other device supports the precision
+//const std::vector<DeviceInformation> batchedblobDeviceVector = {DGPU_INFO, IGPU_INFO};
+const std::vector<DeviceInformation> batchedblobDeviceVector = {DGPU_INFO, IGPU_INFO, CPU_INFO};
+std::map<std::string, std::vector<DeviceInformation>> devicesMap = {{"FP32", fp32DeviceVector},
+                                                                    {"FP16", fp16DeviceVector},
+                                                                    {"INT8", int8DeviceVector},
+                                                                    {"BIN", binDeviceVector},
+                                                                    {"BATCHED_BLOB", batchedblobDeviceVector}
+                                                                   };
+const std::vector<DeviceInformation> totalDevices = {DGPU_INFO, IGPU_INFO, MYRIAD_INFO, CPU_INFO, KEEMBAY_INFO};
+const std::vector<std::string> netPrecisions = {"FP32", "FP16", "INT8", "BIN", "BATCHED_BLOB"};
+std::vector<ConfigParams> testConfigs;
+
+class SelectDeviceTest : public ::testing::TestWithParam<ConfigParams> {
+public:
+    std::shared_ptr<MockICore> core;
+    std::shared_ptr<MockMultiDeviceInferencePlugin> plugin;
+
+public:
+    static std::string getTestCaseName(testing::TestParamInfo<ConfigParams> obj) {
+        std::string netPrecision;
+        std::vector<DeviceInformation> devices;
+        DeviceInformation expect;
+        bool throwExcept;
+        std::tie(netPrecision, devices, expect, throwExcept) = obj.param;
+        std::ostringstream result;
+        result << "_netPrecision_" << netPrecision;
+        for (auto& item : devices) {
+            result << "_device_" << item.uniqueName;
+        }
+        result << "_expect_" << expect.uniqueName;
+        if (throwExcept) {
+            result << "_throwExcept_true";
+        } else {
+            result << "_throwExcept_false";
+        }
+        return result.str();
+    }
+    // choose select_num devices out of `devices`, build a ConfigParams entry
+    // for each combination, and insert it into testConfigs
+    static void combine_device(const std::vector<DeviceInformation>& devices, int start,
+            int* result, int result_index, const int select_num, std::string& netPrecision) {
+        int i = 0;
+        for (i = start; i < devices.size() + 1 - result_index; i++) {
+            result[result_index - 1] = i;
+            if (result_index - 1 == 0) {
+                std::vector<DeviceInformation> metaDevices = {};
+                for (int j = select_num - 1; j >= 0; j--) {
+                    metaDevices.push_back(devices[result[j]]);
+                }
+                // Debug the combine_device
+                // for (auto& item : metaDevices) {
+                //     std::cout << item.uniqueName << "_";
+                // }
+                // std::cout << netPrecision << std::endl;
+                auto& devicesInfo = devicesMap[netPrecision];
+                bool find = false;
+                DeviceInformation expect;
+                for (auto& item : devicesInfo) {
+                    auto device = std::find_if(metaDevices.begin(), metaDevices.end(),
+                            [&item](const DeviceInformation& d)->bool{return d.uniqueName == item.uniqueName;});
+                    if (device != metaDevices.end()) {
+                        find = true;
+                        expect = item;
+                        break;
+                    }
+                }
+                testConfigs.push_back(std::make_tuple(netPrecision, metaDevices, expect, !find));
+            } else {
+                combine_device(devices, i + 1, result, result_index - 1, select_num, netPrecision);
+            }
+        }
+    }
+
+    static std::vector<ConfigParams> CreateConfigs() {
+        auto result = new int[totalDevices.size()];
+        // test every netPrecision against every possible device combination
+        // netPrecision count is 5
+        // device count is 5
+        // device combinations: 5!/5! + 5!/(4!*1!) + 5!/(3!*2!) + 5!/(2!*3!) + 5!/(1!*4!) = 31
+        // plus 1 null-device case
+        // total test config count is 32*5 = 160
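As a check on the arithmetic in the comment above, in binomial form:

\[ \sum_{k=1}^{5} \binom{5}{k} = 2^5 - 1 = 31, \qquad (31 + 1) \times 5 = 160. \]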
+        for (auto netPrecision : netPrecisions) {
+            for (int i = 1; i <= totalDevices.size(); i++) {
+                combine_device(totalDevices, 0, result, i, i, netPrecision);
+            }
+            // test null device
+            testConfigs.push_back(ConfigParams{netPrecision, {}, {}, true});
+        }
+        delete []result;
+        return testConfigs;
+    }
+
+    void compare(DeviceInformation& a, DeviceInformation& b) {
+        EXPECT_EQ(a.deviceName, b.deviceName);
+        EXPECT_EQ(a.uniqueName, b.uniqueName);
+        EXPECT_EQ(a.defaultDeviceID, b.defaultDeviceID);
+    }
+
+    void TearDown() override {
+        core.reset();
+        plugin.reset();
+    }
+
+    void SetUp() override {
+        // prepare the mock ICore and cnnNetwork for loading
+        core = std::shared_ptr<MockICore>(new MockICore());
+        auto* origin_plugin = new MockMultiDeviceInferencePlugin();
+        plugin = std::shared_ptr<MockMultiDeviceInferencePlugin>(origin_plugin);
+        // replace core with the mock ICore
+        plugin->SetCore(core);
+
+        IE_SET_METRIC(OPTIMIZATION_CAPABILITIES, cpuCability, {"FP32", "FP16", "INT8", "BIN"});
+        IE_SET_METRIC(OPTIMIZATION_CAPABILITIES, gpuCability, {"FP32", "FP16", "BATCHED_BLOB", "BIN"});
+        IE_SET_METRIC(OPTIMIZATION_CAPABILITIES, myriadCability, {"FP16"});
+        IE_SET_METRIC(OPTIMIZATION_CAPABILITIES, vpuxCability, {"INT8"});
+
+        ON_CALL(*core, GetMetric(StrEq(CommonTestUtils::DEVICE_CPU),
+                    StrEq(METRIC_KEY(OPTIMIZATION_CAPABILITIES)), _)).WillByDefault(RETURN_MOCK_VALUE(cpuCability));
+        ON_CALL(*core, GetMetric(StrEq(CommonTestUtils::DEVICE_GPU),
+                    StrEq(METRIC_KEY(OPTIMIZATION_CAPABILITIES)), _)).WillByDefault(RETURN_MOCK_VALUE(gpuCability));
+        ON_CALL(*core, GetMetric(StrEq(CommonTestUtils::DEVICE_MYRIAD),
+                    StrEq(METRIC_KEY(OPTIMIZATION_CAPABILITIES)), _)).WillByDefault(RETURN_MOCK_VALUE(myriadCability));
+        ON_CALL(*core, GetMetric(StrEq(CommonTestUtils::DEVICE_KEEMBAY),
+                    StrEq(METRIC_KEY(OPTIMIZATION_CAPABILITIES)), _)).WillByDefault(RETURN_MOCK_VALUE(vpuxCability));
+        ON_CALL(*plugin, SelectDevice).WillByDefault([this](const std::vector<DeviceInformation>& metaDevices,
+                    const std::string& netPrecision, unsigned int priority) {
+                return plugin->MultiDeviceInferencePlugin::SelectDevice(metaDevices, netPrecision, priority);
+                });
+    }
+};
+
+TEST_P(SelectDeviceTest, SelectDevice) {
+    // get Parameter
+    std::string netPrecision;
+    std::vector<DeviceInformation> devices;
+    DeviceInformation expect;
+    bool throwExcept;
+    std::tie(netPrecision, devices, expect, throwExcept) = this->GetParam();
+
+    EXPECT_CALL(*plugin, SelectDevice(_, _, _)).Times(1);
+    if (devices.size() >= 1) {
+        EXPECT_CALL(*core, GetMetric(_, _, _)).Times(AtLeast(devices.size() - 1));
+    } else {
+        EXPECT_CALL(*core, GetMetric(_, _, _)).Times(0);
+    }
+
+    if (throwExcept) {
+        ASSERT_THROW(plugin->SelectDevice(devices, netPrecision, 0), InferenceEngine::Exception);
+    } else {
+        auto result = plugin->SelectDevice(devices, netPrecision, 0);
+        compare(result, expect);
+    }
+}
+
+INSTANTIATE_TEST_SUITE_P(smoke_Auto_BehaviorTests, SelectDeviceTest,
+                ::testing::ValuesIn(SelectDeviceTest::CreateConfigs()),
+                SelectDeviceTest::getTestCaseName);

From fa05743e01063233321f365ef253748674c1128b Mon Sep 17 00:00:00 2001
From: Anton Chetverikov
Date: Thu, 16 Dec 2021 11:20:50 +0300
Subject: [PATCH 21/27] [MO] Handle new format of FP16 IRs in groupconv pass (#8921)

* Handle new format of FP16 IRs in groupconv pass
* Update condition check
* Make check more explicit
---
 .../openvino/tools/mo/utils/ir_reader/layer_to_class.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/tools/mo/openvino/tools/mo/utils/ir_reader/layer_to_class.py
From fa05743e01063233321f365ef253748674c1128b Mon Sep 17 00:00:00 2001
From: Anton Chetverikov
Date: Thu, 16 Dec 2021 11:20:50 +0300
Subject: [PATCH 21/27] [MO] Handle new format of FP16 IRs in groupconv pass
 (#8921)

* Handle new format of FP16 IRs in groupconv pass

* Update condition check

* Make check more explicit
---
 .../openvino/tools/mo/utils/ir_reader/layer_to_class.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/tools/mo/openvino/tools/mo/utils/ir_reader/layer_to_class.py b/tools/mo/openvino/tools/mo/utils/ir_reader/layer_to_class.py
index bbbc429fc0d..e207478e1ae 100644
--- a/tools/mo/openvino/tools/mo/utils/ir_reader/layer_to_class.py
+++ b/tools/mo/openvino/tools/mo/utils/ir_reader/layer_to_class.py
@@ -189,6 +189,14 @@ def groupconv_to_conv(op: Node):
             # We use add_destination method here to support case with multiple destinations of source port
             weights_node.in_port(0).get_source().get_connection().add_destination(op.in_port(1))
             weights_node.in_port(0).disconnect()
+        elif weights_node.type == 'Convert' and weights_node.destination_type == 'f32'\
+                and weights_node.in_port(0).get_source().node.type == 'Const':
+            # Support new FP16 IRs
+            const_node = weights_node.in_port(0).get_source().node
+            assert const_node.has_valid('value'), \
+                'Weights of GroupConv node {} have incorrect format'.format(op.name)
+            const_node.value = np.reshape(const_node.value, new_shape)
+
         else:
             assert op.in_port(1).get_source().data.get_shape() == new_shape, \
                 'Weight shape and calculated shape mismatch in GroupConv node {}.'.format(op.name)

From 460a6634fdf8d62405969c17cb77953ffb855996 Mon Sep 17 00:00:00 2001
From: Sergey Lyubimtsev
Date: Thu, 16 Dec 2021 13:01:03 +0300
Subject: [PATCH 22/27] Fix build issue for openvino wheel package on Windows
 (#9231)

* fix build issue for openvino wheel package on Windows

* revert ngraph_libs
---
 .../ie_bridges/python/wheel/.env.in           | 15 -------
 .../ie_bridges/python/wheel/CMakeLists.txt    | 44 ++++++-------------
 .../wheel/meta/openvino-dev.requirements.txt  | 28 ------------
 .../python/wheel/meta/openvino-dev.setup.cfg  | 22 ----------
 .../wheel/meta/openvino.requirements.txt      |  1 -
 .../python/wheel/meta/pypi_overview.md        | 32 --------------
 .../python/wheel/requirements-dev.txt         |  2 +-
 .../ie_bridges/python/wheel/setup.cfg         | 14 +++---
 .../ie_bridges/python/wheel/setup.py          | 34 +++++++-------
 tools/openvino_dev/setup.py                   |  2 +-
 10 files changed, 41 insertions(+), 153 deletions(-)
 delete mode 100644 inference-engine/ie_bridges/python/wheel/.env.in
 delete mode 100644 inference-engine/ie_bridges/python/wheel/meta/openvino-dev.requirements.txt
 delete mode 100644 inference-engine/ie_bridges/python/wheel/meta/openvino-dev.setup.cfg
 delete mode 100644 inference-engine/ie_bridges/python/wheel/meta/openvino.requirements.txt
 delete mode 100644 inference-engine/ie_bridges/python/wheel/meta/pypi_overview.md

diff --git a/inference-engine/ie_bridges/python/wheel/.env.in b/inference-engine/ie_bridges/python/wheel/.env.in
deleted file mode 100644
index 5dc313f6b1d..00000000000
--- a/inference-engine/ie_bridges/python/wheel/.env.in
+++ /dev/null
@@ -1,15 +0,0 @@
-WHEEL_PACKAGE_NAME=@WHEEL_PACKAGE_NAME@
-WHEEL_VERSION=@WHEEL_VERSION@
-WHEEL_BUILD=@WHEEL_BUILD@
-WHEEL_LICENCE_TYPE=@WHEEL_LICENCE_TYPE@
-WHEEL_AUTHOR=@WHEEL_AUTHOR@
-WHEEL_AUTHOR_EMAIL=@WHEEL_AUTHOR_EMAIL@
-WHEEL_DESC=@WHEEL_DESC@
-WHEEL_LICENSE=@WHEEL_LICENSE@
-WHEEL_REQUIREMENTS=@WHEEL_REQUIREMENTS@
-WHEEL_OVERVIEW=@WHEEL_OVERVIEW@
-
-CMAKE_BUILD_DIR=@CMAKE_BINARY_DIR@
-OV_RUNTIME_LIBS_DIR=@IE_CPACK_RUNTIME_PATH@
-TBB_LIBS_DIR=@TBB_LIBS_DIR@
-PY_PACKAGES_DIR=@PY_PACKAGES_DIR@
diff --git a/inference-engine/ie_bridges/python/wheel/CMakeLists.txt b/inference-engine/ie_bridges/python/wheel/CMakeLists.txt
index 53d8207c347..64728a5cd7c 100644
--- a/inference-engine/ie_bridges/python/wheel/CMakeLists.txt
+++ b/inference-engine/ie_bridges/python/wheel/CMakeLists.txt
@@ -1,40 +1,13 @@
 # Copyright (C) 2018-2021 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 #
-
-set(WHEEL_PACKAGE_NAME "openvino" CACHE
STRING "Name of the package") -set(WHEEL_LICENCE_TYPE "OSI Approved :: Apache Software License" CACHE STRING "License type for the package") -set(WHEEL_AUTHOR "Intel Corporation" CACHE STRING "Package author’s name") -set(WHEEL_AUTHOR_EMAIL "openvino_pushbot@intel.com" CACHE STRING "Email address of the package author") -set(WHEEL_DESC "Inference Engine Python* API" CACHE STRING "Short, summary description of the package") -set(WHEEL_URL "https://docs.openvinotoolkit.org/latest/index.html" CACHE STRING "Home page url") -set(WHEEL_DOWNLOAD_URL "https://github.com/openvinotoolkit/openvino/tags" CACHE STRING "Download page url") -set(WHEEL_VERSION "${IE_VERSION}" CACHE STRING "Version of this release" FORCE) set(WHEEL_BUILD "${IE_VERSION_BUILD}" CACHE STRING "Build number of this release" FORCE) -set(WHEEL_LICENSE "${CMAKE_SOURCE_DIR}/LICENSE" CACHE STRING "Wheel license file") -set(WHEEL_REQUIREMENTS "${CMAKE_CURRENT_SOURCE_DIR}/meta/openvino.requirements.txt" CACHE STRING "Wheel requirements.txt file") -set(WHEEL_OVERVIEW "${CMAKE_CURRENT_SOURCE_DIR}/meta/pypi_overview.md" CACHE STRING "Detailed description") - -set(SETUP_PY "${CMAKE_CURRENT_SOURCE_DIR}/setup.py") -set(SETUP_ENV "${CMAKE_CURRENT_SOURCE_DIR}/.env.in") -set(SETUP_ENV_OUT "${CMAKE_CURRENT_SOURCE_DIR}/.env") - set(PY_PACKAGES_DIR ${PYTHON_BRIDGE_CPACK_PATH}/${PYTHON_VERSION}) set(TBB_LIBS_DIR runtime/3rdparty/tbb/lib) - -if(APPLE) - set(WHEEL_PLATFORM macosx_10_15_x86_64) -elseif(UNIX) - set(WHEEL_PLATFORM manylinux2014_x86_64) -elseif(WIN32) - set(WHEEL_PLATFORM win_amd64) +if(WIN32) set(TBB_LIBS_DIR runtime/3rdparty/tbb/bin) -else() - message(FATAL_ERROR "This platform is not supported") endif() -configure_file(${SETUP_ENV} ${SETUP_ENV_OUT} @ONLY) - if(LINUX) find_host_program(patchelf_program NAMES patchelf @@ -55,21 +28,30 @@ endforeach() execute_process(COMMAND ${PYTHON_EXECUTABLE} -c "import wheel.bdist_wheel ; print(f'{wheel.bdist_wheel.get_abi_tag()}')" OUTPUT_VARIABLE PYTHON_ABI) execute_process(COMMAND ${PYTHON_EXECUTABLE} -c "import wheel.vendored.packaging.tags as tags ; print(f'{tags.interpreter_name()}{tags.interpreter_version()}')" OUTPUT_VARIABLE INTERPRETER) +execute_process(COMMAND ${PYTHON_EXECUTABLE} -c "import wheel.vendored.packaging.tags as tags ; print(f'{next(tags._platform_tags())}')" OUTPUT_VARIABLE WHEEL_PLATFORM) string(STRIP ${PYTHON_ABI} PYTHON_ABI) string(STRIP ${INTERPRETER} INTERPRETER) +string(STRIP ${WHEEL_PLATFORM} WHEEL_PLATFORM) set(openvino_wheel_name "openvino-${WHEEL_VERSION}-${WHEEL_BUILD}-${INTERPRETER}-${PYTHON_ABI}-${WHEEL_PLATFORM}.whl") set(openvino_wheels_output_dir "${CMAKE_BINARY_DIR}/wheels") set(openvino_wheel_path "${openvino_wheels_output_dir}/${openvino_wheel_name}") add_custom_command(OUTPUT ${openvino_wheel_path} + COMMAND ${CMAKE_COMMAND} -E copy_directory "${CMAKE_CURRENT_SOURCE_DIR}" "${CMAKE_CURRENT_BINARY_DIR}" + COMMAND ${CMAKE_COMMAND} -E copy_directory "${CMAKE_SOURCE_DIR}/licensing" "${CMAKE_BINARY_DIR}/licensing" COMMAND ${CMAKE_COMMAND} -E remove_directory "${CMAKE_CURRENT_BINARY_DIR}/site-packages" - COMMAND ${PYTHON_EXECUTABLE} ${SETUP_PY} clean bdist_wheel + COMMAND ${CMAKE_COMMAND} -E env OPENVINO_VERSION=${IE_VERSION} + OPENVINO_VERSION=${IE_VERSION} + CMAKE_BUILD_DIR=${CMAKE_BINARY_DIR} + OV_RUNTIME_LIBS_DIR=${IE_CPACK_RUNTIME_PATH} + TBB_LIBS_DIR=${TBB_LIBS_DIR} + PY_PACKAGES_DIR=${PY_PACKAGES_DIR} + ${PYTHON_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/setup.py" clean bdist_wheel --dist-dir ${openvino_wheels_output_dir} --build=${WHEEL_BUILD} 
--plat-name=${WHEEL_PLATFORM} - # COMMAND ${CMAKE_COMMAND} -E remove ${SETUP_ENV_OUT} - DEPENDS ${openvino_wheel_deps} ${SETUP_ENV_OUT} + DEPENDS ${openvino_wheel_deps} WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}" COMMENT "Building Python wheel ${openvino_wheel_name}" VERBATIM) diff --git a/inference-engine/ie_bridges/python/wheel/meta/openvino-dev.requirements.txt b/inference-engine/ie_bridges/python/wheel/meta/openvino-dev.requirements.txt deleted file mode 100644 index ccc569a0194..00000000000 --- a/inference-engine/ie_bridges/python/wheel/meta/openvino-dev.requirements.txt +++ /dev/null @@ -1,28 +0,0 @@ -defusedxml>=0.7.1 -scipy~=1.5.4 -jstyleson~=0.0.2 -numpy>=1.16.6,<1.20 -addict>=2.4.0 -pandas~=1.1.5 -hyperopt~=0.1.2 -networkx~=2.5 -tqdm>=4.54.1 -texttable~=1.6.3 -py-cpuinfo>=7.0.0 -PyYAML>=5.4.1 -pillow>=8.1.2 -scikit-image>=0.17.2 -scikit-learn>=0.24.1 -yamlloader>=0.5 -shapely>=1.7.1 -nibabel>=3.2.1 -pydicom>=2.1.2 -sentencepiece>=0.1.95 -tokenizers>=0.10.1 -editdistance>=0.5.3 -parasail>=1.2.4 -fast-ctc-decode>=0.2.5 -rawpy>=0.16.0 -nltk>=3.5 -opencv-python==4.5.* -progress>=1.5 diff --git a/inference-engine/ie_bridges/python/wheel/meta/openvino-dev.setup.cfg b/inference-engine/ie_bridges/python/wheel/meta/openvino-dev.setup.cfg deleted file mode 100644 index d6789c4a084..00000000000 --- a/inference-engine/ie_bridges/python/wheel/meta/openvino-dev.setup.cfg +++ /dev/null @@ -1,22 +0,0 @@ -[options] -py_modules = - mo - mo_tf - mo_caffe - mo_mxnet - mo_onnx - mo_kaldi - -[options.package_data] - * = * - -[options.entry_points] -console_scripts = - -[metadata] -license_files = - readme* - *LICENSE* - *license* - *third-party-programs* - *EULA* diff --git a/inference-engine/ie_bridges/python/wheel/meta/openvino.requirements.txt b/inference-engine/ie_bridges/python/wheel/meta/openvino.requirements.txt deleted file mode 100644 index 63012dd1739..00000000000 --- a/inference-engine/ie_bridges/python/wheel/meta/openvino.requirements.txt +++ /dev/null @@ -1 +0,0 @@ -numpy>=1.16.6,<1.20 diff --git a/inference-engine/ie_bridges/python/wheel/meta/pypi_overview.md b/inference-engine/ie_bridges/python/wheel/meta/pypi_overview.md deleted file mode 100644 index 418a04bc16a..00000000000 --- a/inference-engine/ie_bridges/python/wheel/meta/pypi_overview.md +++ /dev/null @@ -1,32 +0,0 @@ -## OpenVINOâ„¢ Toolkit - -OpenVINOâ„¢ toolkit quickly deploys applications and solutions that emulate human vision. Based on Convolutional Neural Networks (CNNs), the toolkit extends computer vision (CV) workloads across Intel® hardware, maximizing performance. The OpenVINOâ„¢ toolkit includes the Deep Learning Deployment Toolkit (DLDT). - -OpenVINOâ„¢ toolkit: - -- Enables CNN-based deep learning inference on the edge -- Supports heterogeneous execution across an Intel® CPU, Intel® Integrated Graphics, Intel® Neural Compute Stick 2, and Intel® Vision Accelerator Design with Intel® Movidiusâ„¢ VPUs -- Speeds time-to-market via an easy-to-use library of computer vision functions and pre-optimized kernels -- Includes optimized calls for computer vision standards, including OpenCV\* and OpenCLâ„¢ - -Operating Systems: -- Ubuntu* 18.04 long-term support (LTS), 64-bit -- Windows* 10, 64-bit -- macOS* 10.15, 64-bit - -## Install the Runtime Package Using the PyPI Repository -1. Set up and update pip to the highest version: - ```sh - python3 -m pip install --upgrade pip - ``` -2. Install the Intel® distribution of OpenVINOâ„¢ toolkit: - ```sh - pip install openvino - ``` - -3. 
Verify that the package is installed: - ```sh - python3 -c "from openvino.inference_engine import IECore" - ``` - -Now you are ready to develop and run your application. \ No newline at end of file diff --git a/inference-engine/ie_bridges/python/wheel/requirements-dev.txt b/inference-engine/ie_bridges/python/wheel/requirements-dev.txt index 8c4ce47c35f..b7574b392d2 100644 --- a/inference-engine/ie_bridges/python/wheel/requirements-dev.txt +++ b/inference-engine/ie_bridges/python/wheel/requirements-dev.txt @@ -1,3 +1,3 @@ setuptools>=53.0.0 wheel>=0.36.2 -python-decouple>=3.4 + diff --git a/inference-engine/ie_bridges/python/wheel/setup.cfg b/inference-engine/ie_bridges/python/wheel/setup.cfg index abb1790b67f..c6893c93c42 100644 --- a/inference-engine/ie_bridges/python/wheel/setup.cfg +++ b/inference-engine/ie_bridges/python/wheel/setup.cfg @@ -1,7 +1,11 @@ [metadata] license_files = - readme* - *LICENSE* - *license* - *third-party-programs* - *EULA* + readme* + *LICENSE* + *license* + *third-party-programs* + ../../../../licensing/runtime-third-party-programs.txt + ../../../../licensing/tbb_third-party-programs.txt + ../../../../licensing/onednn_third-party-programs.txt + ../../../../LICENSE + diff --git a/inference-engine/ie_bridges/python/wheel/setup.py b/inference-engine/ie_bridges/python/wheel/setup.py index 517dce7560e..eb8d573dfba 100644 --- a/inference-engine/ie_bridges/python/wheel/setup.py +++ b/inference-engine/ie_bridges/python/wheel/setup.py @@ -21,7 +21,6 @@ from setuptools import setup, find_namespace_packages, Extension from setuptools.command.build_ext import build_ext from setuptools.command.build_clib import build_clib from setuptools.command.install import install -from decouple import config WHEEL_LIBS_INSTALL_DIR = os.path.join('openvino', 'libs') WHEEL_LIBS_PACKAGE = 'openvino.libs' @@ -41,10 +40,11 @@ elif machine == 'aarch64': ARCH = 'arm64' # The following variables can be defined in environment or .env file -CMAKE_BUILD_DIR = config('CMAKE_BUILD_DIR', '.') -OV_RUNTIME_LIBS_DIR = config('OV_RUNTIME_LIBS_DIR', f'runtime/{LIBS_DIR}/{ARCH}/{CONFIG}') -TBB_LIBS_DIR = config('TBB_LIBS_DIR', f'runtime/3rdparty/tbb/{LIBS_DIR}') -PY_PACKAGES_DIR = config('PY_PACKAGES_DIR', f'python/{PYTHON_VERSION}') +SCRIPT_DIR = Path(__file__).resolve().parents[0] +CMAKE_BUILD_DIR = os.getenv('CMAKE_BUILD_DIR', '.') +OV_RUNTIME_LIBS_DIR = os.getenv('OV_RUNTIME_LIBS_DIR', f'runtime/{LIBS_DIR}/{ARCH}/{CONFIG}') +TBB_LIBS_DIR = os.getenv('TBB_LIBS_DIR', f'runtime/3rdparty/tbb/{LIBS_DIR}') +PY_PACKAGES_DIR = os.getenv('PY_PACKAGES_DIR', f'python/{PYTHON_VERSION}') LIBS_RPATH = '$ORIGIN' if sys.platform == 'linux' else '@loader_path' LIB_INSTALL_CFG = { @@ -428,28 +428,28 @@ if not any(pl in sys.platform for pl in platforms): sys.exit(f'Unsupported platform: {sys.platform}, expected: linux, win32, darwin') # copy license file into the build directory -package_license = config('WHEEL_LICENSE', '') +package_license = os.getenv('WHEEL_LICENSE', SCRIPT_DIR.parents[3] / 'LICENSE') if os.path.exists(package_license): copyfile(package_license, 'LICENSE') packages = find_namespace_packages(get_package_dir(PY_INSTALL_CFG)) package_data: typing.Dict[str, list] = {} -pkg_name = config('WHEEL_PACKAGE_NAME', 'openvino') +pkg_name = os.getenv('WHEEL_PACKAGE_NAME', 'openvino') ext_modules = find_prebuilt_extensions(get_dir_list(PY_INSTALL_CFG)) if pkg_name == 'openvino' else [] setup( - version=config('WHEEL_VERSION', '0.0.0'), - build=config('WHEEL_BUILD', '000'), - 
author_email=config('WHEEL_AUTHOR_EMAIL', 'openvino_pushbot@intel.com'),
+    version=os.getenv('WHEEL_VERSION', '0.0.0'),
+    build=os.getenv('WHEEL_BUILD', '000'),
+    author_email=os.getenv('WHEEL_AUTHOR_EMAIL', 'openvino_pushbot@intel.com'),
     name=pkg_name,
-    license=config('WHEEL_LICENCE_TYPE', 'OSI Approved :: Apache Software License'),
-    author=config('WHEEL_AUTHOR', 'Intel Corporation'),
-    description=config('WHEEL_DESC', 'Inference Engine Python* API'),
-    install_requires=get_dependencies(config('WHEEL_REQUIREMENTS', 'meta/openvino.requirements.txt')),
-    long_description=get_description(config('WHEEL_OVERVIEW', 'meta/pypi_overview.md')),
+    license=os.getenv('WHEEL_LICENCE_TYPE', 'OSI Approved :: Apache Software License'),
+    author=os.getenv('WHEEL_AUTHOR', 'Intel(R) Corporation'),
+    description=os.getenv('WHEEL_DESC', 'OpenVINO(TM) Runtime'),
+    install_requires=get_dependencies(os.getenv('WHEEL_REQUIREMENTS', SCRIPT_DIR.parents[0] / 'requirements.txt')),
+    long_description=get_description(os.getenv('WHEEL_OVERVIEW', SCRIPT_DIR.parents[3] / 'docs/install_guides/pypi-openvino-rt.md')),
     long_description_content_type='text/markdown',
-    download_url=config('WHEEL_DOWNLOAD_URL', 'https://github.com/openvinotoolkit/openvino/tags'),
-    url=config('WHEEL_URL', 'https://docs.openvinotoolkit.org/latest/index.html'),
+    download_url=os.getenv('WHEEL_DOWNLOAD_URL', 'https://github.com/openvinotoolkit/openvino/tags'),
+    url=os.getenv('WHEEL_URL', 'https://docs.openvinotoolkit.org/latest/index.html'),
     cmdclass={
         'build': CustomBuild,
         'install': CustomInstall,
diff --git a/tools/openvino_dev/setup.py b/tools/openvino_dev/setup.py
index a18d6991a75..44861d6a3ed 100644
--- a/tools/openvino_dev/setup.py
+++ b/tools/openvino_dev/setup.py
@@ -195,7 +195,7 @@ setup(
     author_email='openvino_pushbot@intel.com',
     url='https://docs.openvinotoolkit.org/latest/index.html',
     download_url='https://github.com/openvinotoolkit/openvino/tags',
-    description='OpenVINO™ Developer Package',
+    description='OpenVINO(TM) Development Tools',
     long_description=get_description(SCRIPT_DIR.parents[1] / 'docs/install_guides/pypi-openvino-dev.md'),
     long_description_content_type='text/markdown',
     classifiers=[

From 2db19e6bf7749c49c19214c5528d04328d8d71a5 Mon Sep 17 00:00:00 2001
From: Vitaliy Urusovskij
Date: Thu, 16 Dec 2021 13:13:05 +0300
Subject: [PATCH 23/27] Fix c4146 warning: unary minus operator on unsigned
 type (#9153)

---
 .../template_plugin/backend/evaluates_map.cpp | 19 ++++++++++++++++++-
 src/core/reference/CMakeLists.txt             |  4 ----
 .../include/ngraph/runtime/reference/abs.hpp  |  8 +++++++-
 .../include/ngraph/runtime/reference/max.hpp  |  3 +--
 .../ngraph/runtime/reference/sigmoid.hpp      | 12 +++++++++++-
 src/core/src/op/range.cpp                     |  3 ++-
 6 files changed, 39 insertions(+), 10 deletions(-)
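The recurring fix in this patch is compile-time dispatch: an `enable_if` overload handles the types for which unary minus would hit an unsigned operand, so the warning-prone expression is never instantiated for them. A small self-contained sketch of the pattern (hypothetical `my_abs`, echoing the `abs.hpp` hunk below):

```cpp
#include <iostream>
#include <type_traits>

// For unsigned T, |x| == x, so the overload reduces to the identity
// and no negation is ever compiled for unsigned types.
template <typename T, typename std::enable_if<std::is_unsigned<T>::value, bool>::type = true>
T my_abs(T v) { return v; }

// For signed integral and floating-point T, negate when negative.
template <typename T, typename std::enable_if<!std::is_unsigned<T>::value, bool>::type = true>
T my_abs(T v) { return v < 0 ? -v : v; }

int main() {
    std::cout << my_abs(-3) << " " << my_abs(7u) << " " << my_abs(-2.5) << "\n";  // 3 7 2.5
}
```
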
Got:", element::Type(t1)); +} + +template ::value_type>::value || + std::is_same::value_type, bfloat16>::value || + std::is_same::value_type, float16>::value, + bool>::type = true> inline void evaluate(const shared_ptr& op, const HostTensorVector& outputs, const HostTensorVector& inputs) { diff --git a/src/core/reference/CMakeLists.txt b/src/core/reference/CMakeLists.txt index 0827873c4d9..82efbca3095 100644 --- a/src/core/reference/CMakeLists.txt +++ b/src/core/reference/CMakeLists.txt @@ -22,10 +22,6 @@ ie_faster_build(${TARGET_NAME} UNITY PCH PRIVATE "src/precomp.hpp") -if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") - target_compile_options(${TARGET_NAME} PUBLIC /wd4146) -endif() - target_compile_definitions(${TARGET_NAME} PRIVATE XBYAK_NO_OP_NAMES XBYAK64) if(NOT BUILD_SHARED_LIBS) diff --git a/src/core/reference/include/ngraph/runtime/reference/abs.hpp b/src/core/reference/include/ngraph/runtime/reference/abs.hpp index e3738d17ee0..6528ca0674b 100644 --- a/src/core/reference/include/ngraph/runtime/reference/abs.hpp +++ b/src/core/reference/include/ngraph/runtime/reference/abs.hpp @@ -5,11 +5,17 @@ #pragma once #include +#include namespace ngraph { namespace runtime { namespace reference { -template +template ::value, bool>::type = true> +void abs(const T* arg, T* out, size_t count) { + std::copy(arg, arg + count, out); +} + +template ::value, bool>::type = true> void abs(const T* arg, T* out, size_t count) { for (size_t i = 0; i < count; i++) { // TODO: generic "abs" doesn't work here for some reason. diff --git a/src/core/reference/include/ngraph/runtime/reference/max.hpp b/src/core/reference/include/ngraph/runtime/reference/max.hpp index 842e6e9a051..35dc081fbdf 100644 --- a/src/core/reference/include/ngraph/runtime/reference/max.hpp +++ b/src/core/reference/include/ngraph/runtime/reference/max.hpp @@ -16,8 +16,7 @@ namespace runtime { namespace reference { template void max(const T* arg, T* out, const Shape& in_shape, const AxisSet& reduction_axes) { - T minval = - std::numeric_limits::has_infinity ? T(-std::numeric_limits::infinity()) : std::numeric_limits::min(); + T minval = std::numeric_limits::lowest(); constexpr bool dont_keep_dims_in_output = false; const auto out_shape = reduce(in_shape, reduction_axes, dont_keep_dims_in_output); diff --git a/src/core/reference/include/ngraph/runtime/reference/sigmoid.hpp b/src/core/reference/include/ngraph/runtime/reference/sigmoid.hpp index baaf3db2add..bf9b0743675 100644 --- a/src/core/reference/include/ngraph/runtime/reference/sigmoid.hpp +++ b/src/core/reference/include/ngraph/runtime/reference/sigmoid.hpp @@ -6,11 +6,21 @@ #include #include +#include namespace ngraph { namespace runtime { namespace reference { -template +template ::value, bool>::type = true> +void sigmoid(const T* arg, T* out, size_t count) { + T exp_value; + for (size_t i = 0; i < count; i++) { + exp_value = std::exp(-static_cast::type>(arg[i])); + out[i] = 1 / (1 + exp_value); + } +} + +template ::value, bool>::type = true> void sigmoid(const T* arg, T* out, size_t count) { T exp_value; for (size_t i = 0; i < count; i++) { diff --git a/src/core/src/op/range.cpp b/src/core/src/op/range.cpp index 4d850eb7667..670ebe07faf 100644 --- a/src/core/src/op/range.cpp +++ b/src/core/src/op/range.cpp @@ -314,7 +314,8 @@ void static check_step(const op::v0::Range* node, T step) { template static typename std::enable_if::value, T>::type adjust_for_step_and_sign(T span, T step) { - return ceil_div(span < 0 ? -span : span, step < 0 ? -step : step); + return ceil_div(span < 0 ? 
From d10e8005c0ea32f748a3153623cb36baba5ebef0 Mon Sep 17 00:00:00 2001
From: Alexander Zhogov
Date: Thu, 16 Dec 2021 14:33:56 +0300
Subject: [PATCH 24/27] Revert "Fix build issue for openvino wheel package on
 Windows (#9231)" (#9260)

This reverts commit 460a6634fdf8d62405969c17cb77953ffb855996.
---
 .../ie_bridges/python/wheel/.env.in           | 15 +++++++
 .../ie_bridges/python/wheel/CMakeLists.txt    | 44 +++++++++++++------
 .../wheel/meta/openvino-dev.requirements.txt  | 28 ++++++++++++
 .../python/wheel/meta/openvino-dev.setup.cfg  | 22 ++++++++++
 .../wheel/meta/openvino.requirements.txt      |  1 +
 .../python/wheel/meta/pypi_overview.md        | 32 ++++++++++++++
 .../python/wheel/requirements-dev.txt         |  2 +-
 .../ie_bridges/python/wheel/setup.cfg         | 14 +++---
 .../ie_bridges/python/wheel/setup.py          | 34 +++++++-------
 tools/openvino_dev/setup.py                   |  2 +-
 10 files changed, 153 insertions(+), 41 deletions(-)
 create mode 100644 inference-engine/ie_bridges/python/wheel/.env.in
 create mode 100644 inference-engine/ie_bridges/python/wheel/meta/openvino-dev.requirements.txt
 create mode 100644 inference-engine/ie_bridges/python/wheel/meta/openvino-dev.setup.cfg
 create mode 100644 inference-engine/ie_bridges/python/wheel/meta/openvino.requirements.txt
 create mode 100644 inference-engine/ie_bridges/python/wheel/meta/pypi_overview.md

diff --git a/inference-engine/ie_bridges/python/wheel/.env.in b/inference-engine/ie_bridges/python/wheel/.env.in
new file mode 100644
index 00000000000..5dc313f6b1d
--- /dev/null
+++ b/inference-engine/ie_bridges/python/wheel/.env.in
@@ -0,0 +1,15 @@
+WHEEL_PACKAGE_NAME=@WHEEL_PACKAGE_NAME@
+WHEEL_VERSION=@WHEEL_VERSION@
+WHEEL_BUILD=@WHEEL_BUILD@
+WHEEL_LICENCE_TYPE=@WHEEL_LICENCE_TYPE@
+WHEEL_AUTHOR=@WHEEL_AUTHOR@
+WHEEL_AUTHOR_EMAIL=@WHEEL_AUTHOR_EMAIL@
+WHEEL_DESC=@WHEEL_DESC@
+WHEEL_LICENSE=@WHEEL_LICENSE@
+WHEEL_REQUIREMENTS=@WHEEL_REQUIREMENTS@
+WHEEL_OVERVIEW=@WHEEL_OVERVIEW@
+
+CMAKE_BUILD_DIR=@CMAKE_BINARY_DIR@
+OV_RUNTIME_LIBS_DIR=@IE_CPACK_RUNTIME_PATH@
+TBB_LIBS_DIR=@TBB_LIBS_DIR@
+PY_PACKAGES_DIR=@PY_PACKAGES_DIR@
diff --git a/inference-engine/ie_bridges/python/wheel/CMakeLists.txt b/inference-engine/ie_bridges/python/wheel/CMakeLists.txt
index 64728a5cd7c..53d8207c347 100644
--- a/inference-engine/ie_bridges/python/wheel/CMakeLists.txt
+++ b/inference-engine/ie_bridges/python/wheel/CMakeLists.txt
@@ -1,13 +1,40 @@
 # Copyright (C) 2018-2021 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 #
+
+set(WHEEL_PACKAGE_NAME "openvino" CACHE STRING "Name of the package")
+set(WHEEL_LICENCE_TYPE "OSI Approved :: Apache Software License" CACHE STRING "License type for the package")
+set(WHEEL_AUTHOR "Intel Corporation" CACHE STRING "Package author's name")
+set(WHEEL_AUTHOR_EMAIL "openvino_pushbot@intel.com" CACHE STRING "Email address of the package author")
+set(WHEEL_DESC "Inference Engine Python* API" CACHE STRING "Short, summary description of the package")
+set(WHEEL_URL "https://docs.openvinotoolkit.org/latest/index.html" CACHE STRING "Home page url")
+set(WHEEL_DOWNLOAD_URL "https://github.com/openvinotoolkit/openvino/tags" CACHE STRING "Download page url")
+set(WHEEL_VERSION "${IE_VERSION}" CACHE STRING "Version of this release" FORCE)
 set(WHEEL_BUILD "${IE_VERSION_BUILD}" CACHE STRING "Build number of this release" FORCE)
+set(WHEEL_LICENSE "${CMAKE_SOURCE_DIR}/LICENSE" CACHE STRING "Wheel license file")
+set(WHEEL_REQUIREMENTS "${CMAKE_CURRENT_SOURCE_DIR}/meta/openvino.requirements.txt" CACHE
STRING "Wheel requirements.txt file") +set(WHEEL_OVERVIEW "${CMAKE_CURRENT_SOURCE_DIR}/meta/pypi_overview.md" CACHE STRING "Detailed description") + +set(SETUP_PY "${CMAKE_CURRENT_SOURCE_DIR}/setup.py") +set(SETUP_ENV "${CMAKE_CURRENT_SOURCE_DIR}/.env.in") +set(SETUP_ENV_OUT "${CMAKE_CURRENT_SOURCE_DIR}/.env") + set(PY_PACKAGES_DIR ${PYTHON_BRIDGE_CPACK_PATH}/${PYTHON_VERSION}) set(TBB_LIBS_DIR runtime/3rdparty/tbb/lib) -if(WIN32) + +if(APPLE) + set(WHEEL_PLATFORM macosx_10_15_x86_64) +elseif(UNIX) + set(WHEEL_PLATFORM manylinux2014_x86_64) +elseif(WIN32) + set(WHEEL_PLATFORM win_amd64) set(TBB_LIBS_DIR runtime/3rdparty/tbb/bin) +else() + message(FATAL_ERROR "This platform is not supported") endif() +configure_file(${SETUP_ENV} ${SETUP_ENV_OUT} @ONLY) + if(LINUX) find_host_program(patchelf_program NAMES patchelf @@ -28,30 +55,21 @@ endforeach() execute_process(COMMAND ${PYTHON_EXECUTABLE} -c "import wheel.bdist_wheel ; print(f'{wheel.bdist_wheel.get_abi_tag()}')" OUTPUT_VARIABLE PYTHON_ABI) execute_process(COMMAND ${PYTHON_EXECUTABLE} -c "import wheel.vendored.packaging.tags as tags ; print(f'{tags.interpreter_name()}{tags.interpreter_version()}')" OUTPUT_VARIABLE INTERPRETER) -execute_process(COMMAND ${PYTHON_EXECUTABLE} -c "import wheel.vendored.packaging.tags as tags ; print(f'{next(tags._platform_tags())}')" OUTPUT_VARIABLE WHEEL_PLATFORM) string(STRIP ${PYTHON_ABI} PYTHON_ABI) string(STRIP ${INTERPRETER} INTERPRETER) -string(STRIP ${WHEEL_PLATFORM} WHEEL_PLATFORM) set(openvino_wheel_name "openvino-${WHEEL_VERSION}-${WHEEL_BUILD}-${INTERPRETER}-${PYTHON_ABI}-${WHEEL_PLATFORM}.whl") set(openvino_wheels_output_dir "${CMAKE_BINARY_DIR}/wheels") set(openvino_wheel_path "${openvino_wheels_output_dir}/${openvino_wheel_name}") add_custom_command(OUTPUT ${openvino_wheel_path} - COMMAND ${CMAKE_COMMAND} -E copy_directory "${CMAKE_CURRENT_SOURCE_DIR}" "${CMAKE_CURRENT_BINARY_DIR}" - COMMAND ${CMAKE_COMMAND} -E copy_directory "${CMAKE_SOURCE_DIR}/licensing" "${CMAKE_BINARY_DIR}/licensing" COMMAND ${CMAKE_COMMAND} -E remove_directory "${CMAKE_CURRENT_BINARY_DIR}/site-packages" - COMMAND ${CMAKE_COMMAND} -E env OPENVINO_VERSION=${IE_VERSION} - OPENVINO_VERSION=${IE_VERSION} - CMAKE_BUILD_DIR=${CMAKE_BINARY_DIR} - OV_RUNTIME_LIBS_DIR=${IE_CPACK_RUNTIME_PATH} - TBB_LIBS_DIR=${TBB_LIBS_DIR} - PY_PACKAGES_DIR=${PY_PACKAGES_DIR} - ${PYTHON_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/setup.py" clean bdist_wheel + COMMAND ${PYTHON_EXECUTABLE} ${SETUP_PY} clean bdist_wheel --dist-dir ${openvino_wheels_output_dir} --build=${WHEEL_BUILD} --plat-name=${WHEEL_PLATFORM} - DEPENDS ${openvino_wheel_deps} + # COMMAND ${CMAKE_COMMAND} -E remove ${SETUP_ENV_OUT} + DEPENDS ${openvino_wheel_deps} ${SETUP_ENV_OUT} WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}" COMMENT "Building Python wheel ${openvino_wheel_name}" VERBATIM) diff --git a/inference-engine/ie_bridges/python/wheel/meta/openvino-dev.requirements.txt b/inference-engine/ie_bridges/python/wheel/meta/openvino-dev.requirements.txt new file mode 100644 index 00000000000..ccc569a0194 --- /dev/null +++ b/inference-engine/ie_bridges/python/wheel/meta/openvino-dev.requirements.txt @@ -0,0 +1,28 @@ +defusedxml>=0.7.1 +scipy~=1.5.4 +jstyleson~=0.0.2 +numpy>=1.16.6,<1.20 +addict>=2.4.0 +pandas~=1.1.5 +hyperopt~=0.1.2 +networkx~=2.5 +tqdm>=4.54.1 +texttable~=1.6.3 +py-cpuinfo>=7.0.0 +PyYAML>=5.4.1 +pillow>=8.1.2 +scikit-image>=0.17.2 +scikit-learn>=0.24.1 +yamlloader>=0.5 +shapely>=1.7.1 +nibabel>=3.2.1 +pydicom>=2.1.2 +sentencepiece>=0.1.95 +tokenizers>=0.10.1 
+editdistance>=0.5.3 +parasail>=1.2.4 +fast-ctc-decode>=0.2.5 +rawpy>=0.16.0 +nltk>=3.5 +opencv-python==4.5.* +progress>=1.5 diff --git a/inference-engine/ie_bridges/python/wheel/meta/openvino-dev.setup.cfg b/inference-engine/ie_bridges/python/wheel/meta/openvino-dev.setup.cfg new file mode 100644 index 00000000000..d6789c4a084 --- /dev/null +++ b/inference-engine/ie_bridges/python/wheel/meta/openvino-dev.setup.cfg @@ -0,0 +1,22 @@ +[options] +py_modules = + mo + mo_tf + mo_caffe + mo_mxnet + mo_onnx + mo_kaldi + +[options.package_data] + * = * + +[options.entry_points] +console_scripts = + +[metadata] +license_files = + readme* + *LICENSE* + *license* + *third-party-programs* + *EULA* diff --git a/inference-engine/ie_bridges/python/wheel/meta/openvino.requirements.txt b/inference-engine/ie_bridges/python/wheel/meta/openvino.requirements.txt new file mode 100644 index 00000000000..63012dd1739 --- /dev/null +++ b/inference-engine/ie_bridges/python/wheel/meta/openvino.requirements.txt @@ -0,0 +1 @@ +numpy>=1.16.6,<1.20 diff --git a/inference-engine/ie_bridges/python/wheel/meta/pypi_overview.md b/inference-engine/ie_bridges/python/wheel/meta/pypi_overview.md new file mode 100644 index 00000000000..418a04bc16a --- /dev/null +++ b/inference-engine/ie_bridges/python/wheel/meta/pypi_overview.md @@ -0,0 +1,32 @@ +## OpenVINOâ„¢ Toolkit + +OpenVINOâ„¢ toolkit quickly deploys applications and solutions that emulate human vision. Based on Convolutional Neural Networks (CNNs), the toolkit extends computer vision (CV) workloads across Intel® hardware, maximizing performance. The OpenVINOâ„¢ toolkit includes the Deep Learning Deployment Toolkit (DLDT). + +OpenVINOâ„¢ toolkit: + +- Enables CNN-based deep learning inference on the edge +- Supports heterogeneous execution across an Intel® CPU, Intel® Integrated Graphics, Intel® Neural Compute Stick 2, and Intel® Vision Accelerator Design with Intel® Movidiusâ„¢ VPUs +- Speeds time-to-market via an easy-to-use library of computer vision functions and pre-optimized kernels +- Includes optimized calls for computer vision standards, including OpenCV\* and OpenCLâ„¢ + +Operating Systems: +- Ubuntu* 18.04 long-term support (LTS), 64-bit +- Windows* 10, 64-bit +- macOS* 10.15, 64-bit + +## Install the Runtime Package Using the PyPI Repository +1. Set up and update pip to the highest version: + ```sh + python3 -m pip install --upgrade pip + ``` +2. Install the Intel® distribution of OpenVINOâ„¢ toolkit: + ```sh + pip install openvino + ``` + +3. Verify that the package is installed: + ```sh + python3 -c "from openvino.inference_engine import IECore" + ``` + +Now you are ready to develop and run your application. 
\ No newline at end of file diff --git a/inference-engine/ie_bridges/python/wheel/requirements-dev.txt b/inference-engine/ie_bridges/python/wheel/requirements-dev.txt index b7574b392d2..8c4ce47c35f 100644 --- a/inference-engine/ie_bridges/python/wheel/requirements-dev.txt +++ b/inference-engine/ie_bridges/python/wheel/requirements-dev.txt @@ -1,3 +1,3 @@ setuptools>=53.0.0 wheel>=0.36.2 - +python-decouple>=3.4 diff --git a/inference-engine/ie_bridges/python/wheel/setup.cfg b/inference-engine/ie_bridges/python/wheel/setup.cfg index c6893c93c42..abb1790b67f 100644 --- a/inference-engine/ie_bridges/python/wheel/setup.cfg +++ b/inference-engine/ie_bridges/python/wheel/setup.cfg @@ -1,11 +1,7 @@ [metadata] license_files = - readme* - *LICENSE* - *license* - *third-party-programs* - ../../../../licensing/runtime-third-party-programs.txt - ../../../../licensing/tbb_third-party-programs.txt - ../../../../licensing/onednn_third-party-programs.txt - ../../../../LICENSE - + readme* + *LICENSE* + *license* + *third-party-programs* + *EULA* diff --git a/inference-engine/ie_bridges/python/wheel/setup.py b/inference-engine/ie_bridges/python/wheel/setup.py index eb8d573dfba..517dce7560e 100644 --- a/inference-engine/ie_bridges/python/wheel/setup.py +++ b/inference-engine/ie_bridges/python/wheel/setup.py @@ -21,6 +21,7 @@ from setuptools import setup, find_namespace_packages, Extension from setuptools.command.build_ext import build_ext from setuptools.command.build_clib import build_clib from setuptools.command.install import install +from decouple import config WHEEL_LIBS_INSTALL_DIR = os.path.join('openvino', 'libs') WHEEL_LIBS_PACKAGE = 'openvino.libs' @@ -40,11 +41,10 @@ elif machine == 'aarch64': ARCH = 'arm64' # The following variables can be defined in environment or .env file -SCRIPT_DIR = Path(__file__).resolve().parents[0] -CMAKE_BUILD_DIR = os.getenv('CMAKE_BUILD_DIR', '.') -OV_RUNTIME_LIBS_DIR = os.getenv('OV_RUNTIME_LIBS_DIR', f'runtime/{LIBS_DIR}/{ARCH}/{CONFIG}') -TBB_LIBS_DIR = os.getenv('TBB_LIBS_DIR', f'runtime/3rdparty/tbb/{LIBS_DIR}') -PY_PACKAGES_DIR = os.getenv('PY_PACKAGES_DIR', f'python/{PYTHON_VERSION}') +CMAKE_BUILD_DIR = config('CMAKE_BUILD_DIR', '.') +OV_RUNTIME_LIBS_DIR = config('OV_RUNTIME_LIBS_DIR', f'runtime/{LIBS_DIR}/{ARCH}/{CONFIG}') +TBB_LIBS_DIR = config('TBB_LIBS_DIR', f'runtime/3rdparty/tbb/{LIBS_DIR}') +PY_PACKAGES_DIR = config('PY_PACKAGES_DIR', f'python/{PYTHON_VERSION}') LIBS_RPATH = '$ORIGIN' if sys.platform == 'linux' else '@loader_path' LIB_INSTALL_CFG = { @@ -428,28 +428,28 @@ if not any(pl in sys.platform for pl in platforms): sys.exit(f'Unsupported platform: {sys.platform}, expected: linux, win32, darwin') # copy license file into the build directory -package_license = os.getenv('WHEEL_LICENSE', SCRIPT_DIR.parents[3] / 'LICENSE') +package_license = config('WHEEL_LICENSE', '') if os.path.exists(package_license): copyfile(package_license, 'LICENSE') packages = find_namespace_packages(get_package_dir(PY_INSTALL_CFG)) package_data: typing.Dict[str, list] = {} -pkg_name = os.getenv('WHEEL_PACKAGE_NAME', 'openvino') +pkg_name = config('WHEEL_PACKAGE_NAME', 'openvino') ext_modules = find_prebuilt_extensions(get_dir_list(PY_INSTALL_CFG)) if pkg_name == 'openvino' else [] setup( - version=os.getenv('WHEEL_VERSION', '0.0.0'), - build=os.getenv('WHEEL_BUILD', '000'), - author_email=os.getenv('WHEEL_AUTHOR_EMAIL', 'openvino_pushbot@intel.com'), + version=config('WHEEL_VERSION', '0.0.0'), + build=config('WHEEL_BUILD', '000'), + author_email=config('WHEEL_AUTHOR_EMAIL', 
'openvino_pushbot@intel.com'), name=pkg_name, - license=os.getenv('WHEEL_LICENCE_TYPE', 'OSI Approved :: Apache Software License'), - author=os.getenv('WHEEL_AUTHOR', 'Intel(R) Corporation'), - description=os.getenv('WHEEL_DESC', 'OpenVINO(TM) Runtime'), - install_requires=get_dependencies(os.getenv('WHEEL_REQUIREMENTS', SCRIPT_DIR.parents[0] / 'requirements.txt')), - long_description=get_description(os.getenv('WHEEL_OVERVIEW', SCRIPT_DIR.parents[3] / 'docs/install_guides/pypi-openvino-rt.md')), + license=config('WHEEL_LICENCE_TYPE', 'OSI Approved :: Apache Software License'), + author=config('WHEEL_AUTHOR', 'Intel Corporation'), + description=config('WHEEL_DESC', 'Inference Engine Python* API'), + install_requires=get_dependencies(config('WHEEL_REQUIREMENTS', 'meta/openvino.requirements.txt')), + long_description=get_description(config('WHEEL_OVERVIEW', 'meta/pypi_overview.md')), long_description_content_type='text/markdown', - download_url=os.getenv('WHEEL_DOWNLOAD_URL', 'https://github.com/openvinotoolkit/openvino/tags'), - url=os.getenv('WHEEL_URL', 'https://docs.openvinotoolkit.org/latest/index.html'), + download_url=config('WHEEL_DOWNLOAD_URL', 'https://github.com/openvinotoolkit/openvino/tags'), + url=config('WHEEL_URL', 'https://docs.openvinotoolkit.org/latest/index.html'), cmdclass={ 'build': CustomBuild, 'install': CustomInstall, diff --git a/tools/openvino_dev/setup.py b/tools/openvino_dev/setup.py index 44861d6a3ed..a18d6991a75 100644 --- a/tools/openvino_dev/setup.py +++ b/tools/openvino_dev/setup.py @@ -195,7 +195,7 @@ setup( author_email='openvino_pushbot@intel.com', url='https://docs.openvinotoolkit.org/latest/index.html', download_url='https://github.com/openvinotoolkit/openvino/tags', - description='OpenVINO(TM) Development Tools', + description='OpenVINOâ„¢ Developer Package', long_description=get_description(SCRIPT_DIR.parents[1] / 'docs/install_guides/pypi-openvino-dev.md'), long_description_content_type='text/markdown', classifiers=[ From 2514c0ef38b8a46f515dbe1dd95355bd2a7652ad Mon Sep 17 00:00:00 2001 From: Sergey Shlyapnikov Date: Thu, 16 Dec 2021 15:20:28 +0300 Subject: [PATCH 25/27] [GPU] Add gemm_tiled_opt i8/u8 output support (#9202) --- .../gemm/gemm_kernel_tiled_opt.cpp | 21 ++++++++++---- .../core/cl_kernels/gemm_tiled_opt.cl | 24 +++++++-------- .../tests/test_cases/fusings_gpu_test.cpp | 29 +++++++++++++++++++ 3 files changed, 57 insertions(+), 17 deletions(-) diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_tiled_opt.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_tiled_opt.cpp index 93df406663c..9f77050b46d 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_tiled_opt.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_tiled_opt.cpp @@ -13,6 +13,8 @@ ParamsKey GemmKernelTiledOpt::GetSupportedKey() const { k.EnableInputDataType(Datatype::F32); k.EnableOutputDataType(Datatype::F16); k.EnableOutputDataType(Datatype::F32); + k.EnableOutputDataType(Datatype::INT8); + k.EnableOutputDataType(Datatype::UINT8); k.EnableInputLayout(DataLayout::bfyx); k.EnableOutputLayout(DataLayout::bfyx); k.EnableInputLayout(DataLayout::bfzyx); @@ -21,6 +23,7 @@ ParamsKey GemmKernelTiledOpt::GetSupportedKey() const { k.EnableOutputLayout(DataLayout::bfwzyx); k.EnableBatching(); + k.EnableDifferentTypes(); return k; } @@ -117,25 +120,29 @@ JitConstants GemmKernelTiledOpt::GetJitConstants(const 
gemm_params& params) cons if (tuning_data.tile_k_size > tuning_data.simd_size) { jit.AddConstants({ MakeJitConstant("A_VEC_SIZE", tuning_data.tile_k_size / tuning_data.simd_size), - MakeJitConstant("A_FLOATN", std::string("UNIT_TYPE") + toCodeString(tuning_data.tile_k_size / tuning_data.simd_size)), + MakeJitConstant("A_FLOATN", std::string("CAT(INPUT0_TYPE, ") + toCodeString(tuning_data.tile_k_size / tuning_data.simd_size) + ")"), }); } else { jit.AddConstants({ MakeJitConstant("A_VEC_SIZE", 1), - MakeJitConstant("A_FLOATN", std::string("UNIT_TYPE")), + MakeJitConstant("A_FLOATN", std::string("INPUT0_TYPE")), }); } if (tuning_data.tile_n_size > tuning_data.simd_size) { jit.AddConstants({ MakeJitConstant("B_VEC_SIZE", b_vec_size), - MakeJitConstant("B_FLOATN", std::string("UNIT_TYPE") + toCodeString(b_vec_size)), + MakeJitConstant("B_FLOATN", std::string("CAT(INPUT1_TYPE, ") + toCodeString(b_vec_size) + ")"), + MakeJitConstant("OUTPUT_TYPE_VEC", std::string("CAT(OUTPUT_TYPE, ") + toCodeString(b_vec_size) + ")"), + MakeJitConstant("ACCUMULATOR_TYPE_VEC", std::string("CAT(ACCUMULATOR_TYPE, ") + toCodeString(b_vec_size) + ")"), }); } else { b_vec_size = 1; jit.AddConstants({ - MakeJitConstant("B_VEC_SIZE", 1), - MakeJitConstant("B_FLOATN", std::string("UNIT_TYPE")), + MakeJitConstant("B_VEC_SIZE", b_vec_size), + MakeJitConstant("B_FLOATN", std::string("INPUT1_TYPE")), + MakeJitConstant("OUTPUT_TYPE_VEC", std::string("OUTPUT_TYPE")), + MakeJitConstant("ACCUMULATOR_TYPE_VEC", std::string("ACCUMULATOR_TYPE")), }); } @@ -183,6 +190,10 @@ bool GemmKernelTiledOpt::Validate(const Params& params, const optional_params& o if ((gmm_params.transpose_input0 || gmm_params.transpose_input1) && gemm_leftovers) return false; + for (size_t i = 1; i < gmm_params.inputs.size(); i++) + if (gmm_params.inputs[0].GetDType() != gmm_params.inputs[i].GetDType()) + return false; + return true; } } // namespace kernel_selector diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/gemm_tiled_opt.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/gemm_tiled_opt.cl index ae79242b369..cba34cdcf8c 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/gemm_tiled_opt.cl +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/gemm_tiled_opt.cl @@ -3,7 +3,7 @@ // #include "include/batch_headers/fetch_data.cl" -#include "include/unit_type.cl" +#include "include/batch_headers/data_types.cl" #define unroll_for __attribute__((opencl_unroll_hint)) for @@ -14,17 +14,17 @@ #endif // INPUT0_TYPE_SIZE == 4 #if TILE_K > SIMD_WIDTH - #define BLOCK_READ_A(ptr, offset) CAT(UNIT_BLOCK_READ, A_VEC_SIZE)(ptr, offset) + #define BLOCK_READ_A(ptr, offset) BLOCK_READN(INPUT0_TYPE, A_VEC_SIZE, ptr, offset) #else // TILE_K > SIMD_WIDTH - #define BLOCK_READ_A(ptr, offset) UNIT_BLOCK_READ(ptr, offset) + #define BLOCK_READ_A(ptr, offset) BLOCK_READN(INPUT0_TYPE, 1, ptr, offset) #endif // TILE_K > SIMD_WIDTH #if TILE_N > SIMD_WIDTH - #define BLOCK_READ_B(ptr, offset) CAT(UNIT_BLOCK_READ, B_VEC_SIZE)(ptr, offset) - #define BLOCK_WRITE_C(ptr, offset, data) CAT(UNIT_BLOCK_WRITE, B_VEC_SIZE)(ptr, offset, data) + #define BLOCK_READ_B(ptr, offset) BLOCK_READN(INPUT1_TYPE, B_VEC_SIZE, ptr, offset) + #define BLOCK_WRITE_C(ptr, offset, data) BLOCK_WRITEN(OUTPUT_TYPE, B_VEC_SIZE, ptr, offset, data) #else // TILE_N > SIMD_WIDTH - #define BLOCK_READ_B(ptr, offset) UNIT_BLOCK_READ(ptr, offset) - #define BLOCK_WRITE_C(ptr, offset, data) UNIT_BLOCK_WRITE(ptr, offset, data) + #define 
BLOCK_READ_B(ptr, offset) BLOCK_READN(INPUT1_TYPE, 1, ptr, offset) + #define BLOCK_WRITE_C(ptr, offset, data) BLOCK_WRITEN(OUTPUT_TYPE, 1, ptr, offset, data) #endif // TILE_N > SIMD_WIDTH inline uint FUNC(get_input0_batch_offset)(uint b, uint f, uint w, uint z) { @@ -294,9 +294,9 @@ KERNEL(gemm_tiled_opt)( #if TILE_N_NOT_DIVISIBLE if (b_raw_global_id < N) { #ifdef INPUT2_TYPE - OUTPUT_TYPE dequantized = TO_ACCUMULATOR_TYPE(ALPHA) * c_tile[write_id] + TO_ACCUMULATOR_TYPE(BETA) * c_ptr[sglid]; + ACCUMULATOR_TYPE dequantized = TO_ACCUMULATOR_TYPE(ALPHA) * c_tile[write_id] + TO_ACCUMULATOR_TYPE(BETA) * c_ptr[sglid]; #else // INPUT2_TYPE - OUTPUT_TYPE dequantized = TO_ACCUMULATOR_TYPE(ALPHA) * c_tile[write_id]; + ACCUMULATOR_TYPE dequantized = TO_ACCUMULATOR_TYPE(ALPHA) * c_tile[write_id]; #endif // INPUT2_TYPE #if HAS_FUSED_OPS @@ -316,9 +316,9 @@ KERNEL(gemm_tiled_opt)( #ifdef INPUT2_TYPE B_FLOATN c_val = BLOCK_READ_B(c_ptr, 0); - B_FLOATN dequantized = TO_ACCUMULATOR_TYPE(ALPHA) * c_tile[write_id] + TO_ACCUMULATOR_TYPE(BETA) * c_val; + ACCUMULATOR_TYPE_VEC dequantized = TO_ACCUMULATOR_TYPE(ALPHA) * c_tile[write_id] + TO_ACCUMULATOR_TYPE(BETA) * c_val; #else // INPUT2_TYPE - B_FLOATN dequantized = TO_ACCUMULATOR_TYPE(ALPHA) * c_tile[write_id]; + ACCUMULATOR_TYPE_VEC dequantized = TO_ACCUMULATOR_TYPE(ALPHA) * c_tile[write_id]; #endif // INPUT2_TYPE #if HAS_FUSED_OPS @@ -327,7 +327,7 @@ KERNEL(gemm_tiled_opt)( #else // FUSED_OPS_CAN_USE_PRELOAD FUSED_OPS_VEC; #endif // FUSED_OPS_CAN_USE_PRELOAD - B_FLOATN res = FUSED_OPS_RESULT_VEC; + OUTPUT_TYPE_VEC res = FUSED_OPS_RESULT_VEC; BLOCK_WRITE_C(d_ptr, 0, res); #else // HAS_FUSED_OPS BLOCK_WRITE_C(d_ptr, 0, dequantized); diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/fusings_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/fusings_gpu_test.cpp index 962759bdc7c..35e4fe25e08 100644 --- a/inference-engine/thirdparty/clDNN/tests/test_cases/fusings_gpu_test.cpp +++ b/inference-engine/thirdparty/clDNN/tests/test_cases/fusings_gpu_test.cpp @@ -3264,6 +3264,35 @@ INSTANTIATE_TEST_SUITE_P(fusings_gpu, gemm_2in_quantize_u8, //gemm_test_params{ CASE_GEMM_2IN_FP32_1, 3, 4 }, })); +class gemm_2in_quantize_float_in : public GemmFusingTest {}; +TEST_P(gemm_2in_quantize_float_in, basic) { + auto p = GetParam(); + create_topologies(input_layout("input0", get_input_layout(p, 0)), + input_layout("input1", get_input_layout(p, 1)), + data("in_lo", get_mem(get_per_channel_layout(p), 0)), + data("in_hi", get_mem(get_per_channel_layout(p), 1, max_random)), + data("out_lo", get_mem(get_single_element_layout(p), 0)), + data("out_hi", get_mem(get_single_element_layout(p), 255)), + gemm("gemm_prim", { "input0", "input1" }, data_types::f32), + quantize("quantize", "gemm_prim", "in_lo", "in_hi", "out_lo", "out_hi", 256, data_types::u8), + reorder("reorder_bfyx", "quantize", p.default_format, data_types::f32) + ); + + implementation_desc gemm_impl = { format::bfyx, "gemm_tiled_opt" }; + bo_fused.set_option(build_option::force_implementations({ {"gemm_prim", gemm_impl} })); + + tolerance = 1.0f; + execute(p); +} + +INSTANTIATE_TEST_SUITE_P(fusings_gpu, gemm_2in_quantize_float_in, + ::testing::ValuesIn(std::vector{ + gemm_test_params{ CASE_GEMM_2IN_FP16_1, 3, 4 }, + gemm_test_params{ CASE_GEMM_2IN_FP32_1, 3, 4 }, + gemm_test_params{ CASE_GEMM_ELTWISE_2IN_FP16_1, 3, 4 }, + gemm_test_params{ CASE_GEMM_ELTWISE_2IN_FP32_1, 3, 4 }, +})); + class gemm_2in_scale : public GemmFusingTest {}; TEST_P(gemm_2in_scale, basic) { auto p = GetParam(); From 
6ddc47a7ef0a0a72e5d55511255ed7e39d46573e Mon Sep 17 00:00:00 2001 From: Vladislav Volkov Date: Thu, 16 Dec 2021 16:58:19 +0300 Subject: [PATCH 26/27] [CPU] Convert precisions on inputs/outputs (#8805) --- .../src/mkldnn_plugin/mkldnn_plugin.cpp | 31 +- .../nodes/common/cpu_convert.cpp | 616 ++++++++++++++++-- .../mkldnn_plugin/nodes/common/cpu_convert.h | 29 +- .../nodes/mkldnn_convert_node.cpp | 21 +- .../mkldnn_plugin/nodes/mkldnn_convert_node.h | 1 + .../python/tests/test_onnx/test_backend.py | 7 - .../python/tests/test_onnx/test_zoo_models.py | 6 - .../interface/ie_iplugin_internal.cpp | 16 +- .../blob_tests/set_blob.cpp | 11 +- .../single_layer_tests/conversion.cpp | 28 +- .../skip_tests_config.cpp | 24 +- .../ov_executable_network/exec_graph_info.hpp | 8 + .../plugin/shared/src/blob_tests/set_blob.cpp | 22 +- .../src/base/layer_test_utils.cpp | 84 +-- .../src/single_layer/conversion.cpp | 3 + 15 files changed, 725 insertions(+), 182 deletions(-) diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp index 1c8b40aaa8b..a6bda152e23 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp @@ -504,23 +504,24 @@ Engine::LoadExeNetworkImpl(const InferenceEngine::CNNNetwork &network, const std OV_ITT_SCOPED_TASK(itt::domains::MKLDNNPlugin, "Engine::LoadExeNetworkImpl"); // verification of supported input - InferenceEngine::InputsDataMap _networkInputs = network.getInputsInfo(); - for (const auto &ii : _networkInputs) { + for (const auto &ii : network.getInputsInfo()) { auto input_precision = ii.second->getPrecision(); - if (input_precision != InferenceEngine::Precision::FP64 && - input_precision != InferenceEngine::Precision::FP32 && - input_precision != InferenceEngine::Precision::I32 && - input_precision != InferenceEngine::Precision::U32 && - input_precision != InferenceEngine::Precision::U16 && - input_precision != InferenceEngine::Precision::I16 && - input_precision != InferenceEngine::Precision::I8 && - input_precision != InferenceEngine::Precision::U8 && - input_precision != InferenceEngine::Precision::BF16 && - input_precision != InferenceEngine::Precision::BOOL && - input_precision != InferenceEngine::Precision::I64 && - input_precision != InferenceEngine::Precision::U64) { + + using hash_t = std::hash::type>; + + static const std::unordered_set supported_precisions = { + Precision::U8, Precision::I8, + Precision::U16, Precision::I16, + Precision::U32, Precision::I32, + Precision::U64, Precision::I64, + Precision::BF16, Precision::FP16, + Precision::FP32, Precision::FP64, + Precision::BOOL + }; + + if (!supported_precisions.count(input_precision)) { IE_THROW(NotImplemented) - << "Input image format " << input_precision << " is not supported yet..."; + << "Input image format " << input_precision << " is not supported yet..."; } } diff --git a/inference-engine/src/mkldnn_plugin/nodes/common/cpu_convert.cpp b/inference-engine/src/mkldnn_plugin/nodes/common/cpu_convert.cpp index 8763b551af9..31205ad84e3 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/common/cpu_convert.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/common/cpu_convert.cpp @@ -4,27 +4,208 @@ #include "cpu_convert.h" #include "cpu_memcpy.h" -#include "utils/bfloat16.hpp" +#include +#include #include +#include +#include +#include +#include #include #include -#include +#include +using namespace MKLDNNPlugin; using namespace InferenceEngine; +using namespace dnnl::impl::cpu::x64; 
+using namespace dnnl::impl::utils; +using namespace Xbyak; namespace { -template -void convert(const void *srcPtr, void *dstPtr, const size_t size) { - if (std::is_same::value) { - cpu_memcpy(dstPtr, srcPtr, size*sizeof(dstType)); - } else { - const srcType *srcData = reinterpret_cast(srcPtr); - dstType *dstData = reinterpret_cast(dstPtr); +template +void convert_vec(jit_generator & gen, + const RegExp & src, + const RegExp & dst); - parallel_for(size, [&](size_t i) { - dstData[i] = static_cast(srcData[i]); +template <> +void convert_vec(jit_generator & gen, + const RegExp & src, + const RegExp & dst) { + auto const & f16vec = gen.xmm3; + auto const & f32vec = gen.ymm4; + + gen.movdqu(f16vec, gen.xword[src]); + gen.vcvtph2ps(f32vec, f16vec); + gen.vmovups(gen.yword[dst], f32vec); +} + +template <> +void convert_vec(jit_generator & gen, + const RegExp & src, + const RegExp & dst) { + auto const & f16vec = gen.xmm3; + auto const & f32vec = gen.ymm4; + + gen.vmovups(f32vec, gen.yword[src]); + gen.vcvtps2ph(f16vec, f32vec, 0); + gen.movdqu(gen.xword[dst], f16vec); +} + +class jit_convert_array : public jit_generator { + DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_convert_array) + + void generate() override { + const size_t vlen = 8u; + const size_t vlen_log2 = 3; + + auto reg_src = rax; + auto reg_dst = rbx; + auto reg_sz = rdx; + + Label tail, exit; + + preamble(); + + mov(reg_src, ptr[param1 + offsetof(args_t, src)]); + mov(reg_dst, ptr[param1 + offsetof(args_t, out)]); + mov(reg_sz, ptr[param1 + offsetof(args_t, count)]); + + xor_(rsi, rsi); + mov(r8, reg_sz); + shr(r8, vlen_log2); + + foreach(rsi, 1, r8, [&, this](const Xbyak::Reg64& idx) { + _convert_vec(*this, reg_src, reg_dst); + add(reg_src, _src_size * vlen); + add(reg_dst, _dst_size * vlen); }); + + L(tail); + + shl(rsi, vlen_log2); + sub(reg_sz, rsi); + test(reg_sz, reg_sz); + jz(exit); + + // allocate array for 8 floats on stack + sub(rsp, vlen * sizeof(float)); + mov(r8, rsp); + + vpxor(ymm4, ymm4, ymm4); + vmovups(yword[r8], ymm4); + + // Tail conversion + copy(r8, reg_src, reg_sz, _src_size); + _convert_vec(*this, r8, r8); + copy(reg_dst, r8, reg_sz, _dst_size); + + // Free the array on stack + add(rsp, vlen * sizeof(float)); + + L(exit); + + postamble(); + } + + void foreach(const Xbyak::Reg64& idx, + size_t step, + const Xbyak::Reg64& end, + std::function && fn) { + Label loop, exit; + + L(loop); + cmp(idx, end); + jge(exit); + + fn(idx); + + add(idx, step); + jmp(loop); + L(exit); + } + + void copy(const Xbyak::Reg64& dst, + const Xbyak::Reg64& src, + const Xbyak::Reg64& size, + size_t item_size) { + push(rsi); + push(r15); + + xor_(rsi, rsi); + + auto address_frame = [this](size_t size) -> const AddressFrame& { + switch (size) { + case 1: return byte; + case 2: return word; + case 4: return dword; + case 8: return qword; + default: + break; + } + return ptr; + }; + + const auto & addr_frame = address_frame(item_size); + + foreach(rsi, 1, size, [&, this](const Xbyak::Reg64& idx) { + mov(r15, addr_frame[src + idx * item_size]); + mov(addr_frame[dst + idx * item_size], r15); + }); + + pop(r15); + pop(rsi); + } + +public: + typedef struct { + const void* src; + void* out; + const size_t count; + } args_t; + + typedef void (*fn_t)(const args_t*); + + typedef void (*convert_vec_t)(jit_generator &, + const RegExp &, + const RegExp &); + + jit_convert_array(convert_vec_t convert_vec, + size_t src_size, + size_t dst_size) + : _convert_vec(convert_vec) + , _src_size(src_size) + , _dst_size(dst_size) {} + + template + static fn_t get() { + if 
(mayiuse(avx2) && cpu().has(util::Cpu::tF16C)) { + static jit_convert_array converter(convert_vec, sizeof(src_t), sizeof(dst_t)); + auto & generator = static_cast(converter); + generator.create_kernel(); + return (fn_t)generator.jit_ker(); + } + return nullptr; + } + +private: + convert_vec_t _convert_vec; + size_t _src_size; + size_t _dst_size; +}; + +template +void jit_convert(const TI* arg, TO* out, size_t count) { + using jit_impl = jit_convert_array; + static auto converter = jit_impl::get(); + + if (converter) { + typename jit_impl::args_t args = { arg, out, count }; + converter(&args); + } else { + for (size_t i = 0; i < count; ++i) { + out[i] = static_cast(arg[i]); + } } } @@ -35,84 +216,391 @@ struct PrecisionInfo { template <> struct PrecisionInfo { - using value_type = MKLDNNPlugin::bfloat16_t; + using value_type = bfloat16_t; }; +template <> +struct PrecisionInfo { + using value_type = ov::float16; +}; + +template <> +struct PrecisionInfo { + using value_type = uint8_t; +}; + +template::value + || std::is_same::value, + float, T>::type> +struct Range { + const std::tuple & fit(const Precision & prec); + +private: + std::tuple _range { + std::numeric_limits::lowest(), + std::numeric_limits::max() + }; +}; + +template +const std::tuple & Range::fit(const Precision & prec) { + if (prec.is_float()) { + double lbound, ubound; + switch (prec) { + case Precision::BF16: + lbound = static_cast(std::numeric_limits::lowest()); + ubound = static_cast(std::numeric_limits::max()); + break; + case Precision::FP16: + lbound = static_cast(std::numeric_limits::lowest()); + ubound = static_cast(std::numeric_limits::max()); + break; + case Precision::FP32: + lbound = static_cast(std::numeric_limits::lowest()); + ubound = static_cast(std::numeric_limits::max()); + break; + case Precision::FP64: + lbound = std::numeric_limits::lowest(); + ubound = std::numeric_limits::max(); + break; + default: + IE_THROW() << "Unsupported precision"; + } + std::get<0>(_range) = static_cast(std::max(static_cast(std::get<0>(_range)), lbound)); + std::get<1>(_range) = static_cast(std::min(static_cast(std::get<1>(_range)), ubound)); + } else { + int64_t lbound; + uint64_t ubound; + switch (prec) { + case Precision::BOOL: + case Precision::U8: + lbound = static_cast(std::numeric_limits::lowest()); + ubound = static_cast(std::numeric_limits::max()); + break; + case Precision::I8: + lbound = static_cast(std::numeric_limits::lowest()); + ubound = static_cast(std::numeric_limits::max()); + break; + case Precision::U16: + lbound = static_cast(std::numeric_limits::lowest()); + ubound = static_cast(std::numeric_limits::max()); + break; + case Precision::I16: + lbound = static_cast(std::numeric_limits::lowest()); + ubound = static_cast(std::numeric_limits::max()); + break; + case Precision::U32: + lbound = static_cast(std::numeric_limits::lowest()); + ubound = static_cast(std::numeric_limits::max()); + break; + case Precision::I32: + lbound = static_cast(std::numeric_limits::lowest()); + ubound = static_cast(std::numeric_limits::max()); + break; + case Precision::U64: + lbound = static_cast(std::numeric_limits::lowest()); + ubound = static_cast(std::numeric_limits::max()); + break; + case Precision::I64: + lbound = static_cast(std::numeric_limits::lowest()); + ubound = static_cast(std::numeric_limits::max()); + break; + default: + IE_THROW() << "Unsupported precision"; + } + using ltype = typename std::conditional< + std::is_floating_point::value, + double, int64_t>::type; + using utype = typename std::conditional< + 
std::is_floating_point<T>::value,
+                            double, uint64_t>::type;
+        std::get<0>(_range) = static_cast<T>(std::max(static_cast<ltype>(std::get<0>(_range)), static_cast<ltype>(lbound)));
+        std::get<1>(_range) = static_cast<T>(std::min(static_cast<utype>(std::get<1>(_range)), static_cast<utype>(ubound)));
+    }
+    return _range;
+}
+
 struct ConvertContext {
     const void *srcPtr;
     void *dstPtr;
     size_t size;
+    Precision interimPrc;
+    Precision dstPrc;
     bool converted;
+
+    template<typename T>
+    std::tuple<T, T> range() const {
+        Range<T> r;
+        r.fit(interimPrc);
+        return r.fit(dstPrc);
+    }
 };
 
 template<typename T>
-struct ConvertPrecision {
-    using src_t = typename std::tuple_element<0, T>::type;
-    using dst_t = typename std::tuple_element<1, T>::type;
+struct ConvertPrecision;
 
+template<typename src_t, typename dst_t>
+struct ConvertPrecision<std::tuple<src_t, dst_t>> {
     void operator()(ConvertContext & ctx) {
-        convert<src_t, dst_t>(ctx.srcPtr, ctx.dstPtr, ctx.size);
+        auto src = static_cast<const src_t *>(ctx.srcPtr);
+        auto dst = static_cast<dst_t *>(ctx.dstPtr);
+        src_t lbound, ubound;
+        std::tie(lbound, ubound) = ctx.range<src_t>();
+
+        if (std::is_integral<src_t>::value
+            || ctx.interimPrc.is_float()
+            || std::is_integral<dst_t>::value) {
+            parallel_for(ctx.size, [&](size_t i) {
+                dst[i] = static_cast<dst_t>(std::max(std::min(src[i], ubound), lbound));
+            });
+        } else {
+            parallel_for(ctx.size, [&](size_t i) {
+                dst[i] = static_cast<dst_t>(std::trunc(std::max(std::min(src[i], ubound), lbound)));
+            });
+        }
+
+        ctx.converted = true;
     }
 };
 
+template<>
+struct ConvertPrecision<std::tuple<float, MKLDNNPlugin::bfloat16_t>> {
+    void operator()(ConvertContext & ctx) {
+        auto src = static_cast<const float *>(ctx.srcPtr);
+        auto dst = static_cast<MKLDNNPlugin::bfloat16_t *>(ctx.dstPtr);
+
+        if (ctx.interimPrc.is_float()) {
+            parallel_for(ctx.size, [&](size_t i) {
+                dst[i] = static_cast<MKLDNNPlugin::bfloat16_t>(src[i]);
+            });
+        } else {
+            float lbound, ubound;
+            std::tie(lbound, ubound) = ctx.range<float>();
+            parallel_for(ctx.size, [&](size_t i) {
+                dst[i] = static_cast<MKLDNNPlugin::bfloat16_t>(std::trunc(std::max(std::min(src[i], ubound), lbound)));
+            });
+        }
+
+        ctx.converted = true;
+    }
+};
+
+template<>
+struct ConvertPrecision<std::tuple<MKLDNNPlugin::bfloat16_t, float>> {
+    void operator()(ConvertContext & ctx) {
+        auto src = static_cast<const MKLDNNPlugin::bfloat16_t *>(ctx.srcPtr);
+        auto dst = static_cast<float *>(ctx.dstPtr);
+
+        if (ctx.interimPrc.is_float()) {
+            parallel_for(ctx.size, [&](size_t i) {
+                dst[i] = static_cast<float>(src[i]);
+            });
+        } else {
+            float lbound, ubound;
+            std::tie(lbound, ubound) = ctx.range<float>();
+            parallel_for(ctx.size, [&](size_t i) {
+                dst[i] = std::trunc(std::max(std::min(static_cast<float>(src[i]), ubound), lbound));
+            });
+        }
+
+        ctx.converted = true;
+    }
+};
+
+template<typename src_t>
+struct ConvertPrecision<std::tuple<src_t, ov::float16>> {
+    void operator()(ConvertContext & ctx) {
+        auto src = static_cast<const src_t *>(ctx.srcPtr);
+        auto dst = static_cast<ov::float16 *>(ctx.dstPtr);
+
+        constexpr size_t batch = 64;
+        const size_t iterations = MKLDNNPlugin::div_up(ctx.size, batch);
+        typedef float batch_type[batch];
+
+        src_t lbound, ubound;
+        std::tie(lbound, ubound) = ctx.range<src_t>();
+
+        if (std::is_integral<src_t>::value
+            || ctx.interimPrc.is_float()) {
+            parallel_for(iterations, [&](size_t i) {
+                batch_type tmp;
+                const size_t offset = i * batch;
+                const size_t current_batch_size = std::min(ctx.size - offset, batch);
+                for (size_t j = 0; j < current_batch_size; ++j)         // src_t -> fp32
+                    tmp[j] = static_cast<float>(std::max(std::min(src[offset + j], ubound), lbound));
+                jit_convert(tmp, dst + offset, current_batch_size);     // fp32 -> fp16
+            });
+        } else {
+            parallel_for(iterations, [&](size_t i) {
+                batch_type tmp;
+                const size_t offset = i * batch;
+                const size_t current_batch_size = std::min(ctx.size - offset, batch);
+                for (size_t j = 0; j < current_batch_size; ++j)         // src_t -> fp32
+                    tmp[j] = static_cast<float>(std::trunc(std::max(std::min(src[offset + j], ubound), lbound)));
+                jit_convert(tmp, dst + offset, current_batch_size);     // fp32 -> fp16
+            });
+        }
+
+        ctx.converted = true;
+    }
+};
+
+template<typename dst_t>
+struct ConvertPrecision<std::tuple<ov::float16, dst_t>> {
+    void operator()(ConvertContext & ctx) {
+        auto src = static_cast<const ov::float16 *>(ctx.srcPtr);
+        auto dst = static_cast<dst_t *>(ctx.dstPtr);
+
+        constexpr size_t batch = 64;
+        const size_t iterations = MKLDNNPlugin::div_up(ctx.size, batch);
+        typedef float batch_type[batch];
+
+        float lbound, ubound;
+        std::tie(lbound, ubound) = ctx.range<float>();
+
+        if (ctx.interimPrc.is_float()
+            || std::is_integral<dst_t>::value) {
+            parallel_for(iterations, [&](size_t i) {
+                batch_type tmp;
+                const size_t offset = i * batch;
+                const size_t current_batch_size = std::min(ctx.size - offset, batch);
+                jit_convert(src + offset, tmp, current_batch_size);     // fp16 -> fp32
+                for (size_t j = 0; j < current_batch_size; ++j)         // fp32 -> dst_t
+                    dst[offset + j] = static_cast<dst_t>(std::max(std::min(tmp[j], ubound), lbound));
+            });
+        } else {
+            parallel_for(iterations, [&](size_t i) {
+                batch_type tmp;
+                const size_t offset = i * batch;
+                const size_t current_batch_size = std::min(ctx.size - offset, batch);
+                jit_convert(src + offset, tmp, current_batch_size);     // fp16 -> fp32
+                for (size_t j = 0; j < current_batch_size; ++j)         // fp32 -> dst_t
+                    dst[offset + j] = static_cast<dst_t>(std::trunc(std::max(std::min(tmp[j], ubound), lbound)));
+            });
+        }
+
+        ctx.converted = true;
+    }
+};
+
+template<>
+struct ConvertPrecision<std::tuple<ov::float16, ov::float16>> {
+    void operator()(ConvertContext & ctx) {
+        auto src = static_cast<const ov::float16 *>(ctx.srcPtr);
+        auto dst = static_cast<ov::float16 *>(ctx.dstPtr);
+
+        constexpr size_t batch = 64;
+        const size_t iterations = MKLDNNPlugin::div_up(ctx.size, batch);
+        typedef float batch_type[batch];
+
+        float lbound, ubound;
+        std::tie(lbound, ubound) = ctx.range<float>();
+
+        if (ctx.interimPrc.is_float()) {
+            cpu_memcpy(dst, src, ctx.size * sizeof(ov::float16));
+        } else {
+            parallel_for(iterations, [&](size_t i) {
+                batch_type tmp;
+                const size_t offset = i * batch;
+                const size_t current_batch_size = std::min(ctx.size - offset, batch);
+                jit_convert(src + offset, tmp, current_batch_size);     // fp16 -> fp32
+                for (size_t j = 0; j < current_batch_size; ++j)         // truncate fp32
+                    tmp[j] = std::trunc(std::max(std::min(tmp[j], ubound), lbound));
+                jit_convert(tmp, dst + offset, current_batch_size);     // fp32 -> fp16
+            });
+        }
+
+        ctx.converted = true;
+    }
+};
+
+bool isConversionTruncatesRange(const Precision & from, const Precision & to) {
+    return to.bitsSize() < from.bitsSize()
+            || (from.is_float() && !to.is_float())      // float -> integral
+            || (from.isSigned() != to.isSigned())       // signed <-> unsigned
+            || (to == Precision::BOOL && from != to);   // T -> bool
+}
+
 }   // namespace
 
 #define MKLDNN_CVT(ST, DT) OV_CASE2(Precision::ST, Precision::DT, PrecisionInfo<Precision::ST>::value_type, PrecisionInfo<Precision::DT>::value_type)
 
-void cpu_convert(const void *srcPtr, void *dstPtr, Precision srcPrc, Precision dstPrc, const size_t size) {
-    using namespace MKLDNNPlugin;
+#define MKLDNN_CVT_LIST \
+    MKLDNN_CVT(U8, I8), MKLDNN_CVT(U8, U16), MKLDNN_CVT(U8, I16), MKLDNN_CVT(U8, U32), \
+    MKLDNN_CVT(U8, I32), MKLDNN_CVT(U8, U64), MKLDNN_CVT(U8, I64), MKLDNN_CVT(U8, FP32), \
+    MKLDNN_CVT(U8, FP16), MKLDNN_CVT(U8, BF16), MKLDNN_CVT(U8, FP64), MKLDNN_CVT(U8, BOOL), \
+    MKLDNN_CVT(I8, U8), MKLDNN_CVT(I8, U16), MKLDNN_CVT(I8, I16), MKLDNN_CVT(I8, U32), \
+    MKLDNN_CVT(I8, I32), MKLDNN_CVT(I8, U64), MKLDNN_CVT(I8, I64), MKLDNN_CVT(I8, FP32), \
+    MKLDNN_CVT(I8, FP16), MKLDNN_CVT(I8, BF16), MKLDNN_CVT(I8, FP64), MKLDNN_CVT(I8, BOOL), \
+    MKLDNN_CVT(U16, U8), MKLDNN_CVT(U16, I8), MKLDNN_CVT(U16, I16), MKLDNN_CVT(U16, U32), \
+
MKLDNN_CVT(U16, I32), MKLDNN_CVT(U16, U64), MKLDNN_CVT(U16, I64), MKLDNN_CVT(U16, FP32), \ + MKLDNN_CVT(U16, FP16), MKLDNN_CVT(U16, BF16), MKLDNN_CVT(U16, FP64), MKLDNN_CVT(U16, BOOL), \ + MKLDNN_CVT(I16, U8), MKLDNN_CVT(I16, I8), MKLDNN_CVT(I16, U16), MKLDNN_CVT(I16, U32), \ + MKLDNN_CVT(I16, I32), MKLDNN_CVT(I16, U64), MKLDNN_CVT(I16, I64), MKLDNN_CVT(I16, FP32), \ + MKLDNN_CVT(I16, FP16), MKLDNN_CVT(I16, BF16), MKLDNN_CVT(I16, FP64), MKLDNN_CVT(I16, BOOL), \ + MKLDNN_CVT(U32, U8), MKLDNN_CVT(U32, I8), MKLDNN_CVT(U32, U16), MKLDNN_CVT(U32, I16), \ + MKLDNN_CVT(U32, I32), MKLDNN_CVT(U32, U64), MKLDNN_CVT(U32, I64), MKLDNN_CVT(U32, FP32), \ + MKLDNN_CVT(U32, FP16), MKLDNN_CVT(U32, BF16), MKLDNN_CVT(U32, FP64), MKLDNN_CVT(U32, BOOL), \ + MKLDNN_CVT(I32, U8), MKLDNN_CVT(I32, I8), MKLDNN_CVT(I32, U16), MKLDNN_CVT(I32, I16), \ + MKLDNN_CVT(I32, U32), MKLDNN_CVT(I32, U64), MKLDNN_CVT(I32, I64), MKLDNN_CVT(I32, FP32), \ + MKLDNN_CVT(I32, FP16), MKLDNN_CVT(I32, BF16), MKLDNN_CVT(I32, FP64), MKLDNN_CVT(I32, BOOL), \ + MKLDNN_CVT(U64, U8), MKLDNN_CVT(U64, I8), MKLDNN_CVT(U64, U16), MKLDNN_CVT(U64, I16), \ + MKLDNN_CVT(U64, U32), MKLDNN_CVT(U64, I32), MKLDNN_CVT(U64, I64), MKLDNN_CVT(U64, FP32), \ + MKLDNN_CVT(U64, FP16), MKLDNN_CVT(U64, BF16), MKLDNN_CVT(U64, FP64), MKLDNN_CVT(U64, BOOL), \ + MKLDNN_CVT(I64, U8), MKLDNN_CVT(I64, I8), MKLDNN_CVT(I64, U16), MKLDNN_CVT(I64, I16), \ + MKLDNN_CVT(I64, U32), MKLDNN_CVT(I64, I32), MKLDNN_CVT(I64, U64), MKLDNN_CVT(I64, FP32), \ + MKLDNN_CVT(I64, FP16), MKLDNN_CVT(I64, BF16), MKLDNN_CVT(I64, FP64), MKLDNN_CVT(I64, BOOL), \ + MKLDNN_CVT(FP32, U8), MKLDNN_CVT(FP32, I8), MKLDNN_CVT(FP32, U16), MKLDNN_CVT(FP32, I16), \ + MKLDNN_CVT(FP32, U32), MKLDNN_CVT(FP32, I32), MKLDNN_CVT(FP32, U64), MKLDNN_CVT(FP32, I64), \ + MKLDNN_CVT(FP32, FP16), MKLDNN_CVT(FP32, BF16), MKLDNN_CVT(FP32, FP64), MKLDNN_CVT(FP32, BOOL), \ + MKLDNN_CVT(FP16, U8), MKLDNN_CVT(FP16, I8), MKLDNN_CVT(FP16, U16), MKLDNN_CVT(FP16, I16), \ + MKLDNN_CVT(FP16, U32), MKLDNN_CVT(FP16, I32), MKLDNN_CVT(FP16, U64), MKLDNN_CVT(FP16, I64), \ + MKLDNN_CVT(FP16, FP32), MKLDNN_CVT(FP16, BF16), MKLDNN_CVT(FP16, FP64), MKLDNN_CVT(FP16, BOOL), \ + MKLDNN_CVT(BF16, U8), MKLDNN_CVT(BF16, I8), MKLDNN_CVT(BF16, U16), MKLDNN_CVT(BF16, I16), \ + MKLDNN_CVT(BF16, U32), MKLDNN_CVT(BF16, I32), MKLDNN_CVT(BF16, U64), MKLDNN_CVT(BF16, I64), \ + MKLDNN_CVT(BF16, FP32), MKLDNN_CVT(BF16, FP16), MKLDNN_CVT(BF16, FP64), MKLDNN_CVT(BF16, BOOL), \ + MKLDNN_CVT(FP64, U8), MKLDNN_CVT(FP64, I8), MKLDNN_CVT(FP64, U16), MKLDNN_CVT(FP64, I16), \ + MKLDNN_CVT(FP64, U32), MKLDNN_CVT(FP64, I32), MKLDNN_CVT(FP64, U64), MKLDNN_CVT(FP64, I64), \ + MKLDNN_CVT(FP64, FP32), MKLDNN_CVT(FP64, FP16), MKLDNN_CVT(FP64, BF16), MKLDNN_CVT(FP64, BOOL), \ + MKLDNN_CVT(BOOL, U8), MKLDNN_CVT(BOOL, I8), MKLDNN_CVT(BOOL, U16), MKLDNN_CVT(BOOL, I16), \ + MKLDNN_CVT(BOOL, U32), MKLDNN_CVT(BOOL, I32), MKLDNN_CVT(BOOL, U64), MKLDNN_CVT(BOOL, I64), \ + MKLDNN_CVT(BOOL, FP32), MKLDNN_CVT(BOOL, FP16), MKLDNN_CVT(BOOL, BF16), MKLDNN_CVT(BOOL, FP64), \ + MKLDNN_CVT(U8, U8), MKLDNN_CVT(I8, I8), MKLDNN_CVT(U16, U16), MKLDNN_CVT(I16, I16), \ + MKLDNN_CVT(U32, U32), MKLDNN_CVT(I32, I32), MKLDNN_CVT(U64, U64), MKLDNN_CVT(I64, I64), \ + MKLDNN_CVT(FP32, FP32), MKLDNN_CVT(FP16, FP16), MKLDNN_CVT(BF16, BF16), MKLDNN_CVT(FP64, FP64), \ + MKLDNN_CVT(BOOL, BOOL) +void cpu_convert(const void *srcPtr, void *dstPtr, Precision srcPrc, Precision dstPrc, const size_t size) { + cpu_convert(srcPtr, dstPtr, srcPrc, dstPrc, dstPrc, size); +} + +void cpu_convert(const void *srcPtr, + 
                 void *dstPtr,
+                 InferenceEngine::Precision srcPrc,
+                 InferenceEngine::Precision interimPrc,
+                 InferenceEngine::Precision dstPrc,
+                 const size_t size) {
     if (srcPtr == nullptr || dstPtr == nullptr)
         IE_THROW() << "cpu_convert has null data pointer";
 
-    if (srcPrc == dstPrc) {
-        cpu_memcpy(dstPtr, srcPtr, size*dstPrc.size());
-        return;
+    if (srcPrc == dstPrc && srcPrc == interimPrc) {
+        cpu_memcpy(dstPtr, srcPtr, size * dstPrc.size());
+    } else {
+        ConvertContext ctx = {
+            srcPtr,
+            dstPtr,
+            size,
+            interimPrc,
+            dstPrc,
+            false
+        };
+        OV_SWITCH(MKLDNNPlugin, ConvertPrecision, ctx, std::tie(srcPrc, dstPrc), MKLDNN_CVT_LIST);
+        if (!ctx.converted)
+            IE_THROW() << "cpu_convert can't convert from: " << srcPrc << " precision to: " << dstPrc;
     }
-
-    ConvertContext ctx = { srcPtr, dstPtr, size, false };
-
-    OV_SWITCH(MKLDNNPlugin, ConvertPrecision, ctx, std::tie(srcPrc, dstPrc),
-              MKLDNN_CVT(U8, I8), MKLDNN_CVT(U8, U16), MKLDNN_CVT(U8, I16),
-              MKLDNN_CVT(U8, I32), MKLDNN_CVT(U8, U64), MKLDNN_CVT(U8, I64),
-              MKLDNN_CVT(U8, FP32), MKLDNN_CVT(U8, BF16), MKLDNN_CVT(U8, BOOL),
-              MKLDNN_CVT(I8, U8), MKLDNN_CVT(I8, U16), MKLDNN_CVT(I8, I16),
-              MKLDNN_CVT(I8, I32), MKLDNN_CVT(I8, U64), MKLDNN_CVT(I8, I64),
-              MKLDNN_CVT(I8, FP32), MKLDNN_CVT(I8, BF16), MKLDNN_CVT(I8, BOOL),
-              MKLDNN_CVT(U16, U8), MKLDNN_CVT(U16, I8), MKLDNN_CVT(U16, I16),
-              MKLDNN_CVT(U16, I32), MKLDNN_CVT(U16, U64), MKLDNN_CVT(U16, I64),
-              MKLDNN_CVT(U16, FP32), MKLDNN_CVT(U16, BF16), MKLDNN_CVT(U16, BOOL),
-              MKLDNN_CVT(I16, U8), MKLDNN_CVT(I16, I8), MKLDNN_CVT(I16, U16),
-              MKLDNN_CVT(I16, I32), MKLDNN_CVT(I16, U64), MKLDNN_CVT(I16, I64),
-              MKLDNN_CVT(I16, FP32), MKLDNN_CVT(I16, BF16), MKLDNN_CVT(I16, BOOL),
-              MKLDNN_CVT(I32, U8), MKLDNN_CVT(I32, I8), MKLDNN_CVT(I32, U16),
-              MKLDNN_CVT(I32, I16), MKLDNN_CVT(I32, U64), MKLDNN_CVT(I32, I64),
-              MKLDNN_CVT(I32, FP32), MKLDNN_CVT(I32, BF16), MKLDNN_CVT(I32, BOOL),
-              MKLDNN_CVT(U64, U8), MKLDNN_CVT(U64, I8), MKLDNN_CVT(U64, U16),
-              MKLDNN_CVT(U64, I16), MKLDNN_CVT(U64, I32), MKLDNN_CVT(U64, I64),
-              MKLDNN_CVT(U64, FP32), MKLDNN_CVT(U64, BF16), MKLDNN_CVT(U64, BOOL),
-              MKLDNN_CVT(I64, U8), MKLDNN_CVT(I64, I8), MKLDNN_CVT(I64, U16),
-              MKLDNN_CVT(I64, I16), MKLDNN_CVT(I64, I32), MKLDNN_CVT(I64, U64),
-              MKLDNN_CVT(I64, FP32), MKLDNN_CVT(I64, BF16), MKLDNN_CVT(I64, BOOL),
-              MKLDNN_CVT(FP32, U8), MKLDNN_CVT(FP32, I8), MKLDNN_CVT(FP32, U16),
-              MKLDNN_CVT(FP32, I16), MKLDNN_CVT(FP32, I32), MKLDNN_CVT(FP32, U64),
-              MKLDNN_CVT(FP32, I64), MKLDNN_CVT(FP32, BF16), MKLDNN_CVT(FP32, BOOL),
-              MKLDNN_CVT(BF16, U8), MKLDNN_CVT(BF16, I8), MKLDNN_CVT(BF16, U16),
-              MKLDNN_CVT(BF16, I16), MKLDNN_CVT(BF16, I32), MKLDNN_CVT(BF16, U64),
-              MKLDNN_CVT(BF16, I64), MKLDNN_CVT(BF16, FP32), MKLDNN_CVT(BF16, BOOL),
-              MKLDNN_CVT(BOOL, U8), MKLDNN_CVT(BOOL, I8), MKLDNN_CVT(BOOL, U16),
-              MKLDNN_CVT(BOOL, I16), MKLDNN_CVT(BOOL, I32), MKLDNN_CVT(BOOL, U64),
-              MKLDNN_CVT(BOOL, I64), MKLDNN_CVT(BOOL, FP32), MKLDNN_CVT(BOOL, BF16),
-              MKLDNN_CVT(FP64, U8), MKLDNN_CVT(FP64, I8), MKLDNN_CVT(FP64, U16),
-              MKLDNN_CVT(FP64, I16), MKLDNN_CVT(FP64, I32), MKLDNN_CVT(FP64, U64),
-              MKLDNN_CVT(FP64, I64), MKLDNN_CVT(FP64, FP32), MKLDNN_CVT(FP64, BF16), MKLDNN_CVT(FP64, BOOL),
-              MKLDNN_CVT(U32, U8), MKLDNN_CVT(U32, I8), MKLDNN_CVT(U32, U16),
-              MKLDNN_CVT(U32, I16), MKLDNN_CVT(U32, I32), MKLDNN_CVT(U32, U64),
-              MKLDNN_CVT(U32, I64), MKLDNN_CVT(U32, FP32), MKLDNN_CVT(U32, BF16), MKLDNN_CVT(U32, BOOL));
-
-    if (!ctx.converted)
-        IE_THROW() << "cpu_convert can't convert from: " << srcPrc << " precision to: " << dstPrc;
 }
 
 #undef MKLDNN_CVT
+#undef
MKLDNN_CVT_LIST diff --git a/inference-engine/src/mkldnn_plugin/nodes/common/cpu_convert.h b/inference-engine/src/mkldnn_plugin/nodes/common/cpu_convert.h index dd4ef59a38b..8ed46cab7a0 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/common/cpu_convert.h +++ b/inference-engine/src/mkldnn_plugin/nodes/common/cpu_convert.h @@ -19,5 +19,32 @@ * number of elements in buffers to be converted * @return none. */ +void cpu_convert(const void *srcPtr, + void *dstPtr, + InferenceEngine::Precision srcPrc, + InferenceEngine::Precision dstPrc, + const size_t size); -void cpu_convert(const void *srcPtr, void *dstPtr, InferenceEngine::Precision srcPrc, InferenceEngine::Precision dstPrc, const size_t size); +/** + * @brief Copy size elements from buffer specified srcPtr pointer to buffer specified dstPtr. + * If the precisions srcPrc and dstPrc are different, a conversion from srcPrc to dstPrc is performed. + * @param srcPtr + * pointer to the buffer to convert from + * @param dstPtr + * pointer to the buffer to convert to + * @param srcPrc + * precision the buffer from which convert + * @param interimPrc + * intermediate precision used for type truncation + * @param dstPrc + * precision the buffer to which convert + * @param size + * number of elements in buffers to be converted + * @return none. + */ +void cpu_convert(const void *srcPtr, + void *dstPtr, + InferenceEngine::Precision srcPrc, + InferenceEngine::Precision interimPrc, + InferenceEngine::Precision dstPrc, + const size_t size); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_convert_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_convert_node.cpp index 2faf969c1ea..2a20f45ff29 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_convert_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_convert_node.cpp @@ -7,7 +7,8 @@ #include "common/cpu_convert.h" #include "common/blocked_desc_creator.h" #include -#include "utils/ngraph_utils.hpp" +#include +#include using namespace mkldnn; using namespace MKLDNNPlugin; @@ -26,14 +27,17 @@ bool MKLDNNConvertNode::isSupportedOperation(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) : - MKLDNNNode(op, eng, cache) { +MKLDNNConvertNode::MKLDNNConvertNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) + : MKLDNNNode(op, eng, cache) { std::string errorMessage; if (isSupportedOperation(op, errorMessage)) { errorPrefix = "Convert node with name '" + getName() + "'"; } else { IE_THROW(NotImplemented) << errorMessage; } + + auto convert = ov::as_type_ptr(op); + origPrc = details::convertPrecision(convert->get_destination_type()); } std::vector MKLDNNConvertNode::shapeInfer() const { @@ -42,7 +46,8 @@ std::vector MKLDNNConvertNode::shapeInfer() const { MKLDNNConvertNode::MKLDNNConvertNode(const Shape &shape, const InferenceEngine::Precision &inPrc, const InferenceEngine::Precision &outPrc, const std::string &nodeName, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) - : MKLDNNNode("Convert", nodeName, eng, cache) { + : MKLDNNNode("Convert", nodeName, eng, cache) + , origPrc(outPrc) { inputShapes.push_back(shape); addOriginalInputPrecision(inPrc); outputShapes.push_back(shape); @@ -147,7 +152,13 @@ void MKLDNNConvertNode::execute(mkldnn::stream strm) { void* srcPtr = parentMem.GetPtr(); void* dstPtr = childMem.GetPtr(); - cpu_convert(srcPtr, dstPtr, parentMem.getDesc().getPrecision(), childMem.getDesc().getPrecision(), parentPaddElemCount); + + cpu_convert(srcPtr, + dstPtr, + 
parentMem.getDesc().getPrecision(), + origPrc, + childMem.getDesc().getPrecision(), + parentPaddElemCount); } bool MKLDNNConvertNode::created() const { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_convert_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_convert_node.h index bffb2447280..08042187788 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_convert_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_convert_node.h @@ -49,6 +49,7 @@ public: private: MemoryDescPtr input; MemoryDescPtr output; + InferenceEngine::Precision origPrc; std::string errorPrefix; }; diff --git a/src/bindings/python/tests/test_onnx/test_backend.py b/src/bindings/python/tests/test_onnx/test_backend.py index a3f88676852..7385c5057ef 100644 --- a/src/bindings/python/tests/test_onnx/test_backend.py +++ b/src/bindings/python/tests/test_onnx/test_backend.py @@ -101,16 +101,9 @@ tests_expected_to_fail = [ ( xfail_issue_FLOAT_LIKE, "OnnxBackendNodeModelTest.test_cast_BFLOAT16_to_FLOAT_cpu", - "OnnxBackendNodeModelTest.test_cast_FLOAT16_to_DOUBLE_cpu", - "OnnxBackendNodeModelTest.test_cast_FLOAT16_to_FLOAT_cpu", "OnnxBackendNodeModelTest.test_cast_FLOAT_to_BFLOAT16_cpu", "OnnxBackendNodeModelTest.test_castlike_BFLOAT16_to_FLOAT_expanded_cpu", - "OnnxBackendNodeModelTest.test_castlike_FLOAT16_to_DOUBLE_expanded_cpu", - "OnnxBackendNodeModelTest.test_castlike_FLOAT16_to_FLOAT_expanded_cpu", "OnnxBackendNodeModelTest.test_castlike_FLOAT_to_BFLOAT16_expanded_cpu", - "OnnxBackendNodeModelTest.test_max_float16_cpu", - "OnnxBackendNodeModelTest.test_min_float16_cpu", - "OnnxBackendNodeModelTest.test_mod_mixed_sign_float16_cpu", ), ( xfail_issue_49207, diff --git a/src/bindings/python/tests/test_onnx/test_zoo_models.py b/src/bindings/python/tests/test_onnx/test_zoo_models.py index ee16fa4724a..ac455d76168 100644 --- a/src/bindings/python/tests/test_onnx/test_zoo_models.py +++ b/src/bindings/python/tests/test_onnx/test_zoo_models.py @@ -171,12 +171,6 @@ if len(zoo_models) > 0: test_cases = backend_test.test_cases["OnnxBackendModelExecutionTest"] if tests.MODEL_ZOO_XFAIL: execution_xfail_list = [ - # New Python API - fp16 blob - (xfail_issue_67415, "test_MSFT_opset7_fp16_inception_v1_onnxzoo_lotus_inception_v1_cpu"), - (xfail_issue_67415, "test_MSFT_opset7_fp16_shufflenet_onnxzoo_lotus_shufflenet_cpu"), - (xfail_issue_67415, "test_MSFT_opset8_fp16_inception_v1_onnxzoo_lotus_inception_v1_cpu"), - (xfail_issue_67415, "test_MSFT_opset8_fp16_shufflenet_onnxzoo_lotus_shufflenet_cpu"), - # ONNX Model Zoo (xfail_issue_39669, "test_onnx_model_zoo_text_machine_comprehension_t5_model_t5_encoder_12_t5_encoder_cpu"), (xfail_issue_39669, "test_onnx_model_zoo_text_machine_comprehension_t5_model_t5_decoder_with_lm_head_12_t5_decoder_with_lm_head_cpu"), diff --git a/src/inference/src/cpp_interfaces/interface/ie_iplugin_internal.cpp b/src/inference/src/cpp_interfaces/interface/ie_iplugin_internal.cpp index a3f27eb8ea2..945f06ef6e8 100644 --- a/src/inference/src/cpp_interfaces/interface/ie_iplugin_internal.cpp +++ b/src/inference/src/cpp_interfaces/interface/ie_iplugin_internal.cpp @@ -309,16 +309,15 @@ void IInferencePlugin::SetExeNetworkInfo(const std::shared_ptrget_output_size()); for (const auto& param : function->get_parameters()) { - auto new_param = param->copy_with_new_inputs({}); + auto new_param = ov::as_type_ptr(param->copy_with_new_inputs({})); new_param->set_friendly_name(param->get_friendly_name()); if (add_operation_names) 
new_param->output(0).get_tensor().add_names({new_param->get_friendly_name()}); // WA: use CNNNetwork's precisions since plugins sometimes override their precisions // after transformation pipeline is run - new_param->set_output_type( - 0, - InferenceEngine::details::convertPrecision(inputsInfo.at(new_param->get_friendly_name())->getPrecision()), - new_param->get_output_partial_shape(0)); + new_param->set_element_type( + InferenceEngine::details::convertPrecision(inputsInfo.at(new_param->get_friendly_name())->getPrecision())); + new_param->validate_and_infer_types(); const_params.emplace_back(new_param); } for (const auto& result : function->get_results()) { @@ -326,10 +325,9 @@ void IInferencePlugin::SetExeNetworkInfo(const std::shared_ptrget_output_partial_shape(0)); const std::string param_name = ngraph::op::util::create_ie_output_name(result->input_value(0)); fake_param->set_friendly_name(param_name); - fake_param->set_output_type( - 0, - InferenceEngine::details::convertPrecision(outputsInfo.at(param_name)->getPrecision()), - fake_param->get_output_partial_shape(0)); + fake_param->set_element_type( + InferenceEngine::details::convertPrecision(outputsInfo.at(param_name)->getPrecision())); + fake_param->validate_and_infer_types(); auto new_result = result->copy_with_new_inputs({fake_param}); new_result->set_friendly_name(result->get_friendly_name()); if (add_operation_names) { diff --git a/src/tests/functional/plugin/cpu/shared_tests_instances/blob_tests/set_blob.cpp b/src/tests/functional/plugin/cpu/shared_tests_instances/blob_tests/set_blob.cpp index 5e27ee86b77..641d25dacb8 100644 --- a/src/tests/functional/plugin/cpu/shared_tests_instances/blob_tests/set_blob.cpp +++ b/src/tests/functional/plugin/cpu/shared_tests_instances/blob_tests/set_blob.cpp @@ -8,8 +8,15 @@ using namespace BehaviorTestsDefinitions; using namespace InferenceEngine; -const std::vector precisionSet = {Precision::FP32, Precision::I16, Precision::U8, Precision::I8, Precision::U16, Precision::I32, Precision::BOOL, - Precision::I64, Precision::U64}; +const std::vector precisionSet = { + Precision::U8, Precision::I8, + Precision::U16, Precision::I16, + Precision::U32, Precision::I32, + Precision::U64, Precision::I64, + Precision::BF16, Precision::FP16, + Precision::FP32, Precision::FP64, + Precision::BOOL +}; const std::vector typeSet = {setType::INPUT, setType::OUTPUT, setType::BOTH}; diff --git a/src/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/conversion.cpp b/src/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/conversion.cpp index 03be6f86285..868f22db789 100644 --- a/src/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/conversion.cpp +++ b/src/tests/functional/plugin/cpu/shared_tests_instances/single_layer_tests/conversion.cpp @@ -18,24 +18,26 @@ const std::vector conversionOpTypes = { const std::vector> inShape = {{1, 2, 3, 4}}; const std::vector netPrecisions = { - // Ticket: 59594 - // InferenceEngine::Precision::I4, - InferenceEngine::Precision::I8, - InferenceEngine::Precision::I16, - InferenceEngine::Precision::I32, - InferenceEngine::Precision::I64, - // Ticket: 59594 - // InferenceEngine::Precision::BIN, - // InferenceEngine::Precision::BOOL, - // InferenceEngine::Precision::U4, InferenceEngine::Precision::U8, + InferenceEngine::Precision::I8, InferenceEngine::Precision::U16, - // Ticket: 59594 - // InferenceEngine::Precision::U32, + InferenceEngine::Precision::I16, + InferenceEngine::Precision::U32, + InferenceEngine::Precision::I32, 
InferenceEngine::Precision::U64, + InferenceEngine::Precision::I64, InferenceEngine::Precision::BF16, InferenceEngine::Precision::FP16, - InferenceEngine::Precision::FP32}; + InferenceEngine::Precision::FP32, + InferenceEngine::Precision::FP64, + InferenceEngine::Precision::BOOL, + InferenceEngine::Precision::MIXED, + InferenceEngine::Precision::Q78, + InferenceEngine::Precision::U4, + InferenceEngine::Precision::I4, + InferenceEngine::Precision::BIN, + InferenceEngine::Precision::CUSTOM, +}; INSTANTIATE_TEST_SUITE_P(smoke_ConversionLayerTest, ConversionLayerTest, diff --git a/src/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp b/src/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp index e9d0e22bc4e..b6857ff3671 100644 --- a/src/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp +++ b/src/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp @@ -104,17 +104,6 @@ std::vector disabledTestPatterns() { // CPU plugin does not support some precisions R"(smoke_CachingSupportCase_CPU/LoadNetworkCacheTestBase.CompareWithRefImpl/ReadConcatSplitAssign_f32_batch1_CPU)", - // CPU plugin does not support some precisions - R"(.*Behavior.*OVExecGraphImportExportTest.*elementType=(i8|u32).*)", - R"(.*Behavior.*OVExecGraphImportExportTest.*elementType=(f16).*)", - R"(.*EltwiseLayerTest.*NetType=f16.*)", - - // TODO: CVS-66526 overrides i/o precisions in execution graph - // as WA we used GetInputsInfo() precisions instead of ngraph ones - // R"(.*smoke_BehaviorTests.*OVExecGraphImportExportTest.*importExportedFunction.*type=(i16|u16).*)", - // R"(.*smoke_BehaviorTests.*OVExecGraphImportExportTest.*importExportedFunction.*type=(i64|u64).*)", - // R"(.*smoke_BehaviorTests.*OVExecGraphImportExportTest.*importExportedIENetwork.*type=(i16|u16).*)", - // R"(.*smoke_BehaviorTests.*OVExecGraphImportExportTest.*importExportedIENetwork.*type=(i64|u64).*)", // CPU does not support dynamic rank // Issue: CVS-66778 @@ -168,7 +157,18 @@ std::vector disabledTestPatterns() { R"(.*CTCLossLayerCPUTest.*ctcMergeRepeated=1.*)", // Issue: 71756 R"(.*Deconv_.*D_(Blocked|DW|1x1)_.*DeconvolutionLayerCPUTest\.CompareWithRefs.*inFmts=(nChw16c|nCdhw16c)_outFmts=(nChw16c|nCdhw16c)_primitive=jit_avx512_.*Fused=Multiply\(PerChannel\)\.Add\(PerChannel\).*)", - R"(.*smoke_GroupDeconv_(2|3)D_Blocked_BF16.*S=(\(2\.2\)|\(2\.2\.2\))_PB=(\(0\.0\)|\(0\.0\.0\))_PE=(\(0\.0\)|\(0\.0\.0\))_D=(\(1\.1\)|\(1\.1\.1\))_.*_O=64_G=4.*)" + R"(.*smoke_GroupDeconv_(2|3)D_Blocked_BF16.*S=(\(2\.2\)|\(2\.2\.2\))_PB=(\(0\.0\)|\(0\.0\.0\))_PE=(\(0\.0\)|\(0\.0\.0\))_D=(\(1\.1\)|\(1\.1\.1\))_.*_O=64_G=4.*)", + // Issue: 72150 + R"(.*smoke_SetBlobCPU/SetBlobTest.CompareWithRefs/Type=.*_Device=CPU_PrecisionInNet=BOOL.*)", + // Issue: 59594 + R"(smoke_ConversionLayerTest/ConversionLayerTest.CompareWithRefs.*BOOL.*)", + R"(smoke_ConversionLayerTest/ConversionLayerTest.CompareWithRefs.*MIXED.*)", + R"(smoke_ConversionLayerTest/ConversionLayerTest.CompareWithRefs.*Q78.*)", + R"(smoke_ConversionLayerTest/ConversionLayerTest.CompareWithRefs.*U4.*)", + R"(smoke_ConversionLayerTest/ConversionLayerTest.CompareWithRefs.*I4.*)", + R"(smoke_ConversionLayerTest/ConversionLayerTest.CompareWithRefs.*BIN.*)", + R"(smoke_ConversionLayerTest/ConversionLayerTest.CompareWithRefs.*CUSTOM.*)", + R"(smoke_ConversionLayerTest/ConversionLayerTest.CompareWithRefs.*UNSPECIFIED.*)", }; #define FIX_62820 0 diff --git 
a/src/tests/functional/plugin/shared/include/behavior/ov_executable_network/exec_graph_info.hpp b/src/tests/functional/plugin/shared/include/behavior/ov_executable_network/exec_graph_info.hpp index 65aac7c4766..2478213bd7d 100644 --- a/src/tests/functional/plugin/shared/include/behavior/ov_executable_network/exec_graph_info.hpp +++ b/src/tests/functional/plugin/shared/include/behavior/ov_executable_network/exec_graph_info.hpp @@ -108,11 +108,15 @@ TEST_P(OVExecGraphImportExportTest, importExportedFunction) { importedExecNet.input(0).get_tensor().get_partial_shape()); EXPECT_EQ(function->input(0).get_tensor().get_element_type(), importedExecNet.input(0).get_tensor().get_element_type()); + EXPECT_EQ(function->input(0).get_element_type(), + importedExecNet.input(0).get_tensor().get_element_type()); EXPECT_EQ(function->input(1).get_tensor().get_names(), importedExecNet.input(1).get_tensor().get_names()); EXPECT_EQ(function->input(1).get_tensor().get_partial_shape(), importedExecNet.input(1).get_tensor().get_partial_shape()); EXPECT_EQ(function->input(1).get_tensor().get_element_type(), importedExecNet.input(1).get_tensor().get_element_type()); + EXPECT_EQ(function->input(1).get_element_type(), + importedExecNet.input(1).get_tensor().get_element_type()); EXPECT_EQ(importedExecNet.input(0).get_node(), importedExecNet.input("data1").get_node()); EXPECT_NE(importedExecNet.input(1).get_node(), importedExecNet.input("data1").get_node()); EXPECT_EQ(importedExecNet.input(1).get_node(), importedExecNet.input("data2").get_node()); @@ -125,11 +129,15 @@ TEST_P(OVExecGraphImportExportTest, importExportedFunction) { importedExecNet.output(0).get_tensor().get_partial_shape()); EXPECT_EQ(function->output(0).get_tensor().get_element_type(), importedExecNet.output(0).get_tensor().get_element_type()); + EXPECT_EQ(function->output(0).get_element_type(), + importedExecNet.output(0).get_tensor().get_element_type()); EXPECT_EQ(function->output(1).get_tensor().get_names(), importedExecNet.output(1).get_tensor().get_names()); EXPECT_EQ(function->output(1).get_tensor().get_partial_shape(), importedExecNet.output(1).get_tensor().get_partial_shape()); EXPECT_EQ(function->output(1).get_tensor().get_element_type(), importedExecNet.output(1).get_tensor().get_element_type()); + EXPECT_EQ(function->output(1).get_element_type(), + importedExecNet.output(1).get_tensor().get_element_type()); EXPECT_EQ(importedExecNet.output(0).get_node(), importedExecNet.output("relu").get_node()); EXPECT_NE(importedExecNet.output(1).get_node(), importedExecNet.output("relu").get_node()); EXPECT_EQ(importedExecNet.output(1).get_node(), importedExecNet.output("concat").get_node()); diff --git a/src/tests/functional/plugin/shared/src/blob_tests/set_blob.cpp b/src/tests/functional/plugin/shared/src/blob_tests/set_blob.cpp index 073b78c66c5..ad8e0555683 100644 --- a/src/tests/functional/plugin/shared/src/blob_tests/set_blob.cpp +++ b/src/tests/functional/plugin/shared/src/blob_tests/set_blob.cpp @@ -43,15 +43,19 @@ std::string SetBlobTest::getTestCaseName(testing::TestParamInfo o inline void fillBlob(Blob::Ptr &blob) { switch (blob->getTensorDesc().getPrecision()) { #define CASE(X) case X: CommonTestUtils::fill_data_random(blob); break; - CASE(InferenceEngine::Precision::FP32) - CASE(InferenceEngine::Precision::U8) - CASE(InferenceEngine::Precision::U16) - CASE(InferenceEngine::Precision::I8) - CASE(InferenceEngine::Precision::I16) - CASE(InferenceEngine::Precision::I64) - CASE(InferenceEngine::Precision::U64) - 
CASE(InferenceEngine::Precision::I32) - CASE(InferenceEngine::Precision::BOOL) + CASE(Precision::U8) + CASE(Precision::I8) + CASE(Precision::U16) + CASE(Precision::I16) + CASE(Precision::U32) + CASE(Precision::I32) + CASE(Precision::U64) + CASE(Precision::I64) + CASE(Precision::BF16) + CASE(Precision::FP16) + CASE(Precision::FP32) + CASE(Precision::FP64) + CASE(Precision::BOOL) #undef CASE default: IE_THROW() << "Can't fill blob with precision: " << blob->getTensorDesc().getPrecision(); diff --git a/src/tests/functional/shared_test_classes/src/base/layer_test_utils.cpp b/src/tests/functional/shared_test_classes/src/base/layer_test_utils.cpp index 67dc3d5cb2c..7292ae0eb78 100644 --- a/src/tests/functional/shared_test_classes/src/base/layer_test_utils.cpp +++ b/src/tests/functional/shared_test_classes/src/base/layer_test_utils.cpp @@ -131,54 +131,54 @@ inline void callCompare(const std::pair(reinterpret_cast(expectedBuffer), - actualBuffer, size, threshold, abs_threshold); - break; - case ngraph::element::Type_t::i32: - LayerTestsCommon::Compare(reinterpret_cast(expectedBuffer), - actualBuffer, size, threshold, abs_threshold); - break; - case ngraph::element::Type_t::i16: - LayerTestsCommon::Compare(reinterpret_cast(expectedBuffer), + case ngraph::element::Type_t::boolean: + case ngraph::element::Type_t::u8: + LayerTestsCommon::Compare(reinterpret_cast(expectedBuffer), actualBuffer, size, threshold, abs_threshold); break; case ngraph::element::Type_t::i8: LayerTestsCommon::Compare(reinterpret_cast(expectedBuffer), actualBuffer, size, threshold, abs_threshold); break; - case ngraph::element::Type_t::u64: - LayerTestsCommon::Compare(reinterpret_cast(expectedBuffer), + case ngraph::element::Type_t::u16: + LayerTestsCommon::Compare(reinterpret_cast(expectedBuffer), actualBuffer, size, threshold, abs_threshold); break; + case ngraph::element::Type_t::i16: + LayerTestsCommon::Compare(reinterpret_cast(expectedBuffer), + actualBuffer, size, threshold, abs_threshold); + break; case ngraph::element::Type_t::u32: LayerTestsCommon::Compare(reinterpret_cast(expectedBuffer), actualBuffer, size, threshold, abs_threshold); break; - case ngraph::element::Type_t::u16: - LayerTestsCommon::Compare(reinterpret_cast(expectedBuffer), - actualBuffer, size, threshold, abs_threshold); - break; - case ngraph::element::Type_t::boolean: - case ngraph::element::Type_t::u8: - LayerTestsCommon::Compare(reinterpret_cast(expectedBuffer), + case ngraph::element::Type_t::i32: + LayerTestsCommon::Compare(reinterpret_cast(expectedBuffer), actualBuffer, size, threshold, abs_threshold); break; - case ngraph::element::Type_t::f64: - LayerTestsCommon::Compare(reinterpret_cast(expectedBuffer), - actualBuffer, size, threshold, abs_threshold); + case ngraph::element::Type_t::u64: + LayerTestsCommon::Compare(reinterpret_cast(expectedBuffer), + actualBuffer, size, threshold, abs_threshold); break; - case ngraph::element::Type_t::f32: - LayerTestsCommon::Compare(reinterpret_cast(expectedBuffer), - actualBuffer, size, threshold, abs_threshold); + case ngraph::element::Type_t::i64: + LayerTestsCommon::Compare(reinterpret_cast(expectedBuffer), + actualBuffer, size, threshold, abs_threshold); + break; + case ngraph::element::Type_t::bf16: + LayerTestsCommon::Compare(reinterpret_cast(expectedBuffer), + actualBuffer, size, threshold, abs_threshold); break; case ngraph::element::Type_t::f16: LayerTestsCommon::Compare(reinterpret_cast(expectedBuffer), actualBuffer, size, threshold, abs_threshold); break; - case ngraph::element::Type_t::bf16: - 
LayerTestsCommon::Compare(reinterpret_cast(expectedBuffer), - actualBuffer, size, threshold, abs_threshold); + case ngraph::element::Type_t::f32: + LayerTestsCommon::Compare(reinterpret_cast(expectedBuffer), + actualBuffer, size, threshold, abs_threshold); + break; + case ngraph::element::Type_t::f64: + LayerTestsCommon::Compare(reinterpret_cast(expectedBuffer), + actualBuffer, size, threshold, abs_threshold); break; case ngraph::element::Type_t::i4: { auto expectedOut = ngraph::helpers::convertOutputPrecision( @@ -230,14 +230,9 @@ void LayerTestsCommon::Compare(const std::pairsize(); switch (precision) { - case InferenceEngine::Precision::FP32: - callCompare(expected, reinterpret_cast(actualBuffer), size, threshold, abs_threshold); - break; - case InferenceEngine::Precision::I32: - callCompare(expected, reinterpret_cast(actualBuffer), size, threshold, abs_threshold); - break; - case InferenceEngine::Precision::I64: - callCompare(expected, reinterpret_cast(actualBuffer), size, threshold, abs_threshold); + case InferenceEngine::Precision::BOOL: + case InferenceEngine::Precision::U8: + callCompare(expected, reinterpret_cast(actualBuffer), size, threshold, abs_threshold); break; case InferenceEngine::Precision::I8: callCompare(expected, reinterpret_cast(actualBuffer), size, threshold, abs_threshold); @@ -248,19 +243,30 @@ void LayerTestsCommon::Compare(const std::pair(expected, reinterpret_cast(actualBuffer), size, threshold, abs_threshold); break; - case InferenceEngine::Precision::BOOL: - case InferenceEngine::Precision::U8: - callCompare(expected, reinterpret_cast(actualBuffer), size, threshold, abs_threshold); + case InferenceEngine::Precision::U32: + callCompare(expected, reinterpret_cast(actualBuffer), size, threshold, abs_threshold); + break; + case InferenceEngine::Precision::I32: + callCompare(expected, reinterpret_cast(actualBuffer), size, threshold, abs_threshold); break; case InferenceEngine::Precision::U64: callCompare(expected, reinterpret_cast(actualBuffer), size, threshold, abs_threshold); break; + case InferenceEngine::Precision::I64: + callCompare(expected, reinterpret_cast(actualBuffer), size, threshold, abs_threshold); + break; case InferenceEngine::Precision::BF16: callCompare(expected, reinterpret_cast(actualBuffer), size, threshold, abs_threshold); break; case InferenceEngine::Precision::FP16: callCompare(expected, reinterpret_cast(actualBuffer), size, threshold, abs_threshold); break; + case InferenceEngine::Precision::FP32: + callCompare(expected, reinterpret_cast(actualBuffer), size, threshold, abs_threshold); + break; + case InferenceEngine::Precision::FP64: + callCompare(expected, reinterpret_cast(actualBuffer), size, threshold, abs_threshold); + break; default: FAIL() << "Comparator for " << precision << " precision isn't supported"; } diff --git a/src/tests/functional/shared_test_classes/src/single_layer/conversion.cpp b/src/tests/functional/shared_test_classes/src/single_layer/conversion.cpp index 0c5e7a387a7..1320abbd362 100644 --- a/src/tests/functional/shared_test_classes/src/single_layer/conversion.cpp +++ b/src/tests/functional/shared_test_classes/src/single_layer/conversion.cpp @@ -28,6 +28,9 @@ std::string ConversionLayerTest::getTestCaseName(const testing::TestParamInfo> inputShape; From bd2e3de2953e4e379ec5cc41949a8c1af9088a49 Mon Sep 17 00:00:00 2001 From: Steve Yoo Date: Thu, 16 Dec 2021 23:48:10 +0900 Subject: [PATCH 27/27] Create LSTMCell-1 (#9073) --- .../template_plugin/backend/evaluates_map.cpp | 24 + .../template_plugin/backend/opset_int_tbl.hpp 
| 1 + .../functional/op_reference/lstm_cell.cpp | 589 ++++++++++++++---- 3 files changed, 477 insertions(+), 137 deletions(-) diff --git a/docs/template_plugin/backend/evaluates_map.cpp b/docs/template_plugin/backend/evaluates_map.cpp index 789cff5b4b1..c6a864f9727 100644 --- a/docs/template_plugin/backend/evaluates_map.cpp +++ b/docs/template_plugin/backend/evaluates_map.cpp @@ -1961,6 +1961,30 @@ bool evaluate(const shared_ptr& op, const HostTensorVector& out return true; } +template +bool evaluate(const shared_ptr& op, const HostTensorVector& outputs, const HostTensorVector& inputs) { + using T = typename element_type_traits::value_type; + runtime::reference::lstm_cell(inputs[0]->get_data_ptr(), + inputs[0]->get_shape(), + inputs[1]->get_data_ptr(), + inputs[1]->get_shape(), + inputs[2]->get_data_ptr(), + inputs[2]->get_shape(), + inputs[3]->get_data_ptr(), + inputs[3]->get_shape(), + inputs[4]->get_data_ptr(), + inputs[4]->get_shape(), + inputs[5]->get_data_ptr(), + inputs[5]->get_shape(), + outputs[0]->get_data_ptr(), + outputs[1]->get_data_ptr(), + op->get_activations()[0], + op->get_activations()[1], + op->get_activations()[2], + op->get_clip()); + return true; +} + template bool evaluate(const shared_ptr& op, const HostTensorVector& outputs, const HostTensorVector& inputs) { using T = typename element_type_traits::value_type; diff --git a/docs/template_plugin/backend/opset_int_tbl.hpp b/docs/template_plugin/backend/opset_int_tbl.hpp index fe1230d79d4..287bf9a0d11 100644 --- a/docs/template_plugin/backend/opset_int_tbl.hpp +++ b/docs/template_plugin/backend/opset_int_tbl.hpp @@ -20,6 +20,7 @@ NGRAPH_OP(Gelu, op::v0) NGRAPH_OP(GRN, op::v0) NGRAPH_OP(HardSigmoid, op::v0) NGRAPH_OP(LRN, ngraph::op::v0) +NGRAPH_OP(LSTMCell, op::v0) NGRAPH_OP(MVN, ngraph::op::v0) NGRAPH_OP(NormalizeL2, op::v0) NGRAPH_OP(PriorBox, ngraph::op::v0) diff --git a/docs/template_plugin/tests/functional/op_reference/lstm_cell.cpp b/docs/template_plugin/tests/functional/op_reference/lstm_cell.cpp index c28bbf0ba94..493224da17b 100644 --- a/docs/template_plugin/tests/functional/op_reference/lstm_cell.cpp +++ b/docs/template_plugin/tests/functional/op_reference/lstm_cell.cpp @@ -4,7 +4,8 @@ #include -#include "openvino/op/lstm_cell.hpp" +#include "openvino/opsets/opset4.hpp" +#include "openvino/opsets/opset1.hpp" #include "base_reference_test.hpp" using namespace reference_tests; @@ -12,13 +13,6 @@ using namespace ov; namespace { struct LSTMCellParams { - LSTMCellParams( - int32_t batchSize, int32_t inputSize, int32_t hiddenSize, int32_t gatesCount, - const Tensor& X, const Tensor& W, const Tensor& R, const Tensor& H_t, const Tensor& C_t, const Tensor& B, - const Tensor& Ho, const Tensor& Co, const std::string& testcaseName = "") : - batchSize(batchSize), inputSize(inputSize), hiddenSize(hiddenSize), gatesCount(gatesCount), - X(X), W(W), R(R), H_t(H_t), C_t(C_t), B(B), Ho(Ho), Co(Co), testcaseName(testcaseName) {} - int32_t batchSize; int32_t inputSize; int32_t hiddenSize; @@ -34,6 +28,22 @@ struct LSTMCellParams { std::string testcaseName; }; +struct Builder : ParamsBuilder { + REFERENCE_TESTS_ADD_SET_PARAM(Builder, batchSize); + REFERENCE_TESTS_ADD_SET_PARAM(Builder, inputSize); + REFERENCE_TESTS_ADD_SET_PARAM(Builder, hiddenSize); + REFERENCE_TESTS_ADD_SET_PARAM(Builder, gatesCount); + REFERENCE_TESTS_ADD_SET_PARAM(Builder, X); + REFERENCE_TESTS_ADD_SET_PARAM(Builder, W); + REFERENCE_TESTS_ADD_SET_PARAM(Builder, R); + REFERENCE_TESTS_ADD_SET_PARAM(Builder, H_t); + REFERENCE_TESTS_ADD_SET_PARAM(Builder, C_t); + 
REFERENCE_TESTS_ADD_SET_PARAM(Builder, B); + REFERENCE_TESTS_ADD_SET_PARAM(Builder, Ho); + REFERENCE_TESTS_ADD_SET_PARAM(Builder, Co); + REFERENCE_TESTS_ADD_SET_PARAM(Builder, testcaseName); +}; + class ReferenceLSTMCellTest : public testing::TestWithParam, public CommonReferenceTest { public: void SetUp() override { @@ -63,26 +73,24 @@ public: result << "_hoType=" << param.Ho.type; result << "_hoShape=" << param.Ho.shape; result << "_coType=" << param.Co.type; + result << "_coShape=" << param.Co.shape; if (param.testcaseName != "") { - result << "_coShape=" << param.Co.shape; result << "_=" << param.testcaseName; - } else { - result << "_coShape=" << param.Co.shape; } return result.str(); } private: static std::shared_ptr CreateFunction(const LSTMCellParams& params) { - const auto X = std::make_shared(params.X.type, params.X.shape); - const auto W = std::make_shared(params.W.type, params.W.shape); - const auto R = std::make_shared(params.R.type, params.R.shape); - const auto H_t = std::make_shared(params.H_t.type, params.H_t.shape); - const auto C_t = std::make_shared(params.C_t.type, params.C_t.shape); - const auto B = std::make_shared(params.B.type, params.B.shape); + const auto X = std::make_shared(params.X.type, params.X.shape); + const auto W = std::make_shared(params.W.type, params.W.shape); + const auto R = std::make_shared(params.R.type, params.R.shape); + const auto H_t = std::make_shared(params.H_t.type, params.H_t.shape); + const auto C_t = std::make_shared(params.C_t.type, params.C_t.shape); + const auto B = std::make_shared(params.B.type, params.B.shape); const auto lstm_cell = - std::make_shared(X, + std::make_shared(X, H_t, C_t, op::util::convert_lstm_node_format(W, op::util::LSTMWeightsFormat::IOFC), @@ -107,15 +115,15 @@ public: private: static std::shared_ptr CreateFunction(const LSTMCellParams& params) { - const auto X = std::make_shared(params.X.type, params.X.shape); - const auto W = std::make_shared(params.W.type, params.W.shape); - const auto R = std::make_shared(params.R.type, params.R.shape); - const auto H_t = std::make_shared(params.H_t.type, params.H_t.shape); - const auto C_t = std::make_shared(params.C_t.type, params.C_t.shape); - const auto B = std::make_shared(params.B.type, params.B.shape); + const auto X = std::make_shared(params.X.type, params.X.shape); + const auto W = std::make_shared(params.W.type, params.W.shape); + const auto R = std::make_shared(params.R.type, params.R.shape); + const auto H_t = std::make_shared(params.H_t.type, params.H_t.shape); + const auto C_t = std::make_shared(params.C_t.type, params.C_t.shape); + const auto B = std::make_shared(params.B.type, params.B.shape); const auto lstm_cell = - std::make_shared(X, + std::make_shared(X, H_t, C_t, op::util::convert_lstm_node_format(W, op::util::LSTMWeightsFormat::IOFC), @@ -142,15 +150,15 @@ private: static std::shared_ptr CreateFunction(const LSTMCellParams& params) { const float clip_threshold = 3.5f; - const auto X = std::make_shared(params.X.type, params.X.shape); - const auto W = std::make_shared(params.W.type, params.W.shape); - const auto R = std::make_shared(params.R.type, params.R.shape); - const auto H_t = std::make_shared(params.H_t.type, params.H_t.shape); - const auto C_t = std::make_shared(params.C_t.type, params.C_t.shape); - const auto B = std::make_shared(params.B.type, params.B.shape); + const auto X = std::make_shared(params.X.type, params.X.shape); + const auto W = std::make_shared(params.W.type, params.W.shape); + const auto R = std::make_shared(params.R.type, 
params.R.shape); + const auto H_t = std::make_shared(params.H_t.type, params.H_t.shape); + const auto C_t = std::make_shared(params.C_t.type, params.C_t.shape); + const auto B = std::make_shared(params.B.type, params.B.shape); const auto lstm_cell = - std::make_shared(X, + std::make_shared(X, H_t, C_t, W, @@ -179,36 +187,130 @@ TEST_P(ReferenceLSTMCellTestBiasClip, CompareWithRefs) { Exec(); } +class ReferenceLSTMCellV1Test : public ReferenceLSTMCellTest { +private: + static std::shared_ptr CreateFunction(const LSTMCellParams& params) { + const auto X = std::make_shared(params.X.type, params.X.shape); + const auto W = std::make_shared(params.W.type, params.W.shape); + const auto R = std::make_shared(params.R.type, params.R.shape); + const auto H_t = std::make_shared(params.H_t.type, params.H_t.shape); + const auto C_t = std::make_shared(params.C_t.type, params.C_t.shape); + const auto B = std::make_shared(params.B.type, params.B.shape); + + const auto lstm_cell = + std::make_shared(X, + H_t, + C_t, + op::util::convert_lstm_node_format(W, op::util::LSTMWeightsFormat::IOFC), + op::util::convert_lstm_node_format(R, op::util::LSTMWeightsFormat::IOFC), + op::util::convert_lstm_node_format(B, op::util::LSTMWeightsFormat::IOFC), + params.hiddenSize); + + auto function = std::make_shared(lstm_cell->outputs(), ParameterVector{X, H_t, C_t, W, R, B}); + return function; + } +}; + +class ReferenceLSTMCellV1TestBiasDefaultAttrs : public ReferenceLSTMCellTestBiasDefaultAttrs { +private: + static std::shared_ptr CreateFunction(const LSTMCellParams& params) { + const auto X = std::make_shared(params.X.type, params.X.shape); + const auto W = std::make_shared(params.W.type, params.W.shape); + const auto R = std::make_shared(params.R.type, params.R.shape); + const auto H_t = std::make_shared(params.H_t.type, params.H_t.shape); + const auto C_t = std::make_shared(params.C_t.type, params.C_t.shape); + const auto B = std::make_shared(params.B.type, params.B.shape); + + const auto lstm_cell = + std::make_shared(X, + H_t, + C_t, + op::util::convert_lstm_node_format(W, op::util::LSTMWeightsFormat::IOFC), + op::util::convert_lstm_node_format(R, op::util::LSTMWeightsFormat::IOFC), + op::util::convert_lstm_node_format(B, op::util::LSTMWeightsFormat::IOFC), + params.hiddenSize); + + auto function = std::make_shared(lstm_cell->outputs(), ParameterVector{X, H_t, C_t, W, R, B}); + return function; + } +}; + +class ReferenceLSTMCellV1TestBiasClip : public ReferenceLSTMCellTestBiasClip { +private: + static std::shared_ptr CreateFunction(const LSTMCellParams& params) { + const float clip_threshold = 3.5f; + + const auto X = std::make_shared(params.X.type, params.X.shape); + const auto W = std::make_shared(params.W.type, params.W.shape); + const auto R = std::make_shared(params.R.type, params.R.shape); + const auto H_t = std::make_shared(params.H_t.type, params.H_t.shape); + const auto C_t = std::make_shared(params.C_t.type, params.C_t.shape); + const auto B = std::make_shared(params.B.type, params.B.shape); + + const auto lstm_cell = + std::make_shared(X, + H_t, + C_t, + W, + R, + B, + params.hiddenSize, + op::LSTMWeightsFormat::IFCO, + std::vector{"sigmoid", "tanh", "tanh"}, + std::vector{}, + std::vector{}, + clip_threshold); + + auto function = std::make_shared(lstm_cell->outputs(), ParameterVector{X, H_t, C_t, W, R, B}); + return function; + } +}; + +TEST_P(ReferenceLSTMCellV1Test, CompareWithRefs) { + Exec(); +} + +TEST_P(ReferenceLSTMCellV1TestBiasDefaultAttrs, CompareWithRefs) { + Exec(); +} + 
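[Editor's note] The hardcoded Ho/Co vectors in the parameter generators below come from evaluating the LSTM recurrence directly. For readers checking those numbers by hand, here is a minimal, dependency-free sketch of one LSTMCell step. It assumes the canonical f, i, c, o gate ordering that op::util::convert_lstm_node_format produces from the IOFC tensors in these tests, sigmoid on the gates and tanh on candidate and output (the defaults exercised here); lstm_cell_step and its signature are illustrative, not an OpenVINO API.

    #include <cmath>
    #include <cstddef>
    #include <vector>

    static float sigmoidf(float x) { return 1.f / (1.f + std::exp(-x)); }

    // X: [batch, in], W: [4*hid, in], R: [4*hid, hid], B: [4*hid],
    // H/C: [batch, hid]; gate blocks in W/R/B assumed stacked as f, i, c, o.
    void lstm_cell_step(const std::vector<float>& X, const std::vector<float>& W,
                        const std::vector<float>& R, const std::vector<float>& B,
                        std::vector<float>& H, std::vector<float>& C,
                        size_t batch, size_t in, size_t hid, float clip = 0.f) {
        std::vector<float> Hn(H), Cn(C);  // write to copies: H must stay frozen
        for (size_t b = 0; b < batch; ++b) {
            for (size_t h = 0; h < hid; ++h) {
                float g[4];  // f, i, c, o pre-activations
                for (size_t k = 0; k < 4; ++k) {
                    float acc = B[k * hid + h];
                    for (size_t j = 0; j < in; ++j)
                        acc += W[(k * hid + h) * in + j] * X[b * in + j];
                    for (size_t j = 0; j < hid; ++j)
                        acc += R[(k * hid + h) * hid + j] * H[b * hid + j];
                    if (clip > 0.f)  // optional clipping, as in the BiasClip cases
                        acc = std::fmax(-clip, std::fmin(clip, acc));
                    g[k] = acc;
                }
                const float f = sigmoidf(g[0]), i = sigmoidf(g[1]);
                const float c = std::tanh(g[2]), o = sigmoidf(g[3]);
                Cn[b * hid + h] = f * C[b * hid + h] + i * c;        // -> Co
                Hn[b * hid + h] = o * std::tanh(Cn[b * hid + h]);    // -> Ho
            }
        }
        H.swap(Hn);
        C.swap(Cn);
    }

Under these assumptions, feeding the converted W/R/B and the H_t/C_t values from a test case above should reproduce the corresponding Ho/Co vectors to float tolerance.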
+TEST_P(ReferenceLSTMCellV1TestBiasClip, CompareWithRefs) { + Exec(); +} + template std::vector generateParams() { using T = typename element_type_traits::value_type; std::vector params { - LSTMCellParams( - 2, 3, 3, 4, - Tensor(ET, {2, 3}, std::vector{ - 0.81342685f, 0.84108883f, 0.8152282f, 0.46893653f, 0.0901856f, 0.37088776f}), - Tensor(ET, {4 * 3, 3}, std::vector{ - 3.3330739e-01f, 3.6229487e-04f, 4.6773660e-01f, 4.3046016e-01f, 7.3950343e-02f, 3.8063636e-01f, - 9.6921772e-01f, 9.6897459e-01f, 6.2964785e-01f, 3.1134409e-01f, 8.4709978e-01f, 9.4928098e-01f, - 6.1676943e-01f, 6.6020679e-01f, 1.9072217e-01f, 8.8032126e-02f, 4.0472135e-01f, 6.8342745e-01f, - 8.3432144e-01f, 4.4928190e-01f, 7.9524308e-01f, 5.3966165e-01f, 8.5936421e-01f, 8.3136767e-01f, - 5.5125546e-02f, 4.7791195e-01f, 3.5788772e-01f, 6.7507404e-01f, 2.1716513e-01f, 2.7473119e-01f, - 3.3999152e-02f, 9.6835363e-01f, 3.7581277e-01f, 2.4026000e-01f, 6.7418844e-01f, 3.4199652e-01f}), - Tensor(ET, {4 * 3, 3}, std::vector{ - 0.0987983f, 0.52032113f, 0.5848073f, 0.5356095f, 0.74497133f, 0.73260087f, - 0.1700787f, 0.45684233f, 0.1495722f, 0.42734373f, 0.4433832f, 0.25906256f, - 0.03854987f, 0.47480518f, 0.37215272f, 0.99890584f, 0.74019486f, 0.3518967f, - 0.6881257f, 0.8170279f, 0.54088944f, 0.81225616f, 0.14619833f, 0.42941234f, - 0.86843914f, 0.45967972f, 0.6237719f, 0.11074839f, 0.6029616f, 0.3149305f, - 0.46504205f, 0.5843412f, 0.8733427f, 0.7687243f, 0.07074859f, 0.39188156f}), - Tensor(ET, {2, 3}, std::vector{ - 0.77956f, 0.5331557f, 0.04297554f, 0.7962175f, 0.7635707f, 0.11989366f}), - Tensor(ET, {2, 3}, std::vector{ - 0.8488452f, 0.18851636f, 0.5020695f, 0.29716516f, 0.06740791f, 0.45384037f}), - Tensor(ET, {4 * 3}, std::vector(4 * 3, 0.f)), - Tensor(ET, {2, 3}, std::vector{0.81457126f, 0.61109227f, 0.769522f, 0.52239674f, 0.4324641f, 0.63183f}), - Tensor(ET, {2, 3}, std::vector{1.4444952f, 0.9635685f, 1.2875274f, 0.8053419f, 0.7184521f, 0.95803297f}), - "lstm_cell_zero_bias_default_attrs"), + Builder {} + .batchSize(2) + .inputSize(3) + .hiddenSize(3) + .gatesCount(4) + .X(Tensor(ET, {2, 3}, std::vector{ + 0.81342685f, 0.84108883f, 0.8152282f, 0.46893653f, 0.0901856f, 0.37088776f})) + .W(Tensor(ET, {4 * 3, 3}, std::vector{ + 3.3330739e-01f, 3.6229487e-04f, 4.6773660e-01f, 4.3046016e-01f, 7.3950343e-02f, 3.8063636e-01f, + 9.6921772e-01f, 9.6897459e-01f, 6.2964785e-01f, 3.1134409e-01f, 8.4709978e-01f, 9.4928098e-01f, + 6.1676943e-01f, 6.6020679e-01f, 1.9072217e-01f, 8.8032126e-02f, 4.0472135e-01f, 6.8342745e-01f, + 8.3432144e-01f, 4.4928190e-01f, 7.9524308e-01f, 5.3966165e-01f, 8.5936421e-01f, 8.3136767e-01f, + 5.5125546e-02f, 4.7791195e-01f, 3.5788772e-01f, 6.7507404e-01f, 2.1716513e-01f, 2.7473119e-01f, + 3.3999152e-02f, 9.6835363e-01f, 3.7581277e-01f, 2.4026000e-01f, 6.7418844e-01f, 3.4199652e-01f})) + .R(Tensor(ET, {4 * 3, 3}, std::vector{ + 0.0987983f, 0.52032113f, 0.5848073f, 0.5356095f, 0.74497133f, 0.73260087f, + 0.1700787f, 0.45684233f, 0.1495722f, 0.42734373f, 0.4433832f, 0.25906256f, + 0.03854987f, 0.47480518f, 0.37215272f, 0.99890584f, 0.74019486f, 0.3518967f, + 0.6881257f, 0.8170279f, 0.54088944f, 0.81225616f, 0.14619833f, 0.42941234f, + 0.86843914f, 0.45967972f, 0.6237719f, 0.11074839f, 0.6029616f, 0.3149305f, + 0.46504205f, 0.5843412f, 0.8733427f, 0.7687243f, 0.07074859f, 0.39188156f})) + .H_t(Tensor(ET, {2, 3}, std::vector{ + 0.77956f, 0.5331557f, 0.04297554f, 0.7962175f, 0.7635707f, 0.11989366f})) + .C_t(Tensor(ET, {2, 3}, std::vector{ + 0.8488452f, 0.18851636f, 0.5020695f, 0.29716516f, 0.06740791f, 
0.45384037f})) + .B(Tensor(ET, {4 * 3}, std::vector(4 * 3, 0.f))) + .Ho(Tensor(ET, {2, 3}, std::vector{0.81457126f, 0.61109227f, 0.769522f, 0.52239674f, 0.4324641f, 0.63183f})) + .Co(Tensor(ET, {2, 3}, std::vector{1.4444952f, 0.9635685f, 1.2875274f, 0.8053419f, 0.7184521f, 0.95803297f})) + .testcaseName("lstm_cell_zero_bias_default_attrs") }; return params; } @@ -232,53 +334,56 @@ template std::vector generateParamsBiasDefaultAttrs() { using T = typename element_type_traits::value_type; std::vector params { - LSTMCellParams( - 2, 3, 3, 4, - Tensor(ET, {2, 3}, std::vector{ - 0.81342685f, 0.84108883f, 0.8152282f, 0.46893653f, 0.0901856f, 0.37088776f}), - Tensor(ET, {4 * 3, 3}, std::vector{ - 3.3330739e-01f, 3.6229487e-04f, 4.6773660e-01f, 4.3046016e-01f, 7.3950343e-02f, 3.8063636e-01f, - 9.6921772e-01f, 9.6897459e-01f, 6.2964785e-01f, 3.1134409e-01f, 8.4709978e-01f, 9.4928098e-01f, - 6.1676943e-01f, 6.6020679e-01f, 1.9072217e-01f, 8.8032126e-02f, 4.0472135e-01f, 6.8342745e-01f, - 8.3432144e-01f, 4.4928190e-01f, 7.9524308e-01f, 5.3966165e-01f, 8.5936421e-01f, 8.3136767e-01f, - 5.5125546e-02f, 4.7791195e-01f, 3.5788772e-01f, 6.7507404e-01f, 2.1716513e-01f, 2.7473119e-01f, - 3.3999152e-02f, 9.6835363e-01f, 3.7581277e-01f, 2.4026000e-01f, 6.7418844e-01f, 3.4199652e-01f}), - Tensor(ET, {4 * 3, 3}, std::vector{ - 0.0987983f, 0.52032113f, 0.5848073f, 0.5356095f, 0.74497133f, 0.73260087f, - 0.1700787f, 0.45684233f, 0.1495722f, 0.42734373f, 0.4433832f, 0.25906256f, - 0.03854987f, 0.47480518f, 0.37215272f, 0.99890584f, 0.74019486f, 0.3518967f, - 0.6881257f, 0.8170279f, 0.54088944f, 0.81225616f, 0.14619833f, 0.42941234f, - 0.86843914f, 0.45967972f, 0.6237719f, 0.11074839f, 0.6029616f, 0.3149305f, - 0.46504205f, 0.5843412f, 0.8733427f, 0.7687243f, 0.07074859f, 0.39188156f}), - Tensor(ET, {2, 3}, std::vector{ - 0.77956f, 0.5331557f, 0.04297554f, 0.7962175f, 0.7635707f, 0.11989366f}), - Tensor(ET, {2, 3}, std::vector{ - 0.8488452f, 0.18851636f, 0.5020695f, 0.29716516f, 0.06740791f, 0.45384037f}), - Tensor(ET, {4 * 3}, std::vector{1.07393714f, - 1.15248052f, - 1.16671345f, - 0.21450312f, - 1.2380678f, - 1.51688835f, - 0.46718366f, - 0.91810346f, - 1.1274234f, - 0.51022074f, - 1.11389844f, - 0.74174305f}), - Tensor(ET, {2, 3}, std::vector{0.81014400720596313, + Builder {} + .batchSize(2) + .inputSize(3) + .hiddenSize(3) + .gatesCount(4) + .X(Tensor(ET, {2, 3}, std::vector{ + 0.81342685f, 0.84108883f, 0.8152282f, 0.46893653f, 0.0901856f, 0.37088776f})) + .W(Tensor(ET, {4 * 3, 3}, std::vector{ + 3.3330739e-01f, 3.6229487e-04f, 4.6773660e-01f, 4.3046016e-01f, 7.3950343e-02f, 3.8063636e-01f, + 9.6921772e-01f, 9.6897459e-01f, 6.2964785e-01f, 3.1134409e-01f, 8.4709978e-01f, 9.4928098e-01f, + 6.1676943e-01f, 6.6020679e-01f, 1.9072217e-01f, 8.8032126e-02f, 4.0472135e-01f, 6.8342745e-01f, + 8.3432144e-01f, 4.4928190e-01f, 7.9524308e-01f, 5.3966165e-01f, 8.5936421e-01f, 8.3136767e-01f, + 5.5125546e-02f, 4.7791195e-01f, 3.5788772e-01f, 6.7507404e-01f, 2.1716513e-01f, 2.7473119e-01f, + 3.3999152e-02f, 9.6835363e-01f, 3.7581277e-01f, 2.4026000e-01f, 6.7418844e-01f, 3.4199652e-01f})) + .R(Tensor(ET, {4 * 3, 3}, std::vector{ + 0.0987983f, 0.52032113f, 0.5848073f, 0.5356095f, 0.74497133f, 0.73260087f, + 0.1700787f, 0.45684233f, 0.1495722f, 0.42734373f, 0.4433832f, 0.25906256f, + 0.03854987f, 0.47480518f, 0.37215272f, 0.99890584f, 0.74019486f, 0.3518967f, + 0.6881257f, 0.8170279f, 0.54088944f, 0.81225616f, 0.14619833f, 0.42941234f, + 0.86843914f, 0.45967972f, 0.6237719f, 0.11074839f, 0.6029616f, 0.3149305f, + 0.46504205f, 
0.5843412f, 0.8733427f, 0.7687243f, 0.07074859f, 0.39188156f})) + .H_t(Tensor(ET, {2, 3}, std::vector{ + 0.77956f, 0.5331557f, 0.04297554f, 0.7962175f, 0.7635707f, 0.11989366f})) + .C_t(Tensor(ET, {2, 3}, std::vector{ + 0.8488452f, 0.18851636f, 0.5020695f, 0.29716516f, 0.06740791f, 0.45384037f})) + .B(Tensor(ET, {4 * 3}, std::vector{1.07393714f, + 1.15248052f, + 1.16671345f, + 0.21450312f, + 1.2380678f, + 1.51688835f, + 0.46718366f, + 0.91810346f, + 1.1274234f, + 0.51022074f, + 1.11389844f, + 0.74174305f})) + .Ho(Tensor(ET, {2, 3}, std::vector{0.81014400720596313, 0.76665538549423218, 0.82509011030197144, 0.6479143500328064, 0.66586339473724365, - 0.74838578701019287}), - Tensor(ET, {2, 3}, std::vector{1.6800162792205811, + 0.74838578701019287})) + .Co(Tensor(ET, {2, 3}, std::vector{1.6800162792205811, 1.1150213479995728, 1.4578367471694946, 1.0649888515472412, 0.93761754035949707, - 1.3659683465957642}), - "lstm_cell_bias_default_attrs"), + 1.3659683465957642})) + .testcaseName("lstm_cell_bias_default_attrs"), }; return params; } @@ -302,53 +407,56 @@ template std::vector generateParamsBiasClip() { using T = typename element_type_traits::value_type; std::vector params { - LSTMCellParams( - 2, 3, 3, 4, - Tensor(ET, {2, 3}, std::vector{ - 0.81342685f, 0.84108883f, 0.8152282f, 0.46893653f, 0.0901856f, 0.37088776f}), - Tensor(ET, {4 * 3, 3}, std::vector{ - 3.3330739e-01f, 3.6229487e-04f, 4.6773660e-01f, 4.3046016e-01f, 7.3950343e-02f, 3.8063636e-01f, - 9.6921772e-01f, 9.6897459e-01f, 6.2964785e-01f, 3.1134409e-01f, 8.4709978e-01f, 9.4928098e-01f, - 6.1676943e-01f, 6.6020679e-01f, 1.9072217e-01f, 8.8032126e-02f, 4.0472135e-01f, 6.8342745e-01f, - 8.3432144e-01f, 4.4928190e-01f, 7.9524308e-01f, 5.3966165e-01f, 8.5936421e-01f, 8.3136767e-01f, - 5.5125546e-02f, 4.7791195e-01f, 3.5788772e-01f, 6.7507404e-01f, 2.1716513e-01f, 2.7473119e-01f, - 3.3999152e-02f, 9.6835363e-01f, 3.7581277e-01f, 2.4026000e-01f, 6.7418844e-01f, 3.4199652e-01f}), - Tensor(ET, {4 * 3, 3}, std::vector{ - 0.0987983f, 0.52032113f, 0.5848073f, 0.5356095f, 0.74497133f, 0.73260087f, - 0.1700787f, 0.45684233f, 0.1495722f, 0.42734373f, 0.4433832f, 0.25906256f, - 0.03854987f, 0.47480518f, 0.37215272f, 0.99890584f, 0.74019486f, 0.3518967f, - 0.6881257f, 0.8170279f, 0.54088944f, 0.81225616f, 0.14619833f, 0.42941234f, - 0.86843914f, 0.45967972f, 0.6237719f, 0.11074839f, 0.6029616f, 0.3149305f, - 0.46504205f, 0.5843412f, 0.8733427f, 0.7687243f, 0.07074859f, 0.39188156f}), - Tensor(ET, {2, 3}, std::vector{ - 0.77956f, 0.5331557f, 0.04297554f, 0.7962175f, 0.7635707f, 0.11989366f}), - Tensor(ET, {2, 3}, std::vector{ - 0.8488452f, 0.18851636f, 0.5020695f, 0.29716516f, 0.06740791f, 0.45384037f}), - Tensor(ET, {4 * 3}, std::vector{1.07393714f, - 1.15248052f, - 1.16671345f, - 0.21450312f, - 1.2380678f, - 1.51688835f, - 0.46718366f, - 0.91810346f, - 1.1274234f, - 0.51022074f, - 1.11389844f, - 0.74174305f}), - Tensor(ET, {2, 3}, std::vector{0.81014400720596313, + Builder {} + .batchSize(2) + .inputSize(3) + .hiddenSize(3) + .gatesCount(4) + .X(Tensor(ET, {2, 3}, std::vector{ + 0.81342685f, 0.84108883f, 0.8152282f, 0.46893653f, 0.0901856f, 0.37088776f})) + .W(Tensor(ET, {4 * 3, 3}, std::vector{ + 3.3330739e-01f, 3.6229487e-04f, 4.6773660e-01f, 4.3046016e-01f, 7.3950343e-02f, 3.8063636e-01f, + 9.6921772e-01f, 9.6897459e-01f, 6.2964785e-01f, 3.1134409e-01f, 8.4709978e-01f, 9.4928098e-01f, + 6.1676943e-01f, 6.6020679e-01f, 1.9072217e-01f, 8.8032126e-02f, 4.0472135e-01f, 6.8342745e-01f, + 8.3432144e-01f, 4.4928190e-01f, 7.9524308e-01f, 
5.3966165e-01f, 8.5936421e-01f, 8.3136767e-01f, + 5.5125546e-02f, 4.7791195e-01f, 3.5788772e-01f, 6.7507404e-01f, 2.1716513e-01f, 2.7473119e-01f, + 3.3999152e-02f, 9.6835363e-01f, 3.7581277e-01f, 2.4026000e-01f, 6.7418844e-01f, 3.4199652e-01f})) + .R(Tensor(ET, {4 * 3, 3}, std::vector{ + 0.0987983f, 0.52032113f, 0.5848073f, 0.5356095f, 0.74497133f, 0.73260087f, + 0.1700787f, 0.45684233f, 0.1495722f, 0.42734373f, 0.4433832f, 0.25906256f, + 0.03854987f, 0.47480518f, 0.37215272f, 0.99890584f, 0.74019486f, 0.3518967f, + 0.6881257f, 0.8170279f, 0.54088944f, 0.81225616f, 0.14619833f, 0.42941234f, + 0.86843914f, 0.45967972f, 0.6237719f, 0.11074839f, 0.6029616f, 0.3149305f, + 0.46504205f, 0.5843412f, 0.8733427f, 0.7687243f, 0.07074859f, 0.39188156f})) + .H_t(Tensor(ET, {2, 3}, std::vector{ + 0.77956f, 0.5331557f, 0.04297554f, 0.7962175f, 0.7635707f, 0.11989366f})) + .C_t(Tensor(ET, {2, 3}, std::vector{ + 0.8488452f, 0.18851636f, 0.5020695f, 0.29716516f, 0.06740791f, 0.45384037f})) + .B(Tensor(ET, {4 * 3}, std::vector{1.07393714f, + 1.15248052f, + 1.16671345f, + 0.21450312f, + 1.2380678f, + 1.51688835f, + 0.46718366f, + 0.91810346f, + 1.1274234f, + 0.51022074f, + 1.11389844f, + 0.74174305f})) + .Ho(Tensor(ET, {2, 3}, std::vector{0.81014400720596313, 0.76665538549423218, 0.82387429475784302, 0.6479143500328064, 0.66586339473724365, - 0.74838578701019287}), - Tensor(ET, {2, 3}, std::vector{1.6800162792205811, + 0.74838578701019287})) + .Co(Tensor(ET, {2, 3}, std::vector{1.6800162792205811, 1.1150213479995728, 1.4510968923568726, 1.0649888515472412, 0.93761754035949707, - 1.3659683465957642}), - "lstm_cell_bias_clip"), + 1.3659683465957642})) + .testcaseName("lstm_cell_bias_clip"), }; return params; } @@ -376,4 +484,211 @@ INSTANTIATE_TEST_SUITE_P(smoke_LSTMCell_With_Hardcoded_Refs, ReferenceLSTMCellTe INSTANTIATE_TEST_SUITE_P(smoke_LSTMCell_With_Hardcoded_Refs, ReferenceLSTMCellTestBiasClip, testing::ValuesIn(generateCombinedParamsBiasClip()), ReferenceLSTMCellTest::getTestCaseName); -} // namespace + +template +std::vector generateParamsV1() { + using T = typename element_type_traits::value_type; + std::vector params { + Builder {} + .batchSize(2) + .inputSize(3) + .hiddenSize(3) + .gatesCount(4) + .X(Tensor(ET, {2, 3}, std::vector{ + 0.81342685f, 0.84108883f, 0.8152282f, 0.46893653f, 0.0901856f, 0.37088776f})) + .W(Tensor(ET, {4 * 3, 3}, std::vector{ + 3.3330739e-01f, 3.6229487e-04f, 4.6773660e-01f, 4.3046016e-01f, 7.3950343e-02f, 3.8063636e-01f, + 9.6921772e-01f, 9.6897459e-01f, 6.2964785e-01f, 3.1134409e-01f, 8.4709978e-01f, 9.4928098e-01f, + 6.1676943e-01f, 6.6020679e-01f, 1.9072217e-01f, 8.8032126e-02f, 4.0472135e-01f, 6.8342745e-01f, + 8.3432144e-01f, 4.4928190e-01f, 7.9524308e-01f, 5.3966165e-01f, 8.5936421e-01f, 8.3136767e-01f, + 5.5125546e-02f, 4.7791195e-01f, 3.5788772e-01f, 6.7507404e-01f, 2.1716513e-01f, 2.7473119e-01f, + 3.3999152e-02f, 9.6835363e-01f, 3.7581277e-01f, 2.4026000e-01f, 6.7418844e-01f, 3.4199652e-01f})) + .R(Tensor(ET, {4 * 3, 3}, std::vector{ + 0.0987983f, 0.52032113f, 0.5848073f, 0.5356095f, 0.74497133f, 0.73260087f, + 0.1700787f, 0.45684233f, 0.1495722f, 0.42734373f, 0.4433832f, 0.25906256f, + 0.03854987f, 0.47480518f, 0.37215272f, 0.99890584f, 0.74019486f, 0.3518967f, + 0.6881257f, 0.8170279f, 0.54088944f, 0.81225616f, 0.14619833f, 0.42941234f, + 0.86843914f, 0.45967972f, 0.6237719f, 0.11074839f, 0.6029616f, 0.3149305f, + 0.46504205f, 0.5843412f, 0.8733427f, 0.7687243f, 0.07074859f, 0.39188156f})) + .H_t(Tensor(ET, {2, 3}, std::vector{ + 0.77956f, 0.5331557f, 
+            .C_t(Tensor(ET, {2, 3}, std::vector<T>{
+                0.8488452f, 0.18851636f, 0.5020695f, 0.29716516f, 0.06740791f, 0.45384037f}))
+            .B(Tensor(ET, {4 * 3}, std::vector<T>(4 * 3, 0.f)))
+            .Ho(Tensor(ET, {2, 3}, std::vector<T>{0.81457126f, 0.61109227f, 0.769522f, 0.52239674f, 0.4324641f, 0.63183f}))
+            .Co(Tensor(ET, {2, 3}, std::vector<T>{1.4444952f, 0.9635685f, 1.2875274f, 0.8053419f, 0.7184521f, 0.95803297f}))
+            .testcaseName("lstm_cell_v1_zero_bias_default_attrs")
+    };
+    return params;
+}
+
+std::vector<LSTMCellParams> generateCombinedParamsV1() {
+    const std::vector<std::vector<LSTMCellParams>> generatedParams {
+        generateParamsV1<element::Type_t::bf16>(),
+        generateParamsV1<element::Type_t::f16>(),
+        generateParamsV1<element::Type_t::f32>(),
+        generateParamsV1<element::Type_t::f64>(),
+    };
+    std::vector<LSTMCellParams> combinedParams;
+
+    for (const auto& params : generatedParams) {
+        combinedParams.insert(combinedParams.end(), params.begin(), params.end());
+    }
+    return combinedParams;
+}
+
+template <element::Type_t ET>
+std::vector<LSTMCellParams> generateParamsBiasDefaultAttrsV1() {
+    using T = typename element_type_traits<ET>::value_type;
+    std::vector<LSTMCellParams> params {
+        Builder {}
+            .batchSize(2)
+            .inputSize(3)
+            .hiddenSize(3)
+            .gatesCount(4)
+            .X(Tensor(ET, {2, 3}, std::vector<T>{
+                0.81342685f, 0.84108883f, 0.8152282f, 0.46893653f, 0.0901856f, 0.37088776f}))
+            .W(Tensor(ET, {4 * 3, 3}, std::vector<T>{
+                3.3330739e-01f, 3.6229487e-04f, 4.6773660e-01f, 4.3046016e-01f, 7.3950343e-02f, 3.8063636e-01f,
+                9.6921772e-01f, 9.6897459e-01f, 6.2964785e-01f, 3.1134409e-01f, 8.4709978e-01f, 9.4928098e-01f,
+                6.1676943e-01f, 6.6020679e-01f, 1.9072217e-01f, 8.8032126e-02f, 4.0472135e-01f, 6.8342745e-01f,
+                8.3432144e-01f, 4.4928190e-01f, 7.9524308e-01f, 5.3966165e-01f, 8.5936421e-01f, 8.3136767e-01f,
+                5.5125546e-02f, 4.7791195e-01f, 3.5788772e-01f, 6.7507404e-01f, 2.1716513e-01f, 2.7473119e-01f,
+                3.3999152e-02f, 9.6835363e-01f, 3.7581277e-01f, 2.4026000e-01f, 6.7418844e-01f, 3.4199652e-01f}))
+            .R(Tensor(ET, {4 * 3, 3}, std::vector<T>{
+                0.0987983f, 0.52032113f, 0.5848073f, 0.5356095f, 0.74497133f, 0.73260087f,
+                0.1700787f, 0.45684233f, 0.1495722f, 0.42734373f, 0.4433832f, 0.25906256f,
+                0.03854987f, 0.47480518f, 0.37215272f, 0.99890584f, 0.74019486f, 0.3518967f,
+                0.6881257f, 0.8170279f, 0.54088944f, 0.81225616f, 0.14619833f, 0.42941234f,
+                0.86843914f, 0.45967972f, 0.6237719f, 0.11074839f, 0.6029616f, 0.3149305f,
+                0.46504205f, 0.5843412f, 0.8733427f, 0.7687243f, 0.07074859f, 0.39188156f}))
+            .H_t(Tensor(ET, {2, 3}, std::vector<T>{
+                0.77956f, 0.5331557f, 0.04297554f, 0.7962175f, 0.7635707f, 0.11989366f}))
+            .C_t(Tensor(ET, {2, 3}, std::vector<T>{
+                0.8488452f, 0.18851636f, 0.5020695f, 0.29716516f, 0.06740791f, 0.45384037f}))
+            .B(Tensor(ET, {4 * 3}, std::vector<T>{1.07393714f,
+                                                  1.15248052f,
+                                                  1.16671345f,
+                                                  0.21450312f,
+                                                  1.2380678f,
+                                                  1.51688835f,
+                                                  0.46718366f,
+                                                  0.91810346f,
+                                                  1.1274234f,
+                                                  0.51022074f,
+                                                  1.11389844f,
+                                                  0.74174305f}))
+            .Ho(Tensor(ET, {2, 3}, std::vector<T>{0.81014400720596313,
+                                                  0.76665538549423218,
+                                                  0.82509011030197144,
+                                                  0.6479143500328064,
+                                                  0.66586339473724365,
+                                                  0.74838578701019287}))
+            .Co(Tensor(ET, {2, 3}, std::vector<T>{1.6800162792205811,
+                                                  1.1150213479995728,
+                                                  1.4578367471694946,
+                                                  1.0649888515472412,
+                                                  0.93761754035949707,
+                                                  1.3659683465957642}))
+            .testcaseName("lstm_cell_v1_bias_default_attrs"),
+    };
+    return params;
+}
+
+std::vector<LSTMCellParams> generateCombinedParamsBiasDefaultAttrsV1() {
+    const std::vector<std::vector<LSTMCellParams>> generatedParams {
+        generateParamsBiasDefaultAttrsV1<element::Type_t::bf16>(),
+        generateParamsBiasDefaultAttrsV1<element::Type_t::f16>(),
+        generateParamsBiasDefaultAttrsV1<element::Type_t::f32>(),
+        generateParamsBiasDefaultAttrsV1<element::Type_t::f64>(),
+    };
+    std::vector<LSTMCellParams> combinedParams;
+
+    for (const auto& params : generatedParams) {
+        combinedParams.insert(combinedParams.end(), params.begin(), params.end());
+    }
+    return combinedParams;
+}
+
+template <element::Type_t ET>
+std::vector<LSTMCellParams> generateParamsBiasClipV1() {
+    using T = typename element_type_traits<ET>::value_type;
+    std::vector<LSTMCellParams> params {
+        Builder {}
+            .batchSize(2)
+            .inputSize(3)
+            .hiddenSize(3)
+            .gatesCount(4)
+            .X(Tensor(ET, {2, 3}, std::vector<T>{
+                0.81342685f, 0.84108883f, 0.8152282f, 0.46893653f, 0.0901856f, 0.37088776f}))
+            .W(Tensor(ET, {4 * 3, 3}, std::vector<T>{
+                3.3330739e-01f, 3.6229487e-04f, 4.6773660e-01f, 4.3046016e-01f, 7.3950343e-02f, 3.8063636e-01f,
+                9.6921772e-01f, 9.6897459e-01f, 6.2964785e-01f, 3.1134409e-01f, 8.4709978e-01f, 9.4928098e-01f,
+                6.1676943e-01f, 6.6020679e-01f, 1.9072217e-01f, 8.8032126e-02f, 4.0472135e-01f, 6.8342745e-01f,
+                8.3432144e-01f, 4.4928190e-01f, 7.9524308e-01f, 5.3966165e-01f, 8.5936421e-01f, 8.3136767e-01f,
+                5.5125546e-02f, 4.7791195e-01f, 3.5788772e-01f, 6.7507404e-01f, 2.1716513e-01f, 2.7473119e-01f,
+                3.3999152e-02f, 9.6835363e-01f, 3.7581277e-01f, 2.4026000e-01f, 6.7418844e-01f, 3.4199652e-01f}))
+            .R(Tensor(ET, {4 * 3, 3}, std::vector<T>{
+                0.0987983f, 0.52032113f, 0.5848073f, 0.5356095f, 0.74497133f, 0.73260087f,
+                0.1700787f, 0.45684233f, 0.1495722f, 0.42734373f, 0.4433832f, 0.25906256f,
+                0.03854987f, 0.47480518f, 0.37215272f, 0.99890584f, 0.74019486f, 0.3518967f,
+                0.6881257f, 0.8170279f, 0.54088944f, 0.81225616f, 0.14619833f, 0.42941234f,
+                0.86843914f, 0.45967972f, 0.6237719f, 0.11074839f, 0.6029616f, 0.3149305f,
+                0.46504205f, 0.5843412f, 0.8733427f, 0.7687243f, 0.07074859f, 0.39188156f}))
+            .H_t(Tensor(ET, {2, 3}, std::vector<T>{
+                0.77956f, 0.5331557f, 0.04297554f, 0.7962175f, 0.7635707f, 0.11989366f}))
+            .C_t(Tensor(ET, {2, 3}, std::vector<T>{
+                0.8488452f, 0.18851636f, 0.5020695f, 0.29716516f, 0.06740791f, 0.45384037f}))
+            .B(Tensor(ET, {4 * 3}, std::vector<T>{1.07393714f,
+                                                  1.15248052f,
+                                                  1.16671345f,
+                                                  0.21450312f,
+                                                  1.2380678f,
+                                                  1.51688835f,
+                                                  0.46718366f,
+                                                  0.91810346f,
+                                                  1.1274234f,
+                                                  0.51022074f,
+                                                  1.11389844f,
+                                                  0.74174305f}))
+            .Ho(Tensor(ET, {2, 3}, std::vector<T>{0.81014400720596313,
+                                                  0.76665538549423218,
+                                                  0.82387429475784302,
+                                                  0.6479143500328064,
+                                                  0.66586339473724365,
+                                                  0.74838578701019287}))
+            .Co(Tensor(ET, {2, 3}, std::vector<T>{1.6800162792205811,
+                                                  1.1150213479995728,
+                                                  1.4510968923568726,
+                                                  1.0649888515472412,
+                                                  0.93761754035949707,
+                                                  1.3659683465957642}))
+            .testcaseName("lstm_cell_v1_bias_clip"),
+    };
+    return params;
+}
+
+std::vector<LSTMCellParams> generateCombinedParamsBiasClipV1() {
+    const std::vector<std::vector<LSTMCellParams>> generatedParams {
+        generateParamsBiasClipV1<element::Type_t::bf16>(),
+        generateParamsBiasClipV1<element::Type_t::f16>(),
+        generateParamsBiasClipV1<element::Type_t::f32>(),
+        generateParamsBiasClipV1<element::Type_t::f64>(),
+    };
+    std::vector<LSTMCellParams> combinedParams;
+
+    for (const auto& params : generatedParams) {
+        combinedParams.insert(combinedParams.end(), params.begin(), params.end());
+    }
+    return combinedParams;
+}
+
+INSTANTIATE_TEST_SUITE_P(smoke_LSTMCellV1_With_Hardcoded_Refs, ReferenceLSTMCellV1Test,
+    testing::ValuesIn(generateCombinedParamsV1()), ReferenceLSTMCellV1Test::getTestCaseName);
+
+INSTANTIATE_TEST_SUITE_P(smoke_LSTMCellV1_With_Hardcoded_Refs, ReferenceLSTMCellV1TestBiasDefaultAttrs,
+    testing::ValuesIn(generateCombinedParamsBiasDefaultAttrsV1()), ReferenceLSTMCellV1Test::getTestCaseName);
+
+INSTANTIATE_TEST_SUITE_P(smoke_LSTMCellV1_With_Hardcoded_Refs, ReferenceLSTMCellV1TestBiasClip,
+    testing::ValuesIn(generateCombinedParamsBiasClipV1()), ReferenceLSTMCellV1Test::getTestCaseName);
+} // namespace
\ No newline at end of file
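
Note for reviewers skimming the diff: the conversion above replaces LSTMCellParams' positional constructor with a fluent builder, so each tensor is labelled by name at the call site and new fields can be appended without rewriting existing test entries. Below is a minimal, self-contained sketch of that pattern; the CellParams/CellParamsBuilder names and fields are illustrative stand-ins, not the definitions the test suite actually uses.

#include <string>
#include <utility>
#include <vector>

// Hypothetical parameter bundle, standing in for LSTMCellParams.
struct CellParams {
    int batchSize = 0;
    std::vector<float> X;
    std::string name;
};

// Fluent builder: each setter assigns one member and returns *this,
// so a test entry reads as a chain of named arguments in any order.
struct CellParamsBuilder {
    CellParams p;
    CellParamsBuilder& batchSize(int v) { p.batchSize = v; return *this; }
    CellParamsBuilder& X(std::vector<float> v) { p.X = std::move(v); return *this; }
    CellParamsBuilder& testcaseName(std::string v) { p.name = std::move(v); return *this; }
    operator CellParams() const { return p; }  // converts back when stored in the params vector
};

int main() {
    // Mirrors the call shape introduced by the patch: Builder {} .batchSize(2) ...
    std::vector<CellParams> params {
        CellParamsBuilder {}
            .batchSize(2)
            .X({0.81342685f, 0.84108883f})
            .testcaseName("lstm_cell_sketch"),
    };
    return params[0].batchSize == 2 ? 0 : 1;
}

Returning *this from every setter is what lets the calls chain, and the implicit conversion back to the parameter struct is one way to keep the brace-initialized params vectors shown in the patch unchanged in shape.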