diff --git a/src/plugins/intel_gpu/src/graph/broadcast.cpp b/src/plugins/intel_gpu/src/graph/broadcast.cpp index 6eb9be47488..02a57e35b93 100644 --- a/src/plugins/intel_gpu/src/graph/broadcast.cpp +++ b/src/plugins/intel_gpu/src/graph/broadcast.cpp @@ -82,10 +82,13 @@ std::vector broadcast_inst::calc_output_layouts(broadcast_node const& /* static_cast(target_shape.data())); const_data.emplace(1, target_shape_tensor); ov::op::v3::shape_infer(&op, input_shapes, output_shapes, const_data); - } else { - // Pattern shape is set as second input. Even though the input is scalar, the shape should be propagaterd as dynamic - auto output_rank = input_shapes[0].size(); - output_shapes[0] = ShapeType::dynamic(std::max(output_rank, static_cast(1))); + } else if (impl_param.input_layouts.size() >= 2) { + auto input1 = impl_param.get_input_layout(1); + int output_rank = input1.get().size(); + if (input1.is_static()) { + output_rank = input1.get_dim(0); // target shape rank is set as second input. + } + output_shapes[0] = ShapeType::dynamic(std::max(output_rank, static_cast(1))); } format output_format = format::adjust_to_rank(input0_layout.format, output_shapes[0].size()); @@ -95,6 +98,88 @@ std::vector broadcast_inst::calc_output_layouts(broadcast_node const& /* template std::vector broadcast_inst::calc_output_layouts(broadcast_node const& node, const kernel_impl_params& impl_param); +std::vector broadcast_inst::extend_input_shape_to_6d(kernel_impl_params const& orig_impl_param, int32_t input_idx) { + ov::PartialShape ps; + + auto orig_input_layout = orig_impl_param.get_input_layout(); + auto updated_param = orig_impl_param; + const auto& primitive = updated_param.typed_desc(); + + // Extend input dimensions with ones + auto i_layout = updated_param.input_layouts[0]; + auto o_layout = updated_param.output_layouts[0]; + + auto input_shape = i_layout.get_shape(); + auto output_shape = o_layout.get_shape(); + + if (primitive->axes_mapping.empty()) { + auto broadcastable = [&](layout a, layout b) { + auto dims_a = a.get_dims(); + auto dims_b = b.get_dims(); + size_t min_size = (dims_a.size() < dims_b.size()) ? dims_a.size(): dims_b.size(); + + for (size_t i = 0; i < min_size; i++) { + if (!(dims_a[i] == 1 || dims_b[i] == 1 || dims_a[i] == dims_b[i])) { + return false; + } + } + return true; + }; + + auto input_rank = input_shape.size(); + auto output_rank = output_shape.size(); + + if (!broadcastable(i_layout, o_layout)) { + input_shape.insert(input_shape.begin(), output_rank - input_rank, 1ul); + } + } else { + // If axis_mapping is specified, then ones are inserted according to it. + ov::Shape tmp_shape; + int prev_axis = -1; + int next_axis = -1; + size_t currentRank = 0; + int axe_idx = 0; + for (auto& axis : primitive->axes_mapping) { + prev_axis = next_axis; + next_axis = static_cast(axis); + + int ones_count = std::max(next_axis - prev_axis - 1, 0); + tmp_shape.insert(tmp_shape.begin() + currentRank, ones_count, 1ul); + tmp_shape.push_back(input_shape[axe_idx]); // Consider the Broadcast kernel 'broadcast' input to output shape + + currentRank += ones_count + 1; + axe_idx += 1; + } + + // insert 1 to match with output shape + if (o_layout.get_rank() > tmp_shape.size()) { + tmp_shape.insert(tmp_shape.end(), o_layout.get_rank() - tmp_shape.size(), 1ul); + } + input_shape = tmp_shape; + } + + ps = ov::PartialShape(input_shape); + + + if (ps.size() < 4) { + ps.insert(ps.end(), 4 - ps.size(), ov::Dimension(1)); + } + + layout l(ps, data_types::i32, format::get_default_format(ps.size())); + return l.transform(format::bfwzyx).to_shape(); +} + +std::vector broadcast_inst::extend_output_shape_to_6d(kernel_impl_params const& orig_impl_param, int32_t output_idx) { + ov::PartialShape ps = orig_impl_param.get_output_layout(output_idx).get_partial_shape(); + + if (ps.size() < 4) { + ps.insert(ps.end(), 4 - ps.size(), ov::Dimension(1)); + } + + layout l(ps, data_types::i32, format::get_default_format(ps.size())); + return l.transform(format::bfwzyx).to_shape(); +} + std::string broadcast_inst::to_string(broadcast_node const& node) { auto desc = node.get_primitive(); auto node_info = node.desc_to_json(); diff --git a/src/plugins/intel_gpu/src/graph/gemm.cpp b/src/plugins/intel_gpu/src/graph/gemm.cpp index 7f22c554179..87306eb5b5f 100644 --- a/src/plugins/intel_gpu/src/graph/gemm.cpp +++ b/src/plugins/intel_gpu/src/graph/gemm.cpp @@ -111,6 +111,28 @@ std::vector gemm_inst::calc_output_layouts(gemm_node const& /*node*/, co template std::vector gemm_inst::calc_output_layouts(gemm_node const& node, const kernel_impl_params& impl_param); +std::vector gemm_inst::extend_input_shape_to_6d(kernel_impl_params const& orig_impl_param, int32_t input_idx) { + ov::PartialShape ps = orig_impl_param.get_input_layout(input_idx).get_partial_shape(); + + if (ps.size() < 4) { + ps.insert(ps.begin(), 4 - ps.size(), ov::Dimension(1)); + } + + layout l(ps, data_types::i32, format::get_default_format(ps.size())); + return l.transform(format::bfwzyx).to_shape(); +} + +std::vector gemm_inst::extend_output_shape_to_6d(kernel_impl_params const& orig_impl_param, int32_t output_idx) { + ov::PartialShape ps = orig_impl_param.get_output_layout(output_idx).get_partial_shape(); + + if (ps.size() < 4) { + ps.insert(ps.begin(), 4 - ps.size(), ov::Dimension(1)); + } + + layout l(ps, data_types::i32, format::get_default_format(ps.size())); + return l.transform(format::bfwzyx).to_shape(); +} + std::vector gemm_inst::transform_input_layouts(const std::shared_ptr primitive, const std::vector& input_layouts, const layout& output_layout) { diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/broadcast.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/broadcast.cpp index 0891739133f..f801aa9f6f5 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/broadcast.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/broadcast.cpp @@ -49,6 +49,104 @@ struct broadcast_impl : typed_primitive_impl_ocl { } } + // Extend input dimensions with ones + auto i_layout = impl_param.input_layouts[0]; + auto o_layout = impl_param.output_layouts[0]; + if (i_layout.is_static() && o_layout.is_static()) { + auto data_shape = i_layout.get_shape(); + auto output_shape = o_layout.get_shape(); + + if (primitive->axes_mapping.empty()) { + auto broadcastable = [&](layout a, layout b) { + auto dims_a = a.get_dims(); + auto dims_b = b.get_dims(); + size_t min_size = (dims_a.size() < dims_b.size()) ? dims_a.size(): dims_b.size(); + + for (size_t i = 0; i < min_size; i++) { + if (!(dims_a[i] == 1 || dims_b[i] == 1 || dims_a[i] == dims_b[i])) { + return false; + } + } + return true; + }; + + auto input_rank = data_shape.size(); + auto output_rank = output_shape.size(); + + if (!broadcastable(i_layout, o_layout)) { + data_shape.insert(data_shape.begin(), output_rank - input_rank, 1ul); + } + } else { + // If axis_mapping is specified, then ones are inserted according to it. + ov::Shape tmp_shape; + int prev_axis = -1; + int next_axis = -1; + size_t currentRank = 0; + int axe_idx = 0; + for (auto& axis : primitive->axes_mapping) { + prev_axis = next_axis; + next_axis = static_cast(axis); + + int ones_count = std::max(next_axis - prev_axis - 1, 0); + tmp_shape.insert(tmp_shape.begin() + currentRank, ones_count, 1ul); + tmp_shape.push_back(data_shape[axe_idx]); // Consider the Broadcast kernel 'broadcast' input to output shape + + currentRank += ones_count + 1; + axe_idx += 1; + } + + if (o_layout.get_rank() > tmp_shape.size()) { + tmp_shape.insert(tmp_shape.end(), o_layout.get_rank() - tmp_shape.size(), 1ul); + } + data_shape = tmp_shape; + } + + layout new_layout = i_layout; + new_layout.format = format::adjust_to_rank(i_layout.format, data_shape.size()); + new_layout.set_partial_shape(data_shape); + params.inputs[0] = convert_data_tensor(new_layout); + } else { + // dynamic input + if (primitive->axes_mapping.empty()) { + ov::PartialShape i_shape = i_layout.get_partial_shape(); + ov::PartialShape o_shape = o_layout.get_partial_shape(); + + auto i_rank = i_shape.size(); + auto o_rank = o_shape.size(); + i_shape.insert(i_shape.begin(), o_rank - i_rank, 1ul); + + layout new_layout = i_layout; + new_layout.format = format::adjust_to_rank(i_layout.format, i_shape.size()); + new_layout.set_partial_shape(i_shape); + params.inputs[0] = convert_data_tensor(new_layout); + } else { + // insert 1 to extend dimensions by axes_mapping + ov::Shape tmp_shape; + size_t idx = 0; + for (auto& axis : primitive->axes_mapping) { + if (idx == axis) { + tmp_shape.insert(tmp_shape.begin() + idx, 1, -1); + idx += 1; + } else { + tmp_shape.insert(tmp_shape.begin() + idx, axis - idx, 1); + idx = axis; + tmp_shape.insert(tmp_shape.begin() + idx, 1, -1); + idx += 1; + } + } + + // insert 1 to match with output shape + if (o_layout.get_rank() > tmp_shape.size()) { + tmp_shape.insert(tmp_shape.end(), o_layout.get_rank() - tmp_shape.size(), 1ul); + } + + layout new_layout = i_layout; + new_layout.format = format::adjust_to_rank(i_layout.format, tmp_shape.size()); + new_layout.set_partial_shape(tmp_shape); + params.inputs[0] = convert_data_tensor(new_layout); + } + } + return {params, optional_params}; } diff --git a/src/plugins/intel_gpu/src/graph/include/broadcast_inst.h b/src/plugins/intel_gpu/src/graph/include/broadcast_inst.h index 9b339aadd5c..9ef379f409c 100644 --- a/src/plugins/intel_gpu/src/graph/include/broadcast_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/broadcast_inst.h @@ -37,6 +37,8 @@ public: template static std::vector calc_output_layouts(broadcast_node const& /*node*/, const kernel_impl_params& impl_param); static layout calc_output_layout(broadcast_node const& node, kernel_impl_params const& impl_param); + static std::vector extend_input_shape_to_6d(kernel_impl_params const& orig_impl_param, int32_t input_idx); + static std::vector extend_output_shape_to_6d(kernel_impl_params const& orig_impl_param, int32_t output_idx); static std::string to_string(broadcast_node const& node); typed_primitive_inst(network& network, broadcast_node const& node); }; diff --git a/src/plugins/intel_gpu/src/graph/include/gemm_inst.h b/src/plugins/intel_gpu/src/graph/include/gemm_inst.h index 7998d5a3415..85d01e50357 100644 --- a/src/plugins/intel_gpu/src/graph/include/gemm_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/gemm_inst.h @@ -32,6 +32,8 @@ public: template static std::vector calc_output_layouts(gemm_node const& /*node*/, const kernel_impl_params& impl_param); static layout calc_output_layout(gemm_node const& node, kernel_impl_params const& impl_param); + static std::vector extend_input_shape_to_6d(kernel_impl_params const& orig_impl_param, int32_t input_idx); + static std::vector extend_output_shape_to_6d(kernel_impl_params const& orig_impl_param, int32_t output_idx); static std::string to_string(gemm_node const& node); static std::vector transform_input_layouts(const std::shared_ptr primitive, diff --git a/src/plugins/intel_gpu/src/graph/include/primitive_inst.h b/src/plugins/intel_gpu/src/graph/include/primitive_inst.h index 334a3390fd4..31e4e4e24b9 100644 --- a/src/plugins/intel_gpu/src/graph/include/primitive_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/primitive_inst.h @@ -406,6 +406,26 @@ public: return std::move(orig_impl_param); } + static std::vector extend_input_shape_to_6d(kernel_impl_params const& orig_impl_param, int32_t input_idx) { + ov::PartialShape ps = orig_impl_param.get_input_layout(input_idx).get_partial_shape(); + + if (ps.size() < 4) { + ps.insert(ps.end(), 4 - ps.size(), ov::Dimension(1)); + } + layout l(ps, data_types::i32, format::get_default_format(ps.size())); + return l.transform(format::bfwzyx).to_shape(); + } + + static std::vector extend_output_shape_to_6d(kernel_impl_params const& orig_impl_param, int32_t output_idx) { + ov::PartialShape ps = orig_impl_param.get_output_layout(output_idx).get_partial_shape(); + + if (ps.size() < 4) { + ps.insert(ps.end(), 4 - ps.size(), ov::Dimension(1)); + } + layout l(ps, data_types::i32, format::get_default_format(ps.size())); + return l.transform(format::bfwzyx).to_shape(); + } + typed_primitive_inst_base(network& network, typed_node const& node) : typed_primitive_inst_base(network, node, do_allocate_memory(node)) {} diff --git a/src/plugins/intel_gpu/src/graph/include/primitive_type.h b/src/plugins/intel_gpu/src/graph/include/primitive_type.h index 5d3bf4b0a99..449d36e59e9 100644 --- a/src/plugins/intel_gpu/src/graph/include/primitive_type.h +++ b/src/plugins/intel_gpu/src/graph/include/primitive_type.h @@ -44,6 +44,8 @@ struct primitive_type { virtual layout calc_output_layout(const program_node& node, const kernel_impl_params& params) const = 0; virtual std::vector calc_output_layouts(const program_node& node, const kernel_impl_params& impl_param) const = 0; virtual kernel_impl_params get_fake_aligned_params(kernel_impl_params const& orig_impl_param) const = 0; + virtual std::vector extend_input_shape_to_6d(kernel_impl_params const& orig_impl_param, int32_t input_idx) const = 0; + virtual std::vector extend_output_shape_to_6d(kernel_impl_params const& orig_impl_param, int32_t output_idx) const = 0; virtual std::string to_string(const program_node& node) const = 0; }; } // namespace cldnn diff --git a/src/plugins/intel_gpu/src/graph/include/primitive_type_base.h b/src/plugins/intel_gpu/src/graph/include/primitive_type_base.h index 3b0f8b2c658..3f1b9ef9f29 100644 --- a/src/plugins/intel_gpu/src/graph/include/primitive_type_base.h +++ b/src/plugins/intel_gpu/src/graph/include/primitive_type_base.h @@ -89,6 +89,12 @@ struct primitive_type_base : primitive_type { kernel_impl_params get_fake_aligned_params(kernel_impl_params const& orig_impl_param) const override { return typed_primitive_inst::get_fake_aligned_params(orig_impl_param); } + std::vector extend_input_shape_to_6d(kernel_impl_params const& orig_impl_param, int32_t input_idx) const override { + return typed_primitive_inst::extend_input_shape_to_6d(orig_impl_param, input_idx); + } + std::vector extend_output_shape_to_6d(kernel_impl_params const& orig_impl_param, int32_t output_idx) const override { + return typed_primitive_inst::extend_output_shape_to_6d(orig_impl_param, output_idx); + } std::string to_string(const cldnn::program_node& node) const override { OPENVINO_ASSERT(node.type() == this, "[GPU] primitive_type_base::to_string: primitive type mismatch"); return typed_primitive_inst::to_string(node); diff --git a/src/plugins/intel_gpu/src/graph/include/shape_of_inst.h b/src/plugins/intel_gpu/src/graph/include/shape_of_inst.h index 5d6a8dfd381..fd8375838b1 100644 --- a/src/plugins/intel_gpu/src/graph/include/shape_of_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/shape_of_inst.h @@ -35,6 +35,8 @@ public: template static std::vector calc_output_layouts(shape_of_node const& /*node*/, const kernel_impl_params& impl_param); static layout calc_output_layout(shape_of_node const& node, kernel_impl_params const& impl_param); + static std::vector extend_input_shape_to_6d(kernel_impl_params const& orig_impl_param, int32_t input_idx); + static std::vector extend_output_shape_to_6d(kernel_impl_params const& orig_impl_param, int32_t output_idx); static std::string to_string(shape_of_node const& node); typed_primitive_inst(network& network, shape_of_node const& node); diff --git a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp index 30f8f4267bc..3aa1a6b4fa1 100644 --- a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp +++ b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp @@ -267,21 +267,6 @@ void primitive_inst::realloc_if_needed() { void primitive_inst::update_impl() { GPU_DEBUG_PROFILED_STAGE(instrumentation::pipeline_stage::update_implementation); auto prev_impl_str = _impl != nullptr ? _impl->get_kernel_name() : "nullptr"; - auto extend_to_6d = [this](ov::PartialShape ps) -> std::vector { - // For shape_of we extend shape with 1-s to 6d rank to make kernel simpler - if (_node->is_type()) { - ps.insert(ps.end(), 6 - ps.size(), ov::Dimension(1)); - return ps.to_shape(); - } - if (ps.size() < 4) { - if (_node->is_type()) - ps.insert(ps.begin(), 4 - ps.size(), ov::Dimension(1)); - else - ps.insert(ps.end(), 4 - ps.size(), ov::Dimension(1)); - } - layout l(ps, data_types::i32, format::get_default_format(ps.size())); - return l.transform(format::bfwzyx).to_shape(); - }; auto get_layout_key = [&](const kernel_impl_params& params) -> size_t { size_t seed = 0; @@ -301,21 +286,23 @@ void primitive_inst::update_impl() { return seed; }; - auto update_shape_info = [this, extend_to_6d, prev_impl_str](const kernel_impl_params& params) { + auto update_shape_info = [this, prev_impl_str](const kernel_impl_params& params) { mem_lock lock(_shape_info_memory, _network.get_stream()); size_t offset = 0; for (size_t i = 0; i < _node->get_dependencies().size(); i++) { if (_node->get_dependency(i).get_output_layout().is_dynamic()) { - auto input_shape = extend_to_6d(params.get_input_layout(i).get_partial_shape()); + auto input_shape = _node->type()->extend_input_shape_to_6d(params, i); for (size_t j = 0; j < input_shape.size(); j++) lock[offset++] = static_cast(input_shape[j]); } } - if (_node->get_output_layout().is_dynamic()) { - auto output_shape = extend_to_6d(params.get_output_layout().get_partial_shape()); - for (size_t j = 0; j < output_shape.size(); j++) - lock[offset++] = static_cast(output_shape[j]); + for (size_t i = 0; i < _node->get_output_layouts().size(); i++) { + if (_node->get_output_layout(i).is_dynamic()) { + auto output_shape = _node->type()->extend_output_shape_to_6d(params, i); + for (size_t j = 0; j < output_shape.size(); j++) + lock[offset++] = static_cast(output_shape[j]); + } } std::stringstream s; s << "shapes: "; @@ -365,6 +352,7 @@ void primitive_inst::update_impl() { _impl = _dynamic_impl->clone(); _impl->update_dispatch_data(updated_params); + update_shape_info(updated_params); } else { _impl = _node->type()->choose_impl(*_node, updated_params); diff --git a/src/plugins/intel_gpu/src/graph/shape_of.cpp b/src/plugins/intel_gpu/src/graph/shape_of.cpp index 4a54e6e0ebc..88742eeaeb9 100644 --- a/src/plugins/intel_gpu/src/graph/shape_of.cpp +++ b/src/plugins/intel_gpu/src/graph/shape_of.cpp @@ -47,6 +47,18 @@ std::vector shape_of_inst::calc_output_layouts(shape_of_node const& /*no template std::vector shape_of_inst::calc_output_layouts(shape_of_node const& node, const kernel_impl_params& impl_param); +std::vector shape_of_inst::extend_input_shape_to_6d(kernel_impl_params const& orig_impl_param, int32_t input_idx) { + ov::PartialShape ps = orig_impl_param.get_input_layout(input_idx).get_partial_shape(); + ps.insert(ps.end(), 6 - ps.size(), ov::Dimension(1)); + return ps.to_shape(); +} + +std::vector shape_of_inst::extend_output_shape_to_6d(kernel_impl_params const& orig_impl_param, int32_t output_idx) { + ov::PartialShape ps = orig_impl_param.get_output_layout(output_idx).get_partial_shape(); + ps.insert(ps.end(), 6 - ps.size(), ov::Dimension(1)); + return ps.to_shape(); +} + std::string shape_of_inst::to_string(shape_of_node const& node) { auto node_info = node.desc_to_json(); auto desc = node.get_primitive(); diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/broadcast/broadcast_kernel_base.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/broadcast/broadcast_kernel_base.cpp index c83c2e0e3ba..18774d79a09 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/broadcast/broadcast_kernel_base.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/broadcast/broadcast_kernel_base.cpp @@ -33,26 +33,45 @@ BroadcastKernelBase::DispatchData BroadcastKernelBase::SetDefault(const broadcas static std::string GetInputBlockND(const broadcast_params& params) { const auto& input = params.inputs[0]; - auto input_dims = input.LogicalDims(); - std::reverse(input_dims.begin(), input_dims.end()); - const int rank = static_cast(input_dims.size()); - std::vector block_nd(rank + 1); - std::vector block_nd_s(rank + 1); - block_nd[rank] = 1; - block_nd_s[rank] = "1"; - for (int idx = (rank - 1); idx >= 0; idx--) { - block_nd[idx] = input_dims[idx] * block_nd[idx + 1]; - block_nd_s[idx] = "(" + toCodeString(input.GetDims()[idx], rank - idx) + " * " + block_nd_s[idx + 1] + ")"; - } std::stringstream s; - for (int i = 0; i < (rank + 1); i++) { - if (i < rank) { - s << (input.is_dynamic() ? block_nd_s[i] : std::to_string(block_nd[i])) << ","; - } else { - s << (input.is_dynamic() ? block_nd_s[i] : std::to_string(block_nd[i])); + auto input_dims = input.LogicalDims(); + std::reverse(input_dims.begin(), input_dims.end()); + + if (input.is_dynamic()) { + const int rank = static_cast(input_dims.size()); + std::vector block_nd_s(rank + 1); + block_nd_s[rank] = "1"; + for (int idx = (rank - 1); idx >= 0; idx--) { + int shape_info_idx = idx; + if (idx >= 2) { + shape_info_idx += (6 - rank); + } + block_nd_s[idx] = "(" + toCodeString(input.GetDims()[rank - idx - 1], shape_info_idx) + " * " + block_nd_s[idx + 1] + ")"; + } + + for (int i = 0; i < (rank + 1); i++) { + s << block_nd_s[i]; + if (i < rank) { + s << ","; + } + } + } else { + const int rank = static_cast(input_dims.size()); + std::vector block_nd(rank + 1); + block_nd[rank] = 1; + for (int idx = (rank - 1); idx >= 0; idx--) { + block_nd[idx] = input_dims[idx] * block_nd[idx + 1]; + } + + for (int i = 0; i < (rank + 1); i++) { + s << std::to_string(block_nd[i]); + if (i < rank) { + s << ","; + } } } + auto str_result = s.str(); return str_result; } diff --git a/src/plugins/intel_gpu/src/plugin/ops/broadcast.cpp b/src/plugins/intel_gpu/src/plugin/ops/broadcast.cpp index 0bd0e0e591b..2944de98491 100644 --- a/src/plugins/intel_gpu/src/plugin/ops/broadcast.cpp +++ b/src/plugins/intel_gpu/src/plugin/ops/broadcast.cpp @@ -50,23 +50,6 @@ static void CreateCommonBroadcastOp(Program& p, const std::shared_ptr(axis); - - int ones_count = std::max(next_axis - prev_axis - 1, 0); - tmp_shape.insert(tmp_shape.begin() + currentRank, ones_count, 1ul); - tmp_shape.push_back(outputShape[axis]); - - currentRank += ones_count + 1; - } - inputShape = tmp_shape; } auto targetShape = tensor_from_dims(inputShape); diff --git a/src/tests/functional/plugin/gpu/single_layer_tests/dynamic/broadcast.cpp b/src/tests/functional/plugin/gpu/single_layer_tests/dynamic/broadcast.cpp new file mode 100644 index 00000000000..7df131bbb29 --- /dev/null +++ b/src/tests/functional/plugin/gpu/single_layer_tests/dynamic/broadcast.cpp @@ -0,0 +1,413 @@ +// Copyright (C) 2018-2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "shared_test_classes/single_layer/broadcast.hpp" +#include "shared_test_classes/base/ov_subgraph.hpp" +#include "ie_precision.hpp" +#include "ngraph_functions/builders.hpp" +#include +#include + +using namespace ngraph; +using namespace InferenceEngine; +using namespace ov::test; + +namespace GPULayerTestsDefinitions { + +typedef std::tuple< + std::vector, // Shapes + std::vector, // Target shapes + std::vector, // Axes mapping + ov::op::BroadcastType, // Broadcast mode + ov::element::Type_t, // Network precision + std::vector, // Const inputs + std::string // Device name +> BroadcastLayerTestParamsSet; + +class BroadcastLayerGPUTest : public testing::WithParamInterface, + virtual public SubgraphBaseTest { +public: + static std::string getTestCaseName(testing::TestParamInfo obj) { + std::vector inputShapes; + std::vector targetShapes, axesMapping; + ov::op::BroadcastType mode; + ov::element::Type_t netPrecision; + std::vector isConstInputs; + std::string deviceName; + std::tie(inputShapes, targetShapes, axesMapping, mode, netPrecision, isConstInputs, deviceName) = obj.param; + + std::ostringstream result; + result << "IS=("; + for (const auto& shape : inputShapes) { + result << CommonTestUtils::partialShape2str({shape.first}) << "_"; + } + result << ")_TS=("; + for (const auto& shape : inputShapes) { + for (const auto& item : shape.second) { + result << CommonTestUtils::vec2str(item) << "_"; + } + } + result << "targetShape=" << CommonTestUtils::vec2str(targetShapes) << "_"; + result << "axesMapping=" << CommonTestUtils::vec2str(axesMapping) << "_"; + result << "mode=" << mode << "_"; + result << "netPrec=" << netPrecision << "_"; + result << "constIn=(" << (isConstInputs[0] ? "True" : "False") << "." << (isConstInputs[1] ? "True" : "False") << ")_"; + result << "trgDevice=" << deviceName; + + return result.str(); + } + +protected: + std::vector targetShape, axesMapping; + + void SetUp() override { + std::vector inputShapes; + ov::op::BroadcastType mode; + ov::element::Type_t netPrecision; + std::vector isConstInput; + std::tie(inputShapes, targetShape, axesMapping, mode, netPrecision, isConstInput, targetDevice) = this->GetParam(); + + bool isTargetShapeConst = isConstInput[0]; + bool isAxesMapConst = isConstInput[1]; + + const auto targetShapeRank = targetShape.size(); + const auto axesMappingRank = axesMapping.size(); + + if (inputShapes.front().first.rank() != 0) { + inputDynamicShapes.push_back(inputShapes.front().first); + if (!isTargetShapeConst) { + inputDynamicShapes.push_back({ static_cast(targetShape.size()) }); + } + if (!isAxesMapConst) { + inputDynamicShapes.push_back({ static_cast(axesMapping.size()) }); + } + } + const size_t targetStaticShapeSize = inputShapes.front().second.size(); + targetStaticShapes.resize(targetStaticShapeSize); + for (size_t i = 0lu; i < targetStaticShapeSize; ++i) { + targetStaticShapes[i].push_back(inputShapes.front().second[i]); + if (!isTargetShapeConst) + targetStaticShapes[i].push_back({ targetShape.size() }); + if (!isAxesMapConst) + targetStaticShapes[i].push_back({ axesMapping.size() }); + } + + ov::ParameterVector functionParams; + if (inputDynamicShapes.empty()) { + functionParams.push_back(std::make_shared(netPrecision, targetStaticShapes.front().front())); + } else { + functionParams.push_back(std::make_shared(netPrecision, inputDynamicShapes.front())); + if (!isTargetShapeConst) { + functionParams.push_back(std::make_shared(ov::element::i64, inputDynamicShapes[1])); + functionParams.back()->set_friendly_name("targetShape"); + } + if (!isAxesMapConst) { + functionParams.push_back(std::make_shared(ov::element::i64, inputDynamicShapes.back())); + functionParams.back()->set_friendly_name("axesMapping"); + } + } + functionParams.front()->set_friendly_name("data"); + + auto paramOuts = helpers::convert2OutputVector(helpers::castOps2Nodes(functionParams)); + + std::shared_ptr broadcastOp; + if (mode == ov::op::BroadcastType::EXPLICIT) { + std::shared_ptr targetShapeOp; + std::shared_ptr axesMappingOp; + if (isTargetShapeConst) { + targetShapeOp = ov::op::v0::Constant::create(ov::element::i64, {targetShapeRank}, targetShape); + } else { + targetShapeOp = functionParams[1]; + } + if (isAxesMapConst) { + axesMappingOp = ov::op::v0::Constant::create(ov::element::i64, {axesMappingRank}, axesMapping); + } else { + axesMappingOp = functionParams.size() > 2 ? functionParams[2] : functionParams[1]; + } + broadcastOp = std::make_shared(paramOuts[0], + targetShapeOp, + axesMappingOp, + mode); + } else if (mode == ov::op::BroadcastType::NUMPY) { + if (isTargetShapeConst) { + auto targetShapeConst = ov::op::v0::Constant::create(ov::element::i64, {targetShapeRank}, targetShape); + broadcastOp = std::make_shared(paramOuts[0], + targetShapeConst, + mode); + } else { + broadcastOp = std::make_shared(paramOuts[0], + paramOuts[1], + mode); + } + } + + auto makeFunction = [](ParameterVector ¶ms, const std::shared_ptr &lastNode) { + ResultVector results; + + for (int i = 0; i < lastNode->get_output_size(); i++) + results.push_back(std::make_shared(lastNode->output(i))); + + return std::make_shared(results, params, "BroadcastLayerGPUTest"); + }; + + function = makeFunction(functionParams, broadcastOp); + } + + void generate_inputs(const std::vector& targetInputStaticShapes) override { + inputs.clear(); + const auto& funcInputs = function->inputs(); + for (size_t i = 0lu; i < funcInputs.size(); i++) { + const auto& funcInput = funcInputs[i]; + ov::Tensor tensor; + if (funcInput.get_node()->get_friendly_name() == "targetShape") { + tensor = ov::Tensor{ov::element::i64, targetInputStaticShapes[i]}; + auto data = tensor.data::value_type>(); + for (size_t i = 0lu; i < targetShape.size(); i++) { + data[i] = targetShape[i]; + } + } else if (funcInput.get_node()->get_friendly_name() == "axesMapping") { + tensor = ov::Tensor{ov::element::i64, targetInputStaticShapes[i]}; + auto data = tensor.data::value_type>(); + for (size_t i = 0lu; i < axesMapping.size(); i++) { + data[i] = axesMapping[i]; + } + } else { + if (funcInput.get_element_type().is_real()) { + tensor = ov::test::utils::create_and_fill_tensor( + funcInput.get_element_type(), targetInputStaticShapes[i], 10, 0, 1000); + } else { + tensor = ov::test::utils::create_and_fill_tensor(funcInput.get_element_type(), targetInputStaticShapes[i]); + } + } + inputs.insert({funcInput.get_node_shared_ptr(), tensor}); + } + } +}; + +TEST_P(BroadcastLayerGPUTest, CompareWithRefs) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + + run(); +} + +namespace { + +const std::vector inputPrecisionsFloat = { + ov::element::f32, +}; + +const std::vector inputPrecisionsInt = { + ov::element::i32, +}; + +const std::vector> inputConstants = { + {true, true}, + {false, true}, +#if 0 // axes map input doesn't supported parameter input + {true, false}, + {false, false}, +#endif +}; + +// ============================================================================== +// 1D +const std::vector> dynamicInputShapes1D_explicit = { + { + { {-1}, {{7}} } + }, +}; +INSTANTIATE_TEST_CASE_P(smoke_broadcast_1d_explicit_compareWithRefs_dynamic, + BroadcastLayerGPUTest, + ::testing::Combine( + ::testing::ValuesIn(dynamicInputShapes1D_explicit), + ::testing::ValuesIn(std::vector>{{4, 7, 3}, {2, 7, 4, 3, 6}}), + ::testing::Values(std::vector{1}), + ::testing::Values(ov::op::BroadcastType::EXPLICIT), + ::testing::ValuesIn(inputPrecisionsFloat), + ::testing::ValuesIn(inputConstants), + ::testing::Values(CommonTestUtils::DEVICE_GPU)), + BroadcastLayerGPUTest::getTestCaseName); + +const std::vector> dynamicInputShapes1D = { + { + { {-1}, {{1}, {7}} } + }, +}; +INSTANTIATE_TEST_CASE_P(smoke_broadcast_1d_numpy_compareWithRefs_dynamic, + BroadcastLayerGPUTest, + ::testing::Combine( + ::testing::ValuesIn(dynamicInputShapes1D), + ::testing::ValuesIn(std::vector>{{7}, {2, 4, 7}, {2, 3, 4, 7}}), + ::testing::Values(std::vector{}), + ::testing::Values(ov::op::BroadcastType::NUMPY), + ::testing::ValuesIn(inputPrecisionsInt), + ::testing::ValuesIn(inputConstants), + ::testing::Values(CommonTestUtils::DEVICE_GPU)), + BroadcastLayerGPUTest::getTestCaseName); + +// ============================================================================== +// 2D +const std::vector> dynamicInputShapes2D_explicit = { + { + { {-1, -1}, {{3, 5}} } + } +}; +INSTANTIATE_TEST_CASE_P(smoke_broadcast_2d_explicit_compareWithRefs_dynamic, + BroadcastLayerGPUTest, + ::testing::Combine( + ::testing::ValuesIn(dynamicInputShapes2D_explicit), + ::testing::ValuesIn(std::vector>{{3, 4, 5}, {3, 6, 5, 7}}), + ::testing::Values(std::vector{0, 2}), + ::testing::Values(ov::op::BroadcastType::EXPLICIT), + ::testing::ValuesIn(inputPrecisionsInt), + ::testing::ValuesIn(inputConstants), + ::testing::Values(CommonTestUtils::DEVICE_GPU)), + BroadcastLayerGPUTest::getTestCaseName); + +const std::vector> dynamicInputShapes2D = { + { + { {-1, -1}, {{3, 1}, {3, 5}} } + } +}; +INSTANTIATE_TEST_CASE_P(smoke_broadcast_2d_numpy_compareWithRefs_dynamic, + BroadcastLayerGPUTest, + ::testing::Combine( + ::testing::ValuesIn(dynamicInputShapes2D), + ::testing::ValuesIn(std::vector>{{3, 5}, {2, 3, 5}}), + ::testing::Values(std::vector{}), + ::testing::Values(ov::op::BroadcastType::NUMPY), + ::testing::ValuesIn(inputPrecisionsFloat), + ::testing::ValuesIn(inputConstants), + ::testing::Values(CommonTestUtils::DEVICE_GPU)), + BroadcastLayerGPUTest::getTestCaseName); + +// ============================================================================== +// 3D +const std::vector> dynamicInputShapes3D_explicit = { + { + { {-1, -1, -1}, {{4, 5, 6}} } + }, +}; +INSTANTIATE_TEST_CASE_P(smoke_broadcast_3d_explicit_compareWithRefs_dynamic, + BroadcastLayerGPUTest, + ::testing::Combine( + ::testing::ValuesIn(dynamicInputShapes3D_explicit), + ::testing::ValuesIn(std::vector>{{4, 5, 6}, {4, 5, 6, 2, 3}}), + ::testing::Values(std::vector{0, 1, 2}), + ::testing::Values(ov::op::BroadcastType::EXPLICIT), + ::testing::ValuesIn(inputPrecisionsFloat), + ::testing::ValuesIn(inputConstants), + ::testing::Values(CommonTestUtils::DEVICE_GPU)), + BroadcastLayerGPUTest::getTestCaseName); + +const std::vector> dynamicInputShapes3D = { + { + { {-1, -1, -1}, {{4, 5, 1}, {1, 5, 1}} } + }, +}; +INSTANTIATE_TEST_CASE_P(smoke_broadcast_3d_numpy_compareWithRefs_dynamic, + BroadcastLayerGPUTest, + ::testing::Combine( + ::testing::ValuesIn(dynamicInputShapes3D), + ::testing::ValuesIn(std::vector>{{4, 5, 6}, {2, 4, 5, 1}}), + ::testing::Values(std::vector{}), + ::testing::Values(ov::op::BroadcastType::NUMPY), + ::testing::ValuesIn(inputPrecisionsInt), + ::testing::ValuesIn(inputConstants), + ::testing::Values(CommonTestUtils::DEVICE_GPU)), + BroadcastLayerGPUTest::getTestCaseName); + +// ============================================================================== +// 4D +const std::vector> dynamicInputShapes4D_explicit = { + { + { {-1, -1, -1, -1}, {{1, 16, 1, 7}} } + }, +}; +INSTANTIATE_TEST_CASE_P(smoke_broadcast_4d_explicit_compareWithRefs_dynamic, + BroadcastLayerGPUTest, + ::testing::Combine( + ::testing::ValuesIn(dynamicInputShapes4D_explicit), + ::testing::ValuesIn(std::vector>{{1, 16, 2, 1, 7}, {1, 16, 2, 1, 7, 3}}), + ::testing::Values(std::vector{0, 1, 3, 4}), + ::testing::Values(ov::op::BroadcastType::EXPLICIT), + ::testing::ValuesIn(inputPrecisionsInt), + ::testing::ValuesIn(inputConstants), + ::testing::Values(CommonTestUtils::DEVICE_GPU)), + BroadcastLayerGPUTest::getTestCaseName); + +const std::vector> dynamicInputShapes4D = { + { + { {-1, -1, -1, -1}, {{2, 1, 1, 3}, {1, 4, 1, 3}} } + }, +}; +INSTANTIATE_TEST_CASE_P(smoke_broadcast_4d_numpy_compareWithRefs_dynamic, + BroadcastLayerGPUTest, + ::testing::Combine( + ::testing::ValuesIn(dynamicInputShapes4D), + ::testing::ValuesIn(std::vector>{{2, 4, 1, 3}, {3, 2, 2, 4, 1, 3}}), + ::testing::Values(std::vector{}), + ::testing::Values(ov::op::BroadcastType::NUMPY), + ::testing::ValuesIn(inputPrecisionsFloat), + ::testing::ValuesIn(inputConstants), + ::testing::Values(CommonTestUtils::DEVICE_GPU)), + BroadcastLayerGPUTest::getTestCaseName); + +// ============================================================================== +// 5D +const std::vector> dynamicInputShapes5D_explicit = { + { + { {-1, -1, -1, -1, -1}, {{2, 3, 4, 5, 6}} } + }, +}; +INSTANTIATE_TEST_CASE_P(smoke_broadcast_5d_explicit_compareWithRefs_dynamic, + BroadcastLayerGPUTest, + ::testing::Combine( + ::testing::ValuesIn(dynamicInputShapes5D_explicit), + ::testing::ValuesIn(std::vector>{{2, 3, 4, 5, 6}}), + ::testing::Values(std::vector{0, 1, 2, 3, 4}), + ::testing::Values(ov::op::BroadcastType::EXPLICIT), + ::testing::ValuesIn(inputPrecisionsInt), + ::testing::ValuesIn(inputConstants), + ::testing::Values(CommonTestUtils::DEVICE_GPU)), + BroadcastLayerGPUTest::getTestCaseName); + +const std::vector> dynamicInputShapes5D = { + { + { {-1, -1, -1, -1, -1}, {{8, 1, 1, 7, 1}, {8, 4, 1, 7, 3}} } + }, +}; +INSTANTIATE_TEST_CASE_P(smoke_broadcast_5d_numpy_compareWithRefs_dynamic, + BroadcastLayerGPUTest, + ::testing::Combine( + ::testing::ValuesIn(dynamicInputShapes5D), + ::testing::ValuesIn(std::vector>{{8, 4, 1, 7, 3}, {8, 4, 5, 7, 3}}), + ::testing::Values(std::vector{}), + ::testing::Values(ov::op::BroadcastType::NUMPY), + ::testing::ValuesIn(inputPrecisionsFloat), + ::testing::ValuesIn(inputConstants), + ::testing::Values(CommonTestUtils::DEVICE_GPU)), + BroadcastLayerGPUTest::getTestCaseName); +// ============================================================================== +// 6D +const std::vector> dynamicInputShapes6D = { + { + { {-1, -1, -1, -1, -1, -1}, {{8, 1, 1, 7, 1, 3}, {8, 4, 1, 7, 16, 3}} } + }, +}; +INSTANTIATE_TEST_CASE_P(smoke_broadcast_6d_numpy_compareWithRefs_dynamic, + BroadcastLayerGPUTest, + ::testing::Combine( + ::testing::ValuesIn(dynamicInputShapes6D), + ::testing::ValuesIn(std::vector>{{8, 4, 1, 7, 16, 3}, {8, 4, 5, 7, 16, 3}}), + ::testing::Values(std::vector{}), + ::testing::Values(ov::op::BroadcastType::NUMPY), + ::testing::ValuesIn(inputPrecisionsInt), + ::testing::ValuesIn(inputConstants), + ::testing::Values(CommonTestUtils::DEVICE_GPU)), + BroadcastLayerGPUTest::getTestCaseName); + +} // namespace + +} // namespace GPULayerTestsDefinitions