[GPU] Fixed shape infer dep for blocked format (#14646)

* Fixed shape infer dep for blocked format

* Set the preferred format for shape_of from its input rank
- ShapeOf should get the original output layout of the previous node, which is not reordered.
Taylor Yeonbok Lee 2022-12-18 20:03:23 -08:00 committed by GitHub
parent a7d0b7b010
commit 05159b8cfb
4 changed files with 102 additions and 1 deletion
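
For context on the fix below: format::get_default_format(rank) returns the plain, non-blocked cldnn layout for a given rank, which is what shape_of and the producers of shape-infer inputs are pinned to so their buffers can be read element by element. A minimal sketch of that mapping, not part of this commit; the header path is an assumption and differs across versions:

// Sketch only; assumes the intel_gpu runtime headers expose cldnn::format here.
#include <intel_gpu/runtime/layout.hpp>

#include <cassert>

int main() {
    using cldnn::format;
    // get_default_format(rank) picks the plain layout for a rank:
    // rank <= 4 -> bfyx, rank 5 -> bfzyx, rank 6 -> bfwzyx.
    assert(format::get_default_format(4) == format::bfyx);
    assert(format::get_default_format(5) == format::bfzyx);
    assert(format::get_default_format(6) == format::bfwzyx);
    return 0;
}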


@@ -251,6 +251,9 @@ void remove_redundant_reorders::run(program& p) {
        if (!node->is_type<reorder>()) // only care for reorders
            continue;

        if (node->is_dynamic())
            continue;

        auto& r_node = node->as<reorder>();
        bool no_output_optimization = remove_output_reorders ?


@@ -452,7 +452,6 @@ void minimize_local_reorders(program& p, std::map<program_node*, format::type>&
    for (auto node : p.get_processing_order()) {
        if (!node->is_in_data_flow())
            continue;

        auto preferred_format = lo.get_preferred_format(*node);

        if (preferred_format != format::any) {


@@ -12,6 +12,7 @@
#include "resample_inst.h"
#include "reshape_inst.h"
#include "arg_max_min_inst.h"
#include "shape_of_inst.h"
#include "generic_layer.hpp"

#include <sstream>
@@ -1653,6 +1654,22 @@ format layout_optimizer::get_preferred_format(program_node& node) {
    auto output_layout = node.get_output_layout();
    bool use_onednn_impls = _optimization_attributes.use_onednn_impls;
    bool allow_new_shape_infer = node.get_program().get_options().get<build_option_type::allow_new_shape_infer>()->enabled();

    if (allow_new_shape_infer) {
        if (node.is_type<shape_of>())
            return format::get_default_format(node.get_dependency(0).get_output_layout(false).get_rank());

        for (auto u : node.get_users()) {
            for (auto dep_idx : u->get_shape_infer_dependencies()) {
                if (u->get_dependencies().size() <= dep_idx)
                    continue;
                if (u->get_dependency(dep_idx).get_unique_id() == node.get_unique_id()) {
                    expected = format::get_default_format(output_layout.get_rank(), false, false);
                    return expected;
                }
            }
        }
    }

    if (!_forcing_map.empty() && _forcing_map.count(node.id()) != 0) {
        expected = _forcing_map.at(node.id()).first;
    } else if (node.is_type<convolution>()) {
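
For context, not part of the diff: the user loop above keys off get_shape_infer_dependencies(), i.e. the list of input ports a primitive reads during shape inference. The sketch below is a hypothetical node illustrating that contract only; the real overrides live in the corresponding *_inst.h headers and may differ in detail.

// Hypothetical node type, for illustration; not intel_gpu code.
#include <cstddef>
#include <vector>

struct broadcast_like_node_sketch {
    // Port 1 carries the target shape. Because its contents are read during
    // shape inference, the node producing that input is kept in the plain
    // default format by the get_preferred_format() logic above.
    std::vector<size_t> get_shape_infer_dependencies() const { return {1}; }
};

int main() {
    broadcast_like_node_sketch node;
    return node.get_shape_infer_dependencies().front() == 1 ? 0 : 1;
}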


@@ -6,6 +6,7 @@
#include <intel_gpu/primitives/input_layout.hpp>
#include <intel_gpu/primitives/broadcast.hpp>
#include <intel_gpu/primitives/eltwise.hpp>
#include <intel_gpu/primitives/data.hpp>

#include "broadcast_inst.h"
@@ -81,6 +82,87 @@ INSTANTIATE_TEST_SUITE_P(smoke, broadcast_test_two_inputs,
    }
}));

class broadcast_test_two_inputs_blocked_format : public testing::TestWithParam<broadcast_test_params> { };

TEST_P(broadcast_test_two_inputs_blocked_format, shape_infer) {
    auto p = GetParam();

    auto& engine = get_test_engine();

    auto data_mem = engine.allocate_memory(p.data_layout);
    auto in1_mem = engine.allocate_memory(p.target_shape_layout);
    auto in2_mem = engine.allocate_memory(p.target_shape_layout);

    // data ------------|
    // shape1 (blocked) -- eltwise (plain) -- broadcast
    // shape2 (blocked) /
    // Expectation: eltwise's result is used as the shape memory of broadcast, so it must stay in plain format
    topology topology;
    topology.add(input_layout("data", layout{ov::PartialShape::dynamic(p.data_layout.get_rank()), p.data_layout.data_type, p.data_layout.format}),
                 input_layout("shape_input_1", layout{ov::PartialShape::dynamic(p.target_shape_layout.get_rank()), p.target_shape_layout.data_type, p.target_shape_layout.format}),
                 input_layout("shape_input_2", layout{ov::PartialShape::dynamic(p.target_shape_layout.get_rank()), p.target_shape_layout.data_type, p.target_shape_layout.format}),
                 eltwise("target_shape", input_info("shape_input_1"), input_info("shape_input_2"), eltwise_mode::sum, ov::op::AutoBroadcastType::NUMPY),
                 broadcast("output", input_info("data"), input_info("target_shape"), p.axes_mapping_data, p.mode)
    );

    build_options options;
    options.set_option(build_option::optimize_data(true));
    options.set_option(build_option::allow_new_shape_infer(true));

    std::vector<int32_t> input_data(p.data_layout.get_linear_size(), 1);

    network network(engine, topology, options);
    set_values(data_mem, input_data);
    set_values(in1_mem, p.target_shape_data);
    set_values(in2_mem, p.target_shape_data);
    network.set_input_data("data", data_mem);
    network.set_input_data("shape_input_1", in1_mem);
    network.set_input_data("shape_input_2", in2_mem);

    auto outputs = network.execute();
    auto output = outputs.at("output").get_memory();
    cldnn::mem_lock<float> output_ptr(output, get_test_stream());

    ASSERT_EQ(output->get_layout(), p.expected_layout);
}

INSTANTIATE_TEST_SUITE_P(smoke, broadcast_test_two_inputs_blocked_format,
    testing::ValuesIn(std::vector<broadcast_test_params>{
    {
        layout{ov::PartialShape{8}, data_types::i32, format::b_fs_yx_fsv16},    // data layout
        layout{ov::PartialShape{4}, data_types::i64, format::b_fs_yx_fsv16},
        {4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
        {0}, ov::op::BroadcastType::EXPLICIT,
        layout{ov::PartialShape{8, 64, 22, 16}, data_types::i32, format::b_fs_yx_fsv16}
    },
    {
        layout{ov::PartialShape{16, 1, 1, 1}, data_types::i32, format::b_fs_yx_fsv16},    // data layout
        layout{ov::PartialShape{4}, data_types::i64, format::b_fs_yx_fsv16},
        {8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
        {}, ov::op::BroadcastType::NUMPY,
        layout{ov::PartialShape{16, 50, 24, 20}, data_types::i32, format::b_fs_yx_fsv16}
    },
    {
        layout{ov::PartialShape{16}, data_types::i32, format::b_fs_zyx_fsv16},    // data layout
        layout{ov::PartialShape{5}, data_types::i64, format::b_fs_zyx_fsv16},
        {8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
        {0}, ov::op::BroadcastType::EXPLICIT,
        layout{ov::PartialShape{16, 2, 50, 24, 20}, data_types::i32, format::b_fs_zyx_fsv16}
    }
}));
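
A note on the test data above, not part of the diff: the target-shape inputs use b_fs_yx_fsv16 / b_fs_zyx_fsv16, which pad the feature axis to 16, so each logical shape value occupies a 16-element block with the value in slot 0 and zeros elsewhere. Both shape inputs are filled with the same values and eltwise sums them, so the broadcast target is twice each value, which is where the expected layouts come from. A small standalone sketch of that arithmetic for the first test case, assuming the padding just described:

// Plain C++ sketch; no intel_gpu APIs involved.
#include <cassert>
#include <cstdint>
#include <vector>

int main() {
    const std::vector<int64_t> logical = {4, 32, 11, 8};   // shape_input_1 == shape_input_2

    // fsv16 padding: each value sits at the start of a 16-element block.
    std::vector<int64_t> blocked(logical.size() * 16, 0);
    for (size_t i = 0; i < logical.size(); ++i)
        blocked[i * 16] = logical[i];

    // eltwise(sum) of two identical shape inputs doubles every value,
    // giving the expected broadcast target {8, 64, 22, 16}.
    std::vector<int64_t> target;
    for (size_t i = 0; i < logical.size(); ++i)
        target.push_back(blocked[i * 16] + blocked[i * 16]);
    assert((target == std::vector<int64_t>{8, 64, 22, 16}));
    return 0;
}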
class broadcast_test_single_input : public testing::TestWithParam<broadcast_test_params> { };
TEST_P(broadcast_test_single_input, shape_infer) {