[GPU] Fix layout propagation logic (#17199)
This commit is contained in:
parent
14a14ecd76
commit
6ed85178d5
@ -33,6 +33,15 @@ layout dft_inst::calc_output_layout(const dft_node& node, const kernel_impl_para
|
||||
std::string dft_inst::to_string(const dft_node& node) {
|
||||
auto desc = node.get_primitive();
|
||||
auto node_info = node.desc_to_json();
|
||||
json_composite dft_info;
|
||||
dft_info.add("axes", desc->axes);
|
||||
dft_info.add("signal_size", desc->signal_size);
|
||||
dft_info.add("output_shape", desc->output_shape);
|
||||
dft_info.add("direction", desc->direction == dft_direction::forward ? "forward" : "inverse");
|
||||
dft_info.add("mode", desc->mode == dft_mode::real ? "real" : "complex");
|
||||
|
||||
node_info->add("dft info", dft_info);
|
||||
|
||||
std::ostringstream os;
|
||||
node_info->dump(os);
|
||||
return os.str();
|
||||
|
@ -512,7 +512,9 @@ void minimize_local_reorders(program& p, std::map<program_node*, format::type>&
|
||||
continue;
|
||||
|
||||
for (auto new_fmt : local_formats) {
|
||||
if (fmt_map.at(node) != format::any && format::dimension(fmt_map.at(node)) != format::dimension(new_fmt))
|
||||
// Avoid setting of formats which will require transform from higher rank to smaller one which requires dimension squeeze
|
||||
// TODO: Needs to be updated once we improve layout assignment logic
|
||||
if (fmt_map.at(node) != format::any && format::dimension(fmt_map.at(node)) > format::dimension(new_fmt))
|
||||
continue;
|
||||
fmt_map.at(node) = new_fmt;
|
||||
|
||||
|
@ -5,11 +5,14 @@
|
||||
#include "test_utils.h"
|
||||
|
||||
#include "intel_gpu/runtime/engine.hpp"
|
||||
|
||||
#include "intel_gpu/graph/program.hpp"
|
||||
#include "intel_gpu/graph/network.hpp"
|
||||
|
||||
#include "data_inst.h"
|
||||
#include "eltwise_inst.h"
|
||||
#include "intel_gpu/graph/network.hpp"
|
||||
#include "dft_inst.h"
|
||||
#include "gather_inst.h"
|
||||
#include "border_inst.h"
|
||||
#include "pass_manager.h"
|
||||
#include "to_string_utils.h"
|
||||
|
||||
@ -69,6 +72,89 @@ TEST(reorder_inputs, propagation) {
|
||||
ASSERT_EQ(pool_node.get_output_layout().format.value, conv_pref);
|
||||
}
|
||||
|
||||
TEST(reorder_inputs, mixed_ranks_irdft) {
|
||||
// Topology:
|
||||
// transpose -> (5d) -> irdft -> (4d) -> eltwise
|
||||
// Expected: (bfzyx) -> irdft -> (bfyx)
|
||||
|
||||
auto& engine = get_test_engine();
|
||||
|
||||
topology topology;
|
||||
topology.add(input_layout("input", layout{ { 1, 120, 2, 64, 33 }, data_types::f16, format::bfzyx }));
|
||||
topology.add(input_layout("eltw_input", layout{ { 1, 120, 64, 64 }, data_types::f16, format::bfyx }));
|
||||
topology.add(permute("permute", input_info("input"), { 0, 1, 3, 4, 2 }));
|
||||
topology.add(dft("dft", input_info("permute"), {2, 3}, {64, 64}, {1, 120, 64, 64}, dft_direction::inverse, dft_mode::real));
|
||||
topology.add(eltwise("eltwise", input_info("dft"), input_info("eltw_input"), eltwise_mode::sum));
|
||||
|
||||
ExecutionConfig config = get_test_default_config(engine);
|
||||
config.set_property(ov::intel_gpu::optimize_data(true));
|
||||
|
||||
program::ptr prog = nullptr;
|
||||
ASSERT_NO_THROW(prog = program::build_program(engine, topology, config));
|
||||
ASSERT_NE(prog, nullptr);
|
||||
|
||||
auto prog_impl = prog.get();
|
||||
|
||||
auto& dft_node = prog_impl->get_node("dft");
|
||||
|
||||
ASSERT_EQ(dft_node.get_input_layouts()[0].format, format::bfzyx);
|
||||
ASSERT_EQ(dft_node.get_output_layout().format, format::bfyx);
|
||||
}
|
||||
|
||||
TEST(reorder_inputs, mixed_ranks_gather) {
|
||||
// Topology:
|
||||
// (4d) -> conv -> (4d) -> border -> (4d) -> gather -> (5d) -> gather -> (6d) -> permute (6d)
|
||||
// In case when preferred format for convolution is selected as byxf (in the test it's enforced)
|
||||
// it could be propagated to border and gathers, but dimensions are handled incorrectly
|
||||
// and the second gather may complain that axis >= rank
|
||||
// So here we expect that input format for gather is aligned with actual output rank and format
|
||||
|
||||
auto& engine = get_test_engine();
|
||||
auto data1_mem = engine.allocate_memory(layout{ { 3, 128, 1, 1 }, data_types::i32, format::bfyx });
|
||||
auto data2_mem = engine.allocate_memory(layout{ { 3, 55, 1, 1 }, data_types::i32, format::bfyx });
|
||||
auto weights_mem = engine.allocate_memory(layout{ { 2, 256, 3, 3 }, data_types::f16, format::bfyx });
|
||||
|
||||
topology topology;
|
||||
topology.add(input_layout("input", layout{ { 1, 256, 128, 55 }, data_types::f16, format::bfyx }));
|
||||
topology.add(data("weights", weights_mem));
|
||||
topology.add(data("data1", data1_mem));
|
||||
topology.add(data("data2", data2_mem));
|
||||
topology.add(convolution("conv",
|
||||
input_info("input"),
|
||||
{ "weights" },
|
||||
1,
|
||||
ov::Strides{1, 1},
|
||||
ov::CoordinateDiff{0, 0},
|
||||
ov::Strides{1, 1},
|
||||
ov::CoordinateDiff{0, 0},
|
||||
ov::CoordinateDiff{0, 0}));
|
||||
topology.add(border("pad", { input_info("conv") }, 0, ov::CoordinateDiff{0, 0, 1, 1}, ov::CoordinateDiff{0, 0, 1, 1}));
|
||||
topology.add(gather("gather1", input_info("pad"), input_info("data1"), 2, { 1, 2, 3, 128, 57 }, 0, false));
|
||||
topology.add(gather("gather2", input_info("gather1"), input_info("data2"), 4, { 1, 2, 3, 128, 3, 55 }, 0, false));
|
||||
topology.add(permute("permute", input_info("gather2"), {0, 1, 2, 4, 3, 5}));
|
||||
|
||||
ExecutionConfig config = get_test_default_config(engine);
|
||||
config.set_property(ov::intel_gpu::optimize_data(true));
|
||||
ov::intel_gpu::ImplementationDesc conv_impl = { format::byxf, "" };
|
||||
ov::intel_gpu::ImplementationDesc permute_impl = { format::bfwzyx, "" };
|
||||
config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"conv", conv_impl}, { "permute", permute_impl} }));
|
||||
|
||||
program::ptr prog = nullptr;
|
||||
prog = program::build_program(engine, topology, config);
|
||||
ASSERT_NE(prog, nullptr);
|
||||
|
||||
auto prog_impl = prog.get();
|
||||
|
||||
auto& gather1_node = prog_impl->get_node("gather1");
|
||||
auto& gather2_node = prog_impl->get_node("gather2");
|
||||
|
||||
ASSERT_EQ(gather1_node.get_input_layouts()[0].format, format::bfzyx);
|
||||
ASSERT_EQ(gather1_node.get_output_layout().format, format::bfzyx);
|
||||
|
||||
ASSERT_EQ(gather2_node.get_input_layouts()[0].format, format::bfwzyx);
|
||||
ASSERT_EQ(gather2_node.get_output_layout().format, format::bfwzyx);
|
||||
}
|
||||
|
||||
TEST(reorder_inputs, impl_forcing_basic_format) {
|
||||
auto& engine = get_test_engine();
|
||||
auto input = engine.allocate_memory({ data_types::f32, format::bfyx, { 1, 2, 4, 1 } });
|
||||
|
Loading…
Reference in New Issue
Block a user