[GPU] Fix reshape split for dynamic models + accuracy fix for SAM (#16911)
This commit is contained in:
committed by
GitHub
parent
9247906879
commit
f736c71feb
@@ -149,6 +149,11 @@ void handle_reshape::run(program& p) {
|
||||
auto new_reshape = std::make_shared<reshape>("reorder:_reshape_split_" + user->id() + "_" + node->id(),
|
||||
input_node.id(),
|
||||
output_shape);
|
||||
new_reshape->special_zero = prim->special_zero;
|
||||
new_reshape->output_partial_shape = prim->output_partial_shape;
|
||||
new_reshape->output_pattern = prim->output_pattern;
|
||||
new_reshape->mode = prim->mode;
|
||||
new_reshape->input = prim->input;
|
||||
auto& new_reshape_node = p.get_or_create(new_reshape);
|
||||
user->replace_dependency(0, input_node);
|
||||
p.add_intermediate(new_reshape_node, *user, 0);
|
||||
|
||||
@@ -23,16 +23,6 @@ namespace cldnn {
|
||||
|
||||
/// Renders a boolean flag as text: "true" when @p cond is set, "false" otherwise.
inline std::string bool_to_str(bool cond) {
    if (cond)
        return "true";
    return "false";
}
|
||||
|
||||
/// Extracts the text enclosed by the first '<' in @p str and the first '>'
/// that follows it.
///
/// @param str  Text to scan, e.g. a type name such as "wrapper<inner>".
/// @return The characters strictly between the two brackets, or an empty
///         string when there is no '<' or no '>' after it.
inline std::string get_extr_type(const std::string& str) {
    const auto begin = str.find('<');
    if (begin == std::string::npos)
        return {};

    // Look for the closing bracket only past the opening one. A '>' located
    // earlier in the string would make (end - begin) wrap around (size_t is
    // unsigned) and the result would run to the end of the string.
    const auto end = str.find('>', begin + 1);
    if (end == std::string::npos)
        return {};

    return str.substr(begin + 1, end - begin - 1);
}
|
||||
|
||||
/// Returns the textual name of @p dt as reported by data_type_traits::name().
inline std::string dt_to_str(data_types dt) {
    std::string type_name = data_type_traits::name(dt);
    return type_name;
}
|
||||
|
||||
@@ -846,7 +846,7 @@ memory::ptr primitive_inst::allocate_output(engine& _engine, memory_pool& pool,
|
||||
allocation_type type, bool reusable, bool reset = true) {
|
||||
OPENVINO_ASSERT(!layout.is_dynamic() || layout.has_upper_bound(), "[GPU] Can't allocate output for dynamic layout without upper bound");
|
||||
// Use layout with max tensor for dynamic shape with upper bound
|
||||
auto static_layout = cldnn::layout(layout.data_type, layout.format, layout.get_tensor(), layout.data_padding);
|
||||
auto static_layout = cldnn::layout(layout.get_partial_shape().get_max_shape(), layout.data_type, layout.format, layout.data_padding);
|
||||
if (_node.get_program().get_config().get_property(ov::intel_gpu::enable_memory_pool))
|
||||
return pool.get_memory(static_layout, id, net_id, dependencies, type, reusable, reset);
|
||||
return pool.get_memory(static_layout, type, reset);
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
#include "to_string_utils.h"
|
||||
#include "data_inst.h"
|
||||
#include "condition_inst.h"
|
||||
#include "data_inst.h"
|
||||
#include "json_object.h"
|
||||
|
||||
#include <algorithm>
|
||||
@@ -170,40 +171,18 @@ void dump_graph_init(std::ofstream& graph,
|
||||
const program& program,
|
||||
std::function<bool(program_node const&)> const& filter) {
|
||||
const std::string invalid_layout_msg = "(invalid layout)";
|
||||
const auto extr_oformat = [&invalid_layout_msg](const program_node* ptr) {
|
||||
if (!ptr->is_valid_output_layout())
|
||||
return invalid_layout_msg;
|
||||
|
||||
auto output_layout = ptr->get_output_layout();
|
||||
std::string out = output_layout.format.to_string();
|
||||
|
||||
return out;
|
||||
};
|
||||
|
||||
const auto extr_odt = [&invalid_layout_msg](const program_node* ptr) {
|
||||
if (!ptr->is_valid_output_layout())
|
||||
return invalid_layout_msg;
|
||||
|
||||
auto output_layout = ptr->get_output_layout();
|
||||
std::string out = dt_to_str(output_layout.data_type);
|
||||
|
||||
return out;
|
||||
};
|
||||
|
||||
const auto dump_mem_info = [&invalid_layout_msg](const program_node* ptr) {
|
||||
std::string out = "size_info: ";
|
||||
std::string out = "layout_info: ";
|
||||
if (!ptr->is_valid_output_layout()) {
|
||||
return out + invalid_layout_msg;
|
||||
}
|
||||
|
||||
auto out_layout = ptr->get_output_layout();
|
||||
auto tensor_str = out_layout.to_string();
|
||||
auto padding = out_layout.data_padding;
|
||||
out += tensor_str;
|
||||
if (!padding) {
|
||||
out += " (nonpadded)";
|
||||
if (!out_layout.data_padding) {
|
||||
out += " " + out_layout.to_short_string();
|
||||
} else {
|
||||
out += "\nl: " + padding.lower_size().to_string() + "\nu: " + padding.upper_size().to_string();
|
||||
out += " " + out_layout.to_string();
|
||||
}
|
||||
|
||||
return out;
|
||||
@@ -218,23 +197,20 @@ void dump_graph_init(std::ofstream& graph,
|
||||
#pragma clang diagnostic push
|
||||
#pragma clang diagnostic ignored "-Wpotentially-evaluated-expression"
|
||||
#endif
|
||||
auto& node_type = typeid(*node);
|
||||
std::string node_type_name = get_extr_type(node_type.name());
|
||||
graph << " " << get_node_id(node) << "[label=\"" << node->id() << ":\n"
|
||||
<< node_type_name << "\n out format: " + extr_oformat(node)
|
||||
<< "\n out data_type: " + extr_odt(node)
|
||||
std::string node_type_name = node->get_primitive()->type_string();
|
||||
graph << " " << get_node_id(node) << "[label=\"" << node->id() << ":"
|
||||
<< "\\ntype: " << node_type_name
|
||||
<< "\\nprocessing number: " << program.get_processing_order().get_processing_number(node)
|
||||
<< "\\n color:" << (node->is_reusing_memory() ? std::to_string(node->get_reused_memory_color()) : "none")
|
||||
<< (node->can_be_optimized() ? "\\n optimized out" : "");
|
||||
|
||||
if (node_type_name != "struct cldnn::data" && node_type_name != "struct cldnn::input_layout" &&
|
||||
!node->can_be_optimized()) {
|
||||
if (!node->is_type<data>()) {
|
||||
graph << "\\n Selected kernel: "
|
||||
<< (node->get_selected_impl() == nullptr ? "none"
|
||||
: node->get_selected_impl()->get_kernel_name()) + " / "
|
||||
<< node->get_preferred_impl_type()
|
||||
<< "\n" + dump_mem_info(node);
|
||||
<< node->get_preferred_impl_type();
|
||||
}
|
||||
graph << "\n" + dump_mem_info(node);
|
||||
graph << "\"";
|
||||
#ifdef __clang__
|
||||
#pragma clang diagnostic pop
|
||||
|
||||
@@ -30,7 +30,15 @@ JitConstants ReduceKernelBase::GetJitConstants(const reduce_params& params) cons
|
||||
|
||||
const auto& output = params.outputs[0];
|
||||
if (output.is_dynamic()) {
|
||||
size_t output_tensor_offset = 1 + GetFusedPrimitiveInputsCount(params);
|
||||
size_t output_tensor_offset = params.inputs[0].is_dynamic() ? 1 : 0;
|
||||
for (size_t i = 0; i < params.fused_ops.size(); i++) {
|
||||
auto& fused_op_inputs = params.fused_ops[i].tensors;
|
||||
|
||||
for (auto& t : fused_op_inputs) {
|
||||
if (t.is_dynamic())
|
||||
output_tensor_offset++;
|
||||
}
|
||||
}
|
||||
DimensionAccessHelper dims(output, output_tensor_offset);
|
||||
jit.AddConstant(MakeJitConstant("COMPUTATIONAL_OPERATIONS_NUMBER", toVectorMulString({dims.x,
|
||||
dims.y,
|
||||
|
||||
@@ -18,8 +18,8 @@ using namespace ::tests;
|
||||
|
||||
namespace {
|
||||
struct reduce_test_params {
|
||||
cldnn::tensor in_shape;
|
||||
cldnn::tensor out_shape;
|
||||
ov::PartialShape in_shape;
|
||||
ov::PartialShape out_shape;
|
||||
cldnn::data_types data_type;
|
||||
cldnn::format input_format;
|
||||
data_types default_type;
|
||||
@@ -34,9 +34,12 @@ struct reduce_test_params {
|
||||
|
||||
class ReduceFusingTest : public ::BaseFusingTest<reduce_test_params> {
|
||||
public:
|
||||
void execute(reduce_test_params& p) {
|
||||
void execute(reduce_test_params& p, bool is_dynamic = false) {
|
||||
auto input_prim = get_mem(get_input_layout(p));
|
||||
|
||||
cfg_not_fused.set_property(ov::intel_gpu::allow_new_shape_infer(is_dynamic));
|
||||
cfg_fused.set_property(ov::intel_gpu::allow_new_shape_infer(is_dynamic));
|
||||
|
||||
network network_not_fused(this->engine, this->topology_non_fused, cfg_not_fused);
|
||||
network network_fused(this->engine, this->topology_fused, cfg_fused);
|
||||
|
||||
@@ -52,70 +55,60 @@ public:
|
||||
if (axis >= static_cast<int64_t>(rank))
|
||||
throw std::runtime_error("Unsupported reduce test case");
|
||||
|
||||
switch (axis) {
|
||||
case 0: // batch
|
||||
p.out_shape.batch[0] = 1;
|
||||
break;
|
||||
case 1: // feature
|
||||
p.out_shape.feature[0] = 1;
|
||||
break;
|
||||
case 2:
|
||||
p.out_shape.spatial[rank - 3] = 1;
|
||||
break;
|
||||
case 3:
|
||||
p.out_shape.spatial[rank - 4] = 1;
|
||||
break;
|
||||
case 4:
|
||||
p.out_shape.spatial[rank - 5] = 1;
|
||||
break;
|
||||
case 5:
|
||||
p.out_shape.spatial[rank - 6] = 1;
|
||||
break;
|
||||
}
|
||||
p.out_shape[axis] = 1;
|
||||
}
|
||||
}
|
||||
|
||||
layout get_dynamic_input_layout(reduce_test_params& p) {
|
||||
return layout{ ov::PartialShape::dynamic(p.in_shape.size()), p.data_type, p.input_format };
|
||||
}
|
||||
|
||||
layout get_input_layout(reduce_test_params& p) {
|
||||
return layout{ p.data_type, p.input_format, p.in_shape };
|
||||
return layout{ p.in_shape, p.data_type, p.input_format };
|
||||
}
|
||||
|
||||
layout get_output_layout(reduce_test_params& p) {
|
||||
return layout{ p.out_shape, p.data_type, p.input_format };
|
||||
}
|
||||
|
||||
layout get_per_channel_layout(reduce_test_params& p) {
|
||||
return layout{ p.default_type, p.default_format, tensor{ 1, p.in_shape.feature[0], 1, 1 } };
|
||||
return layout{ {1, p.in_shape[1], 1, 1}, p.default_type, p.default_format };
|
||||
}
|
||||
|
||||
};
|
||||
} // namespace
|
||||
|
||||
/* ----------------------------------------------------------------------------------------------------- */
|
||||
/* ---------------------------------------- Reduce cases ----------------------------------------------- */
|
||||
/* ----------------------------------------------------------------------------------------------------- */
|
||||
#define CASE_REDUCE_F32_0 { 3, 7, 5, 7 }, { 3, 7, 5, 7 }, data_types::f32, format::b_fs_yx_fsv16, data_types::f32, format::bfyx
|
||||
#define CASE_REDUCE_F32_1 { 3, 7, 5, 7 }, { 3, 7, 5, 7 }, data_types::f32, format::bfyx, data_types::f32, format::bfyx
|
||||
#define CASE_REDUCE_F32_2 { 2, 4, 8, 4, 4 }, { 2, 4, 8, 4, 4 }, data_types::f32, format::bfzyx, data_types::f32, format::bfyx
|
||||
#define CASE_REDUCE_F32_3 { 16, 16, 16, 8, 8, 8 }, { 16, 16, 16, 8, 8, 8 }, data_types::f32, format::bfwzyx, data_types::f32, format::bfyx
|
||||
#define CASE_REDUCE_F32_0 { 3, 7, 7, 5 }, { 3, 7, 7, 5 }, data_types::f32, format::b_fs_yx_fsv16, data_types::f32, format::bfyx
|
||||
#define CASE_REDUCE_F32_1 { 3, 7, 7, 5 }, { 3, 7, 7, 5 }, data_types::f32, format::bfyx, data_types::f32, format::bfyx
|
||||
#define CASE_REDUCE_F32_2 { 2, 4, 4, 4, 8 }, { 2, 4, 4, 4, 8 }, data_types::f32, format::bfzyx, data_types::f32, format::bfyx
|
||||
#define CASE_REDUCE_F32_3 { 16, 16, 8, 8, 8, 16 }, { 16, 16, 8, 8, 8, 16 }, data_types::f32, format::bfwzyx, data_types::f32, format::bfyx
|
||||
#define CASE_REDUCE_F32_4 { 2, 8, 4, 4 }, { 2, 8, 4, 4 }, data_types::f32, format::b_fs_yx_fsv16, data_types::f32, format::bfyx
|
||||
|
||||
#define CASE_REDUCE_F16_0 { 3, 7, 5, 7 }, { 3, 7, 5, 7 }, data_types::f16, format::b_fs_yx_fsv16, data_types::f32, format::bfyx
|
||||
#define CASE_REDUCE_F16_0 { 3, 7, 7, 5 }, { 3, 7, 7, 5 }, data_types::f16, format::b_fs_yx_fsv16, data_types::f32, format::bfyx
|
||||
#define CASE_REDUCE_F16_1 { 2, 8, 4, 4 }, { 2, 8, 4, 4 }, data_types::f16, format::bfyx, data_types::f32, format::bfyx
|
||||
#define CASE_REDUCE_F16_2 { 2, 4, 8, 4, 4 }, { 2, 4, 8, 4, 4 }, data_types::f16, format::bfzyx, data_types::f32, format::bfyx
|
||||
#define CASE_REDUCE_F16_3 { 3, 5, 3, 5, 7, 7 }, { 3, 5, 3, 5, 7, 7 }, data_types::f16, format::bfwzyx, data_types::f32, format::bfyx
|
||||
#define CASE_REDUCE_F16_2 { 2, 4, 4, 4, 8 }, { 2, 4, 4, 4, 8 }, data_types::f16, format::bfzyx, data_types::f32, format::bfyx
|
||||
#define CASE_REDUCE_F16_3 { 3, 5, 7, 7, 5, 3 }, { 3, 5, 7, 7, 5, 3 }, data_types::f16, format::bfwzyx, data_types::f32, format::bfyx
|
||||
#define CASE_REDUCE_F16_4 { 2, 8, 4, 4 }, { 2, 8, 4, 4 }, data_types::f16, format::b_fs_yx_fsv16, data_types::f32, format::bfyx
|
||||
|
||||
#define CASE_REDUCE_I32_0 { 3, 7, 5, 7 }, { 3, 7, 5, 7 }, data_types::i32, format::b_fs_yx_fsv16, data_types::f32, format::bfyx
|
||||
#define CASE_REDUCE_I32_0 { 3, 7, 7, 5 }, { 3, 7, 7, 5 }, data_types::i32, format::b_fs_yx_fsv16, data_types::f32, format::bfyx
|
||||
#define CASE_REDUCE_I32_1 { 2, 8, 4, 4 }, { 2, 8, 4, 4 }, data_types::i32, format::bfyx, data_types::f32, format::bfyx
|
||||
#define CASE_REDUCE_I32_2 { 2, 4, 8, 4, 4 }, { 2, 4, 8, 4, 4 }, data_types::i32, format::bfzyx, data_types::f32, format::bfyx
|
||||
#define CASE_REDUCE_I32_3 { 3, 5, 3, 5, 7, 7 }, { 3, 5, 3, 5, 7, 7 }, data_types::i32, format::bfwzyx, data_types::f32, format::bfyx
|
||||
#define CASE_REDUCE_I32_4 { 3, 5, 3, 5, 7, 7 }, { 3, 5, 3, 5, 7, 7 }, data_types::i32, format::bfwzyx, data_types::f32, format::bfyx
|
||||
#define CASE_REDUCE_I32_2 { 2, 4, 4, 4, 8 }, { 2, 4, 4, 4, 8 }, data_types::i32, format::bfzyx, data_types::f32, format::bfyx
|
||||
#define CASE_REDUCE_I32_3 { 3, 5, 7, 7, 5, 3 }, { 3, 5, 7, 7, 5, 3 }, data_types::i32, format::bfwzyx, data_types::f32, format::bfyx
|
||||
#define CASE_REDUCE_I32_4 { 3, 5, 7, 7, 5, 3 }, { 3, 5, 7, 7, 5, 3 }, data_types::i32, format::bfwzyx, data_types::f32, format::bfyx
|
||||
|
||||
#define CASE_REDUCE_I8_0 { 3, 7, 5, 7 }, { 3, 7, 5, 7 }, data_types::i8, format::b_fs_yx_fsv16, data_types::f32, format::bfyx
|
||||
#define CASE_REDUCE_I8_0 { 3, 7, 7, 5 }, { 3, 7, 7, 5 }, data_types::i8, format::b_fs_yx_fsv16, data_types::f32, format::bfyx
|
||||
#define CASE_REDUCE_I8_1 { 2, 8, 4, 4 }, { 2, 8, 4, 4 }, data_types::i8, format::bfyx, data_types::f32, format::bfyx
|
||||
#define CASE_REDUCE_I8_2 { 2, 4, 8, 4, 4 }, { 2, 4, 8, 4, 4 }, data_types::i8, format::bfzyx, data_types::f32, format::bfyx
|
||||
#define CASE_REDUCE_I8_3 { 3, 5, 3, 5, 7, 7 }, { 3, 5, 3, 5, 7, 7 }, data_types::i8, format::bfwzyx, data_types::f32, format::bfyx
|
||||
#define CASE_REDUCE_I8_2 { 2, 4, 4, 4, 8 }, { 2, 4, 4, 4, 8 }, data_types::i8, format::bfzyx, data_types::f32, format::bfyx
|
||||
#define CASE_REDUCE_I8_3 { 3, 5, 7, 7, 5, 3 }, { 3, 5, 7, 7, 5, 3 }, data_types::i8, format::bfwzyx, data_types::f32, format::bfyx
|
||||
#define CASE_REDUCE_I8_4 { 2, 8, 4, 4 }, { 2, 8, 4, 4 }, data_types::i8, format::b_fs_yx_fsv16, data_types::f32, format::bfyx
|
||||
|
||||
#define CASE_REDUCE_U8_0 { 3, 7, 5, 7 }, { 3, 7, 5, 7 },data_types::u8, format::b_fs_yx_fsv16, data_types::f32, format::bfyx
|
||||
#define CASE_REDUCE_U8_0 { 3, 7, 7, 5 }, { 3, 7, 7, 5 },data_types::u8, format::b_fs_yx_fsv16, data_types::f32, format::bfyx
|
||||
#define CASE_REDUCE_U8_1 { 2, 8, 4, 4 }, { 2, 8, 4, 4 }, data_types::u8, format::bfyx, data_types::f32, format::bfyx
|
||||
#define CASE_REDUCE_U8_2 { 2, 4, 8, 4, 4 }, { 2, 4, 8, 4, 4 }, data_types::u8, format::bfzyx, data_types::f32, format::bfyx
|
||||
#define CASE_REDUCE_U8_3 { 3, 5, 3, 5, 7, 7 }, { 3, 5, 3, 5, 7, 7 }, data_types::u8, format::bfwzyx, data_types::f32, format::bfyx
|
||||
#define CASE_REDUCE_U8_2 { 2, 4, 4, 4, 8 }, { 2, 4, 4, 4, 8 }, data_types::u8, format::bfzyx, data_types::f32, format::bfyx
|
||||
#define CASE_REDUCE_U8_3 { 3, 5, 7, 7, 5, 3 }, { 3, 5, 7, 7, 5, 3 }, data_types::u8, format::bfwzyx, data_types::f32, format::bfyx
|
||||
#define CASE_REDUCE_U8_4 { 2, 8, 4, 4 }, { 2, 8, 4, 4 }, data_types::u8, format::b_fs_yx_fsv16, data_types::f32, format::bfyx
|
||||
|
||||
class reduce_eltwise_activation_quantize : public ReduceFusingTest {};
|
||||
@@ -272,6 +265,24 @@ TEST_P(reduce_scale_activation, per_channel) {
|
||||
execute(p);
|
||||
}
|
||||
|
||||
// reduce -> eltwise(prod) -> activation(cos) -> reorder, built on top of the
// layout returned by get_dynamic_input_layout() and executed with the
// is_dynamic flag of execute() set to true.
TEST_P(reduce_scale_activation, dynamic) {
    auto params = GetParam();

    create_topologies(
        input_layout("input", get_dynamic_input_layout(params)),
        data("scale_data", get_mem(get_per_channel_layout(params), -0.125f)),
        reduce("reduce", input_info("input"), params.reduce_mode, params.reduce_axes, params.keep_dims),
        eltwise("scale", { input_info("reduce"), input_info("scale_data") }, eltwise_mode::prod),
        activation("activation", input_info("scale"), activation_func::cos),
        reorder("output_reorder", input_info("activation"), params.default_format, data_types::f32)
    );

    tolerance = 1e-02f;

    // onednn has no cos activation, so on immad-capable devices the activation
    // stays unfused and one more primitive is expected in the fused network.
    if (engine.get_device_info().supports_immad) {
        ++params.expected_fused_primitives;
    }

    execute(params, true);
}
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(fusings_gpu, reduce_scale_activation, ::testing::ValuesIn(std::vector<reduce_test_params>{
|
||||
reduce_test_params{ CASE_REDUCE_F32_0, 2, 4, reduce_mode::max, { 3, 2, 0 }, true, "reduce_gpu_b_fs_yx_fsv16" },
|
||||
reduce_test_params{ CASE_REDUCE_F32_1, 2, 4, reduce_mode::sum, { 3, 2, 0 }, true, "reduce_ref" },
|
||||
|
||||
@@ -11,6 +11,8 @@
|
||||
#include "data_inst.h"
|
||||
#include "eltwise_inst.h"
|
||||
#include "reshape_inst.h"
|
||||
#include "reorder_inst.h"
|
||||
#include "broadcast_inst.h"
|
||||
#include "pass_manager.h"
|
||||
#include "to_string_utils.h"
|
||||
|
||||
@@ -90,3 +92,104 @@ TEST(handle_reshape, skip_reorder_node_to_split_when_onndnn_not_support) {
|
||||
|
||||
ASSERT_TRUE(prog->get_node("matmul").get_dependency(0).get_output_layout().data_type == data_types::f16);
|
||||
}
|
||||
|
||||
// Builds a graph in which "reshape" feeds two users ("e2" and "reorder"),
// applies the handle_reshape pass and verifies that both users still report
// the {2, 12} output shape requested by the reshape primitive.
TEST(handle_reshape, correct_parameters_propagation) {
    auto& engine = get_test_engine();
    auto data0_layout = engine.allocate_memory({ ov::PartialShape{}, data_types::f16, format::bfyx });
    auto data1_layout = engine.allocate_memory({ ov::PartialShape{1, 12}, data_types::f16, format::bfyx });
    auto in_layout = layout{ ov::PartialShape{1, 2, 3, 4}, data_types::f16, format::bfyx };

    topology topology;
    topology.add(input_layout("input", in_layout));
    topology.add(data("data0", data0_layout));
    topology.add(data("data1", data1_layout));
    topology.add(eltwise("e1", input_info("input"), input_info("data0"), eltwise_mode::sum));
    topology.add(reshape("reshape", input_info("e1"), false, {2, 12}, {2, 12}));
    topology.add(eltwise("e2", input_info("reshape"), input_info("data1"), eltwise_mode::sum));
    topology.add(reorder("reorder", input_info("reshape"), format::bfyx, data_types::f32));

    ExecutionConfig config = get_test_default_config(engine);
    config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
    config.set_property(ov::intel_gpu::optimize_data(true));
    auto prog = program::build_program(engine, topology, config, false, true);

    // The program must be validated before it is dereferenced by the pass
    // below; checking afterwards could never catch a null program.
    ASSERT_NE(prog, nullptr);

    // NOTE(review): `lo` is not referenced anywhere in this test; kept in case
    // constructing it matters - confirm whether it can be removed.
    layout_optimizer lo(true);

    program_wrapper::apply_opt_pass<handle_reshape>(*prog);

    ASSERT_TRUE(has_node_with_type<reshape>(*prog));

    ASSERT_TRUE(prog->get_node("reshape").can_be_optimized());

    auto out_shape0 = prog->get_node("e2").get_output_layout().get_partial_shape();
    auto out_shape1 = prog->get_node("reorder").get_output_layout().get_partial_shape();

    ov::PartialShape expected_out_shape{2, 12};

    // handle_reshape may do reshape split, so ensure that output shape on all branches is correct
    ASSERT_EQ(out_shape0, expected_out_shape);
    ASSERT_EQ(out_shape1, expected_out_shape);
}
|
||||
|
||||
// broadcast (5D, dynamic) -> reshape (to 4D) -> eltwise sum with a second
// dynamic input -> reorder to f32. Verifies the layouts reported around the
// reshape node after the program is built, then runs the network and compares
// every output element with the element-wise sum of the two inputs.
TEST(handle_reshape, reshape_input_reorder) {
    auto& engine = get_test_engine();

    // Device buffers: target shape for broadcast plus the two data inputs.
    auto target_shape_mem = engine.allocate_memory({ ov::PartialShape{5}, data_types::i32, format::bfyx });
    auto dyn_layout0 = layout{ ov::PartialShape{1, -1, 16, 64, 64}, data_types::f16, format::bfzyx };
    auto in0_mem = engine.allocate_memory(layout{ ov::PartialShape{1, 2, 16, 64, 64}, data_types::f16, format::bfzyx });
    auto dyn_layout1 = layout{ ov::PartialShape{-1, 16, 64, 64}, data_types::f16, format::bfyx };
    auto in1_mem = engine.allocate_memory({ ov::PartialShape{2, 16, 64, 64}, data_types::f16, format::bfyx });

    // Host-side reference data, generated in the same order as the buffers are filled.
    auto in0 = generate_random_1d<FLOAT16>(in0_mem->count(), -10, 10);
    auto in1 = generate_random_1d<FLOAT16>(in1_mem->count(), -10, 10);
    set_values<FLOAT16>(in0_mem, in0);
    set_values<int32_t>(target_shape_mem, {1, 2, 16, 64, 64});
    set_values<FLOAT16>(in1_mem, in1);

    topology topo;
    topo.add(input_layout("input0", dyn_layout0));
    topo.add(input_layout("target_shape", target_shape_mem->get_layout()));
    topo.add(broadcast("broadcast", input_info("input0"), input_info("target_shape"), {}, ov::op::BroadcastType::BIDIRECTIONAL));
    topo.add(reshape("reshape", input_info("broadcast"), true, {-1, 16, 64, 64}, {-1, 16, 64, 64}));
    topo.add(input_layout("input1", dyn_layout1));
    topo.add(eltwise("eltw", input_info("reshape"), input_info("input1"), eltwise_mode::sum));
    topo.add(reorder("reorder", input_info("eltw"), format::bfyx, data_types::f32));

    ExecutionConfig cfg = get_test_default_config(engine);
    cfg.set_property(ov::intel_gpu::allow_new_shape_infer(true));
    cfg.set_property(ov::intel_gpu::optimize_data(true));
    auto prog = program::build_program(engine, topo, cfg);

    ASSERT_NE(prog, nullptr);
    ASSERT_TRUE(has_node_with_type<reshape>(*prog));

    ASSERT_TRUE(prog->get_node("reshape").can_be_optimized());
    auto reshape_layout_in = prog->get_node("reshape").get_input_layouts()[0];
    auto reshape_layout_out = prog->get_node("reshape").get_output_layout();

    // Currently the transformations insert a reorder before the reshape which
    // converts the tensor to the default format with rank = reshape_out_rank.
    // That reorder will likely be updated in the future to use reshape_input_rank;
    // once that happens the expected input format becomes bfzyx.
    ASSERT_EQ(reshape_layout_in.format, format::bfyx);
    ASSERT_EQ(reshape_layout_out.format, format::bfyx);

    ov::PartialShape expected_out_shape{-1, 16, 64, 64};
    ASSERT_EQ(reshape_layout_out.get_partial_shape(), expected_out_shape);

    network test_net(prog);

    test_net.set_input_data("input0", in0_mem);
    test_net.set_input_data("input1", in1_mem);
    test_net.set_input_data("target_shape", target_shape_mem);
    auto results = test_net.execute();

    auto out_mem = results.at("reorder").get_memory();
    mem_lock<float> out_view(out_mem, get_test_stream());

    // Element-wise comparison against the host-computed sum.
    for (size_t i = 0; i < out_mem->count(); ++i) {
        float expected = static_cast<float>(in0[i]) + static_cast<float>(in1[i]);
        float actual = out_view[i];
        ASSERT_EQ(expected, actual) << " i = " << i;
    }
}
|
||||
|
||||
Reference in New Issue
Block a user