[GPU] Fix bug in shape agnosic kernel for fused cases (#17533)

* Fix bug in shape agnosic kernel for fused cases

* Fix gather unittest to use ov::Shape as input

* Minor change (just changed line locatiions)
This commit is contained in:
Taylor Yeonbok Lee
2023-05-16 13:23:28 -07:00
committed by GitHub
parent 75fa11c80f
commit 87a39fb007
2 changed files with 86 additions and 21 deletions

View File

@@ -605,7 +605,7 @@ struct dep_info {
// method in binary_convolution_kernel_generic.cpp.
struct fused_operation_desc {
std::shared_ptr<fuse_params> op_params;
size_t dep_idx_start;
int32_t dep_idx_start;
size_t dep_size;
MultiDataTensor tensors;
DataTensor output_tensor;
@@ -622,6 +622,9 @@ struct fused_operation_desc {
return p;
}
bool has_outer_dep() {
return dep_idx_start != -1;
}
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -668,6 +671,16 @@ struct base_params : public Params {
}
}
}
for (auto& fd : fused_ops) {
if (!fd.has_outer_dep())
continue;
auto& fused_op_inputs = fd.tensors;
for (auto& fused_input : fused_op_inputs) {
fused_input.SetDynamicShapeOffset(offset);
if (fused_input.is_dynamic())
offset += DataTensor::max_rank();
}
}
for (auto& out : outputs) {
out.SetDynamicShapeOffset(offset);
if (out.is_dynamic()) {

View File

@@ -18,8 +18,8 @@ using namespace ::tests;
namespace {
struct gather_test_params {
tensor dictionary_shape;
tensor indices_shape;
ov::Shape dictionary_shape;
ov::Shape indices_shape;
ov::Shape out_shape;
int64_t axis;
data_types data_type;
@@ -32,22 +32,37 @@ struct gather_test_params {
class GatherPrimitiveFusingTest : public ::BaseFusingTest<gather_test_params> {
public:
void execute(gather_test_params& p) {
void execute(gather_test_params& p, bool is_dynamic = false) {
cfg_not_fused.set_property(ov::intel_gpu::allow_new_shape_infer(is_dynamic));
cfg_fused.set_property(ov::intel_gpu::allow_new_shape_infer(is_dynamic));
cfg_fused.set_property(ov::intel_gpu::optimize_data(true));
auto input_prim = get_mem(get_input_layout(p));
auto indices_prim = get_mem(get_indices_layout(p), 0, static_cast<int>(get_axis_dim(p) - 1));
auto elt_input_prim = get_mem(get_per_channel_layout(p), -10, 10);
network network_not_fused(this->engine, this->topology_non_fused, cfg_not_fused);
network network_fused(this->engine, this->topology_fused, cfg_fused);
network_fused.set_input_data("input", input_prim);
network_not_fused.set_input_data("input", input_prim);
if (is_dynamic) {
network_fused.set_input_data("gather_indices", indices_prim);
network_fused.set_input_data("eltwise_data", elt_input_prim);
network_not_fused.set_input_data("gather_indices", indices_prim);
network_not_fused.set_input_data("eltwise_data", elt_input_prim);
}
compare(network_not_fused, network_fused, p);
}
layout get_input_layout(gather_test_params& p) {
return layout{ p.data_type, p.input_format, p.dictionary_shape };
layout get_input_layout(gather_test_params& p, bool is_dynamic = false) {
if (is_dynamic) {
return layout{ ov::PartialShape::dynamic(p.dictionary_shape.size()), p.data_type, p.input_format };
} else {
return layout{ ov::PartialShape(p.dictionary_shape), p.data_type, p.input_format };
}
}
layout get_indices_layout(gather_test_params& p) {
return layout{ p.data_type, format::bfyx, p.indices_shape };
return layout{ ov::PartialShape(p.indices_shape), p.data_type, format::bfyx };
}
size_t get_axis_dim(gather_test_params& p) {
@@ -55,8 +70,20 @@ public:
return in_layout.get_dims()[p.axis];
}
layout get_per_channel_layout(gather_test_params& p) {
return layout{ p.default_type, p.default_format, tensor{ 1, static_cast<tensor::value_type>(p.out_shape[1]), 1, 1 } };
layout get_per_channel_layout(gather_test_params& p, bool is_dynamic = false) {
std::vector<ov::Dimension> dims;
for (size_t i = 0; i < p.out_shape.size(); ++i) {
if (i != 1) {
dims.push_back(ov::Dimension(1));
} else {
if (is_dynamic) {
dims.push_back(ov::Dimension(-1));
} else {
dims.push_back(ov::Dimension(p.out_shape[1]));
}
}
}
return layout{ ov::PartialShape(dims), p.default_type, p.default_format };
}
};
} // namespace
@@ -65,17 +92,20 @@ public:
/* ------------------------------------------ Gather cases --------------------------------------------- */
/* ----------------------------------------------------------------------------------------------------- */
#define CASE_GATHER_FP32_1 { 2, 3, 1, 4 }, { 4, 1, 1, 1 }, { 4, 3, 4, 1 }, 0, data_types::f32, format::bfyx, data_types::f32, format::bfyx
#define CASE_GATHER_FP32_2 { 3, 2, 1, 2 }, { 2, 3, 1, 1 }, { 2, 3, 2, 2 }, 0, data_types::f32, format::bfyx, data_types::f32, format::bfyx
#define CASE_GATHER_FP32_3 { 3, 1, 1, 2 }, { 2, 1, 1, 1 }, { 3, 2, 2, 1 }, 1, data_types::f32, format::bfyx, data_types::f32, format::bfyx
#define CASE_GATHER_FP32_1 { 2, 3, 4, 1 }, { 4, 1, 1, 1 }, { 4, 3, 4, 1 }, 0, data_types::f32, format::bfyx, data_types::f32, format::bfyx
#define CASE_GATHER_FP32_2 { 3, 2, 2, 1 }, { 2, 3, 1, 1 }, { 2, 3, 2, 2 }, 0, data_types::f32, format::bfyx, data_types::f32, format::bfyx
#define CASE_GATHER_FP32_3 { 3, 1, 2, 1 }, { 2, 1, 1, 1 }, { 3, 2, 2, 1 }, 1, data_types::f32, format::bfyx, data_types::f32, format::bfyx
#define CASE_GATHER_FP32_4 { 5, 3, 2, 2 }, { 3, 1, 1, 1 }, { 5, 2, 3, 2 }, 2, data_types::f32, format::bfyx, data_types::f32, format::bfyx
#define CASE_GATHER_FP32_5 { 2, 3, 1, 2 }, { 1, 3, 1, 1 }, { 2, 3, 1, 3 }, 2, data_types::f32, format::bfyx, data_types::f32, format::bfyx
#define CASE_GATHER_FP32_5 { 2, 3, 2, 1 }, { 3, 1, 1, 1 }, { 2, 3, 3, 1 }, 2, data_types::f32, format::bfyx, data_types::f32, format::bfyx
#define CASE_GATHER_FP32_6 { 2, 3, 4 }, { 4 }, { 4, 3, 4 }, 0, data_types::f32, format::bfyx, data_types::f32, format::bfyx
#define CASE_GATHER_FP16_1 { 2, 3, 1, 4 }, { 4, 1, 1, 1 }, { 4, 3, 4, 1 }, 0, data_types::f16, format::bfyx, data_types::f16, format::bfyx
#define CASE_GATHER_FP16_2 { 3, 2, 1, 2 }, { 2, 3, 1, 1 }, { 2, 3, 2, 2 }, 0, data_types::f16, format::bfyx, data_types::f16, format::bfyx
#define CASE_GATHER_FP16_3 { 3, 1, 1, 2 }, { 2, 1, 1, 1 }, { 3, 2, 2, 1 }, 1, data_types::f16, format::bfyx, data_types::f16, format::bfyx
#define CASE_GATHER_FP16_4 { 5, 3, 2, 2 }, { 3, 1, 1, 1 }, { 5, 2, 3, 2 }, 2, data_types::f16, format::bfyx, data_types::f16, format::bfyx
#define CASE_GATHER_FP16_5 { 2, 3, 1, 2 }, { 1, 3, 1, 1 }, { 2, 3, 1, 3 }, 2, data_types::f16, format::bfyx, data_types::f16, format::bfyx
#define CASE_GATHER_FP16_1 { 2, 3, 4, 1 }, { 4, 1, 1, 1 }, { 4, 3, 4, 1 }, 0, data_types::f16, format::bfyx, data_types::f16, format::bfyx
#define CASE_GATHER_FP16_2 { 3, 2, 2, 1 }, { 2, 3, 1, 1 }, { 2, 3, 2, 2 }, 0, data_types::f16, format::bfyx, data_types::f16, format::bfyx
#define CASE_GATHER_FP16_3 { 3, 1, 2, 1 }, { 2, 1, 1, 1 }, { 3, 2, 2, 1 }, 1, data_types::f16, format::bfyx, data_types::f16, format::bfyx
#define CASE_GATHER_FP16_4 { 5, 2, 2, 2 }, { 3, 1, 1, 1 }, { 5, 2, 3, 2 }, 2, data_types::f16, format::bfyx, data_types::f16, format::bfyx
#define CASE_GATHER_FP16_5 { 2, 3, 2, 1 }, { 3, 1, 1, 1 }, { 2, 3, 3, 1 }, 2, data_types::f16, format::bfyx, data_types::f16, format::bfyx
#define CASE_GATHER_FP16_6 { 3, 2, 2 }, { 2, 3 }, { 2, 3, 2, 2 }, 0, data_types::f16, format::bfyx, data_types::f16, format::bfyx
#define CASE_GATHER_FP16_7 { 2, 5, 2, 4 }, { 3, 2, 1}, { 2, 3, 2, 1, 2, 4 }, 1, data_types::f16, format::bfyx, data_types::f16, format::bfyx
#define CASE_GATHER_5D_FP32_1 { 2, 3, 1, 4, 1 }, { 4, 1, 1, 1 }, { 4, 3, 1, 4, 1 }, 0, data_types::f32, format::bfzyx, data_types::f32, format::bfzyx
#define CASE_GATHER_5D_FP32_2 { 2, 3, 2, 2, 2 }, { 2, 1, 1, 1 }, { 2, 2, 2, 2, 2 }, 1, data_types::f32, format::bfzyx, data_types::f32, format::bfzyx
@@ -83,10 +113,10 @@ public:
#define CASE_GATHER_5D_FP32_4 { 2, 3, 1, 4, 4 }, { 2, 1, 1, 1 }, { 2, 3, 2, 4, 1 }, 2, data_types::f32, format::bfzyx, data_types::f32, format::bfzyx
#define CASE_GATHER_5D_FP32_5 { 3, 1, 5, 2, 1 }, { 2, 1, 1, 1 }, { 3, 1, 1, 2, 2 }, 4, data_types::f32, format::bfzyx, data_types::f32, format::bfzyx
#define CASE_GATHER_5D_FP16_1 { 3, 2, 1, 2, 1 }, { 2, 1, 1, 1 }, { 2, 2, 1, 2, 2 }, 0, data_types::f16, format::bfzyx, data_types::f16, format::bfzyx
#define CASE_GATHER_5D_FP16_1 { 3, 2, 1, 2, 1 }, { 2, 1, 1, 1 }, { 2, 2, 1, 2, 1 }, 0, data_types::f16, format::bfzyx, data_types::f16, format::bfzyx
#define CASE_GATHER_5D_FP16_2 { 1, 3, 1, 2, 1 }, { 2, 1, 1, 1 }, { 1, 2, 1, 2, 1 }, 1, data_types::f16, format::bfzyx, data_types::f16, format::bfzyx
#define CASE_GATHER_5D_FP16_3 { 2, 3, 1, 3, 3 }, { 1, 2, 1, 1 }, { 2, 3, 3, 2, 1 }, 3, data_types::f16, format::bfzyx, data_types::f16, format::bfzyx
#define CASE_GATHER_5D_FP16_4 { 3, 2, 2, 2, 2 }, { 2, 1, 1, 1 }, { 3, 2, 2, 2, 2 }, 2, data_types::f16, format::bfzyx, data_types::f16, format::bfzyx
#define CASE_GATHER_5D_FP16_3 { 2, 3, 3, 3, 1 }, { 1, 2, 1, 1 }, { 2, 3, 3, 1, 2 }, 3, data_types::f16, format::bfzyx, data_types::f16, format::bfzyx
#define CASE_GATHER_5D_FP16_4 { 3, 2, 2, 2, 2 }, { 2, 3, 1, 1 }, { 3, 2, 2, 3, 2 }, 2, data_types::f16, format::bfzyx, data_types::f16, format::bfzyx
#define CASE_GATHER_5D_FP16_5 { 1, 1, 2, 1, 1 }, { 3, 1, 1, 1 }, { 1, 1, 1, 1, 3 }, 4, data_types::f16, format::bfzyx, data_types::f16, format::bfzyx
class gather_quantize : public GatherPrimitiveFusingTest {};
@@ -177,3 +207,25 @@ INSTANTIATE_TEST_SUITE_P(fusings_gpu, gather_eltwise_activation, ::testing::Valu
gather_test_params{ CASE_GATHER_5D_FP16_4, 2, 4 },
gather_test_params{ CASE_GATHER_5D_FP16_5, 2, 4 },
}));
class gather_eltwise_activation_dynamic : public GatherPrimitiveFusingTest {};
TEST_P(gather_eltwise_activation_dynamic, basic) {
auto p = GetParam();
create_topologies(
input_layout("input", get_input_layout(p, true)),
input_layout("gather_indices", layout{ ov::PartialShape::dynamic(p.indices_shape.size()), p.data_type, format::bfyx }),
input_layout("eltwise_data", get_per_channel_layout(p, true)),
gather("gather_prim", input_info("input"), input_info("gather_indices"), p.axis, p.out_shape),
activation("activation", input_info("gather_prim"), activation_func::abs),
eltwise("eltwise", { input_info("activation"), input_info("eltwise_data") }, eltwise_mode::prod),
reorder("reorder_bfyx", input_info("eltwise"), p.default_format, data_types::f32)
);
tolerance = 1e-5f;
execute(p, true);
}
INSTANTIATE_TEST_SUITE_P(fusings_gpu, gather_eltwise_activation_dynamic, ::testing::ValuesIn(std::vector<gather_test_params>{
gather_test_params{ CASE_GATHER_FP32_6, 4, 6 },
gather_test_params{ CASE_GATHER_FP16_6, 4, 6 },
gather_test_params{ CASE_GATHER_FP16_7, 4, 6 },
}));