[GPU] Fix bug in shape agnosic kernel for fused cases (#17533)
* Fix bug in shape agnosic kernel for fused cases * Fix gather unittest to use ov::Shape as input * Minor change (just changed line locatiions)
This commit is contained in:
committed by
GitHub
parent
75fa11c80f
commit
87a39fb007
@@ -605,7 +605,7 @@ struct dep_info {
|
||||
// method in binary_convolution_kernel_generic.cpp.
|
||||
struct fused_operation_desc {
|
||||
std::shared_ptr<fuse_params> op_params;
|
||||
size_t dep_idx_start;
|
||||
int32_t dep_idx_start;
|
||||
size_t dep_size;
|
||||
MultiDataTensor tensors;
|
||||
DataTensor output_tensor;
|
||||
@@ -622,6 +622,9 @@ struct fused_operation_desc {
|
||||
|
||||
return p;
|
||||
}
|
||||
bool has_outer_dep() {
|
||||
return dep_idx_start != -1;
|
||||
}
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
@@ -668,6 +671,16 @@ struct base_params : public Params {
|
||||
}
|
||||
}
|
||||
}
|
||||
for (auto& fd : fused_ops) {
|
||||
if (!fd.has_outer_dep())
|
||||
continue;
|
||||
auto& fused_op_inputs = fd.tensors;
|
||||
for (auto& fused_input : fused_op_inputs) {
|
||||
fused_input.SetDynamicShapeOffset(offset);
|
||||
if (fused_input.is_dynamic())
|
||||
offset += DataTensor::max_rank();
|
||||
}
|
||||
}
|
||||
for (auto& out : outputs) {
|
||||
out.SetDynamicShapeOffset(offset);
|
||||
if (out.is_dynamic()) {
|
||||
|
||||
@@ -18,8 +18,8 @@ using namespace ::tests;
|
||||
|
||||
namespace {
|
||||
struct gather_test_params {
|
||||
tensor dictionary_shape;
|
||||
tensor indices_shape;
|
||||
ov::Shape dictionary_shape;
|
||||
ov::Shape indices_shape;
|
||||
ov::Shape out_shape;
|
||||
int64_t axis;
|
||||
data_types data_type;
|
||||
@@ -32,22 +32,37 @@ struct gather_test_params {
|
||||
|
||||
class GatherPrimitiveFusingTest : public ::BaseFusingTest<gather_test_params> {
|
||||
public:
|
||||
void execute(gather_test_params& p) {
|
||||
void execute(gather_test_params& p, bool is_dynamic = false) {
|
||||
cfg_not_fused.set_property(ov::intel_gpu::allow_new_shape_infer(is_dynamic));
|
||||
cfg_fused.set_property(ov::intel_gpu::allow_new_shape_infer(is_dynamic));
|
||||
cfg_fused.set_property(ov::intel_gpu::optimize_data(true));
|
||||
auto input_prim = get_mem(get_input_layout(p));
|
||||
auto indices_prim = get_mem(get_indices_layout(p), 0, static_cast<int>(get_axis_dim(p) - 1));
|
||||
auto elt_input_prim = get_mem(get_per_channel_layout(p), -10, 10);
|
||||
network network_not_fused(this->engine, this->topology_non_fused, cfg_not_fused);
|
||||
network network_fused(this->engine, this->topology_fused, cfg_fused);
|
||||
network_fused.set_input_data("input", input_prim);
|
||||
network_not_fused.set_input_data("input", input_prim);
|
||||
if (is_dynamic) {
|
||||
network_fused.set_input_data("gather_indices", indices_prim);
|
||||
network_fused.set_input_data("eltwise_data", elt_input_prim);
|
||||
network_not_fused.set_input_data("gather_indices", indices_prim);
|
||||
network_not_fused.set_input_data("eltwise_data", elt_input_prim);
|
||||
}
|
||||
|
||||
compare(network_not_fused, network_fused, p);
|
||||
}
|
||||
|
||||
layout get_input_layout(gather_test_params& p) {
|
||||
return layout{ p.data_type, p.input_format, p.dictionary_shape };
|
||||
layout get_input_layout(gather_test_params& p, bool is_dynamic = false) {
|
||||
if (is_dynamic) {
|
||||
return layout{ ov::PartialShape::dynamic(p.dictionary_shape.size()), p.data_type, p.input_format };
|
||||
} else {
|
||||
return layout{ ov::PartialShape(p.dictionary_shape), p.data_type, p.input_format };
|
||||
}
|
||||
}
|
||||
|
||||
layout get_indices_layout(gather_test_params& p) {
|
||||
return layout{ p.data_type, format::bfyx, p.indices_shape };
|
||||
return layout{ ov::PartialShape(p.indices_shape), p.data_type, format::bfyx };
|
||||
}
|
||||
|
||||
size_t get_axis_dim(gather_test_params& p) {
|
||||
@@ -55,8 +70,20 @@ public:
|
||||
return in_layout.get_dims()[p.axis];
|
||||
}
|
||||
|
||||
layout get_per_channel_layout(gather_test_params& p) {
|
||||
return layout{ p.default_type, p.default_format, tensor{ 1, static_cast<tensor::value_type>(p.out_shape[1]), 1, 1 } };
|
||||
layout get_per_channel_layout(gather_test_params& p, bool is_dynamic = false) {
|
||||
std::vector<ov::Dimension> dims;
|
||||
for (size_t i = 0; i < p.out_shape.size(); ++i) {
|
||||
if (i != 1) {
|
||||
dims.push_back(ov::Dimension(1));
|
||||
} else {
|
||||
if (is_dynamic) {
|
||||
dims.push_back(ov::Dimension(-1));
|
||||
} else {
|
||||
dims.push_back(ov::Dimension(p.out_shape[1]));
|
||||
}
|
||||
}
|
||||
}
|
||||
return layout{ ov::PartialShape(dims), p.default_type, p.default_format };
|
||||
}
|
||||
};
|
||||
} // namespace
|
||||
@@ -65,17 +92,20 @@ public:
|
||||
/* ------------------------------------------ Gather cases --------------------------------------------- */
|
||||
/* ----------------------------------------------------------------------------------------------------- */
|
||||
|
||||
#define CASE_GATHER_FP32_1 { 2, 3, 1, 4 }, { 4, 1, 1, 1 }, { 4, 3, 4, 1 }, 0, data_types::f32, format::bfyx, data_types::f32, format::bfyx
|
||||
#define CASE_GATHER_FP32_2 { 3, 2, 1, 2 }, { 2, 3, 1, 1 }, { 2, 3, 2, 2 }, 0, data_types::f32, format::bfyx, data_types::f32, format::bfyx
|
||||
#define CASE_GATHER_FP32_3 { 3, 1, 1, 2 }, { 2, 1, 1, 1 }, { 3, 2, 2, 1 }, 1, data_types::f32, format::bfyx, data_types::f32, format::bfyx
|
||||
#define CASE_GATHER_FP32_1 { 2, 3, 4, 1 }, { 4, 1, 1, 1 }, { 4, 3, 4, 1 }, 0, data_types::f32, format::bfyx, data_types::f32, format::bfyx
|
||||
#define CASE_GATHER_FP32_2 { 3, 2, 2, 1 }, { 2, 3, 1, 1 }, { 2, 3, 2, 2 }, 0, data_types::f32, format::bfyx, data_types::f32, format::bfyx
|
||||
#define CASE_GATHER_FP32_3 { 3, 1, 2, 1 }, { 2, 1, 1, 1 }, { 3, 2, 2, 1 }, 1, data_types::f32, format::bfyx, data_types::f32, format::bfyx
|
||||
#define CASE_GATHER_FP32_4 { 5, 3, 2, 2 }, { 3, 1, 1, 1 }, { 5, 2, 3, 2 }, 2, data_types::f32, format::bfyx, data_types::f32, format::bfyx
|
||||
#define CASE_GATHER_FP32_5 { 2, 3, 1, 2 }, { 1, 3, 1, 1 }, { 2, 3, 1, 3 }, 2, data_types::f32, format::bfyx, data_types::f32, format::bfyx
|
||||
#define CASE_GATHER_FP32_5 { 2, 3, 2, 1 }, { 3, 1, 1, 1 }, { 2, 3, 3, 1 }, 2, data_types::f32, format::bfyx, data_types::f32, format::bfyx
|
||||
#define CASE_GATHER_FP32_6 { 2, 3, 4 }, { 4 }, { 4, 3, 4 }, 0, data_types::f32, format::bfyx, data_types::f32, format::bfyx
|
||||
|
||||
#define CASE_GATHER_FP16_1 { 2, 3, 1, 4 }, { 4, 1, 1, 1 }, { 4, 3, 4, 1 }, 0, data_types::f16, format::bfyx, data_types::f16, format::bfyx
|
||||
#define CASE_GATHER_FP16_2 { 3, 2, 1, 2 }, { 2, 3, 1, 1 }, { 2, 3, 2, 2 }, 0, data_types::f16, format::bfyx, data_types::f16, format::bfyx
|
||||
#define CASE_GATHER_FP16_3 { 3, 1, 1, 2 }, { 2, 1, 1, 1 }, { 3, 2, 2, 1 }, 1, data_types::f16, format::bfyx, data_types::f16, format::bfyx
|
||||
#define CASE_GATHER_FP16_4 { 5, 3, 2, 2 }, { 3, 1, 1, 1 }, { 5, 2, 3, 2 }, 2, data_types::f16, format::bfyx, data_types::f16, format::bfyx
|
||||
#define CASE_GATHER_FP16_5 { 2, 3, 1, 2 }, { 1, 3, 1, 1 }, { 2, 3, 1, 3 }, 2, data_types::f16, format::bfyx, data_types::f16, format::bfyx
|
||||
#define CASE_GATHER_FP16_1 { 2, 3, 4, 1 }, { 4, 1, 1, 1 }, { 4, 3, 4, 1 }, 0, data_types::f16, format::bfyx, data_types::f16, format::bfyx
|
||||
#define CASE_GATHER_FP16_2 { 3, 2, 2, 1 }, { 2, 3, 1, 1 }, { 2, 3, 2, 2 }, 0, data_types::f16, format::bfyx, data_types::f16, format::bfyx
|
||||
#define CASE_GATHER_FP16_3 { 3, 1, 2, 1 }, { 2, 1, 1, 1 }, { 3, 2, 2, 1 }, 1, data_types::f16, format::bfyx, data_types::f16, format::bfyx
|
||||
#define CASE_GATHER_FP16_4 { 5, 2, 2, 2 }, { 3, 1, 1, 1 }, { 5, 2, 3, 2 }, 2, data_types::f16, format::bfyx, data_types::f16, format::bfyx
|
||||
#define CASE_GATHER_FP16_5 { 2, 3, 2, 1 }, { 3, 1, 1, 1 }, { 2, 3, 3, 1 }, 2, data_types::f16, format::bfyx, data_types::f16, format::bfyx
|
||||
#define CASE_GATHER_FP16_6 { 3, 2, 2 }, { 2, 3 }, { 2, 3, 2, 2 }, 0, data_types::f16, format::bfyx, data_types::f16, format::bfyx
|
||||
#define CASE_GATHER_FP16_7 { 2, 5, 2, 4 }, { 3, 2, 1}, { 2, 3, 2, 1, 2, 4 }, 1, data_types::f16, format::bfyx, data_types::f16, format::bfyx
|
||||
|
||||
#define CASE_GATHER_5D_FP32_1 { 2, 3, 1, 4, 1 }, { 4, 1, 1, 1 }, { 4, 3, 1, 4, 1 }, 0, data_types::f32, format::bfzyx, data_types::f32, format::bfzyx
|
||||
#define CASE_GATHER_5D_FP32_2 { 2, 3, 2, 2, 2 }, { 2, 1, 1, 1 }, { 2, 2, 2, 2, 2 }, 1, data_types::f32, format::bfzyx, data_types::f32, format::bfzyx
|
||||
@@ -83,10 +113,10 @@ public:
|
||||
#define CASE_GATHER_5D_FP32_4 { 2, 3, 1, 4, 4 }, { 2, 1, 1, 1 }, { 2, 3, 2, 4, 1 }, 2, data_types::f32, format::bfzyx, data_types::f32, format::bfzyx
|
||||
#define CASE_GATHER_5D_FP32_5 { 3, 1, 5, 2, 1 }, { 2, 1, 1, 1 }, { 3, 1, 1, 2, 2 }, 4, data_types::f32, format::bfzyx, data_types::f32, format::bfzyx
|
||||
|
||||
#define CASE_GATHER_5D_FP16_1 { 3, 2, 1, 2, 1 }, { 2, 1, 1, 1 }, { 2, 2, 1, 2, 2 }, 0, data_types::f16, format::bfzyx, data_types::f16, format::bfzyx
|
||||
#define CASE_GATHER_5D_FP16_1 { 3, 2, 1, 2, 1 }, { 2, 1, 1, 1 }, { 2, 2, 1, 2, 1 }, 0, data_types::f16, format::bfzyx, data_types::f16, format::bfzyx
|
||||
#define CASE_GATHER_5D_FP16_2 { 1, 3, 1, 2, 1 }, { 2, 1, 1, 1 }, { 1, 2, 1, 2, 1 }, 1, data_types::f16, format::bfzyx, data_types::f16, format::bfzyx
|
||||
#define CASE_GATHER_5D_FP16_3 { 2, 3, 1, 3, 3 }, { 1, 2, 1, 1 }, { 2, 3, 3, 2, 1 }, 3, data_types::f16, format::bfzyx, data_types::f16, format::bfzyx
|
||||
#define CASE_GATHER_5D_FP16_4 { 3, 2, 2, 2, 2 }, { 2, 1, 1, 1 }, { 3, 2, 2, 2, 2 }, 2, data_types::f16, format::bfzyx, data_types::f16, format::bfzyx
|
||||
#define CASE_GATHER_5D_FP16_3 { 2, 3, 3, 3, 1 }, { 1, 2, 1, 1 }, { 2, 3, 3, 1, 2 }, 3, data_types::f16, format::bfzyx, data_types::f16, format::bfzyx
|
||||
#define CASE_GATHER_5D_FP16_4 { 3, 2, 2, 2, 2 }, { 2, 3, 1, 1 }, { 3, 2, 2, 3, 2 }, 2, data_types::f16, format::bfzyx, data_types::f16, format::bfzyx
|
||||
#define CASE_GATHER_5D_FP16_5 { 1, 1, 2, 1, 1 }, { 3, 1, 1, 1 }, { 1, 1, 1, 1, 3 }, 4, data_types::f16, format::bfzyx, data_types::f16, format::bfzyx
|
||||
|
||||
class gather_quantize : public GatherPrimitiveFusingTest {};
|
||||
@@ -177,3 +207,25 @@ INSTANTIATE_TEST_SUITE_P(fusings_gpu, gather_eltwise_activation, ::testing::Valu
|
||||
gather_test_params{ CASE_GATHER_5D_FP16_4, 2, 4 },
|
||||
gather_test_params{ CASE_GATHER_5D_FP16_5, 2, 4 },
|
||||
}));
|
||||
|
||||
class gather_eltwise_activation_dynamic : public GatherPrimitiveFusingTest {};
|
||||
TEST_P(gather_eltwise_activation_dynamic, basic) {
|
||||
auto p = GetParam();
|
||||
create_topologies(
|
||||
input_layout("input", get_input_layout(p, true)),
|
||||
input_layout("gather_indices", layout{ ov::PartialShape::dynamic(p.indices_shape.size()), p.data_type, format::bfyx }),
|
||||
input_layout("eltwise_data", get_per_channel_layout(p, true)),
|
||||
gather("gather_prim", input_info("input"), input_info("gather_indices"), p.axis, p.out_shape),
|
||||
activation("activation", input_info("gather_prim"), activation_func::abs),
|
||||
eltwise("eltwise", { input_info("activation"), input_info("eltwise_data") }, eltwise_mode::prod),
|
||||
reorder("reorder_bfyx", input_info("eltwise"), p.default_format, data_types::f32)
|
||||
);
|
||||
|
||||
tolerance = 1e-5f;
|
||||
execute(p, true);
|
||||
}
|
||||
INSTANTIATE_TEST_SUITE_P(fusings_gpu, gather_eltwise_activation_dynamic, ::testing::ValuesIn(std::vector<gather_test_params>{
|
||||
gather_test_params{ CASE_GATHER_FP32_6, 4, 6 },
|
||||
gather_test_params{ CASE_GATHER_FP16_6, 4, 6 },
|
||||
gather_test_params{ CASE_GATHER_FP16_7, 4, 6 },
|
||||
}));
|
||||
Reference in New Issue
Block a user