[GPU] Fix malfunction in crop static kernel in dynamic shape scenario (#16586)

* Fix malfunction in crop static kernel in dynamic shape execution

* Add unittest

* Fix lint error
Taylor Yeonbok Lee 2023-03-28 21:19:24 -07:00 committed by GitHub
parent 6c766a81b5
commit daf562832f
5 changed files with 29 additions and 14 deletions

@@ -46,6 +46,7 @@ static constexpr Property<bool, PropertyMutability::RW> optimize_data{"GPU_OPTIM
 static constexpr Property<bool, PropertyMutability::RW> allow_static_input_reorder{"GPU_ALLOW_STATIC_INPUT_REORDER"};
 static constexpr Property<bool, PropertyMutability::RW> partial_build_program{"GPU_PARTIAL_BUILD"};
 static constexpr Property<bool, PropertyMutability::RW> allow_new_shape_infer{"GPU_ALLOW_NEW_SHAPE_INFER"};
+static constexpr Property<bool, PropertyMutability::RW> use_only_static_kernels_for_dynamic_shape{"GPU_USE_ONLY_STATIC_KERNELS_FOR_DYNAMIC_SHAPE"};
 static constexpr Property<std::string, PropertyMutability::RW> dump_graphs{"GPU_DUMP_GRAPHS"};
 static constexpr Property<std::vector<std::string>, PropertyMutability::RW> custom_outputs{"GPU_CUSTOM_OUTPUTS"};
 static constexpr Property<ImplForcingMap, PropertyMutability::RW> force_implementations{"GPU_FORCE_IMPLEMENTATIONS"};
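The added property is an internal debug knob: when it is enabled, the plugin compiles a static (per-shape) kernel for dynamic nodes instead of a shape-agnostic one. A minimal usage sketch, mirroring how the unit test below enables it (assumes a test engine is already at hand):

    ExecutionConfig config = get_test_default_config(engine);
    config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
    // Force per-shape static kernels even for dynamic-shape graphs:
    config.set_property(ov::intel_gpu::use_only_static_kernels_for_dynamic_shape(true));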

@@ -34,9 +34,10 @@ void compile_graph::run(program& p) {
     std::exception_ptr exception;
     for (size_t idx = 0; idx < proc_order.size(); idx++) {
         auto& node = *(std::next(proc_order.begin(), idx));
+        bool use_shape_agnostic_impl = !p.get_config().get_property(ov::intel_gpu::use_only_static_kernels_for_dynamic_shape);
         bool can_select_impl = !node->is_type<data>() &&
                                !(node->is_type<mutable_data>() && node->get_dependencies().empty()) &&
-                               (!node->is_dynamic() || node->type()->does_dynamic_implementation_exist(*node));
+                               (!node->is_dynamic() || (use_shape_agnostic_impl && node->type()->does_dynamic_implementation_exist(*node)));
         // TODO: Remove this WA once we have shape agnostic reshape kernel
         if (node->is_type<reshape>() && node->is_dynamic() && !node->can_be_optimized())
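In effect, a dynamic node now gets its implementation selected at graph-compile time only when shape-agnostic kernels are allowed and a dynamic implementation exists; otherwise selection is deferred so a static kernel can be built at runtime for each concrete shape. A condensed sketch of the predicate (a hypothetical helper; the names follow the hunk above):

    // True when compile_graph may pick an implementation for a non-data node up front.
    static bool selects_impl_at_build(bool is_dynamic, bool allow_shape_agnostic, bool has_dynamic_impl) {
        return !is_dynamic || (allow_shape_agnostic && has_dynamic_impl);
    }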

@@ -25,12 +25,11 @@ struct crop_impl : typed_primitive_impl_ocl<crop> {
 public:
     static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) {
         const auto& primitive = impl_param.typed_desc<crop>();
         auto params = get_default_params<kernel_selector::eltwise_params>(impl_param, is_shape_agnostic);
         auto optional_params = get_default_optional_params<kernel_selector::eltwise_optional_params>(impl_param.get_program());
         params.operations.push_back({{kernel_selector::eltwise_params::InputType::Buffer(0)}, kernel_selector::eltwise_mode::ASSIGN});
-        if (impl_param.get_program().get_node(primitive->id).is_dynamic()) {
+        if (impl_param.is_dynamic() || is_shape_agnostic) {
             // WA to always match the compiled dynamic kernel with its dispatch data.
             // Without enforcing this option we may generate a kernel for the "broadcast" scenario due to unmatched tensor dimensions,
             // but at runtime the dispatch data is generated for the non-broadcast case, as the shapes are actually the same.
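This condition is the heart of the fix: the old code asked the program node whether it is dynamic, so a static (per-shape) compilation of a dynamic node still took the dynamic branch and built kernel parameters that could not match the runtime dispatch data. Keying off impl_param.is_dynamic() and the is_shape_agnostic flag ties the decision to the compilation actually being performed. Illustrative call sites (hypothetical, only to show how the flag separates the two paths):

    // One shape-agnostic kernel, reused across shapes:
    auto agnostic_params = crop_impl::get_kernel_params(impl_param, /*is_shape_agnostic=*/true);
    // A static kernel specialized for the current concrete shape:
    auto static_params = crop_impl::get_kernel_params(impl_param, /*is_shape_agnostic=*/false);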

@@ -68,7 +68,8 @@ void ExecutionConfig::set_default() {
         std::make_tuple(ov::intel_gpu::dump_graphs, ""),
         std::make_tuple(ov::intel_gpu::force_implementations, ImplForcingMap{}),
         std::make_tuple(ov::intel_gpu::partial_build_program, false),
-        std::make_tuple(ov::intel_gpu::allow_new_shape_infer, false));
+        std::make_tuple(ov::intel_gpu::allow_new_shape_infer, false),
+        std::make_tuple(ov::intel_gpu::use_only_static_kernels_for_dynamic_shape, false));
 }

 void ExecutionConfig::register_property_impl(const std::pair<std::string, ov::Any>& property, PropertyVisibility visibility, BaseValidator::Ptr validator) {
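The new entry defaults to false, so shape-agnostic kernels remain the default path for dynamic shapes unless the property is explicitly overridden. A one-line sketch of reading it back (get_property is the same accessor used in compile_graph above):

    bool only_static = config.get_property(ov::intel_gpu::use_only_static_kernels_for_dynamic_shape); // false by default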

@@ -1291,22 +1291,35 @@ TEST(crop_gpu, dynamic_i32_in2x3x2x2_crop_offsets) {
     set_values(input, input_vec);
     ExecutionConfig config = get_test_default_config(engine);
     config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
-    network network(engine, topology, config);
-    network.set_input_data("input", input);
-    auto outputs = network.execute();
-    auto output = outputs.at("crop").get_memory();
-    cldnn::mem_lock<int32_t> output_ptr(output, get_test_stream());
+    network network1(engine, topology, config); // run with shape agnostic kernel
+    network1.set_input_data("input", input);
+    auto outputs1 = network1.execute();
+    auto output1 = outputs1.at("crop").get_memory();
+    cldnn::mem_lock<int32_t> output1_ptr(output1, get_test_stream());
     for (int b = 0; b < crop_batch_num; ++b) { //B
         for (int f = 0; f < crop_feature_num; ++f) { //F
             for (int y = 0; y < crop_y_size; ++y) { //Y
                 for (int x = 0; x < crop_x_size; ++x) { //X
                     int linear_id = (b + batch_offset) * (feature_num * y_size * x_size) + (f + feature_offset) * (y_size * x_size) + (y + y_offset) * x_size + (x + x_offset);
                     int output_linear_id = b * (crop_feature_num * crop_y_size * crop_x_size) + f * (crop_y_size * crop_x_size) + y * crop_x_size + x;
-                    ASSERT_EQ(output_ptr[output_linear_id], input_vec[linear_id]);
+                    ASSERT_EQ(output1_ptr[output_linear_id], input_vec[linear_id]);
                 }
             }
         }
     }
+    config.set_property(ov::intel_gpu::use_only_static_kernels_for_dynamic_shape(true));
+    network network2(engine, topology, config); // run with static kernel
+    network2.set_input_data("input", input);
+    auto outputs2 = network2.execute();
+    auto output2 = outputs2.at("crop").get_memory();
+    cldnn::mem_lock<int32_t> output2_ptr(output2, get_test_stream());
+    for (int b = 0; b < crop_batch_num; ++b) { //B
+        for (int f = 0; f < crop_feature_num; ++f) { //F
+            for (int y = 0; y < crop_y_size; ++y) { //Y
+                for (int x = 0; x < crop_x_size; ++x) { //X
+                    int linear_id = (b + batch_offset) * (feature_num * y_size * x_size) + (f + feature_offset) * (y_size * x_size) + (y + y_offset) * x_size + (x + x_offset);
+                    int output_linear_id = b * (crop_feature_num * crop_y_size * crop_x_size) + f * (crop_y_size * crop_x_size) + y * crop_x_size + x;
+                    ASSERT_EQ(output2_ptr[output_linear_id], input_vec[linear_id]);
+                }
+            }
+        }
+    }
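The test runs the same topology twice, once per kernel flavor, and checks both outputs against the same reference. The index arithmetic is the usual dense bfyx linearization; a hypothetical helper equivalent to the inline math (the test itself keeps it inline):

    // Linear offset of (b, f, y, x) in a dense bfyx buffer.
    inline int bfyx_index(int b, int f, int y, int x, int f_num, int y_size, int x_size) {
        return b * (f_num * y_size * x_size) + f * (y_size * x_size) + y * x_size + x;
    }
    // linear_id        == bfyx_index(b + batch_offset, f + feature_offset, y + y_offset, x + x_offset, feature_num, y_size, x_size)
    // output_linear_id == bfyx_index(b, f, y, x, crop_feature_num, crop_y_size, crop_x_size)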