[GPU] Minor fixes for dynamic models (#15543)

Vladimir Paramuzov 2023-02-13 07:34:41 +01:00 committed by GitHub
parent c0888b45da
commit 9b840a789c
14 changed files with 108 additions and 32 deletions

View File

@@ -403,6 +403,10 @@ struct layout {
             }
             return l.size;
         };
+
+        if (lhs.get_partial_shape().rank() != rhs.get_partial_shape().rank())
+            return false;
+
         auto check_pshape = (lhs.is_dynamic() || rhs.is_dynamic()) ? (lhs.size == rhs.size) : (get_pshape(lhs) == get_pshape(rhs));
         return lhs.data_type == rhs.data_type && lhs.format == rhs.format && check_pshape && lhs.data_padding == rhs.data_padding;
     }
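Two layouts whose partial shapes differ in rank can never be equal, even when both are dynamic; without the early-out above, two fully dynamic shapes of different ranks could slip through the element-wise comparison. A minimal standalone sketch of the rule, assuming only ov::PartialShape from OpenVINO core (the helper name is illustrative, not part of this commit):

    #include "openvino/core/partial_shape.hpp"
    #include <iostream>

    // Mirrors the new guard: shapes of different ranks are unequal up front.
    bool ranks_match(const ov::PartialShape& lhs, const ov::PartialShape& rhs) {
        return lhs.rank() == rhs.rank();
    }

    int main() {
        std::cout << ranks_match({1, 2, 3}, ov::PartialShape::dynamic(4)) << "\n"; // 0: rank 3 vs rank 4
        std::cout << ranks_match(ov::PartialShape::dynamic(2), {-1, 16}) << "\n";  // 1: both rank 2
    }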

View File

@@ -185,7 +185,7 @@ std::vector<layout> gemm_inst::transform_input_layouts(const std::shared_ptr<con
     layouts[0].set_partial_shape(updated_input0_pshape);
     layouts[1].set_partial_shape(updated_input1_pshape);
 
-    if (input_layouts.size() == 3) {
+    if (primitive->input_size() == 3) {
         auto bias_pshape = input_layouts[2].get_partial_shape();
         auto updated_bias_pshape = get_updated_input_shape(bias_pshape, weight_rank, output_rank, primitive->transpose_input1, false);
         layouts[2].set_partial_shape(updated_bias_pshape);
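The bias check above now consults the primitive descriptor rather than the runtime layout list, since the two counts need not agree once a node carries extra entries. A tiny sketch of the distinction, with hypothetical stand-in types (not the real cldnn classes):

    #include <cassert>
    #include <cstddef>
    #include <vector>

    // Hypothetical stand-ins: the descriptor records how many inputs the op was
    // declared with; the layout vector may hold additional entries.
    struct gemm_desc {
        std::size_t declared_inputs;
        std::size_t input_size() const { return declared_inputs; }
    };

    int main() {
        gemm_desc desc{2};                 // gemm declared without bias
        std::vector<int> layouts(3);       // layout list happens to hold 3 entries
        assert(layouts.size() == 3);       // the old check would wrongly see a bias
        assert(desc.input_size() != 3);    // the new check correctly reports none
    }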

View File

@@ -54,7 +54,7 @@ void handle_reshape::run(program& p) {
     while (node_itr != p.get_processing_order().end()) {
         auto& node = (*node_itr++);
         program_helpers::do_for_types<reshape>(*node, [&p](reshape_node& node) {
-            if (node.is_output() || node.get_users().size() > 1 || node.has_fused_primitives())
+            if (node.is_output() || node.get_users().size() > 1 || node.has_fused_primitives() || node.is_dynamic())
                 return;
 
             auto& out_node = node.get_users().front();

View File

@@ -593,6 +593,9 @@ void remove_redundant_reorders::run(program& p) {
         auto& reshape_input_node = dep_node.as<reshape>();
 
+        if (reshape_node.is_dynamic())
+            continue;
+
         bool remove_dep = reshape_input_node.get_users().size() == 1 && !reshape_input_node.is_output() &&
                           !reshape_input_node.has_fused_primitives();
         bool remove_current = remove_dep && !reshape_input_node.get_dependencies().empty() &&

View File

@@ -6,6 +6,7 @@
 #include "intel_gpu/runtime/engine.hpp"
 #include "intel_gpu/runtime/layout.hpp"
+#include "intel_gpu/runtime/debug_configuration.hpp"
 
 #include "meta_utils.h"
 #include "primitive_type.h"
@@ -84,7 +85,17 @@ struct primitive_type_base : primitive_type {
     std::vector<cldnn::layout> calc_output_layouts(const cldnn::program_node& node, const kernel_impl_params& impl_param) const override {
         OPENVINO_ASSERT(node.type() == this, "primitive_type_base::calc_output_layouts: primitive type mismatch");
-        return typed_primitive_inst<PType>::template calc_output_layouts<ov::PartialShape>(node, impl_param);
+        for (auto& t : impl_param.input_layouts) {
+            GPU_DEBUG_TRACE_DETAIL << impl_param.desc->id << " input tensor: " << t.to_short_string() << std::endl;
+        }
+
+        auto res = typed_primitive_inst<PType>::template calc_output_layouts<ov::PartialShape>(node, impl_param);
+
+        for (auto& t : res) {
+            GPU_DEBUG_TRACE_DETAIL << impl_param.desc->id << " output tensor: " << t.to_short_string() << std::endl;
+        }
+
+        return res;
     }
 
     kernel_impl_params get_fake_aligned_params(kernel_impl_params const& orig_impl_param) const override {
         return typed_primitive_inst<PType>::get_fake_aligned_params(orig_impl_param);
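The added traces print every input and output layout as shape inference runs, which is the main way to see where a dynamic shape goes wrong. GPU_DEBUG_TRACE_DETAIL itself is gated by the plugin's debug configuration; a self-contained sketch of the same compile-in/skip-at-runtime pattern (the macro and flag here are stand-ins, not the real implementation):

    #include <iostream>
    #include <string>
    #include <vector>

    static int g_verbose = 0; // stand-in for the plugin's debug verbosity level
    #define TRACE_DETAIL if (g_verbose < 4) {} else std::cout

    std::vector<std::string> calc_output_layouts(const std::string& id,
                                                 const std::vector<std::string>& inputs) {
        for (auto& t : inputs)
            TRACE_DETAIL << id << " input tensor: " << t << std::endl;
        std::vector<std::string> res = {"1x3x224x224:bfyx"}; // placeholder result
        for (auto& t : res)
            TRACE_DETAIL << id << " output tensor: " << t << std::endl;
        return res;
    }

    int main() {
        g_verbose = 4; // emulate enabling detailed tracing
        calc_output_layouts("conv1", {"1x3x?x?:bfyx"});
    }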

View File

@@ -91,7 +91,9 @@ bool is_any_user_cpu(const std::list<const program_node*>& users) {
 uint32_t primitive_inst::get_network_id() const { return _network.get_id(); }
 
 void primitive_inst::check_memory_to_set(const memory& mem, const layout& layout) const {
-    OPENVINO_ASSERT((mem.get_layout() == layout) || layout.is_dynamic(), "[GPU] Unexpected layout of input memory");
+    OPENVINO_ASSERT((mem.get_layout() == layout) || layout.is_dynamic(), "[GPU] Unexpected layout of input memory for ", id(), " node!\n",
+                    "Node layout: ", layout.to_short_string(), "\n",
+                    "Memory layout: ", mem.get_layout().to_short_string());
 
     // check shared image/buffer compatibility, if applicable
     auto params = mem.get_internal_params();

View File

@@ -34,7 +34,6 @@ std::vector<layout> strided_slice_inst::calc_output_layouts(strided_slice_node c
     auto desc = impl_param.typed_desc<strided_slice>();
     auto input0_layout = impl_param.get_input_layout(0);
     auto input0_shape = input0_layout.get<ShapeType>();
-    auto input0_rank = input0_shape.size();
 
     auto& constant_mem = impl_param.memory_deps;
     auto begin_data = desc->begin;
@@ -49,10 +48,9 @@ std::vector<layout> strided_slice_inst::calc_output_layouts(strided_slice_node c
     }
 
     ov::op::v1::StridedSlice op;
-    ShapeType begin_shape = begin_data.empty() ? ov::Shape{ input0_rank } : ov::Shape{ begin_data.size() };
-    ShapeType end_shape = end_data.empty() ? ov::Shape{ input0_rank } : ov::Shape{ end_data.size() };
-    ShapeType strides_shape = strides_data.empty() ? ov::Shape{ input0_rank } : ov::Shape{ strides_data.size() };
+    ShapeType begin_shape = begin_data.empty() ? impl_param.get_input_layout(1).get<ShapeType>() : ov::Shape{ begin_data.size() };
+    ShapeType end_shape = end_data.empty() ? impl_param.get_input_layout(2).get<ShapeType>() : ov::Shape{ end_data.size() };
+    ShapeType strides_shape = strides_data.empty() ? impl_param.get_input_layout(3).get<ShapeType>() : ov::Shape{ strides_data.size() };
     std::vector<ShapeType> output_shapes = {ShapeType{}};
     std::vector<ShapeType> input_shapes = {
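The removed fallback derived begin/end/strides shapes from input0's rank, which is not even well-defined when input0 has dynamic rank; the fix reads those shapes from the corresponding input layouts (inputs 1..3) instead. A short sketch of the failure mode, assuming OpenVINO core types:

    #include "openvino/core/partial_shape.hpp"
    #include <iostream>

    int main() {
        ov::PartialShape in0 = ov::PartialShape::dynamic(); // rank itself is unknown
        // The old code effectively did `auto input0_rank = in0.size();` and built
        // fallback shapes from it. With a dynamic rank there is no meaningful
        // dimension count, so the shapes must come from the begin/end/strides
        // inputs themselves.
        std::cout << "rank is static: " << in0.rank().is_static() << std::endl; // 0
    }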

View File

@@ -563,14 +563,9 @@ JitDefinitions DataTensorJitConstant::GetDefinitions() const {
         std::string offset = toCodeString(_tensor.GetFirstElementOffset());
 
         if (_tensor.LogicalSize() == 1 && !_tensor.is_dynamic()) {
-            // if tensor contains single element we can always return 0 for safe function
-            if (_tensor.PitchesDifferFromLogicalDims()) {
-                definitions.push_back({ safe_index_func_name, offset });
-                definitions.push_back({ index_func_name, offset });
-            } else {
-                definitions.push_back({ safe_index_func_name, "0" });
-                definitions.push_back({ index_func_name, "0" });
-            }
+            // if tensor contains single element we can always return first element offset for safe function
+            definitions.push_back({ safe_index_func_name, offset });
+            definitions.push_back({ index_func_name, offset });
         } else if (_tensor.LogicalSize() == _tensor.Feature().v && !_tensor.is_dynamic()) {
             // We support broadcast only if corresponding dimension is equal to 1.
             // Otherwise, dimensions should be equal and using "f" should be safe.
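Why the hardcoded "0" branch was dropped: for a padded single-element tensor the payload does not live at linear offset 0, so the jitted index functions must return the first-element offset (which degenerates to 0 for unpadded tensors anyway). A plain-C++ sketch of the addressing, with made-up sizes:

    #include <cassert>
    #include <cstddef>
    #include <vector>

    int main() {
        // A 1-element tensor stored with 2 leading padding elements: the payload
        // sits at linear offset 2, so an index function returning 0 reads padding.
        const std::size_t first_element_offset = 2;
        std::vector<float> buffer(first_element_offset + 1, 0.0f);
        buffer[first_element_offset] = 42.0f;
        assert(buffer[first_element_offset] == 42.0f); // must index 2, not 0
        return 0;
    }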

View File

@@ -277,10 +277,14 @@ std::string layout::to_string() const {
 std::string layout::to_short_string() const {
     std::stringstream s;
 
     auto dump_shape = [](std::stringstream& stream, const ov::PartialShape& shape) {
-        for (size_t i = 0; i < shape.size(); i++) {
-            stream << shape[i];
-            if (i != shape.size() - 1)
-                stream << "x";
+        if (shape.rank().is_dynamic()) {
+            stream << "...";
+        } else {
+            for (size_t i = 0; i < shape.size(); i++) {
+                stream << shape[i];
+                if (i != shape.size() - 1)
+                    stream << "x";
+            }
         }
     };
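With a dynamic rank there are no dimensions to iterate, so the old loop silently printed an empty shape; the fixed lambda prints "..." instead. A standalone sketch of the formatter, assuming ov::PartialShape:

    #include "openvino/core/partial_shape.hpp"
    #include <iostream>
    #include <sstream>
    #include <string>

    // Mirrors the fixed dump_shape lambda: "..." for dynamic rank, NxMx... otherwise.
    std::string dump_shape(const ov::PartialShape& shape) {
        std::stringstream stream;
        if (shape.rank().is_dynamic()) {
            stream << "...";
        } else {
            for (size_t i = 0; i < shape.size(); i++) {
                stream << shape[i];
                if (i != shape.size() - 1)
                    stream << "x";
            }
        }
        return stream.str();
    }

    int main() {
        std::cout << dump_shape(ov::PartialShape{1, 3, 224, 224}) << "\n"; // 1x3x224x224
        std::cout << dump_shape(ov::PartialShape::dynamic()) << "\n";      // ...
    }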

View File

@@ -0,0 +1,52 @@
+// Copyright (C) 2018-2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "test_utils.h"
+
+#include "intel_gpu/runtime/engine.hpp"
+#include "intel_gpu/graph/network.hpp"
+#include "intel_gpu/graph/program.hpp"
+#include "data_inst.h"
+#include "eltwise_inst.h"
+#include "reshape_inst.h"
+#include "pass_manager.h"
+#include "to_string_utils.h"
+
+#include "program_wrapper.h"
+
+#include <memory>
+
+using namespace cldnn;
+using namespace ::tests;
+
+TEST(handle_reshape, dont_remove_reshape_that_changes_rank) {
+    auto& engine = get_test_engine();
+    auto data0_layout = engine.allocate_memory({ ov::PartialShape{}, data_types::f16, format::bfyx });
+    auto data1_layout = engine.allocate_memory({ ov::PartialShape{1}, data_types::f16, format::bfyx });
+    auto in_layout = layout{ ov::PartialShape::dynamic(0), data_types::f16, format::bfyx };
+
+    topology topology;
+    topology.add(input_layout("input", in_layout));
+    topology.add(data("data0", data0_layout));
+    topology.add(data("data1", data1_layout));
+    topology.add(eltwise("e1", input_info("input"), input_info("data0"), eltwise_mode::sum));
+    topology.add(reshape("reshape", input_info("e1"), false, {1}, {1}));
+    topology.add(eltwise("e2", input_info("reshape"), input_info("data1"), eltwise_mode::sum));
+
+    ExecutionConfig config;
+    config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
+    config.set_property(ov::intel_gpu::optimize_data(true));
+    auto prog = program::build_program(engine, topology, config, false, true);
+
+    layout_optimizer lo(true);
+
+    program_wrapper::apply_opt_pass<prepare_primitive_fusing>(*prog, lo);
+    program_wrapper::apply_opt_pass<handle_reshape>(*prog);
+
+    ASSERT_NE(prog, nullptr);
+    ASSERT_TRUE(has_node_with_type<reshape>(*prog));
+
+    ASSERT_TRUE(prog->get_node("reshape").can_be_optimized());
+}

View File

@@ -105,7 +105,7 @@ INSTANTIATE_TEST_SUITE_P(smoke, crop_si_test,
             {tensor({0,0,0,0,1,1,1}),tensor({0,0,0,0,1,1,1})},
             {},
             {{{4},data_types::f32,format::bfyx}},
-            {{{3},data_types::f32,format::bfyx}}, 0
+            {{{3, 1, 1, 1},data_types::f32,format::bfyx}}, 0 // TODO: update once shape infer impl is fixed and don't do rank extension
         },
         {
             tensor({-1,-1,-1,-1,-1,-1,-1}),
{
tensor({-1,-1,-1,-1,-1,-1,-1}),

View File

@@ -54,7 +54,7 @@ INSTANTIATE_TEST_SUITE_P(smoke, reduce_test,
         {
             layout{ov::PartialShape{1, 1, 1, 1}, data_types::f32, format::bfyx},
             reduce_mode::max, {1}, false,
-            layout{ov::PartialShape{1}, data_types::f32, format::bfyx}
+            layout{ov::PartialShape{1, 1, 1}, data_types::f32, format::bfyx}
         },
         {
             layout{ov::PartialShape{1, 1, 1, 1}, data_types::f32, format::bfyx},

View File

@@ -146,6 +146,14 @@ INSTANTIATE_TEST_SUITE_P(smoke, strided_slice_test_four_inputs,
             {1, 0, 1}, {1, 0, 1}, {0, 0, 0}, {0, 0, 0}, {0, 0, 0},
             layout{ov::PartialShape{1, 1, 1024}, data_types::i64, format::bfyx}
         },
+        {
+            layout{ov::PartialShape{200, 128}, data_types::i64, format::bfyx},
+            layout{ov::PartialShape{1}, data_types::i64, format::bfyx}, {0},
+            layout{ov::PartialShape{1}, data_types::i64, format::bfyx}, {15},
+            layout{ov::PartialShape{1}, data_types::i64, format::bfyx}, {1},
+            {0}, {0}, {}, {}, {},
+            layout{ov::PartialShape{15, 128}, data_types::i64, format::bfyx}
+        },
     }));
 
 } // shape_infer_tests

View File

@@ -78,15 +78,14 @@ struct non_max_suppression_basic : public testing::Test {
         DataType(0.3f),
     };
 
-    const layout boxes_layout = layout(type_to_data_type<DataType>::value,
-                                       format::bfyx,
-                                       tensor(batch(batch_size), feature(boxes_num), spatial(1, 4)));
-    const layout scores_layout = layout(type_to_data_type<DataType>::value,
-                                        format::bfyx,
-                                        tensor(batch(batch_size), feature(classes_num), spatial(1, boxes_num)));
-    const layout selected_scores_layout =
-        layout(data_type, layout_format, tensor(batch(selected_indices_num), feature(3)));
-    const layout valid_outputs_layout = layout(cldnn::data_types::i32, layout_format, tensor(batch(1)));
+    const layout boxes_layout = layout(ov::PartialShape{batch_size, boxes_num, 4},
+                                       type_to_data_type<DataType>::value,
+                                       format::bfyx);
+    const layout scores_layout = layout(ov::PartialShape{batch_size, classes_num, boxes_num},
+                                        type_to_data_type<DataType>::value,
+                                        format::bfyx);
+    const layout selected_scores_layout = layout(ov::PartialShape{selected_indices_num, 3}, data_type, layout_format);
+    const layout valid_outputs_layout = layout(ov::PartialShape{1}, cldnn::data_types::i32, layout_format);
 
     memory::ptr get_boxes_memory(engine& engine) {
         auto mem = engine.allocate_memory(boxes_layout);
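The fixture above switches from the legacy tensor-based layout constructor to the PartialShape-based one, which states the logical rank directly instead of squeezing it into 4-D bfyx. A side-by-side sketch with concrete numbers, using only the constructors that appear in this diff:

    #include "intel_gpu/runtime/layout.hpp"

    using namespace cldnn;

    int main() {
        // Legacy spelling: a logical {2, 16, 4} boxes shape is forced into bfyx
        // via batch/feature/spatial, picking up an artificial spatial dim of 1.
        layout legacy(data_types::f32, format::bfyx,
                      tensor(batch(2), feature(16), spatial(1, 4)));

        // PartialShape-based spelling used by the updated tests: the logical
        // rank is expressed directly and also admits dynamic dimensions.
        layout modern(ov::PartialShape{2, 16, 4}, data_types::f32, format::bfyx);

        (void)legacy;
        (void)modern;
        return 0;
    }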