[GPU] Minor fixes for dynamic models (#15543)

parent c0888b45da
commit 9b840a789c
@@ -403,6 +403,10 @@ struct layout {
             }
             return l.size;
         };
+
+        if (lhs.get_partial_shape().rank() != rhs.get_partial_shape().rank())
+            return false;
+
         auto check_pshape = (lhs.is_dynamic() || rhs.is_dynamic()) ? (lhs.size == rhs.size) : (get_pshape(lhs) == get_pshape(rhs));
         return lhs.data_type == rhs.data_type && lhs.format == rhs.format && check_pshape && lhs.data_padding == rhs.data_padding;
     }
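The added rank check guards against layouts that hold the same number of elements but describe tensors of different rank, which the size-based fallback for dynamic shapes would otherwise treat as equal. A minimal standalone sketch of the comparison order, using mock types rather than the actual cldnn::layout API:

#include <cstddef>
#include <iostream>
#include <vector>

struct MockShape {
    std::vector<long> dims;          // -1 marks a dynamic dimension
    size_t rank() const { return dims.size(); }
    bool is_dynamic() const {
        for (auto d : dims) if (d < 0) return true;
        return false;
    }
};

struct MockLayout {
    MockShape shape;
    size_t size;                     // cached linear size, used for dynamic shapes
};

bool layouts_equal(const MockLayout& lhs, const MockLayout& rhs) {
    // Rank mismatch means different tensors even if element counts agree.
    if (lhs.shape.rank() != rhs.shape.rank())
        return false;
    // For dynamic shapes fall back to the cached size, as in the patched code.
    return (lhs.shape.is_dynamic() || rhs.shape.is_dynamic())
               ? lhs.size == rhs.size
               : lhs.shape.dims == rhs.shape.dims;
}

int main() {
    MockLayout a{{{10}}, 10};        // rank-1, 10 elements
    MockLayout b{{{2, 5}}, 10};      // rank-2, also 10 elements
    std::cout << std::boolalpha << layouts_equal(a, b) << "\n";  // false
}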
@@ -185,7 +185,7 @@ std::vector<layout> gemm_inst::transform_input_layouts(const std::shared_ptr<con
     layouts[0].set_partial_shape(updated_input0_pshape);
     layouts[1].set_partial_shape(updated_input1_pshape);

-    if (input_layouts.size() == 3) {
+    if (primitive->input_size() == 3) {
         auto bias_pshape = input_layouts[2].get_partial_shape();
         auto updated_bias_pshape = get_updated_input_shape(bias_pshape, weight_rank, output_rank, primitive->transpose_input1, false);
         layouts[2].set_partial_shape(updated_bias_pshape);
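The fix replaces a check on the current layouts vector with the primitive's declared input count: the vector handed to transform_input_layouts can contain more entries than the gemm actually declares, so only the primitive itself can say whether a bias input exists. A hypothetical sketch of the pitfall (GemmDesc and the padded vector are illustrative stand-ins, not the cldnn types):

#include <cstddef>
#include <iostream>
#include <vector>

struct GemmDesc {
    size_t num_declared_inputs;      // what the user actually wired up
    size_t input_size() const { return num_declared_inputs; }
};

int main() {
    GemmDesc gemm{2};                               // no bias declared
    std::vector<int> input_layouts = {0, 1, 2};     // padded by an earlier pass
    // Wrong: treats the padding entry as a bias input.
    std::cout << (input_layouts.size() == 3) << "\n";   // 1 (true)
    // Right: asks the primitive how many inputs it really has.
    std::cout << (gemm.input_size() == 3) << "\n";      // 0 (false)
}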
@@ -54,7 +54,7 @@ void handle_reshape::run(program& p) {
     while (node_itr != p.get_processing_order().end()) {
         auto& node = (*node_itr++);
         program_helpers::do_for_types<reshape>(*node, [&p](reshape_node& node) {
-            if (node.is_output() || node.get_users().size() > 1 || node.has_fused_primitives())
+            if (node.is_output() || node.get_users().size() > 1 || node.has_fused_primitives() || node.is_dynamic())
                 return;

             auto& out_node = node.get_users().front();
@@ -593,6 +593,9 @@ void remove_redundant_reorders::run(program& p) {

         auto& reshape_input_node = dep_node.as<reshape>();

+        if (reshape_node.is_dynamic())
+            continue;
+
         bool remove_dep = reshape_input_node.get_users().size() == 1 && !reshape_input_node.is_output() &&
             !reshape_input_node.has_fused_primitives();
         bool remove_current = remove_dep && !reshape_input_node.get_dependencies().empty() &&
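Both reshape passes above apply the same fix: a guard clause that skips the optimization when the node is dynamic, since a reshape whose output shape is not yet known cannot be safely folded away at build time. A minimal sketch of the guard pattern, with a hypothetical Node type:

#include <iostream>

struct Node {
    bool is_output = false;
    bool dynamic = false;
    int users = 1;
    bool fused = false;
    bool is_dynamic() const { return dynamic; }
};

// Any condition that makes the optimization unsafe causes an early exit;
// is_dynamic() is now one of those conditions.
bool can_optimize(const Node& n) {
    if (n.is_output || n.users > 1 || n.fused || n.is_dynamic())
        return false;
    return true;
}

int main() {
    Node static_node;
    Node dynamic_node;
    dynamic_node.dynamic = true;
    std::cout << can_optimize(static_node) << can_optimize(dynamic_node) << "\n";  // 10
}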
@@ -6,6 +6,7 @@

 #include "intel_gpu/runtime/engine.hpp"
 #include "intel_gpu/runtime/layout.hpp"
+#include "intel_gpu/runtime/debug_configuration.hpp"

 #include "meta_utils.h"
 #include "primitive_type.h"
@@ -84,7 +85,17 @@ struct primitive_type_base : primitive_type {
     std::vector<cldnn::layout> calc_output_layouts(const cldnn::program_node& node, const kernel_impl_params& impl_param) const override {
         OPENVINO_ASSERT(node.type() == this, "primitive_type_base::calc_output_layouts: primitive type mismatch");

-        return typed_primitive_inst<PType>::template calc_output_layouts<ov::PartialShape>(node, impl_param);
+        for (auto& t : impl_param.input_layouts) {
+            GPU_DEBUG_TRACE_DETAIL << impl_param.desc->id << " input tensor: " << t.to_short_string() << std::endl;
+        }
+
+        auto res = typed_primitive_inst<PType>::template calc_output_layouts<ov::PartialShape>(node, impl_param);
+
+        for (auto& t : res) {
+            GPU_DEBUG_TRACE_DETAIL << impl_param.desc->id << " output tensor: " << t.to_short_string() << std::endl;
+        }
+
+        return res;
     }
     kernel_impl_params get_fake_aligned_params(kernel_impl_params const& orig_impl_param) const override {
         return typed_primitive_inst<PType>::get_fake_aligned_params(orig_impl_param);
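The new body wraps the existing shape-inference call with entry/exit tracing: log the input layouts, delegate to the real computation, log the results, and return them. A self-contained sketch of that wrap-and-trace pattern, with GPU_DEBUG_TRACE_DETAIL modeled as a flag-guarded stream (the real macro is driven by the GPU debug configuration):

#include <iostream>
#include <string>
#include <vector>

static bool debug_trace_enabled = true;  // stand-in for the GPU debug config

#define TRACE_DETAIL if (debug_trace_enabled) std::cout

std::vector<std::string> calc_output_layouts_impl(const std::vector<std::string>& inputs) {
    return {"f16:bfyx:1x3x224x224"};     // placeholder shape-inference result
}

std::vector<std::string> calc_output_layouts(const std::string& id,
                                             const std::vector<std::string>& inputs) {
    for (auto& t : inputs)
        TRACE_DETAIL << id << " input tensor: " << t << std::endl;

    auto res = calc_output_layouts_impl(inputs);

    for (auto& t : res)
        TRACE_DETAIL << id << " output tensor: " << t << std::endl;

    return res;
}

int main() {
    calc_output_layouts("gemm:0", {"f16:bfyx:1x3x?x?"});
}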
@@ -91,7 +91,9 @@ bool is_any_user_cpu(const std::list<const program_node*>& users) {
 uint32_t primitive_inst::get_network_id() const { return _network.get_id(); }

 void primitive_inst::check_memory_to_set(const memory& mem, const layout& layout) const {
-    OPENVINO_ASSERT((mem.get_layout() == layout) || layout.is_dynamic(), "[GPU] Unexpected layout of input memory");
+    OPENVINO_ASSERT((mem.get_layout() == layout) || layout.is_dynamic(), "[GPU] Unexpected layout of input memory for ", id(), " node!\n",
+                    "Node layout: ", layout.to_short_string(), "\n",
+                    "Memory layout: ", mem.get_layout().to_short_string());

     // check shared image/buffer compatibility, if applicable
     auto params = mem.get_internal_params();
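The enriched assertion relies on OPENVINO_ASSERT accepting a variadic message whose parts are concatenated only on failure. A sketch of how such a facility can be built with a C++17 fold expression; this is an illustrative reimplementation, not the actual OpenVINO macro:

#include <iostream>
#include <sstream>
#include <stdexcept>

template <typename... Args>
void assert_with_message(bool cond, Args&&... args) {
    if (!cond) {
        std::ostringstream oss;
        (oss << ... << args);   // fold the message parts over operator<<
        throw std::runtime_error(oss.str());
    }
}

int main() {
    try {
        assert_with_message(false,
                            "[GPU] Unexpected layout of input memory for ", "gemm:0", " node!\n",
                            "Node layout: ", "f16:bfyx:1x3x224x224", "\n",
                            "Memory layout: ", "f16:bfyx:1x3x224x225");
    } catch (const std::exception& e) {
        std::cout << e.what() << std::endl;   // the fully assembled diagnostic
    }
}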
@@ -34,7 +34,6 @@ std::vector<layout> strided_slice_inst::calc_output_layouts(strided_slice_node c
     auto desc = impl_param.typed_desc<strided_slice>();
     auto input0_layout = impl_param.get_input_layout(0);
     auto input0_shape = input0_layout.get<ShapeType>();
-    auto input0_rank = input0_shape.size();

     auto& constant_mem = impl_param.memory_deps;
     auto begin_data = desc->begin;
@@ -49,10 +48,9 @@ std::vector<layout> strided_slice_inst::calc_output_layouts(strided_slice_node c
     }

     ov::op::v1::StridedSlice op;
-    ShapeType begin_shape = begin_data.empty() ? ov::Shape{ input0_rank } : ov::Shape{ begin_data.size() };
-    ShapeType end_shape = end_data.empty() ? ov::Shape{ input0_rank } : ov::Shape{ end_data.size() };
-    ShapeType strides_shape = strides_data.empty() ? ov::Shape{ input0_rank } : ov::Shape{ strides_data.size() };
+    ShapeType begin_shape = begin_data.empty() ? impl_param.get_input_layout(1).get<ShapeType>() : ov::Shape{ begin_data.size() };
+    ShapeType end_shape = end_data.empty() ? impl_param.get_input_layout(2).get<ShapeType>() : ov::Shape{ end_data.size() };
+    ShapeType strides_shape = strides_data.empty() ? impl_param.get_input_layout(3).get<ShapeType>() : ov::Shape{ strides_data.size() };

     std::vector<ShapeType> output_shapes = {ShapeType{}};
     std::vector<ShapeType> input_shapes = {
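With the cached input0_rank gone, each of begin/end/strides falls back to the 1D layout of its own runtime input when its constant data is absent, rather than assuming a shape derived from the data rank. A simplified sketch of that selection, with plain vectors standing in for layouts and -1 marking a dynamic dimension:

#include <cstddef>
#include <iostream>
#include <vector>

// Shape of a begin/end/strides operand: the constant's element count when the
// data is known at build time, otherwise the (possibly dynamic) 1D layout of
// the corresponding runtime input.
std::vector<long> operand_shape(const std::vector<long>& constant_data,
                                const std::vector<long>& input_layout_shape) {
    if (constant_data.empty())
        return input_layout_shape;
    return { static_cast<long>(constant_data.size()) };
}

int main() {
    std::vector<long> begin_const = {0, 0, 1};                 // known constant
    std::vector<long> end_const;                               // runtime-only input
    std::cout << operand_shape(begin_const, {-1})[0] << "\n";  // 3
    std::cout << operand_shape(end_const, {-1})[0] << "\n";    // -1 (dynamic)
}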
|
@ -563,14 +563,9 @@ JitDefinitions DataTensorJitConstant::GetDefinitions() const {
|
|||||||
|
|
||||||
std::string offset = toCodeString(_tensor.GetFirstElementOffset());
|
std::string offset = toCodeString(_tensor.GetFirstElementOffset());
|
||||||
if (_tensor.LogicalSize() == 1 && !_tensor.is_dynamic()) {
|
if (_tensor.LogicalSize() == 1 && !_tensor.is_dynamic()) {
|
||||||
// if tensor contains single element we can always return 0 for safe function
|
// if tensor contains single element we can always return first element offset for safe function
|
||||||
if (_tensor.PitchesDifferFromLogicalDims()) {
|
|
||||||
definitions.push_back({ safe_index_func_name, offset });
|
definitions.push_back({ safe_index_func_name, offset });
|
||||||
definitions.push_back({ index_func_name, offset });
|
definitions.push_back({ index_func_name, offset });
|
||||||
} else {
|
|
||||||
definitions.push_back({ safe_index_func_name, "0" });
|
|
||||||
definitions.push_back({ index_func_name, "0" });
|
|
||||||
}
|
|
||||||
} else if (_tensor.LogicalSize() == _tensor.Feature().v && !_tensor.is_dynamic()) {
|
} else if (_tensor.LogicalSize() == _tensor.Feature().v && !_tensor.is_dynamic()) {
|
||||||
// We support broadcast only if corresponding dimension is equal to 1.
|
// We support broadcast only if corresponding dimension is equal to 1.
|
||||||
// Otherwise, dimensions should be equal and using "f" should be safe.
|
// Otherwise, dimensions should be equal and using "f" should be safe.
|
||||||
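The removed branch indexed single-element tensors at 0 whenever pitches matched the logical dims, but a padded tensor's one logical element lives at a nonzero linear offset, so the first-element offset is the always-correct choice. A small illustration:

#include <iostream>
#include <vector>

int main() {
    const int pad_before = 2;                 // lower padding along one axis
    std::vector<float> buffer(5, 0.f);        // padded allocation for 1 logical element
    const int first_element_offset = pad_before;
    buffer[first_element_offset] = 42.f;      // the single logical value

    std::cout << buffer[0] << "\n";                      // 0  (wrong: fixed index 0)
    std::cout << buffer[first_element_offset] << "\n";   // 42 (correct: element offset)
}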
|
@ -277,11 +277,15 @@ std::string layout::to_string() const {
|
|||||||
std::string layout::to_short_string() const {
|
std::string layout::to_short_string() const {
|
||||||
std::stringstream s;
|
std::stringstream s;
|
||||||
auto dump_shape = [](std::stringstream& stream, const ov::PartialShape& shape) {
|
auto dump_shape = [](std::stringstream& stream, const ov::PartialShape& shape) {
|
||||||
|
if (shape.rank().is_dynamic()) {
|
||||||
|
stream << "...";
|
||||||
|
} else {
|
||||||
for (size_t i = 0; i < shape.size(); i++) {
|
for (size_t i = 0; i < shape.size(); i++) {
|
||||||
stream << shape[i];
|
stream << shape[i];
|
||||||
if (i != shape.size() - 1)
|
if (i != shape.size() - 1)
|
||||||
stream << "x";
|
stream << "x";
|
||||||
}
|
}
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
s << data_type_traits::name(data_type) << ":" << format.to_string() << ":";
|
s << data_type_traits::name(data_type) << ":" << format.to_string() << ":";
|
||||||
|
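to_short_string now prints "..." for a dynamic-rank shape, since there are no dimensions to enumerate. A self-contained sketch of the printer, where a std::optional models the static/dynamic rank distinction of ov::PartialShape (a mock, not the OpenVINO type):

#include <iostream>
#include <optional>
#include <sstream>
#include <string>
#include <vector>

using MockShape = std::optional<std::vector<int>>;   // nullopt => dynamic rank

std::string dump_shape(const MockShape& shape) {
    std::stringstream stream;
    if (!shape.has_value()) {
        stream << "...";
    } else {
        for (size_t i = 0; i < shape->size(); i++) {
            stream << (*shape)[i];
            if (i != shape->size() - 1)
                stream << "x";
        }
    }
    return stream.str();
}

int main() {
    std::cout << dump_shape(std::vector<int>{1, 3, 224, 224}) << "\n";  // 1x3x224x224
    std::cout << dump_shape(std::nullopt) << "\n";                      // ...
}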
src/plugins/intel_gpu/tests/passes/handle_reshape.cpp (new file, 52 lines)
@@ -0,0 +1,52 @@
+// Copyright (C) 2018-2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "test_utils.h"
+
+#include "intel_gpu/runtime/engine.hpp"
+
+#include "intel_gpu/graph/network.hpp"
+#include "intel_gpu/graph/program.hpp"
+#include "data_inst.h"
+#include "eltwise_inst.h"
+#include "reshape_inst.h"
+#include "pass_manager.h"
+#include "to_string_utils.h"
+
+#include "program_wrapper.h"
+
+#include <memory>
+
+using namespace cldnn;
+using namespace ::tests;
+
+TEST(handle_reshape, dont_remove_reshape_that_changes_rank) {
+    auto& engine = get_test_engine();
+    auto data0_layout = engine.allocate_memory({ ov::PartialShape{}, data_types::f16, format::bfyx });
+    auto data1_layout = engine.allocate_memory({ ov::PartialShape{1}, data_types::f16, format::bfyx });
+    auto in_layout = layout{ ov::PartialShape::dynamic(0), data_types::f16, format::bfyx };
+
+    topology topology;
+    topology.add(input_layout("input", in_layout));
+    topology.add(data("data0", data0_layout));
+    topology.add(data("data1", data1_layout));
+    topology.add(eltwise("e1", input_info("input"), input_info("data0"), eltwise_mode::sum));
+    topology.add(reshape("reshape", input_info("e1"), false, {1}, {1}));
+    topology.add(eltwise("e2", input_info("reshape"), input_info("data1"), eltwise_mode::sum));
+
+    ExecutionConfig config;
+    config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
+    config.set_property(ov::intel_gpu::optimize_data(true));
+    auto prog = program::build_program(engine, topology, config, false, true);
+
+    layout_optimizer lo(true);
+
+    program_wrapper::apply_opt_pass<prepare_primitive_fusing>(*prog, lo);
+    program_wrapper::apply_opt_pass<handle_reshape>(*prog);
+
+    ASSERT_NE(prog, nullptr);
+    ASSERT_TRUE(has_node_with_type<reshape>(*prog));
+
+    ASSERT_TRUE(prog->get_node("reshape").can_be_optimized());
+}
@@ -105,7 +105,7 @@ INSTANTIATE_TEST_SUITE_P(smoke, crop_si_test,
            {tensor({0,0,0,0,1,1,1}),tensor({0,0,0,0,1,1,1})},
            {},
            {{{4},data_types::f32,format::bfyx}},
-           {{{3},data_types::f32,format::bfyx}}, 0
+           {{{3, 1, 1, 1},data_types::f32,format::bfyx}}, 0 // TODO: update once shape infer impl is fixed and don't do rank extension
        },
        {
            tensor({-1,-1,-1,-1,-1,-1,-1}),
@@ -54,7 +54,7 @@ INSTANTIATE_TEST_SUITE_P(smoke, reduce_test,
        {
            layout{ov::PartialShape{1, 1, 1, 1}, data_types::f32, format::bfyx},
            reduce_mode::max, {1}, false,
-           layout{ov::PartialShape{1}, data_types::f32, format::bfyx}
+           layout{ov::PartialShape{1, 1, 1}, data_types::f32, format::bfyx}
        },
        {
            layout{ov::PartialShape{1, 1, 1, 1}, data_types::f32, format::bfyx},
@@ -146,6 +146,14 @@ INSTANTIATE_TEST_SUITE_P(smoke, strided_slice_test_four_inputs,
            {1, 0, 1}, {1, 0, 1}, {0, 0, 0}, {0, 0, 0}, {0, 0, 0},
            layout{ov::PartialShape{1, 1, 1024}, data_types::i64, format::bfyx}
        },
+       {
+           layout{ov::PartialShape{200, 128}, data_types::i64, format::bfyx},
+           layout{ov::PartialShape{1}, data_types::i64, format::bfyx}, {0},
+           layout{ov::PartialShape{1}, data_types::i64, format::bfyx}, {15},
+           layout{ov::PartialShape{1}, data_types::i64, format::bfyx}, {1},
+           {0}, {0}, {}, {}, {},
+           layout{ov::PartialShape{15, 128}, data_types::i64, format::bfyx}
+       },
    }));

 } // shape_infer_tests
@@ -78,15 +78,14 @@ struct non_max_suppression_basic : public testing::Test {
        DataType(0.3f),
    };

-    const layout boxes_layout = layout(type_to_data_type<DataType>::value,
-                                       format::bfyx,
-                                       tensor(batch(batch_size), feature(boxes_num), spatial(1, 4)));
-    const layout scores_layout = layout(type_to_data_type<DataType>::value,
-                                        format::bfyx,
-                                        tensor(batch(batch_size), feature(classes_num), spatial(1, boxes_num)));
-    const layout selected_scores_layout =
-        layout(data_type, layout_format, tensor(batch(selected_indices_num), feature(3)));
-    const layout valid_outputs_layout = layout(cldnn::data_types::i32, layout_format, tensor(batch(1)));
+    const layout boxes_layout = layout(ov::PartialShape{batch_size, boxes_num, 4},
+                                       type_to_data_type<DataType>::value,
+                                       format::bfyx);
+    const layout scores_layout = layout(ov::PartialShape{batch_size, classes_num, boxes_num},
+                                        type_to_data_type<DataType>::value,
+                                        format::bfyx);
+    const layout selected_scores_layout = layout(ov::PartialShape{selected_indices_num, 3}, data_type, layout_format);
+    const layout valid_outputs_layout = layout(ov::PartialShape{1}, cldnn::data_types::i32, layout_format);

    memory::ptr get_boxes_memory(engine& engine) {
        auto mem = engine.allocate_memory(boxes_layout);