[GPU] Added shape canonicalization mechanism (#16166)
parent 4936d4bb1d
commit 5113a5538c
@@ -43,11 +43,13 @@ struct fully_connected : public primitive_base<fully_connected> {
                const primitive_id& weights,
                const primitive_id& bias = "",
                const padding& output_padding = padding(),
-               const size_t input_size = 2)
+               const size_t input_size = 2,
+               const size_t weights_rank = 2)
         : primitive_base(id, {input}, {output_padding}),
           weights(weights),
           bias(bias),
-          input_size(input_size)
+          input_size(input_size),
+          weights_rank(weights_rank)
     {}

     /// @brief Constructs fully connected layer.
@@ -61,11 +63,13 @@ struct fully_connected : public primitive_base<fully_connected> {
                const primitive_id& bias,
                const data_types data_type,
                const padding& output_padding = padding(),
-               const size_t input_size = 2)
+               const size_t input_size = 2,
+               const size_t weights_rank = 2)
         : primitive_base(id, { input }, {output_padding}, {optional_data_type{data_type}}),
           weights(weights),
           bias(bias),
-          input_size(input_size)
+          input_size(input_size),
+          weights_rank(weights_rank)
     {}

     /// @brief Primitive id containing weights data.
@@ -74,10 +78,13 @@ struct fully_connected : public primitive_base<fully_connected> {
     primitive_id bias;
     /// @brief Primitive dimension size.
     size_t input_size;
+    /// @brief Primitive weights rank.
+    size_t weights_rank;

     size_t hash() const override {
         size_t seed = primitive::hash();
         seed = hash_combine(seed, input_size);
+        seed = hash_combine(seed, weights_rank);
         seed = hash_combine(seed, bias.empty());
         return seed;
     }
@@ -89,6 +96,7 @@ struct fully_connected : public primitive_base<fully_connected> {
         auto rhs_casted = downcast<const fully_connected>(rhs);

         return input_size == rhs_casted.input_size &&
+               weights_rank == rhs_casted.weights_rank &&
                bias.empty() == rhs_casted.bias.empty();
     }

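Taken together, the constructor, hash(), and operator== changes thread the weights rank through the primitive's identity. A hypothetical call under the extended signature (ids and ranks invented for illustration; both new arguments default to 2, so existing call sites stay valid):

```cpp
// Hypothetical: an FC lowered from a 3D MatMul.
auto fc = fully_connected("fc0",            // primitive id
                          input_info("in"), // input
                          "weights",        // weights primitive id
                          "bias",           // bias primitive id
                          padding(),
                          /*input_size=*/3,
                          /*weights_rank=*/3);
```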
@@ -521,7 +521,7 @@ struct layout {
     // for smaller buffer which, currently, should always be performed
     bool identical(const layout& other) const;

-    ov::PartialShape transform(cldnn::format new_fmt) const;
+    static ov::PartialShape transform(const ov::PartialShape& pshape, cldnn::format old_fmt, cldnn::format new_fmt);

     size_t hash() const {
         size_t seed = 0;
@@ -98,88 +98,6 @@ std::vector<layout> broadcast_inst::calc_output_layouts(broadcast_node const& /*

 template std::vector<layout> broadcast_inst::calc_output_layouts<ov::PartialShape>(broadcast_node const& node, const kernel_impl_params& impl_param);

-std::vector<size_t> broadcast_inst::extend_input_shape_to_6d(kernel_impl_params const& orig_impl_param, int32_t input_idx) {
-    ov::PartialShape ps;
-
-    auto orig_input_layout = orig_impl_param.get_input_layout();
-    auto updated_param = orig_impl_param;
-    const auto& primitive = updated_param.typed_desc<broadcast>();
-
-    // Extend input dimensions with ones
-    auto i_layout = updated_param.input_layouts[0];
-    auto o_layout = updated_param.output_layouts[0];
-
-    auto input_shape = i_layout.get_shape();
-    auto output_shape = o_layout.get_shape();
-
-    if (primitive->axes_mapping.empty()) {
-        auto broadcastable = [&](layout a, layout b) {
-            auto dims_a = a.get_dims();
-            auto dims_b = b.get_dims();
-            size_t min_size = (dims_a.size() < dims_b.size()) ? dims_a.size() : dims_b.size();
-
-            for (size_t i = 0; i < min_size; i++) {
-                if (!(dims_a[i] == 1 || dims_b[i] == 1 || dims_a[i] == dims_b[i])) {
-                    return false;
-                }
-            }
-            return true;
-        };
-
-        auto input_rank = input_shape.size();
-        auto output_rank = output_shape.size();
-
-        if (!broadcastable(i_layout, o_layout)) {
-            input_shape.insert(input_shape.begin(), output_rank - input_rank, 1ul);
-        }
-    } else {
-        // If axis_mapping is specified, then ones are inserted according to it.
-        ov::Shape tmp_shape;
-        int prev_axis = -1;
-        int next_axis = -1;
-        size_t currentRank = 0;
-        int axe_idx = 0;
-        for (auto& axis : primitive->axes_mapping) {
-            prev_axis = next_axis;
-            next_axis = static_cast<int>(axis);
-
-            int ones_count = std::max(next_axis - prev_axis - 1, 0);
-            tmp_shape.insert(tmp_shape.begin() + currentRank, ones_count, 1ul);
-            tmp_shape.push_back(input_shape[axe_idx]); // Consider the Broadcast kernel 'broadcast' input to output shape
-
-            currentRank += ones_count + 1;
-            axe_idx += 1;
-        }
-
-        // insert 1 to match with output shape
-        if (o_layout.get_rank() > tmp_shape.size()) {
-            tmp_shape.insert(tmp_shape.end(), o_layout.get_rank() - tmp_shape.size(), 1ul);
-        }
-        input_shape = tmp_shape;
-    }
-
-    ps = ov::PartialShape(input_shape);
-
-    if (ps.size() < 4) {
-        ps.insert(ps.end(), 4 - ps.size(), ov::Dimension(1));
-    }
-
-    layout l(ps, data_types::i32, format::get_default_format(ps.size()));
-    return l.transform(format::bfwzyx).to_shape();
-}
-
-std::vector<size_t> broadcast_inst::extend_output_shape_to_6d(kernel_impl_params const& orig_impl_param, int32_t output_idx) {
-    ov::PartialShape ps = orig_impl_param.get_output_layout(output_idx).get_partial_shape();
-
-    if (ps.size() < 4) {
-        ps.insert(ps.end(), 4 - ps.size(), ov::Dimension(1));
-    }
-
-    layout l(ps, data_types::i32, format::get_default_format(ps.size()));
-    return l.transform(format::bfwzyx).to_shape();
-}
-
 std::string broadcast_inst::to_string(broadcast_node const& node) {
     auto desc = node.get_primitive();
     auto node_info = node.desc_to_json();
@@ -115,7 +115,6 @@ std::vector<layout> eltwise_inst::calc_output_layouts(eltwise_node const& /*node
     auto out_data_type = desc->output_data_types[0].value_or(input_layout.data_type);

     auto get_output_layout = [&]() {
         auto out_pshape = input_layout.get<ShapeType>();
         cldnn::format out_format = input_layout.format;

         // We create dummy Add op as shape infer is exactly the same for any eltwise op type, so there is no need to have correct op type
@@ -373,13 +372,24 @@ eltwise_inst::typed_primitive_inst(network& network, eltwise_node const& node) :
                              "");
         }
     } else {
-        std::vector<int32_t> input0_size = node.input().get_output_layout().get_tensor().raw.vector();
-        for (size_t i = 1; i < inputs_count; i++) {
-            std::vector<int32_t> input_size = node.input(i).get_output_layout().get_tensor().raw.vector();
-            for (size_t d = 0; d < input0_size.size(); d++) {
-                bool sizes_equal = input0_size[d] == input_size[d];
+        bool use_new_shape_infer = network.get_config().get_property(ov::intel_gpu::allow_new_shape_infer);
+        auto input0_pshape = node.input().get_output_layout().get_partial_shape();
+
+        for (size_t i = 1; i < inputs_count; ++i) {
+            auto input_pshape = node.input(i).get_output_layout().get_partial_shape();
+
+            if (input0_pshape.size() > input_pshape.size()) {
+                if (use_new_shape_infer) {
+                    input_pshape.insert(input_pshape.begin(), input0_pshape.size() - input_pshape.size(), 1);
+                } else {
+                    input_pshape.insert(input_pshape.end(), input0_pshape.size() - input_pshape.size(), 1);
+                }
+            }
+
+            for (size_t d = 0; d < input0_pshape.size(); ++d) {
+                bool sizes_equal = input0_pshape[d] == input_pshape[d];
                 bool broadcast =
-                    (input0_size[d] == 1 || input_size[d] == 1) && (input0_size[d] != 1 || input_size[d] != 1);
+                    (input0_pshape[d] == 1 || input_pshape[d] == 1) && (input0_pshape[d] != 1 || input_pshape[d] != 1);
                 CLDNN_ERROR_BOOL(node.id(),
                                  "Sizes equal or broadcast is possible",
                                  !(sizes_equal || broadcast),
@@ -114,31 +114,8 @@ std::vector<layout> gemm_inst::calc_output_layouts(gemm_node const& /*node*/, co

 template std::vector<layout> gemm_inst::calc_output_layouts<ov::PartialShape>(gemm_node const& node, const kernel_impl_params& impl_param);

-std::vector<size_t> gemm_inst::extend_input_shape_to_6d(kernel_impl_params const& orig_impl_param, int32_t input_idx) {
-    ov::PartialShape ps = orig_impl_param.get_input_layout(input_idx).get_partial_shape();
-
-    if (ps.size() < 4) {
-        ps.insert(ps.begin(), 4 - ps.size(), ov::Dimension(1));
-    }
-
-    layout l(ps, data_types::i32, format::get_default_format(ps.size()));
-    return l.transform(format::bfwzyx).to_shape();
-}
-
-std::vector<size_t> gemm_inst::extend_output_shape_to_6d(kernel_impl_params const& orig_impl_param, int32_t output_idx) {
-    ov::PartialShape ps = orig_impl_param.get_output_layout(output_idx).get_partial_shape();
-
-    if (ps.size() < 4) {
-        ps.insert(ps.begin(), 4 - ps.size(), ov::Dimension(1));
-    }
-
-    layout l(ps, data_types::i32, format::get_default_format(ps.size()));
-    return l.transform(format::bfwzyx).to_shape();
-}
-
 std::vector<layout> gemm_inst::transform_input_layouts(const std::shared_ptr<const gemm> primitive,
-                                                       const std::vector<layout>& input_layouts,
-                                                       const layout& output_layout) {
+                                                       const std::vector<layout>& input_layouts) {
     auto get_updated_input_shape = [&](const ov::PartialShape& input_pshape, size_t input_rank, size_t output_rank, bool transpose, bool first_input) {
         ov::PartialShape updated_input_pshape;

@@ -46,34 +46,25 @@ struct broadcast_impl : typed_primitive_impl_ocl<broadcast> {
             }
         }

-        // Extend input dimensions with ones
-        auto i_layout = impl_param.input_layouts[0];
-        auto o_layout = impl_param.output_layouts[0];
-        if (i_layout.is_static() && o_layout.is_static()) {
-            auto data_shape = i_layout.get_shape();
-            auto output_shape = o_layout.get_shape();
+        return {params, optional_params};
+    }

-            if (primitive->axes_mapping.empty()) {
-                auto broadcastable = [&](layout a, layout b) {
-                    auto dims_a = a.get_dims();
-                    auto dims_b = b.get_dims();
-                    size_t min_size = (dims_a.size() < dims_b.size()) ? dims_a.size() : dims_b.size();
+    static kernel_impl_params static_canonicalize_shapes(const kernel_impl_params& impl_params) {
+        auto updated_impl_params = canonicalize_fused_shapes(impl_params);
+        const auto& primitive = impl_params.typed_desc<broadcast>();

-                    for (size_t i = 0; i < min_size; i++) {
-                        if (!(dims_a[i] == 1 || dims_b[i] == 1 || dims_a[i] == dims_b[i])) {
-                            return false;
-                        }
-                    }
-                    return true;
-                };
+        auto i_layout = impl_params.input_layouts[0];
+        auto o_layout = impl_params.output_layouts[0];

-                auto input_rank = data_shape.size();
-                auto output_rank = output_shape.size();
+        auto input_pshape = i_layout.get_partial_shape();
+        auto output_pshape = o_layout.get_partial_shape();

-                if (!broadcastable(i_layout, o_layout)) {
-                    data_shape.insert(data_shape.begin(), output_rank - input_rank, 1ul);
-                }
-            } else {
+        auto output_rank = output_pshape.size();

+        if (primitive->axes_mapping.empty()) {
+            input_pshape = extend_shape_to_rank_from_begin(input_pshape, output_rank);
+        } else {
+            if (i_layout.is_static() && o_layout.is_static()) {
                 // If axis_mapping is specified, then ones are inserted according to it.
                 ov::Shape tmp_shape;
                 int prev_axis = -1;
@@ -86,37 +77,14 @@ struct broadcast_impl : typed_primitive_impl_ocl<broadcast> {

                 int ones_count = std::max(next_axis - prev_axis - 1, 0);
                 tmp_shape.insert(tmp_shape.begin() + currentRank, ones_count, 1ul);
-                tmp_shape.push_back(data_shape[axe_idx]); // Consider the Broadcast kernel 'broadcast' input to output shape
+                tmp_shape.push_back(input_pshape[axe_idx].get_length()); // Consider the Broadcast kernel 'broadcast' input to output shape

                 currentRank += ones_count + 1;
                 axe_idx += 1;
             }

-            if (o_layout.get_rank() > tmp_shape.size()) {
-                tmp_shape.insert(tmp_shape.end(), o_layout.get_rank() - tmp_shape.size(), 1ul);
-            }
-            data_shape = tmp_shape;
-        }
-
-        layout new_layout = i_layout;
-        new_layout.format = format::adjust_to_rank(i_layout.format, data_shape.size());
-        new_layout.set_partial_shape(data_shape);
-        params.inputs[0] = convert_data_tensor(new_layout);
-    } else {
-        // dynamic input
-        if (primitive->axes_mapping.empty()) {
-            ov::PartialShape i_shape = i_layout.get_partial_shape();
-            ov::PartialShape o_shape = o_layout.get_partial_shape();
-
-            auto i_rank = i_shape.size();
-            auto o_rank = o_shape.size();
-            i_shape.insert(i_shape.begin(), o_rank - i_rank, 1ul);
-
-            layout new_layout = i_layout;
-            new_layout.format = format::adjust_to_rank(i_layout.format, i_shape.size());
-            new_layout.set_partial_shape(i_shape);
-            params.inputs[0] = convert_data_tensor(new_layout);
+                input_pshape = extend_shape_to_rank_from_end(tmp_shape, output_rank);
             } else {
                 // dynamic input
+                // insert 1 to extend dimensions by axes_mapping
                 ov::Shape tmp_shape;
                 size_t idx = 0;
@@ -131,20 +99,20 @@ struct broadcast_impl : typed_primitive_impl_ocl<broadcast> {
                     idx += 1;
                 }
             }

             // insert 1 to match with output shape
-            if (o_layout.get_rank() > tmp_shape.size()) {
-                tmp_shape.insert(tmp_shape.end(), o_layout.get_rank() - tmp_shape.size(), 1ul);
-            }
-
-            layout new_layout = i_layout;
-            new_layout.format = format::adjust_to_rank(i_layout.format, tmp_shape.size());
-            new_layout.set_partial_shape(tmp_shape);
-            params.inputs[0] = convert_data_tensor(new_layout);
+            input_pshape = extend_shape_to_rank_from_end(tmp_shape, output_rank);
             }
         }

-        return {params, optional_params};
+        updated_impl_params.input_layouts[0].set_partial_shape(extend_shape_to_rank_from_end(input_pshape));
+        updated_impl_params.input_layouts[0].format = format::adjust_to_rank(i_layout.format, input_pshape.size());
+
+        updated_impl_params.output_layouts[0].set_partial_shape(extend_shape_to_rank_from_end(output_pshape));
+
+        return updated_impl_params;
     }

+    kernel_impl_params canonicalize_shapes(const kernel_impl_params& impl_params) const override {
+        return static_canonicalize_shapes(impl_params);
+    }
+
     void update_dispatch_data(const kernel_impl_params& impl_param) override {
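The axes_mapping branch above places each input dimension at its mapped output axis and fills every gap with ones. A standalone, runnable sketch of that arithmetic on plain vectors (function and variable names invented for illustration):

```cpp
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <vector>

// input {3, 4} with axes_mapping {1, 3} over a rank-4 output -> {1, 3, 1, 4}.
std::vector<int64_t> expand_by_axes_mapping(const std::vector<int64_t>& in,
                                            const std::vector<int>& axes_mapping,
                                            size_t output_rank) {
    std::vector<int64_t> out;
    int prev_axis = -1;
    size_t idx = 0;
    for (int axis : axes_mapping) {
        out.insert(out.end(), std::max(axis - prev_axis - 1, 0), 1);  // fill gap with ones
        out.push_back(in[idx++]);                                     // place mapped dim
        prev_axis = axis;
    }
    if (out.size() < output_rank)
        out.insert(out.end(), output_rank - out.size(), 1);           // pad trailing dims
    return out;
}

int main() {
    for (auto d : expand_by_axes_mapping({3, 4}, {1, 3}, 4))
        std::cout << d << ' ';  // prints: 1 3 1 4
    std::cout << '\n';
}
```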
@@ -157,7 +125,7 @@ struct broadcast_impl : typed_primitive_impl_ocl<broadcast> {
 namespace detail {

 attach_broadcast_impl::attach_broadcast_impl() {
-    auto dyn_types = {
+    auto types = {
         data_types::f32,
         data_types::f16,
         data_types::i8,
@@ -175,115 +143,34 @@ attach_broadcast_impl::attach_broadcast_impl() {
     implementation_map<broadcast>::add(impl_types::ocl,
                                        shape_types::dynamic_shape,
                                        typed_primitive_impl_ocl<broadcast>::create<broadcast_impl>,
-                                       dyn_types,
+                                       types,
                                        dyn_formats);

-    implementation_map<broadcast>::add(impl_types::ocl, shape_types::static_shape, typed_primitive_impl_ocl<broadcast>::create<broadcast_impl>, {
-        std::make_tuple(data_types::f32, format::bfyx),
-        std::make_tuple(data_types::f16, format::bfyx),
-        std::make_tuple(data_types::i8, format::bfyx),
-        std::make_tuple(data_types::u8, format::bfyx),
-        std::make_tuple(data_types::i32, format::bfyx),
-        std::make_tuple(data_types::i64, format::bfyx),
+    auto static_formats = {
+        format::bfyx,
+        format::b_fs_yx_fsv4,
+        format::b_fs_yx_fsv16,
+        format::b_fs_yx_fsv32,
+        format::bs_fs_yx_bsv4_fsv2,
+        format::bs_fs_yx_bsv4_fsv4,
+        format::bs_fs_yx_bsv8_fsv2,
+        format::bs_fs_yx_bsv8_fsv4,
+        format::bs_fs_yx_bsv16_fsv16,
+        format::bs_fs_yx_bsv32_fsv16,
+        format::bs_fs_yx_bsv32_fsv32,

-        std::make_tuple(data_types::f32, format::bfzyx),
-        std::make_tuple(data_types::f16, format::bfzyx),
-        std::make_tuple(data_types::i8, format::bfzyx),
-        std::make_tuple(data_types::u8, format::bfzyx),
-        std::make_tuple(data_types::i32, format::bfzyx),
-        std::make_tuple(data_types::i64, format::bfzyx),
+        format::bfzyx,
+        format::b_fs_zyx_fsv16,
+        format::b_fs_zyx_fsv32,

-        std::make_tuple(data_types::f32, format::bfwzyx),
-        std::make_tuple(data_types::f16, format::bfwzyx),
-        std::make_tuple(data_types::i8, format::bfwzyx),
-        std::make_tuple(data_types::u8, format::bfwzyx),
-        std::make_tuple(data_types::i32, format::bfwzyx),
-        std::make_tuple(data_types::i64, format::bfwzyx),
+        format::bfwzyx
+    };

-        std::make_tuple(data_types::f32, format::b_fs_yx_fsv4),
-        std::make_tuple(data_types::f16, format::b_fs_yx_fsv4),
-        std::make_tuple(data_types::i8, format::b_fs_yx_fsv4),
-        std::make_tuple(data_types::u8, format::b_fs_yx_fsv4),
-        std::make_tuple(data_types::i32, format::b_fs_yx_fsv4),
-        std::make_tuple(data_types::i64, format::b_fs_yx_fsv4),
-
-        std::make_tuple(data_types::f32, format::b_fs_yx_fsv16),
-        std::make_tuple(data_types::f16, format::b_fs_yx_fsv16),
-        std::make_tuple(data_types::i8, format::b_fs_yx_fsv16),
-        std::make_tuple(data_types::u8, format::b_fs_yx_fsv16),
-        std::make_tuple(data_types::i32, format::b_fs_yx_fsv16),
-        std::make_tuple(data_types::i64, format::b_fs_yx_fsv16),
-
-        std::make_tuple(data_types::f32, format::b_fs_yx_fsv32),
-        std::make_tuple(data_types::f16, format::b_fs_yx_fsv32),
-        std::make_tuple(data_types::i8, format::b_fs_yx_fsv32),
-        std::make_tuple(data_types::u8, format::b_fs_yx_fsv32),
-        std::make_tuple(data_types::i32, format::b_fs_yx_fsv32),
-        std::make_tuple(data_types::i64, format::b_fs_yx_fsv32),
-
-        std::make_tuple(data_types::f32, format::b_fs_zyx_fsv16),
-        std::make_tuple(data_types::f16, format::b_fs_zyx_fsv16),
-        std::make_tuple(data_types::i8, format::b_fs_zyx_fsv16),
-        std::make_tuple(data_types::u8, format::b_fs_zyx_fsv16),
-        std::make_tuple(data_types::i32, format::b_fs_zyx_fsv16),
-        std::make_tuple(data_types::i64, format::b_fs_zyx_fsv16),
-
-        std::make_tuple(data_types::f32, format::b_fs_zyx_fsv32),
-        std::make_tuple(data_types::f16, format::b_fs_zyx_fsv32),
-        std::make_tuple(data_types::i8, format::b_fs_zyx_fsv32),
-        std::make_tuple(data_types::u8, format::b_fs_zyx_fsv32),
-        std::make_tuple(data_types::i32, format::b_fs_zyx_fsv32),
-        std::make_tuple(data_types::i64, format::b_fs_zyx_fsv32),
-
-        std::make_tuple(data_types::f32, format::bs_fs_yx_bsv4_fsv2),
-        std::make_tuple(data_types::f16, format::bs_fs_yx_bsv4_fsv2),
-        std::make_tuple(data_types::i8, format::bs_fs_yx_bsv4_fsv2),
-        std::make_tuple(data_types::u8, format::bs_fs_yx_bsv4_fsv2),
-        std::make_tuple(data_types::i32, format::bs_fs_yx_bsv4_fsv2),
-        std::make_tuple(data_types::i64, format::bs_fs_yx_bsv4_fsv2),
-
-        std::make_tuple(data_types::f32, format::bs_fs_yx_bsv4_fsv4),
-        std::make_tuple(data_types::f16, format::bs_fs_yx_bsv4_fsv4),
-        std::make_tuple(data_types::i8, format::bs_fs_yx_bsv4_fsv4),
-        std::make_tuple(data_types::u8, format::bs_fs_yx_bsv4_fsv4),
-        std::make_tuple(data_types::i32, format::bs_fs_yx_bsv4_fsv4),
-        std::make_tuple(data_types::i64, format::bs_fs_yx_bsv4_fsv4),
-
-        std::make_tuple(data_types::f32, format::bs_fs_yx_bsv8_fsv2),
-        std::make_tuple(data_types::f16, format::bs_fs_yx_bsv8_fsv2),
-        std::make_tuple(data_types::i8, format::bs_fs_yx_bsv8_fsv2),
-        std::make_tuple(data_types::u8, format::bs_fs_yx_bsv8_fsv2),
-        std::make_tuple(data_types::i32, format::bs_fs_yx_bsv8_fsv2),
-        std::make_tuple(data_types::i64, format::bs_fs_yx_bsv8_fsv2),
-
-        std::make_tuple(data_types::f32, format::bs_fs_yx_bsv8_fsv4),
-        std::make_tuple(data_types::f16, format::bs_fs_yx_bsv8_fsv4),
-        std::make_tuple(data_types::i8, format::bs_fs_yx_bsv8_fsv4),
-        std::make_tuple(data_types::u8, format::bs_fs_yx_bsv8_fsv4),
-        std::make_tuple(data_types::i32, format::bs_fs_yx_bsv8_fsv4),
-        std::make_tuple(data_types::i64, format::bs_fs_yx_bsv8_fsv4),
-
-        std::make_tuple(data_types::f32, format::bs_fs_yx_bsv16_fsv16),
-        std::make_tuple(data_types::f16, format::bs_fs_yx_bsv16_fsv16),
-        std::make_tuple(data_types::i8, format::bs_fs_yx_bsv16_fsv16),
-        std::make_tuple(data_types::u8, format::bs_fs_yx_bsv16_fsv16),
-        std::make_tuple(data_types::i32, format::bs_fs_yx_bsv16_fsv16),
-        std::make_tuple(data_types::i64, format::bs_fs_yx_bsv16_fsv16),
-
-        std::make_tuple(data_types::f32, format::bs_fs_yx_bsv32_fsv16),
-        std::make_tuple(data_types::f16, format::bs_fs_yx_bsv32_fsv16),
-        std::make_tuple(data_types::i8, format::bs_fs_yx_bsv32_fsv16),
-        std::make_tuple(data_types::u8, format::bs_fs_yx_bsv32_fsv16),
-        std::make_tuple(data_types::i32, format::bs_fs_yx_bsv32_fsv16),
-        std::make_tuple(data_types::i64, format::bs_fs_yx_bsv32_fsv16),
-
-        std::make_tuple(data_types::f32, format::bs_fs_yx_bsv32_fsv32),
-        std::make_tuple(data_types::f16, format::bs_fs_yx_bsv32_fsv32),
-        std::make_tuple(data_types::i8, format::bs_fs_yx_bsv32_fsv32),
-        std::make_tuple(data_types::u8, format::bs_fs_yx_bsv32_fsv32),
-        std::make_tuple(data_types::i32, format::bs_fs_yx_bsv32_fsv32),
-        std::make_tuple(data_types::i64, format::bs_fs_yx_bsv32_fsv32),
-    });
+    implementation_map<broadcast>::add(impl_types::ocl,
+                                       shape_types::static_shape,
+                                       typed_primitive_impl_ocl<broadcast>::create<broadcast_impl>,
+                                       types,
+                                       static_formats);
 }

 } // namespace detail
@@ -119,6 +119,46 @@ public:
         return {params, optional_params};
     }

+    static kernel_impl_params static_canonicalize_shapes(const kernel_impl_params& impl_params) {
+        auto updated_impl_params = canonicalize_fused_shapes(impl_params);
+        bool use_new_shape_infer = impl_params.prog->get_config().get_property(ov::intel_gpu::allow_new_shape_infer);
+
+        auto broadcastable = [use_new_shape_infer](const ov::PartialShape& first_pshape, const ov::PartialShape& second_pshape) {
+            if (first_pshape.is_dynamic() || second_pshape.is_dynamic()) {
+                return false;
+            }
+            if (first_pshape.size() != second_pshape.size() && use_new_shape_infer) {
+                return false;
+            }
+            size_t min_size = std::min(first_pshape.size(), second_pshape.size());
+
+            for (size_t i = 0; i < min_size; ++i) {
+                if (!(first_pshape[i] == 1 || second_pshape[i] == 1 || first_pshape[i] == second_pshape[i])) {
+                    return false;
+                }
+            }
+            return true;
+        };
+
+        auto& output_layout = updated_impl_params.output_layouts[0];
+        auto out_pshape = output_layout.get_partial_shape();
+        output_layout.set_partial_shape(extend_shape_to_rank_from_end(out_pshape));
+
+        for (auto& input_layout : updated_impl_params.input_layouts) {
+            auto input_pshape = input_layout.get_partial_shape();
+            if (!broadcastable(input_pshape, out_pshape)) {
+                input_pshape = extend_shape_to_rank_from_begin(input_pshape, out_pshape.size());
+            }
+            input_layout.set_partial_shape(extend_shape_to_rank_from_end(input_pshape));
+        }
+
+        return updated_impl_params;
+    }
+
+    kernel_impl_params canonicalize_shapes(const kernel_impl_params& impl_params) const override {
+        return static_canonicalize_shapes(impl_params);
+    }
+
     void update_dispatch_data(const kernel_impl_params& impl_param) override {
         auto kernel_params = get_kernel_params(impl_param, true);
         (_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
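The broadcastable() predicate above decides whether an input shape can be left alone or needs leading ones first. A runnable, standalone sketch of the same logic on plain integer vectors (dynamic dimensions omitted; names invented for illustration):

```cpp
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <vector>

// A value of 1 in either shape at a given position is broadcastable; under
// the new shape-infer path, ranks must also match exactly.
bool broadcastable(const std::vector<int64_t>& a, const std::vector<int64_t>& b,
                   bool use_new_shape_infer) {
    if (a.size() != b.size() && use_new_shape_infer)
        return false;
    size_t min_size = std::min(a.size(), b.size());
    for (size_t i = 0; i < min_size; ++i) {
        if (!(a[i] == 1 || b[i] == 1 || a[i] == b[i]))
            return false;
    }
    return true;
}

int main() {
    // {2, 1, 4} vs {2, 3, 4}: position 1 has a 1, so broadcast is possible.
    std::cout << broadcastable({2, 1, 4}, {2, 3, 4}, false) << "\n";  // 1
    // Rank mismatch is rejected under the new shape-infer path.
    std::cout << broadcastable({1, 4}, {2, 3, 4}, true) << "\n";      // 0
}
```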
@@ -38,12 +38,11 @@ public:
         const auto& primitive = impl_param.typed_desc<fully_connected>();

         auto get_fc_input_layouts = [primitive](const std::vector<layout>& input_layouts) {
-            auto reshape_to_2d = [](const ov::PartialShape& shape, const ov::Dimension& feature) {
+            auto reshape_to_2d = [](const ov::PartialShape& shape, const ov::Dimension& feature, size_t rank) {
                 if (shape.is_static()) {
                     auto static_shape = shape.to_shape();
-                    size_t total =
-                        std::accumulate(static_shape.begin(), static_shape.end(), size_t(1), std::multiplies<size_t>());
-                    auto dim = feature.is_static() ? feature.get_length() : static_cast<int64_t>(static_shape.back());
+                    size_t total = std::accumulate(static_shape.begin(), static_shape.end(), static_cast<size_t>(1), std::multiplies<size_t>());
+                    auto dim = feature.is_static() ? feature.get_length() : static_cast<int64_t>(static_shape[rank - 1]);
                     return ov::PartialShape{ static_cast<int64_t>(total) / dim, dim };
                 } else {
                     return ov::PartialShape{ ov::Dimension::dynamic(), feature };
@@ -59,10 +58,10 @@ public:
         ov::Dimension feature = input0_pshape[std::min(primitive->input_size, static_cast<size_t>(4)) - 1ul];

         if (primitive->input_size > 3) {
-            input0_layout.set_partial_shape(reshape_to_2d(input0_pshape, feature));
+            input0_layout.set_partial_shape(reshape_to_2d(input0_pshape, feature, primitive->input_size));
         }
         if (input1_pshape.size() != 2) {
-            input1_layout.set_partial_shape(reshape_to_2d(input1_pshape, feature));
+            input1_layout.set_partial_shape(reshape_to_2d(input1_pshape, feature, primitive->weights_rank));
         }

         std::vector<layout> layouts{input0_layout, input1_layout};
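The 2D collapse above keeps one feature dimension and folds everything else into the batch; the new rank argument selects which dimension counts as the feature. A minimal, runnable sketch of that arithmetic with plain vectors instead of ov::PartialShape (static shapes only; names invented for illustration):

```cpp
#include <cstdint>
#include <functional>
#include <iostream>
#include <numeric>
#include <vector>

// Fold an N-D static shape into {total / feature, feature}, mirroring the
// reshape_to_2d lambda; `rank` picks the feature dim when it is not pinned.
std::vector<int64_t> reshape_to_2d(const std::vector<int64_t>& shape, size_t rank) {
    int64_t total = std::accumulate(shape.begin(), shape.end(), int64_t{1}, std::multiplies<int64_t>());
    int64_t dim = shape[rank - 1];
    return {total / dim, dim};
}

int main() {
    // A 3D weights tensor {2, 3, 8} becomes {6, 8}: batch-like dims folded.
    auto r = reshape_to_2d({2, 3, 8}, 3);
    std::cout << r[0] << " x " << r[1] << "\n";  // prints: 6 x 8
}
```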
@@ -77,11 +77,13 @@ public:
         params.axis = convert_axis(primitive->axis, input_layout.get_rank());
         params.batch_dim = size_t(primitive->batch_dim);
         params.support_neg_ind = primitive->support_neg_ind;

+        auto output_layout = impl_param.get_output_layout(0);
-        auto in_rank = impl_param.get_input_layout(0).get_rank();
-        auto out_rank = impl_param.get_output_layout(0).get_rank();
+        auto in_rank = input_layout.get_partial_shape().size();
+        auto out_rank = output_layout.get_partial_shape().size();

         if (in_rank > 4 && in_rank > out_rank) { // if in_rank <= 4, the dims are to be adjusted to 4 by convert_data_tensor
-            auto output_shape = impl_param.get_output_layout(0).get_partial_shape();
+            auto output_shape = output_layout.get_partial_shape();
             ov::PartialShape new_output_shape({output_shape[0], output_shape[1]});
             for (size_t i = 0; i < in_rank - out_rank; ++i)
                 new_output_shape.push_back(1);
@@ -89,8 +91,9 @@ public:
             for (size_t i = 2; i < out_rank; ++i) {
                 new_output_shape.push_back(output_shape[i]);
             }
-            output_layout = layout(new_output_shape, impl_param.get_output_layout(0).data_type, format::get_default_format(new_output_shape.size()));
+            output_layout = layout(new_output_shape, output_layout.data_type, format::get_default_format(new_output_shape.size()));
         }

         params.outputs[0] = convert_data_tensor(output_layout);
         params.inputs.push_back(convert_data_tensor(impl_param.get_input_layout(1)));
         return {params, optional_params};
@@ -26,17 +26,13 @@ struct gemm_impl : typed_primitive_impl_ocl<gemm> {
 public:
     static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param, bool is_shape_agnostic = false) {
         const auto& primitive = impl_param.typed_desc<gemm>();
-        const auto input_layouts = gemm_inst::transform_input_layouts(primitive, impl_param.input_layouts, impl_param.output_layouts[0]);
-        const auto output_layout = gemm_inst::transform_output_layout(primitive, input_layouts, impl_param.output_layouts[0]);
-
         auto params = get_default_params<kernel_selector::gemm_params>(impl_param, is_shape_agnostic);
         auto optional_params = get_default_optional_params<kernel_selector::gemm_optional_params>(impl_param.get_program());

-        params.inputs.clear();
-        for (size_t i = 0; i < primitive->input_size(); ++i) {
-            params.inputs.push_back(convert_data_tensor(input_layouts[i]));
+        for (size_t i = 1; i < primitive->input_size(); ++i) {
+            params.inputs.push_back(convert_data_tensor(impl_param.input_layouts[i]));
         }
-        params.outputs[0] = convert_data_tensor(output_layout);

         params.alpha = primitive->alpha;
         params.beta = primitive->beta;
@@ -55,6 +51,27 @@ public:
         return {params, optional_params};
     }

+    static kernel_impl_params static_canonicalize_shapes(const kernel_impl_params& impl_params) {
+        const auto& primitive = impl_params.typed_desc<gemm>();
+        auto updated_impl_params = canonicalize_fused_shapes(impl_params);
+
+        updated_impl_params.input_layouts = gemm_inst::transform_input_layouts(primitive, impl_params.input_layouts);
+        updated_impl_params.output_layouts[0] = gemm_inst::transform_output_layout(primitive, updated_impl_params.input_layouts, impl_params.output_layouts[0]);
+
+        for (auto& input_layout : updated_impl_params.input_layouts) {
+            input_layout.set_partial_shape(extend_shape_to_rank_from_begin(input_layout.get_partial_shape()));
+        }
+
+        auto& output_layout = updated_impl_params.output_layouts[0];
+        output_layout.set_partial_shape(extend_shape_to_rank_from_begin(output_layout.get_partial_shape()));
+
+        return updated_impl_params;
+    }
+
+    kernel_impl_params canonicalize_shapes(const kernel_impl_params& impl_params) const override {
+        return static_canonicalize_shapes(impl_params);
+    }
+
     void update_dispatch_data(const kernel_impl_params& impl_param) override {
         auto kernel_params = get_kernel_params(impl_param, true);
         (_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
@@ -7,6 +7,7 @@
 #include "intel_gpu/graph/serialization/binary_buffer.hpp"
 #include "intel_gpu/graph/kernel_impl_params.hpp"
 #include "intel_gpu/graph/fused_primitive_desc.hpp"
+#include "intel_gpu/graph/program.hpp"
 #include "intel_gpu/runtime/engine.hpp"
 #include "intel_gpu/runtime/utils.hpp"
 #include "intel_gpu/runtime/tensor.hpp"
@@ -291,4 +292,57 @@ switch (mode) {
     }
 }

+inline ov::PartialShape extend_shape_to_rank_from_end(ov::PartialShape pshape, size_t rank = 4) {
+    if (pshape.size() >= rank) {
+        return pshape;
+    }
+    pshape.insert(pshape.end(), rank - pshape.size(), ov::Dimension(1));
+    return pshape;
+}
+
+inline ov::PartialShape extend_shape_to_rank_from_begin(ov::PartialShape pshape, size_t rank = 4) {
+    if (pshape.size() >= rank) {
+        return pshape;
+    }
+    ov::PartialShape extended_pshape(std::vector<int64_t>(rank - pshape.size(), 1));
+    extended_pshape.insert(extended_pshape.end(), pshape.begin(), pshape.end());
+    return extended_pshape;
+}
+
+inline kernel_impl_params canonicalize_fused_shapes(const kernel_impl_params& impl_params) {
+    auto updated_impl_params = impl_params;
+    bool use_new_shape_infer = impl_params.prog->get_config().get_property(ov::intel_gpu::allow_new_shape_infer);
+
+    auto broadcastable = [use_new_shape_infer](const ov::PartialShape& first_pshape, const ov::PartialShape& second_pshape) {
+        if (first_pshape.is_dynamic() || second_pshape.is_dynamic()) {
+            return false;
+        }
+        if (first_pshape.size() != second_pshape.size() && use_new_shape_infer) {
+            return false;
+        }
+        size_t min_size = std::min(first_pshape.size(), second_pshape.size());
+
+        for (size_t i = 0; i < min_size; ++i) {
+            if (!(first_pshape[i] == 1 || second_pshape[i] == 1 || first_pshape[i] == second_pshape[i])) {
+                return false;
+            }
+        }
+        return true;
+    };
+
+    for (auto& fd : updated_impl_params.fused_desc) {
+        if (fd.is_type<eltwise>() && fd.total_num_deps == 2) {
+            auto out_pshape = updated_impl_params.output_layouts[0].get_partial_shape();
+
+            auto& dep_layout = updated_impl_params.input_layouts[fd.dep_start_idx];
+            auto dep_shape = dep_layout.get_partial_shape();
+
+            if (!broadcastable(dep_shape, out_pshape)) {
+                dep_layout.set_partial_shape(extend_shape_to_rank_from_begin(dep_shape, out_pshape.size()));
+            }
+        }
+    }
+    return updated_impl_params;
+}
+
 } // namespace cldnn
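These two helpers are the core of the mechanism: pad with trailing ones (keeps bfyx-style alignment) or with leading ones (numpy-style broadcast alignment). A runnable sketch of the same logic on plain vectors, for quick reference (names invented for illustration):

```cpp
#include <cstdint>
#include <iostream>
#include <vector>

// Pad with trailing ones: {2, 3} -> {2, 3, 1, 1}.
std::vector<int64_t> extend_from_end(std::vector<int64_t> shape, size_t rank = 4) {
    if (shape.size() >= rank)
        return shape;
    shape.insert(shape.end(), rank - shape.size(), 1);
    return shape;
}

// Pad with leading ones: {2, 3} -> {1, 1, 2, 3}.
std::vector<int64_t> extend_from_begin(std::vector<int64_t> shape, size_t rank = 4) {
    if (shape.size() >= rank)
        return shape;
    shape.insert(shape.begin(), rank - shape.size(), 1);
    return shape;
}

int main() {
    for (auto d : extend_from_end({2, 3}))   std::cout << d << ' ';  // 2 3 1 1
    std::cout << '\n';
    for (auto d : extend_from_begin({2, 3})) std::cout << d << ' ';  // 1 1 2 3
    std::cout << '\n';
}
```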
@@ -89,7 +89,7 @@ struct typed_primitive_impl_ocl : public typed_primitive_impl<PType> {
         if (arg.can_be_optimized()) {
             return make_unique<ImplType>(kernel_selector::kernel_data{});
         }
-        auto kernel_params = ImplType::get_kernel_params(impl_param);
+        auto kernel_params = ImplType::get_kernel_params(ImplType::static_canonicalize_shapes(impl_param));
         kernel_params.first.is_shape_agnostic = impl_param.is_dynamic();
         auto& kernel_selector = ImplType::kernel_selector_t::Instance();
         auto best_kernel = kernel_selector.get_best_kernel(kernel_params.first, kernel_params.second);
@@ -28,37 +28,6 @@ struct select_impl : typed_primitive_impl_ocl<select> {
         auto optional_params = get_default_optional_params<kernel_selector::select_optional_params>(impl_param.get_program());

         std::vector<layout> input_layouts = impl_param.input_layouts;
-        auto o_layout = impl_param.get_output_layout();
-
-        auto broadcastable = [&](const layout& a, const layout& b) {
-            if (a.is_dynamic() || b.is_dynamic()) {
-                return false;
-            }
-
-            auto dims_a = a.get_partial_shape();
-            auto dims_b = b.get_partial_shape();
-
-            size_t min_size = std::min(dims_a.size(), dims_b.size());
-
-            for (size_t i = 0; i < min_size; ++i) {
-                if (!(dims_a[i] == 1 || dims_b[i] == 1 || dims_a[i] == dims_b[i])) {
-                    return false;
-                }
-            }
-            return true;
-        };
-
-        for (auto& l : input_layouts) {
-            auto pshape = l.get_partial_shape();
-            auto rank = pshape.size();
-
-            if (rank < 4 && !broadcastable(o_layout, l)) {
-                pshape.insert(pshape.begin(), 4 - rank, 1);
-                layout new_layout = l;
-                new_layout.set_partial_shape(pshape);
-                l = new_layout;
-            }
-        }
-
         for (size_t i = 1; i < input_layouts.size(); ++i) {
             params.inputs.push_back(convert_data_tensor(input_layouts[i]));
@@ -66,6 +35,23 @@ struct select_impl : typed_primitive_impl_ocl<select> {
         return {params, optional_params};
     }

+    static kernel_impl_params static_canonicalize_shapes(const kernel_impl_params& impl_params) {
+        auto updated_impl_params = canonicalize_fused_shapes(impl_params);
+
+        for (auto& input_layout : updated_impl_params.input_layouts) {
+            input_layout.set_partial_shape(extend_shape_to_rank_from_begin(input_layout.get_partial_shape()));
+        }
+
+        auto& output_layout = updated_impl_params.output_layouts[0];
+        output_layout.set_partial_shape(extend_shape_to_rank_from_begin(output_layout.get_partial_shape()));
+
+        return updated_impl_params;
+    }
+
+    kernel_impl_params canonicalize_shapes(const kernel_impl_params& impl_params) const override {
+        return static_canonicalize_shapes(impl_params);
+    }
+
     void update_dispatch_data(const kernel_impl_params& impl_param) override {
         auto kernel_params = get_kernel_params(impl_param, true);
         (_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
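Note the padding direction: select (like gemm above) aligns ranks with leading ones, while the generic default in primitive_impl::static_canonicalize_shapes further down pads with trailing ones. A quick comparison, assuming the helpers behave as defined in this diff:

```cpp
// Same 2D shape, two canonicalization policies:
//   extend_shape_to_rank_from_begin({5, 2}) -> {1, 1, 5, 2}   (select, gemm)
//   extend_shape_to_rank_from_end({5, 2})   -> {5, 2, 1, 1}   (generic default)
```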
@@ -34,6 +34,22 @@ struct shape_of_impl : typed_primitive_impl_ocl<shape_of> {
         return {params, optional_params};
     }

+    static kernel_impl_params static_canonicalize_shapes(const kernel_impl_params& impl_params) {
+        auto updated_impl_params = canonicalize_fused_shapes(impl_params);
+
+        auto& input_layout = updated_impl_params.input_layouts[0];
+        input_layout.set_partial_shape(extend_shape_to_rank_from_end(input_layout.get_partial_shape(), 6));
+
+        auto& output_layout = updated_impl_params.output_layouts[0];
+        output_layout.set_partial_shape(extend_shape_to_rank_from_end(output_layout.get_partial_shape(), 6));
+
+        return updated_impl_params;
+    }
+
+    kernel_impl_params canonicalize_shapes(const kernel_impl_params& impl_params) const override {
+        return static_canonicalize_shapes(impl_params);
+    }
+
     void update_dispatch_data(const kernel_impl_params& impl_param) override {
         auto kernel_params = get_kernel_params(impl_param, true);
         (_kernel_data.update_dispatch_data_func)(kernel_params.first, _kernel_data);
@@ -75,7 +75,7 @@ protected:
             in_layouts.emplace_back(impl_params.get_input_layout(2));
         }

-        in_layouts = gemm_inst::transform_input_layouts(prim, in_layouts, out_l);
+        in_layouts = gemm_inst::transform_input_layouts(prim, in_layouts);
         out_l = gemm_inst::transform_output_layout(prim, in_layouts, out_l);

         const auto& in0_l = in_layouts[0];
@@ -37,8 +37,6 @@ public:
     template<typename ShapeType>
     static std::vector<layout> calc_output_layouts(broadcast_node const& /*node*/, const kernel_impl_params& impl_param);
     static layout calc_output_layout(broadcast_node const& node, kernel_impl_params const& impl_param);
-    static std::vector<size_t> extend_input_shape_to_6d(kernel_impl_params const& orig_impl_param, int32_t input_idx);
-    static std::vector<size_t> extend_output_shape_to_6d(kernel_impl_params const& orig_impl_param, int32_t output_idx);
     static std::string to_string(broadcast_node const& node);
     typed_primitive_inst(network& network, broadcast_node const& node);
 };
@@ -32,13 +32,10 @@ public:
     template<typename ShapeType>
     static std::vector<layout> calc_output_layouts(gemm_node const& /*node*/, const kernel_impl_params& impl_param);
     static layout calc_output_layout(gemm_node const& node, kernel_impl_params const& impl_param);
-    static std::vector<size_t> extend_input_shape_to_6d(kernel_impl_params const& orig_impl_param, int32_t input_idx);
-    static std::vector<size_t> extend_output_shape_to_6d(kernel_impl_params const& orig_impl_param, int32_t output_idx);
     static std::string to_string(gemm_node const& node);

     static std::vector<layout> transform_input_layouts(const std::shared_ptr<const gemm> primitive,
-                                                       const std::vector<layout>& input_layouts,
-                                                       const layout& output_layout);
+                                                       const std::vector<layout>& input_layouts);
     static layout transform_output_layout(const std::shared_ptr<const gemm> primitive, const std::vector<layout>& input_layouts, const layout& output_layout);

     typed_primitive_inst(network& network, gemm_node const& node);
@@ -82,6 +82,12 @@ struct primitive_impl {
         OPENVINO_ASSERT(false, "[GPU] update_dispatch_data is not implemented for dynamic implemenation ", _kernel_name);
     }

+    static kernel_impl_params static_canonicalize_shapes(const kernel_impl_params& impl_params);
+
+    virtual kernel_impl_params canonicalize_shapes(const kernel_impl_params& impl_params) const {
+        return primitive_impl::static_canonicalize_shapes(impl_params);
+    }
+
     virtual void set_kernels(std::map<const std::string, kernel::ptr>& kernels) {}

 protected:
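The static/virtual pairing lets the creation path canonicalize shapes before any impl object exists, while keeping a per-primitive override point. A condensed, compilable sketch of the pattern (types and names invented for illustration, not the actual classes):

```cpp
struct params {};  // stand-in for kernel_impl_params

struct base_impl {
    // Shared default: callable from static factory code, no instance needed.
    static params canonicalize_default(const params& p) { return p; }
    // Per-primitive override point; defaults to the shared behavior.
    virtual params canonicalize(const params& p) const { return canonicalize_default(p); }
    virtual ~base_impl() = default;
};

struct broadcast_like_impl : base_impl {
    static params canonicalize_static(const params& p) { return p; /* primitive-specific rules */ }
    params canonicalize(const params& p) const override { return canonicalize_static(p); }
};
```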
@@ -424,26 +430,6 @@ public:
         return std::move(orig_impl_param);
     }

-    static std::vector<size_t> extend_input_shape_to_6d(kernel_impl_params const& orig_impl_param, int32_t input_idx) {
-        ov::PartialShape ps = orig_impl_param.get_input_layout(input_idx).get_partial_shape();
-
-        if (ps.size() < 4) {
-            ps.insert(ps.end(), 4 - ps.size(), ov::Dimension(1));
-        }
-        layout l(ps, data_types::i32, format::get_default_format(ps.size()));
-        return l.transform(format::bfwzyx).to_shape();
-    }
-
-    static std::vector<size_t> extend_output_shape_to_6d(kernel_impl_params const& orig_impl_param, int32_t output_idx) {
-        ov::PartialShape ps = orig_impl_param.get_output_layout(output_idx).get_partial_shape();
-
-        if (ps.size() < 4) {
-            ps.insert(ps.end(), 4 - ps.size(), ov::Dimension(1));
-        }
-        layout l(ps, data_types::i32, format::get_default_format(ps.size()));
-        return l.transform(format::bfwzyx).to_shape();
-    }
-
     typed_primitive_inst_base(network& network, typed_node const& node)
         : typed_primitive_inst_base(network, node, do_allocate_memory(node)) {}

@@ -44,8 +44,6 @@ struct primitive_type {
     virtual layout calc_output_layout(const program_node& node, const kernel_impl_params& params) const = 0;
     virtual std::vector<layout> calc_output_layouts(const program_node& node, const kernel_impl_params& impl_param) const = 0;
     virtual kernel_impl_params get_fake_aligned_params(kernel_impl_params const& orig_impl_param) const = 0;
-    virtual std::vector<size_t> extend_input_shape_to_6d(kernel_impl_params const& orig_impl_param, int32_t input_idx) const = 0;
-    virtual std::vector<size_t> extend_output_shape_to_6d(kernel_impl_params const& orig_impl_param, int32_t output_idx) const = 0;
     virtual std::string to_string(const program_node& node) const = 0;
 };
 } // namespace cldnn
@@ -107,15 +107,11 @@ struct primitive_type_base : primitive_type {

         return res;
     }

     kernel_impl_params get_fake_aligned_params(kernel_impl_params const& orig_impl_param) const override {
         return typed_primitive_inst<PType>::get_fake_aligned_params(orig_impl_param);
     }
-    std::vector<size_t> extend_input_shape_to_6d(kernel_impl_params const& orig_impl_param, int32_t input_idx) const override {
-        return typed_primitive_inst<PType>::extend_input_shape_to_6d(orig_impl_param, input_idx);
-    }
-    std::vector<size_t> extend_output_shape_to_6d(kernel_impl_params const& orig_impl_param, int32_t output_idx) const override {
-        return typed_primitive_inst<PType>::extend_output_shape_to_6d(orig_impl_param, output_idx);
-    }

     std::string to_string(const cldnn::program_node& node) const override {
         OPENVINO_ASSERT(node.type() == this, "[GPU] primitive_type_base::to_string: primitive type mismatch");
         return typed_primitive_inst<PType>::to_string(node);
@@ -32,9 +32,6 @@ public:
     template<typename ShapeType>
     static std::vector<layout> calc_output_layouts(const select_node& /*node*/, const kernel_impl_params& impl_param);
     static layout calc_output_layout(select_node const& node, kernel_impl_params const& impl_param);
-    static std::vector<size_t> extend_shape_to_6d(ov::PartialShape ps);
-    static std::vector<size_t> extend_input_shape_to_6d(kernel_impl_params const& orig_impl_param, int32_t input_idx);
-    static std::vector<size_t> extend_output_shape_to_6d(kernel_impl_params const& orig_impl_param, int32_t output_idx);
     static std::string to_string(select_node const& node);
     typed_primitive_inst(network& network, select_node const& node);
 };
@@ -35,8 +35,6 @@ public:
     template<typename ShapeType>
     static std::vector<layout> calc_output_layouts(shape_of_node const& /*node*/, const kernel_impl_params& impl_param);
     static layout calc_output_layout(shape_of_node const& node, kernel_impl_params const& impl_param);
-    static std::vector<size_t> extend_input_shape_to_6d(kernel_impl_params const& orig_impl_param, int32_t input_idx);
-    static std::vector<size_t> extend_output_shape_to_6d(kernel_impl_params const& orig_impl_param, int32_t output_idx);
     static std::string to_string(shape_of_node const& node);

     typed_primitive_inst(network& network, shape_of_node const& node);
@@ -88,6 +88,21 @@ bool is_any_user_cpu(const std::list<const program_node*>& users) {
     }
     return false;
 }

+kernel_impl_params primitive_impl::static_canonicalize_shapes(const kernel_impl_params& impl_params) {
+    auto updated_impl_params = canonicalize_fused_shapes(impl_params);
+
+    for (auto& input_layout : updated_impl_params.input_layouts) {
+        input_layout.set_partial_shape(extend_shape_to_rank_from_end(input_layout.get_partial_shape()));
+    }
+
+    for (auto& output_layout : updated_impl_params.output_layouts) {
+        output_layout.set_partial_shape(extend_shape_to_rank_from_end(output_layout.get_partial_shape()));
+    }
+
+    return updated_impl_params;
+}
+
 uint32_t primitive_inst::get_network_id() const { return _network.get_id(); }

 void primitive_inst::check_memory_to_set(const memory& mem, const layout& layout) const {
@@ -308,7 +323,11 @@ bool primitive_inst::update_impl() {
             size_t offset = 0;
             for (size_t i = 0; i < _node->get_dependencies().size(); i++) {
                 if (_node->get_dependency(i).get_output_layout().is_dynamic()) {
-                    auto input_shape = _node->type()->extend_input_shape_to_6d(params, static_cast<uint32_t>(i));
+                    auto pshape = params.get_input_layout(i).get_partial_shape();
+                    auto input_shape = layout::transform(pshape,
+                                                         format::get_default_format(pshape.size()),
+                                                         format::bfwzyx).to_shape();
+
                     for (size_t j = 0; j < input_shape.size(); j++)
                         lock[offset++] = static_cast<int32_t>(input_shape[j]);
                 }
@@ -316,7 +335,11 @@ bool primitive_inst::update_impl() {

             for (size_t i = 0; i < _node->get_output_layouts().size(); i++) {
                 if (_node->get_output_layout(i).is_dynamic()) {
-                    auto output_shape = _node->type()->extend_output_shape_to_6d(params, static_cast<uint32_t>(i));
+                    auto pshape = params.get_output_layout(i).get_partial_shape();
+                    auto output_shape = layout::transform(pshape,
+                                                          format::get_default_format(pshape.size()),
+                                                          format::bfwzyx).to_shape();
+
                     for (size_t j = 0; j < output_shape.size(); j++)
                         lock[offset++] = static_cast<int32_t>(output_shape[j]);
                 }
@@ -365,9 +388,10 @@ bool primitive_inst::update_impl() {
                 cache.add(updated_params, impl->clone());
             });
             _impl = _dynamic_impl->clone();
-            _impl->update_dispatch_data(*_impl_params);
+            auto new_impl_params = _impl->canonicalize_shapes(*_impl_params);
+            _impl->update_dispatch_data(new_impl_params);

-            update_shape_info(*_impl_params);
+            update_shape_info(new_impl_params);
         } else {
             _impl = _node->type()->choose_impl(*_node, updated_params);
             auto& kernels_cache = get_network().get_program()->get_kernels_cache();
@@ -55,23 +55,6 @@ std::vector<layout> select_inst::calc_output_layouts(const select_node& /*node*/
     return {{output_shapes[0], dt, format::get_default_format(output_shapes[0].size())}};
 }

-std::vector<size_t> select_inst::extend_shape_to_6d(ov::PartialShape ps) {
-    if (ps.size() < 4) {
-        ps.insert(ps.begin(), 4 - ps.size(), ov::Dimension(1));
-    }
-
-    layout l(ps, data_types::i32, format::get_default_format(ps.size()));
-    return l.transform(format::bfwzyx).to_shape();
-}
-
-std::vector<size_t> select_inst::extend_input_shape_to_6d(kernel_impl_params const& orig_impl_param, int32_t input_idx) {
-    return extend_shape_to_6d(orig_impl_param.get_input_layout(input_idx).get_partial_shape());
-}
-
-std::vector<size_t> select_inst::extend_output_shape_to_6d(kernel_impl_params const& orig_impl_param, int32_t output_idx) {
-    return extend_shape_to_6d(orig_impl_param.get_output_layout(output_idx).get_partial_shape());
-}
-
 std::string select_inst::to_string(select_node const& node) {
     auto node_info = node.desc_to_json();
     auto desc = node.get_primitive();
@@ -46,18 +46,6 @@ std::vector<layout> shape_of_inst::calc_output_layouts(shape_of_node const& /*no

 template std::vector<layout> shape_of_inst::calc_output_layouts<ov::PartialShape>(shape_of_node const& node, const kernel_impl_params& impl_param);

-std::vector<size_t> shape_of_inst::extend_input_shape_to_6d(kernel_impl_params const& orig_impl_param, int32_t input_idx) {
-    ov::PartialShape ps = orig_impl_param.get_input_layout(input_idx).get_partial_shape();
-    ps.insert(ps.end(), 6 - ps.size(), ov::Dimension(1));
-    return ps.to_shape();
-}
-
-std::vector<size_t> shape_of_inst::extend_output_shape_to_6d(kernel_impl_params const& orig_impl_param, int32_t output_idx) {
-    ov::PartialShape ps = orig_impl_param.get_output_layout(output_idx).get_partial_shape();
-    ps.insert(ps.end(), 6 - ps.size(), ov::Dimension(1));
-    return ps.to_shape();
-}
-
 std::string shape_of_inst::to_string(shape_of_node const& node) {
     auto node_info = node.desc_to_json();
     auto desc = node.get_primitive();
@@ -69,7 +69,7 @@ KernelsData GemmKernelBase::GetCommonKernelsData(const Params& params,
                      (uint32_t)prim_params.inputs.size(),
                      GetFusedPrimitiveInputsCount(params),
                      1,
-                     prim_params.outputs[0].is_dynamic());
+                     prim_params.has_dynamic_tensors());

     return {k_data};
 }
@@ -133,7 +133,8 @@ static void CreateMatMulOp(Program& p, const std::shared_ptr<ngraph::op::v0::Mat
                            "",
                            cldnn::element_type_to_data_type(op->get_output_element_type(0)),
                            cldnn::padding(),
-                           shape_a.size());
+                           rank_a,
+                           rank_b);

         p.add_primitive(*op, fcPrim);

@@ -506,20 +506,19 @@ bool layout::identical(const layout& other) const {
     return are_layouts_identical(*this, other).first;
 }

-ov::PartialShape layout::transform(cldnn::format new_fmt) const {
-    if (format == new_fmt) {
-        return size;
+ov::PartialShape layout::transform(const ov::PartialShape& pshape, cldnn::format old_fmt, cldnn::format new_fmt) {
+    if (old_fmt == new_fmt) {
+        return pshape;
     }

     cldnn::tensor::value_type default_size = -1;
-    auto shape = size.to_shape();
+    auto shape = pshape.to_shape();
     std::vector<tensor::value_type> dims;
     for (auto dim : shape) {
         dims.push_back(static_cast<tensor::value_type>(dim));
     }

     const cldnn::format default_fmt = cldnn::format::bfwzyx;
-    auto old_sizes = convert_dimensions(dims, format.order(), default_fmt.internal_order()); // convert to internal order (bfxyzw)
+    auto old_sizes = convert_dimensions(dims, old_fmt.order(), default_fmt.internal_order()); // convert to internal order (bfxyzw)

     auto val_order = default_fmt.internal_order();
     auto new_order = new_fmt.internal_order();
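Making transform() a pure function of (shape, old format, new format) lets callers convert shapes without constructing a layout object; the update_impl() hunks above use exactly this. An illustrative call, mirroring that usage (the 2D shape here is invented):

```cpp
// Pad/reorder a 2D shape into the 6D bfwzyx ordering.
ov::PartialShape pshape = {3, 5};
auto shape6d = cldnn::layout::transform(pshape,
                                        cldnn::format::get_default_format(pshape.size()),
                                        cldnn::format::bfwzyx).to_shape();
// shape6d now holds 6 values in b, f, w, z, y, x order.
```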
@@ -63,6 +63,10 @@ public:
         return layout{ p.weights_shape, p.weights_type, p.weights_format };
     }

+    size_t get_input_weights_rank(fully_connected_test_params& p) {
+        return p.weights_shape.size();
+    }
+
     layout get_bias_layout(fully_connected_test_params& p) {
         auto bias_shape = p.out_shape.size() == 3 ? ov::PartialShape{1, 1, p.out_shape[2]} : ov::PartialShape{1, p.out_shape[1]};
         return layout{ bias_shape, p.default_type, p.default_format };
@@ -113,6 +117,10 @@ public:
         return layout{ p.weights_shape, p.weights_type, p.weights_format };
     }

+    size_t get_input_weights_rank(fully_connected_test_params& p) {
+        return p.weights_shape.size();
+    }
+
     layout get_bias_layout(fully_connected_test_params& p) {
         auto bias_shape = p.out_shape.size() == 3 ? ov::PartialShape{1, 1, p.out_shape[2]} : ov::PartialShape{1, p.out_shape[1]};
         return layout{ bias_shape, p.default_type, p.default_format };
@@ -159,7 +167,7 @@ TEST_P(fc_fp32_activation, basic) {
         input_layout("input", get_input_layout(p)),
         data("weights", get_mem(get_weights_layout(p))),
        data("bias", get_mem(get_bias_layout(p))),
-        fully_connected("fc_prim", input_info("input"), "weights", "bias", padding(), get_output_dim_size(p)),
+        fully_connected("fc_prim", input_info("input"), "weights", "bias", padding(), get_output_dim_size(p), get_input_weights_rank(p)),
         activation("activation", input_info("fc_prim"), activation_func::abs),
         reorder("reorder_bfyx", input_info("activation"), p.default_format, data_types::f32)
     );
@@ -185,7 +193,7 @@ TEST_P(fc_fp32_activation_dynamic, basic) {
         input_layout("input", dynamic_input_layout),
         data("weights", get_mem(get_weights_layout(p))),
         data("bias", get_mem(get_bias_layout(p))),
-        fully_connected("fc_prim", input_info("input"), "weights", "bias", padding(), get_output_dim_size(p)),
+        fully_connected("fc_prim", input_info("input"), "weights", "bias", padding(), get_output_dim_size(p), get_input_weights_rank(p)),
         activation("activation", input_info("fc_prim"), activation_func::abs),
         reorder("reorder_bfyx", input_info("activation"), p.default_format, data_types::f32)
     );
@@ -212,7 +220,7 @@ TEST_P(fc_fp32_bias, basic) {
         input_layout("input", get_input_layout(p)),
         data("weights", get_mem(get_weights_layout(p))),
         data("bias", get_mem(get_bias_layout(p))),
-        fully_connected("fc_prim", input_info("input"), "weights", "", padding(), get_output_dim_size(p)),
+        fully_connected("fc_prim", input_info("input"), "weights", "", padding(), get_output_dim_size(p), get_input_weights_rank(p)),
         eltwise("bias_add", { input_info("fc_prim"), input_info("bias") }, eltwise_mode::sum),
         reorder("reorder_bfyx", input_info("bias_add"), p.default_format, data_types::f32)
     );
@@ -239,7 +247,7 @@ TEST_P(fc_fp32_bias_dynamic, basic) {
         input_layout("input", dynamic_input_layout),
         data("weights", get_mem(get_weights_layout(p))),
         data("bias", get_mem(get_bias_layout(p))),
-        fully_connected("fc_prim", input_info("input"), "weights", "", padding(), get_output_dim_size(p)),
+        fully_connected("fc_prim", input_info("input"), "weights", "", padding(), get_output_dim_size(p), get_input_weights_rank(p)),
         eltwise("bias_add", { input_info("fc_prim"), input_info("bias") }, eltwise_mode::sum),
         reorder("reorder_bfyx", input_info("bias_add"), p.default_format, data_types::f32)
     );
@@ -265,7 +273,7 @@ TEST_P(fc_int8_eltwise, basic) {
         data("weights", get_mem(get_weights_layout(p))),
         data("bias", get_mem(get_bias_layout(p))),
         data("eltwise_data", get_mem(get_per_channel_layout(p), 1, 9)),
-        fully_connected("fc_prim", input_info("input"), "weights", "bias", padding(), get_output_dim_size(p)),
+        fully_connected("fc_prim", input_info("input"), "weights", "bias", padding(), get_output_dim_size(p), get_input_weights_rank(p)),
         eltwise("eltwise", { input_info("fc_prim"), input_info("eltwise_data") }, eltwise_mode::sum),
         reorder("reorder_bfyx", input_info("eltwise"), p.default_format, data_types::f32)
     );
@@ -297,7 +305,7 @@ TEST_P(fc_int8_quantize_u8, basic) {
         data("in_hi", get_mem(get_per_channel_layout(p), 1, max_random)),
         data("out_lo", get_mem(get_single_element_layout(p), 0)),
         data("out_hi", get_mem(get_single_element_layout(p), 255)),
-        fully_connected("fc_prim", input_info("input"), "weights", "bias", data_types::f32, padding(), get_output_dim_size(p)),
+        fully_connected("fc_prim", input_info("input"), "weights", "bias", data_types::f32, padding(), get_output_dim_size(p), get_input_weights_rank(p)),
         quantize("quantize", input_info("fc_prim"), input_info("in_lo"), input_info("in_hi"),
                  input_info("out_lo"), input_info("out_hi"), 256, data_types::u8),
         reorder("reorder_bfyx", input_info("quantize"), p.default_format, data_types::f32)
@ -331,7 +339,7 @@ TEST_P(fc_int8_eltwise_quantize_i8, basic) {
|
||||
data("out_lo", get_mem(get_single_element_layout(p), -127)),
|
||||
data("out_hi", get_mem(get_single_element_layout(p), 127)),
|
||||
data("eltwise_data", get_mem(get_per_channel_layout(p), 1.0f / get_weights_layout(p).count() / 255)),
|
||||
fully_connected("fc_prim", input_info("input"), "weights", "bias", data_types::f32, padding(), get_output_dim_size(p)),
|
||||
fully_connected("fc_prim", input_info("input"), "weights", "bias", data_types::f32, padding(), get_output_dim_size(p), get_input_weights_rank(p)),
|
||||
eltwise("eltwise", { input_info("fc_prim"), input_info("eltwise_data") }, eltwise_mode::prod),
|
||||
quantize("quantize", input_info("eltwise"), input_info("in_lo"), input_info("in_hi"),
|
||||
input_info("out_lo"), input_info("out_hi"), 255, data_types::i8),
|
||||
@ -366,7 +374,7 @@ TEST_P(fc_int8_eltwise_activation_quantize_i8, basic) {
|
||||
data("out_lo", get_mem(get_single_element_layout(p), -127)),
|
||||
data("out_hi", get_mem(get_single_element_layout(p), 127)),
|
||||
data("eltwise_data", get_mem(get_per_channel_layout(p), 1.0f / get_weights_layout(p).count() / 255)),
|
||||
fully_connected("fc_prim", input_info("input"), "weights", "bias", data_types::f32, padding(), get_output_dim_size(p)),
|
||||
fully_connected("fc_prim", input_info("input"), "weights", "bias", data_types::f32, padding(), get_output_dim_size(p), get_input_weights_rank(p)),
|
||||
eltwise("eltwise", { input_info("fc_prim"), input_info("eltwise_data") }, eltwise_mode::prod),
|
||||
activation("activation_eltwise", input_info("eltwise"), activation_func::exp),
|
||||
quantize("quantize", input_info("activation_eltwise"), input_info("in_lo"), input_info("in_hi"),
|
||||
@ -405,7 +413,7 @@ TEST_P(fc_int8_inputs_fused_fp32_sum, basic) {
|
||||
data("weights", get_mem(get_weights_layout(p))),
|
||||
data("bias", get_mem(get_bias_layout(p))),
|
||||
data("shift_data", get_mem(shift_layout, 1)),
|
||||
fully_connected("fc_prim", input_info("input"), "weights", "bias", cldnn::data_types::f32, padding(), get_output_dim_size(p)),
|
||||
fully_connected("fc_prim", input_info("input"), "weights", "bias", cldnn::data_types::f32, padding(), get_output_dim_size(p), get_input_weights_rank(p)),
|
||||
eltwise("shift", { input_info("fc_prim"), input_info("shift_data") }, eltwise_mode::sum, cldnn::data_types::f32),
|
||||
crop("crop", input_info("shift"), get_output_layout(p).get_tensor(), { 0, 0, 0, 0 }),
|
||||
reorder("reorder_bfyx", input_info("crop"), p.default_format, data_types::f32)
|
||||
|
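For reference, a minimal sketch of what the new trailing argument at these call sites computes. `get_input_weights_rank` is a test-fixture helper whose body is not shown in this diff; both its body and the parameter type below are assumptions inferred from how the new unit tests later in this commit derive the same value:

// Hypothetical fixture helper (not part of this diff): the weights rank is
// simply the rank of the weights layout's partial shape, matching how the
// canonicalization tests compute it via get_partial_shape().size().
size_t get_input_weights_rank(const fully_connected_test_params& p) {
    return get_weights_layout(p).get_partial_shape().size();
}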
@ -0,0 +1,238 @@
// Copyright (C) 2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include <intel_gpu/primitives/input_layout.hpp>
#include "test_utils.h"
#include "program_wrapper.h"

#include "primitive_inst.h"
#include "shape_of_inst.h"
#include "select_inst.h"
#include "broadcast_inst.h"
#include "eltwise_inst.h"
#include "fully_connected_inst.h"
#include "gemm_inst.h"

using namespace cldnn;
using namespace ::tests;

namespace {

// first - input shape, second - expected input shape after canonicalization, third - expected output shape after canonicalization
using Shapes = std::tuple<std::vector<ov::PartialShape>, std::vector<ov::PartialShape>, std::vector<ov::PartialShape>>;

void canonicalization_test(cldnn::topology topology, std::string prim_name,
                           const std::vector<ov::PartialShape>& expected_input_pshapes,
                           const std::vector<ov::PartialShape>& expected_output_pshapes,
                           bool enable_fusing = false) {
    auto& engine = get_test_engine();

    ExecutionConfig config({ov::intel_gpu::optimize_data(true),
                            ov::intel_gpu::allow_new_shape_infer(true)});

    auto prog = program::build_program(engine, topology, config, false, true);
    if (enable_fusing) {
        layout_optimizer lo;
        program_wrapper::apply_opt_pass<prepare_primitive_fusing>(*prog, lo);
    }
    program_wrapper::run_graph_compilation(*prog);

    auto& node = prog->get_node(prim_name);
    auto impl = node.get_selected_impl();
    ASSERT_TRUE(impl != nullptr);

    auto impl_param = node.get_kernel_impl_params();
    auto canonicalized_impl_param = impl->canonicalize_shapes(*impl_param);

    for (size_t i = 0; i < canonicalized_impl_param.input_layouts.size(); ++i) {
        EXPECT_TRUE(canonicalized_impl_param.input_layouts[i].get_partial_shape() == expected_input_pshapes[i]);
    }

    for (size_t i = 0; i < canonicalized_impl_param.output_layouts.size(); ++i) {
        EXPECT_TRUE(canonicalized_impl_param.output_layouts[i].get_partial_shape() == expected_output_pshapes[i]);
    }
};

layout create_default_layout(const ov::PartialShape& pshape) {
    return layout {pshape, data_types::f32, format::bfyx};
}

std::vector<Shapes> shape_of_shapes {
    {{{3}}, {{3, 1, 1, 1, 1, 1}}, {{1, 1, 1, 1, 1, 1}}},
    {{{1, 2, 3}}, {{1, 2, 3, 1, 1, 1}}, {{3, 1, 1, 1, 1, 1}}},
    {{{1, 2, 3, 4, 5}}, {{1, 2, 3, 4, 5, 1}}, {{5, 1, 1, 1, 1, 1}}}
};

TEST(canonicalization, shape_of) {
    for (const auto& shapes : shape_of_shapes) {
        layout in_layout {std::get<0>(shapes)[0], data_types::f32, format::bfyx};

        cldnn::topology topology;
        topology.add(input_layout("input", in_layout));
        topology.add(shape_of("shape_of", input_info("input"), 3, data_types::i32));

        canonicalization_test(topology, "shape_of", std::get<1>(shapes), std::get<2>(shapes));
    }
}
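Reading the entries above: for shape_of, canonicalization right-pads every shape with ones up to the 6D rank used here, both for the input and for the primitive's rank-1 output. A worked instance of the second entry:

// input  {1, 2, 3} -> {1, 2, 3, 1, 1, 1}   (append ones up to 6D)
// shape_of returns a 1D tensor holding the 3 input dimensions:
// output {3}       -> {3, 1, 1, 1, 1, 1}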

std::vector<Shapes> select_shapes {
    {{{2, 2}, {1, 2}, {2, 1}}, {{1, 1, 2, 2}, {1, 1, 2, 2}, {1, 1, 2, 2}}, {{1, 1, 2, 2}}}
};

TEST(canonicalization, select) {
    for (const auto& shapes : select_shapes) {
        // Per the Shapes comment above, all three raw input shapes come from
        // the first tuple element; the original indexed into the expected-shape
        // vectors instead, leaving two of the declared input shapes unused.
        layout input0_layout = create_default_layout(std::get<0>(shapes)[0]);
        layout input1_layout = create_default_layout(std::get<0>(shapes)[1]);
        layout input2_layout = create_default_layout(std::get<0>(shapes)[2]);

        topology topology;
        topology.add(input_layout("mask", input0_layout));
        topology.add(input_layout("input1", input1_layout));
        topology.add(input_layout("input2", input2_layout));
        topology.add(cldnn::select("select", input_info("mask"), input_info("input1"), input_info("input2")));

        canonicalization_test(topology, "select", std::get<1>(shapes), std::get<2>(shapes));
    }
}

struct broadcast_params {
    ov::Shape target_shape;
    ov::AxisSet axes_mapping;
    ov::op::BroadcastModeSpec broadcast_mode;
};

std::vector<std::pair<Shapes, broadcast_params>> broadcast_shapes_with_params {
    {{{{5}}, {{1, 1, 5, 1}}, {{3, 1, 5, 1}}}, {{3, 1, 5}, {}, ov::op::BroadcastType::NUMPY}},
    {{{{5}}, {{1, 1, 1, 1, 5}}, {{1, 2, 3, 4, 5}}}, {{1, 2, 3, 4, 5}, {}, ov::op::BroadcastType::NUMPY}},
    {{{{3, 1}}, {{1, 1, 3, 1}}, {{1, 2, 3, 4}}}, {{1, 2, 3, 4}, {}, {ov::op::BroadcastType::PDPD, 2}}},
    {{{{4, 1, 6}}, {{1, 1, 1, 4, 1, 6}}, {{1, 2, 3, 4, 5, 6}}}, {{1, 2, 3, 4, 5, 6}, {}, {ov::op::BroadcastType::PDPD, 3}}}
};

TEST(canonicalization, broadcast) {
    for (const auto& params : broadcast_shapes_with_params) {
        layout input0_layout = create_default_layout(std::get<0>(params.first)[0]);

        topology topology;
        topology.add(input_layout("input", input0_layout));
        topology.add(broadcast("broadcast", input_info("input"), params.second.target_shape,
                               params.second.axes_mapping, params.second.broadcast_mode));

        canonicalization_test(topology, "broadcast", std::get<1>(params.first), std::get<2>(params.first));
    }
}

std::vector<Shapes> eltwise_shapes {
    {{{2, 2, 3}, {2, 3}}, {{2, 2, 3, 1}, {1, 2, 3, 1}}, {{2, 2, 3, 1}}},
    {{{6}, {2, 3, 4, 5, 6}}, {{1, 1, 1, 1, 6}, {2, 3, 4, 5, 6}}, {{2, 3, 4, 5, 6}}}
};

TEST(canonicalization, eltwise) {
    for (const auto& shapes : eltwise_shapes) {
        layout input0_layout = create_default_layout(std::get<0>(shapes)[0]);
        layout input1_layout = create_default_layout(std::get<0>(shapes)[1]);

        topology topology;
        topology.add(input_layout("input0", input0_layout));
        topology.add(input_layout("input1", input1_layout));
        topology.add(eltwise("eltwise", { input_info("input0"), input_info("input1") }, eltwise_mode::sum));

        canonicalization_test(topology, "eltwise", std::get<1>(shapes), std::get<2>(shapes));
    }
}

std::vector<Shapes> fully_connected_shapes {
    {{{5, 2}, {5, 2}}, {{5, 2, 1, 1}, {5, 2, 1, 1}}, {{5, 5, 1, 1}}}
};

TEST(canonicalization, fully_connected) {
    auto& engine = get_test_engine();
    for (const auto& shapes : fully_connected_shapes) {
        layout input0_layout = create_default_layout(std::get<0>(shapes)[0]);
        auto weights_prim = engine.allocate_memory(create_default_layout(std::get<0>(shapes)[1]));

        size_t input_rank = input0_layout.get_partial_shape().size();
        size_t weights_rank = weights_prim->get_layout().get_partial_shape().size();

        topology topology;
        topology.add(input_layout("input", input0_layout));
        topology.add(data("weights", weights_prim));
        topology.add(fully_connected("fully_connected", input_info("input"), "weights", "", {}, input_rank, weights_rank));

        canonicalization_test(topology, "fully_connected", std::get<1>(shapes), std::get<2>(shapes));
    }
}
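Sanity check on the single fully_connected entry: a {5, 2} input (batch 5, 2 features) against {5, 2} weights (5 output neurons, 2 input features) yields a {5, 5} result, and canonicalization pads all shapes with trailing ones to 4D:

// input {5, 2} x weights {5, 2} -> output {5, 5}   (batch x neurons)
// canonicalized: inputs {5, 2, 1, 1} and {5, 2, 1, 1}; output {5, 5, 1, 1}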

std::vector<Shapes> gemm_shapes {
    {{{1, 5}, {5, 2}}, {{1, 1, 1, 5}, {1, 1, 5, 2}}, {{1, 1, 1, 2}}}
};

TEST(canonicalization, gemm) {
    for (const auto& shapes : gemm_shapes) {
        layout input0_layout = create_default_layout(std::get<0>(shapes)[0]);
        layout input1_layout = create_default_layout(std::get<0>(shapes)[1]);

        size_t input_rank = input0_layout.get_partial_shape().size();
        size_t weights_rank = input1_layout.get_partial_shape().size();

        topology topology;
        topology.add(input_layout("input0", input0_layout));
        topology.add(input_layout("input1", input1_layout));
        topology.add(gemm("gemm", {input_info("input0"), input_info("input1")},
                          data_types::f32, false, false, 1.0f, 0.0f, input_rank, weights_rank));

        canonicalization_test(topology, "gemm", std::get<1>(shapes), std::get<2>(shapes));
    }
}
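Likewise for the gemm entry: a 1x5 by 5x2 matrix product gives a 1x2 result, and here canonicalization prepends ones up to the 4D layout gemm operates on:

// inputs {1, 5} x {5, 2} -> output {1, 2}
// canonicalized: {1, 1, 1, 5} x {1, 1, 5, 2} -> {1, 1, 1, 2}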

struct fusing_gemm_eltwise_params {
    ov::PartialShape input_gemm_first;
    ov::PartialShape weights_gemm_first;
    ov::PartialShape input_gemm_second;
    ov::PartialShape weights_gemm_second;
};

std::vector<std::pair<Shapes, fusing_gemm_eltwise_params>> fusing_gemm_eltwise_shapes_with_params {
    {
        {{/* placeholder */}, {{1, 1, 1, 4, 4}}, {{1, 1, 1, 4, 4}}},
        {{1, 1, 1, 4, 5}, {1, 1, 1, 5, 4}, {1, 1, 4, 5}, {1, 1, 5, 4}}
    }
};

TEST(canonicalization, fusing_gemm_eltwise) {
    for (const auto& shapes : fusing_gemm_eltwise_shapes_with_params) {
        layout input_layout_first = create_default_layout(shapes.second.input_gemm_first);
        layout weights_layout_first = create_default_layout(shapes.second.weights_gemm_first);

        layout input_layout_second = create_default_layout(shapes.second.input_gemm_second);
        layout weights_layout_second = create_default_layout(shapes.second.weights_gemm_second);

        size_t input_rank_first = input_layout_first.get_partial_shape().size();
        size_t weights_rank_first = weights_layout_first.get_partial_shape().size();

        size_t input_rank_second = input_layout_second.get_partial_shape().size();
        size_t weights_rank_second = weights_layout_second.get_partial_shape().size();

        size_t out_rank = std::max(std::max(input_rank_first, weights_rank_first),
                                   std::max(input_rank_second, weights_rank_second));

        topology topology;
        topology.add(input_layout("input_first", input_layout_first));
        topology.add(input_layout("weights_first", weights_layout_first));
        topology.add(input_layout("input_second", input_layout_second));
        topology.add(input_layout("weights_second", weights_layout_second));

        topology.add(gemm("gemm_first", {input_info("input_first"), input_info("weights_first")},
                          data_types::f32, false, false, 1.0f, 0.0f, input_rank_first, weights_rank_first));

        topology.add(gemm("gemm_second", {input_info("input_second"), input_info("weights_second")},
                          data_types::f32, false, false, 1.0f, 0.0f, input_rank_second, weights_rank_second));

        topology.add(eltwise("sum", {input_info("gemm_first"), input_info("gemm_second")}, eltwise_mode::sum));
        topology.add(reorder("out_reorder", input_info("sum"), format::get_default_format(out_rank), data_types::f32));

        canonicalization_test(topology, "out_reorder", std::get<1>(shapes.first), std::get<2>(shapes.first), true);
    }
}

} // namespace

@ -1356,6 +1356,124 @@ TEST(eltwise_gpu_f32, dynamic_kernel_broadcast) {
    }
}

TEST(eltwise_gpu_f32, dynamic_kernel_broadcast_mixed_ranks_3d_2d) {
    auto& engine = get_test_engine();

    ov::PartialShape in1_shape = {3, 1, 5};
    ov::PartialShape in2_shape = {1, 5};
    auto in1_layout = layout{{-1, -1, 5}, data_types::f32, format::bfyx};
    auto in2_layout = layout{{-1, 5}, data_types::f32, format::bfyx};
    auto in1_mem_layout = layout{in1_shape, data_types::f32, format::bfyx};
    auto in2_mem_layout = layout{in2_shape, data_types::f32, format::bfyx};
    auto input1 = engine.allocate_memory(in1_mem_layout);
    auto input2 = engine.allocate_memory(in2_mem_layout);

    topology topology;
    topology.add(input_layout("input1", in1_layout));
    topology.add(input_layout("input2", in2_layout));
    topology.add(eltwise("eltwise", { input_info("input1"), input_info("input2") }, eltwise_mode::sum));

    set_values(input1, {
        1.f, 0.f, 5.f, 1.5f, 2.f,
        0.f, 6.f, 5.2f, 3.f, 0.5f,
        7.f, 12.f, 4.f, -0.5f, 8.f
    });

    set_values(input2, { 0.5f, -0.5f, 1.0f, -1.0f, 2.f });

    ExecutionConfig config;
    config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
    network network(engine, topology, config);
    network.set_input_data("input1", input1);
    network.set_input_data("input2", input2);

    auto inst = network.get_primitive("eltwise");

    auto impl = inst->get_impl();
    ASSERT_TRUE(impl != nullptr);
    ASSERT_TRUE(impl->is_dynamic());

    auto outputs = network.execute();

    ASSERT_EQ(outputs.size(), size_t(1));
    ASSERT_EQ(outputs.begin()->first, "eltwise");

    auto output = outputs.at("eltwise").get_memory();

    ov::PartialShape expected_shape{3, 1, 5};

    ASSERT_EQ(output->get_layout().get_partial_shape(), expected_shape);

    float answers[15] = { 1.5f, -0.5f, 6.f, 0.5f, 4.f,
                          0.5f, 5.5f, 6.2f, 2.f, 2.5f,
                          7.5f, 11.5f, 5.f, -1.5f, 10.f };

    cldnn::mem_lock<float> output_ptr(output, get_test_stream());

    for (int i = 0; i < 15; i++) {
        ASSERT_EQ(answers[i], output_ptr[i]) << "i = " << i;
    }
}
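The expected values follow numpy-style right-aligned broadcasting: the five values of the {1, 5} operand are added to every row of the {3, 1, 5} operand. Spot-checking the first and last rows against the buffers above:

// row 0: 1.f + 0.5f = 1.5f,  0.f + (-0.5f) = -0.5f,  5.f + 1.0f = 6.f,  1.5f + (-1.0f) = 0.5f,  2.f + 2.f = 4.f
// row 2: 7.f + 0.5f = 7.5f, 12.f + (-0.5f) = 11.5f,  4.f + 1.0f = 5.f, -0.5f + (-1.0f) = -1.5f, 8.f + 2.f = 10.f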

TEST(eltwise_gpu_f32, dynamic_kernel_broadcast_mixed_ranks_5d_2d) {
    auto& engine = get_test_engine();

    ov::PartialShape in1_shape = {1, 1, 3, 1, 5};
    ov::PartialShape in2_shape = {1, 5};
    auto in1_layout = layout{{1, 1, -1, -1, 5}, data_types::f32, format::bfzyx};
    auto in2_layout = layout{{-1, 5}, data_types::f32, format::bfyx};
    auto in1_mem_layout = layout{in1_shape, data_types::f32, format::bfzyx};
    auto in2_mem_layout = layout{in2_shape, data_types::f32, format::bfyx};
    auto input1 = engine.allocate_memory(in1_mem_layout);
    auto input2 = engine.allocate_memory(in2_mem_layout);

    topology topology;
    topology.add(input_layout("input1", in1_layout));
    topology.add(input_layout("input2", in2_layout));
    topology.add(eltwise("eltwise", { input_info("input1"), input_info("input2") }, eltwise_mode::sum));

    set_values(input1, {
        1.f, 0.f, 5.f, 1.5f, 2.f,
        0.f, 6.f, 5.2f, 3.f, 0.5f,
        7.f, 12.f, 4.f, -0.5f, 8.f
    });

    set_values(input2, { 0.5f, -0.5f, 1.0f, -1.0f, 2.f });

    ExecutionConfig config;
    config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
    network network(engine, topology, config);
    network.set_input_data("input1", input1);
    network.set_input_data("input2", input2);

    auto inst = network.get_primitive("eltwise");

    auto impl = inst->get_impl();
    ASSERT_TRUE(impl != nullptr);
    ASSERT_TRUE(impl->is_dynamic());

    auto outputs = network.execute();

    ASSERT_EQ(outputs.size(), size_t(1));
    ASSERT_EQ(outputs.begin()->first, "eltwise");

    auto output = outputs.at("eltwise").get_memory();

    ov::PartialShape expected_shape{1, 1, 3, 1, 5};

    ASSERT_EQ(output->get_layout().get_partial_shape(), expected_shape);

    float answers[15] = { 1.5f, -0.5f, 6.f, 0.5f, 4.f,
                          0.5f, 5.5f, 6.2f, 2.f, 2.5f,
                          7.5f, 11.5f, 5.f, -1.5f, 10.f };

    cldnn::mem_lock<float> output_ptr(output, get_test_stream());

    for (int i = 0; i < 15; i++) {
        ASSERT_EQ(answers[i], output_ptr[i]) << "i = " << i;
    }
}

TEST(eltwise_gpu_f32, add_basic_in4x4x2x2) {
    // Input2 : 2x2x2
    // Input  : 2x2x2x2

@ -74,8 +74,8 @@ TEST(check_hash_value, fc_basic) {
    const auto primitive_hash = primitve->hash();
    const auto params_hash = prog_node.type()->get_fake_aligned_params(*prog_node.get_kernel_impl_params()).hash();

    ASSERT_EQ(primitive_hash, 7881065839556591629UL);
    ASSERT_EQ(params_hash, 12327057149074647711UL);
    ASSERT_EQ(primitive_hash, 2197080758510296176UL);
    ASSERT_EQ(params_hash, 5241462399408562393UL);
}

TEST(check_hash_value, gather_basic) {

@ -127,7 +127,7 @@ TEST(shape_of_gpu, dynamic) {

    cldnn::topology topology;
    topology.add(input_layout("input", in_layout));
    topology.add(shape_of("shape_of", input_info("input"), 5, data_types::i32));
    topology.add(shape_of("shape_of", input_info("input"), 4, data_types::i32));

    ExecutionConfig config = get_test_default_config(engine);
    config.set_property(ov::intel_gpu::allow_new_shape_infer(true));