[GPU] Minor layout optimizer refactoring (#17553)
commit 3d79bd1ac5
parent 55156f9a6c
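At a glance, the refactor replaces verbose dependency/layout chains and the per-node inputs_count() helpers with new program_node accessors (get_input_layout(), get_input_pshape(), get_output_pshape(), get_inputs_count(), get_outputs_count()) added in program_node.h below. A minimal, self-contained sketch of the call-site pattern follows; the types here are simplified stand-ins for illustration, not the real cldnn classes.

// Stand-in types; only the accessor pattern mirrors the real program_node API.
#include <cstddef>
#include <vector>

struct layout {
    bool static_shape = true;
    bool is_static() const { return static_shape; }
};

struct program_node {
    std::vector<program_node*> deps;
    layout out_layout;

    program_node& get_dependency(size_t idx) const { return *deps.at(idx); }
    layout get_output_layout(bool /*unused*/ = true) const { return out_layout; }

    // New-style helper mirroring the one this commit adds to program_node.h.
    layout get_input_layout(size_t idx = 0) const {
        return get_dependency(idx).get_output_layout(false);
    }
};

// Before: call sites chained get_dependency(idx).get_output_layout().
bool input_is_static_old(const program_node& n) {
    return n.get_dependency(0).get_output_layout().is_static();
}

// After: the same check through the new accessor.
bool input_is_static_new(const program_node& n) {
    return n.get_input_layout(0).is_static();
}

int main() {
    program_node producer;
    program_node consumer;
    consumer.deps.push_back(&producer);
    return input_is_static_old(consumer) == input_is_static_new(consumer) ? 0 : 1;
}

The intent, as the diff below shows, is a mechanical cleanup of call sites rather than a functional change.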
@@ -82,18 +82,18 @@ std::string concatenation_inst::to_string(concatenation_node const& node) {
 std::stringstream ss_inputs;
 std::stringstream primitive_description;

-for (size_t i = 0; i < node.inputs_count(); ++i) {
+for (size_t i = 0; i < node.get_inputs_count(); ++i) {
     ss_inputs << node.input(i).id();
     if (node.input(i).get_output_layout().is_static())
         ss_inputs << ", count: " << node.input(i).get_output_layout().count();
     else
         ss_inputs << ", count: " << "?";
-    i != (node.inputs_count() - 1) ? ss_inputs << ", " : ss_inputs << "";
+    i != (node.get_inputs_count() - 1) ? ss_inputs << ", " : ss_inputs << "";
 }

 json_composite concat_info;
 concat_info.add("concat axis", desc->axis);
-concat_info.add("inputs count", node.inputs_count());
+concat_info.add("inputs count", node.get_inputs_count());
 concat_info.add("inputs", ss_inputs.str());

 node_info->add("concat info", concat_info);
@@ -305,7 +305,7 @@ std::string eltwise_inst::to_string(eltwise_node const& node) {
 }

 json_composite eltwise_info;
-for (size_t i = 0; i < node.inputs_count(); i++) {
+for (size_t i = 0; i < node.get_inputs_count(); i++) {
     eltwise_info.add("input_" + std::to_string(i), node.input(i).id());
 }
 eltwise_info.add("mode", str_mode);
@@ -322,7 +322,7 @@ eltwise_inst::typed_primitive_inst(network& network, eltwise_node const& node) :
 check_inputs_count(node);
 // check for stride
 auto prim = node.get_primitive();
-auto inputs_count = node.inputs_count();
+auto inputs_count = node.get_inputs_count();

 if (is_dynamic())
     return;
@@ -363,10 +363,10 @@ eltwise_inst::typed_primitive_inst(network& network, eltwise_node const& node) :
     }
 } else {
     bool use_new_shape_infer = network.get_config().get_property(ov::intel_gpu::allow_new_shape_infer);
-    auto input0_pshape = node.input().get_output_layout().get_partial_shape();
+    auto input0_pshape = node.get_input_pshape(0);

     for (size_t i = 1; i < inputs_count; ++i) {
-        auto input_pshape = node.input(i).get_output_layout().get_partial_shape();
+        auto input_pshape = node.get_input_pshape(i);

         if (input0_pshape.size() > input_pshape.size()) {
             if (use_new_shape_infer) {
@@ -216,7 +216,7 @@ std::string gemm_inst::to_string(gemm_node const& node) {
 std::stringstream primitive_description;

 json_composite gemm_info;
-for (size_t i = 0; i < node.inputs_count(); i++) {
+for (size_t i = 0; i < node.get_inputs_count(); i++) {
     gemm_info.add("input_" + std::to_string(i), node.input(i).id());
 }
 gemm_info.add("alpha", alpha);
@@ -59,7 +59,7 @@ void compile_graph::run(program& p) {

 // TODO: need to come up with better handling of unsupported shape agnostic cases
 // e.g. process exceptions from choose_impl() and ignore those for dynamic parameters
-if (node->is_type<fully_connected>() && node->is_dynamic() && node->get_output_layout().get_partial_shape().size() > 3)
+if (node->is_type<fully_connected>() && node->is_dynamic() && node->get_output_pshape().size() > 3)
     can_select_impl = false;

 // TODO: Remove this WA once we have shape agnostic arg_max_min_axis kernel with non-const k input
@@ -104,7 +104,7 @@ void handle_reshape::run(program& p) {
 if (user->is_type<fully_connected>() || user->is_type<gemm>()) {
     bool is_fc = user->is_type<fully_connected>();
     auto wei_dt = is_fc ? user->as<fully_connected>().weights().get_output_layout().data_type :
-                          user->as<gemm>().get_dependency(1).get_output_layout().data_type;
+                          user->as<gemm>().get_input_layout(1).data_type;
     onednn_support = layout_optimizer::onednn_check_data_types_for_fc_gemm(output_data_type, wei_dt, out_dt);
 } else if (user->is_type<convolution>() || user->is_type<deconvolution>()) {
     bool is_conv = user->is_type<convolution>();
@@ -39,6 +39,7 @@ void pre_replace_deconv::run(program& p) {
 auto weights_nodes_id = deconv_prim->weights;
 auto biases_nodes_id = deconv_prim->bias;
 auto& input_node = deconv_node.get_dependency(0);
+auto input_layout = deconv_node.get_input_layout(0);
 const primitive_id deconv_node_id = deconv_node.id();
 const primitive_id& input_node_id = input_node.id();

@@ -50,12 +51,12 @@ void pre_replace_deconv::run(program& p) {

 bool perform_opt = false;
 // fp16 and fp32 bfyx implementation supports transposed convolution
-perform_opt |= cldnn::format::dimension(input_node.get_output_layout().format) == 4 &&
-               (input_node.get_output_layout().data_type == data_types::f32 || input_node.get_output_layout().data_type == data_types::f16) &&
-               !((_lo.get_optimization_attributes().b_fs_yx_fsv16_network || input_node.get_output_layout().format == format::b_fs_yx_fsv16) &&
+perform_opt |= cldnn::format::dimension(input_layout.format) == 4 &&
+               (input_layout.data_type == data_types::f32 || input_layout.data_type == data_types::f16) &&
+               !((_lo.get_optimization_attributes().b_fs_yx_fsv16_network || input_layout.format == format::b_fs_yx_fsv16) &&
                _lo.is_format_optimized(deconv_node, format::b_fs_yx_fsv16));
 // int8/uint8 input
-perform_opt |= (input_node.get_output_layout().data_type == data_types::i8 || input_node.get_output_layout().data_type == data_types::u8);
+perform_opt |= (input_layout.data_type == data_types::i8 || input_layout.data_type == data_types::u8);

 if (!perform_opt)
     continue;
@@ -64,7 +65,7 @@ void pre_replace_deconv::run(program& p) {
 // setting convolution parameters based on deconvolution params
 auto output_layout = deconv_node.get_output_layout();
 auto output_pshape = output_layout.get_partial_shape();
-auto input_pshape = input_node.get_output_layout().get_partial_shape();
+auto input_pshape = input_layout.get_partial_shape();
 auto spatial_rank = output_layout.get_spatial_rank();
 auto stride = deconv_prim->stride;
 auto pad = deconv_prim->pad;
@@ -342,7 +342,7 @@ void prepare_buffer_fusing::run(program& p) {
 auto can_optimize = [](const program_node* node) {
     bool is_dynamic = node->is_dynamic();
     bool is_planar = format::is_default_format(node->get_output_layout().format);
-    bool no_pad = !node->get_output_layout().data_padding && !node->get_input_layouts().empty() && !node->get_input_layouts()[0].data_padding;
+    bool no_pad = !node->get_output_layout().data_padding && !node->get_input_layouts().empty() && !node->get_input_layout(0).data_padding;
     if (node->is_type<reshape>() && is_dynamic && is_planar && no_pad && !node->is_output() && !node->has_fused_primitives()) {
         return true;
     }
@@ -398,7 +398,7 @@ void prepare_buffer_fusing::run(program& p) {
 const auto& crop_layout = node.get_output_layout();
 auto format = crop_layout.format;
 auto crop_prim = node.get_primitive();
-auto input_layout = node.get_dependency(0).get_output_layout();
+auto input_layout = node.get_input_layout(0);
 const auto& crop_size = crop_layout.get_tensor();
 const auto& out_padd = crop_layout.data_padding;
 auto opt_lower_pad = crop_prim->offsets.feature[0];
@@ -184,7 +184,7 @@ void prepare_primitive_fusing::fuse_sigmoid_mul_to_swish(program &p) {

 p.get_processing_order().insert_next(&input, &swish);

-swish.calc_output_layout();
+swish.recalc_output_layout();
 });
 }
 }
@@ -291,10 +291,10 @@ void prepare_primitive_fusing::fuse_bias(program &p) {

 // Change out_features value to proper dimension for 3D FC case
 if (is_3d_fully_connected(node->get_dependency(0))) {
-    out_features = node->get_dependency(0).get_output_layout().spatial(1);
+    out_features = node->get_input_layout(0).spatial(1);
     is_3d_fc = true;
 } else if (is_3d_fully_connected(node->get_dependency(1))) {
-    out_features = node->get_dependency(1).get_output_layout().spatial(1);
+    out_features = node->get_input_layout(1).spatial(1);
     is_3d_fc = true;
 }
 auto& const_dep = eltw_node.get_dependency(const_dep_idx);
@@ -486,7 +486,7 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) {
 continue;

 auto is_grouped_conv = [](convolution_node& node) -> bool {
-    auto in_layout = node.get_dependency(0).get_output_layout();
+    auto in_layout = node.get_input_layout(0);
     return (node.get_groups() > 1 && node.get_groups() != static_cast<uint32_t>(in_layout.feature()));
 };

@@ -504,7 +504,7 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) {
 // Since reorder inputs is called after this pass
 // we have to check that blocked formats can be used in the network and layer is optimized for it.
 if ((node.get_output_layout().format == format::b_fs_yx_fsv16 ||
-     _lo.should_select_b_fs_yx_fsv16_layout(node, node.get_dependency(1).get_output_layout())) &&
+     _lo.should_select_b_fs_yx_fsv16_layout(node, node.get_input_layout(1))) &&
     !is_grouped_conv(node))
     return true;

@@ -517,7 +517,7 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) {
 _lo.is_format_optimized(node, format::fs_b_yx_fsv32) && node.get_primitive()->groups == 1)))
     return true;

-const size_t in_feature = node.get_dependency(0).get_output_layout().feature();
+const size_t in_feature = node.get_input_layout(0).feature();
 if ((node.get_output_layout().format == format::b_fs_zyx_fsv16 ||
     (_lo.is_format_optimized(node, format::b_fs_zyx_fsv16) &&
     _lo.get_optimization_attributes().b_fs_zyx_fsv16_network)) && in_feature != 3)
@@ -534,7 +534,7 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) {
 if (node.get_output_layout().format == format::bs_fs_yx_bsv32_fsv16 || _lo.is_format_optimized(node, format::bs_fs_yx_bsv32_fsv16))
     return true;

-auto in_dt = node.get_dependency(0).get_output_layout().data_type;
+auto in_dt = node.get_input_layout(0).data_type;

 // TODO: check if that's enough for correct work
 return data_type_traits::is_i8_u8(in_dt);
@@ -547,7 +547,7 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) {
 if (eltw_node.get_dependencies().size() < 2)
     return false;

-auto const_layout = eltw_node.get_dependency(1).get_output_layout();
+auto const_layout = eltw_node.get_input_layout(1);
 auto conv_layout = conv_node.get_output_layout();
 auto per_channel_eltwise = const_layout.feature() == conv_layout.feature();

@@ -564,17 +564,17 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) {
 _lo.get_preferred_impl_type(node, format::any /*dummy*/) == impl_types::onednn) {
     return true;
 } else {
-    auto in_dt = node.get_dependency(0).get_output_layout().data_type;
+    auto in_dt = node.get_input_layout(0).data_type;
     return data_type_traits::is_i8_u8(in_dt);
 }
 };

 auto gemm_supports_fusings = [](gemm_node& node) -> bool {
     bool does_support_fusings = false;
-    auto in0_dt = node.get_dependency(0).get_output_layout().data_type;
-    auto in1_dt = node.get_dependency(1).get_output_layout().data_type;
-    auto in0_fmt = node.get_dependency(0).get_output_layout().format;
-    auto in1_fmt = node.get_dependency(1).get_output_layout().format;
+    auto in0_dt = node.get_input_layout(0).data_type;
+    auto in1_dt = node.get_input_layout(1).data_type;
+    auto in0_fmt = node.get_input_layout(0).format;
+    auto in1_fmt = node.get_input_layout(1).format;

     if (data_type_traits::is_floating_point(in0_dt) &&
         data_type_traits::is_floating_point(in1_dt))
@@ -582,9 +582,9 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) {

 if (data_type_traits::is_i8_u8(in0_dt) && in0_fmt == format::bfyx &&
     data_type_traits::is_i8_u8(in1_dt) && in1_fmt == format::bfyx) {
-    if (node.inputs_count() == 3) {
-        auto in2_dt = node.get_dependency(2).get_output_layout().data_type;
-        auto in2_fmt = node.get_dependency(2).get_output_layout().format;
+    if (node.get_inputs_count() == 3) {
+        auto in2_dt = node.get_input_layout(2).data_type;
+        auto in2_fmt = node.get_input_layout(2).format;
         does_support_fusings = data_type_traits::is_i8_u8(in2_dt) && in2_fmt == format::bfyx ? true : false;
     } else {
         does_support_fusings = true;
@@ -595,7 +595,7 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) {
 };

 auto mvn_supports_fusings = [](mvn_node& node, bool for_eltwise = false) -> bool {
-    auto in_layout = node.get_dependency(0).get_output_layout();
+    auto in_layout = node.get_input_layout(0);
     if (node.get_primitive()->requires_alignment(in_layout.get_partial_shape()))
         return false;
     return data_type_traits::is_i8_u8(in_layout.data_type) || for_eltwise;
@@ -608,8 +608,8 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) {
 auto& eltw = static_cast<const eltwise&>(*node.get_users().front()->get_primitive());
 auto& conv = node.get_dependency(0).as<convolution>();
 auto eltw_mode = eltw.mode == eltwise_mode::sum;
-auto conv_size = conv.get_dependency(0).get_output_layout().spatial(0) % 128 == 0 &&
-                 conv.get_dependency(0).get_output_layout().spatial(1) % 2 == 0;
+auto conv_size = conv.get_input_layout(0).spatial(0) % 128 == 0 &&
+                 conv.get_input_layout(0).spatial(1) % 2 == 0;
 auto format = conv.get_output_layout().format == format::bfyx;
 auto dt = conv.get_output_layout().data_type == data_types::f16;
 if (eltw_mode && conv_size && format && dt)
@@ -743,7 +743,7 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) {

 should_fuse |= input.is_type<mvn>();

-should_fuse |= input.is_type<normalize>() && data_type_traits::is_i8_u8(input.get_dependency(0).get_output_layout().data_type);
+should_fuse |= input.is_type<normalize>() && data_type_traits::is_i8_u8(input.get_input_layout(0).data_type);

 should_fuse |= input.is_type<deconvolution>();

@@ -832,7 +832,7 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) {
 auto out_layout = quantize_node.get_output_layout();
 auto in_layout = input_data.get_output_layout();
 auto out_dt = out_layout.data_type;
-auto in_dt = input_data.get_dependency(0).get_output_layout().data_type;
+auto in_dt = input_data.get_input_layout(0).data_type;
 auto out_dt_is_i8_u8 = data_type_traits::is_i8_u8(out_dt);
 auto in_dt_is_i8_u8 = data_type_traits::is_i8_u8(in_dt);

@@ -858,7 +858,7 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) {
 ((out_dt == data_types::f32 || out_dt == data_types::f16) ||
  in_layout.format == format::b_fs_yx_fsv16 ||
  in_layout.format == format::bs_fs_yx_bsv32_fsv16 ||
- (_lo.should_select_b_fs_yx_fsv16_layout(input_data.as<convolution>(), input_data.get_dependency(1).get_output_layout()) &&
+ (_lo.should_select_b_fs_yx_fsv16_layout(input_data.as<convolution>(), input_data.get_input_layout(1)) &&
  !is_grouped_conv(input_data.as<convolution>())) ||
 // Avoid fusing to b_fs_yx_fsv16 (and similar) kernels
 _lo.get_optimization_attributes().use_onednn_impls ||
@@ -935,7 +935,7 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) {
 eltwise_mode::div
 };

-if (node.is_output() || node.inputs_count() != 2 ||
+if (node.is_output() || node.get_inputs_count() != 2 ||
     std::find(supported_modes.begin(), supported_modes.end(), prim->mode) == supported_modes.end() ||
     !prim->stride.empty())
     return;
@@ -1008,9 +1008,9 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) {
 // E.g. parent1 [?,?,768], parent2 [?,?,1]
 // expected eltw out shape: [?,?,768]
 // but w/o this check we can fuse eltwise to parent2 and return [?,?,1] as output shape which is unexpected
-auto parent1_pshape = parent1.first->get_output_layout().get_partial_shape();
-auto parent2_pshape = parent2.first->get_output_layout().get_partial_shape();
-auto out_pshape = node.get_output_layout().get_partial_shape();
+auto parent1_pshape = parent1.first->get_output_pshape(0);
+auto parent2_pshape = parent2.first->get_output_pshape(0);
+auto out_pshape = node.get_output_pshape(0);

 auto are_compatible = [](const ov::PartialShape& out_shape, const ov::PartialShape& in_shape) -> bool {
     if (out_shape.rank().get_length() != in_shape.rank().get_length())
@@ -39,7 +39,7 @@ void prepare_primitive_fusing_through::run(program& p) {
 return false;

 if (node->is_type<reorder>() &&
-    node->get_output_layout().data_type != node->get_dependency(0).get_output_layout().data_type)
+    node->get_output_layout().data_type != node->get_input_layout(0).data_type)
     return false;

 // Not to fuse reshape after Reduce changing the order of un-reduced axes. It is expected to be optimized out.
@@ -48,7 +48,7 @@ void prepare_primitive_fusing_through::run(program& p) {

 // Not to raise up target node through reshape where the size of dimension is changed (e.g. Unsqueeze)
 if (node->is_type<reshape>() &&
-    node->get_output_layout().get_partial_shape().size() != node->get_dependency(0).get_output_layout().get_partial_shape().size())
+    node->get_output_pshape().size() != node->get_input_pshape(0).size())
     return false;

 return true;
@@ -444,7 +444,7 @@ void prepare_quantization::remove_fake_reorders(program& p, reorder_node& reorder_node) {

 auto &usr = reorder_node.get_users().front();
 auto &dep = reorder_node.get_dependency(0);
-if (!(usr->is_type<convolution>() && usr->get_dependency(1).get_output_layout().data_type == data_types::i8) ||
+if (!(usr->is_type<convolution>() && usr->get_input_layout(1).data_type == data_types::i8) ||
     !dep.is_input() ||
     dep.get_output_layout().data_type != data_types::u8 ||
     (reorder_node.get_output_layout().data_type != data_types::f32 && reorder_node.get_output_layout().data_type != data_types::f16) ||
@@ -492,8 +492,8 @@ void prepare_quantization::prepare_asymmetric_quantization(program &p, convolution_node& node) {
 if (node.get_users().size() != 1)
     return false;

-auto in0_layout = node.get_dependency(0).get_output_layout();
-auto in1_layout = node.get_dependency(1).get_output_layout();
+auto in0_layout = node.get_input_layout(0);
+auto in1_layout = node.get_input_layout(1);

 if (!node.get_dependency(1).is_type<data>())
     return false;
@@ -279,7 +279,7 @@ void remove_redundant_reorders::run(program& p) {
 continue;

 auto o_layout = r_node.get_output_layout();
-auto i_layout = r_node.get_dependency(0).get_output_layout();
+auto i_layout = r_node.get_input_layout(0);

 // Optimize reorder b_fs_yx_fsv16 -> bfyx when spatials are equal to 1. In this case we can reinterpret buffer,
 // but pads need to be handled correctly.
@@ -513,9 +513,9 @@ void remove_redundant_reorders::run(program& p) {
 return false;

 auto node_format = node->get_output_layout().format;
-for (size_t axis = 0; axis < node->get_dependency(0).get_output_layout().data_padding.lower_size().sizes(node_format).size(); axis++) {
+for (size_t axis = 0; axis < node->get_input_layout(0).data_padding.lower_size().sizes(node_format).size(); axis++) {
     if (!user->is_padding_supported(static_cast<int>(axis),
-                                    node->get_dependency(0).get_output_layout().data_padding.lower_size().sizes(node_format)[axis]))
+                                    node->get_input_layout(0).data_padding.lower_size().sizes(node_format)[axis]))
         return false;
 }
 }
@@ -580,7 +580,7 @@ void remove_redundant_reorders::run(program& p) {

 // Add fused_primitive_desc of reorder to convolution which propagate original output layout to jitter
 fused_primitive_desc local_desc(node->get_primitive());
-local_desc.input_layout = input.get_dependency(0).get_output_layout(); // original convolution's output layout
+local_desc.input_layout = input.get_input_layout(0); // original convolution's output layout
 node->set_input_layout(local_desc.input_layout);
 local_desc.f_param = node->get_fuse_params();
 local_desc.outer_dep_start_idx = -1;
@@ -654,7 +654,7 @@ void remove_redundant_reorders::run(program& p) {
 bool remove_dep = reshape_input_node.get_users().size() == 1 && !reshape_input_node.is_output() &&
                   !reshape_input_node.has_fused_primitives();
 bool remove_current = remove_dep && !reshape_input_node.get_dependencies().empty() &&
-                      reshape_input_node.get_dependency(0).get_output_layout() == reshape_node.get_output_layout() &&
+                      reshape_input_node.get_input_layout(0) == reshape_node.get_output_layout() &&
                       !reshape_node.has_fused_primitives();

 if (remove_dep) {
@@ -692,7 +692,7 @@ void remove_redundant_reorders::run(program& p) {

 for (auto n : p.get_processing_order()) {
     if (n->is_in_data_flow() && n->is_type<reorder>()) {
-        auto preferred_impl = lo.get_preferred_impl_type(*n, n->get_dependency(0).get_output_layout().format);
+        auto preferred_impl = lo.get_preferred_impl_type(*n, n->get_input_layout(0).format);
         n->set_preferred_impl_type(preferred_impl);
     }

@@ -76,7 +76,7 @@ public:
 quantize_params.out_scale = arg.get_output_scale_val();
 quantize_params.out_shift = arg.get_output_shift_val();

-for (size_t i = 1; i < arg.inputs_count(); i++) {
+for (size_t i = 1; i < arg.get_inputs_count(); i++) {
     quantize_params.inputs.push_back(convert_data_tensor(impl_param.input_layouts[i]));
 }

@@ -23,7 +23,6 @@ public:

 program_node& input(size_t idx = 0) const { return get_dependency(idx); }

-size_t inputs_count() const { return desc->input.size(); }
 std::vector<size_t> get_shape_infer_dependencies() const override { return {}; }
 };

@@ -36,7 +36,7 @@ private:
 program::ptr _program = nullptr;

 void add_or_change_input_layout(const program_node& node) {
-    auto layout = node.get_dependency(0).get_output_layout();
+    auto layout = node.get_input_layout(0);
     auto input_id = node.as<condition>().result_id();
     if (_topology.get_primitives().count(input_id) == 0) {
         _topology.add_primitive(std::make_shared<input_layout>(input_id, layout));
@@ -15,7 +15,6 @@ struct typed_program_node<convert_color> : public typed_program_node_base<convert_color> {
 public:
 using parent::parent;
 program_node& input(size_t index = 0) const { return get_dependency(index); }
-size_t inputs_count() const { return get_primitive()->input.size(); }
 };

 using convert_color_node = typed_program_node<convert_color>;
@@ -15,7 +15,6 @@ public:
 using parent::parent;

 program_node& input(size_t index = 0) const { return get_dependency(index); }
-size_t inputs_count() const { return get_dependencies().size(); }

 std::vector<size_t> get_shape_infer_dependencies() const override { return {}; }
 };
@@ -30,7 +30,6 @@ public:
 }

 program_node& input(size_t idx = 0) const { return get_dependency(idx); }
-size_t inputs_count() const { return get_primitive()->input.size(); }

 std::shared_ptr<NodeFuseParams> get_fuse_params() const override {
     return std::make_shared<EltwiseFuseParams>(typed_desc());
@@ -17,7 +17,6 @@ public:
 using parent::parent;

 program_node& input(size_t index = 0) const { return get_dependency(index); }
-size_t inputs_count() const { return get_dependencies().size(); }
 };

 using embedding_bag_node = typed_program_node<embedding_bag>;
@@ -17,7 +17,6 @@ public:
 using parent::parent;

 program_node& input(size_t idx = 0) const { return get_dependency(idx); }
-size_t inputs_count() const { return this->get_primitive()->input_size(); }
 std::vector<size_t> get_shape_infer_dependencies() const override { return {}; }
 };

@@ -16,6 +16,7 @@
 #include "deconvolution_inst.h"
 #include "detection_output_inst.h"
 #include "binary_convolution_inst.h"
+#include "quantize_inst.h"

 #include <vector>
 #include <memory>
@@ -111,18 +112,9 @@ private:
 size_t _total_conv;
 std::map<std::pair<format::type, bool>, size_t> _optimized_conv_count;

-layout get_expected_layout(layout const& current_layout,
-                           convolution_node const& node,
-                           layout const& output_or_weights_layout);
-layout get_expected_layout(layout const& current_layout,
-                           deconvolution_node const& node,
-                           layout const& output_or_weights_layout);
-layout get_expected_layout(layout const& current_layout,
-                           detection_output_node const& node,
-                           layout const& output_or_weights_layout);
-layout get_expected_layout(layout const& current_layout,
-                           binary_convolution_node const& node,
-                           layout const& output_or_weights_layout);
+format get_expected_format(convolution_node const& node);
+format get_expected_format(deconvolution_node const& node);
+format get_expected_format(quantize_node const& node);

 bool is_depthwise(const convolution_node& node) const;
 format imad_case(convolution_node const& node) const;
@@ -166,6 +166,11 @@ public:
 program_node& get_dependency(size_t idx) const { return *dependencies.at(idx).first; }
 std::pair<program_node*, int32_t> get_dependency_with_port(size_t idx) const { return dependencies.at(idx); }

+// Count of original primitive inputs, i.e. it doesn't include fused dependencies
+size_t get_inputs_count() const { return desc->input_size(); }
+// Count of original primitive outputs
+size_t get_outputs_count() const { return desc->output_size(); }
+
 std::vector<layout> const get_input_layouts() const {
     std::vector<layout> layouts;
     for (const auto& i : dependencies) {
@@ -174,6 +179,20 @@ public:
     return layouts;
 }

+layout get_input_layout(size_t idx = 0) const {
+    return get_dependency(idx).get_output_layout(false);
+}
+
+ov::PartialShape get_input_pshape(size_t idx = 0) const {
+    return get_input_layout(idx).get_partial_shape();
+}
+
+ov::PartialShape get_output_pshape(size_t idx = 0) const {
+    if (!is_valid_output_layout(idx))
+        return calc_output_layouts()[idx].get_partial_shape();
+    return get_output_layout(idx).get_partial_shape();
+}
+
 // replaces idx-th dependency of 'this' with 'new_dep', calls program::remove_if_dangling(old_dep)
 void replace_dependency(size_t idx, program_node& new_dep, bool remove_if_dangling = true);
 // searches for 'old_dep' in dependencies list of 'this' and replaces it with 'new_dep', calls
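The helpers added above are thin wrappers, so rank and shape checks collapse into a single expression at call sites (see the prepare_primitive_fusing_through and fuse_simple_primitives hunks). A self-contained sketch with stand-in types, assuming only what the definitions above show:

#include <cstddef>
#include <vector>

struct partial_shape {
    size_t rank = 0;
    size_t size() const { return rank; }
};

struct layout {
    partial_shape ps;
    partial_shape get_partial_shape() const { return ps; }
};

struct node_stub {
    std::vector<node_stub*> deps;
    layout out_layout;

    layout get_output_layout(bool = true) const { return out_layout; }
    layout get_input_layout(size_t idx = 0) const { return deps.at(idx)->get_output_layout(false); }

    // Mirrors get_input_pshape()/get_output_pshape() from the hunk above.
    partial_shape get_input_pshape(size_t idx = 0) const { return get_input_layout(idx).get_partial_shape(); }
    partial_shape get_output_pshape(size_t = 0) const { return get_output_layout().get_partial_shape(); }
};

// A reshape that changes rank (e.g. Unsqueeze) can now be detected in one line,
// which is how prepare_primitive_fusing_through uses these helpers.
bool rank_preserved(const node_stub& reshape_node) {
    return reshape_node.get_output_pshape().size() == reshape_node.get_input_pshape(0).size();
}

int main() {
    node_stub producer{{}, {{4}}};
    node_stub reshape{{&producer}, {{4}}};
    return rank_preserved(reshape) ? 0 : 1;
}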
@@ -185,8 +204,8 @@ public:
 void remove_dependency(size_t idx);
 void remove_dependency(program_node& node);

-size_t get_dependency_index(program_node& node) const;
-size_t get_user_index(program_node& node) const;
+size_t get_dependency_index(const program_node& node) const;
+size_t get_user_index(const program_node& node) const;

 std::set<primitive_id> get_memory_dependencies() const;
 void add_memory_dependency(primitive_id);
@@ -242,8 +261,6 @@ public:
 bool set_output_layout(layout& new_layout, bool invalidate_users_if_changed = true, size_t idx = 0);
 bool set_output_layouts(std::vector<layout>& new_layout, bool invalidate_users_if_changed = true);

-size_t get_outputs_count() const { return num_outputs; }
-
 // forces recalculation of cached output layout, invalidates users if new layout is different than previous one and
 // @p invalidate_users_if_changed is set to true returns whether output layout has changed
 bool recalc_output_layout(bool invalidate_users_if_changed = true);
@@ -141,7 +141,6 @@ public:
 using parent::parent;

 program_node& input(size_t index = 0) const { return get_dependency(index); }
-size_t inputs_count() const { return get_dependencies().size(); }
 int get_levels() const { return get_primitive()->levels; }
 bool get_packed_binary_output() const { return get_output_layout().data_type == data_types::bin; }
 bool get_scale_shift_opt() const { return get_primitive()->scale_shift_opt; }
@@ -29,7 +29,6 @@ public:
 support_padding_all(true);
 }

-size_t inputs_count() const { return get_primitive()->input.size(); }
 program_node& mean_nv12() const { return get_dependency(2); }
 program_node& input(size_t idx = 0) const { return get_dependency(idx); }
 program_node& mean() const { return get_dependency(1); }
@@ -17,7 +17,6 @@ public:
 using parent::parent;

 program_node& input(size_t idx = 0) const { return get_dependency(idx); }
-size_t inputs_count() const { return get_dependencies().size(); }
 std::vector<size_t> get_shape_infer_dependencies() const override { return {}; }
 };

@@ -183,8 +183,8 @@ bool layout_optimizer::is_format_supported(program_node& node, format::type fmt) {
 return false;

 if (node.is_type<mvn>() && fmt == format::b_fs_yx_fsv16 &&
-    node.get_dependency(0).get_output_layout().data_type != data_types::i8 &&
-    node.get_dependency(0).get_output_layout().data_type != data_types::u8)
+    node.get_input_layout(0).data_type != data_types::i8 &&
+    node.get_input_layout(0).data_type != data_types::u8)
     return false;

 if (node.is_type<input_layout>())
@@ -271,7 +271,7 @@ bool layout_optimizer::can_fuse_reorder(program_node& prev, program_node& next, format fmt_prev, format fmt_next) {
 next.get_preferred_impl_type() == impl_types::onednn &&
 ((fmt_prev == format::byxf && fmt_next == format::byxf) ||
  (fmt_prev == format::bfyx && fmt_next == format::byxf &&
-  (prev_dt == data_types::f16 && next.get_dependency(0).get_output_layout().feature() <= 8))) &&
+  (prev_dt == data_types::f16 && next.get_input_layout(0).feature() <= 8))) &&
 is_input_reorder(prev, next))
     return true;

@@ -531,8 +531,8 @@ layout_optimizer::layout_optimizer(bool output_size_handling_enabled)
 }

 bool layout_optimizer::is_depthwise(const convolution_node& node) const {
-    const int32_t output_channels = node.get_output_layout().feature();
-    const int32_t input_channels = node.get_dependency(0).get_output_layout().feature();
+    const int32_t output_channels = node.get_output_layout(0).feature();
+    const int32_t input_channels = node.get_input_layout(0).feature();

     return node.get_groups() == static_cast<uint32_t>(input_channels) && input_channels == output_channels;
 }
@@ -667,7 +667,7 @@ bool layout_optimizer::convolution_b_fs_yx_fsv16_opt(const layout& input_layout,

 bool layout_optimizer::should_select_b_fs_yx_fsv16_layout(convolution_node const& node, layout const& weights_layout) {
     auto prim = node.get_primitive();
-    auto input_layout = node.get_dependency(0).get_output_layout();
+    auto input_layout = node.get_input_layout(0);
     auto const cond_denom = _total_conv > 0 ? 1.0f / static_cast<float>(_total_conv) : 1.0f;
     auto fully_support_conv_num = _optimized_conv_count.at({format::b_fs_yx_fsv16, false});
     auto partially_support_conv_num = _optimized_conv_count.at({format::b_fs_yx_fsv16, true});
@@ -843,8 +843,8 @@ static bool is_node_for_onednn(reduce_node const& node, format preferred_format)

 static bool is_node_for_onednn(deconvolution_node const& node) {
     auto prim = node.get_primitive();
-    auto input_layout = node.get_dependency(0).get_output_layout();
-    auto output_layout = node.get_output_layout();
+    auto input_layout = node.get_input_layout(0);
+    auto output_layout = node.get_output_layout(0);

     if (input_layout.is_dynamic() || output_layout.is_dynamic())
         return false;
@@ -920,7 +920,7 @@ bool layout_optimizer::users_for_convolution_byxf_opt(program_node const& node, uint32_t depth) {
 } else if (user->type() == cldnn::convolution::type_id()) {
     if (convolution_byxf_opt(node.get_output_layout(),
                              user->calc_output_layout(),
-                             user->get_dependency(1).get_output_layout(),
+                             user->get_input_layout(1),
                              user->as<convolution>())) {
         if (!users_for_convolution_byxf_opt(*user, depth - 1))
             return false;
@@ -1032,18 +1032,15 @@ bool layout_optimizer::is_mixed_layout(program_node& prev, program_node& next, bool check_data_type, std::vector<std::pair<format, format>> custom_list) {
     return false;
 }

-layout layout_optimizer::get_expected_layout(layout const& current_layout,
-                                             convolution_node const& node,
-                                             layout const& weights_layout) {
+format layout_optimizer::get_expected_format(convolution_node const& node) {
     auto prim = node.get_primitive();
-    auto expected_data_type = current_layout.data_type;
-    auto expected_format = current_layout.format;
-    auto input_layout = node.get_dependency(0).get_output_layout();
-    auto output_layout = node.calc_output_layout();
+    auto input_layout = node.get_input_layout(0);
+    auto output_layout = node.get_output_layout(0);
+    auto weights_layout = node.weights().get_output_layout().convert_to_weights_layout(prim->grouped_weights_shape);
+    auto expected_format = output_layout.format;

     if (prim->deformable_mode) {
-        output_layout.format = format::adjust_to_rank(format::bfyx, output_layout.get_partial_shape().size());
-        return output_layout;
+        return format::adjust_to_rank(format::bfyx, output_layout.get_partial_shape().size());
     }

     if (input_layout.is_dynamic() || output_layout.is_dynamic()) {
@@ -1051,10 +1048,9 @@ layout layout_optimizer::get_expected_layout(layout const& current_layout,
         expected_format = format::b_fs_yx_fsv16;
     else if (input_layout.get_partial_shape().size() == 5)
         expected_format = format::b_fs_zyx_fsv16;
-    return layout(current_layout.get_partial_shape(), expected_data_type, expected_format);
+    return expected_format;
 }

-auto expected_tensor = current_layout.get_tensor();
 const float cond_denom = _total_conv > 0 ? 1.0f / static_cast<float>(_total_conv) : 1.0f;

 bool onednn_valid_post_ops = get_post_ops_count(node) <= 32;
@@ -1079,22 +1075,18 @@ layout layout_optimizer::get_expected_layout(layout const& current_layout,
 } else {
     expected_format = imad_case(node);
 }
-expected_tensor = current_layout.get_tensor();
 } else if (_optimization_attributes.b_fs_zyx_fsv16_network &&
            convolution_b_fs_zyx_fsv16_opt(input_layout, output_layout, weights_layout, prim)) {
-    expected_tensor = current_layout.get_tensor();
-    if ((current_layout.data_type == data_types::f32 && current_layout.batch() % 16 == 0) ||
-        (current_layout.data_type == data_types::f16 && current_layout.batch() % 32 == 0))
+    if ((output_layout.data_type == data_types::f32 && output_layout.batch() % 16 == 0) ||
+        (output_layout.data_type == data_types::f16 && output_layout.batch() % 32 == 0))
         expected_format = cldnn::format::bs_fs_zyx_bsv16_fsv16;
     else
         expected_format = cldnn::format::b_fs_zyx_fsv16;

-} else if (current_layout.format == format::bfzyx) {
-    expected_tensor = current_layout.get_tensor();
+} else if (output_layout.format == format::bfzyx) {
     expected_format = cldnn::format::bfzyx;
 } else if (_optimization_attributes.bs_fs_yx_bsv16_fsv16_network &&
            convolution_bs_fs_yx_bsv16_fsv16_opt(node.input().get_output_layout(), output_layout, weights_layout, prim)) {
-    expected_tensor = current_layout.get_tensor();
     expected_format = cldnn::format::bs_fs_yx_bsv16_fsv16;
 } else if (_optimization_attributes.fs_b_yx_fsv32_network && !node.get_transposed() &&
            ((convolution_fs_b_yx_fsv32_opt(input_layout,
@@ -1109,72 +1101,67 @@ layout layout_optimizer::get_expected_layout(layout const& current_layout,
 // 2-nd: the previous conv primitive supports fs_b_yx_fsv32 layout and
 // current conv primitives supports this one with weak restrictions -
 // that should be cheaper than reordering data to another layout
-    expected_tensor = current_layout.get_tensor();
     expected_format = format::fs_b_yx_fsv32;
 } else if (should_select_b_fs_yx_fsv16_layout(node, weights_layout)) {
-    expected_tensor = current_layout.get_tensor();
     expected_format = cldnn::format::b_fs_yx_fsv16;
-} else if (current_layout.data_type == data_types::f16 &&
-           layout_optimizer::convolution_byxf_opt(input_layout, current_layout, weights_layout, node) &&
+} else if (output_layout.data_type == data_types::f16 &&
+           layout_optimizer::convolution_byxf_opt(input_layout, output_layout, weights_layout, node) &&
            (users_for_convolution_byxf_opt(node, 2) || deps_for_convolution_byxf_opt(node, 2)) &&
            // todo: remove this condition when yxfb optimizations will be disabled
-           current_layout.format != cldnn::format::yxfb && current_layout.batch() == 1) {
-    expected_tensor = current_layout.get_tensor();
+           output_layout.format != cldnn::format::yxfb && output_layout.batch() == 1) {
     expected_format = cldnn::format::byxf;
-} else if (current_layout.format == format::b_fs_yx_fsv4 ||
-           current_layout.format == format::os_is_yx_osv16_isv4) {
+} else if (output_layout.format == format::b_fs_yx_fsv4 ||
+           output_layout.format == format::os_is_yx_osv16_isv4) {
     // imad case
     // nothing to do, just go out from here.
-} else if (layout_optimizer::convolution_bfyx_opt(current_layout, weights_layout, prim) || _output_size_handling_enabled || node.get_transposed()) {
-    expected_tensor = current_layout.get_tensor();
-    if (current_layout.format == format::b_fs_zyx_fsv16 || current_layout.format == format::bs_fs_zyx_bsv16_fsv16)
+} else if (layout_optimizer::convolution_bfyx_opt(output_layout, weights_layout, prim) || _output_size_handling_enabled || node.get_transposed()) {
+    {
+        if (output_layout.format == format::b_fs_zyx_fsv16 || output_layout.format == format::bs_fs_zyx_bsv16_fsv16)
             expected_format = cldnn::format::bfzyx;
         else
             expected_format = cldnn::format::bfyx;
+    }
 } else {
-    expected_tensor = current_layout.get_tensor();
     expected_format = cldnn::format::yxfb;
 }
 }

-return layout(expected_data_type, expected_format, expected_tensor);
+return expected_format;
 }

-layout layout_optimizer::get_expected_layout(layout const& current_layout,
-                                             deconvolution_node const& node,
-                                             layout const& output_or_weights_layout) {
+format layout_optimizer::get_expected_format(deconvolution_node const& node) {
     auto prim = node.get_primitive();
-    auto expected_data_type = current_layout.data_type;
-    auto expected_format = current_layout.format;
-    auto input_layout = node.get_dependency(0).get_output_layout();
-    auto output_layout = node.calc_output_layout();
+    auto input_layout = node.get_input_layout(0);
+    auto output_layout = node.get_output_layout(0);
+    auto weights_layout = node.weights().get_output_layout().convert_to_weights_layout(node.get_primitive()->grouped_weights_shape);
+    auto expected_format = output_layout.format;

     if (input_layout.is_dynamic() || output_layout.is_dynamic()) {
         if (input_layout.get_partial_shape().size() <= 4)
             expected_format = format::b_fs_yx_fsv16;
         else if (input_layout.get_partial_shape().size() == 5)
             expected_format = format::b_fs_zyx_fsv16;
-        return layout(current_layout.get_partial_shape(), expected_data_type, expected_format);
+        return expected_format;
     }

-    auto expected_tensor = current_layout.get_tensor();
+    auto expected_shape = output_layout.get_shape();
     bool use_onednn_impls = _optimization_attributes.use_onednn_impls;

     if (use_onednn_impls && is_node_for_onednn(node)) {
         // XXX: need to take the situation into consideration where it is called from prepare_primitive_fusing
         expected_format = node.get_preferred_output_fmt();
     } else if (_optimization_attributes.b_fs_zyx_fsv16_network &&
-               deconvolution_b_fs_zyx_fsv16_opt(current_layout, output_or_weights_layout, prim)) {
-        if ((current_layout.data_type == data_types::f32 && expected_tensor.batch[0] % 16 == 0) ||
-            (current_layout.data_type == data_types::f16 && expected_tensor.batch[0] % 32 == 0))
+               deconvolution_b_fs_zyx_fsv16_opt(output_layout, weights_layout, prim)) {
+        if ((output_layout.data_type == data_types::f32 && expected_shape[0] % 16 == 0) ||
+            (output_layout.data_type == data_types::f16 && expected_shape[0] % 32 == 0))
             expected_format = cldnn::format::bs_fs_zyx_bsv16_fsv16;
         else
             expected_format = cldnn::format::b_fs_zyx_fsv16;
     } else if ((_optimization_attributes.b_fs_yx_fsv16_network) &&
-               deconvolution_b_fs_yx_fsv16_opt(current_layout, output_or_weights_layout, prim)) {
-        auto input_tensor = node.get_dependency(0).get_output_layout().get_tensor();
-        int input_features = input_tensor.feature[0];
-        int output_features = expected_tensor.feature[0];
+               deconvolution_b_fs_yx_fsv16_opt(output_layout, weights_layout, prim)) {
+        auto input_shape = input_layout.get_shape();
+        auto input_features = input_shape[1];
+        auto output_features = expected_shape[1];
         float f_cost = static_cast<float>(input_features * output_features) / (align_to(input_features, 16) * align_to(output_features, 16));
         float stride_cost = 1 / static_cast<float>(prim->stride[prim->stride.size() - 1]);
         if (f_cost * stride_cost > 0.1f)
@@ -1182,33 +1169,80 @@ layout layout_optimizer::get_expected_layout(layout const& current_layout,
     else
         expected_format = cldnn::format::bfyx;
 }
-return layout(expected_data_type, expected_format, expected_tensor);
+return expected_format;
 }

-layout layout_optimizer::get_expected_layout(layout const& current_layout,
-                                             detection_output_node const& node,
-                                             layout const& output_or_weights_layout) {
-    auto prim = node.get_primitive();
-    auto expected_tensor = current_layout.get_tensor();
-    auto expected_data_type = data_types::f32;
-    auto expected_format = output_or_weights_layout.format;
+format layout_optimizer::get_expected_format(quantize_node const& node) {
+    auto layout = node.get_output_layout();
+    auto expected = format::any;

-    return layout(expected_data_type, expected_format, expected_tensor);
-}
+    std::function<bool(const program_node& node)> only_gemm_users = [&](const program_node& node) {
+        bool all_users_gemm = true;

-layout layout_optimizer::get_expected_layout(layout const& current_layout,
-                                             binary_convolution_node const& node,
-                                             layout const& /*output_or_weights_layout*/) {
-    auto prim = node.get_primitive();
-    auto expected_tensor = current_layout.get_tensor();
-    auto expected_data_type = data_types::bin;
-    auto expected_format = cldnn::format::b_fs_yx_32fp;
+        for (auto user : node.get_users()) {
+            if (user->is_type<reorder>() || user->is_type<reshape>())
+                all_users_gemm &= only_gemm_users(*user);
+            else if (user->is_type<gemm>())
+                all_users_gemm &= true;
+            else
+                return false;
+        }

-    return layout(expected_data_type, expected_format, expected_tensor);
-}
+        return all_users_gemm;
+    };
+
+    auto use_onednn_impls = _optimization_attributes.use_onednn_impls;
+
+    if (use_onednn_impls) {
+        auto& user = node.get_users().front();
+        if (user->get_preferred_input_fmt(user->get_dependency_index(node)) != format::any) {
+            expected = user->get_preferred_input_fmt(user->get_dependency_index(node));
+        } else {
+            expected = format::any;
+        }
+    } else if (only_gemm_users(node)) {
+        // TODO: Gemm is not supporting fsv layouts
+        expected = format::get_default_format(node.get_output_layout().format.dimension());
+        // TODO: check other types for first conv
+    } else if (layout.is_static() && layout.format.spatial_num() == 2 &&
+               (layout.data_type == data_types::i8 || layout.data_type == data_types::u8) &&
+               layout.batch() % 16 == 0) {
+        if (use_onednn_impls && layout.batch() % 32 == 0) {
+            if (node.get_users().size() == 1 && node.get_users().front()->is_type<convolution>()) {
+                auto& conv = node.get_users().front()->as<convolution>();
+                auto ws = conv.get_input_layout(1).get_tensor();
+                if (ws.spatial[0] != 7 || conv.get_primitive()->groups > 1 || layout.feature() == 1)
+                    expected = format::bfyx;
+                else
+                    expected = format::bs_fs_yx_bsv16_fsv4;
+
+                auto conv_output_layout = conv.get_output_layout();
+                auto weights_layout = conv.weights().get_output_layout().convert_to_weights_layout(conv.get_primitive()->grouped_weights_shape);
+                format expected_conv_fmt = get_expected_format(conv);
+                if (expected == format::bfyx && expected_conv_fmt == format::bs_fs_yx_bsv32_fsv32 && layout.feature() % 32 == 0)
+                    expected = expected_conv_fmt;
+            }
+        } else if (layout.feature() > 8) {
+            expected = format::b_fs_yx_fsv16;
+        } else {
+            expected = format::b_fs_yx_fsv4;
+        }
+    } else if (layout.format.spatial_num() == 3 && (layout.data_type == data_types::i8 || layout.data_type == data_types::u8)) {
+        expected = format::b_fs_zyx_fsv16;
+    }
+
+    // In case of input -> ... -> quantize -> concat
+    if (layout.is_static() && expected == format::any
+        && (node.get_users().size() == 1 && node.get_users().front()->is_type<concatenation>())
+        && (layout.batch() < 4 && layout.feature() < 4)) {
+        expected = format::get_default_format(layout.get_rank(), false, false);
+    }
+
+    return expected;
+}

 bool layout_optimizer::are_data_types_suitable_for_onednn(program_node& node) {
-    auto in_dt = node.get_dependency(0).get_output_layout(false).data_type;
+    auto in_dt = node.get_input_layout(0).data_type;
     auto out_dt = node.get_output_layout(false).data_type;

     // Generally, fp32 input does NOT use oneDNN
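The only_gemm_users lambda above recursively walks a node's users, looking through reorder/reshape and accepting the node only if every transitive consumer is a gemm. A minimal stand-alone version of the same traversal is sketched below; node_stub and the string type tags are hypothetical, used only to make the sketch runnable.

#include <string>
#include <vector>

struct node_stub {
    std::string type;                 // e.g. "gemm", "reorder", "reshape", "eltwise"
    std::vector<node_stub*> users;
};

bool only_gemm_users(const node_stub& node) {
    for (const node_stub* user : node.users) {
        if (user->type == "reorder" || user->type == "reshape") {
            if (!only_gemm_users(*user))      // look through layout-only ops
                return false;
        } else if (user->type != "gemm") {
            return false;                     // any other consumer disqualifies the node
        }
    }
    return true;                              // a node with no users also passes, as in the original
}

int main() {
    node_stub gemm{"gemm", {}};
    node_stub reshape{"reshape", {&gemm}};
    node_stub quantize{"quantize", {&reshape}};
    return only_gemm_users(quantize) ? 0 : 1;
}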
@@ -1230,10 +1264,10 @@ bool layout_optimizer::are_data_types_suitable_for_onednn(program_node& node) {
 } else if (node.is_type<fully_connected>() || node.is_type<gemm>()) {
     bool is_fc = node.is_type<fully_connected>();
     auto wei_dt = is_fc ? node.as<fully_connected>().weights().get_output_layout().data_type :
-                          node.as<gemm>().get_dependency(1).get_output_layout().data_type;
+                          node.as<gemm>().get_input_layout(1).data_type;
     return onednn_check_data_types_for_fc_gemm(in_dt, wei_dt, out_dt);
 } else if (node.is_type<reorder>()) {
-    auto input_fmt = node.get_dependency(0).get_output_layout().format;
+    auto input_fmt = node.get_input_layout(0).format;
     auto output_fmt = node.get_output_layout().format;

     // For mixed precision case, oneDNN is slower than clDNN
@@ -1398,7 +1432,7 @@ impl_types layout_optimizer::get_preferred_impl_type(program_node& node, format preferred_format) {
 format::bs_fs_yx_bsv32_fsv16,
 format::bs_fs_yx_bsv32_fsv32,
 };
-if (blocked_formats.find(node.get_dependency(0).get_output_layout().format) != blocked_formats.end()) {
+if (blocked_formats.find(node.get_input_layout(0).format) != blocked_formats.end()) {
     preferred_impl = impl_types::ocl;
 } else {
     auto& nms_node = node.as<non_max_suppression>();
@@ -1440,7 +1474,7 @@ impl_types layout_optimizer::get_preferred_impl_type(program_node& node, format preferred_format) {
 format::bs_fs_yx_bsv32_fsv32,
 };

-auto input_layout = node.get_dependency(0).get_output_layout();
+auto input_layout = node.get_input_layout(0);
 auto output_layout = node.get_output_layout();

 auto input_fmt = input_layout.format;
@@ -1597,13 +1631,13 @@ format layout_optimizer::get_preferred_format(program_node& node) {

 if (allow_new_shape_infer) {
     if (node.is_type<shape_of>())
-        return format::get_default_format(node.get_dependency(0).get_output_layout(false).get_rank());
+        return format::get_default_format(node.get_input_layout(0).get_rank());

     // Let reorder_input pass to check input format instead of output_format in forward investigation, vice versa
     auto out_lay_rank = node.get_output_layout(false).get_rank();
     auto dep_size = node.get_dependencies().size();
     for (size_t i = 0; i < dep_size; i++) {
-        auto in_lay_rank = node.get_dependency(i).get_output_layout(false).get_rank();
+        auto in_lay_rank = node.get_input_layout(i).get_rank();
         const auto& shape_infer_deps = node.get_shape_infer_dependencies();
         if (std::find(shape_infer_deps.begin(), shape_infer_deps.end(), i) != shape_infer_deps.end()) {
             auto fmt = format::get_default_format(in_lay_rank, false, false);
@@ -1631,80 +1665,11 @@ format layout_optimizer::get_preferred_format(program_node& node) {
 if (!_forcing_map.empty() && _forcing_map.count(node.id()) != 0) {
     expected = _forcing_map.at(node.id()).first;
 } else if (node.is_type<convolution>()) {
-    auto& conv_node = node.as<convolution>();
-    auto weights_layout = conv_node.weights().get_output_layout().convert_to_weights_layout(conv_node.get_primitive()->grouped_weights_shape);
-    expected = get_expected_layout(output_layout, conv_node, weights_layout).format;
+    expected = get_expected_format(node.as<convolution>());
 } else if (node.is_type<binary_convolution>()) {
-    auto& bconv_node = node.as<binary_convolution>();
-    auto weights_layout = bconv_node.weights().get_output_layout().convert_to_weights_layout(false);
-    expected = get_expected_layout(output_layout, bconv_node, weights_layout).format;
-} else if (node.is_type<detection_output>()) {
-    expected = get_expected_layout(
-        output_layout,
-        node.as<detection_output>(),
-        layout{ data_types::f32, format::bfyx, tensor{} }).format;
+    expected = cldnn::format::b_fs_yx_32fp;
 } else if (node.is_type<quantize>()) {
-    auto layout = node.get_output_layout();
-
-    std::function<bool(const program_node& node)> only_gemm_users = [&](const program_node& node) {
-        bool all_users_gemm = true;
-
-        for (auto user : node.get_users()) {
-            if (user->is_type<reorder>() || user->is_type<reshape>())
-                all_users_gemm &= only_gemm_users(*user);
-            else if (user->is_type<gemm>())
-                all_users_gemm &= true;
-            else
-                return false;
-        }
-
-        return all_users_gemm;
-    };
-
-    if (use_onednn_impls) {
-        auto& user = node.get_users().front();
-        if (user->get_preferred_input_fmt(user->get_dependency_index(node)) != format::any) {
-            expected = user->get_preferred_input_fmt(user->get_dependency_index(node));
-        } else {
-            expected = format::any;
-        }
-    } else if (only_gemm_users(node)) {
-        // TODO: Gemm is not supporting fsv layouts
-        expected = format::get_default_format(node.get_output_layout().format.dimension());
-        // TODO: check other types for first conv
-    } else if (layout.is_static() && layout.format.spatial_num() == 2 &&
-               (layout.data_type == data_types::i8 || layout.data_type == data_types::u8) &&
-               layout.batch() % 16 == 0) {
-        if (use_onednn_impls && layout.batch() % 32 == 0) {
-            if (node.get_users().size() == 1 && node.get_users().front()->is_type<convolution>()) {
-                auto& conv = node.get_users().front()->as<convolution>();
-                auto ws = conv.get_dependency(1).get_output_layout().get_tensor();
-                if (ws.spatial[0] != 7 || conv.get_primitive()->groups > 1 || layout.feature() == 1)
-                    expected = format::bfyx;
-                else
-                    expected = format::bs_fs_yx_bsv16_fsv4;
-
-                auto conv_output_layout = conv.get_output_layout();
-                auto weights_layout = conv.weights().get_output_layout().convert_to_weights_layout(conv.get_primitive()->grouped_weights_shape);
-                format expected_conv_fmt = get_expected_layout(conv_output_layout, conv, weights_layout).format;
-                if (expected == format::bfyx && expected_conv_fmt == format::bs_fs_yx_bsv32_fsv32 && layout.feature() % 32 == 0)
-                    expected = expected_conv_fmt;
-            }
-        } else if (layout.feature() > 8) {
-            expected = format::b_fs_yx_fsv16;
-        } else {
-            expected = format::b_fs_yx_fsv4;
-        }
-    } else if (layout.format.spatial_num() == 3 && (layout.data_type == data_types::i8 || layout.data_type == data_types::u8)) {
-        expected = format::b_fs_zyx_fsv16;
-    }
-
-    // In case of input -> ... -> quantize -> concat
-    if (layout.is_static() && expected == format::any
-        && (node.get_users().size() == 1 && node.get_users().front()->is_type<concatenation>())
-        && (layout.batch() < 4 && layout.feature() < 4)) {
-        expected = format::get_default_format(layout.get_rank(), false, false);
-    }
+    expected = get_expected_format(node.as<quantize>());
 } else if (node.is_type<reorder>() || node.is_type<input_layout>()) {
     if (node.is_type<reorder>() && node.as<reorder>().get_primitive()->has_surface_input()) {
         expected = format::nv12;
@@ -1712,13 +1677,11 @@ format layout_optimizer::get_preferred_format(program_node& node) {
 expected = node.get_output_layout().format;
 }
 } else if (node.is_type<reshape>()) {
-    expected = format::get_default_format(node.get_output_layout().format.dimension());
+    expected = format::get_default_format(node.get_output_layout().get_rank());
 } else if (node.is_type<deconvolution>()) {
-    auto& deconv_node = node.as<deconvolution>();
-    auto weights_layout = deconv_node.weights().get_output_layout().convert_to_weights_layout(deconv_node.get_primitive()->grouped_weights_shape);
-    expected = get_expected_layout(output_layout, deconv_node, weights_layout).format;
+    expected = get_expected_format(node.as<deconvolution>());
 } else if (node.is_type<mvn>()) {
-    auto input_layout = node.get_dependency(0).get_output_layout();
+    auto input_layout = node.get_input_layout(0);
     if (input_layout.format.dimension() == 5 &&
         (input_layout.data_type == data_types::f32 || input_layout.data_type == data_types::f16))
         expected = format::bfzyx;
@@ -1810,7 +1773,7 @@ void layout_optimizer::select_preferred_formats_for_onednn(program_node& node, dnnl::primitive_desc prim_desc) {
 can_optimize_permute = pnode.get_users().size() == 1
     && pnode.get_output_layout().data_type == node.get_output_layout().data_type
     && !pnode.has_fused_primitives()
-    && !pnode.is_output() && pnode.get_dependency(0).get_output_layout().is_static()
+    && !pnode.is_output() && pnode.get_input_layout(0).is_static()
    && pnode.is_reverse_rotating_except_batch();
 }
 if (!can_optimize_permute) {
@@ -1846,7 +1809,7 @@ void layout_optimizer::select_preferred_formats_for_onednn(program_node& node, dnnl::primitive_desc prim_desc) {
 auto& pnode = node.get_users().front()->as<permute>();
 auto can_optimize_permute = pnode.get_output_layout().data_type == node.get_output_layout().data_type
     && !pnode.has_fused_primitives()
-    && !pnode.is_output() && pnode.get_dependency(0).get_output_layout().is_static()
+    && !pnode.is_output() && pnode.get_input_layout(0).is_static()
     && pnode.is_rotating_except_batch();
 if (can_optimize_permute) {
     dst_fmt = format::byxf;
@@ -799,8 +799,8 @@ primitive_inst::primitive_inst(network& network, program_node const& node, bool allocate_memory)
 , _org_id(node.get_org_primitive_id())
 , _is_input(node.is_input())
 , _is_output(node.is_output())
-, _inputs_memory_count(node.get_primitive()->input_size())
-, _outputs_memory_count(node.get_primitive()->output_size())
+, _inputs_memory_count(node.get_inputs_count())
+, _outputs_memory_count(node.get_outputs_count())
 , _fused_mem_count(node.get_fused_inputs_count())
 , _fused_mem_offset((_fused_mem_count > 0 && node.has_fused_dep()) ? node.get_first_fused_dep_idx() : 0)
 , _can_be_optimized(node.can_be_optimized())
@@ -1054,7 +1054,7 @@ void program::fuse_nodes(program_node &fused_node,
 fused_primitive_desc local_desc(peer_node.get_primitive());
 local_desc.f_param = get_node_ptr(peer_node.id())->get_fuse_params();
 local_desc.total_num_deps = peer_node.get_dependencies().size();
-local_desc.input_layout = peer_node.get_dependency(0).get_output_layout();
+local_desc.input_layout = peer_node.get_input_layout(0);
 local_desc.output_layout = peer_layout;

 if (fused_node.in_shape_of_subgraph && !peer_node.in_shape_of_subgraph) {
@@ -1350,7 +1350,7 @@ void program::set_layout_optimizer_attributes(layout_optimizer& lo) {

 if (!conv.is_dynamic()) {
     // In dynamic shape, conv is fixed as a predefined format b_fs_yx_fsv16
-    auto input_size = node->get_dependency(0).get_output_layout().get_tensor();
+    auto input_size = node->get_input_layout(0).get_tensor();
     auto ifm = static_cast<uint32_t>(input_size.feature[0]);
     if (conv.get_primitive()->groups == ifm && conv.get_primitive()->groups >= 16)
         total_dw_conv_layers++;
@@ -219,7 +219,7 @@ void program_node::remove_dependency(program_node& node) {
 remove_dependency(i);
 }

-size_t program_node::get_user_index(program_node& node) const {
+size_t program_node::get_user_index(const program_node& node) const {
     size_t idx = 0;
     for (auto& user : users) {
         if (user == &node)
@@ -231,7 +231,7 @@ size_t program_node::get_user_index(program_node& node) const {
 OPENVINO_ASSERT(false, "Search invalid user node" + node.id() + " node");
 }

-size_t program_node::get_dependency_index(program_node& node) const {
+size_t program_node::get_dependency_index(const program_node& node) const {
     for (size_t i = 0; i < dependencies.size(); ++i)
         if (dependencies[i].first == &node)
             return i;
@@ -1268,4 +1268,3 @@ void program_node::init_onednn_primitive_attributes() {


 #endif // ENABLE_ONEDNN_FOR_GPU
-
@@ -64,7 +64,7 @@ std::string select_inst::to_string(select_node const& node) {
 std::stringstream primitive_description;

 json_composite select_info;
-for (size_t i = 0; i < node.inputs_count(); i++) {
+for (size_t i = 0; i < node.get_inputs_count(); i++) {
     select_info.add("input_" + std::to_string(i), node.input(i).id());
 }

@@ -91,7 +91,7 @@ TEST(handle_reshape, skip_reorder_node_to_split_when_onndnn_not_support) {

 ASSERT_NE(prog, nullptr);

-ASSERT_TRUE(prog->get_node("matmul").get_dependency(0).get_output_layout().data_type == data_types::f16);
+ASSERT_TRUE(prog->get_node("matmul").get_input_layout(0).data_type == data_types::f16);
 }

 TEST(handle_reshape, correct_parameters_propagation) {
@@ -123,8 +123,8 @@ TEST(handle_reshape, correct_parameters_propagation) {

 ASSERT_TRUE(prog->get_node("reshape").can_be_optimized());

-auto out_shape0 = prog->get_node("e2").get_output_layout().get_partial_shape();
-auto out_shape1 = prog->get_node("reorder").get_output_layout().get_partial_shape();
+auto out_shape0 = prog->get_node("e2").get_output_pshape();
+auto out_shape1 = prog->get_node("reorder").get_output_pshape();

 ov::PartialShape expected_out_shape{2, 12};

@@ -171,8 +171,8 @@ TEST(handle_reshape, correct_parameters_propagation_2_inputs) {
 ASSERT_TRUE(reshape_split_node.is_type<reshape>());
 ASSERT_EQ(reshape_split_node.get_dependencies().size(), 2);

-auto out_shape0 = prog->get_node("e2").get_output_layout().get_partial_shape();
-auto out_shape1 = prog->get_node("reorder").get_output_layout().get_partial_shape();
+auto out_shape0 = prog->get_node("e2").get_output_pshape();
+auto out_shape1 = prog->get_node("reorder").get_output_pshape();

 ov::PartialShape expected_out_shape{2, 12};