[GPU] fp16-int8 mixed precision (#9483)
* Use fp16-int8 mixed precision instead of fp32-int8 mixed precision for onednn
* Allow quantization fusion into bsv32_fsv16 conv
* For conv, do not select bsv16_fsv16. Select bsv32_fsv16 for mixed-layout
* Depthwise conv is supported even though it is not fp16
* Allow resample kernel to work as cross-layout
* Test case for cross-layout of resample_opt kernel
* Select onednn-friendly format from cldnn conv
* Optimization for fp16 mixed precision
* Choose mixed layout in case of mixed precision from reorder_inputs
* Support for mixed precision from depth_to_space
* Do not convert first conv format
* Use onednn for FC output of fp16
* Choose bsv8_fsv4 from quantization even when conv kernel size is not 7
* Select cldnn for first conv when input feature depth is 1
* For first conv, use onednn only when kernel size is 7x7
* Use short variable names and add is_i8_u8 helper function

Co-authored-by: Kim,SungEun <sungeun.kim@intel.com>
This commit is contained in:
@@ -116,6 +116,10 @@ struct data_type_traits {
|
||||
return (static_cast<uint32_t>(data_type) & float_type_mask) != 0;
|
||||
}
|
||||
|
||||
// Tells whether data_type is data_types::i8 or data_types::u8.
// Any other value yields false.
static bool is_i8_u8(data_types data_type) {
    switch (data_type) {
    case data_types::i8:
    case data_types::u8:
        return true;
    default:
        return false;
    }
}
|
||||
|
||||
static size_t align_of(data_types data_type) {
|
||||
switch (data_type) {
|
||||
case data_types::bin:
|
||||
|
||||
@@ -844,13 +844,17 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) {
|
||||
input_data.as<binary_convolution>().get_primitive()->dilation.spatial[0] == 1 &&
|
||||
input_data.as<binary_convolution>().get_primitive()->dilation.spatial[1] == 1;
|
||||
|
||||
auto expected_format = _lo.get_preferred_format(input_data);
|
||||
|
||||
should_fuse |= input_data.is_type<convolution>() && conv_supports_fusings(input_data.as<convolution>()) &&
|
||||
quantize_node.get_scale_shift_opt() &&
|
||||
((out_layout.data_type == data_types::f32 || out_layout.data_type == data_types::f16) ||
|
||||
input_data.get_output_layout().format == format::b_fs_yx_fsv16 ||
|
||||
input_data.get_output_layout().format == format::bs_fs_yx_bsv32_fsv16 ||
|
||||
(_lo.should_select_b_fs_yx_fsv16_layout(input_data.as<convolution>(), input_data.get_dependency(1).get_output_layout()) &&
|
||||
!is_grouped_conv(input_data.as<convolution>())) ||
|
||||
// Avoid fusing to b_fs_yx_fsv16 (and similar) kernels
|
||||
expected_format == cldnn::format::bs_fs_yx_bsv32_fsv16 /* Allow quantization fusing for onednn */ ||
|
||||
((input_data.get_dependency(0).get_output_layout().data_type == data_types::u8 ||
|
||||
input_data.get_dependency(0).get_output_layout().data_type == data_types::i8) &&
|
||||
(out_layout.data_type == data_types::u8 || out_layout.data_type == data_types::i8)));
|
||||
|
||||
@@ -562,16 +562,54 @@ void reorder_inputs::run(program& p, layout_optimizer& lo, reorder_factory& rf)
|
||||
}
|
||||
};
|
||||
|
||||
const auto reorder_weights_convolution = [&p, &lo, &rf](typed_program_node<convolution>& conv_node) {
|
||||
auto& weights = conv_node.weights();
|
||||
auto weights_layout = weights.get_output_layout();
|
||||
if (!format::is_simple_data_format(weights_layout.format) && !weights.is_type<data>() && !weights.is_constant()) {
|
||||
auto dims = weights_layout.format.dimension();
|
||||
auto preferred_format = dims <= 4 ? format::bfyx : dims == 5 ? format::bfzyx : format::bfwzyx;
|
||||
auto reorder = rf.get_reorder(weights.id(), weights_layout,
|
||||
layout{ weights_layout.data_type, preferred_format, weights_layout.size });
|
||||
if (reorder.first) {
|
||||
p.add_intermediate(reorder.first, conv_node, 1, !reorder.second);
|
||||
const auto reorder_convolution = [&p, &lo, &rf](typed_program_node<convolution>& conv_node) {
|
||||
{
|
||||
// reorder weights convolution
|
||||
auto& weights = conv_node.weights();
|
||||
auto weights_layout = weights.get_output_layout();
|
||||
if (!format::is_simple_data_format(weights_layout.format) && !weights.is_type<data>() && !weights.is_constant()) {
|
||||
auto dims = weights_layout.format.dimension();
|
||||
auto preferred_format = dims <= 4 ? format::bfyx : dims == 5 ? format::bfzyx : format::bfwzyx;
|
||||
auto reorder = rf.get_reorder(weights.id(), weights_layout,
|
||||
layout{ weights_layout.data_type, preferred_format, weights_layout.size });
|
||||
if (reorder.first) {
|
||||
p.add_intermediate(reorder.first, conv_node, 1, !reorder.second);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<format> wrong_format = {format::b_fs_yx_fsv16, format::bs_fs_yx_bsv32_fsv16};
|
||||
std::vector<format> correct_format = {format::b_fs_yx_fsv32, format::bs_fs_yx_bsv32_fsv32};
|
||||
for (int i = 0; i < wrong_format.size(); i++) {
|
||||
// reorder for onednn mixed-precision conv
|
||||
// If the layouts are like below, change input layout to fsv32.
|
||||
// From:
|
||||
// (bsv32_fsv16.u8) --> conv --> (bsv32_fsv16.fp16)
|
||||
// To:
|
||||
// (bsv32_fsv16.u8) --> reorder --> (bsv32_fsv32.u8) --> conv --> (bsv32_fsv16.fp16)
|
||||
//
|
||||
// Do not apply such change for b=1 first conv
|
||||
|
||||
auto prev_node = conv_node.get_dependencies().front();
|
||||
auto old_layout = prev_node->get_output_layout();
|
||||
auto conv_layout = conv_node.get_output_layout();
|
||||
if (lo.get_optimization_attributes().use_onednn_impls
|
||||
&& conv_layout.format == wrong_format[i]
|
||||
&& data_type_traits::is_i8_u8(old_layout.data_type)
|
||||
&& (old_layout.format == wrong_format[i])
|
||||
&& !(old_layout.size.batch[0] == 1 && old_layout.size.feature[0] <= 4)) {
|
||||
auto new_layout = old_layout;
|
||||
new_layout.format = correct_format[i];
|
||||
auto new_input = rf.get_reorder(prev_node->id(),
|
||||
old_layout,
|
||||
new_layout);
|
||||
|
||||
if (new_input.first) {
|
||||
p.add_intermediate(new_input.first, conv_node, 0, !new_input.second);
|
||||
}
|
||||
|
||||
// Prevent layout propagation as we are using mixed precision for conv
|
||||
conv_node.get_dependencies().front()->set_output_layout(new_layout, false);
|
||||
}
|
||||
}
|
||||
};
|
||||
@@ -598,7 +636,7 @@ void reorder_inputs::run(program& p, layout_optimizer& lo, reorder_factory& rf)
|
||||
reorder_input_detection_output,
|
||||
reorder_input_binary_convolution,
|
||||
reorder_input_and_weights_deconvolution,
|
||||
reorder_weights_convolution,
|
||||
reorder_convolution,
|
||||
reorder_input_fully_connected);
|
||||
}
|
||||
|
||||
|
||||
@@ -67,6 +67,14 @@ attach_depth_to_space_impl::attach_depth_to_space_impl() {
|
||||
std::make_tuple(data_types::f16, format::b_fs_yx_fsv32),
|
||||
std::make_tuple(data_types::u8, format::b_fs_yx_fsv32),
|
||||
std::make_tuple(data_types::i8, format::b_fs_yx_fsv32),
|
||||
std::make_tuple(data_types::f32, format::bs_fs_yx_bsv32_fsv16),
|
||||
std::make_tuple(data_types::f16, format::bs_fs_yx_bsv32_fsv16),
|
||||
std::make_tuple(data_types::u8, format::bs_fs_yx_bsv32_fsv16),
|
||||
std::make_tuple(data_types::i8, format::bs_fs_yx_bsv32_fsv16),
|
||||
std::make_tuple(data_types::f32, format::bs_fs_yx_bsv32_fsv32),
|
||||
std::make_tuple(data_types::f16, format::bs_fs_yx_bsv32_fsv32),
|
||||
std::make_tuple(data_types::u8, format::bs_fs_yx_bsv32_fsv32),
|
||||
std::make_tuple(data_types::i8, format::bs_fs_yx_bsv32_fsv32),
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
@@ -9,6 +9,7 @@
|
||||
|
||||
#include "data_inst.h"
|
||||
#include "reorder_inst.h"
|
||||
#include "resample_inst.h"
|
||||
#include "reshape_inst.h"
|
||||
#include "generic_layer.hpp"
|
||||
#include <sstream>
|
||||
@@ -208,6 +209,10 @@ bool layout_optimizer::can_fuse_reorder(program_node& prev, program_node& next,
|
||||
}
|
||||
}
|
||||
|
||||
// Ref kernels are the main for depth_to_space and region_yolo. It can do anything.
|
||||
if (next.is_type<depth_to_space>() || next.is_type<region_yolo>())
|
||||
return true;
|
||||
|
||||
if (next.is_type<reorder>()) {
|
||||
// Avoid fusing current reorder to fuse next reorder
|
||||
if (next.get_users().size() == 1 && next.get_users().front()->is_type<convolution>() && use_onednn_impls) {
|
||||
@@ -221,6 +226,14 @@ bool layout_optimizer::can_fuse_reorder(program_node& prev, program_node& next,
|
||||
return true;
|
||||
}
|
||||
|
||||
// resample_opt kernel can work cross-layout between fsv16 and fsv32
|
||||
if (next.is_type<resample>() &&
|
||||
(fmt_prev == format::b_fs_yx_fsv16 || fmt_prev == format::b_fs_yx_fsv32
|
||||
|| fmt_prev == format::bs_fs_yx_bsv32_fsv16 || fmt_prev == format::bs_fs_yx_bsv32_fsv32) &&
|
||||
(fmt_next == format::b_fs_yx_fsv16 || fmt_next == format::b_fs_yx_fsv32
|
||||
|| fmt_next == format::bs_fs_yx_bsv32_fsv16 || fmt_next == format::bs_fs_yx_bsv32_fsv32))
|
||||
return true;
|
||||
|
||||
if (next.is_type<pooling>() &&
|
||||
(((prev_simple && next_simple) && (prev_dt == next_dt)) ||
|
||||
((fmt_prev == format::b_fs_yx_fsv4 && fmt_next == format::bfyx) && (prev_dt == data_types::u8 || prev_dt == data_types::i8))))
|
||||
@@ -319,21 +332,29 @@ bool layout_optimizer::can_fuse_reorder(program_node& prev, program_node& next,
|
||||
|
||||
// Remove Reorder to support mixed format convolutions of bsv32fsv16 or bsv32fsv32 output
|
||||
if (next.is_type<convolution>() && (prev.is_type<eltwise>() || prev.is_type<quantize>()) &&
|
||||
(fmt_prev == format::bfyx || fmt_prev == format::bs_fs_yx_bsv4_fsv2) &&
|
||||
(fmt_prev == format::bfyx || fmt_prev == format::bs_fs_yx_bsv4_fsv2 || fmt_prev == format::bs_fs_yx_bsv8_fsv4) &&
|
||||
((fmt_next == format::bs_fs_yx_bsv32_fsv32 && (prev_output_layout.size.feature[0] == 3 || prev_output_layout.size.feature[0] == 4)) ||
|
||||
(fmt_next == format::bs_fs_yx_bsv32_fsv16 && (prev_output_layout.size.feature[0] == 3 || prev_output_layout.size.feature[0] == 4))))
|
||||
return true;
|
||||
|
||||
// Remove Reorder for Convolution: b_fs_yx_fsv32 (i8/u8) -> b_fs_yx_fsv16 (fp32/fp16)
|
||||
if (next.is_type<convolution>() && fmt_prev == format::b_fs_yx_fsv32 && fmt_next == format::b_fs_yx_fsv16 &&
|
||||
!data_type_traits::is_floating_point(prev_dt) && data_type_traits::is_floating_point(next_dt)) {
|
||||
auto& node = prev.get_users().front();
|
||||
// Avoid to fuse padding reorder to previous onednn convolution
|
||||
if (prev.get_preferred_impl_type() == impl_types::onednn &&
|
||||
(node->get_output_layout().data_padding != prev.get_output_layout().data_padding))
|
||||
return false;
|
||||
else
|
||||
return true;
|
||||
// b_fs_yx_fsv16 (fp32/fp16) -> b_fs_yx_fsv32 (i8/u8)
|
||||
if (next.is_type<convolution>()) {
|
||||
const bool fsv32_to_fsv16 = (((fmt_prev == format::b_fs_yx_fsv32 && fmt_next == format::b_fs_yx_fsv16) ||
|
||||
(fmt_prev == format::bs_fs_yx_bsv32_fsv32 && fmt_next == format::bs_fs_yx_bsv32_fsv16)) &&
|
||||
data_type_traits::is_i8_u8(prev_dt) && data_type_traits::is_floating_point(next_dt));
|
||||
const bool fsv16_to_fsv32 = (((fmt_prev == format::b_fs_yx_fsv16 && fmt_next == format::b_fs_yx_fsv32) ||
|
||||
(fmt_prev == format::bs_fs_yx_bsv32_fsv16 && fmt_next == format::bs_fs_yx_bsv32_fsv32)) &&
|
||||
data_type_traits::is_floating_point(prev_dt) && data_type_traits::is_i8_u8(next_dt));
|
||||
if (fsv32_to_fsv16 || fsv16_to_fsv32) {
|
||||
auto& node = prev.get_users().front();
|
||||
// Avoid to fuse padding reorder to previous onednn convolution
|
||||
if (prev.get_preferred_impl_type() == impl_types::onednn &&
|
||||
(node->get_output_layout().data_padding != prev.get_output_layout().data_padding))
|
||||
return false;
|
||||
else
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
if (next.is_type<quantize>())
|
||||
@@ -367,10 +388,12 @@ bool layout_optimizer::can_fuse_reorder(program_node& prev, program_node& next,
|
||||
}
|
||||
|
||||
bool layout_optimizer::can_fuse_reorder_to_prev(program_node& prev, program_node* next, format fmt_prev, format fmt_next) {
|
||||
if (next == nullptr) {
|
||||
// Ref kernels are the main for depth_to_space and region_yolo. It can do anything
|
||||
return prev.is_type<depth_to_space>() || prev.is_type<region_yolo>();
|
||||
}
|
||||
// Ref kernels are the main for depth_to_space and region_yolo. It can do anything. Should not see next.
|
||||
if (prev.is_type<depth_to_space>() || prev.is_type<region_yolo>())
|
||||
return true;
|
||||
|
||||
if (next == nullptr)
|
||||
return false;
|
||||
|
||||
auto dt_prev = prev.get_output_layout().data_type;
|
||||
auto dt_next = next->get_output_layout().data_type;
|
||||
@@ -379,6 +402,14 @@ bool layout_optimizer::can_fuse_reorder_to_prev(program_node& prev, program_node
|
||||
if (prev.is_type<reorder>())
|
||||
return true;
|
||||
|
||||
// resample_opt kernel can work cross-layout between fsv16 and fsv32
|
||||
if (prev.is_type<resample>() &&
|
||||
(fmt_prev == format::b_fs_yx_fsv16 || fmt_prev == format::b_fs_yx_fsv32
|
||||
|| fmt_prev == format::bs_fs_yx_bsv32_fsv16 || fmt_prev == format::bs_fs_yx_bsv32_fsv32) &&
|
||||
(fmt_next == format::b_fs_yx_fsv16 || fmt_next == format::b_fs_yx_fsv32
|
||||
|| fmt_next == format::bs_fs_yx_bsv32_fsv16 || fmt_next == format::bs_fs_yx_bsv32_fsv32))
|
||||
return true;
|
||||
|
||||
if (prev.is_type<binary_convolution>() && fmt_next == format::b_fs_yx_fsv16)
|
||||
return true;
|
||||
|
||||
@@ -900,7 +931,7 @@ layout layout_optimizer::get_expected_layout(layout const& current_layout,
|
||||
}
|
||||
}
|
||||
|
||||
if (use_onednn_impls) {
|
||||
if (use_onednn_impls && onednn_valid_post_ops) {
|
||||
std::function<bool(const program_node&)> has_any_convolutions_below;
|
||||
has_any_convolutions_below = [&](const program_node& node) -> bool {
|
||||
for (auto& usr : node.get_users()) {
|
||||
@@ -913,11 +944,10 @@ layout layout_optimizer::get_expected_layout(layout const& current_layout,
|
||||
|
||||
/* ***************************** OneDNN impls format selection part ****************************** */
|
||||
bool valid_grouped = !is_dw && prim->groups > 1 && (ofm_per_group % compute_block == 0 && ifm_per_group % compute_block == 0);
|
||||
// TODO: uncomment this code when corresponding fsv32 optimizations inside clDNN will be implemented
|
||||
// bool i8_u8_output = output_layout.data_type == data_types::u8 || output_layout.data_type == data_types::i8;
|
||||
bool i8_u8_output = data_type_traits::is_i8_u8(output_layout.data_type);
|
||||
// bool is_first_conv = input_layout.size.feature[0] < 4;
|
||||
|
||||
if (i8_u8_input) {
|
||||
if (i8_u8_output) {
|
||||
if ((non_grouped || valid_grouped || valid_int8_dw) && onednn_valid_post_ops && is_2d) {
|
||||
if (input_layout.size.batch[0] >= 16) {
|
||||
expected_format = cldnn::format::bs_fs_yx_bsv32_fsv32;
|
||||
@@ -937,23 +967,19 @@ layout layout_optimizer::get_expected_layout(layout const& current_layout,
|
||||
expected_format = imad_case(node);
|
||||
}
|
||||
expected_tensor = current_layout.size;
|
||||
} else if (input_layout.data_type == data_types::f16 && is_2d) {
|
||||
} else if ((output_layout.data_type == data_types::f16 || output_layout.data_type == data_types::f32) && is_2d) {
|
||||
expected_tensor = current_layout.size;
|
||||
|
||||
if (input_layout.size.batch[0] >= 16 && onednn_valid_post_ops) {
|
||||
if (output_layout.data_type == input_layout.data_type) {
|
||||
if (non_grouped || valid_grouped || is_dw) {
|
||||
expected_format = cldnn::format::bs_fs_yx_bsv32_fsv16;
|
||||
} else {
|
||||
expected_format = cldnn::format::b_fs_yx_fsv16;
|
||||
}
|
||||
if (non_grouped || valid_grouped || is_dw) {
|
||||
expected_format = cldnn::format::bs_fs_yx_bsv32_fsv16;
|
||||
} else {
|
||||
expected_format = cldnn::format::bs_fs_yx_bsv16_fsv16;
|
||||
expected_format = cldnn::format::b_fs_yx_fsv16;
|
||||
}
|
||||
} else {
|
||||
expected_format = cldnn::format::b_fs_yx_fsv16;
|
||||
}
|
||||
} else if (input_layout.data_type == data_types::f16 &&
|
||||
} else if (output_layout.data_type == data_types::f16 &&
|
||||
convolution_bs_fs_yx_bsv16_fsv16_opt(input_layout, output_layout, weights_layout, prim) &&
|
||||
(output_layout.data_type == input_layout.data_type ||
|
||||
!data_type_traits::is_floating_point(input_layout.data_type)) && is_2d) {
|
||||
@@ -973,7 +999,11 @@ layout layout_optimizer::get_expected_layout(layout const& current_layout,
|
||||
//}
|
||||
} else {
|
||||
/* *************************** Native impls format selection part ************************** */
|
||||
if (i8_u8_input) {
|
||||
if (use_onednn_impls && i8_u8_input) {
|
||||
// It is here because of post operation condition for onednn.
|
||||
// Use fsv32 for onednn friendliness.
|
||||
expected_format = cldnn::format::b_fs_yx_fsv32;
|
||||
} else if (i8_u8_input) {
|
||||
if ((_optimization_attributes.b_fs_yx_fsv16_network &&
|
||||
convolution_b_fs_yx_fsv16_opt(input_layout, output_layout, weights_layout, prim))) {
|
||||
expected_format = cldnn::format::b_fs_yx_fsv16;
|
||||
@@ -1155,10 +1185,11 @@ bool layout_optimizer::are_data_types_suitable_for_onednn(program_node& node) {
|
||||
auto wei_dt = is_conv ? node.as<convolution>().weights().get_output_layout().data_type :
|
||||
node.as<deconvolution>().weights().get_output_layout().data_type;
|
||||
|
||||
if ((in_dt == data_types::f16 && wei_dt == data_types::f16) && (out_dt == data_types::f16 || out_dt == data_types::f32 || out_dt == data_types::i8))
|
||||
if ((in_dt == data_types::f16 && wei_dt == data_types::f16) &&
|
||||
(out_dt == data_types::f16 || out_dt == data_types::f32 || out_dt == data_types::i8 || out_dt == data_types::u8))
|
||||
return true;
|
||||
if ((in_dt == data_types::i8 || in_dt == data_types::u8) && wei_dt == data_types::i8 &&
|
||||
(out_dt == data_types::f32 || out_dt == data_types::i32 || out_dt == data_types::i8 || out_dt == data_types::u8))
|
||||
(out_dt == data_types::f32 || out_dt == data_types::i32 || out_dt == data_types::f16 || out_dt == data_types::i8 || out_dt == data_types::u8))
|
||||
return true;
|
||||
} else if (node.is_type<fully_connected>()) {
|
||||
auto& fc_node = node.as<fully_connected>();
|
||||
@@ -1170,7 +1201,7 @@ bool layout_optimizer::are_data_types_suitable_for_onednn(program_node& node) {
|
||||
if (in_dt == data_types::f32 && wei_dt == data_types::f32)
|
||||
return true;
|
||||
if ((in_dt == data_types::i8 || in_dt == data_types::u8) && (wei_dt == data_types::i8) &&
|
||||
(out_dt == data_types::i8 || out_dt == data_types::u8 || out_dt == data_types::i32 || out_dt == data_types::f32))
|
||||
(out_dt == data_types::i8 || out_dt == data_types::u8 || out_dt == data_types::i32 || out_dt == data_types::f16 || out_dt == data_types::f32))
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -1259,6 +1290,8 @@ impl_types layout_optimizer::get_preferred_impl_type(program_node& node, format
|
||||
|
||||
auto input_fmt = input_layout.format;
|
||||
auto output_fmt = output_layout.format;
|
||||
auto input_dt = input_layout.data_type;
|
||||
auto output_dt = output_layout.data_type;
|
||||
|
||||
preferred_impl = impl_types::onednn;
|
||||
|
||||
@@ -1273,14 +1306,22 @@ impl_types layout_optimizer::get_preferred_impl_type(program_node& node, format
|
||||
}
|
||||
|
||||
// Native impl works faster for this type of reorder
|
||||
if (input_layout.format == format::bfyx && output_layout.format == format::bfyx) {
|
||||
if (input_fmt == format::bfyx && output_fmt == format::bfyx) {
|
||||
preferred_impl = impl_types::ocl;
|
||||
}
|
||||
|
||||
// onednn reorder doesn't support different number of dimensions in input and output layouts
|
||||
if (input_layout.format.dimension() != output_layout.format.dimension()) {
|
||||
if (input_fmt.dimension() != output_fmt.dimension()) {
|
||||
preferred_impl = impl_types::ocl;
|
||||
}
|
||||
|
||||
// For mixed precision case, onednn is slower than cldnn
|
||||
if (input_fmt == format::b_fs_yx_fsv16 && data_type_traits::is_i8_u8(input_dt))
|
||||
preferred_impl = impl_types::ocl;
|
||||
if (output_fmt == format::b_fs_yx_fsv16 && data_type_traits::is_i8_u8(output_dt))
|
||||
preferred_impl = impl_types::ocl;
|
||||
if (output_fmt == format::bfyx && output_dt == data_types::f32)
|
||||
preferred_impl = impl_types::ocl;
|
||||
} else if (node.is_type<pooling>() || node.is_type<convolution>() || node.is_type<deconvolution>()) {
|
||||
if (!_optimization_attributes.use_onednn_impls)
|
||||
return impl_types::ocl;
|
||||
@@ -1330,13 +1371,11 @@ impl_types layout_optimizer::get_preferred_impl_type(program_node& node, format
|
||||
auto& conv = node.as<convolution>();
|
||||
auto input_layout = conv.input().get_output_layout();
|
||||
auto output_layout = conv.get_output_layout();
|
||||
bool fp16_input = input_layout.data_type == data_types::f16;
|
||||
bool has_groups = conv.get_primitive()->groups > 1;
|
||||
bool is_depthwise = conv.get_primitive()->groups == input_layout.size.feature[0];
|
||||
bool first_conv = input_layout.size.feature[0] <= 4;
|
||||
bool enable_onednn_dw_fp16_conv = fp16_input && is_depthwise;
|
||||
if (((has_groups && !enable_onednn_dw_fp16_conv) || first_conv) &&
|
||||
(output_layout.format == format::b_fs_yx_fsv16 || output_layout.format == format::bs_fs_yx_bsv32_fsv16) &&
|
||||
if (((has_groups && !is_depthwise) || first_conv) &&
|
||||
(output_layout.format == format::b_fs_yx_fsv16) &&
|
||||
!needs_onednn_bfyx_to_blocked(format::bfyx, output_layout.format, input_layout, conv))
|
||||
impl_candidate = impl_types::ocl;
|
||||
if (conv.get_output_layout().format == format::b_fs_yx_fsv32 && first_conv)
|
||||
@@ -1522,7 +1561,7 @@ format layout_optimizer::get_preferred_format(program_node& node) {
|
||||
if (node.get_users().size() == 1 && node.get_users().front()->is_type<convolution>()) {
|
||||
auto& conv = node.get_users().front()->as<convolution>();
|
||||
auto ws = conv.get_dependency(1).get_output_layout().size;
|
||||
if (data_type_traits::is_floating_point(conv.get_output_layout().data_type) || ws.spatial[0] != 7 || conv.get_primitive()->groups > 1)
|
||||
if (ws.spatial[0] != 7 || conv.get_primitive()->groups > 1 || layout.size.feature[0] == 1)
|
||||
expected = format::bfyx;
|
||||
else
|
||||
expected = format::bs_fs_yx_bsv8_fsv4;
|
||||
|
||||
@@ -203,7 +203,7 @@ std::shared_ptr<cldnn::program> Program::BuildProgram(const std::vector<std::sha
|
||||
try {
|
||||
program = cldnn::program::build_program(*m_engine, *m_topology, options);
|
||||
} catch (std::exception& e) {
|
||||
IE_THROW() << "cldnn program build failed!" << e.what();
|
||||
IE_THROW() << "cldnn program build failed! " << e.what();
|
||||
}
|
||||
CleanupBuild();
|
||||
|
||||
|
||||
@@ -337,7 +337,7 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Model> func) {
|
||||
|
||||
// Conversion to FP32 might be needed for quantized models that face any fp16 related issues (e.g. overflow) for non-quantized layers
|
||||
// With this key users can work-around such issues
|
||||
if (!config.enable_fp16_for_quantized_models || use_onednn) {
|
||||
if (!config.enable_fp16_for_quantized_models) {
|
||||
ngraph::pass::Manager manager;
|
||||
manager.register_pass<ngraph::pass::ConvertPrecision>(precisions_array {{ ngraph::element::f16, ngraph::element::f32 }});
|
||||
manager.run_passes(func);
|
||||
|
||||
@@ -2168,10 +2168,13 @@ struct resample_opt_random_test : testing::TestWithParam<resample_opt_random_tes
|
||||
prim_opt.pads_begin = params.pads_begin;
|
||||
prim_opt.pads_end = params.pads_end;
|
||||
topo_opt.add(prim_opt);
|
||||
topo_opt.add(reorder("res_to_bfyx", "resample_opt", format::bfyx, params.input_type));
|
||||
topo_opt.add(reorder("to_output_type", "resample_opt", params.out_format, params.input_type));
|
||||
topo_opt.add(reorder("res_to_bfyx", "to_output_type", format::bfyx, params.input_type));
|
||||
|
||||
auto build_opts_opt = build_options();
|
||||
build_opts_opt.set_option(build_option::outputs({"resample_opt", "res_to_bfyx"}));
|
||||
build_opts_opt.set_option(build_option::outputs({"to_output_type", "res_to_bfyx"}));
|
||||
// optimize_data is turned on to test cross-layout
|
||||
build_opts_opt.set_option(build_option::optimize_data(true));
|
||||
|
||||
network net_opt(engine, topo_opt, build_opts_opt);
|
||||
|
||||
@@ -2227,5 +2230,6 @@ INSTANTIATE_TEST_SUITE_P(resample_opt_smoke_linear_onnx,
|
||||
{ data_types::f16, {1, 128, 13, 13}, {1, 128, 26, 26}, 1, resample_type::linear_onnx, 1, format::b_fs_yx_fsv32, format::b_fs_yx_fsv32, {}, {}},
|
||||
{ data_types::f16, {1, 128, 13, 13}, {1, 128, 26, 26}, 1, resample_type::linear_onnx, 1, format::bs_fs_yx_bsv32_fsv16, format::bs_fs_yx_bsv32_fsv16, {}, {}},
|
||||
{ data_types::f16, {1, 128, 13, 13}, {1, 128, 26, 26}, 1, resample_type::linear_onnx, 1, format::bs_fs_yx_bsv32_fsv32, format::bs_fs_yx_bsv32_fsv32, {}, {}},
|
||||
{ data_types::f16, {1, 128, 13, 13}, {1, 128, 26, 26}, 1, resample_type::linear_onnx, 1, format::b_fs_yx_fsv16, format::b_fs_yx_fsv32, {}, {}},
|
||||
}
|
||||
));
|
||||
|
||||
Reference in New Issue
Block a user