diff --git a/src/plugins/intel_gpu/src/graph/program_node.cpp b/src/plugins/intel_gpu/src/graph/program_node.cpp
index 77e9a770fa4..02cf2bb193c 100644
--- a/src/plugins/intel_gpu/src/graph/program_node.cpp
+++ b/src/plugins/intel_gpu/src/graph/program_node.cpp
@@ -989,16 +989,23 @@ void program_node::init_onednn_primitive_attributes() {
                     update_onednn_post_op_list(onednn_post_op_type::binary_add, dep_idx);
                 }
             } else {
-                // convolution using post-op output scales can only be int8/uint8
-                if (idx == 0 && !has_out_scales(attrs) && !is_type<pooling>() && !is_type<reduce>() &&
-                    !(is_type<eltwise>() && data_type_traits::is_floating_point(output_layout.data_type))) {
-                    int mask = in.count() > 1 ? 2 : 0;
-                    attrs->set_output_scales(mask, {DNNL_RUNTIME_F32_VAL});
-                    update_onednn_post_op_list(onednn_post_op_type::scale, dep_idx);
-                } else {
+                auto input_datatype = get_dependency(0).get_output_layout().data_type;
+                // convolution using post-op output scales can only be used with i8/u8 input (which uses an integer accumulator)
+                bool cant_use_output_scales =
+                    idx != 0 ||
+                    has_out_scales(attrs) ||
+                    is_type<pooling>() ||
+                    is_type<reduce>() ||
+                    (is_type<eltwise>() && data_type_traits::is_floating_point(input_datatype)) ||
+                    (is_type<convolution>() && data_type_traits::is_floating_point(input_datatype));
+                if (cant_use_output_scales) {
                     dnnl::memory::desc in_desc = onednn::layout_to_memory_desc(in, dnnl::memory::format_tag::ab, true);
                     post_ops.append_binary(dnnl::algorithm::binary_mul, in_desc);
                     update_onednn_post_op_list(onednn_post_op_type::binary_mul, dep_idx);
+                } else {
+                    int mask = in.count() > 1 ? 2 : 0;
+                    attrs->set_output_scales(mask, {DNNL_RUNTIME_F32_VAL});
+                    update_onednn_post_op_list(onednn_post_op_type::scale, dep_idx);
                 }
             }
         } else if (desc.is_type<quantize>()) {
diff --git a/src/plugins/intel_gpu/tests/fusions/convolution_fusion_test.cpp b/src/plugins/intel_gpu/tests/fusions/convolution_fusion_test.cpp
index c58150ec45d..9ec7a6d247e 100644
--- a/src/plugins/intel_gpu/tests/fusions/convolution_fusion_test.cpp
+++ b/src/plugins/intel_gpu/tests/fusions/convolution_fusion_test.cpp
@@ -1461,7 +1461,7 @@ TEST_P(conv_fp32_scale_activation_quantize_i8_eltwise_fp32_quantize_i8, basic) {
         reorder("reorder_bfyx", "quantize_1", p.default_format, data_types::f32)
     );
 
-    tolerance = 1.f;
+    tolerance = 2.f;
     execute(p);
 }
 
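
Note (not part of the patch): oneDNN applies the output-scales attribute to
the convolution's integer accumulator during downconversion, which is why the
set_output_scales() fast path is only valid for i8/u8 inputs and why the patch
reroutes floating-point inputs to the binary_mul post-op fallback. A minimal
standalone sketch of that decision follows, assuming the oneDNN v2.x API this
code targets (set_output_scales() and DNNL_RUNTIME_F32_VAL were removed in
v3.x); the helper name and parameters are illustrative, not OpenVINO code.

    #include <dnnl.hpp>

    // Attach a runtime scale to a convolution via one of two mechanisms,
    // mirroring the branch added in program_node.cpp above.
    void fuse_scale(dnnl::primitive_attr& attrs,
                    dnnl::post_ops& post_ops,
                    const dnnl::memory::desc& scale_desc,  // layout of the scale input
                    bool int8_input,    // true for i8/u8 conv input (int32 accumulator)
                    bool per_channel) { // true when there is one scale per output channel
        if (int8_input) {
            // Fast path: scale the int32 accumulator during downconversion.
            // Mask 2 sets bit 1 (the output-channel dim of the dst); 0 = per-tensor.
            int mask = per_channel ? 2 : 0;
            attrs.set_output_scales(mask, {DNNL_RUNTIME_F32_VAL});  // value bound at execute()
        } else {
            // Fallback: multiply the (already floating-point) result by the
            // scale tensor as an elementwise binary post-op.
            post_ops.append_binary(dnnl::algorithm::binary_mul, scale_desc);
        }
        attrs.set_post_ops(post_ops);
    }

The relaxed tolerance in the fusion test (1.f -> 2.f) is consistent with this
rerouting: an fp32 convolution that previously applied its scale through the
output-scales path now applies it as a separate floating-point multiply, which
can round slightly differently from the reference computation.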