From 195f5df2e86e2dd091670d817a06e27c2ee3094d Mon Sep 17 00:00:00 2001 From: Jade Cho Date: Mon, 4 Jul 2022 19:53:50 +0900 Subject: [PATCH] set zero-point as immediate value (#12002) --- .../graph/impls/onednn/convolution_onednn.cpp | 32 ++++++++++++------- .../src/graph/impls/onednn/utils.cpp | 20 ++++++------ .../src/graph/impls/onednn/utils.hpp | 4 +-- 3 files changed, 32 insertions(+), 24 deletions(-) diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/convolution_onednn.cpp b/src/plugins/intel_gpu/src/graph/impls/onednn/convolution_onednn.cpp index d530ca9db56..139d7af6367 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/convolution_onednn.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/convolution_onednn.cpp @@ -104,27 +104,35 @@ protected: return args; } + template + static void set_activation_zero_points_attr(const std::shared_ptr& attrs, cldnn::data_node& node) { + int32_t zp_val = DNNL_RUNTIME_S32_VAL; + bool is_per_tensor = onednn::is_per_tensor(node, zp_val); + if (is_per_tensor) { + attrs->set_zero_points(DNNL_ARG_SRC, 0, {zp_val}); + } else { + memory::ptr s32_mem = onednn::convert_zp_data_to_s32(node.get_attached_memory_ptr()); + node.attach_memory(s32_mem, false); + attrs->set_zero_points(DNNL_ARG_SRC, 2, {DNNL_RUNTIME_S32_VAL}); + } + } + static std::shared_ptr get_primitive_attributes(const typed_program_node& arg) { auto attrs = arg.get_onednn_primitive_attributes(); if (arg.activations_zero_points_term()) { auto& a_zp = arg.activations_zero_points(); + auto a_zp_dtype = a_zp.get_output_layout().data_type; - memory::ptr s32_mem; - if (a_zp.get_output_layout().data_type == data_types::i8) { - onednn::make_per_tensor_if_possible::type>(a_zp.as()); - s32_mem = onednn::convert_zp_data_to_s32::type>(a_zp.as().get_attached_memory_ptr()); - } else if (a_zp.get_output_layout().data_type == data_types::u8) { - onednn::make_per_tensor_if_possible::type>(a_zp.as()); - s32_mem = onednn::convert_zp_data_to_s32::type>(a_zp.as().get_attached_memory_ptr()); - } else { + if (!data_type_traits::is_i8_u8(a_zp_dtype)) { throw std::runtime_error("Unsupported data type for activations zero points for oneDNN convolution"); } - a_zp.as().attach_memory(s32_mem, false); - int mask = a_zp.get_output_layout().count() > 1 ? 2 : 0; - - attrs->set_zero_points(DNNL_ARG_SRC, mask, {DNNL_RUNTIME_S32_VAL}); + if (a_zp_dtype == data_types::i8) { + set_activation_zero_points_attr::type>(attrs, a_zp.as()); + } else { // if (a_zp_dtype == data_types::u8) + set_activation_zero_points_attr::type>(attrs, a_zp.as()); + } } if (arg.weights_zero_points_term()) { diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/utils.cpp b/src/plugins/intel_gpu/src/graph/impls/onednn/utils.cpp index a261531b0e0..08c44b2fb7a 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/utils.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/utils.cpp @@ -497,7 +497,7 @@ dnnl::algorithm convert_activation_func(cldnn::activation_func func) { } template -void make_per_tensor_if_possible(cldnn::data_node& node) { +bool is_per_tensor(cldnn::data_node& node, int32_t& zp_val) { auto ptr = node.get_attached_memory_ptr(); auto engine = ptr->get_engine(); auto& stream = engine->get_program_stream(); @@ -505,19 +505,19 @@ void make_per_tensor_if_possible(cldnn::data_node& node) { mem_lock old_data {ptr, stream}; auto val = old_data[0]; for (size_t i = 1; i < num_elems; i++) { - if (val != old_data[i]) - return; + if (val != old_data[i]) { + zp_val = DNNL_RUNTIME_S32_VAL; + return false; + } } - auto l = layout {node.get_output_layout().data_type, node.get_output_layout().format, tensor{1, 1, 1, 1}}; - auto new_mem = engine->allocate_memory(l); - mem_lock new_data{new_mem, stream}; - new_data[0] = val; - node.attach_memory(new_mem, false); + zp_val = val; + return true; } -template void make_per_tensor_if_possible(cldnn::data_node& node); -template void make_per_tensor_if_possible(cldnn::data_node& node); +template bool is_per_tensor(cldnn::data_node& node, int32_t& zp_val); +template bool is_per_tensor(cldnn::data_node& node, int32_t& zp_val); + } // namespace onednn } // namespace cldnn diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/utils.hpp b/src/plugins/intel_gpu/src/graph/impls/onednn/utils.hpp index 8338d3fa1dc..528b7eab7b8 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/utils.hpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/utils.hpp @@ -35,9 +35,9 @@ cldnn::format find_format(dnnl::memory::desc desc, bool is_grouped = false); int64_t get_f_offset(cldnn::layout&& l, dnnl::memory::desc&& desc); -// If the values in the tensor are identical, make it as per-tensor value +// Check if data node is per-tensor template -void make_per_tensor_if_possible(cldnn::data_node& node); +bool is_per_tensor(cldnn::data_node& node, int32_t& zp_val); } // namespace onednn } // namespace cldnn