set zero-point as immediate value (#12002)

Jade Cho 2022-07-04 19:53:50 +09:00 committed by GitHub
parent 88784c2b6f
commit 195f5df2e8
3 changed files with 32 additions and 24 deletions


@@ -104,27 +104,35 @@ protected:
         return args;
     }
 
+    template <typename T>
+    static void set_activation_zero_points_attr(const std::shared_ptr<dnnl::primitive_attr>& attrs, cldnn::data_node& node) {
+        int32_t zp_val = DNNL_RUNTIME_S32_VAL;
+        bool is_per_tensor = onednn::is_per_tensor<T>(node, zp_val);
+        if (is_per_tensor) {
+            attrs->set_zero_points(DNNL_ARG_SRC, 0, {zp_val});
+        } else {
+            memory::ptr s32_mem = onednn::convert_zp_data_to_s32<T>(node.get_attached_memory_ptr());
+            node.attach_memory(s32_mem, false);
+            attrs->set_zero_points(DNNL_ARG_SRC, 2, {DNNL_RUNTIME_S32_VAL});
+        }
+    }
+
     static std::shared_ptr<dnnl::primitive_attr> get_primitive_attributes(const typed_program_node<convolution>& arg) {
         auto attrs = arg.get_onednn_primitive_attributes();
 
         if (arg.activations_zero_points_term()) {
             auto& a_zp = arg.activations_zero_points();
-            memory::ptr s32_mem;
-            if (a_zp.get_output_layout().data_type == data_types::i8) {
-                onednn::make_per_tensor_if_possible<data_type_to_type<data_types::i8>::type>(a_zp.as<data>());
-                s32_mem = onednn::convert_zp_data_to_s32<data_type_to_type<data_types::i8>::type>(a_zp.as<data>().get_attached_memory_ptr());
-            } else if (a_zp.get_output_layout().data_type == data_types::u8) {
-                onednn::make_per_tensor_if_possible<data_type_to_type<data_types::u8>::type>(a_zp.as<data>());
-                s32_mem = onednn::convert_zp_data_to_s32<data_type_to_type<data_types::u8>::type>(a_zp.as<data>().get_attached_memory_ptr());
-            } else {
+            auto a_zp_dtype = a_zp.get_output_layout().data_type;
+
+            if (!data_type_traits::is_i8_u8(a_zp_dtype)) {
                 throw std::runtime_error("Unsupported data type for activations zero points for oneDNN convolution");
             }
-            a_zp.as<data>().attach_memory(s32_mem, false);
-            int mask = a_zp.get_output_layout().count() > 1 ? 2 : 0;
-            attrs->set_zero_points(DNNL_ARG_SRC, mask, {DNNL_RUNTIME_S32_VAL});
+
+            if (a_zp_dtype == data_types::i8) {
+                set_activation_zero_points_attr<data_type_to_type<data_types::i8>::type>(attrs, a_zp.as<data>());
+            } else { // if (a_zp_dtype == data_types::u8)
+                set_activation_zero_points_attr<data_type_to_type<data_types::u8>::type>(attrs, a_zp.as<data>());
+            }
         }
 
         if (arg.weights_zero_points_term()) {

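Note: the new set_activation_zero_points_attr helper chooses between an immediate per-tensor zero point (mask 0 with the real value) and a runtime per-channel zero point (mask 2 with the DNNL_RUNTIME_S32_VAL placeholder). Below is a minimal standalone sketch of those two calls, assuming the oneDNN 2.x C++ API (dnnl::primitive_attr::set_zero_points); the function set_src_zero_points and the main() driver are illustrative, not part of this change.

#include <dnnl.hpp>
#include <cstdint>

// Mirror of the idea in set_activation_zero_points_attr, outside of cldnn types.
void set_src_zero_points(dnnl::primitive_attr& attr, bool per_tensor, int32_t zp_val) {
    if (per_tensor) {
        // Immediate value: baked into the primitive attributes, so no extra
        // zero-point memory has to be bound when the primitive is executed.
        attr.set_zero_points(DNNL_ARG_SRC, /*mask=*/0, {zp_val});
    } else {
        // Runtime value(s): an s32 buffer must be supplied at execution time
        // via DNNL_ARG_ATTR_ZERO_POINTS | DNNL_ARG_SRC.
        attr.set_zero_points(DNNL_ARG_SRC, /*mask=*/2, {DNNL_RUNTIME_S32_VAL});
    }
}

int main() {
    dnnl::primitive_attr attr;
    set_src_zero_points(attr, /*per_tensor=*/true, /*zp_val=*/128);
    return 0;
}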

@@ -497,7 +497,7 @@ dnnl::algorithm convert_activation_func(cldnn::activation_func func) {
 }
 
 template <typename T>
-void make_per_tensor_if_possible(cldnn::data_node& node) {
+bool is_per_tensor(cldnn::data_node& node, int32_t& zp_val) {
     auto ptr = node.get_attached_memory_ptr();
     auto engine = ptr->get_engine();
     auto& stream = engine->get_program_stream();
@@ -505,19 +505,19 @@ void make_per_tensor_if_possible(cldnn::data_node& node) {
     mem_lock<T, mem_lock_type::read> old_data {ptr, stream};
     auto val = old_data[0];
     for (size_t i = 1; i < num_elems; i++) {
-        if (val != old_data[i])
-            return;
+        if (val != old_data[i]) {
+            zp_val = DNNL_RUNTIME_S32_VAL;
+            return false;
+        }
     }
 
-    auto l = layout {node.get_output_layout().data_type, node.get_output_layout().format, tensor{1, 1, 1, 1}};
-    auto new_mem = engine->allocate_memory(l);
-    mem_lock<T, mem_lock_type::write> new_data{new_mem, stream};
-    new_data[0] = val;
-    node.attach_memory(new_mem, false);
+    zp_val = val;
+    return true;
 }
 
-template void make_per_tensor_if_possible<int8_t>(cldnn::data_node& node);
-template void make_per_tensor_if_possible<uint8_t>(cldnn::data_node& node);
+template bool is_per_tensor<int8_t>(cldnn::data_node& node, int32_t& zp_val);
+template bool is_per_tensor<uint8_t>(cldnn::data_node& node, int32_t& zp_val);
 
 } // namespace onednn
 } // namespace cldnn

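Note: make_per_tensor_if_possible used to rewrite the node's attached memory down to a 1x1x1x1 buffer; is_per_tensor now only reports whether the zero-point data collapses to a single scalar and hands that scalar back through zp_val. A plain C++ sketch of the same scan outside of cldnn types; the names is_per_tensor_sketch and kRuntimePlaceholder are illustrative only.

#include <cstdint>
#include <iostream>
#include <vector>

constexpr int32_t kRuntimePlaceholder = INT32_MIN;  // stand-in for DNNL_RUNTIME_S32_VAL

template <typename T>
bool is_per_tensor_sketch(const std::vector<T>& zp, int32_t& zp_val) {
    // Scan once: empty or single-element buffers are trivially per-tensor.
    for (size_t i = 1; i < zp.size(); ++i) {
        if (zp[i] != zp[0]) {
            zp_val = kRuntimePlaceholder;  // not uniform: caller keeps the runtime path
            return false;
        }
    }
    zp_val = zp.empty() ? 0 : static_cast<int32_t>(zp[0]);  // uniform: expose the scalar
    return true;
}

int main() {
    int32_t zp_val = 0;
    std::vector<uint8_t> uniform{128, 128, 128};
    std::vector<uint8_t> mixed{127, 128, 129};
    std::cout << is_per_tensor_sketch(uniform, zp_val) << " " << zp_val << "\n";  // prints "1 128"
    std::cout << is_per_tensor_sketch(mixed, zp_val) << "\n";                     // prints "0"
    return 0;
}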

@@ -35,9 +35,9 @@ cldnn::format find_format(dnnl::memory::desc desc, bool is_grouped = false);
 
 int64_t get_f_offset(cldnn::layout&& l, dnnl::memory::desc&& desc);
 
-// If the values in the tensor are identical, make it as per-tensor value
+// Check if data node is per-tensor
 template <typename T>
-void make_per_tensor_if_possible(cldnn::data_node& node);
+bool is_per_tensor(cldnn::data_node& node, int32_t& zp_val);
 
 } // namespace onednn
 } // namespace cldnn
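Note: on the non-per-tensor path the attributes only carry the DNNL_RUNTIME_S32_VAL placeholder, so the actual zero points are bound when the primitive executes. A hedged, self-contained sketch of that binding for an int8 convolution, assuming the oneDNN 2.x C++ API; shapes, formats and the zero-point value 128 are illustrative, and a configuration the local build does not support simply lands in the catch block.

#include <dnnl.hpp>
#include <cstdint>
#include <iostream>
#include <unordered_map>

int main() {
    using namespace dnnl;
    try {
        engine eng(engine::kind::cpu, 0);
        stream strm(eng);

        // u8 activations, s8 weights, s32 destination, 1x1 convolution.
        memory::desc src_md({1, 8, 4, 4}, memory::data_type::u8, memory::format_tag::nhwc);
        memory::desc wei_md({8, 8, 1, 1}, memory::data_type::s8, memory::format_tag::ohwi);
        memory::desc dst_md({1, 8, 4, 4}, memory::data_type::s32, memory::format_tag::nhwc);

        // Declare a runtime per-tensor source zero point; the value comes later.
        primitive_attr attr;
        attr.set_zero_points(DNNL_ARG_SRC, /*mask=*/0, {DNNL_RUNTIME_S32_VAL});

        auto conv_pd = convolution_forward::primitive_desc(
            convolution_forward::desc(prop_kind::forward_inference,
                                      algorithm::convolution_direct,
                                      src_md, wei_md, dst_md,
                                      /*strides=*/{1, 1}, /*pad_l=*/{0, 0}, /*pad_r=*/{0, 0}),
            attr, eng);

        // src/weights contents are left uninitialized; only the zero-point binding is shown.
        memory src_mem(conv_pd.src_desc(), eng);
        memory wei_mem(conv_pd.weights_desc(), eng);
        memory dst_mem(conv_pd.dst_desc(), eng);

        // The zero point itself lives in a 1-element s32 memory bound at execute().
        memory zp_mem({{1}, memory::data_type::s32, memory::format_tag::x}, eng);
        *static_cast<int32_t*>(zp_mem.get_data_handle()) = 128;

        convolution_forward(conv_pd).execute(strm,
            {{DNNL_ARG_SRC, src_mem},
             {DNNL_ARG_WEIGHTS, wei_mem},
             {DNNL_ARG_DST, dst_mem},
             {DNNL_ARG_ATTR_ZERO_POINTS | DNNL_ARG_SRC, zp_mem}});
        strm.wait();
        std::cout << "executed with runtime src zero point\n";
    } catch (const dnnl::error& e) {
        std::cout << "configuration not supported here: " << e.what() << "\n";
    }
    return 0;
}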