set zero-point as immediate value (#12002)

Jade Cho 2022-07-04 19:53:50 +09:00 committed by GitHub
parent 88784c2b6f
commit 195f5df2e8
3 changed files with 32 additions and 24 deletions


@@ -104,27 +104,35 @@ protected:
         return args;
     }
 
+    template <typename T>
+    static void set_activation_zero_points_attr(const std::shared_ptr<dnnl::primitive_attr>& attrs, cldnn::data_node& node) {
+        int32_t zp_val = DNNL_RUNTIME_S32_VAL;
+        bool is_per_tensor = onednn::is_per_tensor<T>(node, zp_val);
+        if (is_per_tensor) {
+            attrs->set_zero_points(DNNL_ARG_SRC, 0, {zp_val});
+        } else {
+            memory::ptr s32_mem = onednn::convert_zp_data_to_s32<T>(node.get_attached_memory_ptr());
+            node.attach_memory(s32_mem, false);
+            attrs->set_zero_points(DNNL_ARG_SRC, 2, {DNNL_RUNTIME_S32_VAL});
+        }
+    }
+
     static std::shared_ptr<dnnl::primitive_attr> get_primitive_attributes(const typed_program_node<convolution>& arg) {
         auto attrs = arg.get_onednn_primitive_attributes();
 
         if (arg.activations_zero_points_term()) {
             auto& a_zp = arg.activations_zero_points();
-            memory::ptr s32_mem;
-            if (a_zp.get_output_layout().data_type == data_types::i8) {
-                onednn::make_per_tensor_if_possible<data_type_to_type<data_types::i8>::type>(a_zp.as<data>());
-                s32_mem = onednn::convert_zp_data_to_s32<data_type_to_type<data_types::i8>::type>(a_zp.as<data>().get_attached_memory_ptr());
-            } else if (a_zp.get_output_layout().data_type == data_types::u8) {
-                onednn::make_per_tensor_if_possible<data_type_to_type<data_types::u8>::type>(a_zp.as<data>());
-                s32_mem = onednn::convert_zp_data_to_s32<data_type_to_type<data_types::u8>::type>(a_zp.as<data>().get_attached_memory_ptr());
-            } else {
+            auto a_zp_dtype = a_zp.get_output_layout().data_type;
+
+            if (!data_type_traits::is_i8_u8(a_zp_dtype)) {
                 throw std::runtime_error("Unsupported data type for activations zero points for oneDNN convolution");
             }
-            a_zp.as<data>().attach_memory(s32_mem, false);
-            int mask = a_zp.get_output_layout().count() > 1 ? 2 : 0;
-            attrs->set_zero_points(DNNL_ARG_SRC, mask, {DNNL_RUNTIME_S32_VAL});
+
+            if (a_zp_dtype == data_types::i8) {
+                set_activation_zero_points_attr<data_type_to_type<data_types::i8>::type>(attrs, a_zp.as<data>());
+            } else { // if (a_zp_dtype == data_types::u8)
+                set_activation_zero_points_attr<data_type_to_type<data_types::u8>::type>(attrs, a_zp.as<data>());
+            }
         }
 
         if (arg.weights_zero_points_term()) {

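Note: the new set_activation_zero_points_attr helper chooses between an immediate per-tensor zero point (mask 0 with the real value) and a runtime per-channel zero point (mask 2 with the DNNL_RUNTIME_S32_VAL placeholder). Below is a minimal standalone sketch of those two calls, assuming the oneDNN 2.x C++ API (dnnl::primitive_attr::set_zero_points); the function set_src_zero_points and the main() driver are illustrative, not part of this change.

#include <dnnl.hpp>
#include <cstdint>

// Mirror of the idea in set_activation_zero_points_attr, outside of cldnn types.
void set_src_zero_points(dnnl::primitive_attr& attr, bool per_tensor, int32_t zp_val) {
    if (per_tensor) {
        // Immediate value: baked into the primitive attributes, so no extra
        // zero-point memory has to be bound when the primitive is executed.
        attr.set_zero_points(DNNL_ARG_SRC, /*mask=*/0, {zp_val});
    } else {
        // Runtime value(s): an s32 buffer must be supplied at execution time
        // via DNNL_ARG_ATTR_ZERO_POINTS | DNNL_ARG_SRC.
        attr.set_zero_points(DNNL_ARG_SRC, /*mask=*/2, {DNNL_RUNTIME_S32_VAL});
    }
}

int main() {
    dnnl::primitive_attr attr;
    set_src_zero_points(attr, /*per_tensor=*/true, /*zp_val=*/128);
    return 0;
}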

@@ -497,7 +497,7 @@ dnnl::algorithm convert_activation_func(cldnn::activation_func func) {
 }
 
 template <typename T>
-void make_per_tensor_if_possible(cldnn::data_node& node) {
+bool is_per_tensor(cldnn::data_node& node, int32_t& zp_val) {
     auto ptr = node.get_attached_memory_ptr();
     auto engine = ptr->get_engine();
     auto& stream = engine->get_program_stream();
@@ -505,19 +505,19 @@ void make_per_tensor_if_possible(cldnn::data_node& node) {
     mem_lock<T, mem_lock_type::read> old_data {ptr, stream};
     auto val = old_data[0];
     for (size_t i = 1; i < num_elems; i++) {
-        if (val != old_data[i])
-            return;
+        if (val != old_data[i]) {
+            zp_val = DNNL_RUNTIME_S32_VAL;
+            return false;
+        }
     }
 
-    auto l = layout {node.get_output_layout().data_type, node.get_output_layout().format, tensor{1, 1, 1, 1}};
-    auto new_mem = engine->allocate_memory(l);
-    mem_lock<T, mem_lock_type::write> new_data{new_mem, stream};
-    new_data[0] = val;
-    node.attach_memory(new_mem, false);
+    zp_val = val;
+    return true;
 }
 
-template void make_per_tensor_if_possible<int8_t>(cldnn::data_node& node);
-template void make_per_tensor_if_possible<uint8_t>(cldnn::data_node& node);
+template bool is_per_tensor<int8_t>(cldnn::data_node& node, int32_t& zp_val);
+template bool is_per_tensor<uint8_t>(cldnn::data_node& node, int32_t& zp_val);
 
 } // namespace onednn
 } // namespace cldnn

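Note: make_per_tensor_if_possible used to rewrite the node's attached memory down to a 1x1x1x1 buffer; is_per_tensor now only reports whether the zero-point data collapses to a single scalar and hands that scalar back through zp_val. A plain C++ sketch of the same scan outside of cldnn types; the names is_per_tensor_sketch and kRuntimePlaceholder are illustrative only.

#include <cstdint>
#include <iostream>
#include <vector>

constexpr int32_t kRuntimePlaceholder = INT32_MIN;  // stand-in for DNNL_RUNTIME_S32_VAL

template <typename T>
bool is_per_tensor_sketch(const std::vector<T>& zp, int32_t& zp_val) {
    // Scan once: empty or single-element buffers are trivially per-tensor.
    for (size_t i = 1; i < zp.size(); ++i) {
        if (zp[i] != zp[0]) {
            zp_val = kRuntimePlaceholder;  // not uniform: caller keeps the runtime path
            return false;
        }
    }
    zp_val = zp.empty() ? 0 : static_cast<int32_t>(zp[0]);  // uniform: expose the scalar
    return true;
}

int main() {
    int32_t zp_val = 0;
    std::vector<uint8_t> uniform{128, 128, 128};
    std::vector<uint8_t> mixed{127, 128, 129};
    std::cout << is_per_tensor_sketch(uniform, zp_val) << " " << zp_val << "\n";  // prints "1 128"
    std::cout << is_per_tensor_sketch(mixed, zp_val) << "\n";                     // prints "0"
    return 0;
}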

@@ -35,9 +35,9 @@ cldnn::format find_format(dnnl::memory::desc desc, bool is_grouped = false);
 
 int64_t get_f_offset(cldnn::layout&& l, dnnl::memory::desc&& desc);
 
-// If the values in the tensor are identical, make it as per-tensor value
+// Check if data node is per-tensor
 template <typename T>
-void make_per_tensor_if_possible(cldnn::data_node& node);
+bool is_per_tensor(cldnn::data_node& node, int32_t& zp_val);
 
 } // namespace onednn
 } // namespace cldnn
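Note: on the non-per-tensor path the attributes only carry the DNNL_RUNTIME_S32_VAL placeholder, so the actual zero points are bound when the primitive executes. A hedged, self-contained sketch of that binding for an int8 convolution, assuming the oneDNN 2.x C++ API; shapes, formats and the zero-point value 128 are illustrative, and a configuration the local build does not support simply lands in the catch block.

#include <dnnl.hpp>
#include <cstdint>
#include <iostream>
#include <unordered_map>

int main() {
    using namespace dnnl;
    try {
        engine eng(engine::kind::cpu, 0);
        stream strm(eng);

        // u8 activations, s8 weights, s32 destination, 1x1 convolution.
        memory::desc src_md({1, 8, 4, 4}, memory::data_type::u8, memory::format_tag::nhwc);
        memory::desc wei_md({8, 8, 1, 1}, memory::data_type::s8, memory::format_tag::ohwi);
        memory::desc dst_md({1, 8, 4, 4}, memory::data_type::s32, memory::format_tag::nhwc);

        // Declare a runtime per-tensor source zero point; the value comes later.
        primitive_attr attr;
        attr.set_zero_points(DNNL_ARG_SRC, /*mask=*/0, {DNNL_RUNTIME_S32_VAL});

        auto conv_pd = convolution_forward::primitive_desc(
            convolution_forward::desc(prop_kind::forward_inference,
                                      algorithm::convolution_direct,
                                      src_md, wei_md, dst_md,
                                      /*strides=*/{1, 1}, /*pad_l=*/{0, 0}, /*pad_r=*/{0, 0}),
            attr, eng);

        // src/weights contents are left uninitialized; only the zero-point binding is shown.
        memory src_mem(conv_pd.src_desc(), eng);
        memory wei_mem(conv_pd.weights_desc(), eng);
        memory dst_mem(conv_pd.dst_desc(), eng);

        // The zero point itself lives in a 1-element s32 memory bound at execute().
        memory zp_mem({{1}, memory::data_type::s32, memory::format_tag::x}, eng);
        *static_cast<int32_t*>(zp_mem.get_data_handle()) = 128;

        convolution_forward(conv_pd).execute(strm,
            {{DNNL_ARG_SRC, src_mem},
             {DNNL_ARG_WEIGHTS, wei_mem},
             {DNNL_ARG_DST, dst_mem},
             {DNNL_ARG_ATTR_ZERO_POINTS | DNNL_ARG_SRC, zp_mem}});
        strm.wait();
        std::cout << "executed with runtime src zero point\n";
    } catch (const dnnl::error& e) {
        std::cout << "configuration not supported here: " << e.what() << "\n";
    }
    return 0;
}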