set zero-point as immediate value (#12002)
parent 88784c2b6f · commit 195f5df2e8
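This change touches the cldnn oneDNN convolution path: when every activation zero-point in the data node holds the same value, that value is now passed to oneDNN as an immediate per-tensor zero-point (mask 0) instead of always going through a runtime s32 buffer (mask 2 with DNNL_RUNTIME_S32_VAL). The old helper make_per_tensor_if_possible, which reallocated a one-element memory, is replaced by is_per_tensor, which only reports the common value. A minimal sketch of the two attribute modes, assuming the oneDNN 2.x primitive_attr API used in the diff (function name below is illustrative, not from the PR):

#include <cstdint>
#include <oneapi/dnnl/dnnl.hpp>

void illustrate_src_zero_point_modes(int32_t per_tensor_zp) {
    dnnl::primitive_attr immediate_attr;
    // mask == 0: one zero-point for the whole tensor, known at primitive creation,
    // baked into the attribute as an immediate value (the fast path this PR adds).
    immediate_attr.set_zero_points(DNNL_ARG_SRC, 0, {per_tensor_zp});

    dnnl::primitive_attr runtime_attr;
    // mask == 2 (per-channel) with DNNL_RUNTIME_S32_VAL: the actual s32 values are
    // supplied at execution time via DNNL_ARG_ATTR_ZERO_POINTS | DNNL_ARG_SRC.
    runtime_attr.set_zero_points(DNNL_ARG_SRC, 2, {DNNL_RUNTIME_S32_VAL});
}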
@@ -104,27 +104,35 @@ protected:
         return args;
     }
 
+    template <typename T>
+    static void set_activation_zero_points_attr(const std::shared_ptr<dnnl::primitive_attr>& attrs, cldnn::data_node& node) {
+        int32_t zp_val = DNNL_RUNTIME_S32_VAL;
+        bool is_per_tensor = onednn::is_per_tensor<T>(node, zp_val);
+        if (is_per_tensor) {
+            attrs->set_zero_points(DNNL_ARG_SRC, 0, {zp_val});
+        } else {
+            memory::ptr s32_mem = onednn::convert_zp_data_to_s32<T>(node.get_attached_memory_ptr());
+            node.attach_memory(s32_mem, false);
+            attrs->set_zero_points(DNNL_ARG_SRC, 2, {DNNL_RUNTIME_S32_VAL});
+        }
+    }
+
     static std::shared_ptr<dnnl::primitive_attr> get_primitive_attributes(const typed_program_node<convolution>& arg) {
         auto attrs = arg.get_onednn_primitive_attributes();
 
         if (arg.activations_zero_points_term()) {
             auto& a_zp = arg.activations_zero_points();
+            auto a_zp_dtype = a_zp.get_output_layout().data_type;
 
-            memory::ptr s32_mem;
-            if (a_zp.get_output_layout().data_type == data_types::i8) {
-                onednn::make_per_tensor_if_possible<data_type_to_type<data_types::i8>::type>(a_zp.as<data>());
-                s32_mem = onednn::convert_zp_data_to_s32<data_type_to_type<data_types::i8>::type>(a_zp.as<data>().get_attached_memory_ptr());
-            } else if (a_zp.get_output_layout().data_type == data_types::u8) {
-                onednn::make_per_tensor_if_possible<data_type_to_type<data_types::u8>::type>(a_zp.as<data>());
-                s32_mem = onednn::convert_zp_data_to_s32<data_type_to_type<data_types::u8>::type>(a_zp.as<data>().get_attached_memory_ptr());
-            } else {
+            if (!data_type_traits::is_i8_u8(a_zp_dtype)) {
                 throw std::runtime_error("Unsupported data type for activations zero points for oneDNN convolution");
             }
-            a_zp.as<data>().attach_memory(s32_mem, false);
 
-            int mask = a_zp.get_output_layout().count() > 1 ? 2 : 0;
-            attrs->set_zero_points(DNNL_ARG_SRC, mask, {DNNL_RUNTIME_S32_VAL});
+            if (a_zp_dtype == data_types::i8) {
+                set_activation_zero_points_attr<data_type_to_type<data_types::i8>::type>(attrs, a_zp.as<data>());
+            } else { // if (a_zp_dtype == data_types::u8)
+                set_activation_zero_points_attr<data_type_to_type<data_types::u8>::type>(attrs, a_zp.as<data>());
+            }
         }
 
         if (arg.weights_zero_points_term()) {
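When the zero-points are not uniform, the new set_activation_zero_points_attr helper keeps the old behaviour: the i8/u8 zero-point data is widened to s32 (convert_zp_data_to_s32), re-attached to the node, and the attribute is set with mask 2 and DNNL_RUNTIME_S32_VAL so the buffer is bound at execution time. A hedged sketch of that execution-time binding in plain oneDNN terms (the helper name and arguments below are placeholders, not part of this PR):

#include <cstdint>
#include <oneapi/dnnl/dnnl.hpp>
#include <unordered_map>
#include <vector>

void add_runtime_src_zero_points(const dnnl::engine& eng,
                                 std::unordered_map<int, dnnl::memory>& args,
                                 const std::vector<int32_t>& per_channel_zp) {
    // oneDNN expects runtime zero-points as an s32 buffer, one value per channel;
    // this is why the plugin converts i8/u8 zero-point data to s32 before use.
    dnnl::memory::desc zp_md({static_cast<dnnl::memory::dim>(per_channel_zp.size())},
                             dnnl::memory::data_type::s32,
                             dnnl::memory::format_tag::x);
    dnnl::memory zp_mem(zp_md, eng, const_cast<int32_t*>(per_channel_zp.data()));
    // The buffer is passed alongside the regular primitive arguments at execute().
    args.insert({DNNL_ARG_ATTR_ZERO_POINTS | DNNL_ARG_SRC, zp_mem});
}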
@@ -497,7 +497,7 @@ dnnl::algorithm convert_activation_func(cldnn::activation_func func) {
 }
 
 template <typename T>
-void make_per_tensor_if_possible(cldnn::data_node& node) {
+bool is_per_tensor(cldnn::data_node& node, int32_t& zp_val) {
     auto ptr = node.get_attached_memory_ptr();
     auto engine = ptr->get_engine();
     auto& stream = engine->get_program_stream();
@@ -505,19 +505,19 @@ void make_per_tensor_if_possible(cldnn::data_node& node) {
     mem_lock<T, mem_lock_type::read> old_data {ptr, stream};
     auto val = old_data[0];
     for (size_t i = 1; i < num_elems; i++) {
-        if (val != old_data[i])
-            return;
+        if (val != old_data[i]) {
+            zp_val = DNNL_RUNTIME_S32_VAL;
+            return false;
+        }
     }
 
-    auto l = layout {node.get_output_layout().data_type, node.get_output_layout().format, tensor{1, 1, 1, 1}};
-    auto new_mem = engine->allocate_memory(l);
-    mem_lock<T, mem_lock_type::write> new_data{new_mem, stream};
-    new_data[0] = val;
-    node.attach_memory(new_mem, false);
+    zp_val = val;
+    return true;
 }
 
-template void make_per_tensor_if_possible<int8_t>(cldnn::data_node& node);
-template void make_per_tensor_if_possible<uint8_t>(cldnn::data_node& node);
+template bool is_per_tensor<int8_t>(cldnn::data_node& node, int32_t& zp_val);
+template bool is_per_tensor<uint8_t>(cldnn::data_node& node, int32_t& zp_val);
 
 } // namespace onednn
 } // namespace cldnn
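The rewritten helper no longer mutates the node when the values are uniform; it only reports whether a single zero-point exists and, if so, returns it through zp_val. The decision rule, restated on a plain buffer purely as an illustration (an assumption-level sketch, not the PR code, presuming num_elems >= 1):

#include <cstddef>
#include <cstdint>

template <typename T>
bool all_zero_points_equal(const T* data, size_t num_elems, int32_t& zp_val) {
    for (size_t i = 1; i < num_elems; ++i) {
        if (data[i] != data[0])
            return false;                      // per-channel: caller keeps runtime zero-points
    }
    zp_val = static_cast<int32_t>(data[0]);    // per-tensor: value goes into the attr directly
    return true;
}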
@@ -35,9 +35,9 @@ cldnn::format find_format(dnnl::memory::desc desc, bool is_grouped = false);
 
 int64_t get_f_offset(cldnn::layout&& l, dnnl::memory::desc&& desc);
 
-// If the values in the tensor are identical, make it as per-tensor value
+// Check if data node is per-tensor
 template <typename T>
-void make_per_tensor_if_possible(cldnn::data_node& node);
+bool is_per_tensor(cldnn::data_node& node, int32_t& zp_val);
 
 } // namespace onednn
 } // namespace cldnn