[GPU] Serialization logic updates for OneDNN 3.0 (#15182)
* [GPU] Draft for integrating oneDNN 3.0 (initial PR).
  1. Support the oneDNN 3.0 API.
  2. Use a binary_mul post-op instead of oscale with the channel-wise mask (2).
  3. Disable some post-op fusing because there is no eltwise scale API: eltw(non_linear)+eltw(linear), eltw+sum+eltw(linear).
  Signed-off-by: hyunback <hyunback.kim@intel.com>
* Fix hardswish issue in 3.0: the hardswish parameters that were hard-coded in 2.7 are changed to alpha and beta taken from the user's required input.
  Signed-off-by: hyunback <hyunback.kim@intel.com>
* Clean up code.
  Signed-off-by: hyunback <hyunback.kim@intel.com>
* Apply code review comments and fix CI issue.
  Signed-off-by: hyunback <hyunback.kim@intel.com>
* Remove setting dst scale.
  - ACC issue
  - No perf gain compared to binary_mul
  Signed-off-by: hyunback <hyunback.kim@intel.com>
* GPU serialization for oneDNN 3.0
* Missed changes
* Add oneDNN engine creator when loading a model from cache
* Fixed to use mem_dep index
* Updated to save zero_point_mask for serialization
* Fixed oneDNN FC serialization logic
* Updated the logic to check whether oneDNN is enabled
---------
Signed-off-by: hyunback <hyunback.kim@intel.com>
Co-authored-by: hyunback <hyunback.kim@intel.com>
This commit is contained in:
parent
bb18069f85
commit
c2518f1e4a
@ -50,9 +50,9 @@ protected:
|
||||
}
|
||||
|
||||
static std::shared_ptr<dnnl::concat::primitive_desc> get_concatenation_primitive_descriptor(const kernel_impl_params& impl_params,
|
||||
cldnn::engine& engine,
|
||||
const dnnl::primitive_attr& attr,
|
||||
const int64_t axis) {
|
||||
auto& engine = impl_params.prog->get_engine();
|
||||
std::vector<dnnl::memory::desc> input_mds;
|
||||
for (size_t i = 0; i < impl_params.input_layouts.size(); i++) {
|
||||
input_mds.push_back(onednn::layout_to_memory_desc(impl_params.get_input_layout(i)));
|
||||
@ -102,8 +102,8 @@ public:
|
||||
ib >> prim_axis;
|
||||
|
||||
const kernel_impl_params* impl_params = reinterpret_cast<kernel_impl_params*>(ib.getKernlImplParams());
|
||||
auto desc = get_concatenation_descriptor(*impl_params, prim_axis, ib.get_engine());
|
||||
_pd = *desc;
|
||||
auto prim_desc = get_concatenation_primitive_descriptor(*impl_params, ib.get_engine(), *_attrs, prim_axis);
|
||||
_pd = *prim_desc;
|
||||
|
||||
std::vector<uint8_t> prim_cache;
|
||||
ib >> prim_cache;
|
||||
@ -119,7 +119,7 @@ public:
|
||||
return make_unique<concatenation_onednn>(engine, config);
|
||||
auto prim = impl_params.typed_desc<concatenation>();
|
||||
auto attr = arg.get_onednn_primitive_attributes();
|
||||
auto prim_desc = get_concatenation_primitive_descriptor(impl_params, *attr, prim->axis);
|
||||
auto prim_desc = get_concatenation_primitive_descriptor(impl_params, impl_params.prog->get_engine(), *attr, prim->axis);
|
||||
|
||||
return cldnn::make_unique<concatenation_onednn>(engine, config, attr, *prim_desc);
|
||||
}
|
||||
|
@ -83,16 +83,24 @@ protected:
|
||||
return args;
|
||||
}
|
||||
|
||||
int _zero_point_mask;
|
||||
void set_zero_point_mask(int zero_point_mask) {
|
||||
_zero_point_mask = zero_point_mask;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
static void set_activation_zero_points_attr(const std::shared_ptr<dnnl::primitive_attr>& attrs, cldnn::data_node& node) {
|
||||
static void set_activation_zero_points_attr(const std::shared_ptr<dnnl::primitive_attr>& attrs,
|
||||
cldnn::data_node& node, int& zero_point_mask) {
|
||||
int32_t zp_val = DNNL_RUNTIME_S32_VAL;
|
||||
bool is_per_tensor = onednn::is_per_tensor<T>(node, zp_val);
|
||||
memory::ptr s32_mem = onednn::convert_zp_data_to_s32<T>(node.get_attached_memory_ptr());
|
||||
node.attach_memory(s32_mem, false);
|
||||
attrs->set_zero_points_mask(DNNL_ARG_SRC, is_per_tensor ? 0 : 2);
|
||||
zero_point_mask = is_per_tensor ? 0 : 2;
|
||||
attrs->set_zero_points_mask(DNNL_ARG_SRC, zero_point_mask);
|
||||
}
|
||||
|
||||
static std::shared_ptr<dnnl::primitive_attr> get_primitive_attributes(const typed_program_node<convolution>& arg) {
|
||||
static std::shared_ptr<dnnl::primitive_attr> get_primitive_attributes(const typed_program_node<convolution>& arg,
|
||||
int& zero_point_mask) {
|
||||
auto attrs = arg.get_onednn_primitive_attributes();
|
||||
|
||||
if (arg.activations_zero_points_term()) {
|
||||
@ -104,9 +112,9 @@ protected:
|
||||
}
|
||||
|
||||
if (a_zp_dtype == data_types::i8) {
|
||||
set_activation_zero_points_attr<data_type_to_type<data_types::i8>::type>(attrs, a_zp.as<data>());
|
||||
set_activation_zero_points_attr<data_type_to_type<data_types::i8>::type>(attrs, a_zp.as<data>(), zero_point_mask);
|
||||
} else { // if (a_zp_dtype == data_types::u8)
|
||||
set_activation_zero_points_attr<data_type_to_type<data_types::u8>::type>(attrs, a_zp.as<data>());
|
||||
set_activation_zero_points_attr<data_type_to_type<data_types::u8>::type>(attrs, a_zp.as<data>(), zero_point_mask);
|
||||
}
|
||||
}
|
||||
|
||||
@ -162,7 +170,16 @@ public:
|
||||
#ifdef ONEDNN_PRIMITIVE_SERIALIZATION
|
||||
parent::save(ob);
|
||||
|
||||
ob << make_data(&_desc->data, sizeof(dnnl_convolution_desc_t));
|
||||
ob << _zero_point_mask;
|
||||
|
||||
const dnnl::convolution_forward::primitive_desc *typed_pd
|
||||
= reinterpret_cast<const dnnl::convolution_forward::primitive_desc *>(&_pd);
|
||||
|
||||
ob << typed_pd->get_strides();
|
||||
ob << typed_pd->get_dilations();
|
||||
ob << typed_pd->get_padding_l();
|
||||
ob << typed_pd->get_padding_r();
|
||||
ob << typed_pd->bias_desc().is_zero();
|
||||
|
||||
std::vector<uint8_t> prim_cache;
|
||||
prim_cache = _prim.get_cache_blob();
|
||||
@ -174,16 +191,51 @@ public:
|
||||
#ifdef ONEDNN_PRIMITIVE_SERIALIZATION
|
||||
parent::load(ib);
|
||||
|
||||
const char dummy_mem[sizeof(dnnl::convolution_forward::desc)] = {};
|
||||
const dnnl::convolution_forward::desc *dummy_opdesc
|
||||
= reinterpret_cast<const dnnl::convolution_forward::desc *>(&dummy_mem[0]);
|
||||
_desc = std::make_shared<dnnl::convolution_forward::desc>(std::move(*dummy_opdesc));
|
||||
ib >> make_data(&_desc->data, sizeof(dnnl_convolution_desc_t));
|
||||
ib >> _zero_point_mask;
|
||||
if (_zero_point_mask != -1) {
|
||||
_attrs->set_zero_points_mask(DNNL_ARG_SRC, _zero_point_mask);
|
||||
}
|
||||
|
||||
const kernel_impl_params* impl_params = reinterpret_cast<kernel_impl_params*>(ib.getKernlImplParams());
|
||||
|
||||
auto input_md = onednn::layout_to_memory_desc(impl_params->get_input_layout(0), dnnl::memory::format_tag::undef);
|
||||
auto weights_md = onednn::layout_to_memory_desc(impl_params->get_input_layout(1), dnnl::memory::format_tag::any);
|
||||
auto output_md = onednn::layout_to_memory_desc(impl_params->get_output_layout(), dnnl::memory::format_tag::undef);
|
||||
|
||||
dnnl::memory::dims strides;
|
||||
dnnl::memory::dims dilates;
|
||||
dnnl::memory::dims padding_l;
|
||||
dnnl::memory::dims padding_r;
|
||||
ib >> strides;
|
||||
ib >> dilates;
|
||||
ib >> padding_l;
|
||||
ib >> padding_r;
|
||||
|
||||
bool zero_bias;
|
||||
ib >> zero_bias;
|
||||
|
||||
if (zero_bias) {
|
||||
auto prim_desc = std::make_shared<dnnl::convolution_forward::primitive_desc>(
|
||||
ib.get_engine().get_onednn_engine(),
|
||||
dnnl::prop_kind::forward_inference, dnnl::algorithm::convolution_direct,
|
||||
input_md, weights_md, output_md,
|
||||
strides, dilates, padding_l, padding_r,
|
||||
*_attrs.get());
|
||||
_pd = *prim_desc;
|
||||
} else {
|
||||
auto bias_md = onednn::layout_to_memory_desc(impl_params->get_input_layout(2), dnnl::memory::format_tag::any, true);
|
||||
auto prim_desc = std::make_shared<dnnl::convolution_forward::primitive_desc>(
|
||||
ib.get_engine().get_onednn_engine(),
|
||||
dnnl::prop_kind::forward_inference, dnnl::algorithm::convolution_direct,
|
||||
input_md, weights_md, bias_md, output_md,
|
||||
strides, dilates, padding_l, padding_r,
|
||||
*_attrs.get());
|
||||
_pd = *prim_desc;
|
||||
}
|
||||
|
||||
std::vector<uint8_t> prim_cache;
|
||||
ib >> prim_cache;
|
||||
|
||||
_pd = dnnl::primitive_desc(&_desc->data, _attrs.get(), ib.get_engine().get_onednn_engine(), nullptr);
|
||||
_prim = dnnl::primitive(_pd, prim_cache);
|
||||
#endif
|
||||
}
|
||||
@ -191,11 +243,15 @@ public:
|
||||
static std::unique_ptr<primitive_impl> create(const convolution_node& arg, const kernel_impl_params& impl_params) {
|
||||
auto& engine = impl_params.prog->get_engine();
|
||||
auto& config = impl_params.prog->get_config();
|
||||
auto attr = get_primitive_attributes(arg);
|
||||
int zero_point_mask = -1;
|
||||
auto attr = get_primitive_attributes(arg, zero_point_mask);
|
||||
|
||||
auto prim_desc = get_convolution_primitive_descriptor(impl_params, *attr);
|
||||
|
||||
return cldnn::make_unique<convolution_onednn>(engine, config, attr, *prim_desc, get_weights_reorder(impl_params, *prim_desc, arg.get_transposed()));
|
||||
auto conv_onednn_impl = cldnn::make_unique<convolution_onednn>(engine, config, attr, *prim_desc,
|
||||
get_weights_reorder(impl_params, *prim_desc, arg.get_transposed()));
|
||||
conv_onednn_impl->set_zero_point_mask(zero_point_mask);
|
||||
return conv_onednn_impl;
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -89,7 +89,14 @@ public:
|
||||
#ifdef ONEDNN_PRIMITIVE_SERIALIZATION
|
||||
parent::save(ob);
|
||||
|
||||
ob << make_data(&_desc->data, sizeof(dnnl_deconvolution_desc_t));
|
||||
const dnnl::deconvolution_forward::primitive_desc *typed_pd
|
||||
= reinterpret_cast<const dnnl::deconvolution_forward::primitive_desc *>(&_pd);
|
||||
|
||||
ob << typed_pd->get_strides();
|
||||
ob << typed_pd->get_dilations();
|
||||
ob << typed_pd->get_padding_l();
|
||||
ob << typed_pd->get_padding_r();
|
||||
ob << typed_pd->bias_desc().is_zero();
|
||||
|
||||
std::vector<uint8_t> prim_cache;
|
||||
prim_cache = _prim.get_cache_blob();
|
||||
@ -101,16 +108,46 @@ public:
|
||||
#ifdef ONEDNN_PRIMITIVE_SERIALIZATION
|
||||
parent::load(ib);
|
||||
|
||||
const char dummy_mem[sizeof(dnnl::deconvolution_forward::desc)] = {};
|
||||
const dnnl::deconvolution_forward::desc *dummy_opdesc
|
||||
= reinterpret_cast<const dnnl::deconvolution_forward::desc *>(&dummy_mem[0]);
|
||||
_desc = std::make_shared<dnnl::deconvolution_forward::desc>(std::move(*dummy_opdesc));
|
||||
ib >> make_data(&_desc->data, sizeof(dnnl_deconvolution_desc_t));
|
||||
const kernel_impl_params* impl_params = reinterpret_cast<kernel_impl_params*>(ib.getKernlImplParams());
|
||||
|
||||
auto input_md = onednn::layout_to_memory_desc(impl_params->get_input_layout(0), dnnl::memory::format_tag::undef);
|
||||
auto weights_md = onednn::layout_to_memory_desc(impl_params->get_input_layout(1), dnnl::memory::format_tag::any);
|
||||
auto output_md = onednn::layout_to_memory_desc(impl_params->get_output_layout(), dnnl::memory::format_tag::undef);
|
||||
|
||||
dnnl::memory::dims strides;
|
||||
dnnl::memory::dims dilates;
|
||||
dnnl::memory::dims padding_l;
|
||||
dnnl::memory::dims padding_r;
|
||||
ib >> strides;
|
||||
ib >> dilates;
|
||||
ib >> padding_l;
|
||||
ib >> padding_r;
|
||||
|
||||
bool zero_bias;
|
||||
ib >> zero_bias;
|
||||
|
||||
if (zero_bias) {
|
||||
auto prim_desc = std::make_shared<dnnl::deconvolution_forward::primitive_desc>(
|
||||
ib.get_engine().get_onednn_engine(),
|
||||
dnnl::prop_kind::forward_inference, dnnl::algorithm::deconvolution_direct,
|
||||
input_md, weights_md, output_md,
|
||||
strides, dilates, padding_l, padding_r,
|
||||
*_attrs.get());
|
||||
_pd = *prim_desc;
|
||||
} else {
|
||||
auto bias_md = onednn::layout_to_memory_desc(impl_params->get_input_layout(2), dnnl::memory::format_tag::any, true);
|
||||
auto prim_desc = std::make_shared<dnnl::deconvolution_forward::primitive_desc>(
|
||||
ib.get_engine().get_onednn_engine(),
|
||||
dnnl::prop_kind::forward_inference, dnnl::algorithm::deconvolution_direct,
|
||||
input_md, weights_md, bias_md, output_md,
|
||||
strides, dilates, padding_l, padding_r,
|
||||
*_attrs.get());
|
||||
_pd = *prim_desc;
|
||||
}
|
||||
|
||||
std::vector<uint8_t> prim_cache;
|
||||
ib >> prim_cache;
|
||||
|
||||
_pd = dnnl::primitive_desc(&_desc->data, _attrs.get(), ib.get_engine().get_onednn_engine(), nullptr);
|
||||
_prim = dnnl::primitive(_pd, prim_cache);
|
||||
#endif
|
||||
}
|
||||
|
@ -96,10 +96,8 @@ protected:
|
||||
}
|
||||
|
||||
static std::shared_ptr<dnnl::inner_product_forward::primitive_desc> get_fully_connected_primitive_descriptor(const kernel_impl_params& impl_params,
|
||||
cldnn::engine& engine, size_t input_size, bool has_bias,
|
||||
const dnnl::primitive_attr& attr = dnnl::primitive_attr()) {
|
||||
auto& engine = impl_params.prog->get_engine();
|
||||
auto prim = impl_params.typed_desc<fully_connected>();
|
||||
|
||||
auto input_layout = impl_params.get_input_layout(0);
|
||||
auto weights_layout = impl_params.get_input_layout(1);
|
||||
auto output_layout = impl_params.get_output_layout();
|
||||
@ -107,24 +105,24 @@ protected:
|
||||
auto input_pshape = input_layout.get_partial_shape();
|
||||
auto weights_pshape = weights_layout.get_partial_shape();
|
||||
|
||||
int64_t feature = input_pshape[std::min(prim->input_size, static_cast<size_t>(4)) - 1].get_length();
|
||||
if (prim->input_size == 3) {
|
||||
int64_t feature = input_pshape[std::min(input_size, static_cast<size_t>(4)) - 1].get_length();
|
||||
if (input_size == 3) {
|
||||
feature = std::max({input_layout.spatial(0), input_layout.spatial(1), input_layout.spatial(2)});
|
||||
}
|
||||
|
||||
if (prim->input_size > 3) {
|
||||
if (input_size > 3) {
|
||||
input_layout.set_partial_shape(reshape_to_2d(input_pshape, feature));
|
||||
}
|
||||
if (weights_pshape.size() != 2) {
|
||||
weights_layout.set_partial_shape(reshape_to_2d(weights_pshape, feature));
|
||||
}
|
||||
if (prim->input_size == 3) {
|
||||
if (input_size == 3) {
|
||||
output_layout.set_partial_shape({ input_layout.batch(), input_layout.feature(), weights_layout.batch(), 1 });
|
||||
} else {
|
||||
output_layout.set_partial_shape({ input_layout.batch(), weights_layout.batch() });
|
||||
}
|
||||
|
||||
if (prim->input_size == 3) {
|
||||
if (input_size == 3) {
|
||||
combine_bf_with_first_spatial_dim(input_layout);
|
||||
combine_bf_with_first_spatial_dim(output_layout);
|
||||
}
|
||||
@ -133,7 +131,7 @@ protected:
|
||||
auto weights_md = onednn::layout_to_memory_desc(weights_layout, dnnl::memory::format_tag::any);
|
||||
auto output_md = onednn::layout_to_memory_desc(output_layout, dnnl::memory::format_tag::ab, false);
|
||||
|
||||
if (!prim->bias.empty()) {
|
||||
if (has_bias) {
|
||||
auto bias_md = onednn::layout_to_memory_desc(impl_params.get_input_layout(2), dnnl::memory::format_tag::any, true);
|
||||
return std::make_shared<dnnl::inner_product_forward::primitive_desc>(
|
||||
engine.get_onednn_engine(),
|
||||
@ -159,7 +157,12 @@ public:
|
||||
#ifdef ONEDNN_PRIMITIVE_SERIALIZATION
|
||||
parent::save(ob);
|
||||
|
||||
ob << make_data(&_desc->data, sizeof(dnnl_inner_product_desc_t));
|
||||
const kernel_impl_params* impl_params = reinterpret_cast<kernel_impl_params*>(ob.getKernlImplParams());
|
||||
auto prim = impl_params->typed_desc<fully_connected>();
|
||||
size_t input_size = prim->input_size;
|
||||
bool has_bias = !prim->bias.empty();
|
||||
ob << input_size;
|
||||
ob << has_bias;
|
||||
|
||||
std::vector<uint8_t> prim_cache;
|
||||
prim_cache = _prim.get_cache_blob();
|
||||
@ -171,16 +174,18 @@ public:
|
||||
#ifdef ONEDNN_PRIMITIVE_SERIALIZATION
|
||||
parent::load(ib);
|
||||
|
||||
const char dummy_mem[sizeof(dnnl::inner_product_forward::desc)] = {};
|
||||
const dnnl::inner_product_forward::desc *dummy_opdesc
|
||||
= reinterpret_cast<const dnnl::inner_product_forward::desc *>(&dummy_mem[0]);
|
||||
_desc = std::make_shared<dnnl::inner_product_forward::desc>(std::move(*dummy_opdesc));
|
||||
ib >> make_data(&_desc->data, sizeof(dnnl_inner_product_desc_t));
|
||||
size_t input_size = 2;
|
||||
bool has_bias = false;
|
||||
ib >> input_size;
|
||||
ib >> has_bias;
|
||||
|
||||
const kernel_impl_params* impl_params = reinterpret_cast<kernel_impl_params*>(ib.getKernlImplParams());
|
||||
auto prim_desc = get_fully_connected_primitive_descriptor(*impl_params, ib.get_engine(), input_size, has_bias, *_attrs);
|
||||
_pd = *prim_desc;
|
||||
|
||||
std::vector<uint8_t> prim_cache;
|
||||
ib >> prim_cache;
|
||||
|
||||
_pd = dnnl::primitive_desc(&_desc->data, _attrs.get(), ib.get_engine().get_onednn_engine(), nullptr);
|
||||
_prim = dnnl::primitive(_pd, prim_cache);
|
||||
#endif
|
||||
}
|
||||
@ -189,7 +194,9 @@ public:
|
||||
auto& engine = impl_params.prog->get_engine();
|
||||
auto& config = impl_params.prog->get_config();
|
||||
auto attr = arg.get_onednn_primitive_attributes();
|
||||
auto prim_desc = get_fully_connected_primitive_descriptor(impl_params, *attr);
|
||||
auto prim = impl_params.typed_desc<fully_connected>();
|
||||
auto prim_desc = get_fully_connected_primitive_descriptor(impl_params, impl_params.prog->get_engine(),
|
||||
prim->input_size, !prim->bias.empty(), *attr);
|
||||
|
||||
return cldnn::make_unique<fully_connected_onednn>(engine, config, attr, *prim_desc, get_weights_reorder(impl_params, *prim_desc));
|
||||
}
|
||||
|
@ -53,11 +53,21 @@ protected:
|
||||
}
|
||||
}
|
||||
|
||||
static std::shared_ptr<dnnl::matmul::primitive_desc> get_gemm_primitive_descriptor(const kernel_impl_params& impl_params,
|
||||
const dnnl::primitive_attr& attr = dnnl::primitive_attr()) {
|
||||
auto& engine = impl_params.prog->get_engine();
|
||||
static void get_gemm_primitive_md(const kernel_impl_params& impl_params,
|
||||
dnnl::memory::data_type& in0_dt,
|
||||
dnnl::memory::data_type& in1_dt,
|
||||
dnnl::memory::data_type& out_dt,
|
||||
dnnl::memory::dims& in0_dims,
|
||||
dnnl::memory::dims& in1_dims,
|
||||
dnnl::memory::dims& out_dims,
|
||||
dnnl::memory::format_tag& in0_fmt,
|
||||
dnnl::memory::format_tag& in1_fmt,
|
||||
dnnl::memory::format_tag& out_fmt,
|
||||
bool gemm_with_bias,
|
||||
dnnl::memory::data_type& bias_dt,
|
||||
dnnl::memory::dims& bias_dims,
|
||||
dnnl::memory::format_tag& bias_fmt) {
|
||||
auto prim = impl_params.typed_desc<gemm>();
|
||||
auto gemm_with_bias = prim->dependencies().size() == 3;
|
||||
auto out_l = impl_params.get_output_layout();
|
||||
|
||||
std::vector<layout> in_layouts { impl_params.get_input_layout(0), impl_params.get_input_layout(1) };
|
||||
@ -84,17 +94,17 @@ protected:
|
||||
|
||||
size_t rank = cldnn::format::dimension(out_l.format);
|
||||
|
||||
dnnl::memory::data_type in0_dt = onednn::convert_data_type(in0_l.data_type);
|
||||
dnnl::memory::data_type in1_dt = onednn::convert_data_type(in1_l.data_type);
|
||||
dnnl::memory::data_type out_dt = onednn::convert_data_type(out_l.data_type);
|
||||
in0_dt = onednn::convert_data_type(in0_l.data_type);
|
||||
in1_dt = onednn::convert_data_type(in1_l.data_type);
|
||||
out_dt = onednn::convert_data_type(out_l.data_type);
|
||||
|
||||
dnnl::memory::dims in0_dims = onednn::convert_gemm_tensor(in0_l.get_tensor(), rank, batched_dims_can_be_removed);
|
||||
dnnl::memory::dims in1_dims = onednn::convert_gemm_tensor(in1_l.get_tensor(), rank, batched_dims_can_be_removed);
|
||||
dnnl::memory::dims out_dims = onednn::convert_gemm_tensor(out_l.get_tensor(), rank, batched_dims_can_be_removed);
|
||||
in0_dims = onednn::convert_gemm_tensor(in0_l.get_tensor(), rank, batched_dims_can_be_removed);
|
||||
in1_dims = onednn::convert_gemm_tensor(in1_l.get_tensor(), rank, batched_dims_can_be_removed);
|
||||
out_dims = onednn::convert_gemm_tensor(out_l.get_tensor(), rank, batched_dims_can_be_removed);
|
||||
|
||||
dnnl::memory::format_tag in0_fmt = onednn::convert_gemm_data_format(in0_dims);
|
||||
dnnl::memory::format_tag in1_fmt = onednn::convert_gemm_data_format(in1_dims);
|
||||
dnnl::memory::format_tag out_fmt = onednn::convert_gemm_data_format(out_dims);
|
||||
in0_fmt = onednn::convert_gemm_data_format(in0_dims);
|
||||
in1_fmt = onednn::convert_gemm_data_format(in1_dims);
|
||||
out_fmt = onednn::convert_gemm_data_format(out_dims);
|
||||
|
||||
if (prim->transpose_input0) {
|
||||
in0_fmt = transpose_format(in0_fmt);
|
||||
@ -106,16 +116,44 @@ protected:
|
||||
std::swap(in1_dims[in1_dims.size() - 1], in1_dims[in1_dims.size() - 2]);
|
||||
}
|
||||
|
||||
if (gemm_with_bias) {
|
||||
auto bias_l = impl_params.get_input_layout(2);
|
||||
auto bias_rank = cldnn::format::dimension(bias_l.format);
|
||||
bias_dt = onednn::convert_data_type(bias_l.data_type);
|
||||
bias_dims = onednn::convert_gemm_tensor(bias_l.get_tensor(), bias_rank, batched_dims_can_be_removed);
|
||||
bias_fmt = onednn::convert_gemm_data_format(bias_dims);
|
||||
}
|
||||
}
|
||||
|
||||
static std::shared_ptr<dnnl::matmul::primitive_desc> get_gemm_primitive_descriptor(const kernel_impl_params& impl_params,
|
||||
const dnnl::primitive_attr& attr = dnnl::primitive_attr()) {
|
||||
auto& engine = impl_params.prog->get_engine();
|
||||
auto prim = impl_params.typed_desc<gemm>();
|
||||
auto gemm_with_bias = prim->dependencies().size() == 3;
|
||||
|
||||
dnnl::memory::data_type in0_dt;
|
||||
dnnl::memory::data_type in1_dt;
|
||||
dnnl::memory::data_type out_dt;
|
||||
dnnl::memory::data_type bias_dt;
|
||||
|
||||
dnnl::memory::dims in0_dims;
|
||||
dnnl::memory::dims in1_dims;
|
||||
dnnl::memory::dims out_dims;
|
||||
dnnl::memory::dims bias_dims;
|
||||
|
||||
dnnl::memory::format_tag in0_fmt;
|
||||
dnnl::memory::format_tag in1_fmt;
|
||||
dnnl::memory::format_tag out_fmt;
|
||||
dnnl::memory::format_tag bias_fmt;
|
||||
|
||||
get_gemm_primitive_md(impl_params, in0_dt, in1_dt, out_dt, in0_dims, in1_dims, out_dims, in0_fmt, in1_fmt, out_fmt,
|
||||
gemm_with_bias, bias_dt, bias_dims, bias_fmt);
|
||||
|
||||
dnnl::memory::desc in0_md(in0_dims, in0_dt, in0_fmt);
|
||||
dnnl::memory::desc in1_md(in1_dims, in1_dt, in1_fmt);
|
||||
dnnl::memory::desc out_md(out_dims, out_dt, out_fmt);
|
||||
|
||||
if (gemm_with_bias) {
|
||||
auto bias_l = impl_params.get_input_layout(2);
|
||||
auto bias_rank = cldnn::format::dimension(bias_l.format);
|
||||
dnnl::memory::data_type bias_dt = onednn::convert_data_type(bias_l.data_type);
|
||||
dnnl::memory::dims bias_dims = onednn::convert_gemm_tensor(bias_l.get_tensor(), bias_rank, batched_dims_can_be_removed);
|
||||
dnnl::memory::format_tag bias_fmt = onednn::convert_gemm_data_format(bias_dims);
|
||||
dnnl::memory::desc bias_md(bias_dims, bias_dt, bias_fmt);
|
||||
|
||||
return std::make_shared<dnnl::matmul::primitive_desc>(
|
||||
@ -140,7 +178,47 @@ public:
|
||||
#ifdef ONEDNN_PRIMITIVE_SERIALIZATION
|
||||
parent::save(ob);
|
||||
|
||||
ob << make_data(&_desc->data, sizeof(dnnl_matmul_desc_t));
|
||||
const kernel_impl_params* impl_params = reinterpret_cast<kernel_impl_params*>(ob.getKernlImplParams());
|
||||
auto prim = impl_params->typed_desc<gemm>();
|
||||
bool gemm_with_bias = prim->dependencies().size() == 3;
|
||||
|
||||
ob << gemm_with_bias;
|
||||
|
||||
dnnl::memory::data_type in0_dt;
|
||||
dnnl::memory::data_type in1_dt;
|
||||
dnnl::memory::data_type out_dt;
|
||||
dnnl::memory::data_type bias_dt;
|
||||
|
||||
dnnl::memory::dims in0_dims;
|
||||
dnnl::memory::dims in1_dims;
|
||||
dnnl::memory::dims out_dims;
|
||||
dnnl::memory::dims bias_dims;
|
||||
|
||||
dnnl::memory::format_tag in0_fmt;
|
||||
dnnl::memory::format_tag in1_fmt;
|
||||
dnnl::memory::format_tag out_fmt;
|
||||
dnnl::memory::format_tag bias_fmt;
|
||||
|
||||
get_gemm_primitive_md(*impl_params, in0_dt, in1_dt, out_dt, in0_dims, in1_dims, out_dims, in0_fmt, in1_fmt, out_fmt,
|
||||
gemm_with_bias, bias_dt, bias_dims, bias_fmt);
|
||||
|
||||
ob << make_data(&in0_dt, sizeof(dnnl::memory::data_type));
|
||||
ob << make_data(&in1_dt, sizeof(dnnl::memory::data_type));
|
||||
ob << make_data(&out_dt, sizeof(dnnl::memory::data_type));
|
||||
|
||||
ob << in0_dims;
|
||||
ob << in1_dims;
|
||||
ob << out_dims;
|
||||
|
||||
ob << make_data(&in0_fmt, sizeof(dnnl::memory::format_tag));
|
||||
ob << make_data(&in1_fmt, sizeof(dnnl::memory::format_tag));
|
||||
ob << make_data(&out_fmt, sizeof(dnnl::memory::format_tag));
|
||||
|
||||
if (gemm_with_bias) {
|
||||
ob << make_data(&bias_dt, sizeof(dnnl::memory::data_type));
|
||||
ob << bias_dims;
|
||||
ob << make_data(&bias_fmt, sizeof(dnnl::memory::format_tag));
|
||||
}
|
||||
|
||||
std::vector<uint8_t> prim_cache;
|
||||
prim_cache = _prim.get_cache_blob();
|
||||
@ -152,16 +230,72 @@ public:
|
||||
#ifdef ONEDNN_PRIMITIVE_SERIALIZATION
|
||||
parent::load(ib);
|
||||
|
||||
const char dummy_mem[sizeof(dnnl::matmul::desc)] = {};
|
||||
const dnnl::matmul::desc *dummy_opdesc
|
||||
= reinterpret_cast<const dnnl::matmul::desc *>(&dummy_mem[0]);
|
||||
_desc = std::make_shared<dnnl::matmul::desc>(std::move(*dummy_opdesc));
|
||||
ib >> make_data(&_desc->data, sizeof(dnnl_matmul_desc_t));
|
||||
bool gemm_with_bias;
|
||||
ib >> gemm_with_bias;
|
||||
|
||||
dnnl::memory::data_type in0_dt;
|
||||
dnnl::memory::data_type in1_dt;
|
||||
dnnl::memory::data_type out_dt;
|
||||
dnnl::memory::data_type bias_dt;
|
||||
|
||||
dnnl::memory::dims in0_dims;
|
||||
dnnl::memory::dims in1_dims;
|
||||
dnnl::memory::dims out_dims;
|
||||
dnnl::memory::dims bias_dims;
|
||||
|
||||
dnnl::memory::format_tag in0_fmt;
|
||||
dnnl::memory::format_tag in1_fmt;
|
||||
dnnl::memory::format_tag out_fmt;
|
||||
dnnl::memory::format_tag bias_fmt;
|
||||
|
||||
ib >> make_data(&in0_dt, sizeof(dnnl::memory::data_type));
|
||||
ib >> make_data(&in1_dt, sizeof(dnnl::memory::data_type));
|
||||
ib >> make_data(&out_dt, sizeof(dnnl::memory::data_type));
|
||||
|
||||
ib >> in0_dims;
|
||||
ib >> in1_dims;
|
||||
ib >> out_dims;
|
||||
|
||||
ib >> make_data(&in0_fmt, sizeof(dnnl::memory::format_tag));
|
||||
ib >> make_data(&in1_fmt, sizeof(dnnl::memory::format_tag));
|
||||
ib >> make_data(&out_fmt, sizeof(dnnl::memory::format_tag));
|
||||
|
||||
if (gemm_with_bias) {
|
||||
ib >> make_data(&bias_dt, sizeof(dnnl::memory::data_type));
|
||||
ib >> bias_dims;
|
||||
ib >> make_data(&bias_fmt, sizeof(dnnl::memory::format_tag));
|
||||
}
|
||||
|
||||
dnnl::memory::desc in0_md(in0_dims, in0_dt, in0_fmt);
|
||||
dnnl::memory::desc in1_md(in1_dims, in1_dt, in1_fmt);
|
||||
dnnl::memory::desc out_md(out_dims, out_dt, out_fmt);
|
||||
|
||||
if (gemm_with_bias) {
|
||||
dnnl::memory::desc bias_md(bias_dims, bias_dt, bias_fmt);
|
||||
|
||||
auto prim_desc = std::make_shared<dnnl::matmul::primitive_desc>(
|
||||
ib.get_engine().get_onednn_engine(),
|
||||
in0_md,
|
||||
in1_md,
|
||||
bias_md,
|
||||
out_md,
|
||||
*_attrs.get());
|
||||
|
||||
_pd = *prim_desc;
|
||||
} else {
|
||||
auto prim_desc = std::make_shared<dnnl::matmul::primitive_desc>(
|
||||
ib.get_engine().get_onednn_engine(),
|
||||
in0_md,
|
||||
in1_md,
|
||||
out_md,
|
||||
*_attrs.get());
|
||||
|
||||
_pd = *prim_desc;
|
||||
}
|
||||
|
||||
std::vector<uint8_t> prim_cache;
|
||||
ib >> prim_cache;
|
||||
|
||||
_pd = dnnl::primitive_desc(&_desc->data, _attrs.get(), ib.get_engine().get_onednn_engine(), nullptr);
|
||||
_prim = dnnl::primitive(_pd, prim_cache);
|
||||
#endif
|
||||
}
|
||||
|
@ -87,7 +87,16 @@ public:
|
||||
#ifdef ONEDNN_PRIMITIVE_SERIALIZATION
|
||||
parent::save(ob);
|
||||
|
||||
ob << make_data(&_desc->data, sizeof(dnnl_pooling_desc_t));
|
||||
const dnnl::pooling_forward::primitive_desc *typed_pd
|
||||
= reinterpret_cast<const dnnl::pooling_forward::primitive_desc *>(&_pd);
|
||||
|
||||
dnnl::algorithm alg = typed_pd->get_algorithm();
|
||||
ob << make_data(&alg, sizeof(dnnl::algorithm));
|
||||
ob << typed_pd->get_strides();
|
||||
ob << typed_pd->get_kernel();
|
||||
ob << typed_pd->get_dilations();
|
||||
ob << typed_pd->get_padding_l();
|
||||
ob << typed_pd->get_padding_r();
|
||||
|
||||
std::vector<uint8_t> prim_cache;
|
||||
prim_cache = _prim.get_cache_blob();
|
||||
@ -99,16 +108,42 @@ public:
|
||||
#ifdef ONEDNN_PRIMITIVE_SERIALIZATION
|
||||
parent::load(ib);
|
||||
|
||||
const char dummy_mem[sizeof(dnnl::pooling_forward::desc)] = {};
|
||||
const dnnl::pooling_forward::desc *dummy_opdesc
|
||||
= reinterpret_cast<const dnnl::pooling_forward::desc *>(&dummy_mem[0]);
|
||||
_desc = std::make_shared<dnnl::pooling_forward::desc>(std::move(*dummy_opdesc));
|
||||
ib >> make_data(&_desc->data, sizeof(dnnl_pooling_desc_t));
|
||||
const kernel_impl_params* impl_params = reinterpret_cast<kernel_impl_params*>(ib.getKernlImplParams());
|
||||
|
||||
dnnl::algorithm alg;
|
||||
ib >> make_data(&alg, sizeof(dnnl::algorithm));
|
||||
|
||||
auto input_md = onednn::layout_to_memory_desc(impl_params->get_input_layout(0));
|
||||
auto output_md = onednn::layout_to_memory_desc(impl_params->get_output_layout());
|
||||
|
||||
dnnl::memory::dims stride;
|
||||
dnnl::memory::dims kernel;
|
||||
dnnl::memory::dims dilation;
|
||||
dnnl::memory::dims pad_l;
|
||||
dnnl::memory::dims pad_r;
|
||||
ib >> stride;
|
||||
ib >> kernel;
|
||||
ib >> dilation;
|
||||
ib >> pad_l;
|
||||
ib >> pad_r;
|
||||
|
||||
auto prim_desc = std::make_shared<dnnl::pooling_forward::primitive_desc>(
|
||||
ib.get_engine().get_onednn_engine(),
|
||||
dnnl::prop_kind::forward_inference,
|
||||
alg,
|
||||
input_md,
|
||||
output_md,
|
||||
stride,
|
||||
kernel,
|
||||
dilation,
|
||||
pad_l,
|
||||
pad_r,
|
||||
*_attrs.get());
|
||||
_pd = *prim_desc;
|
||||
|
||||
std::vector<uint8_t> prim_cache;
|
||||
ib >> prim_cache;
|
||||
|
||||
_pd = dnnl::primitive_desc(&_desc->data, _attrs.get(), ib.get_engine().get_onednn_engine(), nullptr);
|
||||
_prim = dnnl::primitive(_pd, prim_cache);
|
||||
#endif
|
||||
}
|
||||
|
@ -4,6 +4,8 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#define ONEDNN_PRIMITIVE_SERIALIZATION
|
||||
|
||||
#include "primitive_inst.h"
|
||||
#include "intel_gpu/graph/serialization/binary_buffer.hpp"
|
||||
#include "intel_gpu/runtime/error_handler.hpp"
|
||||
@ -74,47 +76,23 @@ struct typed_primitive_onednn_impl : public typed_primitive_impl<PType> {
|
||||
// [ dnnl::cache_blob ]
|
||||
void save(BinaryOutputBuffer& ob) const override {
|
||||
#ifdef ONEDNN_PRIMITIVE_SERIALIZATION
|
||||
if (_attrs.get() == nullptr) {
|
||||
if (_attrs->get() == nullptr) {
|
||||
ob << false;
|
||||
} else {
|
||||
ob << true;
|
||||
}
|
||||
|
||||
if (_attrs.get() != nullptr) {
|
||||
if (_attrs->get() != nullptr) {
|
||||
{
|
||||
int mask;
|
||||
std::vector<float> scales;
|
||||
std::vector<int32_t> zero_points;
|
||||
|
||||
_attrs.get()->get_output_scales(mask, scales);
|
||||
ob << mask << scales;
|
||||
|
||||
scales.clear();
|
||||
_attrs.get()->get_scales(DNNL_ARG_SRC_0, mask, scales);
|
||||
ob << mask << scales;
|
||||
scales.clear();
|
||||
_attrs.get()->get_scales(DNNL_ARG_SRC_1, mask, scales);
|
||||
ob << mask << scales;
|
||||
|
||||
_attrs.get()->get_zero_points(DNNL_ARG_SRC, mask, zero_points);
|
||||
ob << mask << zero_points;
|
||||
zero_points.clear();
|
||||
_attrs.get()->get_zero_points(DNNL_ARG_WEIGHTS, mask, zero_points);
|
||||
ob << mask << zero_points;
|
||||
zero_points.clear();
|
||||
_attrs.get()->get_zero_points(DNNL_ARG_DST, mask, zero_points);
|
||||
ob << mask << zero_points;
|
||||
}
|
||||
{
|
||||
dnnl::scratchpad_mode _scratchpad_mode = _attrs.get()->get_scratchpad_mode();
|
||||
dnnl::scratchpad_mode _scratchpad_mode = _attrs->get_scratchpad_mode();
|
||||
ob << make_data(&_scratchpad_mode, sizeof(dnnl::scratchpad_mode));
|
||||
}
|
||||
{
|
||||
dnnl::fpmath_mode _fmath_mode = _attrs.get()->get_fpmath_mode();
|
||||
dnnl::fpmath_mode _fmath_mode = _attrs->get_fpmath_mode();
|
||||
ob << make_data(&_fmath_mode, sizeof(dnnl::fpmath_mode));
|
||||
}
|
||||
{
|
||||
const dnnl::post_ops _post_ops = _attrs.get()->get_post_ops();
|
||||
const dnnl::post_ops _post_ops = _attrs->get_post_ops();
|
||||
|
||||
ob << _post_ops.len();
|
||||
for (int idx = 0; idx < _post_ops.len(); ++idx) {
|
||||
@ -133,13 +111,11 @@ struct typed_primitive_onednn_impl : public typed_primitive_impl<PType> {
|
||||
ob << zero_point;
|
||||
ob << make_data(&data_type, sizeof(dnnl::memory::data_type));
|
||||
} else if (_kind == dnnl::primitive::kind::eltwise) {
|
||||
float scale;
|
||||
dnnl::algorithm aalgorithm;
|
||||
float alpha;
|
||||
float beta;
|
||||
|
||||
_post_ops.get_params_eltwise(idx, scale, aalgorithm, alpha, beta);
|
||||
ob << scale;
|
||||
_post_ops.get_params_eltwise(idx, aalgorithm, alpha, beta);
|
||||
ob << make_data(&aalgorithm, sizeof(dnnl::algorithm));
|
||||
ob << alpha;
|
||||
ob << beta;
|
||||
@ -147,24 +123,16 @@ struct typed_primitive_onednn_impl : public typed_primitive_impl<PType> {
|
||||
dnnl::memory::data_type weights_data_type;
|
||||
dnnl::memory::data_type bias_data_type;
|
||||
dnnl::memory::data_type dst_data_type;
|
||||
int mask;
|
||||
std::vector<float> scales;
|
||||
dnnl::memory::dim kernel_size;
|
||||
dnnl::memory::dim stride_size;
|
||||
dnnl::memory::dim padding_l_size;
|
||||
|
||||
try {
|
||||
_post_ops.get_params_dw_k3s1p1(idx, weights_data_type, bias_data_type, dst_data_type, mask, scales);
|
||||
int stride = 1;
|
||||
ob << stride;
|
||||
} catch (...) {
|
||||
_post_ops.get_params_dw_k3s2p1(idx, weights_data_type, bias_data_type, dst_data_type, mask, scales);
|
||||
int stride = 2;
|
||||
ob << stride;
|
||||
}
|
||||
_post_ops.get_params_dw(idx, weights_data_type, bias_data_type, dst_data_type, kernel_size, stride_size, padding_l_size);
|
||||
|
||||
ob << make_data(&weights_data_type, sizeof(dnnl::memory::data_type));
|
||||
ob << make_data(&bias_data_type, sizeof(dnnl::memory::data_type));
|
||||
ob << make_data(&dst_data_type, sizeof(dnnl::memory::data_type));
|
||||
ob << mask;
|
||||
ob << scales;
|
||||
ob << kernel_size << stride_size << padding_l_size;
|
||||
} else if (_kind == dnnl::primitive::kind::binary) {
|
||||
dnnl::algorithm aalgorithm;
|
||||
dnnl::memory::desc src1_desc;
|
||||
@ -172,7 +140,6 @@ struct typed_primitive_onednn_impl : public typed_primitive_impl<PType> {
|
||||
_post_ops.get_params_binary(idx, aalgorithm, src1_desc);
|
||||
|
||||
ob << make_data(&aalgorithm, sizeof(dnnl::algorithm));
|
||||
ob << make_data(&src1_desc, sizeof(dnnl::memory::desc));
|
||||
} else if (_kind == dnnl::primitive::kind::prelu) {
|
||||
int mask;
|
||||
|
||||
@ -184,14 +151,14 @@ struct typed_primitive_onednn_impl : public typed_primitive_impl<PType> {
|
||||
}
|
||||
{
|
||||
float scale, shift;
|
||||
_attrs.get()->get_rnn_data_qparams(scale, shift);
|
||||
_attrs->get_rnn_data_qparams(scale, shift);
|
||||
ob << scale << shift;
|
||||
}
|
||||
{
|
||||
int mask;
|
||||
std::vector<float> scales;
|
||||
|
||||
_attrs.get()->get_rnn_weights_qparams(mask, scales);
|
||||
_attrs->get_rnn_weights_qparams(mask, scales);
|
||||
|
||||
ob << mask;
|
||||
ob << scales;
|
||||
@ -200,7 +167,7 @@ struct typed_primitive_onednn_impl : public typed_primitive_impl<PType> {
|
||||
int mask;
|
||||
std::vector<float> scales;
|
||||
|
||||
_attrs.get()->get_rnn_weights_projection_qparams(mask, scales);
|
||||
_attrs->get_rnn_weights_projection_qparams(mask, scales);
|
||||
|
||||
ob << mask;
|
||||
ob << scales;
|
||||
@ -216,71 +183,31 @@ struct typed_primitive_onednn_impl : public typed_primitive_impl<PType> {
|
||||
|
||||
if (has_attrs) {
|
||||
{
|
||||
int mask;
|
||||
std::vector<float> scales;
|
||||
ib >> mask >> scales;
|
||||
|
||||
_attrs.get()->set_output_scales(mask, scales);
|
||||
}
|
||||
{
|
||||
int mask;
|
||||
std::vector<float> scales;
|
||||
bool default_output_scales = true;
|
||||
|
||||
_attrs.get()->get_output_scales(mask, scales);
|
||||
for (float scale : scales) {
|
||||
if (scale != 1.) {
|
||||
default_output_scales = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
scales.clear();
|
||||
ib >> mask >> scales;
|
||||
if (default_output_scales)
|
||||
_attrs.get()->set_scales(DNNL_ARG_SRC_0, mask, scales);
|
||||
scales.clear();
|
||||
ib >> mask >> scales;
|
||||
if (default_output_scales)
|
||||
_attrs.get()->set_scales(DNNL_ARG_SRC_1, mask, scales);
|
||||
}
|
||||
{
|
||||
int mask;
|
||||
std::vector<int32_t> zero_points;
|
||||
ib >> mask >> zero_points;
|
||||
_attrs.get()->set_zero_points(DNNL_ARG_SRC, mask, zero_points);
|
||||
zero_points.clear();
|
||||
ib >> mask >> zero_points;
|
||||
_attrs.get()->set_zero_points(DNNL_ARG_WEIGHTS, mask, zero_points);
|
||||
zero_points.clear();
|
||||
ib >> mask >> zero_points;
|
||||
_attrs.get()->set_zero_points(DNNL_ARG_DST, mask, zero_points);
|
||||
}
|
||||
{
|
||||
dnnl::scratchpad_mode _scratchpad_mode;
|
||||
dnnl::scratchpad_mode _scratchpad_mode = dnnl::scratchpad_mode::library;
|
||||
ib >> make_data(&_scratchpad_mode, sizeof(dnnl::scratchpad_mode));
|
||||
_attrs.get()->set_scratchpad_mode(_scratchpad_mode);
|
||||
_attrs->set_scratchpad_mode(_scratchpad_mode);
|
||||
}
|
||||
{
|
||||
dnnl::fpmath_mode _fmath_mode;
|
||||
dnnl::fpmath_mode _fmath_mode = dnnl::fpmath_mode::any;
|
||||
ib >> make_data(&_fmath_mode, sizeof(dnnl::fpmath_mode));
|
||||
_attrs.get()->set_fpmath_mode(_fmath_mode);
|
||||
_attrs->set_fpmath_mode(_fmath_mode);
|
||||
}
|
||||
{
|
||||
const kernel_impl_params* impl_params = reinterpret_cast<kernel_impl_params*>(ib.getKernlImplParams());
|
||||
const std::vector<cldnn::fused_primitive_desc_onednn>& fused_desc = impl_params->fused_desc_onednn;
|
||||
dnnl::post_ops _post_ops;
|
||||
|
||||
int post_ops_len;
|
||||
|
||||
ib >> post_ops_len;
|
||||
for (int idx = 0; idx < post_ops_len; ++idx) {
|
||||
dnnl::primitive::kind _kind;
|
||||
dnnl::primitive::kind _kind = dnnl::primitive::kind::undef;
|
||||
|
||||
ib >> make_data(&_kind, sizeof(dnnl::primitive::kind));
|
||||
|
||||
if (_kind == dnnl::primitive::kind::sum) {
|
||||
float scale;
|
||||
int32_t zero_point;
|
||||
dnnl::memory::data_type data_type;
|
||||
dnnl::memory::data_type data_type = dnnl::memory::data_type::undef;
|
||||
|
||||
ib >> scale;
|
||||
ib >> zero_point;
|
||||
@ -288,44 +215,38 @@ struct typed_primitive_onednn_impl : public typed_primitive_impl<PType> {
|
||||
|
||||
_post_ops.append_sum(scale, zero_point, data_type);
|
||||
} else if (_kind == dnnl::primitive::kind::eltwise) {
|
||||
float scale;
|
||||
dnnl::algorithm aalgorithm;
|
||||
dnnl::algorithm aalgorithm = dnnl::algorithm::undef;
|
||||
float alpha;
|
||||
float beta;
|
||||
|
||||
ib >> scale;
|
||||
ib >> make_data(&aalgorithm, sizeof(dnnl::algorithm));
|
||||
ib >> alpha;
|
||||
ib >> beta;
|
||||
_post_ops.append_eltwise(scale, aalgorithm, alpha, beta);
|
||||
_post_ops.append_eltwise(aalgorithm, alpha, beta);
|
||||
} else if (_kind == dnnl::primitive::kind::convolution) {
|
||||
int stride;
|
||||
dnnl::memory::data_type weights_data_type;
|
||||
dnnl::memory::data_type bias_data_type;
|
||||
dnnl::memory::data_type dst_data_type;
|
||||
int mask;
|
||||
std::vector<float> scales;
|
||||
dnnl::memory::data_type weights_data_type = dnnl::memory::data_type::undef;
|
||||
dnnl::memory::data_type bias_data_type = dnnl::memory::data_type::undef;
|
||||
dnnl::memory::data_type dst_data_type = dnnl::memory::data_type::undef;
|
||||
dnnl::memory::dim kernel_size;
|
||||
dnnl::memory::dim stride_size;
|
||||
dnnl::memory::dim padding_l_size;
|
||||
|
||||
ib >> stride;
|
||||
ib >> make_data(&weights_data_type, sizeof(dnnl::memory::data_type));
|
||||
ib >> make_data(&bias_data_type, sizeof(dnnl::memory::data_type));
|
||||
ib >> make_data(&dst_data_type, sizeof(dnnl::memory::data_type));
|
||||
ib >> mask;
|
||||
ib >> scales;
|
||||
ib >> kernel_size >> stride_size >> padding_l_size;
|
||||
|
||||
if (stride == 1) {
|
||||
_post_ops.append_dw_k3s1p1(weights_data_type, bias_data_type, dst_data_type, mask, scales);
|
||||
} else {
|
||||
_post_ops.append_dw_k3s2p1(weights_data_type, bias_data_type, dst_data_type, mask, scales);
|
||||
}
|
||||
_post_ops.append_dw(weights_data_type, bias_data_type, dst_data_type,
|
||||
kernel_size, stride_size, padding_l_size);
|
||||
} else if (_kind == dnnl::primitive::kind::binary) {
|
||||
dnnl::algorithm aalgorithm;
|
||||
dnnl::memory::desc src1_desc;
|
||||
|
||||
dnnl::algorithm aalgorithm = dnnl::algorithm::undef;
|
||||
ib >> make_data(&aalgorithm, sizeof(dnnl::algorithm));
|
||||
ib >> make_data(&src1_desc, sizeof(dnnl::memory::desc));
|
||||
|
||||
_post_ops.append_binary(aalgorithm, src1_desc);
|
||||
dnnl::memory::desc md = onednn::layout_to_memory_desc(
|
||||
impl_params->get_input_layout(fused_desc.at(idx).mem_dep),
|
||||
fused_desc.at(idx).tag, fused_desc.at(idx).flatten);
|
||||
|
||||
_post_ops.append_binary(aalgorithm, md);
|
||||
} else if (_kind == dnnl::primitive::kind::prelu) {
|
||||
int mask;
|
||||
ib >> mask;
|
||||
@ -333,14 +254,14 @@ struct typed_primitive_onednn_impl : public typed_primitive_impl<PType> {
|
||||
}
|
||||
}
|
||||
|
||||
_attrs.get()->set_post_ops(_post_ops);
|
||||
_attrs->set_post_ops(_post_ops);
|
||||
}
|
||||
{
|
||||
float scale;
|
||||
float shift;
|
||||
|
||||
ib >> scale >> shift;
|
||||
_attrs.get()->set_rnn_data_qparams(scale, shift);
|
||||
_attrs->set_rnn_data_qparams(scale, shift);
|
||||
}
|
||||
{
|
||||
int mask;
|
||||
@ -349,7 +270,7 @@ struct typed_primitive_onednn_impl : public typed_primitive_impl<PType> {
|
||||
ib >> mask;
|
||||
ib >> scales;
|
||||
|
||||
_attrs.get()->set_rnn_weights_qparams(mask, scales);
|
||||
_attrs->set_rnn_weights_qparams(mask, scales);
|
||||
}
|
||||
{
|
||||
int mask;
|
||||
@ -358,7 +279,7 @@ struct typed_primitive_onednn_impl : public typed_primitive_impl<PType> {
|
||||
ib >> mask;
|
||||
ib >> scales;
|
||||
|
||||
_attrs.get()->set_rnn_weights_projection_qparams(mask, scales);
|
||||
_attrs->set_rnn_weights_projection_qparams(mask, scales);
|
||||
}
|
||||
|
||||
_engine = &ib.get_engine();
|
||||
|
@ -101,7 +101,13 @@ public:
|
||||
#ifdef ONEDNN_PRIMITIVE_SERIALIZATION
|
||||
parent::save(ob);
|
||||
|
||||
ob << make_data(&_desc->data, sizeof(dnnl_reduction_desc_t));
|
||||
const dnnl::reduction::primitive_desc *typed_pd
|
||||
= reinterpret_cast<const dnnl::reduction::primitive_desc *>(&_pd);
|
||||
|
||||
dnnl::algorithm alg = typed_pd->get_algorithm();
|
||||
ob << make_data(&alg, sizeof(dnnl::algorithm));
|
||||
ob << typed_pd->get_p();
|
||||
ob << typed_pd->get_epsilon();
|
||||
|
||||
std::vector<uint8_t> prim_cache;
|
||||
prim_cache = _prim.get_cache_blob();
|
||||
@ -113,13 +119,30 @@ public:
|
||||
#ifdef ONEDNN_PRIMITIVE_SERIALIZATION
|
||||
parent::load(ib);
|
||||
|
||||
_desc = std::make_shared<dnnl::reduction::desc>();
|
||||
ib >> make_data(&_desc->data, sizeof(dnnl_reduction_desc_t));
|
||||
const kernel_impl_params* impl_params = reinterpret_cast<kernel_impl_params*>(ib.getKernlImplParams());
|
||||
|
||||
dnnl::algorithm alg;
|
||||
ib >> make_data(&alg, sizeof(dnnl::algorithm));
|
||||
|
||||
auto input_md = onednn::layout_to_memory_desc(impl_params->get_input_layout(0));
|
||||
auto output_md = onednn::layout_to_memory_desc(impl_params->get_output_layout());
|
||||
|
||||
float p, eps;
|
||||
ib >> p >> eps;
|
||||
|
||||
auto prim_desc = std::make_shared<dnnl::reduction::primitive_desc>(
|
||||
ib.get_engine().get_onednn_engine(),
|
||||
alg,
|
||||
input_md,
|
||||
output_md,
|
||||
p,
|
||||
eps,
|
||||
*_attrs.get());
|
||||
_pd = *prim_desc;
|
||||
|
||||
std::vector<uint8_t> prim_cache;
|
||||
ib >> prim_cache;
|
||||
|
||||
_pd = dnnl::primitive_desc(&_desc->data, _attrs.get(), ib.get_engine().get_onednn_engine(), nullptr);
|
||||
_prim = dnnl::primitive(_pd, prim_cache);
|
||||
#endif
|
||||
}
|
||||
|
@ -78,8 +78,17 @@ public:
|
||||
parent::load(ib);
|
||||
|
||||
const kernel_impl_params* impl_params = reinterpret_cast<kernel_impl_params*>(ib.getKernlImplParams());
|
||||
auto desc = get_reorder_descriptor(*impl_params, *_attrs, ib.get_engine());
|
||||
_pd = *desc;
|
||||
|
||||
auto input_md = onednn::layout_to_memory_desc(impl_params->get_input_layout(0));
|
||||
auto output_md = onednn::layout_to_memory_desc(impl_params->get_output_layout());
|
||||
|
||||
auto prim_desc = std::make_shared<dnnl::reorder::primitive_desc>(
|
||||
ib.get_engine().get_onednn_engine(),
|
||||
input_md,
|
||||
ib.get_engine().get_onednn_engine(),
|
||||
output_md,
|
||||
*_attrs.get());
|
||||
_pd = *prim_desc;
|
||||
|
||||
std::vector<uint8_t> prim_cache;
|
||||
ib >> prim_cache;
|
||||
|
@ -95,6 +95,8 @@ struct fused_primitive_desc_onednn {
|
||||
onednn_post_op_type op_type; // onednn post-operation type
|
||||
size_t mem_offset; // index of a memory buffer for current post-operation
|
||||
size_t mem_dep; // memory dependency for working with fused node
|
||||
dnnl::memory::format_tag tag;
|
||||
bool flatten;
|
||||
};
|
||||
#endif // ENABLE_ONEDNN_FOR_GPU
|
||||
} // namespace cldnn
|
||||
|
@ -1162,6 +1162,8 @@ void kernel_impl_params::save(BinaryOutputBuffer& ob) const {
|
||||
}
|
||||
|
||||
void kernel_impl_params::load(BinaryInputBuffer& ib) {
|
||||
prog = nullptr;
|
||||
desc = nullptr;
|
||||
ib >> has_runtime_layouts;
|
||||
ib >> unique_id;
|
||||
ib >> input_layouts;
|
||||
|
@ -1077,17 +1077,6 @@ void primitive_inst::save(cldnn::BinaryOutputBuffer& ob) const {
|
||||
_impl_params->save(ob);
|
||||
ob.setKernlImplParams(_impl_params.get());
|
||||
|
||||
if (_impl != nullptr) {
|
||||
ob << true;
|
||||
kernel_arguments_data args = _impl->get_arguments(*this);
|
||||
kernel_arguments_data_idx args_idx;
|
||||
convert_args(args, args_idx);
|
||||
_impl->set_arguments(args_idx);
|
||||
ob << _impl;
|
||||
} else {
|
||||
ob << false;
|
||||
}
|
||||
|
||||
ob << _node_output_layout;
|
||||
ob << has_mutable_input();
|
||||
ob << mem_allocated();
|
||||
@ -1140,6 +1129,17 @@ void primitive_inst::save(cldnn::BinaryOutputBuffer& ob) const {
|
||||
const auto _allocation_type = ibuf->get_allocation_type();
|
||||
ob << make_data(&_allocation_type, sizeof(_allocation_type));
|
||||
}
|
||||
|
||||
if (_impl != nullptr) {
|
||||
ob << true;
|
||||
kernel_arguments_data args = _impl->get_arguments(*this);
|
||||
kernel_arguments_data_idx args_idx;
|
||||
convert_args(args, args_idx);
|
||||
_impl->set_arguments(args_idx);
|
||||
ob << _impl;
|
||||
} else {
|
||||
ob << false;
|
||||
}
|
||||
}
|
||||
|
||||
void primitive_inst::convert_args(const kernel_arguments_data& args, kernel_arguments_data_idx& args_idx) const {
|
||||
@ -1185,13 +1185,6 @@ void primitive_inst::load(cldnn::BinaryInputBuffer& ib) {
|
||||
_impl_params->load(ib);
|
||||
ib.setKernlImplParams(_impl_params.get());
|
||||
|
||||
bool has_impl;
|
||||
ib >> has_impl;
|
||||
if (has_impl) {
|
||||
_impl.release();
|
||||
ib >> _impl;
|
||||
}
|
||||
|
||||
ib >> _node_output_layout;
|
||||
ib >> _has_mutable_input;
|
||||
ib >> _mem_allocated;
|
||||
@ -1268,5 +1261,12 @@ void primitive_inst::load(cldnn::BinaryInputBuffer& ib) {
|
||||
|
||||
_intermediates_memory[i] = get_network().get_engine().allocate_memory(ibuf_layout, _allocation_type);
|
||||
}
|
||||
|
||||
bool has_impl;
|
||||
ib >> has_impl;
|
||||
if (has_impl) {
|
||||
_impl.release();
|
||||
ib >> _impl;
|
||||
}
|
||||
}
|
||||
} // namespace cldnn
|
||||
|
@ -897,8 +897,10 @@ void program_node::init_onednn_primitive_attributes() {
|
||||
size_t empty_mem = 0xff;
|
||||
|
||||
// Add information about post-operation into the list, update indices
|
||||
auto update_onednn_post_op_list = [&](onednn_post_op_type type, size_t m_dep) {
|
||||
fused_primitive_desc_onednn cur_op_desc = { type, memory_offset, m_dep };
|
||||
auto update_onednn_post_op_list = [&](onednn_post_op_type type, size_t m_dep,
|
||||
dnnl::memory::format_tag tag = dnnl::memory::format_tag::undef,
|
||||
bool flatten = false) {
|
||||
fused_primitive_desc_onednn cur_op_desc = { type, memory_offset, m_dep, tag, flatten };
|
||||
fused_ops.push_back(cur_op_desc);
|
||||
|
||||
auto has_memory_buffers = type == onednn_post_op_type::binary_add ||
|
||||
@ -970,7 +972,7 @@ void program_node::init_onednn_primitive_attributes() {
|
||||
} else if (desc.typed_desc<eltwise>()->mode == eltwise_mode::prod) {
|
||||
dnnl::memory::desc in_desc = onednn::layout_to_memory_desc(in, dnnl::memory::format_tag::ab, true);
|
||||
post_ops.append_binary(dnnl::algorithm::binary_mul, in_desc);
|
||||
update_onednn_post_op_list(onednn_post_op_type::binary_mul, dep_idx);
|
||||
update_onednn_post_op_list(onednn_post_op_type::binary_mul, dep_idx, dnnl::memory::format_tag::ab, true);
|
||||
} else {
|
||||
std::stringstream error_msg;
|
||||
error_msg << "Unsupported eltwise mode: " << static_cast<int>(desc.typed_desc<eltwise>()->mode) << ". ";
|
||||
@ -996,7 +998,7 @@ void program_node::init_onednn_primitive_attributes() {
|
||||
auto in_scale = get_dependency(dep_idx++).get_output_layout();
|
||||
dnnl::memory::desc in_scale_desc = onednn::layout_to_memory_desc(in_scale, dnnl::memory::format_tag::ab, true);
|
||||
post_ops.append_binary(dnnl::algorithm::binary_mul, in_scale_desc);
|
||||
update_onednn_post_op_list(onednn_post_op_type::binary_mul, dep_idx - 1);
|
||||
update_onednn_post_op_list(onednn_post_op_type::binary_mul, dep_idx - 1, dnnl::memory::format_tag::ab, true);
|
||||
}
|
||||
|
||||
if (q_param->has_pre_shift) {
|
||||
@ -1007,7 +1009,7 @@ void program_node::init_onednn_primitive_attributes() {
|
||||
auto in_shift = get_dependency(dep_idx++).get_output_layout();
|
||||
dnnl::memory::desc in_shift_desc = onednn::layout_to_memory_desc(in_shift, dnnl::memory::format_tag::ab, true);
|
||||
post_ops.append_binary(dnnl::algorithm::binary_add, in_shift_desc);
|
||||
update_onednn_post_op_list(onednn_post_op_type::binary_add, dep_idx - 1);
|
||||
update_onednn_post_op_list(onednn_post_op_type::binary_add, dep_idx - 1, dnnl::memory::format_tag::ab, true);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1038,7 +1040,7 @@ void program_node::init_onednn_primitive_attributes() {
|
||||
auto out_scale = get_dependency(dep_idx++).get_output_layout();
|
||||
dnnl::memory::desc out_scale_desc = onednn::layout_to_memory_desc(out_scale, dnnl::memory::format_tag::ab, true);
|
||||
post_ops.append_binary(dnnl::algorithm::binary_mul, out_scale_desc);
|
||||
update_onednn_post_op_list(onednn_post_op_type::binary_mul, dep_idx - 1);
|
||||
update_onednn_post_op_list(onednn_post_op_type::binary_mul, dep_idx - 1, dnnl::memory::format_tag::ab, true);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1050,7 +1052,7 @@ void program_node::init_onednn_primitive_attributes() {
|
||||
auto out_shift = get_dependency(dep_idx++).get_output_layout();
|
||||
dnnl::memory::desc out_shift_desc = onednn::layout_to_memory_desc(out_shift, dnnl::memory::format_tag::ab, true);
|
||||
post_ops.append_binary(dnnl::algorithm::binary_add, out_shift_desc);
|
||||
update_onednn_post_op_list(onednn_post_op_type::binary_add, dep_idx - 1);
|
||||
update_onednn_post_op_list(onednn_post_op_type::binary_add, dep_idx - 1, dnnl::memory::format_tag::ab, true);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1078,9 +1080,9 @@ void program_node::init_onednn_primitive_attributes() {
|
||||
dnnl::memory::desc in_hi_desc = onednn::layout_to_memory_desc(in_hi, dnnl::memory::format_tag::ab, true);
|
||||
|
||||
post_ops.append_binary(clamp_max, in_lo_desc);
|
||||
update_onednn_post_op_list(onednn_post_op_type::binary_max, dep_idx - 2);
|
||||
update_onednn_post_op_list(onednn_post_op_type::binary_max, dep_idx - 2, dnnl::memory::format_tag::ab, true);
|
||||
post_ops.append_binary(clamp_min, in_hi_desc);
|
||||
update_onednn_post_op_list(onednn_post_op_type::binary_min, dep_idx - 1);
|
||||
update_onednn_post_op_list(onednn_post_op_type::binary_min, dep_idx - 1, dnnl::memory::format_tag::ab, true);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1097,7 +1099,7 @@ void program_node::init_onednn_primitive_attributes() {
|
||||
auto in_scale = get_dependency(dep_idx++).get_output_layout();
|
||||
dnnl::memory::desc in_scale_desc = onednn::layout_to_memory_desc(in_scale, dnnl::memory::format_tag::ab, true);
|
||||
post_ops.append_binary(dnnl::algorithm::binary_mul, in_scale_desc);
|
||||
update_onednn_post_op_list(onednn_post_op_type::binary_mul, dep_idx - 1);
|
||||
update_onednn_post_op_list(onednn_post_op_type::binary_mul, dep_idx - 1, dnnl::memory::format_tag::ab, true);
|
||||
}
|
||||
|
||||
if (q_param->has_pre_shift) {
|
||||
@ -1108,7 +1110,7 @@ void program_node::init_onednn_primitive_attributes() {
|
||||
auto in_shift = get_dependency(dep_idx++).get_output_layout();
|
||||
dnnl::memory::desc in_shift_desc = onednn::layout_to_memory_desc(in_shift, dnnl::memory::format_tag::ab, true);
|
||||
post_ops.append_binary(dnnl::algorithm::binary_add, in_shift_desc);
|
||||
update_onednn_post_op_list(onednn_post_op_type::binary_add, dep_idx - 1);
|
||||
update_onednn_post_op_list(onednn_post_op_type::binary_add, dep_idx - 1, dnnl::memory::format_tag::ab, true);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1135,7 +1137,7 @@ void program_node::init_onednn_primitive_attributes() {
|
||||
auto out_scale = get_dependency(dep_idx++).get_output_layout();
|
||||
dnnl::memory::desc out_scale_desc = onednn::layout_to_memory_desc(out_scale, dnnl::memory::format_tag::ab, true);
|
||||
post_ops.append_binary(dnnl::algorithm::binary_mul, out_scale_desc);
|
||||
update_onednn_post_op_list(onednn_post_op_type::binary_mul, dep_idx - 1);
|
||||
update_onednn_post_op_list(onednn_post_op_type::binary_mul, dep_idx - 1, dnnl::memory::format_tag::ab, true);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1147,7 +1149,7 @@ void program_node::init_onednn_primitive_attributes() {
|
||||
auto out_shift = get_dependency(dep_idx++).get_output_layout();
|
||||
dnnl::memory::desc out_shift_desc = onednn::layout_to_memory_desc(out_shift, dnnl::memory::format_tag::ab, true);
|
||||
post_ops.append_binary(dnnl::algorithm::binary_add, out_shift_desc);
|
||||
update_onednn_post_op_list(onednn_post_op_type::binary_add, dep_idx - 1);
|
||||
update_onednn_post_op_list(onednn_post_op_type::binary_add, dep_idx - 1, dnnl::memory::format_tag::ab, true);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -66,6 +66,16 @@ Graph::Graph(cldnn::BinaryInputBuffer &ib, RemoteContextImpl::Ptr context, const
|
||||
if (m_program->m_max_batch > 1)
|
||||
m_config.set_property(ov::intel_gpu::max_dynamic_batch(m_program->m_max_batch));
|
||||
|
||||
bool need_onednn_engine = false;
|
||||
ib >> need_onednn_engine;
|
||||
if (need_onednn_engine) {
|
||||
#ifdef ENABLE_ONEDNN_FOR_GPU
|
||||
get_engine().create_onednn_engine(config);
|
||||
#else
|
||||
IE_THROW() << "[GPU] Current model cache requires OneDNN, but cannot use it.";
|
||||
#endif // ENABLE_ONEDNN_FOR_GPU
|
||||
}
|
||||
|
||||
ib >> m_program->inputLayouts;
|
||||
ib >> primitiveIDs;
|
||||
ib >> outputDims;
|
||||
@ -470,6 +480,17 @@ std::shared_ptr<ngraph::Function> Graph::GetExecGraphInfoByPrimitivesInfo(std::v
|
||||
// [ ov::intel_gpu::Graph::outputDims ]
|
||||
// [ cldnn::network ]
|
||||
void Graph::Export(cldnn::BinaryOutputBuffer &ob) {
|
||||
bool need_onednn_engine = false;
|
||||
#ifdef ENABLE_ONEDNN_FOR_GPU
|
||||
try {
|
||||
get_engine().get_onednn_engine();
|
||||
need_onednn_engine = true;
|
||||
} catch (ov::AssertFailure &) {
|
||||
need_onednn_engine = false;
|
||||
}
|
||||
#endif // ENABLE_ONEDNN_FOR_GPU
|
||||
ob << need_onednn_engine;
|
||||
|
||||
ob << m_program->inputLayouts;
|
||||
ob << primitiveIDs;
|
||||
ob << outputDims;
|
||||
|
Loading…
Reference in New Issue
Block a user