[GPU] Serialization logic updates for OneDNN 3.0 (#15182)

* [GPU] Draft for the oneDNN 3.0 integration

Initial PR.

1. Support the oneDNN 3.0 API
2. Use a binary_mul post-op instead of the output-scale channel-wise mask (2); see the sketch below
3. Disable some post-op fusings because oneDNN 3.0 has no eltwise scale API:
    eltw(non_linear)+eltw(linear), eltw+sum+eltw(linear)
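
For context, a minimal sketch of the replacement (make_scaled_attr is a hypothetical helper, not plugin code): oneDNN 3.0 dropped primitive_attr::set_output_scales(), so a per-channel scale that used mask 2 is now expressed as a binary_mul post-op whose second input is the scale tensor.

    #include <oneapi/dnnl/dnnl.hpp>

    // Assumes a 2-D f32 scale tensor broadcast over the output channels.
    dnnl::primitive_attr make_scaled_attr(const dnnl::memory::dims& scale_dims) {
        dnnl::primitive_attr attr;
        dnnl::post_ops ops;
        dnnl::memory::desc scale_md(scale_dims, dnnl::memory::data_type::f32,
                                    dnnl::memory::format_tag::ab);
        // The scale memory itself is bound at execution time via
        // DNNL_ARG_ATTR_MULTIPLE_POST_OP(0) | DNNL_ARG_SRC_1.
        ops.append_binary(dnnl::algorithm::binary_mul, scale_md);
        attr.set_post_ops(ops);
        return attr;
    }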

Signed-off-by: hyunback <hyunback.kim@intel.com>

* Fix hardswish issue in 3.0

The hard-coded hardswish parameter (2.7) is replaced with the alpha and beta values taken from the user's input.
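
In oneDNN 3.0 terms, the eltwise post-op now forwards the activation's own parameters; a sketch, where the alpha/beta values are illustrative stand-ins for whatever the user's activation node provides:

    dnnl::post_ops ops;
    // alpha and beta come from the fused activation, not a hard-coded 2.7.
    float alpha = 1.0f / 6.0f;  // illustrative user-provided value
    float beta = 0.5f;          // illustrative user-provided value
    ops.append_eltwise(dnnl::algorithm::eltwise_hardswish, alpha, beta);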

Signed-off-by: hyunback <hyunback.kim@intel.com>

* clean up code

Signed-off-by: hyunback <hyunback.kim@intel.com>

* Apply code review comments and fix CI issue

Signed-off-by: hyunback <hyunback.kim@intel.com>

* Remove setting dst scale

- Accuracy issue
- No perf gain compared to binary_mul (see the sketch below)
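
A sketch of the two alternatives that were compared (illustrative; whether set_scales_mask is exactly the API that was removed here is an assumption):

    dnnl::memory::dim channels = 64;  // illustrative
    dnnl::memory::desc scale_md({1, channels}, dnnl::memory::data_type::f32,
                                dnnl::memory::format_tag::ab);
    dnnl::primitive_attr attr;
    // (a) removed: runtime destination scale (accuracy issue, no perf gain)
    // attr.set_scales_mask(DNNL_ARG_DST, 0);
    // (b) kept: fuse the scale as a binary post-op instead
    dnnl::post_ops ops;
    ops.append_binary(dnnl::algorithm::binary_mul, scale_md);
    attr.set_post_ops(ops);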

Signed-off-by: hyunback <hyunback.kim@intel.com>

* gpu serialization for onednn 3.0

* missed changes

* add onednn engine creator when loading model from cache

* fixed to use mem_dep index

* updated to save zero_point_mask for serialization

* fixed onednn fc serialization logic

* updated the logic to check if onednn is enabled
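
Taken together, the cache-related commits add the export/import handshake sketched below; this condenses the Graph diff at the end of this change (cldnn/OpenVINO internals, not standalone code):

    // Export: record whether this program ever created the oneDNN engine.
    bool need_onednn_engine = false;
    try {
        get_engine().get_onednn_engine();
        need_onednn_engine = true;
    } catch (ov::AssertFailure&) {}
    ob << need_onednn_engine;

    // Import: recreate the engine before any oneDNN primitive is deserialized.
    ib >> need_onednn_engine;
    if (need_onednn_engine)
        get_engine().create_onednn_engine(config);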

---------

Signed-off-by: hyunback <hyunback.kim@intel.com>
Co-authored-by: hyunback <hyunback.kim@intel.com>
Eddy Kim 2023-01-31 02:41:25 +09:00 committed by GitHub
parent bb18069f85
commit c2518f1e4a
14 changed files with 484 additions and 235 deletions

View File

@@ -50,9 +50,9 @@ protected:
}
static std::shared_ptr<dnnl::concat::primitive_desc> get_concatenation_primitive_descriptor(const kernel_impl_params& impl_params,
cldnn::engine& engine,
const dnnl::primitive_attr& attr,
const int64_t axis) {
auto& engine = impl_params.prog->get_engine();
std::vector<dnnl::memory::desc> input_mds;
for (size_t i = 0; i < impl_params.input_layouts.size(); i++) {
input_mds.push_back(onednn::layout_to_memory_desc(impl_params.get_input_layout(i)));
@@ -102,8 +102,8 @@ public:
ib >> prim_axis;
const kernel_impl_params* impl_params = reinterpret_cast<kernel_impl_params*>(ib.getKernlImplParams());
auto desc = get_concatenation_descriptor(*impl_params, prim_axis, ib.get_engine());
_pd = *desc;
auto prim_desc = get_concatenation_primitive_descriptor(*impl_params, ib.get_engine(), *_attrs, prim_axis);
_pd = *prim_desc;
std::vector<uint8_t> prim_cache;
ib >> prim_cache;
@@ -119,7 +119,7 @@ public:
return make_unique<concatenation_onednn>(engine, config);
auto prim = impl_params.typed_desc<concatenation>();
auto attr = arg.get_onednn_primitive_attributes();
auto prim_desc = get_concatenation_primitive_descriptor(impl_params, *attr, prim->axis);
auto prim_desc = get_concatenation_primitive_descriptor(impl_params, impl_params.prog->get_engine(), *attr, prim->axis);
return cldnn::make_unique<concatenation_onednn>(engine, config, attr, *prim_desc);
}
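
The engine is now passed explicitly because oneDNN 3.0 removed the standalone concat descriptor and creates the primitive descriptor from the engine directly; a minimal sketch of the v3.0 call this helper wraps (variable names illustrative):

    std::vector<dnnl::memory::desc> srcs = { /* one memory desc per input */ };
    dnnl::primitive_attr attr;
    int axis = 1;  // concat dimension
    // oneDNN 3.0: engine-first constructor, no separate concat::desc step.
    dnnl::concat::primitive_desc pd(onednn_engine, axis, srcs, attr);
    dnnl::concat prim(pd);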

View File

@@ -83,16 +83,24 @@ protected:
return args;
}
int _zero_point_mask;
void set_zero_point_mask(int zero_point_mask) {
_zero_point_mask = zero_point_mask;
}
template <typename T>
static void set_activation_zero_points_attr(const std::shared_ptr<dnnl::primitive_attr>& attrs, cldnn::data_node& node) {
static void set_activation_zero_points_attr(const std::shared_ptr<dnnl::primitive_attr>& attrs,
cldnn::data_node& node, int& zero_point_mask) {
int32_t zp_val = DNNL_RUNTIME_S32_VAL;
bool is_per_tensor = onednn::is_per_tensor<T>(node, zp_val);
memory::ptr s32_mem = onednn::convert_zp_data_to_s32<T>(node.get_attached_memory_ptr());
node.attach_memory(s32_mem, false);
attrs->set_zero_points_mask(DNNL_ARG_SRC, is_per_tensor ? 0 : 2);
zero_point_mask = is_per_tensor ? 0 : 2;
attrs->set_zero_points_mask(DNNL_ARG_SRC, zero_point_mask);
}
static std::shared_ptr<dnnl::primitive_attr> get_primitive_attributes(const typed_program_node<convolution>& arg) {
static std::shared_ptr<dnnl::primitive_attr> get_primitive_attributes(const typed_program_node<convolution>& arg,
int& zero_point_mask) {
auto attrs = arg.get_onednn_primitive_attributes();
if (arg.activations_zero_points_term()) {
@@ -104,9 +112,9 @@ protected:
}
if (a_zp_dtype == data_types::i8) {
set_activation_zero_points_attr<data_type_to_type<data_types::i8>::type>(attrs, a_zp.as<data>());
set_activation_zero_points_attr<data_type_to_type<data_types::i8>::type>(attrs, a_zp.as<data>(), zero_point_mask);
} else { // if (a_zp_dtype == data_types::u8)
set_activation_zero_points_attr<data_type_to_type<data_types::u8>::type>(attrs, a_zp.as<data>());
set_activation_zero_points_attr<data_type_to_type<data_types::u8>::type>(attrs, a_zp.as<data>(), zero_point_mask);
}
}
@@ -162,7 +170,16 @@ public:
#ifdef ONEDNN_PRIMITIVE_SERIALIZATION
parent::save(ob);
ob << make_data(&_desc->data, sizeof(dnnl_convolution_desc_t));
ob << _zero_point_mask;
const dnnl::convolution_forward::primitive_desc *typed_pd
= reinterpret_cast<const dnnl::convolution_forward::primitive_desc *>(&_pd);
ob << typed_pd->get_strides();
ob << typed_pd->get_dilations();
ob << typed_pd->get_padding_l();
ob << typed_pd->get_padding_r();
ob << typed_pd->bias_desc().is_zero();
std::vector<uint8_t> prim_cache;
prim_cache = _prim.get_cache_blob();
@@ -174,16 +191,51 @@ public:
#ifdef ONEDNN_PRIMITIVE_SERIALIZATION
parent::load(ib);
const char dummy_mem[sizeof(dnnl::convolution_forward::desc)] = {};
const dnnl::convolution_forward::desc *dummy_opdesc
= reinterpret_cast<const dnnl::convolution_forward::desc *>(&dummy_mem[0]);
_desc = std::make_shared<dnnl::convolution_forward::desc>(std::move(*dummy_opdesc));
ib >> make_data(&_desc->data, sizeof(dnnl_convolution_desc_t));
ib >> _zero_point_mask;
if (_zero_point_mask != -1) {
_attrs->set_zero_points_mask(DNNL_ARG_SRC, _zero_point_mask);
}
const kernel_impl_params* impl_params = reinterpret_cast<kernel_impl_params*>(ib.getKernlImplParams());
auto input_md = onednn::layout_to_memory_desc(impl_params->get_input_layout(0), dnnl::memory::format_tag::undef);
auto weights_md = onednn::layout_to_memory_desc(impl_params->get_input_layout(1), dnnl::memory::format_tag::any);
auto output_md = onednn::layout_to_memory_desc(impl_params->get_output_layout(), dnnl::memory::format_tag::undef);
dnnl::memory::dims strides;
dnnl::memory::dims dilates;
dnnl::memory::dims padding_l;
dnnl::memory::dims padding_r;
ib >> strides;
ib >> dilates;
ib >> padding_l;
ib >> padding_r;
bool zero_bias;
ib >> zero_bias;
if (zero_bias) {
auto prim_desc = std::make_shared<dnnl::convolution_forward::primitive_desc>(
ib.get_engine().get_onednn_engine(),
dnnl::prop_kind::forward_inference, dnnl::algorithm::convolution_direct,
input_md, weights_md, output_md,
strides, dilates, padding_l, padding_r,
*_attrs.get());
_pd = *prim_desc;
} else {
auto bias_md = onednn::layout_to_memory_desc(impl_params->get_input_layout(2), dnnl::memory::format_tag::any, true);
auto prim_desc = std::make_shared<dnnl::convolution_forward::primitive_desc>(
ib.get_engine().get_onednn_engine(),
dnnl::prop_kind::forward_inference, dnnl::algorithm::convolution_direct,
input_md, weights_md, bias_md, output_md,
strides, dilates, padding_l, padding_r,
*_attrs.get());
_pd = *prim_desc;
}
std::vector<uint8_t> prim_cache;
ib >> prim_cache;
_pd = dnnl::primitive_desc(&_desc->data, _attrs.get(), ib.get_engine().get_onednn_engine(), nullptr);
_prim = dnnl::primitive(_pd, prim_cache);
#endif
}
@@ -191,11 +243,15 @@ public:
static std::unique_ptr<primitive_impl> create(const convolution_node& arg, const kernel_impl_params& impl_params) {
auto& engine = impl_params.prog->get_engine();
auto& config = impl_params.prog->get_config();
auto attr = get_primitive_attributes(arg);
int zero_point_mask = -1;
auto attr = get_primitive_attributes(arg, zero_point_mask);
auto prim_desc = get_convolution_primitive_descriptor(impl_params, *attr);
return cldnn::make_unique<convolution_onednn>(engine, config, attr, *prim_desc, get_weights_reorder(impl_params, *prim_desc, arg.get_transposed()));
auto conv_onednn_impl = cldnn::make_unique<convolution_onednn>(engine, config, attr, *prim_desc,
get_weights_reorder(impl_params, *prim_desc, arg.get_transposed()));
conv_onednn_impl->set_zero_point_mask(zero_point_mask);
return conv_onednn_impl;
}
};
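
The save/load pair above implements oneDNN's cache-blob round trip; stripped to its essentials (a sketch, assuming pd has been rebuilt from the stored strides, dilations, paddings, and bias flag):

    // Save: persist the compiled kernel along with the parameters needed
    // to recreate the primitive_desc later.
    std::vector<uint8_t> blob = prim.get_cache_blob();

    // Load: recreate the primitive_desc first, then restore the primitive
    // from the blob instead of recompiling it.
    dnnl::primitive restored(pd, blob);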

View File

@@ -89,7 +89,14 @@ public:
#ifdef ONEDNN_PRIMITIVE_SERIALIZATION
parent::save(ob);
ob << make_data(&_desc->data, sizeof(dnnl_deconvolution_desc_t));
const dnnl::deconvolution_forward::primitive_desc *typed_pd
= reinterpret_cast<const dnnl::deconvolution_forward::primitive_desc *>(&_pd);
ob << typed_pd->get_strides();
ob << typed_pd->get_dilations();
ob << typed_pd->get_padding_l();
ob << typed_pd->get_padding_r();
ob << typed_pd->bias_desc().is_zero();
std::vector<uint8_t> prim_cache;
prim_cache = _prim.get_cache_blob();
@@ -101,16 +108,46 @@ public:
#ifdef ONEDNN_PRIMITIVE_SERIALIZATION
parent::load(ib);
const char dummy_mem[sizeof(dnnl::deconvolution_forward::desc)] = {};
const dnnl::deconvolution_forward::desc *dummy_opdesc
= reinterpret_cast<const dnnl::deconvolution_forward::desc *>(&dummy_mem[0]);
_desc = std::make_shared<dnnl::deconvolution_forward::desc>(std::move(*dummy_opdesc));
ib >> make_data(&_desc->data, sizeof(dnnl_deconvolution_desc_t));
const kernel_impl_params* impl_params = reinterpret_cast<kernel_impl_params*>(ib.getKernlImplParams());
auto input_md = onednn::layout_to_memory_desc(impl_params->get_input_layout(0), dnnl::memory::format_tag::undef);
auto weights_md = onednn::layout_to_memory_desc(impl_params->get_input_layout(1), dnnl::memory::format_tag::any);
auto output_md = onednn::layout_to_memory_desc(impl_params->get_output_layout(), dnnl::memory::format_tag::undef);
dnnl::memory::dims strides;
dnnl::memory::dims dilates;
dnnl::memory::dims padding_l;
dnnl::memory::dims padding_r;
ib >> strides;
ib >> dilates;
ib >> padding_l;
ib >> padding_r;
bool zero_bias;
ib >> zero_bias;
if (zero_bias) {
auto prim_desc = std::make_shared<dnnl::deconvolution_forward::primitive_desc>(
ib.get_engine().get_onednn_engine(),
dnnl::prop_kind::forward_inference, dnnl::algorithm::deconvolution_direct,
input_md, weights_md, output_md,
strides, dilates, padding_l, padding_r,
*_attrs.get());
_pd = *prim_desc;
} else {
auto bias_md = onednn::layout_to_memory_desc(impl_params->get_input_layout(2), dnnl::memory::format_tag::any, true);
auto prim_desc = std::make_shared<dnnl::deconvolution_forward::primitive_desc>(
ib.get_engine().get_onednn_engine(),
dnnl::prop_kind::forward_inference, dnnl::algorithm::deconvolution_direct,
input_md, weights_md, bias_md, output_md,
strides, dilates, padding_l, padding_r,
*_attrs.get());
_pd = *prim_desc;
}
std::vector<uint8_t> prim_cache;
ib >> prim_cache;
_pd = dnnl::primitive_desc(&_desc->data, _attrs.get(), ib.get_engine().get_onednn_engine(), nullptr);
_prim = dnnl::primitive(_pd, prim_cache);
#endif
}

View File

@@ -96,10 +96,8 @@ protected:
}
static std::shared_ptr<dnnl::inner_product_forward::primitive_desc> get_fully_connected_primitive_descriptor(const kernel_impl_params& impl_params,
cldnn::engine& engine, size_t input_size, bool has_bias,
const dnnl::primitive_attr& attr = dnnl::primitive_attr()) {
auto& engine = impl_params.prog->get_engine();
auto prim = impl_params.typed_desc<fully_connected>();
auto input_layout = impl_params.get_input_layout(0);
auto weights_layout = impl_params.get_input_layout(1);
auto output_layout = impl_params.get_output_layout();
@@ -107,24 +105,24 @@ protected:
auto input_pshape = input_layout.get_partial_shape();
auto weights_pshape = weights_layout.get_partial_shape();
int64_t feature = input_pshape[std::min(prim->input_size, static_cast<size_t>(4)) - 1].get_length();
if (prim->input_size == 3) {
int64_t feature = input_pshape[std::min(input_size, static_cast<size_t>(4)) - 1].get_length();
if (input_size == 3) {
feature = std::max({input_layout.spatial(0), input_layout.spatial(1), input_layout.spatial(2)});
}
if (prim->input_size > 3) {
if (input_size > 3) {
input_layout.set_partial_shape(reshape_to_2d(input_pshape, feature));
}
if (weights_pshape.size() != 2) {
weights_layout.set_partial_shape(reshape_to_2d(weights_pshape, feature));
}
if (prim->input_size == 3) {
if (input_size == 3) {
output_layout.set_partial_shape({ input_layout.batch(), input_layout.feature(), weights_layout.batch(), 1 });
} else {
output_layout.set_partial_shape({ input_layout.batch(), weights_layout.batch() });
}
if (prim->input_size == 3) {
if (input_size == 3) {
combine_bf_with_first_spatial_dim(input_layout);
combine_bf_with_first_spatial_dim(output_layout);
}
@@ -133,7 +131,7 @@ protected:
auto weights_md = onednn::layout_to_memory_desc(weights_layout, dnnl::memory::format_tag::any);
auto output_md = onednn::layout_to_memory_desc(output_layout, dnnl::memory::format_tag::ab, false);
if (!prim->bias.empty()) {
if (has_bias) {
auto bias_md = onednn::layout_to_memory_desc(impl_params.get_input_layout(2), dnnl::memory::format_tag::any, true);
return std::make_shared<dnnl::inner_product_forward::primitive_desc>(
engine.get_onednn_engine(),
@@ -159,7 +157,12 @@ public:
#ifdef ONEDNN_PRIMITIVE_SERIALIZATION
parent::save(ob);
ob << make_data(&_desc->data, sizeof(dnnl_inner_product_desc_t));
const kernel_impl_params* impl_params = reinterpret_cast<kernel_impl_params*>(ob.getKernlImplParams());
auto prim = impl_params->typed_desc<fully_connected>();
size_t input_size = prim->input_size;
bool has_bias = !prim->bias.empty();
ob << input_size;
ob << has_bias;
std::vector<uint8_t> prim_cache;
prim_cache = _prim.get_cache_blob();
@@ -171,16 +174,18 @@ public:
#ifdef ONEDNN_PRIMITIVE_SERIALIZATION
parent::load(ib);
const char dummy_mem[sizeof(dnnl::inner_product_forward::desc)] = {};
const dnnl::inner_product_forward::desc *dummy_opdesc
= reinterpret_cast<const dnnl::inner_product_forward::desc *>(&dummy_mem[0]);
_desc = std::make_shared<dnnl::inner_product_forward::desc>(std::move(*dummy_opdesc));
ib >> make_data(&_desc->data, sizeof(dnnl_inner_product_desc_t));
size_t input_size = 2;
bool has_bias = false;
ib >> input_size;
ib >> has_bias;
const kernel_impl_params* impl_params = reinterpret_cast<kernel_impl_params*>(ib.getKernlImplParams());
auto prim_desc = get_fully_connected_primitive_descriptor(*impl_params, ib.get_engine(), input_size, has_bias, *_attrs);
_pd = *prim_desc;
std::vector<uint8_t> prim_cache;
ib >> prim_cache;
_pd = dnnl::primitive_desc(&_desc->data, _attrs.get(), ib.get_engine().get_onednn_engine(), nullptr);
_prim = dnnl::primitive(_pd, prim_cache);
#endif
}
@@ -189,7 +194,9 @@ public:
auto& engine = impl_params.prog->get_engine();
auto& config = impl_params.prog->get_config();
auto attr = arg.get_onednn_primitive_attributes();
auto prim_desc = get_fully_connected_primitive_descriptor(impl_params, *attr);
auto prim = impl_params.typed_desc<fully_connected>();
auto prim_desc = get_fully_connected_primitive_descriptor(impl_params, impl_params.prog->get_engine(),
prim->input_size, !prim->bias.empty(), *attr);
return cldnn::make_unique<fully_connected_onednn>(engine, config, attr, *prim_desc, get_weights_reorder(impl_params, *prim_desc));
}

View File

@@ -53,11 +53,21 @@ protected:
}
}
static std::shared_ptr<dnnl::matmul::primitive_desc> get_gemm_primitive_descriptor(const kernel_impl_params& impl_params,
const dnnl::primitive_attr& attr = dnnl::primitive_attr()) {
auto& engine = impl_params.prog->get_engine();
static void get_gemm_primitive_md(const kernel_impl_params& impl_params,
dnnl::memory::data_type& in0_dt,
dnnl::memory::data_type& in1_dt,
dnnl::memory::data_type& out_dt,
dnnl::memory::dims& in0_dims,
dnnl::memory::dims& in1_dims,
dnnl::memory::dims& out_dims,
dnnl::memory::format_tag& in0_fmt,
dnnl::memory::format_tag& in1_fmt,
dnnl::memory::format_tag& out_fmt,
bool gemm_with_bias,
dnnl::memory::data_type& bias_dt,
dnnl::memory::dims& bias_dims,
dnnl::memory::format_tag& bias_fmt) {
auto prim = impl_params.typed_desc<gemm>();
auto gemm_with_bias = prim->dependencies().size() == 3;
auto out_l = impl_params.get_output_layout();
std::vector<layout> in_layouts { impl_params.get_input_layout(0), impl_params.get_input_layout(1) };
@@ -84,17 +94,17 @@ protected:
size_t rank = cldnn::format::dimension(out_l.format);
dnnl::memory::data_type in0_dt = onednn::convert_data_type(in0_l.data_type);
dnnl::memory::data_type in1_dt = onednn::convert_data_type(in1_l.data_type);
dnnl::memory::data_type out_dt = onednn::convert_data_type(out_l.data_type);
in0_dt = onednn::convert_data_type(in0_l.data_type);
in1_dt = onednn::convert_data_type(in1_l.data_type);
out_dt = onednn::convert_data_type(out_l.data_type);
dnnl::memory::dims in0_dims = onednn::convert_gemm_tensor(in0_l.get_tensor(), rank, batched_dims_can_be_removed);
dnnl::memory::dims in1_dims = onednn::convert_gemm_tensor(in1_l.get_tensor(), rank, batched_dims_can_be_removed);
dnnl::memory::dims out_dims = onednn::convert_gemm_tensor(out_l.get_tensor(), rank, batched_dims_can_be_removed);
in0_dims = onednn::convert_gemm_tensor(in0_l.get_tensor(), rank, batched_dims_can_be_removed);
in1_dims = onednn::convert_gemm_tensor(in1_l.get_tensor(), rank, batched_dims_can_be_removed);
out_dims = onednn::convert_gemm_tensor(out_l.get_tensor(), rank, batched_dims_can_be_removed);
dnnl::memory::format_tag in0_fmt = onednn::convert_gemm_data_format(in0_dims);
dnnl::memory::format_tag in1_fmt = onednn::convert_gemm_data_format(in1_dims);
dnnl::memory::format_tag out_fmt = onednn::convert_gemm_data_format(out_dims);
in0_fmt = onednn::convert_gemm_data_format(in0_dims);
in1_fmt = onednn::convert_gemm_data_format(in1_dims);
out_fmt = onednn::convert_gemm_data_format(out_dims);
if (prim->transpose_input0) {
in0_fmt = transpose_format(in0_fmt);
@@ -106,16 +116,44 @@ protected:
std::swap(in1_dims[in1_dims.size() - 1], in1_dims[in1_dims.size() - 2]);
}
if (gemm_with_bias) {
auto bias_l = impl_params.get_input_layout(2);
auto bias_rank = cldnn::format::dimension(bias_l.format);
bias_dt = onednn::convert_data_type(bias_l.data_type);
bias_dims = onednn::convert_gemm_tensor(bias_l.get_tensor(), bias_rank, batched_dims_can_be_removed);
bias_fmt = onednn::convert_gemm_data_format(bias_dims);
}
}
static std::shared_ptr<dnnl::matmul::primitive_desc> get_gemm_primitive_descriptor(const kernel_impl_params& impl_params,
const dnnl::primitive_attr& attr = dnnl::primitive_attr()) {
auto& engine = impl_params.prog->get_engine();
auto prim = impl_params.typed_desc<gemm>();
auto gemm_with_bias = prim->dependencies().size() == 3;
dnnl::memory::data_type in0_dt;
dnnl::memory::data_type in1_dt;
dnnl::memory::data_type out_dt;
dnnl::memory::data_type bias_dt;
dnnl::memory::dims in0_dims;
dnnl::memory::dims in1_dims;
dnnl::memory::dims out_dims;
dnnl::memory::dims bias_dims;
dnnl::memory::format_tag in0_fmt;
dnnl::memory::format_tag in1_fmt;
dnnl::memory::format_tag out_fmt;
dnnl::memory::format_tag bias_fmt;
get_gemm_primitive_md(impl_params, in0_dt, in1_dt, out_dt, in0_dims, in1_dims, out_dims, in0_fmt, in1_fmt, out_fmt,
gemm_with_bias, bias_dt, bias_dims, bias_fmt);
dnnl::memory::desc in0_md(in0_dims, in0_dt, in0_fmt);
dnnl::memory::desc in1_md(in1_dims, in1_dt, in1_fmt);
dnnl::memory::desc out_md(out_dims, out_dt, out_fmt);
if (gemm_with_bias) {
auto bias_l = impl_params.get_input_layout(2);
auto bias_rank = cldnn::format::dimension(bias_l.format);
dnnl::memory::data_type bias_dt = onednn::convert_data_type(bias_l.data_type);
dnnl::memory::dims bias_dims = onednn::convert_gemm_tensor(bias_l.get_tensor(), bias_rank, batched_dims_can_be_removed);
dnnl::memory::format_tag bias_fmt = onednn::convert_gemm_data_format(bias_dims);
dnnl::memory::desc bias_md(bias_dims, bias_dt, bias_fmt);
return std::make_shared<dnnl::matmul::primitive_desc>(
@@ -140,7 +178,47 @@ public:
#ifdef ONEDNN_PRIMITIVE_SERIALIZATION
parent::save(ob);
ob << make_data(&_desc->data, sizeof(dnnl_matmul_desc_t));
const kernel_impl_params* impl_params = reinterpret_cast<kernel_impl_params*>(ob.getKernlImplParams());
auto prim = impl_params->typed_desc<gemm>();
bool gemm_with_bias = prim->dependencies().size() == 3;
ob << gemm_with_bias;
dnnl::memory::data_type in0_dt;
dnnl::memory::data_type in1_dt;
dnnl::memory::data_type out_dt;
dnnl::memory::data_type bias_dt;
dnnl::memory::dims in0_dims;
dnnl::memory::dims in1_dims;
dnnl::memory::dims out_dims;
dnnl::memory::dims bias_dims;
dnnl::memory::format_tag in0_fmt;
dnnl::memory::format_tag in1_fmt;
dnnl::memory::format_tag out_fmt;
dnnl::memory::format_tag bias_fmt;
get_gemm_primitive_md(*impl_params, in0_dt, in1_dt, out_dt, in0_dims, in1_dims, out_dims, in0_fmt, in1_fmt, out_fmt,
gemm_with_bias, bias_dt, bias_dims, bias_fmt);
ob << make_data(&in0_dt, sizeof(dnnl::memory::data_type));
ob << make_data(&in1_dt, sizeof(dnnl::memory::data_type));
ob << make_data(&out_dt, sizeof(dnnl::memory::data_type));
ob << in0_dims;
ob << in1_dims;
ob << out_dims;
ob << make_data(&in0_fmt, sizeof(dnnl::memory::format_tag));
ob << make_data(&in1_fmt, sizeof(dnnl::memory::format_tag));
ob << make_data(&out_fmt, sizeof(dnnl::memory::format_tag));
if (gemm_with_bias) {
ob << make_data(&bias_dt, sizeof(dnnl::memory::data_type));
ob << bias_dims;
ob << make_data(&bias_fmt, sizeof(dnnl::memory::format_tag));
}
std::vector<uint8_t> prim_cache;
prim_cache = _prim.get_cache_blob();
@@ -152,16 +230,72 @@ public:
#ifdef ONEDNN_PRIMITIVE_SERIALIZATION
parent::load(ib);
const char dummy_mem[sizeof(dnnl::matmul::desc)] = {};
const dnnl::matmul::desc *dummy_opdesc
= reinterpret_cast<const dnnl::matmul::desc *>(&dummy_mem[0]);
_desc = std::make_shared<dnnl::matmul::desc>(std::move(*dummy_opdesc));
ib >> make_data(&_desc->data, sizeof(dnnl_matmul_desc_t));
bool gemm_with_bias;
ib >> gemm_with_bias;
dnnl::memory::data_type in0_dt;
dnnl::memory::data_type in1_dt;
dnnl::memory::data_type out_dt;
dnnl::memory::data_type bias_dt;
dnnl::memory::dims in0_dims;
dnnl::memory::dims in1_dims;
dnnl::memory::dims out_dims;
dnnl::memory::dims bias_dims;
dnnl::memory::format_tag in0_fmt;
dnnl::memory::format_tag in1_fmt;
dnnl::memory::format_tag out_fmt;
dnnl::memory::format_tag bias_fmt;
ib >> make_data(&in0_dt, sizeof(dnnl::memory::data_type));
ib >> make_data(&in1_dt, sizeof(dnnl::memory::data_type));
ib >> make_data(&out_dt, sizeof(dnnl::memory::data_type));
ib >> in0_dims;
ib >> in1_dims;
ib >> out_dims;
ib >> make_data(&in0_fmt, sizeof(dnnl::memory::format_tag));
ib >> make_data(&in1_fmt, sizeof(dnnl::memory::format_tag));
ib >> make_data(&out_fmt, sizeof(dnnl::memory::format_tag));
if (gemm_with_bias) {
ib >> make_data(&bias_dt, sizeof(dnnl::memory::data_type));
ib >> bias_dims;
ib >> make_data(&bias_fmt, sizeof(dnnl::memory::format_tag));
}
dnnl::memory::desc in0_md(in0_dims, in0_dt, in0_fmt);
dnnl::memory::desc in1_md(in1_dims, in1_dt, in1_fmt);
dnnl::memory::desc out_md(out_dims, out_dt, out_fmt);
if (gemm_with_bias) {
dnnl::memory::desc bias_md(bias_dims, bias_dt, bias_fmt);
auto prim_desc = std::make_shared<dnnl::matmul::primitive_desc>(
ib.get_engine().get_onednn_engine(),
in0_md,
in1_md,
bias_md,
out_md,
*_attrs.get());
_pd = *prim_desc;
} else {
auto prim_desc = std::make_shared<dnnl::matmul::primitive_desc>(
ib.get_engine().get_onednn_engine(),
in0_md,
in1_md,
out_md,
*_attrs.get());
_pd = *prim_desc;
}
std::vector<uint8_t> prim_cache;
ib >> prim_cache;
_pd = dnnl::primitive_desc(&_desc->data, _attrs.get(), ib.get_engine().get_onednn_engine(), nullptr);
_prim = dnnl::primitive(_pd, prim_cache);
#endif
}

View File

@@ -87,7 +87,16 @@ public:
#ifdef ONEDNN_PRIMITIVE_SERIALIZATION
parent::save(ob);
ob << make_data(&_desc->data, sizeof(dnnl_pooling_desc_t));
const dnnl::pooling_forward::primitive_desc *typed_pd
= reinterpret_cast<const dnnl::pooling_forward::primitive_desc *>(&_pd);
dnnl::algorithm alg = typed_pd->get_algorithm();
ob << make_data(&alg, sizeof(dnnl::algorithm));
ob << typed_pd->get_strides();
ob << typed_pd->get_kernel();
ob << typed_pd->get_dilations();
ob << typed_pd->get_padding_l();
ob << typed_pd->get_padding_r();
std::vector<uint8_t> prim_cache;
prim_cache = _prim.get_cache_blob();
@@ -99,16 +108,42 @@ public:
#ifdef ONEDNN_PRIMITIVE_SERIALIZATION
parent::load(ib);
const char dummy_mem[sizeof(dnnl::pooling_forward::desc)] = {};
const dnnl::pooling_forward::desc *dummy_opdesc
= reinterpret_cast<const dnnl::pooling_forward::desc *>(&dummy_mem[0]);
_desc = std::make_shared<dnnl::pooling_forward::desc>(std::move(*dummy_opdesc));
ib >> make_data(&_desc->data, sizeof(dnnl_pooling_desc_t));
const kernel_impl_params* impl_params = reinterpret_cast<kernel_impl_params*>(ib.getKernlImplParams());
dnnl::algorithm alg;
ib >> make_data(&alg, sizeof(dnnl::algorithm));
auto input_md = onednn::layout_to_memory_desc(impl_params->get_input_layout(0));
auto output_md = onednn::layout_to_memory_desc(impl_params->get_output_layout());
dnnl::memory::dims stride;
dnnl::memory::dims kernel;
dnnl::memory::dims dilation;
dnnl::memory::dims pad_l;
dnnl::memory::dims pad_r;
ib >> stride;
ib >> kernel;
ib >> dilation;
ib >> pad_l;
ib >> pad_r;
auto prim_desc = std::make_shared<dnnl::pooling_forward::primitive_desc>(
ib.get_engine().get_onednn_engine(),
dnnl::prop_kind::forward_inference,
alg,
input_md,
output_md,
stride,
kernel,
dilation,
pad_l,
pad_r,
*_attrs.get());
_pd = *prim_desc;
std::vector<uint8_t> prim_cache;
ib >> prim_cache;
_pd = dnnl::primitive_desc(&_desc->data, _attrs.get(), ib.get_engine().get_onednn_engine(), nullptr);
_prim = dnnl::primitive(_pd, prim_cache);
#endif
}

View File

@@ -4,6 +4,8 @@
#pragma once
#define ONEDNN_PRIMITIVE_SERIALIZATION
#include "primitive_inst.h"
#include "intel_gpu/graph/serialization/binary_buffer.hpp"
#include "intel_gpu/runtime/error_handler.hpp"
@@ -74,47 +76,23 @@ struct typed_primitive_onednn_impl : public typed_primitive_impl<PType> {
// [ dnnl::cache_blob ]
void save(BinaryOutputBuffer& ob) const override {
#ifdef ONEDNN_PRIMITIVE_SERIALIZATION
if (_attrs.get() == nullptr) {
if (_attrs->get() == nullptr) {
ob << false;
} else {
ob << true;
}
if (_attrs.get() != nullptr) {
if (_attrs->get() != nullptr) {
{
int mask;
std::vector<float> scales;
std::vector<int32_t> zero_points;
_attrs.get()->get_output_scales(mask, scales);
ob << mask << scales;
scales.clear();
_attrs.get()->get_scales(DNNL_ARG_SRC_0, mask, scales);
ob << mask << scales;
scales.clear();
_attrs.get()->get_scales(DNNL_ARG_SRC_1, mask, scales);
ob << mask << scales;
_attrs.get()->get_zero_points(DNNL_ARG_SRC, mask, zero_points);
ob << mask << zero_points;
zero_points.clear();
_attrs.get()->get_zero_points(DNNL_ARG_WEIGHTS, mask, zero_points);
ob << mask << zero_points;
zero_points.clear();
_attrs.get()->get_zero_points(DNNL_ARG_DST, mask, zero_points);
ob << mask << zero_points;
}
{
dnnl::scratchpad_mode _scratchpad_mode = _attrs.get()->get_scratchpad_mode();
dnnl::scratchpad_mode _scratchpad_mode = _attrs->get_scratchpad_mode();
ob << make_data(&_scratchpad_mode, sizeof(dnnl::scratchpad_mode));
}
{
dnnl::fpmath_mode _fmath_mode = _attrs.get()->get_fpmath_mode();
dnnl::fpmath_mode _fmath_mode = _attrs->get_fpmath_mode();
ob << make_data(&_fmath_mode, sizeof(dnnl::fpmath_mode));
}
{
const dnnl::post_ops _post_ops = _attrs.get()->get_post_ops();
const dnnl::post_ops _post_ops = _attrs->get_post_ops();
ob << _post_ops.len();
for (int idx = 0; idx < _post_ops.len(); ++idx) {
@@ -133,13 +111,11 @@ struct typed_primitive_onednn_impl : public typed_primitive_impl<PType> {
ob << zero_point;
ob << make_data(&data_type, sizeof(dnnl::memory::data_type));
} else if (_kind == dnnl::primitive::kind::eltwise) {
float scale;
dnnl::algorithm aalgorithm;
float alpha;
float beta;
_post_ops.get_params_eltwise(idx, scale, aalgorithm, alpha, beta);
ob << scale;
_post_ops.get_params_eltwise(idx, aalgorithm, alpha, beta);
ob << make_data(&aalgorithm, sizeof(dnnl::algorithm));
ob << alpha;
ob << beta;
@@ -147,24 +123,16 @@ struct typed_primitive_onednn_impl : public typed_primitive_impl<PType> {
dnnl::memory::data_type weights_data_type;
dnnl::memory::data_type bias_data_type;
dnnl::memory::data_type dst_data_type;
int mask;
std::vector<float> scales;
dnnl::memory::dim kernel_size;
dnnl::memory::dim stride_size;
dnnl::memory::dim padding_l_size;
try {
_post_ops.get_params_dw_k3s1p1(idx, weights_data_type, bias_data_type, dst_data_type, mask, scales);
int stride = 1;
ob << stride;
} catch (...) {
_post_ops.get_params_dw_k3s2p1(idx, weights_data_type, bias_data_type, dst_data_type, mask, scales);
int stride = 2;
ob << stride;
}
_post_ops.get_params_dw(idx, weights_data_type, bias_data_type, dst_data_type, kernel_size, stride_size, padding_l_size);
ob << make_data(&weights_data_type, sizeof(dnnl::memory::data_type));
ob << make_data(&bias_data_type, sizeof(dnnl::memory::data_type));
ob << make_data(&dst_data_type, sizeof(dnnl::memory::data_type));
ob << mask;
ob << scales;
ob << kernel_size << stride_size << padding_l_size;
} else if (_kind == dnnl::primitive::kind::binary) {
dnnl::algorithm aalgorithm;
dnnl::memory::desc src1_desc;
@@ -172,7 +140,6 @@ struct typed_primitive_onednn_impl : public typed_primitive_impl<PType> {
_post_ops.get_params_binary(idx, aalgorithm, src1_desc);
ob << make_data(&aalgorithm, sizeof(dnnl::algorithm));
ob << make_data(&src1_desc, sizeof(dnnl::memory::desc));
} else if (_kind == dnnl::primitive::kind::prelu) {
int mask;
@@ -184,14 +151,14 @@ struct typed_primitive_onednn_impl : public typed_primitive_impl<PType> {
}
{
float scale, shift;
_attrs.get()->get_rnn_data_qparams(scale, shift);
_attrs->get_rnn_data_qparams(scale, shift);
ob << scale << shift;
}
{
int mask;
std::vector<float> scales;
_attrs.get()->get_rnn_weights_qparams(mask, scales);
_attrs->get_rnn_weights_qparams(mask, scales);
ob << mask;
ob << scales;
@@ -200,7 +167,7 @@ struct typed_primitive_onednn_impl : public typed_primitive_impl<PType> {
int mask;
std::vector<float> scales;
_attrs.get()->get_rnn_weights_projection_qparams(mask, scales);
_attrs->get_rnn_weights_projection_qparams(mask, scales);
ob << mask;
ob << scales;
@@ -216,71 +183,31 @@ struct typed_primitive_onednn_impl : public typed_primitive_impl<PType> {
if (has_attrs) {
{
int mask;
std::vector<float> scales;
ib >> mask >> scales;
_attrs.get()->set_output_scales(mask, scales);
}
{
int mask;
std::vector<float> scales;
bool default_output_scales = true;
_attrs.get()->get_output_scales(mask, scales);
for (float scale : scales) {
if (scale != 1.) {
default_output_scales = false;
break;
}
}
scales.clear();
ib >> mask >> scales;
if (default_output_scales)
_attrs.get()->set_scales(DNNL_ARG_SRC_0, mask, scales);
scales.clear();
ib >> mask >> scales;
if (default_output_scales)
_attrs.get()->set_scales(DNNL_ARG_SRC_1, mask, scales);
}
{
int mask;
std::vector<int32_t> zero_points;
ib >> mask >> zero_points;
_attrs.get()->set_zero_points(DNNL_ARG_SRC, mask, zero_points);
zero_points.clear();
ib >> mask >> zero_points;
_attrs.get()->set_zero_points(DNNL_ARG_WEIGHTS, mask, zero_points);
zero_points.clear();
ib >> mask >> zero_points;
_attrs.get()->set_zero_points(DNNL_ARG_DST, mask, zero_points);
}
{
dnnl::scratchpad_mode _scratchpad_mode;
dnnl::scratchpad_mode _scratchpad_mode = dnnl::scratchpad_mode::library;
ib >> make_data(&_scratchpad_mode, sizeof(dnnl::scratchpad_mode));
_attrs.get()->set_scratchpad_mode(_scratchpad_mode);
_attrs->set_scratchpad_mode(_scratchpad_mode);
}
{
dnnl::fpmath_mode _fmath_mode;
dnnl::fpmath_mode _fmath_mode = dnnl::fpmath_mode::any;
ib >> make_data(&_fmath_mode, sizeof(dnnl::fpmath_mode));
_attrs.get()->set_fpmath_mode(_fmath_mode);
_attrs->set_fpmath_mode(_fmath_mode);
}
{
const kernel_impl_params* impl_params = reinterpret_cast<kernel_impl_params*>(ib.getKernlImplParams());
const std::vector<cldnn::fused_primitive_desc_onednn>& fused_desc = impl_params->fused_desc_onednn;
dnnl::post_ops _post_ops;
int post_ops_len;
ib >> post_ops_len;
for (int idx = 0; idx < post_ops_len; ++idx) {
dnnl::primitive::kind _kind;
dnnl::primitive::kind _kind = dnnl::primitive::kind::undef;
ib >> make_data(&_kind, sizeof(dnnl::primitive::kind));
if (_kind == dnnl::primitive::kind::sum) {
float scale;
int32_t zero_point;
dnnl::memory::data_type data_type;
dnnl::memory::data_type data_type = dnnl::memory::data_type::undef;
ib >> scale;
ib >> zero_point;
@@ -288,44 +215,38 @@ struct typed_primitive_onednn_impl : public typed_primitive_impl<PType> {
_post_ops.append_sum(scale, zero_point, data_type);
} else if (_kind == dnnl::primitive::kind::eltwise) {
float scale;
dnnl::algorithm aalgorithm;
dnnl::algorithm aalgorithm = dnnl::algorithm::undef;
float alpha;
float beta;
ib >> scale;
ib >> make_data(&aalgorithm, sizeof(dnnl::algorithm));
ib >> alpha;
ib >> beta;
_post_ops.append_eltwise(scale, aalgorithm, alpha, beta);
_post_ops.append_eltwise(aalgorithm, alpha, beta);
} else if (_kind == dnnl::primitive::kind::convolution) {
int stride;
dnnl::memory::data_type weights_data_type;
dnnl::memory::data_type bias_data_type;
dnnl::memory::data_type dst_data_type;
int mask;
std::vector<float> scales;
dnnl::memory::data_type weights_data_type = dnnl::memory::data_type::undef;
dnnl::memory::data_type bias_data_type = dnnl::memory::data_type::undef;
dnnl::memory::data_type dst_data_type = dnnl::memory::data_type::undef;
dnnl::memory::dim kernel_size;
dnnl::memory::dim stride_size;
dnnl::memory::dim padding_l_size;
ib >> stride;
ib >> make_data(&weights_data_type, sizeof(dnnl::memory::data_type));
ib >> make_data(&bias_data_type, sizeof(dnnl::memory::data_type));
ib >> make_data(&dst_data_type, sizeof(dnnl::memory::data_type));
ib >> mask;
ib >> scales;
ib >> kernel_size >> stride_size >> padding_l_size;
if (stride == 1) {
_post_ops.append_dw_k3s1p1(weights_data_type, bias_data_type, dst_data_type, mask, scales);
} else {
_post_ops.append_dw_k3s2p1(weights_data_type, bias_data_type, dst_data_type, mask, scales);
}
_post_ops.append_dw(weights_data_type, bias_data_type, dst_data_type,
kernel_size, stride_size, padding_l_size);
} else if (_kind == dnnl::primitive::kind::binary) {
dnnl::algorithm aalgorithm;
dnnl::memory::desc src1_desc;
dnnl::algorithm aalgorithm = dnnl::algorithm::undef;
ib >> make_data(&aalgorithm, sizeof(dnnl::algorithm));
ib >> make_data(&src1_desc, sizeof(dnnl::memory::desc));
_post_ops.append_binary(aalgorithm, src1_desc);
dnnl::memory::desc md = onednn::layout_to_memory_desc(
impl_params->get_input_layout(fused_desc.at(idx).mem_dep),
fused_desc.at(idx).tag, fused_desc.at(idx).flatten);
_post_ops.append_binary(aalgorithm, md);
} else if (_kind == dnnl::primitive::kind::prelu) {
int mask;
ib >> mask;
@@ -333,14 +254,14 @@ struct typed_primitive_onednn_impl : public typed_primitive_impl<PType> {
}
}
_attrs.get()->set_post_ops(_post_ops);
_attrs->set_post_ops(_post_ops);
}
{
float scale;
float shift;
ib >> scale >> shift;
_attrs.get()->set_rnn_data_qparams(scale, shift);
_attrs->set_rnn_data_qparams(scale, shift);
}
{
int mask;
@@ -349,7 +270,7 @@ struct typed_primitive_onednn_impl : public typed_primitive_impl<PType> {
ib >> mask;
ib >> scales;
_attrs.get()->set_rnn_weights_qparams(mask, scales);
_attrs->set_rnn_weights_qparams(mask, scales);
}
{
int mask;
@@ -358,7 +279,7 @@ struct typed_primitive_onednn_impl : public typed_primitive_impl<PType> {
ib >> mask;
ib >> scales;
_attrs.get()->set_rnn_weights_projection_qparams(mask, scales);
_attrs->set_rnn_weights_projection_qparams(mask, scales);
}
_engine = &ib.get_engine();

View File

@@ -101,7 +101,13 @@ public:
#ifdef ONEDNN_PRIMITIVE_SERIALIZATION
parent::save(ob);
ob << make_data(&_desc->data, sizeof(dnnl_reduction_desc_t));
const dnnl::reduction::primitive_desc *typed_pd
= reinterpret_cast<const dnnl::reduction::primitive_desc *>(&_pd);
dnnl::algorithm alg = typed_pd->get_algorithm();
ob << make_data(&alg, sizeof(dnnl::algorithm));
ob << typed_pd->get_p();
ob << typed_pd->get_epsilon();
std::vector<uint8_t> prim_cache;
prim_cache = _prim.get_cache_blob();
@@ -113,13 +119,30 @@ public:
#ifdef ONEDNN_PRIMITIVE_SERIALIZATION
parent::load(ib);
_desc = std::make_shared<dnnl::reduction::desc>();
ib >> make_data(&_desc->data, sizeof(dnnl_reduction_desc_t));
const kernel_impl_params* impl_params = reinterpret_cast<kernel_impl_params*>(ib.getKernlImplParams());
dnnl::algorithm alg;
ib >> make_data(&alg, sizeof(dnnl::algorithm));
auto input_md = onednn::layout_to_memory_desc(impl_params->get_input_layout(0));
auto output_md = onednn::layout_to_memory_desc(impl_params->get_output_layout());
float p, eps;
ib >> p >> eps;
auto prim_desc = std::make_shared<dnnl::reduction::primitive_desc>(
ib.get_engine().get_onednn_engine(),
alg,
input_md,
output_md,
p,
eps,
*_attrs.get());
_pd = *prim_desc;
std::vector<uint8_t> prim_cache;
ib >> prim_cache;
_pd = dnnl::primitive_desc(&_desc->data, _attrs.get(), ib.get_engine().get_onednn_engine(), nullptr);
_prim = dnnl::primitive(_pd, prim_cache);
#endif
}

View File

@@ -78,8 +78,17 @@ public:
parent::load(ib);
const kernel_impl_params* impl_params = reinterpret_cast<kernel_impl_params*>(ib.getKernlImplParams());
auto desc = get_reorder_descriptor(*impl_params, *_attrs, ib.get_engine());
_pd = *desc;
auto input_md = onednn::layout_to_memory_desc(impl_params->get_input_layout(0));
auto output_md = onednn::layout_to_memory_desc(impl_params->get_output_layout());
auto prim_desc = std::make_shared<dnnl::reorder::primitive_desc>(
ib.get_engine().get_onednn_engine(),
input_md,
ib.get_engine().get_onednn_engine(),
output_md,
*_attrs.get());
_pd = *prim_desc;
std::vector<uint8_t> prim_cache;
ib >> prim_cache;

View File

@@ -95,6 +95,8 @@ struct fused_primitive_desc_onednn {
onednn_post_op_type op_type; // onednn post-operation type
size_t mem_offset; // index of a memory buffer for current post-operation
size_t mem_dep; // memory dependency for working with fused node
dnnl::memory::format_tag tag; // format tag used to rebuild this post-op's memory desc at load time
bool flatten; // true if layout_to_memory_desc flattened the layout for this post-op
};
#endif // ENABLE_ONEDNN_FOR_GPU
} // namespace cldnn

View File

@@ -1162,6 +1162,8 @@ void kernel_impl_params::save(BinaryOutputBuffer& ob) const {
}
void kernel_impl_params::load(BinaryInputBuffer& ib) {
prog = nullptr;
desc = nullptr;
ib >> has_runtime_layouts;
ib >> unique_id;
ib >> input_layouts;

View File

@@ -1077,17 +1077,6 @@ void primitive_inst::save(cldnn::BinaryOutputBuffer& ob) const {
_impl_params->save(ob);
ob.setKernlImplParams(_impl_params.get());
if (_impl != nullptr) {
ob << true;
kernel_arguments_data args = _impl->get_arguments(*this);
kernel_arguments_data_idx args_idx;
convert_args(args, args_idx);
_impl->set_arguments(args_idx);
ob << _impl;
} else {
ob << false;
}
ob << _node_output_layout;
ob << has_mutable_input();
ob << mem_allocated();
@@ -1140,6 +1129,17 @@ void primitive_inst::save(cldnn::BinaryOutputBuffer& ob) const {
const auto _allocation_type = ibuf->get_allocation_type();
ob << make_data(&_allocation_type, sizeof(_allocation_type));
}
if (_impl != nullptr) {
ob << true;
kernel_arguments_data args = _impl->get_arguments(*this);
kernel_arguments_data_idx args_idx;
convert_args(args, args_idx);
_impl->set_arguments(args_idx);
ob << _impl;
} else {
ob << false;
}
}
void primitive_inst::convert_args(const kernel_arguments_data& args, kernel_arguments_data_idx& args_idx) const {
@@ -1185,13 +1185,6 @@ void primitive_inst::load(cldnn::BinaryInputBuffer& ib) {
_impl_params->load(ib);
ib.setKernlImplParams(_impl_params.get());
bool has_impl;
ib >> has_impl;
if (has_impl) {
_impl.release();
ib >> _impl;
}
ib >> _node_output_layout;
ib >> _has_mutable_input;
ib >> _mem_allocated;
@@ -1268,5 +1261,12 @@ void primitive_inst::load(cldnn::BinaryInputBuffer& ib) {
_intermediates_memory[i] = get_network().get_engine().allocate_memory(ibuf_layout, _allocation_type);
}
bool has_impl;
ib >> has_impl;
if (has_impl) {
_impl.release();
ib >> _impl;
}
}
} // namespace cldnn

View File

@@ -897,8 +897,10 @@ void program_node::init_onednn_primitive_attributes() {
size_t empty_mem = 0xff;
// Add information about post-operation into the list, update indices
auto update_onednn_post_op_list = [&](onednn_post_op_type type, size_t m_dep) {
fused_primitive_desc_onednn cur_op_desc = { type, memory_offset, m_dep };
auto update_onednn_post_op_list = [&](onednn_post_op_type type, size_t m_dep,
dnnl::memory::format_tag tag = dnnl::memory::format_tag::undef,
bool flatten = false) {
fused_primitive_desc_onednn cur_op_desc = { type, memory_offset, m_dep, tag, flatten };
fused_ops.push_back(cur_op_desc);
auto has_memory_buffers = type == onednn_post_op_type::binary_add ||
@@ -970,7 +972,7 @@ void program_node::init_onednn_primitive_attributes() {
} else if (desc.typed_desc<eltwise>()->mode == eltwise_mode::prod) {
dnnl::memory::desc in_desc = onednn::layout_to_memory_desc(in, dnnl::memory::format_tag::ab, true);
post_ops.append_binary(dnnl::algorithm::binary_mul, in_desc);
update_onednn_post_op_list(onednn_post_op_type::binary_mul, dep_idx);
update_onednn_post_op_list(onednn_post_op_type::binary_mul, dep_idx, dnnl::memory::format_tag::ab, true);
} else {
std::stringstream error_msg;
error_msg << "Unsupported eltwise mode: " << static_cast<int>(desc.typed_desc<eltwise>()->mode) << ". ";
@@ -996,7 +998,7 @@ void program_node::init_onednn_primitive_attributes() {
auto in_scale = get_dependency(dep_idx++).get_output_layout();
dnnl::memory::desc in_scale_desc = onednn::layout_to_memory_desc(in_scale, dnnl::memory::format_tag::ab, true);
post_ops.append_binary(dnnl::algorithm::binary_mul, in_scale_desc);
update_onednn_post_op_list(onednn_post_op_type::binary_mul, dep_idx - 1);
update_onednn_post_op_list(onednn_post_op_type::binary_mul, dep_idx - 1, dnnl::memory::format_tag::ab, true);
}
if (q_param->has_pre_shift) {
@@ -1007,7 +1009,7 @@ void program_node::init_onednn_primitive_attributes() {
auto in_shift = get_dependency(dep_idx++).get_output_layout();
dnnl::memory::desc in_shift_desc = onednn::layout_to_memory_desc(in_shift, dnnl::memory::format_tag::ab, true);
post_ops.append_binary(dnnl::algorithm::binary_add, in_shift_desc);
update_onednn_post_op_list(onednn_post_op_type::binary_add, dep_idx - 1);
update_onednn_post_op_list(onednn_post_op_type::binary_add, dep_idx - 1, dnnl::memory::format_tag::ab, true);
}
}
}
@@ -1038,7 +1040,7 @@ void program_node::init_onednn_primitive_attributes() {
auto out_scale = get_dependency(dep_idx++).get_output_layout();
dnnl::memory::desc out_scale_desc = onednn::layout_to_memory_desc(out_scale, dnnl::memory::format_tag::ab, true);
post_ops.append_binary(dnnl::algorithm::binary_mul, out_scale_desc);
update_onednn_post_op_list(onednn_post_op_type::binary_mul, dep_idx - 1);
update_onednn_post_op_list(onednn_post_op_type::binary_mul, dep_idx - 1, dnnl::memory::format_tag::ab, true);
}
}
@@ -1050,7 +1052,7 @@ void program_node::init_onednn_primitive_attributes() {
auto out_shift = get_dependency(dep_idx++).get_output_layout();
dnnl::memory::desc out_shift_desc = onednn::layout_to_memory_desc(out_shift, dnnl::memory::format_tag::ab, true);
post_ops.append_binary(dnnl::algorithm::binary_add, out_shift_desc);
update_onednn_post_op_list(onednn_post_op_type::binary_add, dep_idx - 1);
update_onednn_post_op_list(onednn_post_op_type::binary_add, dep_idx - 1, dnnl::memory::format_tag::ab, true);
}
}
}
@@ -1078,9 +1080,9 @@ void program_node::init_onednn_primitive_attributes() {
dnnl::memory::desc in_hi_desc = onednn::layout_to_memory_desc(in_hi, dnnl::memory::format_tag::ab, true);
post_ops.append_binary(clamp_max, in_lo_desc);
update_onednn_post_op_list(onednn_post_op_type::binary_max, dep_idx - 2);
update_onednn_post_op_list(onednn_post_op_type::binary_max, dep_idx - 2, dnnl::memory::format_tag::ab, true);
post_ops.append_binary(clamp_min, in_hi_desc);
update_onednn_post_op_list(onednn_post_op_type::binary_min, dep_idx - 1);
update_onednn_post_op_list(onednn_post_op_type::binary_min, dep_idx - 1, dnnl::memory::format_tag::ab, true);
}
}
@@ -1097,7 +1099,7 @@ void program_node::init_onednn_primitive_attributes() {
auto in_scale = get_dependency(dep_idx++).get_output_layout();
dnnl::memory::desc in_scale_desc = onednn::layout_to_memory_desc(in_scale, dnnl::memory::format_tag::ab, true);
post_ops.append_binary(dnnl::algorithm::binary_mul, in_scale_desc);
update_onednn_post_op_list(onednn_post_op_type::binary_mul, dep_idx - 1);
update_onednn_post_op_list(onednn_post_op_type::binary_mul, dep_idx - 1, dnnl::memory::format_tag::ab, true);
}
if (q_param->has_pre_shift) {
@@ -1108,7 +1110,7 @@ void program_node::init_onednn_primitive_attributes() {
auto in_shift = get_dependency(dep_idx++).get_output_layout();
dnnl::memory::desc in_shift_desc = onednn::layout_to_memory_desc(in_shift, dnnl::memory::format_tag::ab, true);
post_ops.append_binary(dnnl::algorithm::binary_add, in_shift_desc);
update_onednn_post_op_list(onednn_post_op_type::binary_add, dep_idx - 1);
update_onednn_post_op_list(onednn_post_op_type::binary_add, dep_idx - 1, dnnl::memory::format_tag::ab, true);
}
}
}
@@ -1135,7 +1137,7 @@ void program_node::init_onednn_primitive_attributes() {
auto out_scale = get_dependency(dep_idx++).get_output_layout();
dnnl::memory::desc out_scale_desc = onednn::layout_to_memory_desc(out_scale, dnnl::memory::format_tag::ab, true);
post_ops.append_binary(dnnl::algorithm::binary_mul, out_scale_desc);
update_onednn_post_op_list(onednn_post_op_type::binary_mul, dep_idx - 1);
update_onednn_post_op_list(onednn_post_op_type::binary_mul, dep_idx - 1, dnnl::memory::format_tag::ab, true);
}
}
@@ -1147,7 +1149,7 @@ void program_node::init_onednn_primitive_attributes() {
auto out_shift = get_dependency(dep_idx++).get_output_layout();
dnnl::memory::desc out_shift_desc = onednn::layout_to_memory_desc(out_shift, dnnl::memory::format_tag::ab, true);
post_ops.append_binary(dnnl::algorithm::binary_add, out_shift_desc);
update_onednn_post_op_list(onednn_post_op_type::binary_add, dep_idx - 1);
update_onednn_post_op_list(onednn_post_op_type::binary_add, dep_idx - 1, dnnl::memory::format_tag::ab, true);
}
}
}

View File

@@ -66,6 +66,16 @@ Graph::Graph(cldnn::BinaryInputBuffer &ib, RemoteContextImpl::Ptr context, const
if (m_program->m_max_batch > 1)
m_config.set_property(ov::intel_gpu::max_dynamic_batch(m_program->m_max_batch));
bool need_onednn_engine = false;
ib >> need_onednn_engine;
if (need_onednn_engine) {
#ifdef ENABLE_ONEDNN_FOR_GPU
get_engine().create_onednn_engine(config);
#else
IE_THROW() << "[GPU] Current model cache requires OneDNN, but cannot use it.";
#endif // ENABLE_ONEDNN_FOR_GPU
}
ib >> m_program->inputLayouts;
ib >> primitiveIDs;
ib >> outputDims;
@@ -470,6 +480,17 @@ std::shared_ptr<ngraph::Function> Graph::GetExecGraphInfoByPrimitivesInfo(std::v
// [ ov::intel_gpu::Graph::outputDims ]
// [ cldnn::network ]
void Graph::Export(cldnn::BinaryOutputBuffer &ob) {
bool need_onednn_engine = false;
#ifdef ENABLE_ONEDNN_FOR_GPU
try {
get_engine().get_onednn_engine();
need_onednn_engine = true;
} catch (ov::AssertFailure &) {
need_onednn_engine = false;
}
#endif // ENABLE_ONEDNN_FOR_GPU
ob << need_onednn_engine;
ob << m_program->inputLayouts;
ob << primitiveIDs;
ob << outputDims;