[GPU] Fix OneDNN primitive attr serialization logic (#16654)
* fix onednn primitive attr serialization logic
* added an onednn fc fusing serialization test
* added gemm fusing serialization tests
This commit is contained in:
@@ -120,6 +120,8 @@ struct fused_primitive_desc_onednn {
|
||||
size_t mem_dep; // memory dependency for working with fused node
|
||||
dnnl::memory::format_tag tag;
|
||||
bool flatten;
|
||||
dnnl::memory::dims dims;
|
||||
dnnl::memory::data_type dt;
|
||||
};
|
||||
#endif // ENABLE_ONEDNN_FOR_GPU
|
||||
} // namespace cldnn
|
||||
|
||||
@@ -249,11 +249,16 @@ struct typed_primitive_onednn_impl : public typed_primitive_impl<PType> {
|
||||
dnnl::algorithm aalgorithm = dnnl::algorithm::undef;
|
||||
ib >> make_data(&aalgorithm, sizeof(dnnl::algorithm));
|
||||
|
||||
dnnl::memory::desc md = onednn::layout_to_memory_desc(
|
||||
impl_params->get_input_layout(fused_desc.at(idx).mem_dep),
|
||||
fused_desc.at(idx).tag, fused_desc.at(idx).flatten);
|
||||
if (fused_desc.at(idx).dims.size() > 0) {
|
||||
_post_ops.append_binary(aalgorithm,
|
||||
dnnl::memory::desc(fused_desc.at(idx).dims, fused_desc.at(idx).dt, fused_desc.at(idx).tag));
|
||||
} else {
|
||||
dnnl::memory::desc md = onednn::layout_to_memory_desc(
|
||||
impl_params->get_input_layout(fused_desc.at(idx).mem_dep),
|
||||
fused_desc.at(idx).tag, fused_desc.at(idx).flatten);
|
||||
|
||||
_post_ops.append_binary(aalgorithm, md);
|
||||
_post_ops.append_binary(aalgorithm, md);
|
||||
}
|
||||
} else if (_kind == dnnl::primitive::kind::prelu) {
|
||||
int mask;
|
||||
ib >> mask;
|
||||
|
||||
@@ -118,7 +118,13 @@ void kernel_impl_params::save(BinaryOutputBuffer& ob) const {
|
||||
size_t num_fused_prims = fused_desc_onednn.size();
|
||||
ob << num_fused_prims;
|
||||
for (auto fused_prim : fused_desc_onednn) {
|
||||
ob << make_data(&fused_prim, sizeof(fused_primitive_desc_onednn));
|
||||
ob << make_data(&fused_prim.op_type, sizeof(onednn_post_op_type));
|
||||
ob << fused_prim.mem_offset;
|
||||
ob << fused_prim.mem_dep;
|
||||
ob << make_data(&fused_prim.tag, sizeof(dnnl::memory::format_tag));
|
||||
ob << fused_prim.flatten;
|
||||
ob << fused_prim.dims;
|
||||
ob << make_data(&fused_prim.dt, sizeof(dnnl::memory::data_type));
|
||||
}
|
||||
#endif // ENABLE_ONEDNN_FOR_GPU
|
||||
ob << primary_input_idx;
|
||||
@@ -187,7 +193,13 @@ void kernel_impl_params::load(BinaryInputBuffer& ib) {
|
||||
ib >> num_fused_prims;
|
||||
fused_desc_onednn.resize(num_fused_prims);
|
||||
for (size_t idx = 0; idx < num_fused_prims; ++idx) {
|
||||
ib >> make_data(&fused_desc_onednn[idx], sizeof(fused_primitive_desc_onednn));
|
||||
ib >> make_data(&fused_desc_onednn[idx].op_type, sizeof(onednn_post_op_type));
|
||||
ib >> fused_desc_onednn[idx].mem_offset;
|
||||
ib >> fused_desc_onednn[idx].mem_dep;
|
||||
ib >> make_data(&fused_desc_onednn[idx].tag, sizeof(dnnl::memory::format_tag));
|
||||
ib >> fused_desc_onednn[idx].flatten;
|
||||
ib >> fused_desc_onednn[idx].dims;
|
||||
ib >> make_data(&fused_desc_onednn[idx].dt, sizeof(dnnl::memory::data_type));
|
||||
}
|
||||
#endif // ENABLE_ONEDNN_FOR_GPU
|
||||
ib >> primary_input_idx;
|
||||
|
||||
@@ -1108,7 +1108,7 @@ void network::execute_impl(const std::vector<event::ptr>& events) {
|
||||
for (size_t i = 0; i < get_primitive(inst->id())->dependencies().size(); i++) {
|
||||
log_memory_to_file(get_primitive(inst->id())->dep_memory_ptr(i),
|
||||
get_stream(),
|
||||
"program" + std::to_string(get_program()->get_id()) +
|
||||
"program" + std::to_string((get_program() != nullptr) ? get_program()->get_id() : 1) +
|
||||
"_network" + std::to_string(get_id()) +
|
||||
"_" + layer_name + "_src" + std::to_string(i),
|
||||
debug_config->dump_layers_raw);
|
||||
@@ -1125,7 +1125,7 @@ void network::execute_impl(const std::vector<event::ptr>& events) {
|
||||
for (size_t i = 0; i < get_primitive(inst->id())->outputs_memory_count(); i++) {
|
||||
log_memory_to_file(get_primitive(inst->id())->output_memory_ptr(i),
|
||||
get_stream(),
|
||||
"program" + std::to_string(get_program()->get_id()) +
|
||||
"program" + std::to_string((get_program() != nullptr) ? get_program()->get_id() : 1) +
|
||||
"_network" + std::to_string(get_id()) +
|
||||
"_" + layer_name + "_dst" + std::to_string(i),
|
||||
debug_config->dump_layers_raw);
|
||||
|
||||
@@ -918,8 +918,10 @@ void program_node::init_onednn_primitive_attributes() {
|
||||
// Add information about post-operation into the list, update indices
|
||||
auto update_onednn_post_op_list = [&](onednn_post_op_type type, size_t m_dep,
|
||||
dnnl::memory::format_tag tag = dnnl::memory::format_tag::undef,
|
||||
bool flatten = false) {
|
||||
fused_primitive_desc_onednn cur_op_desc = { type, memory_offset, m_dep, tag, flatten };
|
||||
bool flatten = false,
|
||||
dnnl::memory::dims dims = {},
|
||||
dnnl::memory::data_type dt = dnnl::memory::data_type::undef) {
|
||||
fused_primitive_desc_onednn cur_op_desc = { type, memory_offset, m_dep, tag, flatten, dims, dt };
|
||||
fused_ops.push_back(cur_op_desc);
|
||||
|
||||
auto has_memory_buffers = type == onednn_post_op_type::binary_add ||
|
||||
@@ -984,14 +986,14 @@ void program_node::init_onednn_primitive_attributes() {
|
||||
cldnn::onednn::combine_bf_with_first_spatial_dim(in);
|
||||
}
|
||||
post_ops.append_binary(alg, onednn::layout_to_memory_desc(in, dnnl::memory::format_tag::ab));
|
||||
update_onednn_post_op_list(op_type, dep_idx);
|
||||
update_onednn_post_op_list(op_type, dep_idx, dnnl::memory::format_tag::ab);
|
||||
} else if (is_type<gemm>()) {
|
||||
size_t rank = cldnn::format::dimension(in.format);
|
||||
dnnl::memory::dims dims = onednn::convert_gemm_tensor(in.get_tensor(), rank, in.batch() == 1);
|
||||
dnnl::memory::data_type dt = onednn::convert_data_type(in.data_type);
|
||||
dnnl::memory::format_tag fmt = onednn::convert_gemm_data_format(dims);
|
||||
post_ops.append_binary(alg, dnnl::memory::desc(dims, dt, fmt));
|
||||
update_onednn_post_op_list(op_type, dep_idx);
|
||||
update_onednn_post_op_list(op_type, dep_idx, fmt, false, dims, dt);
|
||||
} else {
|
||||
post_ops.append_binary(alg, onednn::layout_to_memory_desc(in));
|
||||
update_onednn_post_op_list(op_type, dep_idx);
|
||||
|
||||
@@ -78,7 +78,7 @@ public:
|
||||
#ifdef ENABLE_ONEDNN_FOR_GPU
|
||||
class FullyConnectedFusingTestOneDNN : public BaseFusingTest<fully_connected_test_params> {
|
||||
public:
|
||||
void execute(fully_connected_test_params& p) {
|
||||
void execute(fully_connected_test_params& p, bool is_caching_test = false) {
|
||||
// Onednn post operation has issue in a machine that does not support imad.
|
||||
if (!engine.get_device_info().supports_immad)
|
||||
return;
|
||||
@@ -103,12 +103,12 @@ public:
|
||||
ov::intel_gpu::ImplementationDesc fc_ocl_impl = { ocl_forcing_format, p.ocl_kernel_name /*fully_connected_gpu_bfyx_ref*/};
|
||||
cfg_not_fused.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "fc_prim", fc_ocl_impl } }));
|
||||
}
|
||||
network network_not_fused(this->engine, this->topology_non_fused, cfg_not_fused);
|
||||
network network_fused(this->engine, this->topology_fused, cfg_fused);
|
||||
network_fused.set_input_data("input", input_prim);
|
||||
network_not_fused.set_input_data("input", input_prim);
|
||||
network::ptr network_not_fused = get_network(this->engine, this->topology_non_fused, cfg_not_fused, get_test_stream_ptr(), is_caching_test);
|
||||
network::ptr network_fused = get_network(this->engine, this->topology_fused, cfg_fused, get_test_stream_ptr(), is_caching_test);
|
||||
network_fused->set_input_data("input", input_prim);
|
||||
network_not_fused->set_input_data("input", input_prim);
|
||||
|
||||
compare(network_not_fused, network_fused, p);
|
||||
compare(*network_not_fused, *network_fused, p);
|
||||
}
|
||||
|
||||
layout get_input_layout(fully_connected_test_params& p) {
|
||||
@@ -440,6 +440,25 @@ TEST_P(fc_int8_inputs_fused_fp32_sum, basic) {
|
||||
execute(p);
|
||||
}
|
||||
|
||||
// Caching variant of the fc + eltwise-sum fusing test: builds the topology below
// and runs execute() with its second argument (is_caching_test) set to true.
TEST_P(fc_int8_inputs_fused_fp32_sum, basic_cached) {
|
||||
auto p = GetParam();
|
||||
// Layout of the constant added after the fully connected primitive; its single
// dimension is taken from the first entry of the weights shape.
auto shift_layout = layout{ ov::PartialShape{p.weights_shape[0]}, p.default_type, p.default_format };
|
||||
|
||||
// Chain: input -> fc_prim (f32) -> eltwise sum with shift_data -> crop -> reorder.
create_topologies(
|
||||
input_layout("input", get_input_layout(p)),
|
||||
data("weights", get_mem(get_weights_layout(p))),
|
||||
data("bias", get_mem(get_bias_layout(p))),
|
||||
data("shift_data", get_mem(shift_layout, 1)),
|
||||
fully_connected("fc_prim", input_info("input"), "weights", "bias", cldnn::data_types::f32, padding(), get_output_dim_size(p), get_input_weights_rank(p)),
|
||||
eltwise("shift", { input_info("fc_prim"), input_info("shift_data") }, eltwise_mode::sum, cldnn::data_types::f32),
|
||||
crop("crop", input_info("shift"), get_output_layout(p).get_tensor(), { 0, 0, 0, 0 }),
|
||||
reorder("reorder_bfyx", input_info("crop"), p.default_format, data_types::f32)
|
||||
);
|
||||
|
||||
tolerance = 1.f;
|
||||
// true selects the caching path of execute().
execute(p, true);
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(fusings_gpu, fc_int8_inputs_fused_fp32_sum, ::testing::ValuesIn(std::vector<fully_connected_test_params>{
|
||||
// OneDNN has issue with small shapes - ticket 7064
|
||||
// fully_connected_test_params{ CASE_FC_U8S8_3D_1, 2, 4 },
|
||||
|
||||
@@ -40,7 +40,7 @@ struct gemm_test_params {
|
||||
class GemmFusingTest : public ::BaseFusingTest<gemm_test_params> {
|
||||
public:
|
||||
|
||||
void execute(gemm_test_params& p) {
|
||||
void execute(gemm_test_params& p, bool is_caching_test = false) {
|
||||
auto input0_prim = get_mem(get_input_layout(p, 0));
|
||||
auto input1_prim = get_mem(get_input_layout(p, 1));
|
||||
|
||||
@@ -51,19 +51,19 @@ public:
|
||||
cfg_not_fused.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ {"gemm_prim", gemm_ref_impl} }));
|
||||
}
|
||||
|
||||
network network_not_fused(this->engine, this->topology_non_fused, cfg_not_fused);
|
||||
network network_fused(this->engine, this->topology_fused, cfg_fused);
|
||||
network_fused.set_input_data("input0", input0_prim);
|
||||
network_not_fused.set_input_data("input0", input0_prim);
|
||||
network_fused.set_input_data("input1", input1_prim);
|
||||
network_not_fused.set_input_data("input1", input1_prim);
|
||||
network::ptr network_not_fused = get_network(this->engine, this->topology_non_fused, cfg_not_fused, get_test_stream_ptr(), is_caching_test);
|
||||
network::ptr network_fused = get_network(this->engine, this->topology_fused, cfg_fused, get_test_stream_ptr(), is_caching_test);
|
||||
network_fused->set_input_data("input0", input0_prim);
|
||||
network_not_fused->set_input_data("input0", input0_prim);
|
||||
network_fused->set_input_data("input1", input1_prim);
|
||||
network_not_fused->set_input_data("input1", input1_prim);
|
||||
if (p.in_shapes.size() > 2) {
|
||||
auto input2_prim = get_mem(get_input_layout(p, 2));
|
||||
network_fused.set_input_data("input2", input2_prim);
|
||||
network_not_fused.set_input_data("input2", input2_prim);
|
||||
network_fused->set_input_data("input2", input2_prim);
|
||||
network_not_fused->set_input_data("input2", input2_prim);
|
||||
}
|
||||
|
||||
compare(network_not_fused, network_fused, p);
|
||||
compare(*network_not_fused, *network_fused, p);
|
||||
}
|
||||
|
||||
layout get_input_layout(gemm_test_params& p, int in_no) {
|
||||
@@ -317,6 +317,38 @@ TEST_P(gemm_2in_add, eltwise_postop) {
|
||||
execute(p);
|
||||
}
|
||||
|
||||
// Caching variant of the gemm + eltwise post-op fusing test: builds the topology
// below and runs execute() with its second argument (is_caching_test) set to true.
TEST_P(gemm_2in_add, eltwise_postop_cached) {
|
||||
auto p = GetParam();
|
||||
|
||||
// On devices reporting supports_immad, force the onednn implementation for
// "gemm_prim" in the fused configuration.
if (engine.get_device_info().supports_immad) {
|
||||
ov::intel_gpu::ImplementationDesc gemmv_impl = { cldnn::format::type::any, "", impl_types::onednn };
|
||||
cfg_fused.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "gemm_prim", gemmv_impl } }));
|
||||
}
|
||||
|
||||
// The eltwise operand starts from the output layout, with either the batch or
// the feature extent set to 1 depending on p.broadcast_kind.
auto add_data_layout = get_output_layout(p);
|
||||
auto add_data_size = add_data_layout.get_tensor();
|
||||
if (p.broadcast_kind == dim_vec_kind::batch)
|
||||
add_data_size.batch[0] = 1;
|
||||
else
|
||||
add_data_size.feature[0] = 1;
|
||||
add_data_layout.set_tensor(add_data_size);
|
||||
|
||||
auto in_layout0 = get_input_layout(p, 0);
|
||||
auto in_layout1 = get_input_layout(p, 1);
|
||||
|
||||
// Chain: input0, input1 -> gemm_prim (f32) -> eltwise (p.eltwise_m) with add_data -> reorder.
create_topologies(
|
||||
input_layout("input0", in_layout0),
|
||||
input_layout("input1", in_layout1),
|
||||
data("add_data", get_mem(add_data_layout, 1.0f/p.kernel.count())),
|
||||
gemm("gemm_prim", { input_info("input0"), input_info("input1") }, data_types::f32, false, false, 1.f, 0.f, in_layout0.get_rank(), in_layout1.get_rank()),
|
||||
eltwise("add_prim", { input_info("gemm_prim"), input_info("add_data") }, p.eltwise_m, p.default_type),
|
||||
reorder("reorder_bfyx", input_info("add_prim"), p.default_format, data_types::f32)
|
||||
);
|
||||
|
||||
tolerance = default_tolerance(p.default_type);
|
||||
// true selects the caching path of execute().
execute(p, true);
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(fusings_gpu, gemm_2in_add, ::testing::ValuesIn(std::vector<gemm_test_params>{
|
||||
gemm_test_params{ CASE_GEMM_2IN_FP16_5, 3, 4, "", dim_vec_kind::batch, eltwise_mode::sum },
|
||||
gemm_test_params{ CASE_GEMM_2IN_FP16_5, 3, 4, "", dim_vec_kind::batch, eltwise_mode::prod },
|
||||
|
||||
Reference in New Issue
Block a user