[GPU] fix issues of MobileFaceNet for dynamic shape (#18171)

* fix issues of MobileFaceNet for dynamic shape

* update unit test
commit f306a11b82
parent 9e91076a63
Author: Wilson Seok
Date:   2023-06-26 17:22:15 +09:00 (committed by GitHub)
6 changed files with 197 additions and 2 deletions


@@ -1063,6 +1063,11 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) {
auto eltw_in_size = peer_node->get_output_layout();
if (eltw_in_size.is_dynamic())
return;
+ // When the input rank is > 4, an eltwise fused onto a gemm has to be converted to 4D in init_onednn_primitive_attribute(),
+ // but that function cannot currently handle the dynamic shape case, so skip the fusion here.
+ auto eltw_in_rank = fused_node->get_output_layout().get_rank();
+ if ((fused_node->is_type<gemm>()) && (eltw_in_rank > 4))
+     return;
}
if (parent1.first->is_type<convolution>() && !conv_supports_fusings(parent1.first->as<convolution>()))
return;
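Note on the guard added above: collapsing a rank > 4 eltwise shape to 4D for oneDNN needs concrete dimension values, but a dynamic shape only commits to its rank at program build time. A minimal standalone sketch of that limitation (illustrative only, not part of the patch; it assumes nothing beyond the public ov::PartialShape API):

#include <openvino/core/partial_shape.hpp>
#include <iostream>

int main() {
    // A rank-5 dynamic shape: the rank is known statically, the dimensions are not,
    // so a 5D -> 4D collapse (e.g. folding the two outermost dims into one)
    // cannot be computed while the program is being built.
    ov::PartialShape shape = ov::PartialShape::dynamic(5);
    std::cout << shape.rank().get_length() << "\n";                // prints 5
    std::cout << std::boolalpha << shape[0].is_dynamic() << "\n";  // prints true
    return 0;
}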


@@ -1296,7 +1296,9 @@ bool primitive_inst::is_valid_fusion() const {
auto outer_dep_pshape = outer_dep.first->_impl_params->get_output_layout().get_partial_shape();
auto merged_shape = out_pshape;
- auto can_broadcast = ov::PartialShape::broadcast_merge_into(merged_shape, outer_dep_pshape, fd.typed_desc<eltwise>()->broadcast_spec);
+ bool can_broadcast = true;
+ if (fd.is_type<eltwise>())
+     can_broadcast = ov::PartialShape::broadcast_merge_into(merged_shape, outer_dep_pshape, fd.typed_desc<eltwise>()->broadcast_spec);
#ifdef ENABLE_ONEDNN_FOR_GPU
// WA for OneDNN binary add fusions: we need to broadcast batch dimension to avoid situation with
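For reference, ov::PartialShape::broadcast_merge_into() used above reports whether the shapes are broadcast-compatible and merges the result into its first argument instead of throwing. A minimal standalone sketch of that behavior (illustrative only; it assumes the public OpenVINO core API):

#include <openvino/core/partial_shape.hpp>
#include <openvino/op/util/attr_types.hpp>
#include <cassert>

int main() {
    // dst starts out as one operand's shape and is merged in place.
    ov::PartialShape dst{1, 128, 4, 4};
    ov::PartialShape per_channel{1, 128, 1, 1};
    bool ok = ov::PartialShape::broadcast_merge_into(dst, per_channel,
                                                     ov::op::AutoBroadcastType::NUMPY);
    assert(ok && dst == ov::PartialShape({1, 128, 4, 4}));

    // Incompatible dims (64 vs 128) just make it return false.
    ov::PartialShape mismatched{1, 64, 1, 1};
    assert(!ov::PartialShape::broadcast_merge_into(dst, mismatched,
                                                   ov::op::AutoBroadcastType::NUMPY));
    return 0;
}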


@@ -965,7 +965,7 @@ void program_node::init_onednn_primitive_attributes() {
if (fused_desc->activation_function == cldnn::activation_func::relu_negative_slope
&& !fused_desc->additional_params_input.empty()) {
auto dep_idx = cldnn_post_ops[idx].outer_dep_start_idx;
- int oc_dim = static_cast<int>(desc.output_layout.get_tensor().feature.size());
+ auto oc_dim = static_cast<int>(desc.output_layout.get_partial_shape()[1].get_max_length());
post_ops.append_prelu(1 << oc_dim);
update_onednn_post_op_list(onednn_post_op_type::binary_relu, dep_idx);
} else if (fused_desc->activation_function == cldnn::activation_func::hard_sigmoid) {
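The one-line change above is what unlocks dynamic shapes here: the old layout::get_tensor() path requires a fully static shape, whereas get_partial_shape()[1].get_max_length() still yields the channel count when that dimension is static inside an otherwise dynamic shape. A standalone sketch of the interval query (illustrative only; it assumes the public ov::Dimension API):

#include <openvino/core/partial_shape.hpp>
#include <iostream>

int main() {
    // The channel dim is static even though the shape as a whole is dynamic.
    ov::PartialShape dyn{ov::Dimension::dynamic(), 128,
                         ov::Dimension::dynamic(), ov::Dimension::dynamic()};
    std::cout << dyn[1].get_max_length() << "\n";  // prints 128

    // A fully dynamic dim has no usable bound: get_max_length() returns -1.
    std::cout << dyn[0].get_max_length() << "\n";  // prints -1
    return 0;
}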


@@ -0,0 +1,93 @@
// Copyright (C) 2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "test_utils.h"
#include <intel_gpu/primitives/input_layout.hpp>
#include <intel_gpu/primitives/reorder.hpp>
#include <intel_gpu/primitives/data.hpp>
#include "program_wrapper.h"
#include <cmath>
#include <algorithm>
using namespace cldnn;
using namespace ::tests;
namespace is_valid_fusion_tests {
TEST(eltwise_activation_fusing_test, basic_dynamic_rank4) {
// is_valid_fusion() should work properly for the conv -> add -> prelu case
auto& engine = get_test_engine();
layout weight_layout = layout{ov::PartialShape{1, 3, 3, 3}, data_types::f16, format::bfyx};
auto weights = engine.allocate_memory(weight_layout);
set_values<FLOAT16>(weights, {
1.0f, 1.0f, 1.0f,
1.0f, 1.0f, 1.0f,
1.0f, 1.0f, 1.0f,
//
2.0f, 2.0f, 2.0f,
2.0f, 2.0f, 2.0f,
2.0f, 2.0f, 2.0f,
//
3.0f, 3.0f, 3.0f,
3.0f, 3.0f, 3.0f,
3.0f, 3.0f, 3.0f,
});
layout in_layout = layout{ov::PartialShape{1, 3, 2, 2}, data_types::f32, format::bfyx};
auto input_mem = engine.allocate_memory(in_layout);
set_values(input_mem, {11.0f, 11.0f, 11.0f, 11.0f,
11.0f, 11.0f, 11.0f, 11.0f,
11.0f, 11.0f, 11.0f, 11.0f});
std::vector<float> ref = { 33.0625f, 55.09375f, 55.09375f, 33.0625f,
55.09375f, 99.1875f, 429.75f, 385.75f,
385.75f, 760.5f, 1091.0f, 716.5f,
363.75f, 716.5f, 716.5f, 363.75f};
auto const1 = engine.allocate_memory(layout{ov::PartialShape({1, 1, 1, 1}), data_types::f32, format::bfyx});
set_values(const1, {11.0f});
auto const2 = engine.allocate_memory(layout{ov::PartialShape({1, 1, 1, 1}), data_types::f32, format::bfyx});
set_values(const2, {0.1f});
std::vector<float> values_to_subtract = {};
auto in_layout_0 = layout{ov::PartialShape::dynamic(4), data_types::f32, format::bfyx};
topology topology(input_layout("input", in_layout_0),
data("weights", weights),
data("const1", const1),
data("const2", const2),
reorder("reorder", input_info("input"), format::bfyx, data_types::f16,
values_to_subtract, reorder_mean_mode::subtract, padding{{0, 0, 2, 2}, 0}),
convolution("conv",
input_info("reorder"),
"weights",
"", /*bias*/
1,
{1, 1}, /*stride*/
{1, 1}, /*dilation*/
{2, 2}, /*pad_above*/
{2, 2}, /*pad_below*/
false,
ov::op::PadType::EXPLICIT,
padding{{0, 0, 0, 0}, 0}),
eltwise("eltwise", input_info("conv"), input_info("const1"), eltwise_mode::sum),
activation("prelu", input_info("eltwise"), "const2", activation_func::relu_negative_slope),
reorder("output", input_info("prelu"), format::bfyx, data_types::f32));
ExecutionConfig config = get_test_default_config(engine);
config.set_property(ov::intel_gpu::optimize_data(true));
config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
network network(engine, topology, config);
network.set_input_data("input", input_mem);
auto outputs = network.execute();
auto output_mem = outputs.begin()->second.get_memory();
cldnn::mem_lock<float> output_mem_ptr(output_mem, get_test_stream());
for (size_t i = 0; i < output_mem->get_layout().get_buffer_size().count(); ++i) {
ASSERT_EQ(output_mem_ptr[i], ref[i]);
}
}
} // is_valid_fusion_tests


@@ -0,0 +1,60 @@
// Copyright (C) 2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "test_utils.h"
#include "intel_gpu/runtime/engine.hpp"
#include "intel_gpu/graph/network.hpp"
#include "intel_gpu/graph/program.hpp"
#include "data_inst.h"
#include "eltwise_inst.h"
#include "activation_inst.h"
#include "reorder_inst.h"
#include "convolution_inst.h"
#include "pass_manager.h"
#include "to_string_utils.h"
#include "program_wrapper.h"
#include <memory>
using namespace cldnn;
using namespace ::tests;
TEST(add_onednn_optimization_attributes, init_attribute_for_fused_onednn_primitive) {
auto& engine = get_test_engine();
auto in_layout = layout{ov::PartialShape({-1, 3, 112, 112}), data_types::f16, format::bfyx};
auto input = engine.allocate_memory(layout{ov::PartialShape({1, 3, 112, 112}), data_types::f16, format::bfyx});
auto weight = engine.allocate_memory(layout{ov::PartialShape({128, 3, 3, 3}), data_types::f16, format::bfyx});
auto const1 = engine.allocate_memory(layout{ov::PartialShape({1, 128, 1, 1}), data_types::f16, format::bfyx});
auto const2 = engine.allocate_memory(layout{ov::PartialShape({1, 128, 1, 1}), data_types::f16, format::bfyx});
topology topology;
topology.add(input_layout("input", in_layout));
topology.add(data("weight", weight));
topology.add(data("const1", const1));
topology.add(data("const2", const2));
topology.add(convolution("convolution", input_info("input"), "weight", "", 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, false));
topology.add(eltwise("eltwise", input_info("convolution"), input_info("const1"), eltwise_mode::sum));
topology.add(activation("prelu", input_info("eltwise"), "const2", activation_func::relu_negative_slope));
topology.add(reorder("reorder", input_info("prelu"), format::bfyx, data_types::f32));
ExecutionConfig config = get_test_default_config(engine);
config.set_property(ov::intel_gpu::optimize_data(true));
config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
auto prog = program::build_program(engine, topology, config, false, false);
layout_optimizer lo(true);
lo.set_optimization_attribute(layout_optimizer::optimization_attributes_type::use_onednn_impls, true);
program_wrapper::apply_opt_pass<prepare_primitive_fusing>(*prog, lo);
program_wrapper::apply_opt_pass<add_onednn_optimization_attributes>(*prog);
ASSERT_NE(prog, nullptr);
ASSERT_FALSE(has_node(*prog, "eltwise"));
ASSERT_FALSE(has_node(*prog, "prelu"));
}


@@ -515,3 +515,38 @@ TEST(prepare_primitive_fusing, eltwise_fusing_residual_connection) {
net.execute();
ASSERT_TRUE(conv_inst->has_unfused_subgraph());
}
+ TEST(prepare_primitive_fusing, dont_fuse_eltwise_to_onednn_gemm_dyn_rank5) {
+ auto& engine = get_test_engine();
+ if (!engine.get_device_info().supports_immad)
+     return;
+ ov::Shape input1_shape = { 2, 2, 2, 2, 2};
+ ov::Shape input2_shape = { 2, 2, 2, 2, 2};
+ auto input1_layout = layout{ov::PartialShape::dynamic(input1_shape.size()), data_types::f32, format::bfzyx};
+ auto input2_layout = layout{ov::PartialShape::dynamic(input2_shape.size()), data_types::f32, format::bfzyx};
+ auto input1 = engine.allocate_memory(layout{ov::PartialShape(input1_shape), data_types::f32, format::bfzyx});
+ auto input2 = engine.allocate_memory(layout{ov::PartialShape(input2_shape), data_types::f32, format::bfzyx});
+ auto const_layout = layout{ ov::PartialShape{2, 2, 2, 2, 2}, data_types::f32, format::bfzyx };
+ auto const_mem = engine.allocate_memory(const_layout);
+ topology topology;
+ topology.add(input_layout("input1", input1_layout));
+ topology.add(input_layout("input2", input2_layout));
+ topology.add(data("const", const_mem));
+ topology.add(gemm("gemm", { input_info("input1"), input_info("input2") }, data_types::f32));
+ topology.add(eltwise("add", { input_info("gemm"), input_info("const") }, eltwise_mode::sum));
+ topology.add(reorder("reorder", input_info("add"), format::bfzyx, data_types::f16));
+ ExecutionConfig config = get_test_default_config(engine);
+ config.set_property(ov::intel_gpu::optimize_data(true));
+ config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
+ auto prog = program::build_program(engine, topology, config, false, true);
+ layout_optimizer lo(true);
+ lo.set_optimization_attribute(layout_optimizer::optimization_attributes_type::use_onednn_impls, true);
+ program_wrapper::apply_opt_pass<prepare_primitive_fusing>(*prog, lo);
+ ASSERT_NE(prog, nullptr);
+ ASSERT_TRUE(has_node(*prog, "add"));
+ }