[GPU] fix issues of MobileFaceNet for dynamic shape (#18171)
* fix issues of MobileFaceNet for dynamic shape
* update unit test
This commit is contained in:
parent
9e91076a63
commit
f306a11b82
@ -1063,6 +1063,11 @@ void prepare_primitive_fusing::fuse_simple_primitives(program &p) {
|
||||
auto eltw_in_size = peer_node->get_output_layout();
|
||||
if (eltw_in_size.is_dynamic())
|
||||
return;
|
||||
// When input rank > 4, fused eltwise to gemm should be converted to 4 dim in init_onednn_primitive_attribute()
|
||||
// But current init_onednn_primitive_attribute() cannot handle dynamic shape case.
|
||||
auto eltw_in_rank = fused_node->get_output_layout().get_rank();
|
||||
if ((fused_node->is_type<gemm>()) && (eltw_in_rank > 4))
|
||||
return;
|
||||
}
|
||||
if (parent1.first->is_type<convolution>() && !conv_supports_fusings(parent1.first->as<convolution>()))
|
||||
return;
|
||||
|
@ -1296,7 +1296,9 @@ bool primitive_inst::is_valid_fusion() const {
|
||||
|
||||
auto outer_dep_pshape = outer_dep.first->_impl_params->get_output_layout().get_partial_shape();
|
||||
auto merged_shape = out_pshape;
|
||||
auto can_broadcast = ov::PartialShape::broadcast_merge_into(merged_shape, outer_dep_pshape, fd.typed_desc<eltwise>()->broadcast_spec);
|
||||
bool can_broadcast = true;
|
||||
if (fd.is_type<eltwise>())
|
||||
can_broadcast = ov::PartialShape::broadcast_merge_into(merged_shape, outer_dep_pshape, fd.typed_desc<eltwise>()->broadcast_spec);
|
||||
|
||||
#ifdef ENABLE_ONEDNN_FOR_GPU
|
||||
// WA for OneDNN binary add fusions: we need to broadcast batch dimension to avoid situation with
|
||||
|
@ -965,7 +965,7 @@ void program_node::init_onednn_primitive_attributes() {
|
||||
if (fused_desc->activation_function == cldnn::activation_func::relu_negative_slope
|
||||
&& !fused_desc->additional_params_input.empty()) {
|
||||
auto dep_idx = cldnn_post_ops[idx].outer_dep_start_idx;
|
||||
int oc_dim = static_cast<int>(desc.output_layout.get_tensor().feature.size());
|
||||
auto oc_dim = static_cast<int>(desc.output_layout.get_partial_shape()[1].get_max_length());
|
||||
post_ops.append_prelu(1 << oc_dim);
|
||||
update_onednn_post_op_list(onednn_post_op_type::binary_relu, dep_idx);
|
||||
} else if (fused_desc->activation_function == cldnn::activation_func::hard_sigmoid) {
|
||||
|
@ -0,0 +1,93 @@
|
||||
// Copyright (C) 2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "test_utils.h"
|
||||
|
||||
#include <intel_gpu/primitives/input_layout.hpp>
|
||||
#include <intel_gpu/primitives/reorder.hpp>
|
||||
#include <intel_gpu/primitives/data.hpp>
|
||||
|
||||
#include "program_wrapper.h"
|
||||
|
||||
#include <cmath>
|
||||
#include <algorithm>
|
||||
|
||||
using namespace cldnn;
|
||||
using namespace ::tests;
|
||||
|
||||
namespace is_valid_fusion_tests {
|
||||
// End-to-end check that is_valid_fusion() works for a conv -> eltwise(add) ->
// prelu chain when the network input is declared with a dynamic rank-4 shape.
TEST(eltwise_activation_fusing_test, basic_dynamic_rank4) {
    // is_valid_fusion() should work properly when conv->add->prelu case
    auto& engine = get_test_engine();

    // 1x3x3x3 convolution weights: one constant-valued 3x3 plane per input channel.
    // NOTE(review): the memory is declared f32 but the values are written as
    // FLOAT16 via set_values<FLOAT16> -- confirm this type mismatch is
    // intentional (the reference values below were presumably produced with
    // exactly this setup).
    layout weight_layout = layout{ov::PartialShape{1, 3, 3, 3}, data_types::f32, format::bfyx};
    auto weights = engine.allocate_memory(weight_layout);
    set_values<FLOAT16>(weights, {
        1.0f, 1.0f, 1.0f,
        1.0f, 1.0f, 1.0f,
        1.0f, 1.0f, 1.0f,
        //
        2.0f, 2.0f, 2.0f,
        2.0f, 2.0f, 2.0f,
        2.0f, 2.0f, 2.0f,
        //
        3.0f, 3.0f, 3.0f,
        3.0f, 3.0f, 3.0f,
        3.0f, 3.0f, 3.0f,
    });

    // Concrete 1x3x2x2 input buffer; the topology itself uses a dynamic layout.
    layout in_layout = layout{ov::PartialShape{1, 3, 2, 2}, data_types::f32, format::bfyx};
    auto input_mem = engine.allocate_memory(in_layout);
    set_values(input_mem, {11.0f, 11.0f, 11.0f, 11.0f,
                           11.0f, 11.0f, 11.0f, 11.0f,
                           11.0f, 11.0f, 11.0f, 11.0f});
    // Precomputed expected output (4x4 spatial result of the padded pipeline).
    std::vector<float> ref = { 33.0625f, 55.09375f, 55.09375f, 33.0625f,
                               55.09375f, 99.1875f, 429.75f, 385.75f,
                               385.75f, 760.5f, 1091.0f, 716.5f,
                               363.75f, 716.5f, 716.5f, 363.75f};

    // const1 is the eltwise add operand; const2 is the prelu negative slope.
    auto const1 = engine.allocate_memory(layout{ov::PartialShape({1, 1, 1, 1}), data_types::f32, format::bfyx});
    set_values(const1, {11.0f});
    auto const2 = engine.allocate_memory(layout{ov::PartialShape({1, 1, 1, 1}), data_types::f32, format::bfyx});
    set_values(const2, {0.1f});
    std::vector<float> values_to_subtract = {};

    // Dynamic rank-4 input layout so shape inference happens at execution time.
    auto in_layout_0 = layout{ov::PartialShape::dynamic(4), data_types::f32, format::bfyx};
    topology topology(input_layout("input", in_layout_0),
                      data("weights", weights),
                      data("const1", const1),
                      data("const2", const2),
                      // f32 -> f16 reorder with explicit 2x2 spatial output padding.
                      reorder("reorder", input_info("input"), format::bfyx, data_types::f16,
                              values_to_subtract, reorder_mean_mode::subtract, padding{{0, 0, 2, 2}, 0}),
                      convolution("conv",
                                  input_info("reorder"),
                                  "weights",
                                  "", /*bias*/
                                  1,
                                  {1, 1}, /*stride*/
                                  {1, 1}, /*dilation*/
                                  {2, 2}, /*pad_above*/
                                  {2, 2}, /*pad_below*/
                                  false,
                                  ov::op::PadType::EXPLICIT,
                                  padding{{0, 0, 0, 0}, 0}),
                      eltwise("eltwise", input_info("conv"), input_info("const1"), eltwise_mode::sum),
                      activation("prelu", input_info("eltwise"), "const2", activation_func::relu_negative_slope),
                      reorder("output", input_info("prelu"), format::bfyx, data_types::f32));

    // optimize_data enables post-op fusing; allow_new_shape_infer enables the
    // dynamic-shape path this test is exercising.
    ExecutionConfig config = get_test_default_config(engine);
    config.set_property(ov::intel_gpu::optimize_data(true));
    config.set_property(ov::intel_gpu::allow_new_shape_infer(true));

    network network(engine, topology, config);
    network.set_input_data("input", input_mem);
    auto outputs = network.execute();
    auto output_mem = outputs.begin()->second.get_memory();
    cldnn::mem_lock<float> output_mem_ptr(output_mem, get_test_stream());

    // Compare every produced element against the precomputed reference.
    for (size_t i = 0; i < output_mem->get_layout().get_buffer_size().count(); ++i) {
        ASSERT_EQ(output_mem_ptr[i], ref[i]);
    }
}
|
||||
} // is_valid_fusion_tests
|
@ -0,0 +1,60 @@
|
||||
// Copyright (C) 2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "test_utils.h"
|
||||
|
||||
#include "intel_gpu/runtime/engine.hpp"
|
||||
|
||||
#include "intel_gpu/graph/network.hpp"
|
||||
#include "intel_gpu/graph/program.hpp"
|
||||
#include "data_inst.h"
|
||||
#include "eltwise_inst.h"
|
||||
#include "activation_inst.h"
|
||||
#include "reorder_inst.h"
|
||||
#include "convolution_inst.h"
|
||||
#include "pass_manager.h"
|
||||
#include "to_string_utils.h"
|
||||
|
||||
#include "program_wrapper.h"
|
||||
|
||||
#include <memory>
|
||||
|
||||
using namespace cldnn;
|
||||
using namespace ::tests;
|
||||
|
||||
// Verifies that after prepare_primitive_fusing folds eltwise(add) and prelu
// into the convolution, add_onednn_optimization_attributes can initialize the
// oneDNN post-op attributes for the fused node (with a dynamic batch) without
// failing, and that the fused nodes are gone from the program.
TEST(add_onednn_optimization_attributes, init_attribute_for_fused_onednn_primitive) {
    auto& engine = get_test_engine();

    // Dynamic batch input layout; the passes run at build time, so no input
    // buffer is needed (the original allocated one that was never used).
    auto in_layout = layout{ov::PartialShape({-1, 3, 112, 112}), data_types::f16, format::bfyx};
    auto weight = engine.allocate_memory(layout{ov::PartialShape({128, 3, 3, 3}), data_types::f16, format::bfyx});
    auto const1 = engine.allocate_memory(layout{ov::PartialShape({1, 128, 1, 1}), data_types::f16, format::bfyx});
    auto const2 = engine.allocate_memory(layout{ov::PartialShape({1, 128, 1, 1}), data_types::f16, format::bfyx});

    // conv -> add(const1) -> prelu(const2) chain that should fuse into the conv.
    topology topology;
    topology.add(input_layout("input", in_layout));
    topology.add(data("weight", weight));
    topology.add(data("const1", const1));
    topology.add(data("const2", const2));
    topology.add(convolution("convolution", input_info("input"), "weight", "", 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, false));
    topology.add(eltwise("eltwise", input_info("convolution"), input_info("const1"), eltwise_mode::sum));
    topology.add(activation("prelu", input_info("eltwise"), "const2", activation_func::relu_negative_slope));
    topology.add(reorder("reorder", input_info("prelu"), format::bfyx, data_types::f32));

    ExecutionConfig config = get_test_default_config(engine);
    config.set_property(ov::intel_gpu::optimize_data(true));
    config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
    auto prog = program::build_program(engine, topology, config, false, false);

    // Force the oneDNN implementation path, then run the fusing pass followed
    // by the attribute-initialization pass under test.
    layout_optimizer lo(true);
    lo.set_optimization_attribute(layout_optimizer::optimization_attributes_type::use_onednn_impls, true);

    program_wrapper::apply_opt_pass<prepare_primitive_fusing>(*prog, lo);
    program_wrapper::apply_opt_pass<add_onednn_optimization_attributes>(*prog);

    ASSERT_NE(prog, nullptr);
    // Both post-ops must have been fused into the convolution node.
    ASSERT_FALSE(has_node(*prog, "eltwise"));
    ASSERT_FALSE(has_node(*prog, "prelu"));
}
|
@ -515,3 +515,38 @@ TEST(prepare_primitive_fusing, eltwise_fusing_residual_connection) {
|
||||
net.execute();
|
||||
ASSERT_TRUE(conv_inst->has_unfused_subgraph());
|
||||
}
|
||||
|
||||
// prepare_primitive_fusing must NOT fuse an eltwise into a oneDNN gemm when
// the shape is dynamic with rank > 4: init_onednn_primitive_attributes() would
// have to convert the fused eltwise to 4D, which it cannot do for dynamic
// shapes, so the fusing pass bails out and the eltwise node must survive.
TEST(prepare_primitive_fusing, dont_fuse_eltwise_to_onednn_gemm_dyn_rank5) {
    auto& engine = get_test_engine();
    // The oneDNN path is only exercised on hardware with immad support.
    if (!engine.get_device_info().supports_immad)
        return;
    ov::Shape input1_shape = { 2, 2, 2, 2, 2};
    ov::Shape input2_shape = { 2, 2, 2, 2, 2};
    // Rank-5 dynamic layouts trigger the "eltwise rank > 4" bail-out.
    // The passes run at build time, so no input buffers are needed (the
    // original allocated two that were never used).
    auto input1_layout = layout{ov::PartialShape::dynamic(input1_shape.size()), data_types::f32, format::bfzyx};
    auto input2_layout = layout{ov::PartialShape::dynamic(input2_shape.size()), data_types::f32, format::bfzyx};
    auto const_layout = layout{ ov::PartialShape{2, 2, 2, 2, 2}, data_types::f32, format::bfzyx };
    auto const_mem = engine.allocate_memory(const_layout);

    // gemm -> add(const) chain; the add is the fusion candidate under test.
    topology topology;
    topology.add(input_layout("input1", input1_layout));
    topology.add(input_layout("input2", input2_layout));
    topology.add(data("const", const_mem));
    topology.add(gemm("gemm", { input_info("input1"), input_info("input2") }, data_types::f32));
    topology.add(eltwise("add", { input_info("gemm"), input_info("const") }, eltwise_mode::sum));
    topology.add(reorder("reorder", input_info("add"), format::bfzyx, data_types::f16));

    ExecutionConfig config = get_test_default_config(engine);
    config.set_property(ov::intel_gpu::optimize_data(true));
    config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
    auto prog = program::build_program(engine, topology, config, false, true);

    // Force oneDNN impls so the gemm-specific fusing restriction is reached.
    layout_optimizer lo(true);
    lo.set_optimization_attribute(layout_optimizer::optimization_attributes_type::use_onednn_impls, true);

    program_wrapper::apply_opt_pass<prepare_primitive_fusing>(*prog, lo);

    ASSERT_NE(prog, nullptr);
    // The eltwise must still exist as a standalone node, i.e. it was not fused.
    ASSERT_TRUE(has_node(*prog, "add"));
}
|
||||
|
Loading…
Reference in New Issue
Block a user