Optimized permute kernel cannot be fused with both eltwise + reorder. (#15542)
This commit is contained in:
parent
ac1e885324
commit
b9107ac7ca
@ -187,7 +187,7 @@ JitConstants PermuteKernel_tile_8x8_4x4::GetJitConstants(const permute_params& p
|
||||
jit.AddConstant(MakeJitConstant("TRANS_BUF_SIZE", (tile_size / vector_width) * tile_size * total_lws) );
|
||||
|
||||
if (!params.fused_ops.empty()) {
|
||||
std::vector<std::string> output_order = GetFusedOpOrderVector(params.outputs[0].GetDims().size());
|
||||
std::vector<std::string> output_order = GetFusedOpOrderVector(params.inputs[0].GetDims().size());
|
||||
FusedOpsConfiguration conf = {"", output_order, "input_var", params.inputs[0].GetDType(), 1};
|
||||
jit.Merge(MakeFusedOpsJitConstants(params, {conf}));
|
||||
}
|
||||
@ -248,6 +248,12 @@ CommonDispatchData PermuteKernel_tile_8x8_4x4::SetDefault(const permute_params&
|
||||
bool PermuteKernel_tile_8x8_4x4::Validate(const Params& p, const optional_params& o) const {
|
||||
if (!Parent::Validate(p, o)) return false;
|
||||
|
||||
const permute_params& params = static_cast<const permute_params&>(p);
|
||||
|
||||
if (params.outputs[0].PitchesDifferFromLogicalDims() || params.inputs[0].PitchesDifferFromLogicalDims()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
std::function<bool(const std::vector<uint16_t>&)> is_rotating_except_batch = [](const std::vector<uint16_t>& order) {
|
||||
// Target transform: rotate the feature dim to the back so that it becomes the innermost axis
|
||||
// ex) 0(b), 4(f), 1(z), 2(y), 3(x)
|
||||
@ -260,15 +266,22 @@ bool PermuteKernel_tile_8x8_4x4::Validate(const Params& p, const optional_params
|
||||
return true;
|
||||
};
|
||||
|
||||
const permute_params& params = static_cast<const permute_params&>(p);
|
||||
|
||||
if (!is_rotating_except_batch(params.order)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (params.outputs[0].PitchesDifferFromLogicalDims() || params.inputs[0].PitchesDifferFromLogicalDims()) {
|
||||
// Reports whether any fused operation is something other than a reorder.
// Returns false when fused_ops is empty or consists only of reorders.
std::function<bool(const permute_params&)> has_fused_op = [](const permute_params& params) {
    // Iterate by const reference so the fused-op descriptors are not copied;
    // an empty list simply skips the loop, so no separate emptiness check is needed.
    for (const auto& f : params.fused_ops) {
        if (f.GetType() != KernelType::REORDER)
            return true;
    }
    return false;
};
|
||||
|
||||
if (has_fused_op(params) && params.inputs[0].GetDims().size() != params.outputs[0].GetDims().size())
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
@ -79,6 +79,16 @@ public:
|
||||
// Builds the layout of the test input from p.permute_type, p.permute_format
// and p.in_shape, using a default-constructed padding.
layout get_input_layout(permute_reorder_params& p) {
    const padding default_padding{};
    return layout{ p.permute_type, p.permute_format, p.in_shape, default_padding };
}
|
||||
|
||||
// Builds a layout whose shape is the input dims rearranged by p.permute_order1:
// entry i of the resulting shape is input_dims[p.permute_order1[i]].
// Data type and format are taken from p.permute_type / p.permute_format.
layout get_elt_input_layout(permute_reorder_params& p) {
    const auto src_dims = get_input_layout(p).get_dims();

    ov::Shape permuted_shape;
    for (const auto src_axis : p.permute_order1) {
        permuted_shape.push_back(src_dims[src_axis]);
    }

    return layout{ ov::PartialShape(permuted_shape), p.permute_type, p.permute_format, padding{} };
}
|
||||
|
||||
};
|
||||
} // namespace
|
||||
|
||||
@ -500,6 +510,7 @@ INSTANTIATE_TEST_SUITE_P(fusings_gpu, permute_scale_eltwise_actv_scale_actv, ::t
|
||||
#define CASE_PERMUTE_REORDER_TILED_F16_9 { 1, 24, 2, 3, 256 }, { 0, 2, 3, 4, 1 }, { 0, 3, 1, 2 }, data_types::f16, data_types::f32, format::bfzyx, format::bfyx
|
||||
#define CASE_PERMUTE_REORDER_TILED_F16_10 { 1, 35, 3, 253 }, { 0, 2, 3, 1 }, { 0, 4, 1, 3, 2 }, data_types::f16, data_types::f32, format::bfyx, format::bfzyx
|
||||
#define CASE_PERMUTE_REORDER_TILED_F16_11 { 1, 32, 3, 253 }, { 0, 2, 3, 1 }, { 0, 5, 1, 4, 2, 3 }, data_types::f16, data_types::f32, format::bfyx, format::bfwzyx
|
||||
#define CASE_PERMUTE_REORDER_TILED_F16_12 { 1, 768, 32, 32 }, { 0, 2, 3, 1 }, { 0, 4, 1, 3, 2}, data_types::f16, data_types::f32, format::bfyx, format::bfzyx
|
||||
|
||||
class permute_redundant_reorder : public PermuteReorderFusingTest {};
|
||||
TEST_P(permute_redundant_reorder, basic) {
|
||||
@ -590,3 +601,30 @@ INSTANTIATE_TEST_SUITE_P(fusings_gpu, permute_act_reorder, ::testing::ValuesIn(s
|
||||
permute_reorder_params{ CASE_PERMUTE_REORDER_TILED_F16_10, 3, 5 },
|
||||
permute_reorder_params{ CASE_PERMUTE_REORDER_TILED_F16_11, 3, 5 },
|
||||
}));
|
||||
|
||||
class permute_eltwise_reorder : public PermuteReorderFusingTest {};
|
||||
|
||||
// Topology under test: input -> permute1 -> eltwise(sum with constant data) -> reorder1 -> permute2.
TEST_P(permute_eltwise_reorder, basic) {
    auto test_params = GetParam();
    // Layout of the constant eltwise operand, derived from the input dims and permute_order1.
    const auto elt_data_layout = get_elt_input_layout(test_params);

    create_topologies(
        input_layout("input", get_input_layout(test_params)),
        data("elt_data", get_mem(elt_data_layout)),
        permute("permute1", input_info("input"), test_params.permute_order1),
        eltwise("elt", { input_info("permute1"), input_info("elt_data") }, eltwise_mode::sum, test_params.permute_type),
        // to be fused to prev permute
        reorder("reorder1", input_info("elt"), test_params.output_format, test_params.output_type),
        // dummy last op to make reorder fused
        permute("permute2", input_info("reorder1"), test_params.permute_order2)
    );

    tolerance = 1e-5f;
    execute(test_params);
}
|
||||
|
||||
// The tiled optimized kernel must not be fused with eltwise + reorder. Currently permute_ref is selected instead and fused with eltwise + reorder.
|
||||
INSTANTIATE_TEST_SUITE_P(fusings_gpu, permute_eltwise_reorder, ::testing::ValuesIn(std::vector<permute_reorder_params>{
|
||||
permute_reorder_params{ CASE_PERMUTE_REORDER_TILED_F16_7, 3, 5 },
|
||||
permute_reorder_params{ CASE_PERMUTE_REORDER_TILED_F16_8, 3, 5 },
|
||||
permute_reorder_params{ CASE_PERMUTE_REORDER_TILED_F16_9, 3, 5 },
|
||||
permute_reorder_params{ CASE_PERMUTE_REORDER_TILED_F16_10, 3, 5 },
|
||||
permute_reorder_params{ CASE_PERMUTE_REORDER_TILED_F16_11, 3, 5 },
|
||||
permute_reorder_params{ CASE_PERMUTE_REORDER_TILED_F16_12, 3, 5 },
|
||||
}));
|
||||
|
Loading…
Reference in New Issue
Block a user