[IE CLDNN] Fix eltwise fused ops in NO_PITCH_SAME_DIMS mode (#2454)

This commit is contained in:
Sergey Shlyapnikov 2020-10-02 16:52:00 +03:00 committed by GitHub
parent 6b456e58a1
commit 534a7b81a9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 51 additions and 4 deletions

View File

@ -70,8 +70,9 @@ JitConstants EltwiseKernelRef::GetJitConstants(const eltwise_params& params) con
idx_order = {"d6", "d5", "d4", "d3", "d2", "d1"};
}
FusedOpsConfiguration conf = {"", idx_order, "res", input_dt, 1};
jit.Merge(MakeFusedOpsJitConstants(params, {conf}));
FusedOpsConfiguration tensor_coord = {"_TENSOR", idx_order, "res", input_dt, 1};
FusedOpsConfiguration linear_coord = {"_LINEAR", {"d1"}, "res", input_dt, 1, LoadType::LT_UNALIGNED, BoundaryCheck::ENABLED, IndexType::LINEAR_OFFSET};
jit.Merge(MakeFusedOpsJitConstants(params, {tensor_coord, linear_coord}));
}
return jit;

View File

@ -117,8 +117,13 @@ KERNEL(eltwise)(
DO_ELTWISE;
#if HAS_FUSED_OPS
FUSED_OPS;
OUTPUT_TYPE out = FUSED_OPS_RESULT;
#if ELTWISE_NO_PITCH_SAME_DIMS
FUSED_OPS_LINEAR;
OUTPUT_TYPE out = FUSED_OPS_RESULT_LINEAR;
#else
FUSED_OPS_TENSOR;
OUTPUT_TYPE out = FUSED_OPS_RESULT_TENSOR;
#endif
#else
#define out res
#endif

View File

@ -237,6 +237,19 @@ bool CheckInputsOutputNoPitchSameDims(const base_params& params) {
(params.inputs[0].GetLayout() == DataLayout::b_fs_zyx_fsv16 && params.inputs[0].Feature().v % 16 != 0))
return false;
if (params.fused_ops.size()) {
for (auto fused_op : params.fused_ops) {
for (size_t in = 0; in < fused_op.tensors.size(); in++) {
if (fused_op.tensors[in].LogicalSize() == 1)
continue;
if ((fused_op.tensors[in].GetLayout() == DataLayout::b_fs_yx_fsv16 && fused_op.tensors[in].Feature().v % 16 != 0) ||
(fused_op.tensors[in].GetLayout() == DataLayout::b_fs_zyx_fsv16 && fused_op.tensors[in].Feature().v % 16 != 0))
return false;
no_pitch_same_dims = no_pitch_same_dims && (params.inputs[0] == fused_op.tensors[in]);
}
}
}
for (size_t i = 1; i < params.inputs.size(); i++) {
no_pitch_same_dims = no_pitch_same_dims && (params.inputs[0] == params.inputs[i]);

View File

@ -6253,6 +6253,34 @@ INSTANTIATE_TEST_CASE_P(fusings_gpu,
eltwise_test_params{CASE_ELTWISE_U8_FP16_3},
}), );
// Fixture for the eltwise + quantize fusing test below; it adds nothing on top
// of EltwiseFusingTest and exists only to give the parameterized suite its name.
// NOTE(review): judging by the name, this is meant to cover the eltwise kernel's
// NO_PITCH_SAME_DIMS path with a fused quantize — confirm against the kernel selector.
class eltwise_no_pitches_same_dims_quantize : public EltwiseFusingTest {};
// Builds the topology  input, input2 -> eltwise -> quantize -> reorder(f32)
// and runs it through execute(p) with a tolerance of 1.
TEST_P(eltwise_no_pitches_same_dims_quantize, quantize_f32_output) {
auto p = GetParam();
// Both eltwise operands are network inputs; the eltwise mode and its data type
// come from the test parameters.
create_topologies(input_layout("input", get_input_layout(p)),
input_layout("input2", get_input_layout2(p)),
eltwise("eltwise", {"input", "input2"}, p.mode, p.default_type),
// Quantize input-range tensors use the per-channel layout.
// NOTE(review): the two trailing get_mem arguments look like a fill range
// ([min_random, 0] and [1, max_random]) — confirm against get_mem.
data("in_lo", get_mem(get_per_channel_layout(p), min_random, 0)),
data("in_hi", get_mem(get_per_channel_layout(p), 1, max_random)),
// Quantize output-range tensors use the single-element layout, filled with -128 / 127.
data("out_lo", get_mem(get_single_element_layout(p), -128)),
data("out_hi", get_mem(get_single_element_layout(p), 127)),
// 256 is passed as the quantize level count and p.input_type as its data type
// (presumably the quantize output type — verify against the primitive's constructor).
quantize("quantize", "eltwise", "in_lo", "in_hi", "out_lo", "out_hi", 256, p.input_type),
// Final reorder converts the result to f32 in the default format.
reorder("out", "quantize", p.default_format, data_types::f32));
// Allow results to differ by up to 1 (the quantize output bounds above are the integers -128 and 127).
tolerance = 1.f;
execute(p);
}
// Instantiates eltwise_no_pitches_same_dims_quantize under the "fusings_gpu"
// prefix for the FP16 and FP32 eltwise cases 1-3.
// The trailing empty macro argument (", )") follows the same form as the
// preceding instantiation in this file.
INSTANTIATE_TEST_CASE_P(fusings_gpu,
eltwise_no_pitches_same_dims_quantize,
::testing::ValuesIn(std::vector<eltwise_test_params>{
eltwise_test_params{CASE_ELTWISE_FP16_1},
eltwise_test_params{CASE_ELTWISE_FP16_2},
eltwise_test_params{CASE_ELTWISE_FP16_3},
eltwise_test_params{CASE_ELTWISE_FP32_1},
eltwise_test_params{CASE_ELTWISE_FP32_2},
eltwise_test_params{CASE_ELTWISE_FP32_3},
}), );
class eltwise_activation : public EltwiseFusingTest {};
TEST_P(eltwise_activation, basic) {
auto p = GetParam();