From 534a7b81a97c81365e507775fcd815ef86335a41 Mon Sep 17 00:00:00 2001 From: Sergey Shlyapnikov Date: Fri, 2 Oct 2020 16:52:00 +0300 Subject: [PATCH] [IE CLDNN] Fix eltwise fused ops in NO_PITCH_SAME_DIMS mode (#2454) --- .../eltwise/eltwise_kernel_ref.cpp | 5 ++-- .../core/cl_kernels/generic_eltwise_ref.cl | 9 ++++-- .../core/common/kernel_selector_utils.cpp | 13 +++++++++ .../tests/test_cases/fusings_gpu_test.cpp | 28 +++++++++++++++++++ 4 files changed, 51 insertions(+), 4 deletions(-) diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_ref.cpp index da116fa7651..5ad8d92505d 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_ref.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_ref.cpp @@ -70,8 +70,9 @@ JitConstants EltwiseKernelRef::GetJitConstants(const eltwise_params& params) con idx_order = {"d6", "d5", "d4", "d3", "d2", "d1"}; } - FusedOpsConfiguration conf = {"", idx_order, "res", input_dt, 1}; - jit.Merge(MakeFusedOpsJitConstants(params, {conf})); + FusedOpsConfiguration tensor_coord = {"_TENSOR", idx_order, "res", input_dt, 1}; + FusedOpsConfiguration linear_coord = {"_LINEAR", {"d1"}, "res", input_dt, 1, LoadType::LT_UNALIGNED, BoundaryCheck::ENABLED, IndexType::LINEAR_OFFSET}; + jit.Merge(MakeFusedOpsJitConstants(params, {tensor_coord, linear_coord})); } return jit; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/generic_eltwise_ref.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/generic_eltwise_ref.cl index e2295ff5524..e8f7b1fa12d 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/generic_eltwise_ref.cl +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/generic_eltwise_ref.cl @@ -117,8 +117,13 @@ KERNEL(eltwise)( DO_ELTWISE; #if HAS_FUSED_OPS - FUSED_OPS; - OUTPUT_TYPE out = FUSED_OPS_RESULT; + #if ELTWISE_NO_PITCH_SAME_DIMS + FUSED_OPS_LINEAR; + OUTPUT_TYPE out = FUSED_OPS_RESULT_LINEAR; + #else + FUSED_OPS_TENSOR; + OUTPUT_TYPE out = FUSED_OPS_RESULT_TENSOR; + #endif #else #define out res #endif diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/common/kernel_selector_utils.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/common/kernel_selector_utils.cpp index 106ec8b3fa9..926f100ec20 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/common/kernel_selector_utils.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/common/kernel_selector_utils.cpp @@ -237,6 +237,19 @@ bool CheckInputsOutputNoPitchSameDims(const base_params& params) { (params.inputs[0].GetLayout() == DataLayout::b_fs_zyx_fsv16 && params.inputs[0].Feature().v % 16 != 0)) return false; + if (params.fused_ops.size()) { + for (auto fused_op : params.fused_ops) { + for (size_t in = 0; in < fused_op.tensors.size(); in++) { + if (fused_op.tensors[in].LogicalSize() == 1) + continue; + if ((fused_op.tensors[in].GetLayout() == DataLayout::b_fs_yx_fsv16 && fused_op.tensors[in].Feature().v % 16 != 0) || + (fused_op.tensors[in].GetLayout() == DataLayout::b_fs_zyx_fsv16 && fused_op.tensors[in].Feature().v % 16 != 0)) + return false; + no_pitch_same_dims = no_pitch_same_dims && (params.inputs[0] == fused_op.tensors[in]); + } + } + } + for (size_t i = 1; i < params.inputs.size(); i++) { no_pitch_same_dims = no_pitch_same_dims && (params.inputs[0] == params.inputs[i]); diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/fusings_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/fusings_gpu_test.cpp index 9a10f31d511..88e9f9b3656 100644 --- a/inference-engine/thirdparty/clDNN/tests/test_cases/fusings_gpu_test.cpp +++ b/inference-engine/thirdparty/clDNN/tests/test_cases/fusings_gpu_test.cpp @@ -6253,6 +6253,34 @@ INSTANTIATE_TEST_CASE_P(fusings_gpu, eltwise_test_params{CASE_ELTWISE_U8_FP16_3}, }), ); +class eltwise_no_pitches_same_dims_quantize : public EltwiseFusingTest {}; +TEST_P(eltwise_no_pitches_same_dims_quantize, quantize_f32_output) { + auto p = GetParam(); + create_topologies(input_layout("input", get_input_layout(p)), + input_layout("input2", get_input_layout2(p)), + eltwise("eltwise", {"input", "input2"}, p.mode, p.default_type), + data("in_lo", get_mem(get_per_channel_layout(p), min_random, 0)), + data("in_hi", get_mem(get_per_channel_layout(p), 1, max_random)), + data("out_lo", get_mem(get_single_element_layout(p), -128)), + data("out_hi", get_mem(get_single_element_layout(p), 127)), + quantize("quantize", "eltwise", "in_lo", "in_hi", "out_lo", "out_hi", 256, p.input_type), + reorder("out", "quantize", p.default_format, data_types::f32)); + + tolerance = 1.f; + execute(p); +} + +INSTANTIATE_TEST_CASE_P(fusings_gpu, + eltwise_no_pitches_same_dims_quantize, + ::testing::ValuesIn(std::vector{ + eltwise_test_params{CASE_ELTWISE_FP16_1}, + eltwise_test_params{CASE_ELTWISE_FP16_2}, + eltwise_test_params{CASE_ELTWISE_FP16_3}, + eltwise_test_params{CASE_ELTWISE_FP32_1}, + eltwise_test_params{CASE_ELTWISE_FP32_2}, + eltwise_test_params{CASE_ELTWISE_FP32_3}, + }), ); + class eltwise_activation : public EltwiseFusingTest {}; TEST_P(eltwise_activation, basic) { auto p = GetParam();