diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_b_fs_yx_fsv16.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_b_fs_yx_fsv16.cpp
index 6c5f8f3ec8f..8abf3cd2300 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_b_fs_yx_fsv16.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_b_fs_yx_fsv16.cpp
@@ -9,6 +9,14 @@
 
 namespace kernel_selector {
 
+static inline bool IsBroadcastingPossibleInput(const DataTensor& input, const DataTensor& output) {
+    if ((input.LogicalSize() == 1) ||
+        (input.LogicalSize() == output.Feature().v && input.Feature().v == output.Feature().v)) {
+        return true;
+    }
+    return false;
+}
+
 ParamsKey EltwiseKernel_b_fs_yx_fsv16::GetSupportedKey() const {
     ParamsKey k;
     k.EnableInputDataType(Datatype::F16);
@@ -34,7 +42,7 @@ ParamsKey EltwiseKernel_b_fs_yx_fsv16::GetSupportedKey() const {
 static inline size_t GetBlockSize(const eltwise_params& params) {
     // Set blocksize 1 when broadcasting X dim
     for (size_t i = 0; i < params.inputs.size(); i++) {
-        if (params.inputs[i].X().v == 1 && params.inputs[i].LogicalSize() != 1) {
+        if ((params.inputs[i].X().v == 1) && !IsBroadcastingPossibleInput(params.inputs[i], params.output)) {
             return 1;
         }
     }
@@ -56,9 +64,9 @@ static inline bool OpHasFeatureBroadcast(const eltwise_params& params, const siz
     for (size_t input_idx = 0; input_idx < ew.inputs.size(); input_idx++) {
         const auto &input = ew.inputs[input_idx];
         if (input.mode == EltwiseInputMode::INPUT_BUFFER) {
-            if (params.inputs[input_idx].LogicalSize() != 1
-                && params.inputs[input_idx].Feature().v == 1
-                && params.output.Feature().v != 1) {
+            if (params.inputs[input_idx].LogicalSize() != 1 &&
+                params.inputs[input_idx].Feature().v == 1 &&
+                params.output.Feature().v != 1) {
                 return true;
             }
         }
@@ -193,31 +201,45 @@ JitConstants EltwiseKernel_b_fs_yx_fsv16::GetJitConstants(const eltwise_params&
         jit.Merge(MakeFusedOpsJitConstants(params, {conf}));
     }
 
-    jit.AddConstant(MakeJitConstant("ELTWISE_BROADCAST", params.broadcast));
+    if (params.broadcast) {
+        bool need_idx_safe = true;
+        for (size_t i = 0; i < params.inputs.size(); i++) {
+            if (IsBroadcastingPossibleInput(params.inputs[i], params.output)) {
+                need_idx_safe = false;
+                break;
+            }
+        }
+        if (need_idx_safe)
+            jit.AddConstant(MakeJitConstant("ELTWISE_BROADCAST", params.broadcast));
+    }
 
     return jit;
 }
 
-bool EltwiseKernel_b_fs_yx_fsv16::Validate(const Params& params, const optional_params& o) const {
-    if (!EltwiseKernelBase::Validate(params, o)) {
+bool EltwiseKernel_b_fs_yx_fsv16::Validate(const Params& p, const optional_params& o) const {
+    if (!EltwiseKernelBase::Validate(p, o)) {
         return false;
     }
 
-    const auto& ewParams = static_cast<const eltwise_params&>(params);
+    const auto& params = static_cast<const eltwise_params&>(p);
 
-    const auto& output = ewParams.output;
+    const auto count = params.output.PhysicalSize();
 
-    for (size_t i = 0; i < ewParams.inputs.size(); i++) {
-        if (ewParams.inputs[i].GetLayout() != DataLayout::b_fs_yx_fsv16 && GetBlockSize(ewParams) != 1) {
+    if (count % 8 != 0)
+        return false;
+
+    for (size_t i = 0; i < params.inputs.size(); i++) {
+        if ((params.inputs[i].GetLayout() != DataLayout::b_fs_yx_fsv16) &&
+            !IsBroadcastingPossibleInput(params.inputs[i], params.output)) {
             return false;
         }
     }
 
-    auto input0 = ewParams.inputs[0];
+    auto input0 = params.inputs[0];
 
     // Check that padding before features doesn't miss-align the blocks
     auto feature_block_size = 16;
-    if (input0.Feature().pad.before % feature_block_size != 0 || output.Feature().pad.before % feature_block_size != 0) {
+    if (input0.Feature().pad.before % feature_block_size != 0 || params.output.Feature().pad.before % feature_block_size != 0) {
         return false;
     }
 
@@ -240,10 +262,10 @@ bool EltwiseKernel_b_fs_yx_fsv16::Validate(const Params& params, const optional_
         return same;
     };
 
-    for (size_t i = 1; i < ewParams.inputs.size(); i++) {
-        if (ewParams.inputs[i].LogicalSize() == input0.LogicalSize() && !(compareTensors(ewParams.inputs[i], input0)))
+    for (size_t i = 1; i < params.inputs.size(); i++) {
+        if (params.inputs[i].LogicalSize() == input0.LogicalSize() && !(compareTensors(params.inputs[i], input0)))
             return false;
-        if (ewParams.inputs[i].Feature().pad.before % feature_block_size != 0) {
+        if (params.inputs[i].Feature().pad.before % feature_block_size != 0) {
             return false;
         }
     }
diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/eltwise_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/eltwise_gpu_test.cpp
index 9624213018d..eb70de8ef5f 100644
--- a/inference-engine/thirdparty/clDNN/tests/test_cases/eltwise_gpu_test.cpp
+++ b/inference-engine/thirdparty/clDNN/tests/test_cases/eltwise_gpu_test.cpp
@@ -3198,9 +3198,6 @@ using eltwise_test_params = std::tuple<eltwise_mode, data_types, std::vector<std
 
 template <typename T>
 class BaseEltwiseTest : public ::testing::TestWithParam<T> {
-};
-
-class eltwise_test : public BaseEltwiseTest<eltwise_test_params> {
 public:
     template <typename T>
     VF<T> eltwise_ref(VVVVVVF<T> input0, VVVVVVF<T> input1, tensor input0_size, tensor input1_size, eltwise_mode mode) {
@@ -3253,6 +3250,9 @@ public:
     }
 };
 
+class eltwise_test : public BaseEltwiseTest<eltwise_test_params> {
+};
+
 TEST_P(eltwise_test, fsv16) {
     auto p = GetParam();
 
@@ -3322,6 +3322,7 @@ TEST_P(eltwise_test, fsv16) {
     }
 }
 
+
 static std::vector<eltwise_mode> modes = {eltwise_mode::sum, eltwise_mode::prod};
 static std::vector<data_types> types = {data_types::f32, data_types::f16};
 static std::vector<std::vector<std::vector<int32_t>>> inputs = {
@@ -3520,3 +3521,102 @@ INSTANTIATE_TEST_CASE_P(eltwise, eltwise_test_mixed_precision,
                         ::testing::ValuesIn(mixed_types),
                         ::testing::ValuesIn(inputs)
                         ), );
+
+
+struct eltwise_layout_test_params {
+    eltwise_mode mode;
+    std::vector<int32_t> input0_size;
+    std::vector<int32_t> input1_size;
+    format input0_format;
+    format input1_format;
+    std::string selected_kernel_name;
+};
+
+#define CASE_ELTWISE_TEST1 eltwise_mode::sum, {1, 2, 1, 1}, {4, 2, 4, 4}, format::b_fs_yx_fsv16, format::bfyx, "generic_eltwise_ref"
+#define CASE_ELTWISE_TEST2 eltwise_mode::sum, {4, 1, 4, 4}, {1, 5, 1, 1}, format::b_fs_yx_fsv16, format::bfyx, "eltwise_b_fs_yx_fsv16"
+#define CASE_ELTWISE_TEST3 eltwise_mode::sum, {4, 5, 4, 1}, {4, 1, 4, 1}, format::b_fs_yx_fsv16, format::bfyx, "generic_eltwise_ref"
+#define CASE_ELTWISE_TEST4 eltwise_mode::sum, {4, 2, 4, 4}, {1, 1, 1, 1}, format::b_fs_yx_fsv16, format::bfyx, "eltwise_b_fs_yx_fsv16"
+#define CASE_ELTWISE_TEST5 eltwise_mode::sum, {1, 2, 1, 1}, {4, 2, 4, 4}, format::bfyx, format::b_fs_yx_fsv16, "generic_eltwise_ref"
+#define CASE_ELTWISE_TEST6 eltwise_mode::sum, {4, 1, 4, 4}, {1, 5, 1, 1}, format::bfyx, format::b_fs_yx_fsv16, "generic_eltwise_ref"
+#define CASE_ELTWISE_TEST7 eltwise_mode::sum, {4, 5, 4, 1}, {4, 1, 4, 1}, format::bfyx, format::b_fs_yx_fsv16, "generic_eltwise_ref"
+#define CASE_ELTWISE_TEST8 eltwise_mode::sum, {4, 2, 4, 4}, {1, 1, 1, 1}, format::bfyx, format::b_fs_yx_fsv16, "generic_eltwise_ref"
+
+class eltwise_layout_test : public BaseEltwiseTest<eltwise_layout_test_params> {
+};
+
+class eltwise_test_mixed_layout : public eltwise_layout_test {};
+TEST_P(eltwise_test_mixed_layout, mixed_layout) {
+    auto p = GetParam();
+
+    auto mode = p.mode;
+    auto input0_size = p.input0_size;
+    auto input1_size = p.input1_size;
+    auto format0 = p.input0_format;
+    auto format1 = p.input1_format;
+    auto selected_kernel = p.selected_kernel_name;
+
+    int b0 = input0_size[0];
+    int f0 = input0_size[1];
+    int y0 = input0_size[2];
+    int x0 = input0_size[3];
+
+    int b1 = input1_size[0];
+    int f1 = input1_size[1];
+    int y1 = input1_size[2];
+    int x1 = input1_size[3];
+
+    int min_random = -2, max_random = 2;
+    VVVVVVF<float> input1_rnd = generate_random_6d<float>(b0, f0, 1, 1, y0, x0, min_random, max_random);
+    VVVVVVF<float> input2_rnd = generate_random_6d<float>(b1, f1, 1, 1, y1, x1, min_random, max_random);
+    VF<float> input1_rnd_vec = flatten_6d<float>(format::bfwzyx, input1_rnd);
+    VF<float> input2_rnd_vec = flatten_6d<float>(format::bfwzyx, input2_rnd);
+
+    const auto& engine = get_test_engine();
+    auto in0_size = tensor(format::bfyx, input0_size);
+    auto in1_size = tensor(format::bfyx, input1_size);
+
+    auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx, in0_size });
+    auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, in1_size });
+    set_values(input1, input1_rnd_vec);
+    set_values(input2, input2_rnd_vec);
+
+    topology topology;
+    topology.add(input_layout("input1", input1.get_layout()));
+    topology.add(input_layout("input2", input2.get_layout()));
+    topology.add(reorder("reorder1", "input1", format0, data_types::f32));
+    topology.add(reorder("reorder2", "input2", format1, data_types::f32));
+    topology.add(eltwise("eltwise", {"reorder1", "reorder2"}, mode));
+    topology.add(reorder("out", "eltwise", format::bfyx, data_types::f32));
+    primitive_id out_id = "out";
+
+    network network(engine, topology);
+
+    network.set_input_data("input1", input1);
+    network.set_input_data("input2", input2);
+    auto outputs = network.execute();
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, out_id);
+
+    EXPECT_TRUE(network.get_primitive_info("eltwise").find(selected_kernel) != std::string::npos);
+
+    auto output_memory = outputs.at(out_id).get_memory();
+    auto output_ptr = output_memory.pointer<float>();
+
+    VF<float> output_cpu_vec = eltwise_ref(input1_rnd, input2_rnd, in0_size, in1_size, mode);
+    for (size_t i = 0; i < output_cpu_vec.size(); ++i) {
+        EXPECT_TRUE(!(std::isnan((float)output_cpu_vec[i]) && std::isnan((float)output_ptr[i])));
+        ASSERT_FLOAT_EQ(output_cpu_vec[i], output_ptr[i]);
+    }
+}
+
+INSTANTIATE_TEST_CASE_P(eltwise, eltwise_test_mixed_layout,
+                        ::testing::ValuesIn(std::vector<eltwise_layout_test_params>{
+                            eltwise_layout_test_params{CASE_ELTWISE_TEST1},
+                            eltwise_layout_test_params{CASE_ELTWISE_TEST2},
+                            eltwise_layout_test_params{CASE_ELTWISE_TEST3},
+                            eltwise_layout_test_params{CASE_ELTWISE_TEST4},
+                            eltwise_layout_test_params{CASE_ELTWISE_TEST5},
+                            eltwise_layout_test_params{CASE_ELTWISE_TEST6},
+                            eltwise_layout_test_params{CASE_ELTWISE_TEST7},
+                            eltwise_layout_test_params{CASE_ELTWISE_TEST8},
+                        }), );