From f7e0d90292b32ac2a382c06b01ebb48f1a3816d8 Mon Sep 17 00:00:00 2001 From: Sergey Shlyapnikov Date: Tue, 19 Jan 2021 15:17:03 +0300 Subject: [PATCH] [IE CLDNN] Eltwise b_fs_yx_fsv16 mixed precision support (#3734) --- .../eltwise/eltwise_kernel_b_fs_yx_fsv16.cpp | 36 ++++++-- .../core/cl_kernels/eltwise_b_fs_yx_fsv16.cl | 6 +- .../tests/test_cases/eltwise_gpu_test.cpp | 88 ++++++++++++++++++- 3 files changed, 115 insertions(+), 15 deletions(-) diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_b_fs_yx_fsv16.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_b_fs_yx_fsv16.cpp index ae0c07b7098..1795f7b2c3b 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_b_fs_yx_fsv16.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_b_fs_yx_fsv16.cpp @@ -24,6 +24,8 @@ ParamsKey EltwiseKernel_b_fs_yx_fsv16::GetSupportedKey() const { ParamsKey k; k.EnableInputDataType(Datatype::F16); k.EnableInputDataType(Datatype::F32); + k.EnableInputDataType(Datatype::INT8); + k.EnableInputDataType(Datatype::UINT8); k.EnableOutputDataType(Datatype::F16); k.EnableOutputDataType(Datatype::F32); k.EnableOutputDataType(Datatype::INT8); @@ -78,9 +80,12 @@ JitConstants EltwiseKernel_b_fs_yx_fsv16::MakeLoadJitConstants(const eltwise_par "input" + std::to_string(input.index) + "[0]")); } else { + std::string block_read_str = "BLOCK_READN(INPUT" + std::to_string(input.index) + "_TYPE, " + + "BLOCK_SIZE, " + + "input" + std::to_string(input.index) + ", " + + "INPUT" + std::to_string(input.index) + "_GET_INDEX(b, f_block*16, y, x))"; jit.AddConstant(MakeJitConstant(name, - "READ_FUNC(input" + std::to_string(input.index) + - ", INPUT"+std::to_string(input.index)+"_GET_INDEX(b, f_block*16, y, x))")); + "TO_TYPE(MAKE_VECTOR_TYPE(ACCUMULATOR_TYPE, BLOCK_SIZE), " + block_read_str + ")")); } break; case 
EltwiseInputMode::OUTPUT_BUFFER: @@ -176,20 +181,33 @@ bool EltwiseKernel_b_fs_yx_fsv16::Validate(const Params& params, const optional_ auto input0 = ewParams.inputs[0]; - for (size_t i = 1; i < ewParams.inputs.size(); i++) { - if (input0.GetDType() != ewParams.inputs[i].GetDType()) { - return false; - } - } - // Check that padding before features doesn't miss-align the blocks auto feature_block_size = 16; if (input0.Feature().pad.before % feature_block_size != 0 || output.Feature().pad.before % feature_block_size != 0) { return false; } + auto compareTensors = [](const DataTensor& input0, const DataTensor& input1) -> bool { + // Check all parameters except DataType + auto& input0_dims = input0.GetDims(); + auto& input1_dims = input1.GetDims(); + bool same = input0.GetLayout() == input1.GetLayout() && + input0.GetPaddedVal() == input1.GetPaddedVal() && + input0.GetViewOffset() == input1.GetViewOffset() && + input0_dims.size() == input1_dims.size(); + if (same) { + for (size_t i = 0; i < input0_dims.size(); i++) { + same &= input0_dims[i].v == input1_dims[i].v && + input0_dims[i].pad.before == input1_dims[i].pad.before && + input0_dims[i].pad.after == input1_dims[i].pad.after && + input0_dims[i].pitch == input1_dims[i].pitch; + } + } + return same; + }; + for (size_t i = 1; i < ewParams.inputs.size(); i++) { - if (ewParams.inputs[i].LogicalSize() == input0.LogicalSize() && !(ewParams.inputs[i] == input0)) + if (ewParams.inputs[i].LogicalSize() == input0.LogicalSize() && !(compareTensors(ewParams.inputs[i], input0))) return false; if (ewParams.inputs[i].Feature().pad.before % feature_block_size != 0) { return false; diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/eltwise_b_fs_yx_fsv16.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/eltwise_b_fs_yx_fsv16.cl index 9be38f05ee9..b13ef8a31f1 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/eltwise_b_fs_yx_fsv16.cl +++ 
b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/eltwise_b_fs_yx_fsv16.cl @@ -21,7 +21,7 @@ #define FEATURE_SLICE_SIZE 16 #define OUTPUT_TYPE_BLOCK MAKE_VECTOR_TYPE(OUTPUT_TYPE, BLOCK_SIZE) -#define TO_OUTPUT_TYPE_BLOCK(val) CAT(convert_, OUTPUT_TYPE_BLOCK)(val) +#define TO_TYPE(type, val) CAT(convert_, type)(val) #if BLOCK_SIZE != 1 #define READ_FUNC(ptr, offset) CAT(DT_INPUT_BLOCK_READ, BLOCK_SIZE)(ptr, offset) @@ -68,10 +68,10 @@ KERNEL(eltwise_b_fs_yx_fsv16)(INPUTS_DECLS #if HAS_FUSED_OPS FUSED_OPS; - OUTPUT_TYPE_BLOCK out = TO_OUTPUT_TYPE_BLOCK(FUSED_OPS_RESULT); + OUTPUT_TYPE_BLOCK out = TO_TYPE(MAKE_VECTOR_TYPE(OUTPUT_TYPE, BLOCK_SIZE), FUSED_OPS_RESULT); #else #if BLOCK_SIZE != 1 - OUTPUT_TYPE_BLOCK out = ACTIVATION_TYPED(TO_OUTPUT_TYPE_BLOCK(res), ACTIVATION_PARAMS_TYPED); + OUTPUT_TYPE_BLOCK out = ACTIVATION_TYPED(TO_TYPE(MAKE_VECTOR_TYPE(OUTPUT_TYPE, BLOCK_SIZE), res), ACTIVATION_PARAMS_TYPED); #else OUTPUT_TYPE out = ACTIVATION_TYPED(TO_OUTPUT_TYPE(res), ACTIVATION_PARAMS_TYPED); #endif diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/eltwise_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/eltwise_gpu_test.cpp index 35992c76551..3a2e6c8cf19 100644 --- a/inference-engine/thirdparty/clDNN/tests/test_cases/eltwise_gpu_test.cpp +++ b/inference-engine/thirdparty/clDNN/tests/test_cases/eltwise_gpu_test.cpp @@ -3214,7 +3214,8 @@ class BaseEltwiseTest : public ::testing::TestWithParam { class eltwise_test : public BaseEltwiseTest { public: - VF eltwise_ref(VVVVVVF input0, VVVVVVF input1, tensor input0_size, tensor input1_size, eltwise_mode mode) { + template + VF eltwise_ref(VVVVVVF input0, VVVVVVF input1, tensor input0_size, tensor input1_size, eltwise_mode mode) { auto out_size = tensor::max(input0_size, input1_size); int output_b = out_size.batch[0]; @@ -3250,8 +3251,8 @@ public: int in1_y = y % input1_size.spatial[1]; int in1_x = x % input1_size.spatial[0]; - auto in0 = 
input0[in0_b][in0_f][in0_w][in0_z][in0_y][in0_x]; - auto in1 = input1[in1_b][in1_f][in1_w][in1_z][in1_y][in1_x]; + auto in0 = static_cast(input0[in0_b][in0_f][in0_w][in0_z][in0_y][in0_x]); + auto in1 = static_cast(input1[in1_b][in1_f][in1_w][in1_z][in1_y][in1_x]); output[b][f][w][z][y][x] = eltwise_execute(mode, in0, in1); } } @@ -3448,3 +3449,84 @@ INSTANTIATE_TEST_CASE_P(eltwise, eltwise_test_6d, ::testing::ValuesIn(types), ::testing::ValuesIn(inputs_6d) ), ); + +class eltwise_test_mixed_precision : public eltwise_test {}; +TEST_P(eltwise_test_mixed_precision, fsv16) { + auto p = GetParam(); + + ASSERT_EQ(std::get<2>(p).size(), 2); + + auto mode = std::get<0>(p); + auto input0_dt = data_types::f16; + auto input1_dt = std::get<1>(p); + auto input0_size = std::get<2>(p)[0]; + auto input1_size = std::get<2>(p)[1]; + + int b0 = input0_size[0]; + int f0 = input0_size[1]; + int z0 = input0_size.size() == 4 ? 1 : input0_size[2]; + int y0 = input0_size[input0_size.size() == 4 ? 2 : 3]; + int x0 = input0_size[input0_size.size() == 4 ? 3 : 4]; + + int b1 = input1_size[0]; + int f1 = input1_size[1]; + int z1 = input1_size.size() == 4 ? 1 : input1_size[2]; + int y1 = input1_size[input1_size.size() == 4 ? 2 : 3]; + int x1 = input1_size[input1_size.size() == 4 ? 3 : 4]; + + int min_random = input1_dt == data_types::u8 ? 0 : -2; + int max_random = input1_dt == data_types::u8 ? 4 : 2; + VVVVVVF input1_rnd = generate_random_6d(b0, f0, 1, z0, y0, x0, min_random, max_random); + VVVVVVF input2_rnd = generate_random_6d(b1, f1, 1, z1, y1, x1, min_random, max_random); + VF input1_rnd_vec = flatten_6d(format::bfwzyx, input1_rnd); + VF input2_rnd_vec = flatten_6d(format::bfwzyx, input2_rnd); + + const auto& engine = get_test_engine(); + auto fmt_pln = input0_size.size() == 4 ? format::bfyx : format::bfzyx; + auto fmt_fsv16 = input0_size.size() == 4 ? 
format::b_fs_yx_fsv16 : format::b_fs_zyx_fsv16; + + auto in0_size = tensor(fmt_pln, input0_size); + auto in1_size = tensor(fmt_pln, input1_size); + + auto input1 = memory::allocate(engine, { data_types::f32, fmt_pln, in0_size }); + auto input2 = memory::allocate(engine, { data_types::i32, fmt_pln, in1_size }); + set_values(input1, input1_rnd_vec); + set_values(input2, input2_rnd_vec); + + topology topology; + topology.add(input_layout("input1", input1.get_layout())); + topology.add(input_layout("input2", input2.get_layout())); + topology.add(reorder("reorder1", "input1", fmt_fsv16, input0_dt)); + topology.add(reorder("reorder2", "input2", fmt_fsv16, input1_dt)); + topology.add(eltwise("eltwise", {"reorder1", "reorder2"}, mode)); + topology.add(reorder("out", "eltwise", fmt_pln, data_types::f32)); + primitive_id out_id = "out"; + + build_options bo; + bo.set_option(build_option::optimize_data(true)); + network network(engine, topology, bo); + + network.set_input_data("input1", input1); + network.set_input_data("input2", input2); + auto outputs = network.execute(); + EXPECT_EQ(outputs.size(), size_t(1)); + EXPECT_EQ(outputs.begin()->first, out_id); + + auto output_memory = outputs.at(out_id).get_memory(); + auto output_ptr = output_memory.pointer(); + + VF output_cpu_vec = eltwise_ref(input1_rnd, input2_rnd, in0_size, in1_size, mode); + for (size_t i = 0; i < output_cpu_vec.size(); ++i) { + EXPECT_TRUE(!(std::isnan((float)output_cpu_vec[i]) && std::isnan((float)output_ptr[i]))); + ASSERT_FLOAT_EQ(output_cpu_vec[i], output_ptr[i]); + } +} + +static std::vector mixed_types = {data_types::i8, data_types::u8}; + +INSTANTIATE_TEST_CASE_P(eltwise, eltwise_test_mixed_precision, + ::testing::Combine( + ::testing::ValuesIn(modes), + ::testing::ValuesIn(mixed_types), + ::testing::ValuesIn(inputs) + ), );