[IE CLDNN] Eltwise b_fs_yx_fsv16 mixed presicion support (#3734)

2021-01-19 15:17:03 +03:00 · 2021-01-19 15:17:03 +03:00 · f7e0d90292
commit f7e0d90292
parent 559b509b15
3 changed files with 115 additions and 15 deletions
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_b_fs_yx_fsv16.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_b_fs_yx_fsv16.cpp
@ -24,6 +24,8 @@ ParamsKey EltwiseKernel_b_fs_yx_fsv16::GetSupportedKey() const {
    ParamsKey k;
    k.EnableInputDataType(Datatype::F16);
    k.EnableInputDataType(Datatype::F32);
    k.EnableInputDataType(Datatype::INT8);
    k.EnableInputDataType(Datatype::UINT8);
    k.EnableOutputDataType(Datatype::F16);
    k.EnableOutputDataType(Datatype::F32);
    k.EnableOutputDataType(Datatype::INT8);
@ -78,9 +80,12 @@ JitConstants EltwiseKernel_b_fs_yx_fsv16::MakeLoadJitConstants(const eltwise_par
                                                        "input" + std::to_string(input.index) +
                                                        "[0]"));
                    } else {
                        std::string block_read_str = "BLOCK_READN(INPUT" + std::to_string(input.index) + "_TYPE, " +
                                                                 "BLOCK_SIZE, " +
                                                                 "input" + std::to_string(input.index) + ", " +
                                                                 "INPUT" + std::to_string(input.index) + "_GET_INDEX(b, f_block*16, y, x))";
                        jit.AddConstant(MakeJitConstant(name,
-                                                        "READ_FUNC(input" + std::to_string(input.index) +
+                                                        "TO_TYPE(MAKE_VECTOR_TYPE(ACCUMULATOR_TYPE, BLOCK_SIZE), " + block_read_str + ")"));
                                                        ", INPUT"+std::to_string(input.index)+"_GET_INDEX(b, f_block*16, y, x))"));
                    }
                    break;
                case EltwiseInputMode::OUTPUT_BUFFER:
@ -176,20 +181,33 @@ bool EltwiseKernel_b_fs_yx_fsv16::Validate(const Params& params, const optional_
    auto input0 = ewParams.inputs[0];
    for (size_t i = 1; i < ewParams.inputs.size(); i++) {
        if (input0.GetDType() != ewParams.inputs[i].GetDType()) {
            return false;
        }
    }
    // Check that padding before features doesn't miss-align the blocks
    auto feature_block_size = 16;
    if (input0.Feature().pad.before % feature_block_size != 0 || output.Feature().pad.before % feature_block_size != 0) {
        return false;
    }
    auto compareTensors = [](const DataTensor& input0, const DataTensor& input1) -> bool {
        // Check all parameters except DataType
        auto& input0_dims = input0.GetDims();
        auto& input1_dims = input1.GetDims();
        bool same = input0.GetLayout() == input1.GetLayout() &&
                    input0.GetPaddedVal() == input1.GetPaddedVal() &&
                    input0.GetViewOffset() == input1.GetViewOffset() &&
                    input0_dims.size() == input1_dims.size();
        if (same) {
            for (size_t i = 0; i < input0_dims.size(); i++) {
                same &= input0_dims[i].v == input1_dims[i].v &&
                        input0_dims[i].pad.before == input1_dims[i].pad.before &&
                        input0_dims[i].pad.after == input1_dims[i].pad.after &&
                        input0_dims[i].pitch == input1_dims[i].pitch;
            }
        }
        return same;
    };
    for (size_t i = 1; i < ewParams.inputs.size(); i++) {
-        if (ewParams.inputs[i].LogicalSize() == input0.LogicalSize() && !(ewParams.inputs[i] == input0))
+        if (ewParams.inputs[i].LogicalSize() == input0.LogicalSize() && !(compareTensors(ewParams.inputs[i], input0)))
            return false;
        if (ewParams.inputs[i].Feature().pad.before % feature_block_size != 0) {
            return false;
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/eltwise_b_fs_yx_fsv16.cl
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/eltwise_b_fs_yx_fsv16.cl
@ -21,7 +21,7 @@
 #define FEATURE_SLICE_SIZE 16
 #define OUTPUT_TYPE_BLOCK               MAKE_VECTOR_TYPE(OUTPUT_TYPE, BLOCK_SIZE)
-#define TO_OUTPUT_TYPE_BLOCK(val)       CAT(convert_, OUTPUT_TYPE_BLOCK)(val)
+#define TO_TYPE(type, val)              CAT(convert_, type)(val)
 #if BLOCK_SIZE != 1
    #define READ_FUNC(ptr, offset) CAT(DT_INPUT_BLOCK_READ, BLOCK_SIZE)(ptr, offset)
@ -68,10 +68,10 @@ KERNEL(eltwise_b_fs_yx_fsv16)(INPUTS_DECLS
 #if HAS_FUSED_OPS
    FUSED_OPS;
-    OUTPUT_TYPE_BLOCK out = TO_OUTPUT_TYPE_BLOCK(FUSED_OPS_RESULT);
+    OUTPUT_TYPE_BLOCK out = TO_TYPE(MAKE_VECTOR_TYPE(OUTPUT_TYPE, BLOCK_SIZE), FUSED_OPS_RESULT);
 #else
 #if BLOCK_SIZE != 1
-    OUTPUT_TYPE_BLOCK out = ACTIVATION_TYPED(TO_OUTPUT_TYPE_BLOCK(res), ACTIVATION_PARAMS_TYPED);
+    OUTPUT_TYPE_BLOCK out = ACTIVATION_TYPED(TO_TYPE(MAKE_VECTOR_TYPE(OUTPUT_TYPE, BLOCK_SIZE), res), ACTIVATION_PARAMS_TYPED);
 #else
    OUTPUT_TYPE out = ACTIVATION_TYPED(TO_OUTPUT_TYPE(res), ACTIVATION_PARAMS_TYPED);
 #endif
--- a/inference-engine/thirdparty/clDNN/tests/test_cases/eltwise_gpu_test.cpp
+++ b/inference-engine/thirdparty/clDNN/tests/test_cases/eltwise_gpu_test.cpp
@ -3214,7 +3214,8 @@ class BaseEltwiseTest : public ::testing::TestWithParam<T> {
 class eltwise_test : public BaseEltwiseTest<eltwise_test_params> {
 public:
-    VF<float> eltwise_ref(VVVVVVF<float> input0, VVVVVVF<float> input1, tensor input0_size, tensor input1_size, eltwise_mode mode) {
+    template<typename T1, typename T2>
    VF<float> eltwise_ref(VVVVVVF<T1> input0, VVVVVVF<T2> input1, tensor input0_size, tensor input1_size, eltwise_mode mode) {
        auto out_size = tensor::max(input0_size, input1_size);
        int output_b = out_size.batch[0];
@ -3250,8 +3251,8 @@ public:
                                int in1_y = y % input1_size.spatial[1];
                                int in1_x = x % input1_size.spatial[0];
-                                auto in0 = input0[in0_b][in0_f][in0_w][in0_z][in0_y][in0_x];
+                                auto in0 = static_cast<float>(input0[in0_b][in0_f][in0_w][in0_z][in0_y][in0_x]);
-                                auto in1 = input1[in1_b][in1_f][in1_w][in1_z][in1_y][in1_x];
+                                auto in1 = static_cast<float>(input1[in1_b][in1_f][in1_w][in1_z][in1_y][in1_x]);
                                output[b][f][w][z][y][x] = eltwise_execute<float>(mode, in0, in1);
                            }
                        }
@ -3448,3 +3449,84 @@ INSTANTIATE_TEST_CASE_P(eltwise, eltwise_test_6d,
                                ::testing::ValuesIn(types),
                                ::testing::ValuesIn(inputs_6d)
                                ), );
 class eltwise_test_mixed_precision : public eltwise_test {};
 TEST_P(eltwise_test_mixed_precision, fsv16) {
    auto p = GetParam();
    ASSERT_EQ(std::get<2>(p).size(), 2);
    auto mode = std::get<0>(p);
    auto input0_dt = data_types::f16;
    auto input1_dt = std::get<1>(p);
    auto input0_size = std::get<2>(p)[0];
    auto input1_size = std::get<2>(p)[1];
    int b0 = input0_size[0];
    int f0 = input0_size[1];
    int z0 = input0_size.size() == 4 ? 1 : input0_size[2];
    int y0 = input0_size[input0_size.size() == 4 ? 2 : 3];
    int x0 = input0_size[input0_size.size() == 4 ? 3 : 4];
    int b1 = input1_size[0];
    int f1 = input1_size[1];
    int z1 = input1_size.size() == 4 ? 1 : input1_size[2];
    int y1 = input1_size[input1_size.size() == 4 ? 2 : 3];
    int x1 = input1_size[input1_size.size() == 4 ? 3 : 4];
    int min_random = input1_dt == data_types::u8 ? 0 : -2;
    int max_random = input1_dt == data_types::u8 ? 4 : 2;
    VVVVVVF<float> input1_rnd = generate_random_6d<float>(b0, f0, 1, z0, y0, x0, min_random, max_random);
    VVVVVVF<int> input2_rnd = generate_random_6d<int>(b1, f1, 1, z1, y1, x1, min_random, max_random);
    VF<float> input1_rnd_vec = flatten_6d<float>(format::bfwzyx, input1_rnd);
    VF<int> input2_rnd_vec = flatten_6d<int>(format::bfwzyx, input2_rnd);
    const auto& engine = get_test_engine();
    auto fmt_pln = input0_size.size() == 4 ? format::bfyx : format::bfzyx;
    auto fmt_fsv16 = input0_size.size() == 4 ? format::b_fs_yx_fsv16 : format::b_fs_zyx_fsv16;
    auto in0_size = tensor(fmt_pln, input0_size);
    auto in1_size = tensor(fmt_pln, input1_size);
    auto input1 = memory::allocate(engine, { data_types::f32, fmt_pln, in0_size });
    auto input2 = memory::allocate(engine, { data_types::i32, fmt_pln, in1_size });
    set_values(input1, input1_rnd_vec);
    set_values(input2, input2_rnd_vec);
    topology topology;
    topology.add(input_layout("input1", input1.get_layout()));
    topology.add(input_layout("input2", input2.get_layout()));
    topology.add(reorder("reorder1", "input1", fmt_fsv16, input0_dt));
    topology.add(reorder("reorder2", "input2", fmt_fsv16, input1_dt));
    topology.add(eltwise("eltwise", {"reorder1", "reorder2"}, mode));
    topology.add(reorder("out", "eltwise", fmt_pln, data_types::f32));
    primitive_id out_id = "out";
    build_options bo;
    bo.set_option(build_option::optimize_data(true));
    network network(engine, topology, bo);
    network.set_input_data("input1", input1);
    network.set_input_data("input2", input2);
    auto outputs = network.execute();
    EXPECT_EQ(outputs.size(), size_t(1));
    EXPECT_EQ(outputs.begin()->first, out_id);
    auto output_memory = outputs.at(out_id).get_memory();
    auto output_ptr = output_memory.pointer<float>();
    VF<float> output_cpu_vec = eltwise_ref(input1_rnd, input2_rnd, in0_size, in1_size, mode);
    for (size_t i = 0; i < output_cpu_vec.size(); ++i) {
        EXPECT_TRUE(!(std::isnan((float)output_cpu_vec[i]) && std::isnan((float)output_ptr[i])));
        ASSERT_FLOAT_EQ(output_cpu_vec[i], output_ptr[i]);
    }
 }
 static std::vector<data_types> mixed_types = {data_types::i8, data_types::u8};
 INSTANTIATE_TEST_CASE_P(eltwise, eltwise_test_mixed_precision,
                        ::testing::Combine(
                                ::testing::ValuesIn(modes),
                                ::testing::ValuesIn(mixed_types),
                                ::testing::ValuesIn(inputs)
                                ), );