[IE CLDNN] Added 6d tensor support in eltwise/scale primitives (#826)
parent b0eb3e67ee, commit fe198dd544
@@ -20,8 +20,8 @@ std::vector<std::vector<std::vector<size_t>>> inShapes = {
         {{1, 2, 4}},
         {{1, 4, 4}},
         {{1, 4, 4, 1}},
-        {{1, 1, 1, 1, 1, 1, 3}},
-        {{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}}
+        {{1, 4, 3, 2, 1, 3}},
+        {{1, 3, 1, 1, 1, 3}, {1, 3, 1, 1, 1, 1}},
 };
 
 std::vector<InferenceEngine::Precision> netPrecisions = {
@@ -53,7 +53,7 @@ const auto multiply_params = ::testing::Combine(
         ::testing::ValuesIn(secondaryInputTypes),
         ::testing::ValuesIn(opTypes),
         ::testing::ValuesIn(netPrecisions),
-        ::testing::Values(CommonTestUtils::DEVICE_CPU),
+        ::testing::Values(CommonTestUtils::DEVICE_GPU),
         ::testing::Values(additional_config));
 
 INSTANTIATE_TEST_CASE_P(CompareWithRefs, EltwiseLayerTest, multiply_params, EltwiseLayerTest::getTestCaseName);
@@ -9,6 +9,8 @@
 
 std::vector<std::string> disabledTestPatterns() {
     return {
-        R"(.*(EltwiseLayerTest).*IS=\(.*\..*\..*\..*\..*\).*secondaryInputType=PARAMETER.*opType=SCALAR.*)",
+        // cldnn treats 1d constant as [1, f, 1, 1] tensor instead of [b, 1, 1, 1] which leads to fails of these tests
+        R"(.*(EltwiseLayerTest).*IS=\(.*\..*\..*\..*\..*\).*secondaryInputType=CONSTANT.*opType=SCALAR.*)",
+        R"(.*(EltwiseLayerTest).*IS=\(.*\).*secondaryInputType=CONSTANT.*)",
     };
 }
@@ -1,4 +1,4 @@
-// Copyright (c) 2016-2019 Intel Corporation
+// Copyright (c) 2016-2020 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -397,6 +397,8 @@ JitConstants EltwiseKernelBase::MakeIndexJitConstants(const eltwise_params& para
                                                         {1, 1, 1})));
         } else if (out_c == 5) {
             jit.AddConstant(MakeJitConstant(out_idx_order, "d5,d4,d3,d2,d1"));
+        } else if (out_c == 6) {
+            jit.AddConstant(MakeJitConstant(out_idx_order, "d6,d5,d4,d3,d2,d1"));
         } else {
             assert(0);
         }
@@ -445,6 +447,14 @@ JitConstants EltwiseKernelBase::MakeIndexJitConstants(const eltwise_params& para
                 // quite strange case, but can happen due to reorders fusing
                 // it means that z coord is equal to 1, so z offset will be always equal to 0
                 jit.AddConstant(MakeJitConstant(idx_order, "d4,d3,0,d2,d1"));
+            } else if (out_c == 6) {
+                if (in_c < 5)
+                    jit.AddConstant(MakeJitConstant(idx_order, "d6,d5,d2,d1"));
+                else if (in_c == 5) {
+                    jit.AddConstant(MakeJitConstant(idx_order, "d6,d5,d3,d2,d1"));
+                } else {
+                    jit.AddConstant(MakeJitConstant(idx_order, "d6,d5,d4,d3,d2,d1"));
+                }
             } else {
                 assert(0);
             }
@@ -526,19 +536,16 @@ EltwiseKernelBase::DispatchData EltwiseKernelBase::SetDefault(const eltwise_para
             gws.push_back(o.v);
         }
 
-        size_t n_dims;
-        if ((out.GetLayout() == DataLayout::bfzyx) || (out.GetLayout() == DataLayout::b_fs_zyx_fsv16) ||
-            (out.GetLayout() == DataLayout::bs_fs_zyx_bsv16_fsv16))
-            n_dims = 5;
-        else
-            n_dims = 4;
-
+        size_t n_dims = DataTensor::ChannelsCount(out.GetLayout());
         for (size_t i = gws.size(); i < n_dims; i++) {
             gws.push_back(1U);
         }
 
         kd.gws0 = gws[0];
-        if (n_dims == 5) {
+        if (n_dims == 6) {
+            kd.gws1 = gws[1] * gws[2] * gws[3];  // y*z*w
+            kd.gws2 = gws[4] * gws[5];
+        } else if (n_dims == 5) {
             kd.gws1 = gws[1] * gws[2];  // y*z
             kd.gws2 = gws[3] * gws[4];
         } else {
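For illustration, a standalone C++ sketch (not part of this commit; names and sizes are illustrative) of how the n_dims == 6 branch above folds six per-dimension global sizes into the three dispatch values: x stays in gws0, y*z*w are merged into gws1, and feature*batch into gws2.

    #include <array>
    #include <cstdio>

    // Fold per-dimension global sizes {x, y, z, w, f, b} into three dispatch values,
    // mirroring the n_dims == 6 branch in the hunk above.
    std::array<size_t, 3> fold_gws_6d(const std::array<size_t, 6>& gws) {
        return {{ gws[0],                    // gws0 = x
                  gws[1] * gws[2] * gws[3],  // gws1 = y*z*w
                  gws[4] * gws[5] }};        // gws2 = feature*batch
    }

    int main() {
        // Example shape from the new 6D eltwise tests: b=8, f=32, w=4, z=5, y=6, x=7.
        auto folded = fold_gws_6d({{7, 6, 5, 4, 32, 8}});
        std::printf("gws = {%zu, %zu, %zu}\n", folded[0], folded[1], folded[2]);  // 7, 120, 256
        return 0;
    }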
@@ -52,6 +52,7 @@ JitConstants PermuteKernelRef::GetJitConstants(const permute_params& params) con
         default: in_idx = {"b", "f", "x", "y" }; break;
     }
 
+    assert(params.order.size() == in_idx.size());
     for (auto& o : params.order) {
         out_idx.push_back(in_idx[o]);
     }
@@ -1,5 +1,5 @@
 /*
-// Copyright (c) 2016-2019 Intel Corporation
+// Copyright (c) 2016-2020 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -32,7 +32,39 @@ KERNEL(eltwise)(
 #endif
 )
 {
-#if OUTPUT_DIMS == 5 // 3D spatial
+#if OUTPUT_DIMS == 6 // 4D spatial
+#if ELTWISE_LAYOUT_BASED || QUANTIZATION_TERM || ELTWISE_BROADCAST
+    uint data_idx = get_global_id(GWS_YX);
+    const uint d1 = data_idx % OUTPUT_SIZE_X; // X
+    data_idx = data_idx / OUTPUT_SIZE_X;
+
+    const uint d2 = data_idx % OUTPUT_SIZE_Y; // Y
+    data_idx = data_idx / OUTPUT_SIZE_Y;
+
+    const uint d3 = data_idx % OUTPUT_SIZE_Z; // Z
+    data_idx = data_idx / OUTPUT_SIZE_Z;
+
+    const uint d4 = data_idx % OUTPUT_SIZE_W; // W
+
+    const uint d5 = get_global_id(GWS_FEATURE); // Feature
+    const uint d6 = get_global_id(GWS_BATCH); // Batch
+
+    uint output_offset = OUTPUT_GET_INDEX(d6, d5, d4, d3, d2, d1);
+#elif ELTWISE_NO_PITCH_SAME_DIMS
+    const uint d1 = get_global_id(0);
+    uint output_offset = OUTPUT_OFFSET + d1;
+#else
+    const uint d1 = get_global_id(0);
+    const uint d2 = (uint)get_global_id(1) % OUTPUT_SIZES[1];
+    const uint d3 = (uint)get_global_id(1) / OUTPUT_SIZES[1] % OUTPUT_SIZE[2];
+    const uint d4 = (uint)get_global_id(1) / OUTPUT_SIZES[1] / OUTPUT_SIZE[2];
+    const uint d5 = (uint)get_global_id(2) % OUTPUT_SIZES[3];
+    const uint d6 = (uint)get_global_id(2) / OUTPUT_SIZES[3];
+
+    uint output_offset = OUTPUT_GET_INDEX(d6, d5, d4, d3, d2, d1);
+#endif
+
+#elif OUTPUT_DIMS == 5 // 3D spatial
 #if ELTWISE_LAYOUT_BASED || QUANTIZATION_TERM || ELTWISE_BROADCAST
     uint data_idx = get_global_id(GWS_YX);
     const uint d1 = data_idx % OUTPUT_SIZE_X; // X
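The new OUTPUT_DIMS == 6 branch recovers the four spatial coordinates from the single flattened GWS_YX work-item id by repeated modulo/division. Below is a minimal host-side C++ sketch of the same decomposition (illustrative only; the struct fields stand in for OUTPUT_SIZE_X/Y/Z/W).

    #include <array>
    #include <cassert>

    struct SpatialSizes { int x, y, z, w; };  // stand-ins for OUTPUT_SIZE_X/Y/Z/W

    // Same mod/div chain as the OUTPUT_DIMS == 6 branch: d1 = X, d2 = Y, d3 = Z, d4 = W.
    std::array<int, 4> decompose_yx_index(int data_idx, SpatialSizes s) {
        int d1 = data_idx % s.x;  data_idx /= s.x;
        int d2 = data_idx % s.y;  data_idx /= s.y;
        int d3 = data_idx % s.z;  data_idx /= s.z;
        int d4 = data_idx % s.w;
        return {{d1, d2, d3, d4}};
    }

    int main() {
        SpatialSizes s{3, 2, 2, 2};  // x=3, y=2, z=2, w=2, matching the new scale tests
        for (int idx = 0; idx < s.x * s.y * s.z * s.w; ++idx) {
            auto d = decompose_yx_index(idx, s);
            // Re-flattening the coordinates must recover the original work-item id.
            assert(d[0] + s.x * (d[1] + s.y * (d[2] + s.z * d[3])) == idx);
        }
        return 0;
    }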
@@ -1,5 +1,5 @@
 /*
-// Copyright (c) 2019 Intel Corporation
+// Copyright (c) 2019-2020 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -255,6 +255,13 @@ attach_eltwise_gpu::attach_eltwise_gpu() {
         { std::make_tuple(engine_types::ocl, data_types::u8, format::bfzyx), eltwise_gpu::create },
         { std::make_tuple(engine_types::ocl, data_types::i32, format::bfzyx), eltwise_gpu::create },
         { std::make_tuple(engine_types::ocl, data_types::i64, format::bfzyx), eltwise_gpu::create },
+        // 4D
+        { std::make_tuple(engine_types::ocl, data_types::f32, format::bfwzyx), eltwise_gpu::create },
+        { std::make_tuple(engine_types::ocl, data_types::f16, format::bfwzyx), eltwise_gpu::create },
+        { std::make_tuple(engine_types::ocl, data_types::i8, format::bfwzyx), eltwise_gpu::create },
+        { std::make_tuple(engine_types::ocl, data_types::u8, format::bfwzyx), eltwise_gpu::create },
+        { std::make_tuple(engine_types::ocl, data_types::i32, format::bfwzyx), eltwise_gpu::create },
+        { std::make_tuple(engine_types::ocl, data_types::i64, format::bfwzyx), eltwise_gpu::create },
 
         { std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_zyx_fsv16), eltwise_gpu::create },
         { std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_zyx_fsv16), eltwise_gpu::create },
@@ -1,5 +1,5 @@
 /*
-// Copyright (c) 2016-2019 Intel Corporation
+// Copyright (c) 2016-2020 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -98,6 +98,11 @@ attach_scale_gpu::attach_scale_gpu() {
     implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfzyx), val_fw);
     implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfzyx), val_fw);
 
+    implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfwzyx), val_fw);
+    implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfwzyx), val_fw);
+    implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfwzyx), val_fw);
+    implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfwzyx), val_fw);
+
     implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv16), val_fw);
     implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_yx_fsv16), val_fw);
    implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv16), val_fw);
@@ -1,5 +1,5 @@
 /*
-// Copyright (c) 2016-2019 Intel Corporation
+// Copyright (c) 2016-2020 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -3214,39 +3214,53 @@ class BaseEltwiseTest : public ::testing::TestWithParam<T> {
 
 class eltwise_test : public BaseEltwiseTest<eltwise_test_params> {
 public:
-    VF<float> eltwise_ref(VVVVF<float> input0, VVVVF<float> input1, tensor input0_size, tensor input1_size, eltwise_mode mode) {
+    VF<float> eltwise_ref(VVVVVVF<float> input0, VVVVVVF<float> input1, tensor input0_size, tensor input1_size, eltwise_mode mode) {
         auto out_size = tensor::max(input0_size, input1_size);
 
         int output_b = out_size.batch[0];
         int output_f = out_size.feature[0];
+        int output_w = out_size.spatial[3];
+        int output_z = out_size.spatial[2];
         int output_y = out_size.spatial[1];
         int output_x = out_size.spatial[0];
 
-        VVVVF<float> output(output_b, VVVF<float>(output_f, VVF<float>(output_y, VF<float>(output_x))));
+        VVVVVVF<float> output(output_b, VVVVVF<float>(output_f,
+                                        VVVVF<float>(output_w,
+                                        VVVF<float>(output_z,
+                                        VVF<float>(output_y,
+                                        VF<float>(output_x))))));
 
         for (int b = 0; b < output_b; ++b) {
             for (int f = 0; f < output_f; ++f) {
-                for (int y = 0; y <output_y; ++y) {
-                    for (int x = 0; x < output_x; ++x) {
-                        int in0_b = b % input0_size.batch[0];
-                        int in0_f = f % input0_size.feature[0];
-                        int in0_y = y % input0_size.spatial[1];
-                        int in0_x = x % input0_size.spatial[0];
+                for (int w = 0; w <output_w; ++w) {
+                    for (int z = 0; z <output_z; ++z) {
+                        for (int y = 0; y <output_y; ++y) {
+                            for (int x = 0; x < output_x; ++x) {
+                                int in0_b = b % input0_size.batch[0];
+                                int in0_f = f % input0_size.feature[0];
+                                int in0_w = w % input0_size.spatial[3];
+                                int in0_z = z % input0_size.spatial[2];
+                                int in0_y = y % input0_size.spatial[1];
+                                int in0_x = x % input0_size.spatial[0];
 
-                        int in1_b = b % input1_size.batch[0];
-                        int in1_f = f % input1_size.feature[0];
-                        int in1_y = y % input1_size.spatial[1];
-                        int in1_x = x % input1_size.spatial[0];
+                                int in1_b = b % input1_size.batch[0];
+                                int in1_f = f % input1_size.feature[0];
+                                int in1_w = w % input1_size.spatial[3];
+                                int in1_z = z % input1_size.spatial[2];
+                                int in1_y = y % input1_size.spatial[1];
+                                int in1_x = x % input1_size.spatial[0];
 
-                        auto in0 = input0[in0_b][in0_f][in0_y][in0_x];
-                        auto in1 = input1[in1_b][in1_f][in1_y][in1_x];
-                        output[b][f][y][x] = eltwise_execute<float>(mode, in0, in1);
+                                auto in0 = input0[in0_b][in0_f][in0_w][in0_z][in0_y][in0_x];
+                                auto in1 = input1[in1_b][in1_f][in1_w][in1_z][in1_y][in1_x];
+                                output[b][f][w][z][y][x] = eltwise_execute<float>(mode, in0, in1);
+                            }
+                        }
                     }
                 }
             }
         }
 
-        return flatten_4d<float>(format::bfyx, output);;
+        return flatten_6d<float>(format::bfwzyx, output);
     }
 };
 
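The rewritten eltwise_ref keeps the same modulo-based broadcasting rule as before, now over six dimensions: each input coordinate is the output coordinate modulo that input's extent, so a size-1 dimension always resolves to index 0 and its single value is reused across the whole output. A tiny self-contained check of that rule (illustrative helper name, not from the repository):

    #include <cassert>

    // Broadcasting rule used by eltwise_ref: take the output coordinate modulo
    // the input extent, so extent-1 dimensions map every coordinate to 0.
    inline int broadcast_coord(int out_coord, int in_extent) {
        return out_coord % in_extent;
    }

    int main() {
        const int out_w = 4;
        for (int w = 0; w < out_w; ++w) {
            assert(broadcast_coord(w, 1) == 0);      // broadcast: the single element is reused
            assert(broadcast_coord(w, out_w) == w);  // matching extent: one-to-one mapping
        }
        return 0;
    }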
@@ -3271,10 +3285,10 @@ TEST_P(eltwise_test, b_fs_yx_fsv16) {
     int x1 = input1_size.spatial[0];
 
     int min_random = -2, max_random = 2;
-    VVVVF<float> input1_rnd = generate_random_4d<float>(b0, f0, y0, x0, min_random, max_random);
-    VVVVF<float> input2_rnd = generate_random_4d<float>(b1, f1, y1, x1, min_random, max_random);
-    VF<float> input1_rnd_vec = flatten_4d<float>(format::bfyx, input1_rnd);
-    VF<float> input2_rnd_vec = flatten_4d<float>(format::bfyx, input2_rnd);
+    VVVVVVF<float> input1_rnd = generate_random_6d<float>(b0, f0, 1, 1, y0, x0, min_random, max_random);
+    VVVVVVF<float> input2_rnd = generate_random_6d<float>(b1, f1, 1, 1, y1, x1, min_random, max_random);
+    VF<float> input1_rnd_vec = flatten_6d<float>(format::bfwzyx, input1_rnd);
+    VF<float> input2_rnd_vec = flatten_6d<float>(format::bfwzyx, input2_rnd);
 
     const auto& engine = get_test_engine();
     auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx, input0_size });
@@ -3339,3 +3353,82 @@ INSTANTIATE_TEST_CASE_P(eltwise, eltwise_test,
                             ::testing::ValuesIn(types),
                             ::testing::ValuesIn(inputs)
                         ), );
+
+class eltwise_test_6d : public eltwise_test {};
+TEST_P(eltwise_test_6d, bfwzyx) {
+    auto p = GetParam();
+
+    ASSERT_EQ(std::get<2>(p).size(), 2);
+
+    auto mode = std::get<0>(p);
+    auto dt = std::get<1>(p);
+    auto input0_size = std::get<2>(p)[0];
+    auto input1_size = std::get<2>(p)[1];
+
+    int b0 = input0_size.batch[0];
+    int f0 = input0_size.feature[0];
+    int w0 = input0_size.spatial[3];
+    int z0 = input0_size.spatial[2];
+    int y0 = input0_size.spatial[1];
+    int x0 = input0_size.spatial[0];
+
+    int b1 = input1_size.batch[0];
+    int f1 = input1_size.feature[0];
+    int w1 = input1_size.spatial[3];
+    int z1 = input1_size.spatial[2];
+    int y1 = input1_size.spatial[1];
+    int x1 = input1_size.spatial[0];
+
+    int min_random = -2, max_random = 2;
+    VVVVVVF<float> input1_rnd = generate_random_6d<float>(b0, f0, w0, z0, y0, x0, min_random, max_random);
+    VVVVVVF<float> input2_rnd = generate_random_6d<float>(b1, f1, w1, z1, y1, x1, min_random, max_random);
+    VF<float> input1_rnd_vec = flatten_6d<float>(format::bfwzyx, input1_rnd);
+    VF<float> input2_rnd_vec = flatten_6d<float>(format::bfwzyx, input2_rnd);
+
+    const auto& engine = get_test_engine();
+    auto input1 = memory::allocate(engine, { data_types::f32, format::bfwzyx, input0_size });
+    auto input2 = memory::allocate(engine, { data_types::f32, format::bfwzyx, input1_size });
+    set_values(input1, input1_rnd_vec);
+    set_values(input2, input2_rnd_vec);
+
+    topology topology;
+    topology.add(input_layout("input1", input1.get_layout()));
+    topology.add(input_layout("input2", input2.get_layout()));
+    topology.add(reorder("reorder1", "input1", format::bfwzyx, dt));
+    topology.add(reorder("reorder2", "input2", format::bfwzyx, dt));
+    topology.add(eltwise("eltwise", {"reorder1", "reorder2"}, mode));
+    topology.add(reorder("out", "eltwise", format::bfwzyx, data_types::f32));
+    primitive_id out_id = "out";
+
+    build_options bo;
+    bo.set_option(build_option::optimize_data(true));
+    network network(engine, topology, bo);
+
+    network.set_input_data("input1", input1);
+    network.set_input_data("input2", input2);
+    auto outputs = network.execute();
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, out_id);
+
+    auto output_memory = outputs.at(out_id).get_memory();
+    auto output_ptr = output_memory.pointer<float>();
+
+    VF<float> output_cpu_vec = eltwise_ref(input1_rnd, input2_rnd, input0_size, input1_size, mode);
+    for (size_t i = 0; i < output_cpu_vec.size(); ++i) {
+        EXPECT_TRUE(!(std::isnan((float)output_cpu_vec[i]) && std::isnan((float)output_ptr[i])));
+        ASSERT_FLOAT_EQ(output_cpu_vec[i], output_ptr[i]);
+    }
+}
+
+static std::vector<std::vector<tensor>> inputs_6d = {
+    {tensor(format::bfwzyx, {1, 2, 3, 4, 5, 6}), tensor(format::bfwzyx, {1, 2, 3, 4, 5, 6})},
+    {tensor(format::bfwzyx, {1, 32, 1, 1, 1, 1}), tensor(format::bfwzyx, {8, 32, 4, 5, 6, 7})},
+    {tensor(format::bfwzyx, {1, 32, 1, 1, 1, 7}), tensor(format::bfwzyx, {8, 32, 4, 5, 6, 7})},
+};
+
+INSTANTIATE_TEST_CASE_P(eltwise, eltwise_test_6d,
+                        ::testing::Combine(
+                            ::testing::ValuesIn(modes),
+                            ::testing::ValuesIn(types),
+                            ::testing::ValuesIn(inputs_6d)
+                        ), );
@@ -1,5 +1,5 @@
 /*
-// Copyright (c) 2017-2019 Intel Corporation
+// Copyright (c) 2017-2020 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -1388,6 +1388,122 @@ TEST(scale_gpu, basic_in2x2x2x2x3_scale_xyz) {
         }
     }
 }
+
+TEST(scale_gpu, basic_in2x2x2x2x2x3_scale_4d) {
+    // Scale : 1x2x1x1
+    // Input : 2x2x2x2x2x3
+    // Output : 2x2x2x2x2x3
+
+    const auto& engine = get_test_engine();
+
+    auto batch_num = 2;
+    auto feature_num = 2;
+    auto w_size = 2;
+    auto z_size = 2;
+    auto y_size = 2;
+    auto x_size = 3;
+
+    tensor in_size = tensor(format::bfwzyx, { batch_num, feature_num, x_size, y_size, z_size, w_size });
+    tensor scale_size = tensor(format::bfyx, { 1, feature_num, 1, 1 });
+
+    auto input = memory::allocate(engine, { data_types::f32, format::bfwzyx, in_size});
+    auto scale_input = memory::allocate(engine, { data_types::f32, format::bfyx, scale_size });
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(input_layout("scale_input", scale_input.get_layout()));
+    topology.add(scale("scale", "input", "scale_input"));
+
+    std::vector<float> input_vec = generate_random_1d<float>(in_size.count(), -10, 10);
+    set_values(input, input_vec);
+
+    std::vector<float> scale_input_vec = generate_random_1d<float>(scale_input.count(), -10, 10);
+    set_values(scale_input, scale_input_vec);
+
+    network network(engine, topology);
+
+    network.set_input_data("input", input);
+    network.set_input_data("scale_input", scale_input);
+
+    auto outputs = network.execute();
+
+    auto output = outputs.at("scale").get_memory();
+    auto output_ptr = output.pointer<float>();
+
+    for (int b = 0; b < batch_num; ++b) {
+        for (int f = 0; f < feature_num; ++f) {
+            for (int w = 0; w < w_size; ++w) {
+                for (int z = 0; z < z_size; ++z) {
+                    for (int y = 0; y < y_size; ++y) {
+                        for (int x = 0; x < x_size; ++x) {
+                            int linear_id = x + x_size * (y + y_size * (z + z_size * (w + w_size * (f + feature_num * b))));
+                            int linear_id_scale = f;
+                            EXPECT_NEAR(output_ptr[linear_id], input_vec[linear_id] * scale_input_vec[linear_id_scale], 1e-05f);
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
+
+TEST(scale_gpu, basic_in2x2x2x2x2x3_scale_6d) {
+    // Scale : 1x2x1x1x1x1
+    // Input : 2x2x2x2x2x3
+    // Output : 2x2x2x2x2x3
+
+    const auto& engine = get_test_engine();
+
+    auto batch_num = 2;
+    auto feature_num = 2;
+    auto w_size = 2;
+    auto z_size = 2;
+    auto y_size = 2;
+    auto x_size = 3;
+
+    tensor in_size = tensor(format::bfwzyx, { batch_num, feature_num, x_size, y_size, z_size, w_size });
+    tensor scale_size = tensor(format::bfwzyx, { 1, feature_num, 1, 1, 1, 1 });
+
+    auto input = memory::allocate(engine, { data_types::f32, format::bfwzyx, in_size});
+    auto scale_input = memory::allocate(engine, { data_types::f32, format::bfwzyx, scale_size });
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(input_layout("scale_input", scale_input.get_layout()));
+    topology.add(scale("scale", "input", "scale_input"));
+
+    std::vector<float> input_vec = generate_random_1d<float>(in_size.count(), -10, 10);
+    set_values(input, input_vec);
+
+    std::vector<float> scale_input_vec = generate_random_1d<float>(scale_input.count(), -10, 10);
+    set_values(scale_input, scale_input_vec);
+
+    network network(engine, topology);
+
+    network.set_input_data("input", input);
+    network.set_input_data("scale_input", scale_input);
+
+    auto outputs = network.execute();
+
+    auto output = outputs.at("scale").get_memory();
+    auto output_ptr = output.pointer<float>();
+
+    for (int b = 0; b < batch_num; ++b) {
+        for (int f = 0; f < feature_num; ++f) {
+            for (int w = 0; w < w_size; ++w) {
+                for (int z = 0; z < z_size; ++z) {
+                    for (int y = 0; y < y_size; ++y) {
+                        for (int x = 0; x < x_size; ++x) {
+                            int linear_id = x + x_size * (y + y_size * (z + z_size * (w + w_size * (f + feature_num * b))));
+                            int linear_id_scale = f;
+                            EXPECT_NEAR(output_ptr[linear_id], input_vec[linear_id] * scale_input_vec[linear_id_scale], 1e-05f);
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
+
 //////////////////////////////////////////////////////////////////////////////
 //                                                                          //
 //                   Exhaustive Negative Matrix tests                       //
@@ -1,5 +1,5 @@
 /*
-// Copyright (c) 2016-2019 Intel Corporation
+// Copyright (c) 2016-2020 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -127,6 +127,33 @@ inline VF<T> flatten_4d(cldnn::format input_format, VVVVF<T> &data) {
     return vec;
 }
 
+template<typename T>
+inline VF<T> flatten_6d(cldnn::format input_format, VVVVVVF<T> &data) {
+    size_t a = data.size();
+    size_t b = data[0].size();
+    size_t c = data[0][0].size();
+    size_t d = data[0][0][0].size();
+    size_t e = data[0][0][0][0].size();
+    size_t f = data[0][0][0][0][0].size();
+    VF<T> vec(a * b * c * d * e * f, (T)(0.0f));
+    size_t idx = 0;
+
+    switch (input_format.value) {
+        case cldnn::format::bfwzyx:
+            for (size_t bi = 0; bi < a; ++bi)
+                for (size_t fi = 0; fi < b; ++fi)
+                    for (size_t wi = 0; wi < c; ++wi)
+                        for (size_t zi = 0; zi < d; ++zi)
+                            for (size_t yi = 0; yi < e; ++yi)
+                                for (size_t xi = 0; xi < f; ++xi)
+                                    vec[idx++] = data[bi][fi][wi][zi][yi][xi];
+            break;
+        default:
+            assert(0);
+    }
+    return vec;
+}
+
 template<typename T>
 std::vector<T> generate_random_1d(size_t a, int min, int max, int k = 8) {
     static std::default_random_engine generator(random_seed);
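As a cross-check on flatten_6d, the nested bfwzyx loop above visits elements in exactly the order given by the closed-form linear index used in the new scale tests. A standalone sketch (illustrative sizes, not code from the commit):

    #include <cassert>
    #include <cstddef>

    int main() {
        // bfwzyx linearization: x + X*(y + Y*(z + Z*(w + W*(f + F*b))))
        const int B = 2, F = 2, W = 2, Z = 2, Y = 2, X = 3;
        std::size_t expected = 0;
        for (int b = 0; b < B; ++b)
            for (int f = 0; f < F; ++f)
                for (int w = 0; w < W; ++w)
                    for (int z = 0; z < Z; ++z)
                        for (int y = 0; y < Y; ++y)
                            for (int x = 0; x < X; ++x) {
                                int linear_id = x + X * (y + Y * (z + Z * (w + W * (f + F * b))));
                                // The loop nest of flatten_6d enumerates indices in increasing order.
                                assert(static_cast<std::size_t>(linear_id) == expected);
                                ++expected;
                            }
        return 0;
    }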