[IE CLDNN] Enable ShuffleChannels op (#787)

2020-06-07 22:57:20 +03:00 · 2020-06-07 22:57:20 +03:00 · f0498ad011
commit f0498ad011
parent 63ee9f8916
8 changed files with 94 additions and 214 deletions
--- a/inference-engine/src/transformations/include/transformations/convert_opset3_to_opset2/convert_shuffle_channels3.hpp
+++ b/inference-engine/src/transformations/include/transformations/convert_opset3_to_opset2/convert_shuffle_channels3.hpp
@ -22,7 +22,7 @@ class TRANSFORMATIONS_API ConvertShuffleChannels3;

 class ngraph::pass::ConvertShuffleChannels3: public ngraph::pass::GraphRewrite, public ngraph::pass::PassParam {
 public:
-    ConvertShuffleChannels3() : GraphRewrite() {
+    ConvertShuffleChannels3() : GraphRewrite(), PassParam() {
        convert_shuffle_channels3();
    }

--- a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/shuffle_channels.cpp
+++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/shuffle_channels.cpp
@ -0,0 +1,33 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vector>
+
+#include "single_layer_tests/shuffle_channels.hpp"
+
+using namespace LayerTestsDefinitions;
+
+const std::vector<InferenceEngine::Precision> inputPrecision = {
+    InferenceEngine::Precision::FP32,
+    InferenceEngine::Precision::FP16,
+    InferenceEngine::Precision::U8,
+};
+
+const std::vector<std::vector<size_t>> inputShapes = {
+    {3, 4, 9, 5}, {2, 16, 24, 15}, {1, 32, 12, 25}
+};
+
+const std::vector<std::tuple<int, int>> shuffleParameters = {
+    std::make_tuple(1, 2), std::make_tuple(-3, 2),
+    std::make_tuple(2, 3), std::make_tuple(-2, 3),
+    std::make_tuple(3, 5), std::make_tuple(-1, 5)
+};
+
+const auto testCases = ::testing::Combine(::testing::ValuesIn(shuffleParameters),
+                                          ::testing::ValuesIn(inputPrecision),
+                                          ::testing::ValuesIn(inputShapes),
+                                          ::testing::Values(CommonTestUtils::DEVICE_GPU));
+
+
+INSTANTIATE_TEST_CASE_P(smoke_GPU_ShuffleChannels, ShuffleChannelsLayerTest, testCases, ShuffleChannelsLayerTest::getTestCaseName);
--- a/inference-engine/tests_deprecated/functional/cldnn/single_layer_tests/shuffle_channels_tests.cpp
+++ b/inference-engine/tests_deprecated/functional/cldnn/single_layer_tests/shuffle_channels_tests.cpp
@ -1,189 +0,0 @@
-// Copyright (C) 2018-2020 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include <gtest/gtest.h>
-#include <ie_core.hpp>
-#include <cmath>
-
-#include "tests_common.hpp"
-#include "single_layer_common.hpp"
-
-
-using namespace ::testing;
-using namespace InferenceEngine;
-using namespace std;
-
-
-struct shuffle_channels_test_params {
-    std::string device_name;
-    std::string inPrecision;
-    SizeVector in_out_shape;
-    int axis;
-    int group;
-    std::vector<float> reference;
-};
-
-void ref_shuffle_channels(TBlob<float> &src, TBlob<float> &dst, int axis, int group) {
-    size_t i;
-    const float *src_data = src.data();
-    float* dst_data = dst.data();
-    SizeVector dst_dims = dst.getTensorDesc().getDims();
-    SizeVector dstStrides = dst.getTensorDesc().getBlockingDesc().getStrides();
-
-    if (axis < 0)
-        axis += dst_dims.size();
-
-    if (axis < 0 || axis >= dst_dims.size())
-        FAIL() << "Incorrect input parameters dimensions and axis number!";
-
-    if (dst_dims[axis] % group)
-        FAIL() << "Group parameter must evenly divide the channel dimension!";
-
-    //  Find number of dictionaries, index range and data length
-    size_t numDictionaries = 1;
-    for (i = 0; i <= axis; i++)
-        numDictionaries *= dst_dims[i];
-
-    size_t channelsNum = dst_dims[axis] / group;
-
-    size_t dataLength = 1;
-    for (i = axis + 1; i < dst_dims.size(); i++)
-        dataLength *= dst_dims[i];
-
-    if (dataLength == 0)
-        FAIL() << "Incorrect input parameters dimension!";
-
-    size_t j, k;
-    for (j = 0, k = 0; j < numDictionaries; j += dst_dims[axis]) {
-        for (i = 0; i < (dst_dims[axis] * channelsNum); i += channelsNum, k += dataLength) {
-            int idx = j + i / dst_dims[axis] + i % dst_dims[axis];
-            memcpy(&dst_data[k], &src_data[dataLength * idx], sizeof(float) * dataLength);
-        }
-    }
-}
-
-class ShuffleChannelsTests : public TestsCommon, public WithParamInterface<shuffle_channels_test_params> {
-    std::string model_t = R"V0G0N(
-<net Name="ShuffleChannels_net" version="2" precision="FP32" batch="1">
-    <layers>
-        <layer name="input" type="Input" precision="FP32" id="1">
-            <output>
-                <port id="1">
-                    _IN_OUT_
-                </port>
-            </output>
-        </layer>
-        <layer name="output" id="2" type="ShuffleChannels" precision="FP32">
-            <data axis="_AX_" group="_GR_"/>
-            <input>
-                <port id="1">
-                    _IN_OUT_
-                </port>
-           </input>
-            <output>
-                <port id="2">
-                    _IN_OUT_
-                </port>
-            </output>
-        </layer>
-    </layers>
-    <edges>
-        <edge from-layer="1" from-port="1" to-layer="2" to-port="1"/>
-    </edges>
-</net>
-)V0G0N";
-
-    std::string getModel(shuffle_channels_test_params p) {
-        std::string model = model_t;
-        std::string in_out_shape;
-
-        for (size_t i = 0; i < p.in_out_shape.size(); i++) {
-            in_out_shape += "<dim>";
-            in_out_shape += std::to_string(p.in_out_shape[i]) + "</dim>\n";
-        }
-        REPLACE_WITH_STR(model, "_IN_OUT_", in_out_shape);
-        REPLACE_WITH_NUM(model, "_AX_", p.axis);
-        REPLACE_WITH_NUM(model, "_GR_", p.group);
-
-        return model;
-    }
-
-protected:
-    virtual void TearDown() {
-    }
-
-    virtual void SetUp() {
-        try {
-            shuffle_channels_test_params p = ::testing::WithParamInterface<shuffle_channels_test_params>::GetParam();
-            std::string model = getModel(p);
-
-            Core ie;
-            CNNNetwork net = ie.ReadNetwork(model, Blob::CPtr());
-
-            // Output Data
-            OutputsDataMap out = net.getOutputsInfo();
-            auto item = *out.begin();
-
-            // Input Data
-            Blob::Ptr src = make_shared_blob<float>({Precision::FP32,
-                p.in_out_shape,
-                TensorDesc::getLayoutByDims(p.in_out_shape)});
-            src->allocate();
-            fill_data_dbgval(src->buffer(), src->size());
-            auto * srcPtr = dynamic_cast<TBlob<float>*>(src.get());
-            if (srcPtr == nullptr)
-                FAIL() << "Cannot cast input blob to TBlob<float>.";
-
-            // Output Reference
-            TBlob<float> dst_ref(item.second->getTensorDesc());
-            dst_ref.allocate();
-            ref_shuffle_channels(*srcPtr, dst_ref, p.axis, p.group);
-
-            // Check results
-            if (memcmp(dst_ref.data(), &p.reference[0], p.reference.size() * sizeof(float)) != 0)
-                FAIL() << "Wrong result of TF reference comparison!";
-
-            // Infer
-            ExecutableNetwork executable_network = ie.LoadNetwork(net, p.device_name);
-            InferRequest inferRequest = executable_network.CreateInferRequest();
-            inferRequest.SetBlob("input", src);
-            auto output = inferRequest.GetBlob(item.first);
-
-            inferRequest.Infer();
-
-            // Check results
-            auto * dstPtr = dynamic_cast<TBlob<float>*>(output.get());
-            if (dstPtr == nullptr)
-                FAIL() << "Cannot cast output blob to TBlob<float>.";
-            compare(*dstPtr, dst_ref);
-        } catch (const details::InferenceEngineException &e) {
-            FAIL() << e.what();
-        }
-    }
-};
-
-TEST_P(ShuffleChannelsTests, smoke_GPU_TestsShuffleChannels) {}
-
-static std::vector<float> test0 = { 0.f, 1.f, 2.f, 3.f, 12.f, 13.f, 14.f, 15.f, 24.f, 25.f, 26.f, 27.f, 36.f, 37.f, 38.f, 39.f, 48.f, 49.f, 50.f, 51.f,
-                                    4.f, 5.f, 6.f, 7.f, 16.f, 17.f, 18.f, 19.f, 28.f, 29.f, 30.f, 31.f, 40.f, 41.f, 42.f, 43.f, 52.f, 53.f, 54.f, 55.f,
-                                    8.f, 9.f, 10.f, 11.f, 20.f, 21.f, 22.f, 23.f, 32.f, 33.f, 34.f, 35.f, 44.f, 45.f, 46.f, 47.f, 56.f, 57.f, 58.f, 59.f };
-static std::vector<float> test4 = { 0.f, 2.f, 4.f, 1.f, 3.f, 5.f, 6.f, 8.f, 10.f, 7.f, 9.f, 11.f, 12.f, 14.f, 16.f, 13.f, 15.f, 17.f, 18.f, 20.f, 22.f, 19.f, 21.f, 23.f };
-static std::vector<float> test5 = { 0.f, 1.f, 4.f, 5.f, 8.f, 9.f, 2.f, 3.f, 6.f, 7.f, 10.f, 11.f, 12.f, 13.f, 16.f, 17.f, 20.f, 21.f, 14.f, 15.f, 18.f, 19.f, 22.f, 23.f };
-static std::vector<float> test6 = { 0.f, 3.f, 1.f, 4.f, 2.f, 5.f, 6.f, 9.f, 7.f, 10.f, 8.f, 11.f, 12.f, 15.f, 13.f, 16.f, 14.f, 17.f, 18.f, 21.f, 19.f, 22.f, 20.f, 23.f };
-static std::vector<float> test7 = { 0.f, 1.f, 6.f, 7.f, 2.f, 3.f, 8.f, 9.f, 4.f, 5.f, 10.f, 11.f, 12.f, 13.f, 18.f, 19.f, 14.f, 15.f, 20.f, 21.f, 16.f, 17.f, 22.f, 23.f };
-static std::vector<float> test8 = { 0.f, 3.f, 1.f, 4.f, 2.f, 5.f };
-
-INSTANTIATE_TEST_CASE_P(
-        smoke_TestsShuffleChannels, ShuffleChannelsTests,
-        ::testing::Values(
-                shuffle_channels_test_params{ "GPU", "FP32", { 1, 15, 2, 2 }, 1, 5, test0 },
-                shuffle_channels_test_params{ "GPU", "FP32", { 1, 15, 2, 2 }, -3, 5, test0 },
-                shuffle_channels_test_params{ "GPU", "FP32", { 15, 2, 2 }, 0, 5, test0 },
-                shuffle_channels_test_params{ "GPU", "FP32", { 15, 2, 2 }, -3, 5, test0 },
-                shuffle_channels_test_params{ "GPU", "FP32", { 2, 2, 6 }, -1, 3, test4 },
-                shuffle_channels_test_params{ "GPU", "FP32", { 2, 6, 2 }, -2, 3, test5 },
-                shuffle_channels_test_params{ "GPU", "FP32", { 2, 2, 6 }, -1, 2, test6 },
-                shuffle_channels_test_params{ "GPU", "FP32", { 2, 6, 2 }, -2, 2, test7 }
-        ));
-
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/shuffle_channels/shuffle_channels_kernel_ref.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/shuffle_channels/shuffle_channels_kernel_ref.cpp
@ -1,5 +1,5 @@
 /*
-// Copyright (c) 2019 Intel Corporation
+// Copyright (c) 2019-2020 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@ -20,20 +20,39 @@
 #include <vector>

 namespace kernel_selector {
+
 ParamsKey ShuffleChannelsKernelRef::GetSupportedKey() const {
    ParamsKey k;
+    k.EnableInputDataType(Datatype::UINT8);
+    k.EnableInputDataType(Datatype::INT8);
    k.EnableInputDataType(Datatype::F16);
    k.EnableInputDataType(Datatype::F32);
+    k.EnableOutputDataType(Datatype::UINT8);
+    k.EnableOutputDataType(Datatype::INT8);
    k.EnableOutputDataType(Datatype::F16);
    k.EnableOutputDataType(Datatype::F32);
-    k.EnableInputLayout(DataLayout::bfyx);
-    k.EnableOutputLayout(DataLayout::bfyx);
+    k.EnableAllInputLayout();
+    k.EnableAllOutputLayout();
    k.EnableTensorOffset();
    k.EnableTensorPitches();
    k.EnableBatching();
    return k;
 }

+bool ShuffleChannelsKernelRef::Validate(const Params& p, const optional_params& o) const {
+    if (p.GetType() != KernelType::SHUFFLE_CHANNELS ||
+        o.GetType() != KernelType::SHUFFLE_CHANNELS) {
+        return false;
+    }
+
+    const shuffle_channels_params& params = static_cast<const shuffle_channels_params&>(p);
+
+    if (params.inputs[0].Dimentions() > 4)
+        return false;
+
+    return true;
+}
+
 CommonDispatchData ShuffleChannelsKernelRef::SetDefault(const shuffle_channels_params& params,
                                                        const optional_params&) const {
    CommonDispatchData runInfo;
@ -81,6 +100,10 @@ JitConstants ShuffleChannelsKernelRef::GetJitConstants(const shuffle_channels_pa
 }

 KernelsData ShuffleChannelsKernelRef::GetKernelsData(const Params& params, const optional_params& options) const {
+    if (!Validate(params, options)) {
+        return {};
+    }
+
    KernelData kd = KernelData::Default<shuffle_channels_params>(params);
    shuffle_channels_params& newParams = *static_cast<shuffle_channels_params*>(kd.params.get());

--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/shuffle_channels/shuffle_channels_kernel_ref.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/shuffle_channels/shuffle_channels_kernel_ref.h
@ -42,9 +42,11 @@ class ShuffleChannelsKernelRef : public common_kernel_base {
 public:
    ShuffleChannelsKernelRef() : common_kernel_base("shuffle_channels_ref") {}
    virtual ~ShuffleChannelsKernelRef() {}
-    virtual JitConstants GetJitConstants(const shuffle_channels_params& params) const;
-    virtual CommonDispatchData SetDefault(const shuffle_channels_params& params, const optional_params&) const;
    KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
    ParamsKey GetSupportedKey() const override;
+protected:
+    bool Validate(const Params&, const optional_params&) const override;
+    virtual CommonDispatchData SetDefault(const shuffle_channels_params& params, const optional_params&) const;
+    virtual JitConstants GetJitConstants(const shuffle_channels_params& params) const;
 };
 }  // namespace kernel_selector
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/shuffle_channels_ref.cl
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/shuffle_channels_ref.cl
@ -1,4 +1,4 @@
-// Copyright (c) 2019 Intel Corporation
+// Copyright (c) 2019-2020 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@ -15,29 +15,20 @@

 #include "include/include_all.cl"

-KERNEL(shuffle_channels_ref)(const __global UNIT_TYPE* input, __global UNIT_TYPE* output)
+KERNEL(shuffle_channels_ref)(const __global INPUT0_TYPE* input, __global OUTPUT_TYPE* output)
 {
    const uint batch = get_global_id(0);
    const uint feature = get_global_id(1);
    const uint y = (uint)get_global_id(2) / OUTPUT_SIZE_X;
    const uint x = (uint)get_global_id(2) % OUTPUT_SIZE_X;
-    const uint dimensions[] = { batch, feature, y, x };
+    uint dimensions[] = { batch, feature, y, x };

    const uint current_group = dimensions[AXIS] / GROUP_SIZE;
    const uint position_in_group = dimensions[AXIS] % GROUP_SIZE;
-    const uint input_index = INPUT0_OFFSET + (batch * INPUT0_BATCH_PITCH) + (feature * INPUT0_FEATURE_PITCH) + (y * INPUT0_Y_PITCH) + x;
+    const uint input_index = INPUT0_GET_INDEX(batch, feature, y, x);

-    uint output_index = OUTPUT_OFFSET;
-
-    for (uint i = 0; i < AXIS; ++i) {
-        output_index += dimensions[i] * INPUT0_PITCHES[INPUT0_DIMS - i - 1];
-    }
-
-    output_index += (position_in_group * GROUPS_NUMBER + current_group) * INPUT0_PITCHES[INPUT0_DIMS - AXIS - 1];
-
-    for (uint i = AXIS + 1; i < INPUT0_DIMS; ++i) {
-        output_index += dimensions[i] * INPUT0_PITCHES[INPUT0_DIMS - i - 1];
-    }
+    dimensions[AXIS] = (position_in_group * GROUPS_NUMBER + current_group);
+    uint output_index = OUTPUT_GET_INDEX(dimensions[0], dimensions[1], dimensions[2], dimensions[3]);

    output[output_index] = ACTIVATION(input[input_index], ACTIVATION_PARAMS);
 }
--- a/inference-engine/thirdparty/clDNN/src/gpu/shuffle_channels_gpu.cpp
+++ b/inference-engine/thirdparty/clDNN/src/gpu/shuffle_channels_gpu.cpp
@ -64,10 +64,28 @@ namespace detail {

 attach_shuffle_channels_gpu::attach_shuffle_channels_gpu() {
    auto val_fw = shuffle_channels_gpu::create;
-    implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx),
-                                              val_fw);
-    implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx),
-                                              val_fw);
+    implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
+    implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
+    implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), val_fw);
+    implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), val_fw);
+
+    implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv16), val_fw);
+    implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_yx_fsv16), val_fw);
+    implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv16), val_fw);
+    implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv16), val_fw);
+
+    implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv4), val_fw);
+    implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_yx_fsv4), val_fw);
+    implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv4), val_fw);
+    implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv4), val_fw);
+
+    implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bs_fs_yx_bsv16_fsv16), val_fw);
+    implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bs_fs_yx_bsv16_fsv16), val_fw);
+    implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bs_fs_yx_bsv16_fsv16), val_fw);
+    implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bs_fs_yx_bsv16_fsv16), val_fw);
+
+    implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::fs_b_yx_fsv32), val_fw);
+    implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::fs_b_yx_fsv32), val_fw);
 }

 }  // namespace detail
--- a/inference-engine/thirdparty/clDNN/src/program.cpp
+++ b/inference-engine/thirdparty/clDNN/src/program.cpp
@ -47,6 +47,7 @@
 #include "deconvolution_inst.h"
 #include "detection_output_inst.h"
 #include "input_layout_inst.h"
+#include "shuffle_channels_inst.h"
 #include "arg_max_min_inst.h"
 #include "lstm_inst.h"
 #include "lstm_elt_inst.h"
@ -1157,6 +1158,7 @@ void program_impl::set_layout_optimizer_attributes(layout_optimizer& lo) {
            prim.type() != cldnn::crop::type_id() &&
            prim.type() != cldnn::scale::type_id() &&
            prim.type() != cldnn::depth_to_space::type_id() &&
+            prim.type() != cldnn::shuffle_channels::type_id() &&
            (prim.type() != cldnn::mvn::type_id()
             || (prim.as<mvn>().input().get_output_layout().data_type != data_types::u8 &&
                 prim.as<mvn>().input().get_output_layout().data_type != data_types::i8)