diff --git a/inference-engine/src/transformations/include/transformations/convert_opset3_to_opset2/convert_shuffle_channels3.hpp b/inference-engine/src/transformations/include/transformations/convert_opset3_to_opset2/convert_shuffle_channels3.hpp index 2ed9809ab0c..14387366885 100644 --- a/inference-engine/src/transformations/include/transformations/convert_opset3_to_opset2/convert_shuffle_channels3.hpp +++ b/inference-engine/src/transformations/include/transformations/convert_opset3_to_opset2/convert_shuffle_channels3.hpp @@ -22,7 +22,7 @@ class TRANSFORMATIONS_API ConvertShuffleChannels3; class ngraph::pass::ConvertShuffleChannels3: public ngraph::pass::GraphRewrite, public ngraph::pass::PassParam { public: - ConvertShuffleChannels3() : GraphRewrite() { + ConvertShuffleChannels3() : GraphRewrite(), PassParam() { convert_shuffle_channels3(); } diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/shuffle_channels.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/shuffle_channels.cpp new file mode 100644 index 00000000000..7ca57f5102b --- /dev/null +++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/shuffle_channels.cpp @@ -0,0 +1,33 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "single_layer_tests/shuffle_channels.hpp" + +using namespace LayerTestsDefinitions; + +const std::vector inputPrecision = { + InferenceEngine::Precision::FP32, + InferenceEngine::Precision::FP16, + InferenceEngine::Precision::U8, +}; + +const std::vector> inputShapes = { + {3, 4, 9, 5}, {2, 16, 24, 15}, {1, 32, 12, 25} +}; + +const std::vector> shuffleParameters = { + std::make_tuple(1, 2), std::make_tuple(-3, 2), + std::make_tuple(2, 3), std::make_tuple(-2, 3), + std::make_tuple(3, 5), std::make_tuple(-1, 5) +}; + +const auto testCases = ::testing::Combine(::testing::ValuesIn(shuffleParameters), + ::testing::ValuesIn(inputPrecision), + ::testing::ValuesIn(inputShapes), + ::testing::Values(CommonTestUtils::DEVICE_GPU)); + + +INSTANTIATE_TEST_CASE_P(smoke_GPU_ShuffleChannels, ShuffleChannelsLayerTest, testCases, ShuffleChannelsLayerTest::getTestCaseName); diff --git a/inference-engine/tests_deprecated/functional/cldnn/single_layer_tests/shuffle_channels_tests.cpp b/inference-engine/tests_deprecated/functional/cldnn/single_layer_tests/shuffle_channels_tests.cpp deleted file mode 100644 index a6b5dbb25ad..00000000000 --- a/inference-engine/tests_deprecated/functional/cldnn/single_layer_tests/shuffle_channels_tests.cpp +++ /dev/null @@ -1,189 +0,0 @@ -// Copyright (C) 2018-2020 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include -#include -#include - -#include "tests_common.hpp" -#include "single_layer_common.hpp" - - -using namespace ::testing; -using namespace InferenceEngine; -using namespace std; - - -struct shuffle_channels_test_params { - std::string device_name; - std::string inPrecision; - SizeVector in_out_shape; - int axis; - int group; - std::vector reference; -}; - -void ref_shuffle_channels(TBlob &src, TBlob &dst, int axis, int group) { - size_t i; - const float *src_data = src.data(); - float* dst_data = dst.data(); - SizeVector dst_dims = dst.getTensorDesc().getDims(); - SizeVector dstStrides = dst.getTensorDesc().getBlockingDesc().getStrides(); - - if (axis < 0) - axis += dst_dims.size(); - - if (axis < 0 || axis >= dst_dims.size()) - FAIL() << "Incorrect input parameters dimensions and axis number!"; - - if (dst_dims[axis] % group) - FAIL() << "Group parameter must evenly divide the channel dimension!"; - - // Find number of dictionaries, index range and data length - size_t numDictionaries = 1; - for (i = 0; i <= axis; i++) - numDictionaries *= dst_dims[i]; - - size_t channelsNum = dst_dims[axis] / group; - - size_t dataLength = 1; - for (i = axis + 1; i < dst_dims.size(); i++) - dataLength *= dst_dims[i]; - - if (dataLength == 0) - FAIL() << "Incorrect input parameters dimension!"; - - size_t j, k; - for (j = 0, k = 0; j < numDictionaries; j += dst_dims[axis]) { - for (i = 0; i < (dst_dims[axis] * channelsNum); i += channelsNum, k += dataLength) { - int idx = j + i / dst_dims[axis] + i % dst_dims[axis]; - memcpy(&dst_data[k], &src_data[dataLength * idx], sizeof(float) * dataLength); - } - } -} - -class ShuffleChannelsTests : public TestsCommon, public WithParamInterface { - std::string model_t = R"V0G0N( - - - - - - _IN_OUT_ - - - - - - - - _IN_OUT_ - - - - - _IN_OUT_ - - - - - - - - -)V0G0N"; - - std::string getModel(shuffle_channels_test_params p) { - std::string model = model_t; - std::string in_out_shape; - - for (size_t i = 0; i < p.in_out_shape.size(); i++) { - in_out_shape += ""; - in_out_shape += std::to_string(p.in_out_shape[i]) + "\n"; - } - REPLACE_WITH_STR(model, "_IN_OUT_", in_out_shape); - REPLACE_WITH_NUM(model, "_AX_", p.axis); - REPLACE_WITH_NUM(model, "_GR_", p.group); - - return model; - } - -protected: - virtual void TearDown() { - } - - virtual void SetUp() { - try { - shuffle_channels_test_params p = ::testing::WithParamInterface::GetParam(); - std::string model = getModel(p); - - Core ie; - CNNNetwork net = ie.ReadNetwork(model, Blob::CPtr()); - - // Output Data - OutputsDataMap out = net.getOutputsInfo(); - auto item = *out.begin(); - - // Input Data - Blob::Ptr src = make_shared_blob({Precision::FP32, - p.in_out_shape, - TensorDesc::getLayoutByDims(p.in_out_shape)}); - src->allocate(); - fill_data_dbgval(src->buffer(), src->size()); - auto * srcPtr = dynamic_cast*>(src.get()); - if (srcPtr == nullptr) - FAIL() << "Cannot cast input blob to TBlob."; - - // Output Reference - TBlob dst_ref(item.second->getTensorDesc()); - dst_ref.allocate(); - ref_shuffle_channels(*srcPtr, dst_ref, p.axis, p.group); - - // Check results - if (memcmp(dst_ref.data(), &p.reference[0], p.reference.size() * sizeof(float)) != 0) - FAIL() << "Wrong result of TF reference comparison!"; - - // Infer - ExecutableNetwork executable_network = ie.LoadNetwork(net, p.device_name); - InferRequest inferRequest = executable_network.CreateInferRequest(); - inferRequest.SetBlob("input", src); - auto output = inferRequest.GetBlob(item.first); - - inferRequest.Infer(); - - // Check results - auto * dstPtr = dynamic_cast*>(output.get()); - if (dstPtr == nullptr) - FAIL() << "Cannot cast output blob to TBlob."; - compare(*dstPtr, dst_ref); - } catch (const details::InferenceEngineException &e) { - FAIL() << e.what(); - } - } -}; - -TEST_P(ShuffleChannelsTests, smoke_GPU_TestsShuffleChannels) {} - -static std::vector test0 = { 0.f, 1.f, 2.f, 3.f, 12.f, 13.f, 14.f, 15.f, 24.f, 25.f, 26.f, 27.f, 36.f, 37.f, 38.f, 39.f, 48.f, 49.f, 50.f, 51.f, - 4.f, 5.f, 6.f, 7.f, 16.f, 17.f, 18.f, 19.f, 28.f, 29.f, 30.f, 31.f, 40.f, 41.f, 42.f, 43.f, 52.f, 53.f, 54.f, 55.f, - 8.f, 9.f, 10.f, 11.f, 20.f, 21.f, 22.f, 23.f, 32.f, 33.f, 34.f, 35.f, 44.f, 45.f, 46.f, 47.f, 56.f, 57.f, 58.f, 59.f }; -static std::vector test4 = { 0.f, 2.f, 4.f, 1.f, 3.f, 5.f, 6.f, 8.f, 10.f, 7.f, 9.f, 11.f, 12.f, 14.f, 16.f, 13.f, 15.f, 17.f, 18.f, 20.f, 22.f, 19.f, 21.f, 23.f }; -static std::vector test5 = { 0.f, 1.f, 4.f, 5.f, 8.f, 9.f, 2.f, 3.f, 6.f, 7.f, 10.f, 11.f, 12.f, 13.f, 16.f, 17.f, 20.f, 21.f, 14.f, 15.f, 18.f, 19.f, 22.f, 23.f }; -static std::vector test6 = { 0.f, 3.f, 1.f, 4.f, 2.f, 5.f, 6.f, 9.f, 7.f, 10.f, 8.f, 11.f, 12.f, 15.f, 13.f, 16.f, 14.f, 17.f, 18.f, 21.f, 19.f, 22.f, 20.f, 23.f }; -static std::vector test7 = { 0.f, 1.f, 6.f, 7.f, 2.f, 3.f, 8.f, 9.f, 4.f, 5.f, 10.f, 11.f, 12.f, 13.f, 18.f, 19.f, 14.f, 15.f, 20.f, 21.f, 16.f, 17.f, 22.f, 23.f }; -static std::vector test8 = { 0.f, 3.f, 1.f, 4.f, 2.f, 5.f }; - -INSTANTIATE_TEST_CASE_P( - smoke_TestsShuffleChannels, ShuffleChannelsTests, - ::testing::Values( - shuffle_channels_test_params{ "GPU", "FP32", { 1, 15, 2, 2 }, 1, 5, test0 }, - shuffle_channels_test_params{ "GPU", "FP32", { 1, 15, 2, 2 }, -3, 5, test0 }, - shuffle_channels_test_params{ "GPU", "FP32", { 15, 2, 2 }, 0, 5, test0 }, - shuffle_channels_test_params{ "GPU", "FP32", { 15, 2, 2 }, -3, 5, test0 }, - shuffle_channels_test_params{ "GPU", "FP32", { 2, 2, 6 }, -1, 3, test4 }, - shuffle_channels_test_params{ "GPU", "FP32", { 2, 6, 2 }, -2, 3, test5 }, - shuffle_channels_test_params{ "GPU", "FP32", { 2, 2, 6 }, -1, 2, test6 }, - shuffle_channels_test_params{ "GPU", "FP32", { 2, 6, 2 }, -2, 2, test7 } - )); - diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/shuffle_channels/shuffle_channels_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/shuffle_channels/shuffle_channels_kernel_ref.cpp index 18fe88f55ae..a1284985d69 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/shuffle_channels/shuffle_channels_kernel_ref.cpp +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/shuffle_channels/shuffle_channels_kernel_ref.cpp @@ -1,5 +1,5 @@ /* -// Copyright (c) 2019 Intel Corporation +// Copyright (c) 2019-2020 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -20,20 +20,39 @@ #include namespace kernel_selector { + ParamsKey ShuffleChannelsKernelRef::GetSupportedKey() const { ParamsKey k; + k.EnableInputDataType(Datatype::UINT8); + k.EnableInputDataType(Datatype::INT8); k.EnableInputDataType(Datatype::F16); k.EnableInputDataType(Datatype::F32); + k.EnableOutputDataType(Datatype::UINT8); + k.EnableOutputDataType(Datatype::INT8); k.EnableOutputDataType(Datatype::F16); k.EnableOutputDataType(Datatype::F32); - k.EnableInputLayout(DataLayout::bfyx); - k.EnableOutputLayout(DataLayout::bfyx); + k.EnableAllInputLayout(); + k.EnableAllOutputLayout(); k.EnableTensorOffset(); k.EnableTensorPitches(); k.EnableBatching(); return k; } +bool ShuffleChannelsKernelRef::Validate(const Params& p, const optional_params& o) const { + if (p.GetType() != KernelType::SHUFFLE_CHANNELS || + o.GetType() != KernelType::SHUFFLE_CHANNELS) { + return false; + } + + const shuffle_channels_params& params = static_cast(p); + + if (params.inputs[0].Dimentions() > 4) + return false; + + return true; +} + CommonDispatchData ShuffleChannelsKernelRef::SetDefault(const shuffle_channels_params& params, const optional_params&) const { CommonDispatchData runInfo; @@ -81,6 +100,10 @@ JitConstants ShuffleChannelsKernelRef::GetJitConstants(const shuffle_channels_pa } KernelsData ShuffleChannelsKernelRef::GetKernelsData(const Params& params, const optional_params& options) const { + if (!Validate(params, options)) { + return {}; + } + KernelData kd = KernelData::Default(params); shuffle_channels_params& newParams = *static_cast(kd.params.get()); diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/shuffle_channels/shuffle_channels_kernel_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/shuffle_channels/shuffle_channels_kernel_ref.h index 38f77e7a44f..758dc14dfee 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/shuffle_channels/shuffle_channels_kernel_ref.h +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/shuffle_channels/shuffle_channels_kernel_ref.h @@ -42,9 +42,11 @@ class ShuffleChannelsKernelRef : public common_kernel_base { public: ShuffleChannelsKernelRef() : common_kernel_base("shuffle_channels_ref") {} virtual ~ShuffleChannelsKernelRef() {} - virtual JitConstants GetJitConstants(const shuffle_channels_params& params) const; - virtual CommonDispatchData SetDefault(const shuffle_channels_params& params, const optional_params&) const; KernelsData GetKernelsData(const Params& params, const optional_params& options) const override; ParamsKey GetSupportedKey() const override; +protected: + bool Validate(const Params&, const optional_params&) const override; + virtual CommonDispatchData SetDefault(const shuffle_channels_params& params, const optional_params&) const; + virtual JitConstants GetJitConstants(const shuffle_channels_params& params) const; }; } // namespace kernel_selector diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/shuffle_channels_ref.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/shuffle_channels_ref.cl index 43f06e2b9eb..d210e944dfb 100644 --- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/shuffle_channels_ref.cl +++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/shuffle_channels_ref.cl @@ -1,4 +1,4 @@ -// Copyright (c) 2019 Intel Corporation +// Copyright (c) 2019-2020 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -15,29 +15,20 @@ #include "include/include_all.cl" -KERNEL(shuffle_channels_ref)(const __global UNIT_TYPE* input, __global UNIT_TYPE* output) +KERNEL(shuffle_channels_ref)(const __global INPUT0_TYPE* input, __global OUTPUT_TYPE* output) { const uint batch = get_global_id(0); const uint feature = get_global_id(1); const uint y = (uint)get_global_id(2) / OUTPUT_SIZE_X; const uint x = (uint)get_global_id(2) % OUTPUT_SIZE_X; - const uint dimensions[] = { batch, feature, y, x }; + uint dimensions[] = { batch, feature, y, x }; const uint current_group = dimensions[AXIS] / GROUP_SIZE; const uint position_in_group = dimensions[AXIS] % GROUP_SIZE; - const uint input_index = INPUT0_OFFSET + (batch * INPUT0_BATCH_PITCH) + (feature * INPUT0_FEATURE_PITCH) + (y * INPUT0_Y_PITCH) + x; + const uint input_index = INPUT0_GET_INDEX(batch, feature, y, x); - uint output_index = OUTPUT_OFFSET; - - for (uint i = 0; i < AXIS; ++i) { - output_index += dimensions[i] * INPUT0_PITCHES[INPUT0_DIMS - i - 1]; - } - - output_index += (position_in_group * GROUPS_NUMBER + current_group) * INPUT0_PITCHES[INPUT0_DIMS - AXIS - 1]; - - for (uint i = AXIS + 1; i < INPUT0_DIMS; ++i) { - output_index += dimensions[i] * INPUT0_PITCHES[INPUT0_DIMS - i - 1]; - } + dimensions[AXIS] = (position_in_group * GROUPS_NUMBER + current_group); + uint output_index = OUTPUT_GET_INDEX(dimensions[0], dimensions[1], dimensions[2], dimensions[3]); output[output_index] = ACTIVATION(input[input_index], ACTIVATION_PARAMS); } diff --git a/inference-engine/thirdparty/clDNN/src/gpu/shuffle_channels_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/shuffle_channels_gpu.cpp index 5558106003d..3d8dd07c091 100644 --- a/inference-engine/thirdparty/clDNN/src/gpu/shuffle_channels_gpu.cpp +++ b/inference-engine/thirdparty/clDNN/src/gpu/shuffle_channels_gpu.cpp @@ -64,10 +64,28 @@ namespace detail { attach_shuffle_channels_gpu::attach_shuffle_channels_gpu() { auto val_fw = shuffle_channels_gpu::create; - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), - val_fw); - implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), - val_fw); + implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw); + implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw); + implementation_map::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), val_fw); + implementation_map::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), val_fw); + + implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv16), val_fw); + implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_yx_fsv16), val_fw); + implementation_map::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv16), val_fw); + implementation_map::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv16), val_fw); + + implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv4), val_fw); + implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_yx_fsv4), val_fw); + implementation_map::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv4), val_fw); + implementation_map::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv4), val_fw); + + implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bs_fs_yx_bsv16_fsv16), val_fw); + implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bs_fs_yx_bsv16_fsv16), val_fw); + implementation_map::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bs_fs_yx_bsv16_fsv16), val_fw); + implementation_map::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bs_fs_yx_bsv16_fsv16), val_fw); + + implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f32, format::fs_b_yx_fsv32), val_fw); + implementation_map::add(std::make_tuple(engine_types::ocl, data_types::f16, format::fs_b_yx_fsv32), val_fw); } } // namespace detail diff --git a/inference-engine/thirdparty/clDNN/src/program.cpp b/inference-engine/thirdparty/clDNN/src/program.cpp index 69c8d78d88e..42e387ed8c1 100644 --- a/inference-engine/thirdparty/clDNN/src/program.cpp +++ b/inference-engine/thirdparty/clDNN/src/program.cpp @@ -47,6 +47,7 @@ #include "deconvolution_inst.h" #include "detection_output_inst.h" #include "input_layout_inst.h" +#include "shuffle_channels_inst.h" #include "arg_max_min_inst.h" #include "lstm_inst.h" #include "lstm_elt_inst.h" @@ -1157,6 +1158,7 @@ void program_impl::set_layout_optimizer_attributes(layout_optimizer& lo) { prim.type() != cldnn::crop::type_id() && prim.type() != cldnn::scale::type_id() && prim.type() != cldnn::depth_to_space::type_id() && + prim.type() != cldnn::shuffle_channels::type_id() && (prim.type() != cldnn::mvn::type_id() || (prim.as().input().get_output_layout().data_type != data_types::u8 && prim.as().input().get_output_layout().data_type != data_types::i8)