[IE CLDNN] Enable ShuffleChannels op (#787)

This commit is contained in:
Vladimir Paramuzov 2020-06-07 22:57:20 +03:00 committed by GitHub
parent 63ee9f8916
commit f0498ad011
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 94 additions and 214 deletions

View File

@ -22,7 +22,7 @@ class TRANSFORMATIONS_API ConvertShuffleChannels3;
class ngraph::pass::ConvertShuffleChannels3: public ngraph::pass::GraphRewrite, public ngraph::pass::PassParam {
public:
ConvertShuffleChannels3() : GraphRewrite() {
ConvertShuffleChannels3() : GraphRewrite(), PassParam() {
convert_shuffle_channels3();
}

View File

@ -0,0 +1,33 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <vector>
#include "single_layer_tests/shuffle_channels.hpp"
using namespace LayerTestsDefinitions;
const std::vector<InferenceEngine::Precision> inputPrecision = {
InferenceEngine::Precision::FP32,
InferenceEngine::Precision::FP16,
InferenceEngine::Precision::U8,
};
const std::vector<std::vector<size_t>> inputShapes = {
{3, 4, 9, 5}, {2, 16, 24, 15}, {1, 32, 12, 25}
};
const std::vector<std::tuple<int, int>> shuffleParameters = {
std::make_tuple(1, 2), std::make_tuple(-3, 2),
std::make_tuple(2, 3), std::make_tuple(-2, 3),
std::make_tuple(3, 5), std::make_tuple(-1, 5)
};
const auto testCases = ::testing::Combine(::testing::ValuesIn(shuffleParameters),
::testing::ValuesIn(inputPrecision),
::testing::ValuesIn(inputShapes),
::testing::Values(CommonTestUtils::DEVICE_GPU));
INSTANTIATE_TEST_CASE_P(smoke_GPU_ShuffleChannels, ShuffleChannelsLayerTest, testCases, ShuffleChannelsLayerTest::getTestCaseName);

View File

@ -1,189 +0,0 @@
// Copyright (C) 2018-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <gtest/gtest.h>
#include <ie_core.hpp>
#include <cmath>
#include "tests_common.hpp"
#include "single_layer_common.hpp"
using namespace ::testing;
using namespace InferenceEngine;
using namespace std;
struct shuffle_channels_test_params {
std::string device_name;
std::string inPrecision;
SizeVector in_out_shape;
int axis;
int group;
std::vector<float> reference;
};
void ref_shuffle_channels(TBlob<float> &src, TBlob<float> &dst, int axis, int group) {
size_t i;
const float *src_data = src.data();
float* dst_data = dst.data();
SizeVector dst_dims = dst.getTensorDesc().getDims();
SizeVector dstStrides = dst.getTensorDesc().getBlockingDesc().getStrides();
if (axis < 0)
axis += dst_dims.size();
if (axis < 0 || axis >= dst_dims.size())
FAIL() << "Incorrect input parameters dimensions and axis number!";
if (dst_dims[axis] % group)
FAIL() << "Group parameter must evenly divide the channel dimension!";
// Find number of dictionaries, index range and data length
size_t numDictionaries = 1;
for (i = 0; i <= axis; i++)
numDictionaries *= dst_dims[i];
size_t channelsNum = dst_dims[axis] / group;
size_t dataLength = 1;
for (i = axis + 1; i < dst_dims.size(); i++)
dataLength *= dst_dims[i];
if (dataLength == 0)
FAIL() << "Incorrect input parameters dimension!";
size_t j, k;
for (j = 0, k = 0; j < numDictionaries; j += dst_dims[axis]) {
for (i = 0; i < (dst_dims[axis] * channelsNum); i += channelsNum, k += dataLength) {
int idx = j + i / dst_dims[axis] + i % dst_dims[axis];
memcpy(&dst_data[k], &src_data[dataLength * idx], sizeof(float) * dataLength);
}
}
}
class ShuffleChannelsTests : public TestsCommon, public WithParamInterface<shuffle_channels_test_params> {
std::string model_t = R"V0G0N(
<net Name="ShuffleChannels_net" version="2" precision="FP32" batch="1">
<layers>
<layer name="input" type="Input" precision="FP32" id="1">
<output>
<port id="1">
_IN_OUT_
</port>
</output>
</layer>
<layer name="output" id="2" type="ShuffleChannels" precision="FP32">
<data axis="_AX_" group="_GR_"/>
<input>
<port id="1">
_IN_OUT_
</port>
</input>
<output>
<port id="2">
_IN_OUT_
</port>
</output>
</layer>
</layers>
<edges>
<edge from-layer="1" from-port="1" to-layer="2" to-port="1"/>
</edges>
</net>
)V0G0N";
std::string getModel(shuffle_channels_test_params p) {
std::string model = model_t;
std::string in_out_shape;
for (size_t i = 0; i < p.in_out_shape.size(); i++) {
in_out_shape += "<dim>";
in_out_shape += std::to_string(p.in_out_shape[i]) + "</dim>\n";
}
REPLACE_WITH_STR(model, "_IN_OUT_", in_out_shape);
REPLACE_WITH_NUM(model, "_AX_", p.axis);
REPLACE_WITH_NUM(model, "_GR_", p.group);
return model;
}
protected:
virtual void TearDown() {
}
virtual void SetUp() {
try {
shuffle_channels_test_params p = ::testing::WithParamInterface<shuffle_channels_test_params>::GetParam();
std::string model = getModel(p);
Core ie;
CNNNetwork net = ie.ReadNetwork(model, Blob::CPtr());
// Output Data
OutputsDataMap out = net.getOutputsInfo();
auto item = *out.begin();
// Input Data
Blob::Ptr src = make_shared_blob<float>({Precision::FP32,
p.in_out_shape,
TensorDesc::getLayoutByDims(p.in_out_shape)});
src->allocate();
fill_data_dbgval(src->buffer(), src->size());
auto * srcPtr = dynamic_cast<TBlob<float>*>(src.get());
if (srcPtr == nullptr)
FAIL() << "Cannot cast input blob to TBlob<float>.";
// Output Reference
TBlob<float> dst_ref(item.second->getTensorDesc());
dst_ref.allocate();
ref_shuffle_channels(*srcPtr, dst_ref, p.axis, p.group);
// Check results
if (memcmp(dst_ref.data(), &p.reference[0], p.reference.size() * sizeof(float)) != 0)
FAIL() << "Wrong result of TF reference comparison!";
// Infer
ExecutableNetwork executable_network = ie.LoadNetwork(net, p.device_name);
InferRequest inferRequest = executable_network.CreateInferRequest();
inferRequest.SetBlob("input", src);
auto output = inferRequest.GetBlob(item.first);
inferRequest.Infer();
// Check results
auto * dstPtr = dynamic_cast<TBlob<float>*>(output.get());
if (dstPtr == nullptr)
FAIL() << "Cannot cast output blob to TBlob<float>.";
compare(*dstPtr, dst_ref);
} catch (const details::InferenceEngineException &e) {
FAIL() << e.what();
}
}
};
TEST_P(ShuffleChannelsTests, smoke_GPU_TestsShuffleChannels) {}
static std::vector<float> test0 = { 0.f, 1.f, 2.f, 3.f, 12.f, 13.f, 14.f, 15.f, 24.f, 25.f, 26.f, 27.f, 36.f, 37.f, 38.f, 39.f, 48.f, 49.f, 50.f, 51.f,
4.f, 5.f, 6.f, 7.f, 16.f, 17.f, 18.f, 19.f, 28.f, 29.f, 30.f, 31.f, 40.f, 41.f, 42.f, 43.f, 52.f, 53.f, 54.f, 55.f,
8.f, 9.f, 10.f, 11.f, 20.f, 21.f, 22.f, 23.f, 32.f, 33.f, 34.f, 35.f, 44.f, 45.f, 46.f, 47.f, 56.f, 57.f, 58.f, 59.f };
static std::vector<float> test4 = { 0.f, 2.f, 4.f, 1.f, 3.f, 5.f, 6.f, 8.f, 10.f, 7.f, 9.f, 11.f, 12.f, 14.f, 16.f, 13.f, 15.f, 17.f, 18.f, 20.f, 22.f, 19.f, 21.f, 23.f };
static std::vector<float> test5 = { 0.f, 1.f, 4.f, 5.f, 8.f, 9.f, 2.f, 3.f, 6.f, 7.f, 10.f, 11.f, 12.f, 13.f, 16.f, 17.f, 20.f, 21.f, 14.f, 15.f, 18.f, 19.f, 22.f, 23.f };
static std::vector<float> test6 = { 0.f, 3.f, 1.f, 4.f, 2.f, 5.f, 6.f, 9.f, 7.f, 10.f, 8.f, 11.f, 12.f, 15.f, 13.f, 16.f, 14.f, 17.f, 18.f, 21.f, 19.f, 22.f, 20.f, 23.f };
static std::vector<float> test7 = { 0.f, 1.f, 6.f, 7.f, 2.f, 3.f, 8.f, 9.f, 4.f, 5.f, 10.f, 11.f, 12.f, 13.f, 18.f, 19.f, 14.f, 15.f, 20.f, 21.f, 16.f, 17.f, 22.f, 23.f };
static std::vector<float> test8 = { 0.f, 3.f, 1.f, 4.f, 2.f, 5.f };
INSTANTIATE_TEST_CASE_P(
smoke_TestsShuffleChannels, ShuffleChannelsTests,
::testing::Values(
shuffle_channels_test_params{ "GPU", "FP32", { 1, 15, 2, 2 }, 1, 5, test0 },
shuffle_channels_test_params{ "GPU", "FP32", { 1, 15, 2, 2 }, -3, 5, test0 },
shuffle_channels_test_params{ "GPU", "FP32", { 15, 2, 2 }, 0, 5, test0 },
shuffle_channels_test_params{ "GPU", "FP32", { 15, 2, 2 }, -3, 5, test0 },
shuffle_channels_test_params{ "GPU", "FP32", { 2, 2, 6 }, -1, 3, test4 },
shuffle_channels_test_params{ "GPU", "FP32", { 2, 6, 2 }, -2, 3, test5 },
shuffle_channels_test_params{ "GPU", "FP32", { 2, 2, 6 }, -1, 2, test6 },
shuffle_channels_test_params{ "GPU", "FP32", { 2, 6, 2 }, -2, 2, test7 }
));

View File

@ -1,5 +1,5 @@
/*
// Copyright (c) 2019 Intel Corporation
// Copyright (c) 2019-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@ -20,20 +20,39 @@
#include <vector>
namespace kernel_selector {
ParamsKey ShuffleChannelsKernelRef::GetSupportedKey() const {
ParamsKey k;
k.EnableInputDataType(Datatype::UINT8);
k.EnableInputDataType(Datatype::INT8);
k.EnableInputDataType(Datatype::F16);
k.EnableInputDataType(Datatype::F32);
k.EnableOutputDataType(Datatype::UINT8);
k.EnableOutputDataType(Datatype::INT8);
k.EnableOutputDataType(Datatype::F16);
k.EnableOutputDataType(Datatype::F32);
k.EnableInputLayout(DataLayout::bfyx);
k.EnableOutputLayout(DataLayout::bfyx);
k.EnableAllInputLayout();
k.EnableAllOutputLayout();
k.EnableTensorOffset();
k.EnableTensorPitches();
k.EnableBatching();
return k;
}
bool ShuffleChannelsKernelRef::Validate(const Params& p, const optional_params& o) const {
if (p.GetType() != KernelType::SHUFFLE_CHANNELS ||
o.GetType() != KernelType::SHUFFLE_CHANNELS) {
return false;
}
const shuffle_channels_params& params = static_cast<const shuffle_channels_params&>(p);
if (params.inputs[0].Dimentions() > 4)
return false;
return true;
}
CommonDispatchData ShuffleChannelsKernelRef::SetDefault(const shuffle_channels_params& params,
const optional_params&) const {
CommonDispatchData runInfo;
@ -81,6 +100,10 @@ JitConstants ShuffleChannelsKernelRef::GetJitConstants(const shuffle_channels_pa
}
KernelsData ShuffleChannelsKernelRef::GetKernelsData(const Params& params, const optional_params& options) const {
if (!Validate(params, options)) {
return {};
}
KernelData kd = KernelData::Default<shuffle_channels_params>(params);
shuffle_channels_params& newParams = *static_cast<shuffle_channels_params*>(kd.params.get());

View File

@ -42,9 +42,11 @@ class ShuffleChannelsKernelRef : public common_kernel_base {
public:
ShuffleChannelsKernelRef() : common_kernel_base("shuffle_channels_ref") {}
virtual ~ShuffleChannelsKernelRef() {}
virtual JitConstants GetJitConstants(const shuffle_channels_params& params) const;
virtual CommonDispatchData SetDefault(const shuffle_channels_params& params, const optional_params&) const;
KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
ParamsKey GetSupportedKey() const override;
protected:
bool Validate(const Params&, const optional_params&) const override;
virtual CommonDispatchData SetDefault(const shuffle_channels_params& params, const optional_params&) const;
virtual JitConstants GetJitConstants(const shuffle_channels_params& params) const;
};
} // namespace kernel_selector

View File

@ -1,4 +1,4 @@
// Copyright (c) 2019 Intel Corporation
// Copyright (c) 2019-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@ -15,29 +15,20 @@
#include "include/include_all.cl"
KERNEL(shuffle_channels_ref)(const __global UNIT_TYPE* input, __global UNIT_TYPE* output)
KERNEL(shuffle_channels_ref)(const __global INPUT0_TYPE* input, __global OUTPUT_TYPE* output)
{
const uint batch = get_global_id(0);
const uint feature = get_global_id(1);
const uint y = (uint)get_global_id(2) / OUTPUT_SIZE_X;
const uint x = (uint)get_global_id(2) % OUTPUT_SIZE_X;
const uint dimensions[] = { batch, feature, y, x };
uint dimensions[] = { batch, feature, y, x };
const uint current_group = dimensions[AXIS] / GROUP_SIZE;
const uint position_in_group = dimensions[AXIS] % GROUP_SIZE;
const uint input_index = INPUT0_OFFSET + (batch * INPUT0_BATCH_PITCH) + (feature * INPUT0_FEATURE_PITCH) + (y * INPUT0_Y_PITCH) + x;
const uint input_index = INPUT0_GET_INDEX(batch, feature, y, x);
uint output_index = OUTPUT_OFFSET;
for (uint i = 0; i < AXIS; ++i) {
output_index += dimensions[i] * INPUT0_PITCHES[INPUT0_DIMS - i - 1];
}
output_index += (position_in_group * GROUPS_NUMBER + current_group) * INPUT0_PITCHES[INPUT0_DIMS - AXIS - 1];
for (uint i = AXIS + 1; i < INPUT0_DIMS; ++i) {
output_index += dimensions[i] * INPUT0_PITCHES[INPUT0_DIMS - i - 1];
}
dimensions[AXIS] = (position_in_group * GROUPS_NUMBER + current_group);
uint output_index = OUTPUT_GET_INDEX(dimensions[0], dimensions[1], dimensions[2], dimensions[3]);
output[output_index] = ACTIVATION(input[input_index], ACTIVATION_PARAMS);
}

View File

@ -64,10 +64,28 @@ namespace detail {
attach_shuffle_channels_gpu::attach_shuffle_channels_gpu() {
auto val_fw = shuffle_channels_gpu::create;
implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx),
val_fw);
implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx),
val_fw);
implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), val_fw);
implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), val_fw);
implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv16), val_fw);
implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_yx_fsv16), val_fw);
implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv16), val_fw);
implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv16), val_fw);
implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv4), val_fw);
implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_yx_fsv4), val_fw);
implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv4), val_fw);
implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv4), val_fw);
implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bs_fs_yx_bsv16_fsv16), val_fw);
implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bs_fs_yx_bsv16_fsv16), val_fw);
implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bs_fs_yx_bsv16_fsv16), val_fw);
implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bs_fs_yx_bsv16_fsv16), val_fw);
implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::fs_b_yx_fsv32), val_fw);
implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::fs_b_yx_fsv32), val_fw);
}
} // namespace detail

View File

@ -47,6 +47,7 @@
#include "deconvolution_inst.h"
#include "detection_output_inst.h"
#include "input_layout_inst.h"
#include "shuffle_channels_inst.h"
#include "arg_max_min_inst.h"
#include "lstm_inst.h"
#include "lstm_elt_inst.h"
@ -1157,6 +1158,7 @@ void program_impl::set_layout_optimizer_attributes(layout_optimizer& lo) {
prim.type() != cldnn::crop::type_id() &&
prim.type() != cldnn::scale::type_id() &&
prim.type() != cldnn::depth_to_space::type_id() &&
prim.type() != cldnn::shuffle_channels::type_id() &&
(prim.type() != cldnn::mvn::type_id()
|| (prim.as<mvn>().input().get_output_layout().data_type != data_types::u8 &&
prim.as<mvn>().input().get_output_layout().data_type != data_types::i8)