[IE CLDNN] Enable ShuffleChannels op (#787)
This commit is contained in:
parent 63ee9f8916
commit f0498ad011
@@ -22,7 +22,7 @@ class TRANSFORMATIONS_API ConvertShuffleChannels3;

class ngraph::pass::ConvertShuffleChannels3: public ngraph::pass::GraphRewrite, public ngraph::pass::PassParam {
public:
    ConvertShuffleChannels3() : GraphRewrite() {
    ConvertShuffleChannels3() : GraphRewrite(), PassParam() {
        convert_shuffle_channels3();
    }
@@ -0,0 +1,33 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include <vector>

#include "single_layer_tests/shuffle_channels.hpp"

using namespace LayerTestsDefinitions;

const std::vector<InferenceEngine::Precision> inputPrecision = {
    InferenceEngine::Precision::FP32,
    InferenceEngine::Precision::FP16,
    InferenceEngine::Precision::U8,
};

const std::vector<std::vector<size_t>> inputShapes = {
    {3, 4, 9, 5}, {2, 16, 24, 15}, {1, 32, 12, 25}
};

const std::vector<std::tuple<int, int>> shuffleParameters = {
    std::make_tuple(1, 2), std::make_tuple(-3, 2),
    std::make_tuple(2, 3), std::make_tuple(-2, 3),
    std::make_tuple(3, 5), std::make_tuple(-1, 5)
};

const auto testCases = ::testing::Combine(::testing::ValuesIn(shuffleParameters),
                                          ::testing::ValuesIn(inputPrecision),
                                          ::testing::ValuesIn(inputShapes),
                                          ::testing::Values(CommonTestUtils::DEVICE_GPU));


INSTANTIATE_TEST_CASE_P(smoke_GPU_ShuffleChannels, ShuffleChannelsLayerTest, testCases, ShuffleChannelsLayerTest::getTestCaseName);
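For orientation (an illustrative sketch, not part of this commit): ShuffleChannelsLayerTest compares the plugin against the operation's reference behaviour, which amounts to viewing the channel dimension as [group, C/group], transposing those two factors, and flattening back. A minimal host-side version of that mapping for an NCHW float tensor, assuming axis 1 and a group count that divides C evenly (the function name is made up for the sketch):

    #include <algorithm>
    #include <cstddef>
    #include <vector>

    // Reference semantics of ShuffleChannels for an NCHW float tensor, axis = 1.
    // View channels as [group, C / group], transpose the two factors, flatten back.
    std::vector<float> shuffle_channels_reference(const std::vector<float>& src,
                                                  std::size_t N, std::size_t C,
                                                  std::size_t H, std::size_t W,
                                                  std::size_t group) {
        std::vector<float> dst(src.size());
        const std::size_t group_size = C / group;   // channels per group
        const std::size_t plane = H * W;            // elements per channel plane
        for (std::size_t n = 0; n < N; ++n) {
            for (std::size_t c = 0; c < C; ++c) {
                const std::size_t g = c / group_size;     // group the channel belongs to
                const std::size_t k = c % group_size;     // position inside that group
                const std::size_t c_out = k * group + g;  // channel index after the shuffle
                const float* s = &src[(n * C + c) * plane];
                std::copy(s, s + plane, &dst[(n * C + c_out) * plane]);
            }
        }
        return dst;
    }

For example, with shape {1, 15, 2, 2} and group 5 (one of the cases in the deleted legacy test below), input channel 3 lands in output channel 1, which is exactly the 12/13/14/15 block seen in the test0 reference data.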
@@ -1,189 +0,0 @@
// Copyright (C) 2018-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include <gtest/gtest.h>
#include <ie_core.hpp>
#include <cmath>

#include "tests_common.hpp"
#include "single_layer_common.hpp"


using namespace ::testing;
using namespace InferenceEngine;
using namespace std;


struct shuffle_channels_test_params {
    std::string device_name;
    std::string inPrecision;
    SizeVector in_out_shape;
    int axis;
    int group;
    std::vector<float> reference;
};

void ref_shuffle_channels(TBlob<float> &src, TBlob<float> &dst, int axis, int group) {
    size_t i;
    const float *src_data = src.data();
    float* dst_data = dst.data();
    SizeVector dst_dims = dst.getTensorDesc().getDims();
    SizeVector dstStrides = dst.getTensorDesc().getBlockingDesc().getStrides();

    if (axis < 0)
        axis += dst_dims.size();

    if (axis < 0 || axis >= dst_dims.size())
        FAIL() << "Incorrect input parameters dimensions and axis number!";

    if (dst_dims[axis] % group)
        FAIL() << "Group parameter must evenly divide the channel dimension!";

    // Find number of dictionaries, index range and data length
    size_t numDictionaries = 1;
    for (i = 0; i <= axis; i++)
        numDictionaries *= dst_dims[i];

    size_t channelsNum = dst_dims[axis] / group;

    size_t dataLength = 1;
    for (i = axis + 1; i < dst_dims.size(); i++)
        dataLength *= dst_dims[i];

    if (dataLength == 0)
        FAIL() << "Incorrect input parameters dimension!";

    size_t j, k;
    for (j = 0, k = 0; j < numDictionaries; j += dst_dims[axis]) {
        for (i = 0; i < (dst_dims[axis] * channelsNum); i += channelsNum, k += dataLength) {
            int idx = j + i / dst_dims[axis] + i % dst_dims[axis];
            memcpy(&dst_data[k], &src_data[dataLength * idx], sizeof(float) * dataLength);
        }
    }
}

class ShuffleChannelsTests : public TestsCommon, public WithParamInterface<shuffle_channels_test_params> {
    std::string model_t = R"V0G0N(
<net Name="ShuffleChannels_net" version="2" precision="FP32" batch="1">
    <layers>
        <layer name="input" type="Input" precision="FP32" id="1">
            <output>
                <port id="1">
                    _IN_OUT_
                </port>
            </output>
        </layer>
        <layer name="output" id="2" type="ShuffleChannels" precision="FP32">
            <data axis="_AX_" group="_GR_"/>
            <input>
                <port id="1">
                    _IN_OUT_
                </port>
            </input>
            <output>
                <port id="2">
                    _IN_OUT_
                </port>
            </output>
        </layer>
    </layers>
    <edges>
        <edge from-layer="1" from-port="1" to-layer="2" to-port="1"/>
    </edges>
</net>
)V0G0N";

    std::string getModel(shuffle_channels_test_params p) {
        std::string model = model_t;
        std::string in_out_shape;

        for (size_t i = 0; i < p.in_out_shape.size(); i++) {
            in_out_shape += "<dim>";
            in_out_shape += std::to_string(p.in_out_shape[i]) + "</dim>\n";
        }
        REPLACE_WITH_STR(model, "_IN_OUT_", in_out_shape);
        REPLACE_WITH_NUM(model, "_AX_", p.axis);
        REPLACE_WITH_NUM(model, "_GR_", p.group);

        return model;
    }

protected:
    virtual void TearDown() {
    }

    virtual void SetUp() {
        try {
            shuffle_channels_test_params p = ::testing::WithParamInterface<shuffle_channels_test_params>::GetParam();
            std::string model = getModel(p);

            Core ie;
            CNNNetwork net = ie.ReadNetwork(model, Blob::CPtr());

            // Output Data
            OutputsDataMap out = net.getOutputsInfo();
            auto item = *out.begin();

            // Input Data
            Blob::Ptr src = make_shared_blob<float>({Precision::FP32,
                                                     p.in_out_shape,
                                                     TensorDesc::getLayoutByDims(p.in_out_shape)});
            src->allocate();
            fill_data_dbgval(src->buffer(), src->size());
            auto * srcPtr = dynamic_cast<TBlob<float>*>(src.get());
            if (srcPtr == nullptr)
                FAIL() << "Cannot cast input blob to TBlob<float>.";

            // Output Reference
            TBlob<float> dst_ref(item.second->getTensorDesc());
            dst_ref.allocate();
            ref_shuffle_channels(*srcPtr, dst_ref, p.axis, p.group);

            // Check results
            if (memcmp(dst_ref.data(), &p.reference[0], p.reference.size() * sizeof(float)) != 0)
                FAIL() << "Wrong result of TF reference comparison!";

            // Infer
            ExecutableNetwork executable_network = ie.LoadNetwork(net, p.device_name);
            InferRequest inferRequest = executable_network.CreateInferRequest();
            inferRequest.SetBlob("input", src);
            auto output = inferRequest.GetBlob(item.first);

            inferRequest.Infer();

            // Check results
            auto * dstPtr = dynamic_cast<TBlob<float>*>(output.get());
            if (dstPtr == nullptr)
                FAIL() << "Cannot cast output blob to TBlob<float>.";
            compare(*dstPtr, dst_ref);
        } catch (const details::InferenceEngineException &e) {
            FAIL() << e.what();
        }
    }
};

TEST_P(ShuffleChannelsTests, smoke_GPU_TestsShuffleChannels) {}

static std::vector<float> test0 = { 0.f, 1.f, 2.f, 3.f, 12.f, 13.f, 14.f, 15.f, 24.f, 25.f, 26.f, 27.f, 36.f, 37.f, 38.f, 39.f, 48.f, 49.f, 50.f, 51.f,
                                    4.f, 5.f, 6.f, 7.f, 16.f, 17.f, 18.f, 19.f, 28.f, 29.f, 30.f, 31.f, 40.f, 41.f, 42.f, 43.f, 52.f, 53.f, 54.f, 55.f,
                                    8.f, 9.f, 10.f, 11.f, 20.f, 21.f, 22.f, 23.f, 32.f, 33.f, 34.f, 35.f, 44.f, 45.f, 46.f, 47.f, 56.f, 57.f, 58.f, 59.f };
static std::vector<float> test4 = { 0.f, 2.f, 4.f, 1.f, 3.f, 5.f, 6.f, 8.f, 10.f, 7.f, 9.f, 11.f, 12.f, 14.f, 16.f, 13.f, 15.f, 17.f, 18.f, 20.f, 22.f, 19.f, 21.f, 23.f };
static std::vector<float> test5 = { 0.f, 1.f, 4.f, 5.f, 8.f, 9.f, 2.f, 3.f, 6.f, 7.f, 10.f, 11.f, 12.f, 13.f, 16.f, 17.f, 20.f, 21.f, 14.f, 15.f, 18.f, 19.f, 22.f, 23.f };
static std::vector<float> test6 = { 0.f, 3.f, 1.f, 4.f, 2.f, 5.f, 6.f, 9.f, 7.f, 10.f, 8.f, 11.f, 12.f, 15.f, 13.f, 16.f, 14.f, 17.f, 18.f, 21.f, 19.f, 22.f, 20.f, 23.f };
static std::vector<float> test7 = { 0.f, 1.f, 6.f, 7.f, 2.f, 3.f, 8.f, 9.f, 4.f, 5.f, 10.f, 11.f, 12.f, 13.f, 18.f, 19.f, 14.f, 15.f, 20.f, 21.f, 16.f, 17.f, 22.f, 23.f };
static std::vector<float> test8 = { 0.f, 3.f, 1.f, 4.f, 2.f, 5.f };

INSTANTIATE_TEST_CASE_P(
        smoke_TestsShuffleChannels, ShuffleChannelsTests,
        ::testing::Values(
                shuffle_channels_test_params{ "GPU", "FP32", { 1, 15, 2, 2 }, 1, 5, test0 },
                shuffle_channels_test_params{ "GPU", "FP32", { 1, 15, 2, 2 }, -3, 5, test0 },
                shuffle_channels_test_params{ "GPU", "FP32", { 15, 2, 2 }, 0, 5, test0 },
                shuffle_channels_test_params{ "GPU", "FP32", { 15, 2, 2 }, -3, 5, test0 },
                shuffle_channels_test_params{ "GPU", "FP32", { 2, 2, 6 }, -1, 3, test4 },
                shuffle_channels_test_params{ "GPU", "FP32", { 2, 6, 2 }, -2, 3, test5 },
                shuffle_channels_test_params{ "GPU", "FP32", { 2, 2, 6 }, -1, 2, test6 },
                shuffle_channels_test_params{ "GPU", "FP32", { 2, 6, 2 }, -2, 2, test7 }
));
@@ -1,5 +1,5 @@
/*
// Copyright (c) 2019 Intel Corporation
// Copyright (c) 2019-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -20,20 +20,39 @@
#include <vector>

namespace kernel_selector {

ParamsKey ShuffleChannelsKernelRef::GetSupportedKey() const {
    ParamsKey k;
    k.EnableInputDataType(Datatype::UINT8);
    k.EnableInputDataType(Datatype::INT8);
    k.EnableInputDataType(Datatype::F16);
    k.EnableInputDataType(Datatype::F32);
    k.EnableOutputDataType(Datatype::UINT8);
    k.EnableOutputDataType(Datatype::INT8);
    k.EnableOutputDataType(Datatype::F16);
    k.EnableOutputDataType(Datatype::F32);
    k.EnableInputLayout(DataLayout::bfyx);
    k.EnableOutputLayout(DataLayout::bfyx);
    k.EnableAllInputLayout();
    k.EnableAllOutputLayout();
    k.EnableTensorOffset();
    k.EnableTensorPitches();
    k.EnableBatching();
    return k;
}

bool ShuffleChannelsKernelRef::Validate(const Params& p, const optional_params& o) const {
    if (p.GetType() != KernelType::SHUFFLE_CHANNELS ||
        o.GetType() != KernelType::SHUFFLE_CHANNELS) {
        return false;
    }

    const shuffle_channels_params& params = static_cast<const shuffle_channels_params&>(p);

    if (params.inputs[0].Dimentions() > 4)
        return false;

    return true;
}

CommonDispatchData ShuffleChannelsKernelRef::SetDefault(const shuffle_channels_params& params,
                                                        const optional_params&) const {
    CommonDispatchData runInfo;
@@ -81,6 +100,10 @@ JitConstants ShuffleChannelsKernelRef::GetJitConstants(const shuffle_channels_pa
}

KernelsData ShuffleChannelsKernelRef::GetKernelsData(const Params& params, const optional_params& options) const {
    if (!Validate(params, options)) {
        return {};
    }

    KernelData kd = KernelData::Default<shuffle_channels_params>(params);
    shuffle_channels_params& newParams = *static_cast<shuffle_channels_params*>(kd.params.get());
@@ -42,9 +42,11 @@ class ShuffleChannelsKernelRef : public common_kernel_base {
public:
    ShuffleChannelsKernelRef() : common_kernel_base("shuffle_channels_ref") {}
    virtual ~ShuffleChannelsKernelRef() {}
    virtual JitConstants GetJitConstants(const shuffle_channels_params& params) const;
    virtual CommonDispatchData SetDefault(const shuffle_channels_params& params, const optional_params&) const;
    KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
    ParamsKey GetSupportedKey() const override;

protected:
    bool Validate(const Params&, const optional_params&) const override;
    virtual CommonDispatchData SetDefault(const shuffle_channels_params& params, const optional_params&) const;
    virtual JitConstants GetJitConstants(const shuffle_channels_params& params) const;
};
} // namespace kernel_selector
@@ -1,4 +1,4 @@
// Copyright (c) 2019 Intel Corporation
// Copyright (c) 2019-2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -15,29 +15,20 @@

#include "include/include_all.cl"

KERNEL(shuffle_channels_ref)(const __global UNIT_TYPE* input, __global UNIT_TYPE* output)
KERNEL(shuffle_channels_ref)(const __global INPUT0_TYPE* input, __global OUTPUT_TYPE* output)
{
    const uint batch = get_global_id(0);
    const uint feature = get_global_id(1);
    const uint y = (uint)get_global_id(2) / OUTPUT_SIZE_X;
    const uint x = (uint)get_global_id(2) % OUTPUT_SIZE_X;
    const uint dimensions[] = { batch, feature, y, x };
    uint dimensions[] = { batch, feature, y, x };

    const uint current_group = dimensions[AXIS] / GROUP_SIZE;
    const uint position_in_group = dimensions[AXIS] % GROUP_SIZE;
    const uint input_index = INPUT0_OFFSET + (batch * INPUT0_BATCH_PITCH) + (feature * INPUT0_FEATURE_PITCH) + (y * INPUT0_Y_PITCH) + x;
    const uint input_index = INPUT0_GET_INDEX(batch, feature, y, x);

    uint output_index = OUTPUT_OFFSET;

    for (uint i = 0; i < AXIS; ++i) {
        output_index += dimensions[i] * INPUT0_PITCHES[INPUT0_DIMS - i - 1];
    }

    output_index += (position_in_group * GROUPS_NUMBER + current_group) * INPUT0_PITCHES[INPUT0_DIMS - AXIS - 1];

    for (uint i = AXIS + 1; i < INPUT0_DIMS; ++i) {
        output_index += dimensions[i] * INPUT0_PITCHES[INPUT0_DIMS - i - 1];
    }
    dimensions[AXIS] = (position_in_group * GROUPS_NUMBER + current_group);
    uint output_index = OUTPUT_GET_INDEX(dimensions[0], dimensions[1], dimensions[2], dimensions[3]);

    output[output_index] = ACTIVATION(input[input_index], ACTIVATION_PARAMS);
}
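A note for readers (illustrative sketch, not part of the diff): the rewritten kernel body no longer accumulates pitch offsets by hand; it shuffles the coordinate along AXIS and lets the layout-aware INPUT0_GET_INDEX / OUTPUT_GET_INDEX macros produce addresses, which is what allows the blocked layouts enabled elsewhere in this commit. In plain C++ terms, with linear NCHW indexing standing in for the macros and the function name invented for the sketch, each work item does roughly:

    #include <cstddef>

    // One work item of the shuffle kernel, assuming AXIS == 1 (feature).
    // 'groups' plays the role of GROUPS_NUMBER, 'C / groups' the role of GROUP_SIZE.
    void shuffle_one_element(const float* input, float* output,
                             std::size_t C, std::size_t H, std::size_t W, std::size_t groups,
                             std::size_t b, std::size_t f, std::size_t y, std::size_t x) {
        const std::size_t group_size = C / groups;
        const std::size_t current_group = f / group_size;
        const std::size_t position_in_group = f % group_size;

        // Stand-ins for INPUT0_GET_INDEX / OUTPUT_GET_INDEX in a plain NCHW layout.
        const std::size_t input_index = ((b * C + f) * H + y) * W + x;
        const std::size_t shuffled_f = position_in_group * groups + current_group;
        const std::size_t output_index = ((b * C + shuffled_f) * H + y) * W + x;

        output[output_index] = input[input_index];
    }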
@@ -64,10 +64,28 @@ namespace detail {

attach_shuffle_channels_gpu::attach_shuffle_channels_gpu() {
    auto val_fw = shuffle_channels_gpu::create;
    implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx),
                                              val_fw);
    implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx),
                                              val_fw);
    implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
    implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
    implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), val_fw);
    implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), val_fw);

    implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv16), val_fw);
    implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_yx_fsv16), val_fw);
    implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv16), val_fw);
    implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv16), val_fw);

    implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv4), val_fw);
    implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_yx_fsv4), val_fw);
    implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv4), val_fw);
    implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv4), val_fw);

    implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bs_fs_yx_bsv16_fsv16), val_fw);
    implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bs_fs_yx_bsv16_fsv16), val_fw);
    implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bs_fs_yx_bsv16_fsv16), val_fw);
    implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bs_fs_yx_bsv16_fsv16), val_fw);

    implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::fs_b_yx_fsv32), val_fw);
    implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::fs_b_yx_fsv32), val_fw);
}

} // namespace detail
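Conceptually (a sketch only, with hypothetical names; clDNN's real implementation_map differs in detail), the attach_*() constructor above fills a lookup table keyed by (engine, data type, layout), and a primitive whose pair was never registered has no GPU implementation, which is why the commit adds the u8/i8 types and the blocked layouts:

    #include <functional>
    #include <map>
    #include <tuple>

    enum class engine_kind { ocl };
    enum class data_kind { f32, f16, u8, i8 };
    enum class layout_kind { bfyx, b_fs_yx_fsv16, b_fs_yx_fsv4, bs_fs_yx_bsv16_fsv16, fs_b_yx_fsv32 };

    using impl_key = std::tuple<engine_kind, data_kind, layout_kind>;
    using impl_factory = std::function<void*()>;  // stands in for shuffle_channels_gpu::create

    // Registry consulted when an implementation is picked for a primitive.
    std::map<impl_key, impl_factory>& impl_registry() {
        static std::map<impl_key, impl_factory> registry;
        return registry;
    }

    // Each add(std::make_tuple(...), val_fw) call above corresponds to one entry here.
    void register_impl(engine_kind e, data_kind d, layout_kind l, impl_factory f) {
        impl_registry()[std::make_tuple(e, d, l)] = f;
    }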
@@ -47,6 +47,7 @@
#include "deconvolution_inst.h"
#include "detection_output_inst.h"
#include "input_layout_inst.h"
#include "shuffle_channels_inst.h"
#include "arg_max_min_inst.h"
#include "lstm_inst.h"
#include "lstm_elt_inst.h"
@@ -1157,6 +1158,7 @@ void program_impl::set_layout_optimizer_attributes(layout_optimizer& lo) {
        prim.type() != cldnn::crop::type_id() &&
        prim.type() != cldnn::scale::type_id() &&
        prim.type() != cldnn::depth_to_space::type_id() &&
        prim.type() != cldnn::shuffle_channels::type_id() &&
        (prim.type() != cldnn::mvn::type_id()
         || (prim.as<mvn>().input().get_output_layout().data_type != data_types::u8 &&
             prim.as<mvn>().input().get_output_layout().data_type != data_types::i8)