[IE CLDNN] Added 6d tensor support in eltwise/scale primitives (#826)
parent b0eb3e67ee, commit fe198dd544
@@ -20,8 +20,8 @@ std::vector<std::vector<std::vector<size_t>>> inShapes = {
         {{1, 2, 4}},
         {{1, 4, 4}},
         {{1, 4, 4, 1}},
-        {{1, 1, 1, 1, 1, 1, 3}},
-        {{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}}
+        {{1, 4, 3, 2, 1, 3}},
+        {{1, 3, 1, 1, 1, 3}, {1, 3, 1, 1, 1, 1}},
 };
 
 std::vector<InferenceEngine::Precision> netPrecisions = {
@@ -53,7 +53,7 @@ const auto multiply_params = ::testing::Combine(
         ::testing::ValuesIn(secondaryInputTypes),
         ::testing::ValuesIn(opTypes),
         ::testing::ValuesIn(netPrecisions),
-        ::testing::Values(CommonTestUtils::DEVICE_CPU),
+        ::testing::Values(CommonTestUtils::DEVICE_GPU),
         ::testing::Values(additional_config));
 
 INSTANTIATE_TEST_CASE_P(CompareWithRefs, EltwiseLayerTest, multiply_params, EltwiseLayerTest::getTestCaseName);
@@ -9,6 +9,8 @@
 
 std::vector<std::string> disabledTestPatterns() {
     return {
-        R"(.*(EltwiseLayerTest).*IS=\(.*\..*\..*\..*\..*\).*secondaryInputType=PARAMETER.*opType=SCALAR.*)",
+        // cldnn treats 1d constant as [1, f, 1, 1] tensor instead of [b, 1, 1, 1] which leads to fails of these tests
+        R"(.*(EltwiseLayerTest).*IS=\(.*\..*\..*\..*\..*\).*secondaryInputType=CONSTANT.*opType=SCALAR.*)",
+        R"(.*(EltwiseLayerTest).*IS=\(.*\).*secondaryInputType=CONSTANT.*)",
     };
 }
@@ -1,4 +1,4 @@
-// Copyright (c) 2016-2019 Intel Corporation
+// Copyright (c) 2016-2020 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -397,6 +397,8 @@ JitConstants EltwiseKernelBase::MakeIndexJitConstants(const eltwise_params& para
                                                         {1, 1, 1})));
         } else if (out_c == 5) {
             jit.AddConstant(MakeJitConstant(out_idx_order, "d5,d4,d3,d2,d1"));
+        } else if (out_c == 6) {
+            jit.AddConstant(MakeJitConstant(out_idx_order, "d6,d5,d4,d3,d2,d1"));
         } else {
             assert(0);
         }
@@ -445,6 +447,14 @@ JitConstants EltwiseKernelBase::MakeIndexJitConstants(const eltwise_params& para
                 // quite strange case, but can happen due to reorders fusing
                 // it means that z coord is equal to 1, so z offset will be always equal to 0
                 jit.AddConstant(MakeJitConstant(idx_order, "d4,d3,0,d2,d1"));
+            } else if (out_c == 6) {
+                if (in_c < 5)
+                    jit.AddConstant(MakeJitConstant(idx_order, "d6,d5,d2,d1"));
+                else if (in_c == 5) {
+                    jit.AddConstant(MakeJitConstant(idx_order, "d6,d5,d3,d2,d1"));
+                } else {
+                    jit.AddConstant(MakeJitConstant(idx_order, "d6,d5,d4,d3,d2,d1"));
+                }
             } else {
                 assert(0);
             }
@@ -526,19 +536,16 @@ EltwiseKernelBase::DispatchData EltwiseKernelBase::SetDefault(const eltwise_para
             gws.push_back(o.v);
         }
 
-        size_t n_dims;
-        if ((out.GetLayout() == DataLayout::bfzyx) || (out.GetLayout() == DataLayout::b_fs_zyx_fsv16) ||
-            (out.GetLayout() == DataLayout::bs_fs_zyx_bsv16_fsv16))
-            n_dims = 5;
-        else
-            n_dims = 4;
-
+        size_t n_dims = DataTensor::ChannelsCount(out.GetLayout());
         for (size_t i = gws.size(); i < n_dims; i++) {
             gws.push_back(1U);
         }
 
         kd.gws0 = gws[0];
-        if (n_dims == 5) {
+        if (n_dims == 6) {
+            kd.gws1 = gws[1] * gws[2] * gws[3];  // y*z*w
+            kd.gws2 = gws[4] * gws[5];
+        } else if (n_dims == 5) {
             kd.gws1 = gws[1] * gws[2];  // y*z
             kd.gws2 = gws[3] * gws[4];
         } else {
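For illustration, a standalone C++ sketch (not part of this commit; names and sizes are illustrative) of how the n_dims == 6 branch above folds six per-dimension global sizes into the three dispatch values: x stays in gws0, y*z*w are merged into gws1, and feature*batch into gws2.

    #include <array>
    #include <cstdio>

    // Fold per-dimension global sizes {x, y, z, w, f, b} into three dispatch values,
    // mirroring the n_dims == 6 branch in the hunk above.
    std::array<size_t, 3> fold_gws_6d(const std::array<size_t, 6>& gws) {
        return {{ gws[0],                    // gws0 = x
                  gws[1] * gws[2] * gws[3],  // gws1 = y*z*w
                  gws[4] * gws[5] }};        // gws2 = feature*batch
    }

    int main() {
        // Example shape from the new 6D eltwise tests: b=8, f=32, w=4, z=5, y=6, x=7.
        auto folded = fold_gws_6d({{7, 6, 5, 4, 32, 8}});
        std::printf("gws = {%zu, %zu, %zu}\n", folded[0], folded[1], folded[2]);  // 7, 120, 256
        return 0;
    }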
@@ -52,6 +52,7 @@ JitConstants PermuteKernelRef::GetJitConstants(const permute_params& params) con
         default: in_idx = {"b", "f", "x", "y" }; break;
     }
 
+    assert(params.order.size() == in_idx.size());
     for (auto& o : params.order) {
         out_idx.push_back(in_idx[o]);
     }
@@ -1,5 +1,5 @@
 /*
-// Copyright (c) 2016-2019 Intel Corporation
+// Copyright (c) 2016-2020 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -32,7 +32,39 @@ KERNEL(eltwise)(
 #endif
 )
 {
-#if OUTPUT_DIMS == 5 // 3D spatial
+#if OUTPUT_DIMS == 6 // 4D spatial
+#if ELTWISE_LAYOUT_BASED || QUANTIZATION_TERM || ELTWISE_BROADCAST
+    uint data_idx = get_global_id(GWS_YX);
+    const uint d1 = data_idx % OUTPUT_SIZE_X; // X
+    data_idx = data_idx / OUTPUT_SIZE_X;
+
+    const uint d2 = data_idx % OUTPUT_SIZE_Y; // Y
+    data_idx = data_idx / OUTPUT_SIZE_Y;
+
+    const uint d3 = data_idx % OUTPUT_SIZE_Z; // Z
+    data_idx = data_idx / OUTPUT_SIZE_Z;
+
+    const uint d4 = data_idx % OUTPUT_SIZE_W; // W
+
+    const uint d5 = get_global_id(GWS_FEATURE); // Feature
+    const uint d6 = get_global_id(GWS_BATCH); // Batch
+
+    uint output_offset = OUTPUT_GET_INDEX(d6, d5, d4, d3, d2, d1);
+#elif ELTWISE_NO_PITCH_SAME_DIMS
+    const uint d1 = get_global_id(0);
+    uint output_offset = OUTPUT_OFFSET + d1;
+#else
+    const uint d1 = get_global_id(0);
+    const uint d2 = (uint)get_global_id(1) % OUTPUT_SIZES[1];
+    const uint d3 = (uint)get_global_id(1) / OUTPUT_SIZES[1] % OUTPUT_SIZE[2];
+    const uint d4 = (uint)get_global_id(1) / OUTPUT_SIZES[1] / OUTPUT_SIZE[2];
+    const uint d5 = (uint)get_global_id(2) % OUTPUT_SIZES[3];
+    const uint d6 = (uint)get_global_id(2) / OUTPUT_SIZES[3];
+
+    uint output_offset = OUTPUT_GET_INDEX(d6, d5, d4, d3, d2, d1);
+#endif
+
+#elif OUTPUT_DIMS == 5 // 3D spatial
 #if ELTWISE_LAYOUT_BASED || QUANTIZATION_TERM || ELTWISE_BROADCAST
     uint data_idx = get_global_id(GWS_YX);
     const uint d1 = data_idx % OUTPUT_SIZE_X; // X
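The new OUTPUT_DIMS == 6 branch recovers the four spatial coordinates from the single flattened GWS_YX work-item id by repeated modulo/division. Below is a minimal host-side C++ sketch of the same decomposition (illustrative only; the struct fields stand in for OUTPUT_SIZE_X/Y/Z/W).

    #include <array>
    #include <cassert>

    struct SpatialSizes { int x, y, z, w; };  // stand-ins for OUTPUT_SIZE_X/Y/Z/W

    // Same mod/div chain as the OUTPUT_DIMS == 6 branch: d1 = X, d2 = Y, d3 = Z, d4 = W.
    std::array<int, 4> decompose_yx_index(int data_idx, SpatialSizes s) {
        int d1 = data_idx % s.x;  data_idx /= s.x;
        int d2 = data_idx % s.y;  data_idx /= s.y;
        int d3 = data_idx % s.z;  data_idx /= s.z;
        int d4 = data_idx % s.w;
        return {{d1, d2, d3, d4}};
    }

    int main() {
        SpatialSizes s{3, 2, 2, 2};  // x=3, y=2, z=2, w=2, matching the new scale tests
        for (int idx = 0; idx < s.x * s.y * s.z * s.w; ++idx) {
            auto d = decompose_yx_index(idx, s);
            // Re-flattening the coordinates must recover the original work-item id.
            assert(d[0] + s.x * (d[1] + s.y * (d[2] + s.z * d[3])) == idx);
        }
        return 0;
    }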
@@ -1,5 +1,5 @@
 /*
-// Copyright (c) 2019 Intel Corporation
+// Copyright (c) 2019-2020 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -255,6 +255,13 @@ attach_eltwise_gpu::attach_eltwise_gpu() {
         { std::make_tuple(engine_types::ocl, data_types::u8, format::bfzyx), eltwise_gpu::create },
         { std::make_tuple(engine_types::ocl, data_types::i32, format::bfzyx), eltwise_gpu::create },
         { std::make_tuple(engine_types::ocl, data_types::i64, format::bfzyx), eltwise_gpu::create },
+        // 4D
+        { std::make_tuple(engine_types::ocl, data_types::f32, format::bfwzyx), eltwise_gpu::create },
+        { std::make_tuple(engine_types::ocl, data_types::f16, format::bfwzyx), eltwise_gpu::create },
+        { std::make_tuple(engine_types::ocl, data_types::i8, format::bfwzyx), eltwise_gpu::create },
+        { std::make_tuple(engine_types::ocl, data_types::u8, format::bfwzyx), eltwise_gpu::create },
+        { std::make_tuple(engine_types::ocl, data_types::i32, format::bfwzyx), eltwise_gpu::create },
+        { std::make_tuple(engine_types::ocl, data_types::i64, format::bfwzyx), eltwise_gpu::create },
 
         { std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_zyx_fsv16), eltwise_gpu::create },
         { std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_zyx_fsv16), eltwise_gpu::create },
@@ -1,5 +1,5 @@
 /*
-// Copyright (c) 2016-2019 Intel Corporation
+// Copyright (c) 2016-2020 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -98,6 +98,11 @@ attach_scale_gpu::attach_scale_gpu() {
     implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfzyx), val_fw);
     implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfzyx), val_fw);
 
+    implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfwzyx), val_fw);
+    implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfwzyx), val_fw);
+    implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfwzyx), val_fw);
+    implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfwzyx), val_fw);
+
     implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv16), val_fw);
     implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_yx_fsv16), val_fw);
    implementation_map<scale>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv16), val_fw);
@@ -1,5 +1,5 @@
 /*
-// Copyright (c) 2016-2019 Intel Corporation
+// Copyright (c) 2016-2020 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -3214,39 +3214,53 @@ class BaseEltwiseTest : public ::testing::TestWithParam<T> {
 
 class eltwise_test : public BaseEltwiseTest<eltwise_test_params> {
 public:
-    VF<float> eltwise_ref(VVVVF<float> input0, VVVVF<float> input1, tensor input0_size, tensor input1_size, eltwise_mode mode) {
+    VF<float> eltwise_ref(VVVVVVF<float> input0, VVVVVVF<float> input1, tensor input0_size, tensor input1_size, eltwise_mode mode) {
         auto out_size = tensor::max(input0_size, input1_size);
 
         int output_b = out_size.batch[0];
         int output_f = out_size.feature[0];
+        int output_w = out_size.spatial[3];
+        int output_z = out_size.spatial[2];
         int output_y = out_size.spatial[1];
         int output_x = out_size.spatial[0];
 
-        VVVVF<float> output(output_b, VVVF<float>(output_f, VVF<float>(output_y, VF<float>(output_x))));
+        VVVVVVF<float> output(output_b, VVVVVF<float>(output_f,
+                                        VVVVF<float>(output_w,
+                                        VVVF<float>(output_z,
+                                        VVF<float>(output_y,
+                                        VF<float>(output_x))))));
 
         for (int b = 0; b < output_b; ++b) {
             for (int f = 0; f < output_f; ++f) {
-                for (int y = 0; y <output_y; ++y) {
-                    for (int x = 0; x < output_x; ++x) {
-                        int in0_b = b % input0_size.batch[0];
-                        int in0_f = f % input0_size.feature[0];
-                        int in0_y = y % input0_size.spatial[1];
-                        int in0_x = x % input0_size.spatial[0];
+                for (int w = 0; w <output_w; ++w) {
+                    for (int z = 0; z <output_z; ++z) {
+                        for (int y = 0; y <output_y; ++y) {
+                            for (int x = 0; x < output_x; ++x) {
+                                int in0_b = b % input0_size.batch[0];
+                                int in0_f = f % input0_size.feature[0];
+                                int in0_w = w % input0_size.spatial[3];
+                                int in0_z = z % input0_size.spatial[2];
+                                int in0_y = y % input0_size.spatial[1];
+                                int in0_x = x % input0_size.spatial[0];
 
-                        int in1_b = b % input1_size.batch[0];
-                        int in1_f = f % input1_size.feature[0];
-                        int in1_y = y % input1_size.spatial[1];
-                        int in1_x = x % input1_size.spatial[0];
+                                int in1_b = b % input1_size.batch[0];
+                                int in1_f = f % input1_size.feature[0];
+                                int in1_w = w % input1_size.spatial[3];
+                                int in1_z = z % input1_size.spatial[2];
+                                int in1_y = y % input1_size.spatial[1];
+                                int in1_x = x % input1_size.spatial[0];
 
-                        auto in0 = input0[in0_b][in0_f][in0_y][in0_x];
-                        auto in1 = input1[in1_b][in1_f][in1_y][in1_x];
-                        output[b][f][y][x] = eltwise_execute<float>(mode, in0, in1);
+                                auto in0 = input0[in0_b][in0_f][in0_w][in0_z][in0_y][in0_x];
+                                auto in1 = input1[in1_b][in1_f][in1_w][in1_z][in1_y][in1_x];
+                                output[b][f][w][z][y][x] = eltwise_execute<float>(mode, in0, in1);
+                            }
+                        }
                     }
                 }
             }
         }
 
-        return flatten_4d<float>(format::bfyx, output);;
+        return flatten_6d<float>(format::bfwzyx, output);
     }
 };
 
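The rewritten eltwise_ref keeps the same modulo-based broadcasting rule as before, now over six dimensions: each input coordinate is the output coordinate modulo that input's extent, so a size-1 dimension always resolves to index 0 and its single value is reused across the whole output. A tiny self-contained check of that rule (illustrative helper name, not from the repository):

    #include <cassert>

    // Broadcasting rule used by eltwise_ref: take the output coordinate modulo
    // the input extent, so extent-1 dimensions map every coordinate to 0.
    inline int broadcast_coord(int out_coord, int in_extent) {
        return out_coord % in_extent;
    }

    int main() {
        const int out_w = 4;
        for (int w = 0; w < out_w; ++w) {
            assert(broadcast_coord(w, 1) == 0);      // broadcast: the single element is reused
            assert(broadcast_coord(w, out_w) == w);  // matching extent: one-to-one mapping
        }
        return 0;
    }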
@@ -3271,10 +3285,10 @@ TEST_P(eltwise_test, b_fs_yx_fsv16) {
     int x1 = input1_size.spatial[0];
 
     int min_random = -2, max_random = 2;
-    VVVVF<float> input1_rnd = generate_random_4d<float>(b0, f0, y0, x0, min_random, max_random);
-    VVVVF<float> input2_rnd = generate_random_4d<float>(b1, f1, y1, x1, min_random, max_random);
-    VF<float> input1_rnd_vec = flatten_4d<float>(format::bfyx, input1_rnd);
-    VF<float> input2_rnd_vec = flatten_4d<float>(format::bfyx, input2_rnd);
+    VVVVVVF<float> input1_rnd = generate_random_6d<float>(b0, f0, 1, 1, y0, x0, min_random, max_random);
+    VVVVVVF<float> input2_rnd = generate_random_6d<float>(b1, f1, 1, 1, y1, x1, min_random, max_random);
+    VF<float> input1_rnd_vec = flatten_6d<float>(format::bfwzyx, input1_rnd);
+    VF<float> input2_rnd_vec = flatten_6d<float>(format::bfwzyx, input2_rnd);
 
     const auto& engine = get_test_engine();
     auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx, input0_size });
@@ -3339,3 +3353,82 @@ INSTANTIATE_TEST_CASE_P(eltwise, eltwise_test,
                             ::testing::ValuesIn(types),
                             ::testing::ValuesIn(inputs)
                         ), );
+
+class eltwise_test_6d : public eltwise_test {};
+TEST_P(eltwise_test_6d, bfwzyx) {
+    auto p = GetParam();
+
+    ASSERT_EQ(std::get<2>(p).size(), 2);
+
+    auto mode = std::get<0>(p);
+    auto dt = std::get<1>(p);
+    auto input0_size = std::get<2>(p)[0];
+    auto input1_size = std::get<2>(p)[1];
+
+    int b0 = input0_size.batch[0];
+    int f0 = input0_size.feature[0];
+    int w0 = input0_size.spatial[3];
+    int z0 = input0_size.spatial[2];
+    int y0 = input0_size.spatial[1];
+    int x0 = input0_size.spatial[0];
+
+    int b1 = input1_size.batch[0];
+    int f1 = input1_size.feature[0];
+    int w1 = input1_size.spatial[3];
+    int z1 = input1_size.spatial[2];
+    int y1 = input1_size.spatial[1];
+    int x1 = input1_size.spatial[0];
+
+    int min_random = -2, max_random = 2;
+    VVVVVVF<float> input1_rnd = generate_random_6d<float>(b0, f0, w0, z0, y0, x0, min_random, max_random);
+    VVVVVVF<float> input2_rnd = generate_random_6d<float>(b1, f1, w1, z1, y1, x1, min_random, max_random);
+    VF<float> input1_rnd_vec = flatten_6d<float>(format::bfwzyx, input1_rnd);
+    VF<float> input2_rnd_vec = flatten_6d<float>(format::bfwzyx, input2_rnd);
+
+    const auto& engine = get_test_engine();
+    auto input1 = memory::allocate(engine, { data_types::f32, format::bfwzyx, input0_size });
+    auto input2 = memory::allocate(engine, { data_types::f32, format::bfwzyx, input1_size });
+    set_values(input1, input1_rnd_vec);
+    set_values(input2, input2_rnd_vec);
+
+    topology topology;
+    topology.add(input_layout("input1", input1.get_layout()));
+    topology.add(input_layout("input2", input2.get_layout()));
+    topology.add(reorder("reorder1", "input1", format::bfwzyx, dt));
+    topology.add(reorder("reorder2", "input2", format::bfwzyx, dt));
+    topology.add(eltwise("eltwise", {"reorder1", "reorder2"}, mode));
+    topology.add(reorder("out", "eltwise", format::bfwzyx, data_types::f32));
+    primitive_id out_id = "out";
+
+    build_options bo;
+    bo.set_option(build_option::optimize_data(true));
+    network network(engine, topology, bo);
+
+    network.set_input_data("input1", input1);
+    network.set_input_data("input2", input2);
+    auto outputs = network.execute();
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, out_id);
+
+    auto output_memory = outputs.at(out_id).get_memory();
+    auto output_ptr = output_memory.pointer<float>();
+
+    VF<float> output_cpu_vec = eltwise_ref(input1_rnd, input2_rnd, input0_size, input1_size, mode);
+    for (size_t i = 0; i < output_cpu_vec.size(); ++i) {
+        EXPECT_TRUE(!(std::isnan((float)output_cpu_vec[i]) && std::isnan((float)output_ptr[i])));
+        ASSERT_FLOAT_EQ(output_cpu_vec[i], output_ptr[i]);
+    }
+}
+
+static std::vector<std::vector<tensor>> inputs_6d = {
+    {tensor(format::bfwzyx, {1, 2, 3, 4, 5, 6}), tensor(format::bfwzyx, {1, 2, 3, 4, 5, 6})},
+    {tensor(format::bfwzyx, {1, 32, 1, 1, 1, 1}), tensor(format::bfwzyx, {8, 32, 4, 5, 6, 7})},
+    {tensor(format::bfwzyx, {1, 32, 1, 1, 1, 7}), tensor(format::bfwzyx, {8, 32, 4, 5, 6, 7})},
+};
+
+INSTANTIATE_TEST_CASE_P(eltwise, eltwise_test_6d,
+                        ::testing::Combine(
+                            ::testing::ValuesIn(modes),
+                            ::testing::ValuesIn(types),
+                            ::testing::ValuesIn(inputs_6d)
+                        ), );
@@ -1,5 +1,5 @@
 /*
-// Copyright (c) 2017-2019 Intel Corporation
+// Copyright (c) 2017-2020 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -1388,6 +1388,122 @@ TEST(scale_gpu, basic_in2x2x2x2x3_scale_xyz) {
         }
     }
 }
+
+TEST(scale_gpu, basic_in2x2x2x2x2x3_scale_4d) {
+    // Scale : 1x2x1x1
+    // Input : 2x2x2x2x2x3
+    // Output : 2x2x2x2x2x3
+
+    const auto& engine = get_test_engine();
+
+    auto batch_num = 2;
+    auto feature_num = 2;
+    auto w_size = 2;
+    auto z_size = 2;
+    auto y_size = 2;
+    auto x_size = 3;
+
+    tensor in_size = tensor(format::bfwzyx, { batch_num, feature_num, x_size, y_size, z_size, w_size });
+    tensor scale_size = tensor(format::bfyx, { 1, feature_num, 1, 1 });
+
+    auto input = memory::allocate(engine, { data_types::f32, format::bfwzyx, in_size});
+    auto scale_input = memory::allocate(engine, { data_types::f32, format::bfyx, scale_size });
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(input_layout("scale_input", scale_input.get_layout()));
+    topology.add(scale("scale", "input", "scale_input"));
+
+    std::vector<float> input_vec = generate_random_1d<float>(in_size.count(), -10, 10);
+    set_values(input, input_vec);
+
+    std::vector<float> scale_input_vec = generate_random_1d<float>(scale_input.count(), -10, 10);
+    set_values(scale_input, scale_input_vec);
+
+    network network(engine, topology);
+
+    network.set_input_data("input", input);
+    network.set_input_data("scale_input", scale_input);
+
+    auto outputs = network.execute();
+
+    auto output = outputs.at("scale").get_memory();
+    auto output_ptr = output.pointer<float>();
+
+    for (int b = 0; b < batch_num; ++b) {
+        for (int f = 0; f < feature_num; ++f) {
+            for (int w = 0; w < w_size; ++w) {
+                for (int z = 0; z < z_size; ++z) {
+                    for (int y = 0; y < y_size; ++y) {
+                        for (int x = 0; x < x_size; ++x) {
+                            int linear_id = x + x_size * (y + y_size * (z + z_size * (w + w_size * (f + feature_num * b))));
+                            int linear_id_scale = f;
+                            EXPECT_NEAR(output_ptr[linear_id], input_vec[linear_id] * scale_input_vec[linear_id_scale], 1e-05f);
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
+
+TEST(scale_gpu, basic_in2x2x2x2x2x3_scale_6d) {
+    // Scale : 1x2x1x1x1x1
+    // Input : 2x2x2x2x2x3
+    // Output : 2x2x2x2x2x3
+
+    const auto& engine = get_test_engine();
+
+    auto batch_num = 2;
+    auto feature_num = 2;
+    auto w_size = 2;
+    auto z_size = 2;
+    auto y_size = 2;
+    auto x_size = 3;
+
+    tensor in_size = tensor(format::bfwzyx, { batch_num, feature_num, x_size, y_size, z_size, w_size });
+    tensor scale_size = tensor(format::bfwzyx, { 1, feature_num, 1, 1, 1, 1 });
+
+    auto input = memory::allocate(engine, { data_types::f32, format::bfwzyx, in_size});
+    auto scale_input = memory::allocate(engine, { data_types::f32, format::bfwzyx, scale_size });
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(input_layout("scale_input", scale_input.get_layout()));
+    topology.add(scale("scale", "input", "scale_input"));
+
+    std::vector<float> input_vec = generate_random_1d<float>(in_size.count(), -10, 10);
+    set_values(input, input_vec);
+
+    std::vector<float> scale_input_vec = generate_random_1d<float>(scale_input.count(), -10, 10);
+    set_values(scale_input, scale_input_vec);
+
+    network network(engine, topology);
+
+    network.set_input_data("input", input);
+    network.set_input_data("scale_input", scale_input);
+
+    auto outputs = network.execute();
+
+    auto output = outputs.at("scale").get_memory();
+    auto output_ptr = output.pointer<float>();
+
+    for (int b = 0; b < batch_num; ++b) {
+        for (int f = 0; f < feature_num; ++f) {
+            for (int w = 0; w < w_size; ++w) {
+                for (int z = 0; z < z_size; ++z) {
+                    for (int y = 0; y < y_size; ++y) {
+                        for (int x = 0; x < x_size; ++x) {
+                            int linear_id = x + x_size * (y + y_size * (z + z_size * (w + w_size * (f + feature_num * b))));
+                            int linear_id_scale = f;
+                            EXPECT_NEAR(output_ptr[linear_id], input_vec[linear_id] * scale_input_vec[linear_id_scale], 1e-05f);
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
+
 //////////////////////////////////////////////////////////////////////////////
 //                                                                          //
 //                   Exhaustive Negative Matrix tests                       //
@@ -1,5 +1,5 @@
 /*
-// Copyright (c) 2016-2019 Intel Corporation
+// Copyright (c) 2016-2020 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -127,6 +127,33 @@ inline VF<T> flatten_4d(cldnn::format input_format, VVVVF<T> &data) {
     return vec;
 }
 
+template<typename T>
+inline VF<T> flatten_6d(cldnn::format input_format, VVVVVVF<T> &data) {
+    size_t a = data.size();
+    size_t b = data[0].size();
+    size_t c = data[0][0].size();
+    size_t d = data[0][0][0].size();
+    size_t e = data[0][0][0][0].size();
+    size_t f = data[0][0][0][0][0].size();
+    VF<T> vec(a * b * c * d * e * f, (T)(0.0f));
+    size_t idx = 0;
+
+    switch (input_format.value) {
+        case cldnn::format::bfwzyx:
+            for (size_t bi = 0; bi < a; ++bi)
+                for (size_t fi = 0; fi < b; ++fi)
+                    for (size_t wi = 0; wi < c; ++wi)
+                        for (size_t zi = 0; zi < d; ++zi)
+                            for (size_t yi = 0; yi < e; ++yi)
+                                for (size_t xi = 0; xi < f; ++xi)
+                                    vec[idx++] = data[bi][fi][wi][zi][yi][xi];
+            break;
+        default:
+            assert(0);
+    }
+    return vec;
+}
+
 template<typename T>
 std::vector<T> generate_random_1d(size_t a, int min, int max, int k = 8) {
     static std::default_random_engine generator(random_seed);
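As a cross-check on flatten_6d, the nested bfwzyx loop above visits elements in exactly the order given by the closed-form linear index used in the new scale tests. A standalone sketch (illustrative sizes, not code from the commit):

    #include <cassert>
    #include <cstddef>

    int main() {
        // bfwzyx linearization: x + X*(y + Y*(z + Z*(w + W*(f + F*b))))
        const int B = 2, F = 2, W = 2, Z = 2, Y = 2, X = 3;
        std::size_t expected = 0;
        for (int b = 0; b < B; ++b)
            for (int f = 0; f < F; ++f)
                for (int w = 0; w < W; ++w)
                    for (int z = 0; z < Z; ++z)
                        for (int y = 0; y < Y; ++y)
                            for (int x = 0; x < X; ++x) {
                                int linear_id = x + X * (y + Y * (z + Z * (w + W * (f + F * b))));
                                // The loop nest of flatten_6d enumerates indices in increasing order.
                                assert(static_cast<std::size_t>(linear_id) == expected);
                                ++expected;
                            }
        return 0;
    }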