Files
openvino/inference-engine/thirdparty/clDNN/tests/test_cases/reshape_gpu_test.cpp
Egor Churaev 2caca604ca [IE CLDNN] Fix reshape for yxfb layout (#1632)
In one of the networks there was the following pipeline:
```
FullyConnected -> Reshape -> FullyConnected
```
And the output of Reshape wasn't in the same order as the input of this
layer. I found that the problem was connected with the format of the layers.
During optimization passes this pipeline was transformed to the
following:
```
FullyConnected -> Reorder -> Reshape -> Reorder -> FullyConnected
```
Both `FullyConnected` layers work with the `yxfb` format. This is why the
Reorder layer after the Reshape has an output layout with format `yxfb` and
`reshape_in_layout.format` returns the `yxfb` format. But in this case we
have to convert Reshape to the `bfyx` format, because then we won't
change the order of elements.
I replaced `reshape_in_layout.format` (which returns `yxfb`) with an
explicitly set `bfyx` format.

JIRA: 35288
2020-08-11 14:52:04 +03:00

713 lines
22 KiB
C++

/*
// Copyright (c) 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
///////////////////////////////////////////////////////////////////////////////////////////////////
#include <gtest/gtest.h>
#include <api/topology.hpp>
#include <api/network.hpp>
#include <api/engine.hpp>
#include <api/data.hpp>
#include <api/reshape.hpp>
#include <api/input_layout.hpp>
#include "test_utils/test_utils.h"
using namespace cldnn;
using namespace tests;
using namespace testing;
// Checks a single produced float element against its reference value using
// gtest's EXPECT_FLOAT_EQ (almost-equal comparison, non-fatal on failure).
void verify_float(const float& output_value, const float& value) {
EXPECT_FLOAT_EQ(output_value, value);
}
// Checks a single produced integer element for exact equality with its
// reference value (non-fatal on failure).
void verify_int(const int32_t& output_value, const int32_t& value) {
EXPECT_EQ(output_value, value);
}
// Generic driver for the reshape tests below.
//
// Fills an input tensor of shape `input_size` with the consecutive values
// 1, 2, 3, ..., builds a network containing a single reshape primitive to
// `reshape_size`, executes it, and verifies that:
//   * the reshape changes neither data type nor format,
//   * the logical output size equals `reshape_size`,
//   * the buffer size equals `reshape_size` plus the requested output padding,
//   * the values read back in `fmt` dimension order (skipping padding) are
//     still the consecutive sequence 1, 2, 3, ... (element order preserved).
//
// fmt          - memory format used for the input (and expected of the output).
// input_size   - input shape; its element count must match reshape_size's.
// reshape_size - requested output shape.
// in_place     - names the scenario being exercised; not read by the body
//                (the verification is identical either way).
// input_padd   - when non-zero, a reorder node is inserted before the reshape
//                so the reshape reads a padded input.
// output_padd  - padding requested directly on the reshape primitive.
template <class ElemType>
void generic_reshape_test(format fmt, tensor const& input_size, tensor const& reshape_size, bool in_place, padding const& input_padd = padding(), padding const& output_padd = padding()) {
const auto& engine = get_test_engine();
//allocate input memory
// Map the element type to the matching cldnn data type (default: f32).
auto data_type = data_types::f32;
if (std::is_same<ElemType, FLOAT16>::value)
data_type = data_types::f16;
else if (std::is_same<ElemType, int8_t>::value)
data_type = data_types::i8;
else if (std::is_same<ElemType, int32_t>::value)
data_type = data_types::i32;
else if (std::is_same<ElemType, int64_t>::value)
data_type = data_types::i64;
auto input = memory::allocate(engine, {data_type, fmt, input_size});
// Fill the input with 1, 2, 3, ... so element order can be checked later.
{
auto input_ptr = input.cldnn::memory::pointer<ElemType>();
auto input_itr = input_ptr.begin();
auto elements = input_size.count();
int value = 1;
for (size_t i = 0; i < elements; ++i)
*input_itr++ = (ElemType)value++;
}
topology tpl;
std::string reshape_input = "input";
tpl.add(input_layout("input", input.get_layout()));
// When input padding is requested, insert a reorder producing a padded
// layout and feed the reshape from it instead of the raw input.
if (input_padd) {
auto padded_input_layout = input.get_layout();
padded_input_layout.data_padding = input_padd;
tpl.add(reorder("reorder", "input", padded_input_layout));
reshape_input = "reorder";
}
tpl.add(reshape("reshape", reshape_input, reshape_size, output_padd));
build_options bo;
// Mark both the reshape and its input as network outputs so neither can be
// optimized away and both can be fetched after execution.
bo.set_option(build_option::outputs({reshape_input, "reshape"}));
network net(engine, tpl, bo);
net.set_input_data("input", input);
auto outputs = net.execute();
ASSERT_TRUE(outputs.size() == 2 && outputs.count("reshape") == 1 && outputs.count(reshape_input) == 1);
// net_input is unused beyond the presence check above.
auto net_input = outputs.at(reshape_input).get_memory();
auto output = outputs.at("reshape").get_memory();
EXPECT_TRUE(output.get_layout().data_type == input.get_layout().data_type); //reshape should not change data_type
EXPECT_TRUE(output.get_layout().format.value == input.get_layout().format.value); //reshape should not change format
//output size should be equal to requested plus output padding
ASSERT_TRUE(output.get_layout().size == reshape_size);
ASSERT_TRUE(output.get_layout().get_buffer_size() == reshape_size.add(output_padd.lower_size()).add(output_padd.upper_size()));
{
auto output_ptr = output.pointer<const ElemType>();
auto output_itr = output_ptr.begin();
// Per-dimension sizes and paddings, listed in `fmt`'s dimension order.
auto sizes = reshape_size.sizes(fmt);
auto lower = output_padd.lower_size().sizes(fmt);
auto upper = output_padd.upper_size().sizes(fmt);
// buffer_sizes[i] becomes the pitch (in elements) of dimension i in the
// padded buffer, accumulated from the innermost dimension outwards.
auto buffer_sizes = sizes;
int32_t accum = 1;
for (size_t i = 1; i <= sizes.size(); ++i) {
buffer_sizes[sizes.size() - i] = accum;
accum *= lower[sizes.size() - i] + sizes[sizes.size() - i] + upper[sizes.size() - i];
}
int value = 1;
// Walk the padded buffer dimension by dimension; before/after each
// dimension's data the iterator is advanced over that dimension's lower
// and upper padding using the pitches computed above.
// NOTE(review): only four dimensions (d1..d4) are traversed, so for
// 5-dim formats (e.g. bfzyx) the innermost dimension is never iterated
// and only a contiguous prefix of the values is verified - confirm
// whether full coverage is intended there.
output_itr += lower[0] * buffer_sizes[0];
for (int d1 = 0; d1 < sizes[0]; ++d1) {
output_itr += lower[1] * buffer_sizes[1];
for (int d2 = 0; d2 < sizes[1]; ++d2) {
output_itr += lower[2] * buffer_sizes[2];
for (int d3 = 0; d3 < sizes[2]; ++d3) {
output_itr += lower[3] * buffer_sizes[3];
for (int d4 = 0; d4 < sizes[3]; ++d4) {
auto& output_value = *output_itr;
++output_itr;
// Float types go through EXPECT_FLOAT_EQ; integral types
// through exact EXPECT_EQ.
if (data_type == data_types::f16 || data_type == data_types::f32)
verify_float(static_cast<float>(output_value), static_cast<float>((ElemType)value));
else
verify_int(static_cast<int32_t>(output_value), static_cast<int32_t>(value));
++value;
}
output_itr += upper[3] * buffer_sizes[3];
}
output_itr += upper[2] * buffer_sizes[2];
}
output_itr += upper[1] * buffer_sizes[1];
}
}
}
// f32 / bfyx: flatten a 2x2 plane into 4x1, in place.
TEST(reshape_gpu_f32, basic_2dim_in_place) {
    generic_reshape_test<float>(format::bfyx, tensor(1, 1, 2, 2), tensor(1, 1, 4, 1), true);
}
// f16 / bfyx: flatten a 2x2 plane into 1x4, in place.
TEST(reshape_gpu_f16, basic_2dim_in_place) {
    generic_reshape_test<FLOAT16>(format::bfyx, tensor(1, 1, 2, 2), tensor(1, 1, 1, 4), true);
}
// i8 / bfyx: flatten a 2x2 plane into 1x4, in place.
TEST(reshape_gpu_i8, basic_2dim_in_place) {
    generic_reshape_test<int8_t>(format::bfyx, tensor(1, 1, 2, 2), tensor(1, 1, 1, 4), true);
}
// i32 / bfyx: flatten a 2x2 plane into 1x4, in place.
TEST(reshape_gpu_i32, basic_2dim_in_place) {
    generic_reshape_test<int32_t>(format::bfyx, tensor(1, 1, 2, 2), tensor(1, 1, 1, 4), true);
}
// i64 / bfyx: flatten a 2x2 plane into 1x4, in place.
TEST(reshape_gpu_i64, basic_2dim_in_place) {
    generic_reshape_test<int64_t>(format::bfyx, tensor(1, 1, 2, 2), tensor(1, 1, 1, 4), true);
}
// f32 / yxfb: 4-dim in-place reshape, 9x9x2x4 -> 27x2x3x4 (648 elements).
TEST(reshape_gpu_f32, basic_4dim_in_place) {
    generic_reshape_test<float>(format::yxfb, tensor(9, 9, 2, 4), tensor(27, 2, 3, 4), true);
}
// f16 / yxfb: 4-dim in-place reshape, 9x9x2x4 -> 3x4x27x2 (648 elements).
TEST(reshape_gpu_f16, basic_4dim_in_place) {
    generic_reshape_test<FLOAT16>(format::yxfb, tensor(9, 9, 2, 4), tensor(3, 4, 27, 2), true);
}
// i32 / yxfb: 4-dim in-place reshape, 9x9x2x4 -> 3x4x27x2 (648 elements).
TEST(reshape_gpu_i32, basic_4dim_in_place) {
    generic_reshape_test<int32_t>(format::yxfb, tensor(9, 9, 2, 4), tensor(3, 4, 27, 2), true);
}
// i64 / yxfb: 4-dim in-place reshape, 9x9x2x4 -> 3x4x27x2 (648 elements).
TEST(reshape_gpu_i64, basic_4dim_in_place) {
    generic_reshape_test<int64_t>(format::yxfb, tensor(9, 9, 2, 4), tensor(3, 4, 27, 2), true);
}
// f32 / byxf: reshape 4x2 -> 8x1 with 1-element output padding in x and y.
// Fix: the test suite name was misspelled ("reshpape_gpu_f32"), which kept
// this case out of any --gtest_filter=reshape_gpu_f32.* run; renamed to the
// suite the rest of the f32 cases use (no name collision exists).
TEST(reshape_gpu_f32, basic_2dim_output_padd) {
    generic_reshape_test<float>(format::byxf, tensor(1, 1, 4, 2), tensor(1, 1, 8, 1),
                                false, padding(), padding(std::vector<int>{0, 0, 1, 1}));
}
// f16 / byxf: reshape 3x4 -> 2x6 with 2-element output padding in x and y.
TEST(reshape_gpu_f16, basic_2dim_output_padd) {
    generic_reshape_test<FLOAT16>(format::byxf, tensor(1, 1, 3, 4), tensor(1, 1, 2, 6),
                                  false, padding(), padding(std::vector<int>{0, 0, 2, 2}));
}
// i8 / byxf: reshape 3x4 -> 2x6 with 2-element output padding in x and y.
TEST(reshape_gpu_i8, basic_2dim_output_padd) {
    generic_reshape_test<int8_t>(format::byxf, tensor(1, 1, 3, 4), tensor(1, 1, 2, 6),
                                 false, padding(), padding(std::vector<int>{0, 0, 2, 2}));
}
// i32 / byxf: reshape 3x4 -> 2x6 with 2-element output padding in x and y.
TEST(reshape_gpu_i32, basic_2dim_output_padd) {
    generic_reshape_test<int32_t>(format::byxf, tensor(1, 1, 3, 4), tensor(1, 1, 2, 6),
                                  false, padding(), padding(std::vector<int>{0, 0, 2, 2}));
}
// i64 / byxf: reshape 3x4 -> 2x6 with 2-element output padding in x and y.
TEST(reshape_gpu_i64, basic_2dim_output_padd) {
    generic_reshape_test<int64_t>(format::byxf, tensor(1, 1, 3, 4), tensor(1, 1, 2, 6),
                                  false, padding(), padding(std::vector<int>{0, 0, 2, 2}));
}
// f32 / fyxb: reshape 2x5 -> 5x2 reading from an asymmetrically padded input.
TEST(reshape_gpu_f32, basic_2dim_input_padd) {
    generic_reshape_test<float>(format::fyxb, tensor(1, 1, 2, 5), tensor(1, 1, 5, 2),
                                false, padding({0, 0, 3, 2}, {0, 0, 1, 4}));
}
// f16 / fyxb: reshape 3x3 -> 1x9 reading from an asymmetrically padded input.
TEST(reshape_gpu_f16, basic_2dim_input_padd) {
    generic_reshape_test<FLOAT16>(format::fyxb, tensor(1, 1, 3, 3), tensor(1, 1, 1, 9),
                                  false, padding({0, 0, 4, 1}, {0, 0, 2, 3}));
}
// i8 / fyxb: reshape 3x3 -> 1x9 reading from an asymmetrically padded input.
TEST(reshape_gpu_i8, basic_2dim_input_padd) {
    generic_reshape_test<int8_t>(format::fyxb, tensor(1, 1, 3, 3), tensor(1, 1, 1, 9),
                                 false, padding({0, 0, 4, 1}, {0, 0, 2, 3}));
}
// i32 / fyxb: reshape 3x3 -> 1x9 reading from an asymmetrically padded input.
TEST(reshape_gpu_i32, basic_2dim_input_padd) {
    generic_reshape_test<int32_t>(format::fyxb, tensor(1, 1, 3, 3), tensor(1, 1, 1, 9),
                                  false, padding({0, 0, 4, 1}, {0, 0, 2, 3}));
}
// i64 / fyxb: reshape 3x3 -> 1x9 reading from an asymmetrically padded input.
TEST(reshape_gpu_i64, basic_2dim_input_padd) {
    generic_reshape_test<int64_t>(format::fyxb, tensor(1, 1, 3, 3), tensor(1, 1, 1, 9),
                                  false, padding({0, 0, 4, 1}, {0, 0, 2, 3}));
}
// f32 / byxf: reshape 5x7 -> 7x5 with both input and output padding.
TEST(reshape_gpu_f32, basic_2dim_input_output_padd) {
    generic_reshape_test<float>(format::byxf, tensor(1, 1, 5, 7), tensor(1, 1, 7, 5), false,
                                padding({0, 0, 4, 4}, {0, 0, 1, 1}),
                                padding({0, 0, 0, 0}, {0, 0, 3, 0}));
}
// f16 / byxf: reshape 6x6 -> 3x12 with both input and output padding.
TEST(reshape_gpu_f16, basic_2dim_input_output_padd) {
    generic_reshape_test<FLOAT16>(format::byxf, tensor(1, 1, 6, 6), tensor(1, 1, 3, 12), false,
                                  padding({0, 0, 1, 1}, {0, 0, 0, 0}),
                                  padding({0, 0, 2, 1}, {0, 0, 1, 2}));
}
// i8 / byxf: reshape 5x7 -> 7x5 with both input and output padding.
TEST(reshape_gpu_i8, basic_2dim_input_output_padd) {
    generic_reshape_test<int8_t>(format::byxf, tensor(1, 1, 5, 7), tensor(1, 1, 7, 5), false,
                                 padding({0, 0, 4, 4}, {0, 0, 1, 1}),
                                 padding({0, 0, 0, 0}, {0, 0, 3, 0}));
}
// i32 / byxf: reshape 5x7 -> 7x5 with both input and output padding.
TEST(reshape_gpu_i32, basic_2dim_input_output_padd) {
    generic_reshape_test<int32_t>(format::byxf, tensor(1, 1, 5, 7), tensor(1, 1, 7, 5), false,
                                  padding({0, 0, 4, 4}, {0, 0, 1, 1}),
                                  padding({0, 0, 0, 0}, {0, 0, 3, 0}));
}
// i64 / byxf: reshape 5x7 -> 7x5 with both input and output padding.
TEST(reshape_gpu_i64, basic_2dim_input_output_padd) {
    generic_reshape_test<int64_t>(format::byxf, tensor(1, 1, 5, 7), tensor(1, 1, 7, 5), false,
                                  padding({0, 0, 4, 4}, {0, 0, 1, 1}),
                                  padding({0, 0, 0, 0}, {0, 0, 3, 0}));
}
// f32 / bfyx: 4-dim reshape 2x5x7x3 -> 1x14x15x1 with output padding.
// Fix: the test suite name was misspelled ("reshpape_gpu_f32"), which kept
// this case out of any --gtest_filter=reshape_gpu_f32.* run; renamed to the
// suite the rest of the f32 cases use (no name collision exists).
TEST(reshape_gpu_f32, basic_4dim_output_padd) {
    generic_reshape_test<float>(format::bfyx, tensor(2, 5, 7, 3), tensor(1, 14, 15, 1),
                                false, padding(), padding({1, 0, 0, 1}, {0, 2, 3, 0}));
}
// f16 / bfyx: 4-dim reshape 5x4x2x2 -> 40x2x1x1 with output padding.
TEST(reshape_gpu_f16, basic_4dim_output_padd) {
    generic_reshape_test<FLOAT16>(format::bfyx, tensor(5, 4, 2, 2), tensor(40, 2, 1, 1),
                                  false, padding(), padding({0, 2, 0, 1}, {0, 2, 3, 0}));
}
// f32 / yxfb: 4-dim reshape 8x128x3x3 -> 16x8x8x9 reading from a padded input.
TEST(reshape_gpu_f32, basic_4dim_input_padd) {
    generic_reshape_test<float>(format::yxfb, tensor(8, 128, 3, 3), tensor(16, 8, 8, 9),
                                false, padding({0, 1, 3, 3}, {0, 1, 1, 1}));
}
// f16 / yxfb: 4-dim reshape 2x32x8x8 -> 8x128x1x4 reading from a padded input.
TEST(reshape_gpu_f16, basic_4dim_input_padd) {
    generic_reshape_test<FLOAT16>(format::yxfb, tensor(2, 32, 8, 8), tensor(8, 128, 1, 4),
                                  false, padding({2, 2, 1, 0}, {1, 2, 2, 0}));
}
// f32 / fyxb: large reshape 8x1024x25x25 -> 8x64x100x100 with input and output padding.
TEST(reshape_gpu_f32, basic_4dim_input_output_padd) {
    generic_reshape_test<float>(format::fyxb, tensor(8, 1024, 25, 25), tensor(8, 64, 100, 100), false,
                                padding({2, 0, 2, 1}, {0, 1, 4, 0}),
                                padding({1, 2, 3, 4}, {0, 4, 1, 1}));
}
// f16 / byxf: large reshape 32x3x227x227 -> 8x12x227x227 with input and output padding.
TEST(reshape_gpu_f16, basic_4dim_input_output_padd) {
    generic_reshape_test<FLOAT16>(format::byxf, tensor(32, 3, 227, 227), tensor(8, 12, 227, 227), false,
                                  padding({0, 1, 4, 4}, {0, 1, 1, 1}),
                                  padding({0, 29, 29, 0}, {0, 0, 0, 0}));
}
// f32 / bfzyx: 5-dim in-place reshape, 9x9x2x4x2 -> 27x2x1x4x6.
TEST(reshape_gpu_f32, basic_5dim_in_place) {
    generic_reshape_test<float>(format::bfzyx, tensor(9, 9, 2, 4, 2), tensor(27, 2, 1, 4, 6), true);
}
TEST(reshape_gpu_f32, multiple_users_with_reorder) {
    // The reshape output feeds two consumers, one of them behind a reorder
    // (exercises the split-with-crop implementation):
    //
    //                            _ REORDER(yxfb) --> RELU(yxfb)
    //                           |
    //   INPUT(bfyx,2x2x1x1)--RELU(bfyx)--RESHAPE(4x1x1x1)
    //                           |_
    //                              RELU(bfyx)
    //
    // relu(-1, 2, -3, 4) = (0, 2, 0, 4); both consumers must observe that
    // same sequence regardless of the reorder in between.
    const auto& engine = get_test_engine();
    auto input = memory::allocate(engine, {data_types::f32, format::bfyx, tensor(spatial(1, 1), feature(2), batch(2))});

    topology tpl;
    tpl.add(input_layout("input", input.get_layout()));
    tpl.add(activation("relu", "input", activation_func::relu));
    tpl.add(reshape("reshape", "relu", tensor(batch(4))));
    tpl.add(reorder("reorder1", "reshape", format::yxfb, data_types::f32));
    tpl.add(activation("relu1", "reorder1", activation_func::relu));
    tpl.add(activation("relu2", "reshape", activation_func::relu));

    set_values(input, {-1.f, 2.f, -3.f, 4.f});

    network net(engine, tpl);
    net.set_input_data("input", input);
    auto outputs = net.execute();

    const std::vector<float> expected = {0.f, 2.f, 0.f, 4.f};

    auto out1_mem = outputs.at("relu1").get_memory();
    auto out1_ptr = out1_mem.pointer<float>();
    for (size_t i = 0; i < expected.size(); i++)
        EXPECT_EQ(out1_ptr[i], expected[i]);

    auto out2_mem = outputs.at("relu2").get_memory();
    auto out2_ptr = out2_mem.pointer<float>();
    for (size_t i = 0; i < expected.size(); i++)
        EXPECT_EQ(out2_ptr[i], expected[i]);
}
TEST(reshape_gpu_f32, calc_output_shape) {
    // A reshape target of (1, 1, 0, -1) on a 2x2x1x1 bfyx input must be
    // completed automatically to (1, 1, 1, 4), with the data passed through
    // untouched.
    const auto& engine = get_test_engine();
    auto input = memory::allocate(engine, {data_types::f32, format::bfyx, {2, 2, 1, 1}});

    topology tpl;
    tpl.add(input_layout("input", input.get_layout()));
    tpl.add(reshape("reshape", "input", tensor(1, 1, 0, -1)));

    set_values(input, {-1.f, 2.f, -3.f, 4.f});

    network net(engine, tpl);
    net.set_input_data("input", input);
    auto outputs = net.execute();

    EXPECT_EQ(outputs.size(), size_t(1));
    EXPECT_EQ(outputs.begin()->first, "reshape");

    auto output = outputs.at("reshape").get_memory();
    // Reshape must not change data type or format, and the inferred size
    // must be (1, 1, 1, 4).
    EXPECT_TRUE(output.get_layout().data_type == input.get_layout().data_type);
    EXPECT_TRUE(output.get_layout().format == input.get_layout().format);
    ASSERT_TRUE(output.get_layout().size == tensor(1, 1, 1, 4));

    const float expected[4] = {-1.f, 2.f, -3.f, 4.f};
    auto output_ptr = output.pointer<float>();
    for (int i = 0; i < 4; i++) {
        EXPECT_TRUE(are_equal(expected[i], output_ptr[i]));
    }
}
// 6-dim reshape: a bfwzyx tensor of batch 3, feature 3, spatial (1, 1, 2, 2)
// is reshaped to batch 1, feature 1, spatial (2, 2, 3, 3) with an output
// padding whose size vector has a 1 in the last component.
// The expected buffer is the 36 input values written contiguously, framed by
// 12 zeros on each side (one slice of the padded output - presumably the
// padding lands on the w axis; confirm against the padding size ordering).
TEST(reshape_gpu_f32, basic_bfwzyx) {
// input: bfwzyx, (3, 3, 2, 2, 1, 1)
// reshape: (1, 1, 2, 2, 3, 3), pad (0, 0, 0, 0, 0, 1)
const auto& engine = get_test_engine();
auto input = memory::allocate(engine, layout{data_types::f32, format::bfwzyx, tensor{batch(3), feature(3), spatial(1, 1, 2, 2)}});
topology topology;
topology.add(input_layout("input", input.get_layout()));
topology.add(reshape("reshape", "input", tensor(batch(1), feature(1), spatial(2, 2, 3, 3)), padding({0, 0, 0, 0, 0, 1}, 0.f)));
// clang-format off
// Input: four repetitions of the sequence 1..9 (36 values in total).
std::vector<float> input_data = {
1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f,
1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f,
1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f,
1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f,
};
// Expected buffer: 12 zeros of leading padding, the 36 input values in
// their original order, then 12 zeros of trailing padding.
std::vector<float> expected_out = {
0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f,
1.f, 2.f, 3.f, 4.f,
5.f, 6.f, 7.f, 8.f,
9.f, 1.f, 2.f, 3.f,
4.f, 5.f, 6.f, 7.f,
8.f, 9.f, 1.f, 2.f,
3.f, 4.f, 5.f, 6.f,
7.f, 8.f, 9.f, 1.f,
2.f, 3.f, 4.f, 5.f,
6.f, 7.f, 8.f, 9.f,
0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f,
0.f, 0.f, 0.f, 0.f,
};
// clang-format on
set_values(input, input_data);
network network(engine, topology);
network.set_input_data("input", input);
auto outputs = network.execute();
EXPECT_EQ(outputs.size(), size_t(1));
EXPECT_EQ(outputs.begin()->first, "reshape");
auto output = outputs.at("reshape").get_memory();
// Reshape must not change data type or format.
EXPECT_TRUE(output.get_layout().data_type == input.get_layout().data_type);
EXPECT_TRUE(output.get_layout().format == input.get_layout().format);
// Compare the whole padded buffer element by element.
auto output_ptr = output.pointer<float>();
ASSERT_EQ(output_ptr.size(), expected_out.size());
for (size_t i = 0; i < expected_out.size(); i++) {
EXPECT_TRUE(are_equal(expected_out[i], output_ptr[i]));
}
}
TEST(reshape_gpu_f32, shrink_chain_partial) {
    // relu -> reshape -> reorder -> reshape -> scale -> reorder chain built
    // from a 2x2x1x1 input; with optimize_data enabled the intermediate
    // reshape/reorder nodes may be shrunk away, and the final result must be
    // scale * relu(input) + shift = (5, 12, 15, 32).
    const auto& engine = get_test_engine();
    auto input = memory::allocate(engine, {data_types::f32, format::bfyx, tensor(spatial(1, 1), feature(2), batch(2))});
    auto scale_in = memory::allocate(engine, {data_types::f32, format::bfyx, tensor(feature(4))});
    auto shift_in = memory::allocate(engine, {data_types::f32, format::bfyx, tensor(feature(4))});

    set_values(scale_in, {0.f, 1.f, 2.f, 3.f});
    set_values(shift_in, {5.f, 10.f, 15.f, 20.f});

    topology tpl;
    tpl.add(input_layout("input", input.get_layout()));
    tpl.add(data("scale_in", scale_in));
    tpl.add(data("shift_in", shift_in));
    tpl.add(activation("relu", "input", activation_func::relu));
    tpl.add(reshape("reshape", "relu", tensor(spatial(2, 2))));
    tpl.add(reorder("reorder", "reshape", format::bfyx, data_types::f32));
    tpl.add(reshape("reshape1", "reorder", tensor(feature(4))));
    tpl.add(scale("scale", "reshape1", "scale_in", "shift_in"));
    tpl.add(reorder("out_reorder", "scale", format::yxfb, data_types::f32));

    set_values(input, {-1.f, 2.f, -3.f, 4.f});

    build_options bo;
    bo.set_option(build_option::optimize_data(true));
    network net(engine, tpl, bo);
    net.set_input_data("input", input);
    auto outputs = net.execute();

    const std::vector<float> expected = {5.f, 12.f, 15.f, 32.f};
    auto out_mem = outputs.at("out_reorder").get_memory();
    auto out_ptr = out_mem.pointer<float>();
    for (size_t i = 0; i < expected.size(); i++)
        EXPECT_EQ(out_ptr[i], expected[i]) << " i=" << i;
}
TEST(reshape_gpu_f32, shrink_chain_full) {
    // Same chain as shrink_chain_partial, but starting from a feature(4)
    // input so every reshape/reorder in the chain is a candidate for removal
    // under optimize_data. Expected: scale * relu(input) + shift = (5, 12, 15, 32).
    const auto& engine = get_test_engine();
    auto input = memory::allocate(engine, {data_types::f32, format::bfyx, tensor(feature(4))});
    auto scale_in = memory::allocate(engine, {data_types::f32, format::bfyx, tensor(feature(4))});
    auto shift_in = memory::allocate(engine, {data_types::f32, format::bfyx, tensor(feature(4))});

    set_values(scale_in, {0.f, 1.f, 2.f, 3.f});
    set_values(shift_in, {5.f, 10.f, 15.f, 20.f});

    topology tpl;
    tpl.add(input_layout("input", input.get_layout()));
    tpl.add(data("scale_in", scale_in));
    tpl.add(data("shift_in", shift_in));
    tpl.add(activation("relu", "input", activation_func::relu));
    tpl.add(reshape("reshape", "relu", tensor(spatial(2, 2))));
    tpl.add(reorder("reorder", "reshape", format::bfyx, data_types::f32));
    tpl.add(reshape("reshape1", "reorder", tensor(feature(4))));
    tpl.add(scale("scale", "reshape1", "scale_in", "shift_in"));
    tpl.add(reorder("out_reorder", "scale", format::yxfb, data_types::f32));

    set_values(input, {-1.f, 2.f, -3.f, 4.f});

    build_options bo;
    bo.set_option(build_option::optimize_data(true));
    network net(engine, tpl, bo);
    net.set_input_data("input", input);
    auto outputs = net.execute();

    const std::vector<float> expected = {5.f, 12.f, 15.f, 32.f};
    auto out_mem = outputs.at("out_reorder").get_memory();
    auto out_ptr = out_mem.pointer<float>();
    for (size_t i = 0; i < expected.size(); i++)
        EXPECT_EQ(out_ptr[i], expected[i]) << " i=" << i;
}
// relu -> reshape -> reorder -> reshape chain whose trailing reshape is the
// network output; with optimize_data the intermediate nodes may be shrunk,
// but relu(-1, 2, -3, 4) = (0, 2, 0, 4) must still be produced.
//
// Cleanup: removed the scale_in/shift_in allocations and value vectors that
// were copied from the other shrink_chain tests - they were never added to
// this topology and had no effect.
TEST(reshape_gpu_f32, shrink_chain_out) {
    const auto& engine = get_test_engine();
    auto input = memory::allocate(engine, {data_types::f32, format::bfyx, { tensor(feature(4)) }});

    topology topology;
    topology.add(input_layout("input", input.get_layout()));
    topology.add(activation("relu", "input", activation_func::relu));
    topology.add(reshape("reshape", "relu", tensor(spatial(2, 2))));
    topology.add(reorder("reorder", "reshape", format::bfyx, data_types::f32));
    topology.add(reshape("reshape1", "reorder", tensor(feature(4))));

    std::vector<float> input_vec = {-1.f, 2.f, -3.f, 4.f};
    std::vector<float> out = {0.f, 2.f, 0.f, 4.0f};
    set_values(input, input_vec);

    build_options bo;
    bo.set_option(build_option::optimize_data(true));
    network network(engine, topology, bo);
    network.set_input_data("input", input);
    auto outputs = network.execute();

    auto output = outputs.at("reshape1").get_memory();
    auto output_ptr = output.pointer<float>();
    for (size_t i = 0; i < out.size(); i++)
        EXPECT_EQ(output_ptr[i], out[i]) << " i=" << i;
}