[IE CLDNN] Fix regression for supporting eltwise fsv broadcasting (#5182)
This commit is contained in:
parent
5ba5e9b5d7
commit
887c8c46cc
@ -9,6 +9,14 @@
|
||||
|
||||
namespace kernel_selector {
|
||||
|
||||
static inline bool IsBroadcastingPossibleInput(const DataTensor& input, const DataTensor& output) {
|
||||
if ((input.LogicalSize() == 1) ||
|
||||
(input.LogicalSize() == output.Feature().v && input.Feature().v == output.Feature().v)) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
ParamsKey EltwiseKernel_b_fs_yx_fsv16::GetSupportedKey() const {
|
||||
ParamsKey k;
|
||||
k.EnableInputDataType(Datatype::F16);
|
||||
@ -34,7 +42,7 @@ ParamsKey EltwiseKernel_b_fs_yx_fsv16::GetSupportedKey() const {
|
||||
static inline size_t GetBlockSize(const eltwise_params& params) {
|
||||
// Set blocksize 1 when broadcasting X dim
|
||||
for (size_t i = 0; i < params.inputs.size(); i++) {
|
||||
if (params.inputs[i].X().v == 1 && params.inputs[i].LogicalSize() != 1) {
|
||||
if ((params.inputs[i].X().v == 1) && !IsBroadcastingPossibleInput(params.inputs[i], params.output)) {
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
@ -56,9 +64,9 @@ static inline bool OpHasFeatureBroadcast(const eltwise_params& params, const siz
|
||||
for (size_t input_idx = 0; input_idx < ew.inputs.size(); input_idx++) {
|
||||
const auto &input = ew.inputs[input_idx];
|
||||
if (input.mode == EltwiseInputMode::INPUT_BUFFER) {
|
||||
if (params.inputs[input_idx].LogicalSize() != 1
|
||||
&& params.inputs[input_idx].Feature().v == 1
|
||||
&& params.output.Feature().v != 1) {
|
||||
if (params.inputs[input_idx].LogicalSize() != 1 &&
|
||||
params.inputs[input_idx].Feature().v == 1 &&
|
||||
params.output.Feature().v != 1) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
@ -193,31 +201,45 @@ JitConstants EltwiseKernel_b_fs_yx_fsv16::GetJitConstants(const eltwise_params&
|
||||
jit.Merge(MakeFusedOpsJitConstants(params, {conf}));
|
||||
}
|
||||
|
||||
jit.AddConstant(MakeJitConstant("ELTWISE_BROADCAST", params.broadcast));
|
||||
if (params.broadcast) {
|
||||
bool need_idx_safe = true;
|
||||
for (size_t i = 0; i < params.inputs.size(); i++) {
|
||||
if (IsBroadcastingPossibleInput(params.inputs[i], params.output)) {
|
||||
need_idx_safe = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (need_idx_safe)
|
||||
jit.AddConstant(MakeJitConstant("ELTWISE_BROADCAST", params.broadcast));
|
||||
}
|
||||
|
||||
return jit;
|
||||
}
|
||||
|
||||
bool EltwiseKernel_b_fs_yx_fsv16::Validate(const Params& params, const optional_params& o) const {
|
||||
if (!EltwiseKernelBase::Validate(params, o)) {
|
||||
bool EltwiseKernel_b_fs_yx_fsv16::Validate(const Params& p, const optional_params& o) const {
|
||||
if (!EltwiseKernelBase::Validate(p, o)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const auto& ewParams = static_cast<const eltwise_params&>(params);
|
||||
const auto& params = static_cast<const eltwise_params&>(p);
|
||||
|
||||
const auto& output = ewParams.output;
|
||||
const auto count = params.output.PhysicalSize();
|
||||
|
||||
for (size_t i = 0; i < ewParams.inputs.size(); i++) {
|
||||
if (ewParams.inputs[i].GetLayout() != DataLayout::b_fs_yx_fsv16 && GetBlockSize(ewParams) != 1) {
|
||||
if (count % 8 != 0)
|
||||
return false;
|
||||
|
||||
for (size_t i = 0; i < params.inputs.size(); i++) {
|
||||
if ((params.inputs[i].GetLayout() != DataLayout::b_fs_yx_fsv16) &&
|
||||
!IsBroadcastingPossibleInput(params.inputs[i], params.output)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
auto input0 = ewParams.inputs[0];
|
||||
auto input0 = params.inputs[0];
|
||||
|
||||
// Check that padding before features doesn't misalign the blocks
|
||||
auto feature_block_size = 16;
|
||||
if (input0.Feature().pad.before % feature_block_size != 0 || output.Feature().pad.before % feature_block_size != 0) {
|
||||
if (input0.Feature().pad.before % feature_block_size != 0 || params.output.Feature().pad.before % feature_block_size != 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -240,10 +262,10 @@ bool EltwiseKernel_b_fs_yx_fsv16::Validate(const Params& params, const optional_
|
||||
return same;
|
||||
};
|
||||
|
||||
for (size_t i = 1; i < ewParams.inputs.size(); i++) {
|
||||
if (ewParams.inputs[i].LogicalSize() == input0.LogicalSize() && !(compareTensors(ewParams.inputs[i], input0)))
|
||||
for (size_t i = 1; i < params.inputs.size(); i++) {
|
||||
if (params.inputs[i].LogicalSize() == input0.LogicalSize() && !(compareTensors(params.inputs[i], input0)))
|
||||
return false;
|
||||
if (ewParams.inputs[i].Feature().pad.before % feature_block_size != 0) {
|
||||
if (params.inputs[i].Feature().pad.before % feature_block_size != 0) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
@ -3198,9 +3198,6 @@ using eltwise_test_params = std::tuple<eltwise_mode, data_types, std::vector<std
|
||||
|
||||
template<typename T>
|
||||
class BaseEltwiseTest : public ::testing::TestWithParam<T> {
|
||||
};
|
||||
|
||||
class eltwise_test : public BaseEltwiseTest<eltwise_test_params> {
|
||||
public:
|
||||
template<typename T1, typename T2>
|
||||
VF<float> eltwise_ref(VVVVVVF<T1> input0, VVVVVVF<T2> input1, tensor input0_size, tensor input1_size, eltwise_mode mode) {
|
||||
@ -3253,6 +3250,9 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
class eltwise_test : public BaseEltwiseTest<eltwise_test_params> {
|
||||
};
|
||||
|
||||
TEST_P(eltwise_test, fsv16) {
|
||||
auto p = GetParam();
|
||||
|
||||
@ -3322,6 +3322,7 @@ TEST_P(eltwise_test, fsv16) {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static std::vector<eltwise_mode> modes = {eltwise_mode::sum, eltwise_mode::prod};
|
||||
static std::vector<data_types> types = {data_types::f32, data_types::f16};
|
||||
static std::vector<std::vector<std::vector<int32_t>>> inputs = {
|
||||
@ -3520,3 +3521,102 @@ INSTANTIATE_TEST_CASE_P(eltwise, eltwise_test_mixed_precision,
|
||||
::testing::ValuesIn(mixed_types),
|
||||
::testing::ValuesIn(inputs)
|
||||
), );
|
||||
|
||||
|
||||
struct eltwise_layout_test_params {
|
||||
eltwise_mode mode;
|
||||
std::vector<int32_t> input0_size;
|
||||
std::vector<int32_t> input1_size;
|
||||
format input0_format;
|
||||
format input1_format;
|
||||
std::string selected_kernel_name;
|
||||
};
|
||||
|
||||
// Positional initializers for eltwise_layout_test_params:
//   mode, input0 size, input1 size, input0 format, input1 format, expected kernel name.

// Cases 1-4: input0 in b_fs_yx_fsv16, input1 in bfyx.
#define CASE_ELTWISE_TEST1 \
    eltwise_mode::sum, {1, 2, 1, 1}, {4, 2, 4, 4}, format::b_fs_yx_fsv16, format::bfyx, "generic_eltwise_ref"
#define CASE_ELTWISE_TEST2 \
    eltwise_mode::sum, {4, 1, 4, 4}, {1, 5, 1, 1}, format::b_fs_yx_fsv16, format::bfyx, "eltwise_b_fs_yx_fsv16"
#define CASE_ELTWISE_TEST3 \
    eltwise_mode::sum, {4, 5, 4, 1}, {4, 1, 4, 1}, format::b_fs_yx_fsv16, format::bfyx, "generic_eltwise_ref"
#define CASE_ELTWISE_TEST4 \
    eltwise_mode::sum, {4, 2, 4, 4}, {1, 1, 1, 1}, format::b_fs_yx_fsv16, format::bfyx, "eltwise_b_fs_yx_fsv16"

// Cases 5-8: same shapes with the formats swapped (input0 in bfyx, input1 in b_fs_yx_fsv16).
#define CASE_ELTWISE_TEST5 \
    eltwise_mode::sum, {1, 2, 1, 1}, {4, 2, 4, 4}, format::bfyx, format::b_fs_yx_fsv16, "generic_eltwise_ref"
#define CASE_ELTWISE_TEST6 \
    eltwise_mode::sum, {4, 1, 4, 4}, {1, 5, 1, 1}, format::bfyx, format::b_fs_yx_fsv16, "generic_eltwise_ref"
#define CASE_ELTWISE_TEST7 \
    eltwise_mode::sum, {4, 5, 4, 1}, {4, 1, 4, 1}, format::bfyx, format::b_fs_yx_fsv16, "generic_eltwise_ref"
#define CASE_ELTWISE_TEST8 \
    eltwise_mode::sum, {4, 2, 4, 4}, {1, 1, 1, 1}, format::bfyx, format::b_fs_yx_fsv16, "generic_eltwise_ref"
|
||||
|
||||
class eltwise_layout_test : public BaseEltwiseTest<eltwise_layout_test_params> {
|
||||
};
|
||||
|
||||
class eltwise_test_mixed_layout : public eltwise_layout_test {};
|
||||
// Runs eltwise on two f32 inputs that are each reordered to their own format
// (input -> reorder -> eltwise -> reorder to bfyx), then checks that
//   * the primitive info of "eltwise" mentions the expected kernel name, and
//   * the output matches eltwise_ref element by element.
TEST_P(eltwise_test_mixed_layout, mixed_layout) {
    const auto& p = GetParam();
    const std::string& selected_kernel = p.selected_kernel_name;

    const int min_random = -2;
    const int max_random = 2;

    // Size vectors are indexed [0]=b, [1]=f, [2]=y, [3]=x; the two middle
    // arguments of generate_random_6d are fixed to 1.
    const auto make_random = [&](const std::vector<int32_t>& dims) {
        const int b = dims[0];
        const int f = dims[1];
        const int y = dims[2];
        const int x = dims[3];
        return generate_random_6d<float>(b, f, 1, 1, y, x, min_random, max_random);
    };

    // Generation order (input 0 first, then input 1) is kept as-is.
    VVVVVVF<float> input1_rnd = make_random(p.input0_size);
    VVVVVVF<float> input2_rnd = make_random(p.input1_size);
    VF<float> input1_rnd_vec = flatten_6d<float>(format::bfwzyx, input1_rnd);
    VF<float> input2_rnd_vec = flatten_6d<float>(format::bfwzyx, input2_rnd);

    const auto& engine = get_test_engine();
    auto in0_size = tensor(format::bfyx, p.input0_size);
    auto in1_size = tensor(format::bfyx, p.input1_size);

    // Both inputs are allocated as bfyx; the per-input format is applied by the reorders.
    auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx, in0_size });
    auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, in1_size });
    set_values(input1, input1_rnd_vec);
    set_values(input2, input2_rnd_vec);

    const primitive_id out_id = "out";

    topology topo;
    topo.add(input_layout("input1", input1.get_layout()));
    topo.add(input_layout("input2", input2.get_layout()));
    topo.add(reorder("reorder1", "input1", p.input0_format, data_types::f32));
    topo.add(reorder("reorder2", "input2", p.input1_format, data_types::f32));
    topo.add(eltwise("eltwise", {"reorder1", "reorder2"}, p.mode));
    topo.add(reorder("out", "eltwise", format::bfyx, data_types::f32));

    network network(engine, topo);
    network.set_input_data("input1", input1);
    network.set_input_data("input2", input2);

    auto outputs = network.execute();
    EXPECT_EQ(outputs.size(), size_t(1));
    EXPECT_EQ(outputs.begin()->first, out_id);

    // The expected kernel name must appear in the eltwise primitive info.
    EXPECT_TRUE(network.get_primitive_info("eltwise").find(selected_kernel) != std::string::npos);

    auto output_memory = outputs.at(out_id).get_memory();
    auto output_ptr = output_memory.pointer<float>();

    VF<float> output_cpu_vec = eltwise_ref(input1_rnd, input2_rnd, in0_size, in1_size, p.mode);
    for (size_t i = 0; i < output_cpu_vec.size(); ++i) {
        EXPECT_TRUE(!(std::isnan((float)output_cpu_vec[i]) && std::isnan((float)output_ptr[i])));
        ASSERT_FLOAT_EQ(output_cpu_vec[i], output_ptr[i]);
    }
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(eltwise, eltwise_test_mixed_layout,
|
||||
::testing::ValuesIn(std::vector<eltwise_layout_test_params>{
|
||||
eltwise_layout_test_params{CASE_ELTWISE_TEST1},
|
||||
eltwise_layout_test_params{CASE_ELTWISE_TEST2},
|
||||
eltwise_layout_test_params{CASE_ELTWISE_TEST3},
|
||||
eltwise_layout_test_params{CASE_ELTWISE_TEST4},
|
||||
eltwise_layout_test_params{CASE_ELTWISE_TEST5},
|
||||
eltwise_layout_test_params{CASE_ELTWISE_TEST6},
|
||||
eltwise_layout_test_params{CASE_ELTWISE_TEST7},
|
||||
eltwise_layout_test_params{CASE_ELTWISE_TEST8},
|
||||
}), );
|
||||
|
Loading…
Reference in New Issue
Block a user