[IE CLDNN] Fix regression for supporting eltwise fsv broadcasting (#5182)
This commit is contained in:
parent
5ba5e9b5d7
commit
887c8c46cc
@ -9,6 +9,14 @@
|
|||||||
|
|
||||||
namespace kernel_selector {
|
namespace kernel_selector {
|
||||||
|
|
||||||
|
static inline bool IsBroadcastingPossibleInput(const DataTensor& input, const DataTensor& output) {
|
||||||
|
if ((input.LogicalSize() == 1) ||
|
||||||
|
(input.LogicalSize() == output.Feature().v && input.Feature().v == output.Feature().v)) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
ParamsKey EltwiseKernel_b_fs_yx_fsv16::GetSupportedKey() const {
|
ParamsKey EltwiseKernel_b_fs_yx_fsv16::GetSupportedKey() const {
|
||||||
ParamsKey k;
|
ParamsKey k;
|
||||||
k.EnableInputDataType(Datatype::F16);
|
k.EnableInputDataType(Datatype::F16);
|
||||||
@ -34,7 +42,7 @@ ParamsKey EltwiseKernel_b_fs_yx_fsv16::GetSupportedKey() const {
|
|||||||
static inline size_t GetBlockSize(const eltwise_params& params) {
|
static inline size_t GetBlockSize(const eltwise_params& params) {
|
||||||
// Set blocksize 1 when broadcasting X dim
|
// Set blocksize 1 when broadcasting X dim
|
||||||
for (size_t i = 0; i < params.inputs.size(); i++) {
|
for (size_t i = 0; i < params.inputs.size(); i++) {
|
||||||
if (params.inputs[i].X().v == 1 && params.inputs[i].LogicalSize() != 1) {
|
if ((params.inputs[i].X().v == 1) && !IsBroadcastingPossibleInput(params.inputs[i], params.output)) {
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -56,9 +64,9 @@ static inline bool OpHasFeatureBroadcast(const eltwise_params& params, const siz
|
|||||||
for (size_t input_idx = 0; input_idx < ew.inputs.size(); input_idx++) {
|
for (size_t input_idx = 0; input_idx < ew.inputs.size(); input_idx++) {
|
||||||
const auto &input = ew.inputs[input_idx];
|
const auto &input = ew.inputs[input_idx];
|
||||||
if (input.mode == EltwiseInputMode::INPUT_BUFFER) {
|
if (input.mode == EltwiseInputMode::INPUT_BUFFER) {
|
||||||
if (params.inputs[input_idx].LogicalSize() != 1
|
if (params.inputs[input_idx].LogicalSize() != 1 &&
|
||||||
&& params.inputs[input_idx].Feature().v == 1
|
params.inputs[input_idx].Feature().v == 1 &&
|
||||||
&& params.output.Feature().v != 1) {
|
params.output.Feature().v != 1) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -193,31 +201,45 @@ JitConstants EltwiseKernel_b_fs_yx_fsv16::GetJitConstants(const eltwise_params&
|
|||||||
jit.Merge(MakeFusedOpsJitConstants(params, {conf}));
|
jit.Merge(MakeFusedOpsJitConstants(params, {conf}));
|
||||||
}
|
}
|
||||||
|
|
||||||
jit.AddConstant(MakeJitConstant("ELTWISE_BROADCAST", params.broadcast));
|
if (params.broadcast) {
|
||||||
|
bool need_idx_safe = true;
|
||||||
|
for (size_t i = 0; i < params.inputs.size(); i++) {
|
||||||
|
if (IsBroadcastingPossibleInput(params.inputs[i], params.output)) {
|
||||||
|
need_idx_safe = false;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (need_idx_safe)
|
||||||
|
jit.AddConstant(MakeJitConstant("ELTWISE_BROADCAST", params.broadcast));
|
||||||
|
}
|
||||||
|
|
||||||
return jit;
|
return jit;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool EltwiseKernel_b_fs_yx_fsv16::Validate(const Params& params, const optional_params& o) const {
|
bool EltwiseKernel_b_fs_yx_fsv16::Validate(const Params& p, const optional_params& o) const {
|
||||||
if (!EltwiseKernelBase::Validate(params, o)) {
|
if (!EltwiseKernelBase::Validate(p, o)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
const auto& ewParams = static_cast<const eltwise_params&>(params);
|
const auto& params = static_cast<const eltwise_params&>(p);
|
||||||
|
|
||||||
const auto& output = ewParams.output;
|
const auto count = params.output.PhysicalSize();
|
||||||
|
|
||||||
for (size_t i = 0; i < ewParams.inputs.size(); i++) {
|
if (count % 8 != 0)
|
||||||
if (ewParams.inputs[i].GetLayout() != DataLayout::b_fs_yx_fsv16 && GetBlockSize(ewParams) != 1) {
|
return false;
|
||||||
|
|
||||||
|
for (size_t i = 0; i < params.inputs.size(); i++) {
|
||||||
|
if ((params.inputs[i].GetLayout() != DataLayout::b_fs_yx_fsv16) &&
|
||||||
|
!IsBroadcastingPossibleInput(params.inputs[i], params.output)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
auto input0 = ewParams.inputs[0];
|
auto input0 = params.inputs[0];
|
||||||
|
|
||||||
// Check that padding before features doesn't miss-align the blocks
|
// Check that padding before features doesn't miss-align the blocks
|
||||||
auto feature_block_size = 16;
|
auto feature_block_size = 16;
|
||||||
if (input0.Feature().pad.before % feature_block_size != 0 || output.Feature().pad.before % feature_block_size != 0) {
|
if (input0.Feature().pad.before % feature_block_size != 0 || params.output.Feature().pad.before % feature_block_size != 0) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -240,10 +262,10 @@ bool EltwiseKernel_b_fs_yx_fsv16::Validate(const Params& params, const optional_
|
|||||||
return same;
|
return same;
|
||||||
};
|
};
|
||||||
|
|
||||||
for (size_t i = 1; i < ewParams.inputs.size(); i++) {
|
for (size_t i = 1; i < params.inputs.size(); i++) {
|
||||||
if (ewParams.inputs[i].LogicalSize() == input0.LogicalSize() && !(compareTensors(ewParams.inputs[i], input0)))
|
if (params.inputs[i].LogicalSize() == input0.LogicalSize() && !(compareTensors(params.inputs[i], input0)))
|
||||||
return false;
|
return false;
|
||||||
if (ewParams.inputs[i].Feature().pad.before % feature_block_size != 0) {
|
if (params.inputs[i].Feature().pad.before % feature_block_size != 0) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -3198,9 +3198,6 @@ using eltwise_test_params = std::tuple<eltwise_mode, data_types, std::vector<std
|
|||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
class BaseEltwiseTest : public ::testing::TestWithParam<T> {
|
class BaseEltwiseTest : public ::testing::TestWithParam<T> {
|
||||||
};
|
|
||||||
|
|
||||||
class eltwise_test : public BaseEltwiseTest<eltwise_test_params> {
|
|
||||||
public:
|
public:
|
||||||
template<typename T1, typename T2>
|
template<typename T1, typename T2>
|
||||||
VF<float> eltwise_ref(VVVVVVF<T1> input0, VVVVVVF<T2> input1, tensor input0_size, tensor input1_size, eltwise_mode mode) {
|
VF<float> eltwise_ref(VVVVVVF<T1> input0, VVVVVVF<T2> input1, tensor input0_size, tensor input1_size, eltwise_mode mode) {
|
||||||
@ -3253,6 +3250,9 @@ public:
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Fixture for eltwise tests parameterized by eltwise_test_params; it adds
// nothing of its own on top of BaseEltwiseTest.
class eltwise_test : public BaseEltwiseTest<eltwise_test_params> {};
|
||||||
|
|
||||||
TEST_P(eltwise_test, fsv16) {
|
TEST_P(eltwise_test, fsv16) {
|
||||||
auto p = GetParam();
|
auto p = GetParam();
|
||||||
|
|
||||||
@ -3322,6 +3322,7 @@ TEST_P(eltwise_test, fsv16) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static std::vector<eltwise_mode> modes = {eltwise_mode::sum, eltwise_mode::prod};
|
static std::vector<eltwise_mode> modes = {eltwise_mode::sum, eltwise_mode::prod};
|
||||||
static std::vector<data_types> types = {data_types::f32, data_types::f16};
|
static std::vector<data_types> types = {data_types::f32, data_types::f16};
|
||||||
static std::vector<std::vector<std::vector<int32_t>>> inputs = {
|
static std::vector<std::vector<std::vector<int32_t>>> inputs = {
|
||||||
@ -3520,3 +3521,102 @@ INSTANTIATE_TEST_CASE_P(eltwise, eltwise_test_mixed_precision,
|
|||||||
::testing::ValuesIn(mixed_types),
|
::testing::ValuesIn(mixed_types),
|
||||||
::testing::ValuesIn(inputs)
|
::testing::ValuesIn(inputs)
|
||||||
), );
|
), );
|
||||||
|
|
||||||
|
|
||||||
|
struct eltwise_layout_test_params {
|
||||||
|
eltwise_mode mode;
|
||||||
|
std::vector<int32_t> input0_size;
|
||||||
|
std::vector<int32_t> input1_size;
|
||||||
|
format input0_format;
|
||||||
|
format input1_format;
|
||||||
|
std::string selected_kernel_name;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Each macro expands to the initializer list of one eltwise_layout_test_params:
//   mode, input0 size, input1 size, input0 format, input1 format, expected kernel name.
// Cases 1-4 put the first input in b_fs_yx_fsv16 and the second in bfyx; cases 5-8 swap the layouts.
#define CASE_ELTWISE_TEST1  eltwise_mode::sum, {1, 2, 1, 1}, {4, 2, 4, 4}, format::b_fs_yx_fsv16, format::bfyx, "generic_eltwise_ref"
#define CASE_ELTWISE_TEST2  eltwise_mode::sum, {4, 1, 4, 4}, {1, 5, 1, 1}, format::b_fs_yx_fsv16, format::bfyx, "eltwise_b_fs_yx_fsv16"
#define CASE_ELTWISE_TEST3  eltwise_mode::sum, {4, 5, 4, 1}, {4, 1, 4, 1}, format::b_fs_yx_fsv16, format::bfyx, "generic_eltwise_ref"
#define CASE_ELTWISE_TEST4  eltwise_mode::sum, {4, 2, 4, 4}, {1, 1, 1, 1}, format::b_fs_yx_fsv16, format::bfyx, "eltwise_b_fs_yx_fsv16"
#define CASE_ELTWISE_TEST5  eltwise_mode::sum, {1, 2, 1, 1}, {4, 2, 4, 4}, format::bfyx, format::b_fs_yx_fsv16, "generic_eltwise_ref"
#define CASE_ELTWISE_TEST6  eltwise_mode::sum, {4, 1, 4, 4}, {1, 5, 1, 1}, format::bfyx, format::b_fs_yx_fsv16, "generic_eltwise_ref"
#define CASE_ELTWISE_TEST7  eltwise_mode::sum, {4, 5, 4, 1}, {4, 1, 4, 1}, format::bfyx, format::b_fs_yx_fsv16, "generic_eltwise_ref"
#define CASE_ELTWISE_TEST8  eltwise_mode::sum, {4, 2, 4, 4}, {1, 1, 1, 1}, format::bfyx, format::b_fs_yx_fsv16, "generic_eltwise_ref"
|
||||||
|
|
||||||
|
// Base fixture for eltwise tests parameterized by eltwise_layout_test_params.
class eltwise_layout_test : public BaseEltwiseTest<eltwise_layout_test_params> {};
|
||||||
|
|
||||||
|
// Fixture for the mixed-layout eltwise test cases.
class eltwise_test_mixed_layout : public eltwise_layout_test {};
|
||||||
|
// Runs an eltwise primitive on two inputs that are first reordered to the
// per-case layouts, checks that the expected kernel name appears in the
// primitive info, and compares the result with the CPU reference.
TEST_P(eltwise_test_mixed_layout, mixed_layout) {
    auto p = GetParam();

    auto mode = p.mode;
    auto input0_size = p.input0_size;
    auto input1_size = p.input1_size;
    auto format0 = p.input0_format;
    auto format1 = p.input1_format;
    auto selected_kernel = p.selected_kernel_name;

    // Sizes are given as {b, f, y, x}.
    int b0 = input0_size[0];
    int f0 = input0_size[1];
    int y0 = input0_size[2];
    int x0 = input0_size[3];

    int b1 = input1_size[0];
    int f1 = input1_size[1];
    int y1 = input1_size[2];
    int x1 = input1_size[3];

    int min_random = -2, max_random = 2;
    VVVVVVF<float> input1_rnd = generate_random_6d<float>(b0, f0, 1, 1, y0, x0, min_random, max_random);
    VVVVVVF<float> input2_rnd = generate_random_6d<float>(b1, f1, 1, 1, y1, x1, min_random, max_random);
    VF<float> input1_rnd_vec = flatten_6d<float>(format::bfwzyx, input1_rnd);
    VF<float> input2_rnd_vec = flatten_6d<float>(format::bfwzyx, input2_rnd);

    const auto& engine = get_test_engine();
    auto in0_size = tensor(format::bfyx, input0_size);
    auto in1_size = tensor(format::bfyx, input1_size);

    // Inputs are allocated in bfyx; the reorders below convert them to the layouts under test.
    auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx, in0_size });
    auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, in1_size });
    set_values(input1, input1_rnd_vec);
    set_values(input2, input2_rnd_vec);

    topology topology;
    topology.add(input_layout("input1", input1.get_layout()));
    topology.add(input_layout("input2", input2.get_layout()));
    topology.add(reorder("reorder1", "input1", format0, data_types::f32));
    topology.add(reorder("reorder2", "input2", format1, data_types::f32));
    topology.add(eltwise("eltwise", {"reorder1", "reorder2"}, mode));
    topology.add(reorder("out", "eltwise", format::bfyx, data_types::f32));
    primitive_id out_id = "out";

    network network(engine, topology);

    network.set_input_data("input1", input1);
    network.set_input_data("input2", input2);
    auto outputs = network.execute();

    // Fatal assertions: the code below dereferences outputs.begin() and calls
    // outputs.at(out_id), which must not run on an empty or mismatching map.
    ASSERT_EQ(outputs.size(), size_t(1));
    ASSERT_EQ(outputs.begin()->first, out_id);

    EXPECT_TRUE(network.get_primitive_info("eltwise").find(selected_kernel) != std::string::npos);

    auto output_memory = outputs.at(out_id).get_memory();
    auto output_ptr = output_memory.pointer<float>();

    VF<float> output_cpu_vec = eltwise_ref(input1_rnd, input2_rnd, in0_size, in1_size, mode);
    for (size_t i = 0; i < output_cpu_vec.size(); ++i) {
        const float expected = output_cpu_vec[i];
        const float actual = output_ptr[i];
        EXPECT_TRUE(!(std::isnan(expected) && std::isnan(actual)));
        ASSERT_FLOAT_EQ(expected, actual) << "mismatch at output index " << i;
    }
}
|
||||||
|
|
||||||
|
// Instantiates the mixed-layout test once for each CASE_ELTWISE_TEST* parameter set.
INSTANTIATE_TEST_CASE_P(eltwise, eltwise_test_mixed_layout,
                        ::testing::ValuesIn(std::vector<eltwise_layout_test_params>{
                            eltwise_layout_test_params{CASE_ELTWISE_TEST1},
                            eltwise_layout_test_params{CASE_ELTWISE_TEST2},
                            eltwise_layout_test_params{CASE_ELTWISE_TEST3},
                            eltwise_layout_test_params{CASE_ELTWISE_TEST4},
                            eltwise_layout_test_params{CASE_ELTWISE_TEST5},
                            eltwise_layout_test_params{CASE_ELTWISE_TEST6},
                            eltwise_layout_test_params{CASE_ELTWISE_TEST7},
                            eltwise_layout_test_params{CASE_ELTWISE_TEST8},
                        }), );
|
||||||
|
Loading…
Reference in New Issue
Block a user