[IE CLDNN] Fix eltwise fused ops in NO_PITCH_SAME_DIMS mode (#2454)
This commit is contained in:
parent
6b456e58a1
commit
534a7b81a9
@ -70,8 +70,9 @@ JitConstants EltwiseKernelRef::GetJitConstants(const eltwise_params& params) con
|
||||
idx_order = {"d6", "d5", "d4", "d3", "d2", "d1"};
|
||||
}
|
||||
|
||||
FusedOpsConfiguration conf = {"", idx_order, "res", input_dt, 1};
|
||||
jit.Merge(MakeFusedOpsJitConstants(params, {conf}));
|
||||
FusedOpsConfiguration tensor_coord = {"_TENSOR", idx_order, "res", input_dt, 1};
|
||||
FusedOpsConfiguration linear_coord = {"_LINEAR", {"d1"}, "res", input_dt, 1, LoadType::LT_UNALIGNED, BoundaryCheck::ENABLED, IndexType::LINEAR_OFFSET};
|
||||
jit.Merge(MakeFusedOpsJitConstants(params, {tensor_coord, linear_coord}));
|
||||
}
|
||||
|
||||
return jit;
|
||||
|
@ -117,8 +117,13 @@ KERNEL(eltwise)(
|
||||
DO_ELTWISE;
|
||||
|
||||
#if HAS_FUSED_OPS
|
||||
FUSED_OPS;
|
||||
OUTPUT_TYPE out = FUSED_OPS_RESULT;
|
||||
#if ELTWISE_NO_PITCH_SAME_DIMS
|
||||
FUSED_OPS_LINEAR;
|
||||
OUTPUT_TYPE out = FUSED_OPS_RESULT_LINEAR;
|
||||
#else
|
||||
FUSED_OPS_TENSOR;
|
||||
OUTPUT_TYPE out = FUSED_OPS_RESULT_TENSOR;
|
||||
#endif
|
||||
#else
|
||||
#define out res
|
||||
#endif
|
||||
|
@ -237,6 +237,19 @@ bool CheckInputsOutputNoPitchSameDims(const base_params& params) {
|
||||
(params.inputs[0].GetLayout() == DataLayout::b_fs_zyx_fsv16 && params.inputs[0].Feature().v % 16 != 0))
|
||||
return false;
|
||||
|
||||
if (params.fused_ops.size()) {
|
||||
for (auto fused_op : params.fused_ops) {
|
||||
for (size_t in = 0; in < fused_op.tensors.size(); in++) {
|
||||
if (fused_op.tensors[in].LogicalSize() == 1)
|
||||
continue;
|
||||
if ((fused_op.tensors[in].GetLayout() == DataLayout::b_fs_yx_fsv16 && fused_op.tensors[in].Feature().v % 16 != 0) ||
|
||||
(fused_op.tensors[in].GetLayout() == DataLayout::b_fs_zyx_fsv16 && fused_op.tensors[in].Feature().v % 16 != 0))
|
||||
return false;
|
||||
no_pitch_same_dims = no_pitch_same_dims && (params.inputs[0] == fused_op.tensors[in]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (size_t i = 1; i < params.inputs.size(); i++) {
|
||||
no_pitch_same_dims = no_pitch_same_dims && (params.inputs[0] == params.inputs[i]);
|
||||
|
||||
|
@ -6253,6 +6253,34 @@ INSTANTIATE_TEST_CASE_P(fusings_gpu,
|
||||
eltwise_test_params{CASE_ELTWISE_U8_FP16_3},
|
||||
}), );
|
||||
|
||||
// Parameterized fixture for an eltwise primitive followed by a fused quantize.
// NOTE(review): judging by the name, this is meant to exercise the eltwise
// kernel's NO_PITCH_SAME_DIMS path — confirm against the kernel selector.
class eltwise_no_pitches_same_dims_quantize : public EltwiseFusingTest {};
|
||||
// Builds input + input2 -> eltwise -> quantize -> reorder(f32) and runs it
// through the fixture's execute() with a tolerance of 1.
TEST_P(eltwise_no_pitches_same_dims_quantize, quantize_f32_output) {
|
||||
auto p = GetParam();
|
||||
create_topologies(input_layout("input", get_input_layout(p)),
|
||||
input_layout("input2", get_input_layout2(p)),
|
||||
eltwise("eltwise", {"input", "input2"}, p.mode, p.default_type),
|
||||
// Quantize input range: tensors laid out by get_per_channel_layout(p).
|
||||
data("in_lo", get_mem(get_per_channel_layout(p), min_random, 0)),
|
||||
data("in_hi", get_mem(get_per_channel_layout(p), 1, max_random)),
|
||||
// Quantize output range: single-element tensors holding -128 and 127.
|
||||
data("out_lo", get_mem(get_single_element_layout(p), -128)),
|
||||
data("out_hi", get_mem(get_single_element_layout(p), 127)),
|
||||
// NOTE(review): 256 is presumably the number of quantization levels and
|
||||
// p.input_type the quantize output type — confirm against the quantize primitive.
|
||||
quantize("quantize", "eltwise", "in_lo", "in_hi", "out_lo", "out_hi", 256, p.input_type),
|
||||
// Final reorder converts the quantize result to f32 in p.default_format.
|
||||
reorder("out", "quantize", p.default_format, data_types::f32));
|
||||
|
||||
tolerance = 1.f;
|
||||
execute(p);
|
||||
}
|
||||
|
||||
// Instantiates eltwise_no_pitches_same_dims_quantize under the "fusings_gpu"
// prefix with six parameter sets: CASE_ELTWISE_FP16_1..3 and CASE_ELTWISE_FP32_1..3.
INSTANTIATE_TEST_CASE_P(fusings_gpu,
|
||||
eltwise_no_pitches_same_dims_quantize,
|
||||
::testing::ValuesIn(std::vector<eltwise_test_params>{
|
||||
eltwise_test_params{CASE_ELTWISE_FP16_1},
|
||||
eltwise_test_params{CASE_ELTWISE_FP16_2},
|
||||
eltwise_test_params{CASE_ELTWISE_FP16_3},
|
||||
eltwise_test_params{CASE_ELTWISE_FP32_1},
|
||||
eltwise_test_params{CASE_ELTWISE_FP32_2},
|
||||
eltwise_test_params{CASE_ELTWISE_FP32_3},
|
||||
}), );
|
||||
|
||||
class eltwise_activation : public EltwiseFusingTest {};
|
||||
TEST_P(eltwise_activation, basic) {
|
||||
auto p = GetParam();
|
||||
|
Loading…
Reference in New Issue
Block a user