[GPU] Move adding biases to the end of the convolution_bfyx_to_bfyx_f16 kernel (#10533)

This commit is contained in:
Andrei Molotkov
2022-02-21 09:30:00 +03:00
committed by GitHub
parent ea3bd087c4
commit 575ded54a9

View File

@@ -90,17 +90,7 @@ KERNEL(convolution_bfyx_to_bfyx_f16)(
const uint filter_offset = f_block * filter_os_pitch;
#endif
#if BIAS_TERM
uint bias_offset = f_block * FEATURE_SLICE_SIZE;
# if GROUPED && !DEPTHWISE_SEPARABLE_OPT
bias_offset += split_idx * BIAS_LENGTH;
# endif
MAKE_VECTOR_TYPE(INPUT0_TYPE, OUTPUT_X_BLOCK_SIZE) dst = (MAKE_VECTOR_TYPE(INPUT0_TYPE, OUTPUT_X_BLOCK_SIZE))(DT_BIAS_BLOCK_READ(biases, bias_offset));
#else
MAKE_VECTOR_TYPE(INPUT0_TYPE, OUTPUT_X_BLOCK_SIZE) dst = INPUT0_VAL_ZERO;
#endif
INPUT0_TYPE line_cache[INPUT0_FEATURE_NUM * INPUT_BLOCK_SIZE];
for (int ic = 0; ic < INPUT0_FEATURE_NUM; ic++)
@@ -151,6 +141,16 @@ KERNEL(convolution_bfyx_to_bfyx_f16)(
}
}
#if BIAS_TERM
uint bias_offset = f_block * FEATURE_SLICE_SIZE;
# if GROUPED && !DEPTHWISE_SEPARABLE_OPT
bias_offset += split_idx * BIAS_LENGTH;
# endif
dst += (MAKE_VECTOR_TYPE(INPUT0_TYPE, OUTPUT_X_BLOCK_SIZE))(DT_BIAS_BLOCK_READ(biases, bias_offset));
#endif
OUTPUT_PACKED_TYPE res;
#ifndef HAS_FUSED_OPS
res = TO_OUTPUT_PACKED_TYPE(ACTIVATION(dst, ACTIVATION_PARAMS));