[GPU] Merge kernel updates (#7699)
+ Fix a bug caused by a bias type mismatch in convolution_gpu_bfyx_to_bfyx_f16
+ Refactor depth_to_space_kernel_base and depth_to_space_kernel_ref
+ Adjust LWS selection in eltwise_kernel_base
parent ee93823b3a
commit 1e6fd56e01
@@ -27,18 +27,6 @@ bool DepthToSpaceKernelBase::Validate(const Params& p, const optional_params& o)
     return true;
 }
 
-CommonDispatchData DepthToSpaceKernelBase::SetDefault(const depth_to_space_params& params) const {
-    CommonDispatchData dispatchData;
-
-    dispatchData.gws = { params.output.Batch().v,
-                         params.output.Feature().v,
-                         params.output.Z().v * params.output.Y().v * params.output.X().v };
-
-    dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo);
-
-    return dispatchData;
-}
-
 JitConstants DepthToSpaceKernelBase::GetJitConstants(const depth_to_space_params& params) const {
     JitConstants jit = MakeBaseParamsJitConstants(params);
 
@@ -45,7 +45,7 @@ public:
 protected:
     bool Validate(const Params&, const optional_params&) const override;
     virtual JitConstants GetJitConstants(const depth_to_space_params& params) const;
-    virtual CommonDispatchData SetDefault(const depth_to_space_params& params) const;
+    virtual CommonDispatchData SetDefault(const depth_to_space_params& params) const = 0;
     KernelsData GetCommonKernelsData(const Params& params, const optional_params&) const;
 };
 }  // namespace kernel_selector
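Note: the two hunks above remove the shared dispatch logic from the base class and make SetDefault pure virtual, so each depth_to_space kernel now has to supply its own work sizes. A minimal C++ sketch of the resulting pattern, using simplified stand-in types rather than the real kernel_selector declarations:

    #include <array>
    #include <cstddef>

    // Simplified stand-ins for kernel_selector types (illustrative only).
    struct CommonDispatchData {
        std::array<size_t, 3> gws{1, 1, 1};  // global work size
        std::array<size_t, 3> lws{1, 1, 1};  // local work size
    };
    struct depth_to_space_params { size_t batch, feature, spatial; };

    class DepthToSpaceKernelBase {
    protected:
        // After this change the base no longer provides a default dispatch:
        // every concrete kernel must decide its own GWS/LWS.
        virtual CommonDispatchData SetDefault(const depth_to_space_params& params) const = 0;
    };

    class DepthToSpaceKernelRef : public DepthToSpaceKernelBase {
    protected:
        CommonDispatchData SetDefault(const depth_to_space_params& params) const override {
            CommonDispatchData d;
            d.gws = {params.batch, params.feature, params.spatial};
            d.lws = {1, 1, 1};  // kernel-specific choice; see the next hunk
            return d;
        }
    };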
@@ -28,6 +28,18 @@ ParamsKey DepthToSpaceKernelRef::GetSupportedKey() const {
     return k;
 }
 
+CommonDispatchData DepthToSpaceKernelRef::SetDefault(const depth_to_space_params& params) const {
+    CommonDispatchData dispatchData;
+
+    dispatchData.gws = { params.output.Batch().v,
+                         params.output.Feature().v,
+                         params.output.Z().v * params.output.Y().v * params.output.X().v };
+
+    // this kernel only supports bfyx and b_fs_yx_fsv16 layout.
+    dispatchData.lws = GetOptimalLocalWorkGroupSizes({1, dispatchData.gws[1], dispatchData.gws[2]}, params.engineInfo);
+    return dispatchData;
+}
+
 KernelsData DepthToSpaceKernelRef::GetKernelsData(const Params& params, const optional_params& options) const {
     return GetCommonKernelsData(params, options);
 }
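The ref kernel's new SetDefault asks for local sizes over {1, gws[1], gws[2]} rather than the full GWS, i.e. the batch axis is pinned to 1 while a local work-group shape is searched over feature and spatial sizes only. A rough divisor-based picker in that spirit (illustrative only; the real GetOptimalLocalWorkGroupSizes in kernel_selector uses its own heuristics):

    #include <array>
    #include <cstddef>

    // Toy heuristic: per axis, take the largest divisor of gws[i] that keeps
    // the total work-group size under max_wg. Not the real implementation.
    std::array<size_t, 3> pick_lws(std::array<size_t, 3> gws, size_t max_wg) {
        std::array<size_t, 3> lws{1, 1, 1};
        size_t total = 1;
        for (int i = 2; i >= 0; --i) {              // favour the fastest-varying axes
            for (size_t d = gws[i]; d >= 1; --d) {
                if (gws[i] % d == 0 && total * d <= max_wg) {
                    lws[i] = d;
                    total *= d;
                    break;
                }
            }
        }
        return lws;
    }

    // Passing {1, gws[1], gws[2]}, as the new SetDefault does, forces lws for
    // the batch axis to 1, so work-groups never span the batch dimension.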
@@ -14,6 +14,7 @@ public:
     DepthToSpaceKernelRef() : DepthToSpaceKernelBase("depth_to_space_ref") {}
     virtual ~DepthToSpaceKernelRef() {}
 
+    CommonDispatchData SetDefault(const depth_to_space_params& params) const override;
     KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
     KernelsPriority GetKernelsPriority(const Params& params, const optional_params& options) const override;
     ParamsKey GetSupportedKey() const override;
@@ -620,6 +620,13 @@ EltwiseKernelBase::DispatchData EltwiseKernelBase::SetDefault(const eltwise_para
             dispatchData.lws[1] = bs_fsv32_local[2];
             dispatchData.lws[2] = bs_fsv32_local[0];
         }
+    } else if (params.output.GetLayout() == DataLayout::bs_fs_yx_bsv32_fsv16 &&
+               (params.output.Feature().v % 16 != 0 || dispatchData.gws[1] % 16 != 0)) {
+        auto bs_fsv16_local = GetLimitedOptimalLocalWorkGroupSizes({dispatchData.gws[1], dispatchData.gws[2], dispatchData.gws[0]},
+                                                                   params.engineInfo, {16, 32, params.engineInfo.maxWorkGroupSize});
+        dispatchData.lws[0] = bs_fsv16_local[2];
+        dispatchData.lws[1] = bs_fsv16_local[0];
+        dispatchData.lws[2] = bs_fsv16_local[1];
     } else {
         dispatchData.lws[0] = local[0];
         dispatchData.lws[1] = local[1];
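In the new bs_fs_yx_bsv32_fsv16 branch, candidate local sizes are computed over a re-ordered triple {gws[1], gws[2], gws[0]} (feature, spatial, batch) with per-axis caps {16, 32, maxWorkGroupSize}, then mapped back onto lws[0..2]. A small sketch of that index shuffle, with limited_lws as a hypothetical stand-in for GetLimitedOptimalLocalWorkGroupSizes (grossly simplified; the real helper picks divisors and bounds the total size):

    #include <algorithm>
    #include <array>
    #include <cstddef>

    // Hypothetical stand-in: clamp each axis to its cap (real code picks divisors).
    std::array<size_t, 3> limited_lws(std::array<size_t, 3> gws, std::array<size_t, 3> caps) {
        return { std::min(gws[0], caps[0]), std::min(gws[1], caps[1]), std::min(gws[2], caps[2]) };
    }

    void set_bsv32_fsv16_lws(const std::array<size_t, 3>& gws, size_t max_wg,
                             std::array<size_t, 3>& lws) {
        // Search order is {feature, spatial, batch}; caps are {16, 32, max_wg}.
        auto local = limited_lws({gws[1], gws[2], gws[0]}, {16, 32, max_wg});
        // Map the reordered result back onto the original axes.
        lws[0] = local[2];  // batch
        lws[1] = local[0];  // feature
        lws[2] = local[1];  // spatial (y*x)
    }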
@@ -9,6 +9,14 @@
 
 #define DT_OUTPUT_BLOCK_WRITEN(ptr, offset, val) BLOCK_WRITEN(OUTPUT_TYPE, OUTPUT_X_BLOCK_SIZE, ptr, offset, val)
 
+#define OUTPUT_PACKED_TYPE MAKE_VECTOR_TYPE(OUTPUT_TYPE, OUTPUT_X_BLOCK_SIZE)
+
+#define TO_OUTPUT_PACKED_TYPE CAT(convert_, OUTPUT_PACKED_TYPE)
+
+#if defined(BIAS_TYPE_SIZE) && FILTER_TYPE_SIZE != BIAS_TYPE_SIZE
+#error "convolution_gpu_bfyx_to_bfyx_f16: Filter and bias has different data type."
+#endif
+
 __attribute__((intel_reqd_sub_group_size(SUB_GROUP_SIZE)))
 __attribute__((reqd_work_group_size(1, SUB_GROUP_SIZE, 1)))
 KERNEL(convolution_bfyx_to_bfyx_f16)(
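The new #if block turns an unsupported filter/bias type combination into a build error: if BIAS_TYPE_SIZE is defined and differs from FILTER_TYPE_SIZE, the kernel fails to compile instead of silently misreading the bias buffer. The same guard pattern in plain C++ preprocessor terms (the defines below are illustrative placeholders for what the jitter would emit):

    // Illustrative sizes standing in for the jitted values.
    #define FILTER_TYPE_SIZE 2   // e.g. half
    #define BIAS_TYPE_SIZE   2   // change to 4 (float) to trigger the error

    #if defined(BIAS_TYPE_SIZE) && FILTER_TYPE_SIZE != BIAS_TYPE_SIZE
    #error "filter and bias element sizes differ; the kernel would read bias bits with the wrong type"
    #endif

    int main() { return 0; }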
@@ -89,7 +97,7 @@ KERNEL(convolution_bfyx_to_bfyx_f16)(
     bias_offset += split_idx * BIAS_LENGTH;
 #   endif
 
-    MAKE_VECTOR_TYPE(INPUT0_TYPE, OUTPUT_X_BLOCK_SIZE) dst = (MAKE_VECTOR_TYPE(INPUT0_TYPE, OUTPUT_X_BLOCK_SIZE))(DT_INPUT_BLOCK_READ(biases, bias_offset));
+    MAKE_VECTOR_TYPE(INPUT0_TYPE, OUTPUT_X_BLOCK_SIZE) dst = (MAKE_VECTOR_TYPE(INPUT0_TYPE, OUTPUT_X_BLOCK_SIZE))(DT_BIAS_BLOCK_READ(biases, bias_offset));
 #else
     MAKE_VECTOR_TYPE(INPUT0_TYPE, OUTPUT_X_BLOCK_SIZE) dst = INPUT0_VAL_ZERO;
 #endif
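This is the bias fix itself: the bias buffer is now read with a bias-typed block read (DT_BIAS_BLOCK_READ) instead of the input-typed one, so when the bias element type differs from the input type the bytes are interpreted correctly. A C++ analogy of the bug class, not the kernel code: reading a buffer through a read typed for a different element reinterprets bits instead of converting values.

    #include <cstdint>
    #include <cstring>
    #include <iostream>

    int main() {
        float bias = 1.5f;                    // bias stored as float

        uint16_t wrong;                       // "block read" typed for a 16-bit element
        std::memcpy(&wrong, &bias, sizeof(wrong));   // reinterprets the low bytes

        float right = bias;                   // read with the buffer's own type

        std::cout << "reinterpreted: " << wrong << ", converted: " << right << "\n";
    }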
@@ -143,9 +151,9 @@ KERNEL(convolution_bfyx_to_bfyx_f16)(
         }
     }
 
-    MAKE_VECTOR_TYPE(OUTPUT_TYPE, OUTPUT_X_BLOCK_SIZE) res;
+    OUTPUT_PACKED_TYPE res;
 #ifndef HAS_FUSED_OPS
-    res = ACTIVATION(dst, ACTIVATION_PARAMS);
+    res = TO_OUTPUT_PACKED_TYPE(ACTIVATION(dst, ACTIVATION_PARAMS));
 #endif
 
 #if OUTPUT_LEFTOVERS
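Here res is declared with the new OUTPUT_PACKED_TYPE and the activation result, which is computed in the input element type, is explicitly converted with TO_OUTPUT_PACKED_TYPE before being stored, rather than relying on an assignment between differently typed vectors. Roughly the same idea in C++ terms (the types below are placeholders for the jitted OUTPUT_TYPE and block size):

    #include <array>
    #include <cstddef>
    #include <cstdint>

    using output_t = int8_t;   // placeholder for OUTPUT_TYPE

    // Explicit per-lane conversion of a float accumulator vector into the
    // packed output vector, mirroring TO_OUTPUT_PACKED_TYPE(...) in the kernel.
    template <size_t N>
    std::array<output_t, N> to_output_packed(const std::array<float, N>& acc) {
        std::array<output_t, N> out{};
        for (size_t i = 0; i < N; ++i)
            out[i] = static_cast<output_t>(acc[i]);
        return out;
    }

    int main() {
        std::array<float, 8> acc{};            // OUTPUT_X_BLOCK_SIZE == 8, say
        auto res = to_output_packed(acc);      // explicit conversion, as in the hunk
        (void)res;
    }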