[GPU] Merge kernel updates (#7699)

+ fix a bug caused by a bias data-type mismatch
  - convolution_gpu_bfyx_to_bfyx_f16

+ refactoring
  - depth_to_space_kernel_base
  - depth_to_space_kernel_ref

+ Adjusting LWS
  - eltwise_kernel_base
This commit is contained in:
Jade Cho 2021-09-29 17:10:53 +09:00 committed by GitHub
parent ee93823b3a
commit 1e6fd56e01
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 32 additions and 16 deletions

View File

@ -27,18 +27,6 @@ bool DepthToSpaceKernelBase::Validate(const Params& p, const optional_params& o)
return true; return true;
} }
CommonDispatchData DepthToSpaceKernelBase::SetDefault(const depth_to_space_params& params) const {
    // Map the output tensor onto a 3D dispatch: {batch, feature, flattened spatial}.
    CommonDispatchData dd;
    const auto& out = params.output;
    const size_t spatial = out.Z().v * out.Y().v * out.X().v;
    dd.gws = { out.Batch().v, out.Feature().v, spatial };
    // Let the helper derive a local size that fits the device limits for this gws.
    dd.lws = GetOptimalLocalWorkGroupSizes(dd.gws, params.engineInfo);
    return dd;
}
JitConstants DepthToSpaceKernelBase::GetJitConstants(const depth_to_space_params& params) const { JitConstants DepthToSpaceKernelBase::GetJitConstants(const depth_to_space_params& params) const {
JitConstants jit = MakeBaseParamsJitConstants(params); JitConstants jit = MakeBaseParamsJitConstants(params);

View File

@ -45,7 +45,7 @@ public:
protected: protected:
bool Validate(const Params&, const optional_params&) const override; bool Validate(const Params&, const optional_params&) const override;
virtual JitConstants GetJitConstants(const depth_to_space_params& params) const; virtual JitConstants GetJitConstants(const depth_to_space_params& params) const;
virtual CommonDispatchData SetDefault(const depth_to_space_params& params) const; virtual CommonDispatchData SetDefault(const depth_to_space_params& params) const = 0;
KernelsData GetCommonKernelsData(const Params& params, const optional_params&) const; KernelsData GetCommonKernelsData(const Params& params, const optional_params&) const;
}; };
} // namespace kernel_selector } // namespace kernel_selector

View File

@ -28,6 +28,18 @@ ParamsKey DepthToSpaceKernelRef::GetSupportedKey() const {
return k; return k;
} }
CommonDispatchData DepthToSpaceKernelRef::SetDefault(const depth_to_space_params& params) const {
    // Global size: one work-item per output element, laid out as
    // {batch, feature, z*y*x}.
    CommonDispatchData dispatch;
    const auto& out = params.output;
    dispatch.gws = { out.Batch().v, out.Feature().v, out.Z().v * out.Y().v * out.X().v };
    // This kernel only supports the bfyx and b_fs_yx_fsv16 layouts, so the
    // batch dimension is pinned to 1 when choosing the optimal local size.
    dispatch.lws = GetOptimalLocalWorkGroupSizes({1, dispatch.gws[1], dispatch.gws[2]}, params.engineInfo);
    return dispatch;
}
KernelsData DepthToSpaceKernelRef::GetKernelsData(const Params& params, const optional_params& options) const { KernelsData DepthToSpaceKernelRef::GetKernelsData(const Params& params, const optional_params& options) const {
return GetCommonKernelsData(params, options); return GetCommonKernelsData(params, options);
} }

View File

@ -14,6 +14,7 @@ public:
DepthToSpaceKernelRef() : DepthToSpaceKernelBase("depth_to_space_ref") {} DepthToSpaceKernelRef() : DepthToSpaceKernelBase("depth_to_space_ref") {}
virtual ~DepthToSpaceKernelRef() {} virtual ~DepthToSpaceKernelRef() {}
CommonDispatchData SetDefault(const depth_to_space_params& params) const override;
KernelsData GetKernelsData(const Params& params, const optional_params& options) const override; KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
KernelsPriority GetKernelsPriority(const Params& params, const optional_params& options) const override; KernelsPriority GetKernelsPriority(const Params& params, const optional_params& options) const override;
ParamsKey GetSupportedKey() const override; ParamsKey GetSupportedKey() const override;

View File

@ -620,6 +620,13 @@ EltwiseKernelBase::DispatchData EltwiseKernelBase::SetDefault(const eltwise_para
dispatchData.lws[1] = bs_fsv32_local[2]; dispatchData.lws[1] = bs_fsv32_local[2];
dispatchData.lws[2] = bs_fsv32_local[0]; dispatchData.lws[2] = bs_fsv32_local[0];
} }
} else if (params.output.GetLayout() == DataLayout::bs_fs_yx_bsv32_fsv16 &&
(params.output.Feature().v % 16 != 0 || dispatchData.gws[1] % 16 != 0)) {
auto bs_fsv16_local = GetLimitedOptimalLocalWorkGroupSizes({dispatchData.gws[1], dispatchData.gws[2], dispatchData.gws[0]},
params.engineInfo, {16, 32, params.engineInfo.maxWorkGroupSize});
dispatchData.lws[0] = bs_fsv16_local[2];
dispatchData.lws[1] = bs_fsv16_local[0];
dispatchData.lws[2] = bs_fsv16_local[1];
} else { } else {
dispatchData.lws[0] = local[0]; dispatchData.lws[0] = local[0];
dispatchData.lws[1] = local[1]; dispatchData.lws[1] = local[1];

View File

@ -9,6 +9,14 @@
#define DT_OUTPUT_BLOCK_WRITEN(ptr, offset, val) BLOCK_WRITEN(OUTPUT_TYPE, OUTPUT_X_BLOCK_SIZE, ptr, offset, val) #define DT_OUTPUT_BLOCK_WRITEN(ptr, offset, val) BLOCK_WRITEN(OUTPUT_TYPE, OUTPUT_X_BLOCK_SIZE, ptr, offset, val)
#define OUTPUT_PACKED_TYPE MAKE_VECTOR_TYPE(OUTPUT_TYPE, OUTPUT_X_BLOCK_SIZE)
#define TO_OUTPUT_PACKED_TYPE CAT(convert_, OUTPUT_PACKED_TYPE)
#if defined(BIAS_TYPE_SIZE) && FILTER_TYPE_SIZE != BIAS_TYPE_SIZE
#error "convolution_gpu_bfyx_to_bfyx_f16: Filter and bias has different data type."
#endif
__attribute__((intel_reqd_sub_group_size(SUB_GROUP_SIZE))) __attribute__((intel_reqd_sub_group_size(SUB_GROUP_SIZE)))
__attribute__((reqd_work_group_size(1, SUB_GROUP_SIZE, 1))) __attribute__((reqd_work_group_size(1, SUB_GROUP_SIZE, 1)))
KERNEL(convolution_bfyx_to_bfyx_f16)( KERNEL(convolution_bfyx_to_bfyx_f16)(
@ -89,7 +97,7 @@ KERNEL(convolution_bfyx_to_bfyx_f16)(
bias_offset += split_idx * BIAS_LENGTH; bias_offset += split_idx * BIAS_LENGTH;
# endif # endif
MAKE_VECTOR_TYPE(INPUT0_TYPE, OUTPUT_X_BLOCK_SIZE) dst = (MAKE_VECTOR_TYPE(INPUT0_TYPE, OUTPUT_X_BLOCK_SIZE))(DT_INPUT_BLOCK_READ(biases, bias_offset)); MAKE_VECTOR_TYPE(INPUT0_TYPE, OUTPUT_X_BLOCK_SIZE) dst = (MAKE_VECTOR_TYPE(INPUT0_TYPE, OUTPUT_X_BLOCK_SIZE))(DT_BIAS_BLOCK_READ(biases, bias_offset));
#else #else
MAKE_VECTOR_TYPE(INPUT0_TYPE, OUTPUT_X_BLOCK_SIZE) dst = INPUT0_VAL_ZERO; MAKE_VECTOR_TYPE(INPUT0_TYPE, OUTPUT_X_BLOCK_SIZE) dst = INPUT0_VAL_ZERO;
#endif #endif
@ -143,9 +151,9 @@ KERNEL(convolution_bfyx_to_bfyx_f16)(
} }
} }
MAKE_VECTOR_TYPE(OUTPUT_TYPE, OUTPUT_X_BLOCK_SIZE) res; OUTPUT_PACKED_TYPE res;
#ifndef HAS_FUSED_OPS #ifndef HAS_FUSED_OPS
res = ACTIVATION(dst, ACTIVATION_PARAMS); res = TO_OUTPUT_PACKED_TYPE(ACTIVATION(dst, ACTIVATION_PARAMS));
#endif #endif
#if OUTPUT_LEFTOVERS #if OUTPUT_LEFTOVERS