[GPU] Merge kernel updates (#7699)
+ Fix a bug caused by a bias type mismatch - convolution_gpu_bfyx_to_bfyx_f16 + Refactor - depth_to_space_kernel_base - depth_to_space_kernel_ref + Adjust LWS - eltwise_kernel_base
This commit is contained in:
parent
ee93823b3a
commit
1e6fd56e01
@ -27,18 +27,6 @@ bool DepthToSpaceKernelBase::Validate(const Params& p, const optional_params& o)
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
CommonDispatchData DepthToSpaceKernelBase::SetDefault(const depth_to_space_params& params) const {
|
|
||||||
CommonDispatchData dispatchData;
|
|
||||||
|
|
||||||
dispatchData.gws = { params.output.Batch().v,
|
|
||||||
params.output.Feature().v,
|
|
||||||
params.output.Z().v * params.output.Y().v * params.output.X().v };
|
|
||||||
|
|
||||||
dispatchData.lws = GetOptimalLocalWorkGroupSizes(dispatchData.gws, params.engineInfo);
|
|
||||||
|
|
||||||
return dispatchData;
|
|
||||||
}
|
|
||||||
|
|
||||||
JitConstants DepthToSpaceKernelBase::GetJitConstants(const depth_to_space_params& params) const {
|
JitConstants DepthToSpaceKernelBase::GetJitConstants(const depth_to_space_params& params) const {
|
||||||
JitConstants jit = MakeBaseParamsJitConstants(params);
|
JitConstants jit = MakeBaseParamsJitConstants(params);
|
||||||
|
|
||||||
|
@ -45,7 +45,7 @@ public:
|
|||||||
protected:
|
protected:
|
||||||
bool Validate(const Params&, const optional_params&) const override;
|
bool Validate(const Params&, const optional_params&) const override;
|
||||||
virtual JitConstants GetJitConstants(const depth_to_space_params& params) const;
|
virtual JitConstants GetJitConstants(const depth_to_space_params& params) const;
|
||||||
virtual CommonDispatchData SetDefault(const depth_to_space_params& params) const;
|
virtual CommonDispatchData SetDefault(const depth_to_space_params& params) const = 0;
|
||||||
KernelsData GetCommonKernelsData(const Params& params, const optional_params&) const;
|
KernelsData GetCommonKernelsData(const Params& params, const optional_params&) const;
|
||||||
};
|
};
|
||||||
} // namespace kernel_selector
|
} // namespace kernel_selector
|
||||||
|
@ -28,6 +28,18 @@ ParamsKey DepthToSpaceKernelRef::GetSupportedKey() const {
|
|||||||
return k;
|
return k;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
CommonDispatchData DepthToSpaceKernelRef::SetDefault(const depth_to_space_params& params) const {
|
||||||
|
CommonDispatchData dispatchData;
|
||||||
|
|
||||||
|
dispatchData.gws = { params.output.Batch().v,
|
||||||
|
params.output.Feature().v,
|
||||||
|
params.output.Z().v * params.output.Y().v * params.output.X().v };
|
||||||
|
|
||||||
|
// this kernel only supports bfyx and b_fs_yx_fsv16 layout.
|
||||||
|
dispatchData.lws = GetOptimalLocalWorkGroupSizes({1, dispatchData.gws[1], dispatchData.gws[2]}, params.engineInfo);
|
||||||
|
return dispatchData;
|
||||||
|
}
|
||||||
|
|
||||||
KernelsData DepthToSpaceKernelRef::GetKernelsData(const Params& params, const optional_params& options) const {
|
KernelsData DepthToSpaceKernelRef::GetKernelsData(const Params& params, const optional_params& options) const {
|
||||||
return GetCommonKernelsData(params, options);
|
return GetCommonKernelsData(params, options);
|
||||||
}
|
}
|
||||||
|
@ -14,6 +14,7 @@ public:
|
|||||||
DepthToSpaceKernelRef() : DepthToSpaceKernelBase("depth_to_space_ref") {}
|
DepthToSpaceKernelRef() : DepthToSpaceKernelBase("depth_to_space_ref") {}
|
||||||
virtual ~DepthToSpaceKernelRef() {}
|
virtual ~DepthToSpaceKernelRef() {}
|
||||||
|
|
||||||
|
CommonDispatchData SetDefault(const depth_to_space_params& params) const override;
|
||||||
KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
|
KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
|
||||||
KernelsPriority GetKernelsPriority(const Params& params, const optional_params& options) const override;
|
KernelsPriority GetKernelsPriority(const Params& params, const optional_params& options) const override;
|
||||||
ParamsKey GetSupportedKey() const override;
|
ParamsKey GetSupportedKey() const override;
|
||||||
|
@ -620,6 +620,13 @@ EltwiseKernelBase::DispatchData EltwiseKernelBase::SetDefault(const eltwise_para
|
|||||||
dispatchData.lws[1] = bs_fsv32_local[2];
|
dispatchData.lws[1] = bs_fsv32_local[2];
|
||||||
dispatchData.lws[2] = bs_fsv32_local[0];
|
dispatchData.lws[2] = bs_fsv32_local[0];
|
||||||
}
|
}
|
||||||
|
} else if (params.output.GetLayout() == DataLayout::bs_fs_yx_bsv32_fsv16 &&
|
||||||
|
(params.output.Feature().v % 16 != 0 || dispatchData.gws[1] % 16 != 0)) {
|
||||||
|
auto bs_fsv16_local = GetLimitedOptimalLocalWorkGroupSizes({dispatchData.gws[1], dispatchData.gws[2], dispatchData.gws[0]},
|
||||||
|
params.engineInfo, {16, 32, params.engineInfo.maxWorkGroupSize});
|
||||||
|
dispatchData.lws[0] = bs_fsv16_local[2];
|
||||||
|
dispatchData.lws[1] = bs_fsv16_local[0];
|
||||||
|
dispatchData.lws[2] = bs_fsv16_local[1];
|
||||||
} else {
|
} else {
|
||||||
dispatchData.lws[0] = local[0];
|
dispatchData.lws[0] = local[0];
|
||||||
dispatchData.lws[1] = local[1];
|
dispatchData.lws[1] = local[1];
|
||||||
|
@ -9,6 +9,14 @@
|
|||||||
|
|
||||||
#define DT_OUTPUT_BLOCK_WRITEN(ptr, offset, val) BLOCK_WRITEN(OUTPUT_TYPE, OUTPUT_X_BLOCK_SIZE, ptr, offset, val)
|
#define DT_OUTPUT_BLOCK_WRITEN(ptr, offset, val) BLOCK_WRITEN(OUTPUT_TYPE, OUTPUT_X_BLOCK_SIZE, ptr, offset, val)
|
||||||
|
|
||||||
|
#define OUTPUT_PACKED_TYPE MAKE_VECTOR_TYPE(OUTPUT_TYPE, OUTPUT_X_BLOCK_SIZE)
|
||||||
|
|
||||||
|
#define TO_OUTPUT_PACKED_TYPE CAT(convert_, OUTPUT_PACKED_TYPE)
|
||||||
|
|
||||||
|
#if defined(BIAS_TYPE_SIZE) && FILTER_TYPE_SIZE != BIAS_TYPE_SIZE
|
||||||
|
#error "convolution_gpu_bfyx_to_bfyx_f16: Filter and bias has different data type."
|
||||||
|
#endif
|
||||||
|
|
||||||
__attribute__((intel_reqd_sub_group_size(SUB_GROUP_SIZE)))
|
__attribute__((intel_reqd_sub_group_size(SUB_GROUP_SIZE)))
|
||||||
__attribute__((reqd_work_group_size(1, SUB_GROUP_SIZE, 1)))
|
__attribute__((reqd_work_group_size(1, SUB_GROUP_SIZE, 1)))
|
||||||
KERNEL(convolution_bfyx_to_bfyx_f16)(
|
KERNEL(convolution_bfyx_to_bfyx_f16)(
|
||||||
@ -89,7 +97,7 @@ KERNEL(convolution_bfyx_to_bfyx_f16)(
|
|||||||
bias_offset += split_idx * BIAS_LENGTH;
|
bias_offset += split_idx * BIAS_LENGTH;
|
||||||
# endif
|
# endif
|
||||||
|
|
||||||
MAKE_VECTOR_TYPE(INPUT0_TYPE, OUTPUT_X_BLOCK_SIZE) dst = (MAKE_VECTOR_TYPE(INPUT0_TYPE, OUTPUT_X_BLOCK_SIZE))(DT_INPUT_BLOCK_READ(biases, bias_offset));
|
MAKE_VECTOR_TYPE(INPUT0_TYPE, OUTPUT_X_BLOCK_SIZE) dst = (MAKE_VECTOR_TYPE(INPUT0_TYPE, OUTPUT_X_BLOCK_SIZE))(DT_BIAS_BLOCK_READ(biases, bias_offset));
|
||||||
#else
|
#else
|
||||||
MAKE_VECTOR_TYPE(INPUT0_TYPE, OUTPUT_X_BLOCK_SIZE) dst = INPUT0_VAL_ZERO;
|
MAKE_VECTOR_TYPE(INPUT0_TYPE, OUTPUT_X_BLOCK_SIZE) dst = INPUT0_VAL_ZERO;
|
||||||
#endif
|
#endif
|
||||||
@ -143,9 +151,9 @@ KERNEL(convolution_bfyx_to_bfyx_f16)(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
MAKE_VECTOR_TYPE(OUTPUT_TYPE, OUTPUT_X_BLOCK_SIZE) res;
|
OUTPUT_PACKED_TYPE res;
|
||||||
#ifndef HAS_FUSED_OPS
|
#ifndef HAS_FUSED_OPS
|
||||||
res = ACTIVATION(dst, ACTIVATION_PARAMS);
|
res = TO_OUTPUT_PACKED_TYPE(ACTIVATION(dst, ACTIVATION_PARAMS));
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if OUTPUT_LEFTOVERS
|
#if OUTPUT_LEFTOVERS
|
||||||
|
Loading…
Reference in New Issue
Block a user