[GPU] Fix some bugs in the cldnn eltwise kernel for the bs_fs_yx_bsv32_fsv16 format. (#9198)
* Fix some bugs in the cldnn eltwise kernel for the bs_fs_yx_bsv32_fsv16 format. + Add a condition so that the eltwise_simple_vload8 kernel is not selected when the tensor is not aligned to bsv32_fsv16 or bsv32_fsv32. + Optimize the gws/lws of the eltwise_ref kernel for the bsv32_fsv16 format. * Check the feature alignment of b_fs_yx_fsv32 for the eltwise vload8 kernel. + Minor fix for the OV_GPU_Help option.
This commit is contained in:
parent
63121e28ca
commit
7bcca1b82d
@ -624,11 +624,11 @@ EltwiseKernelBase::DispatchData EltwiseKernelBase::SetDefault(const eltwise_para
|
||||
}
|
||||
} else if (params.output.GetLayout() == DataLayout::bs_fs_yx_bsv32_fsv16 &&
|
||||
(params.output.Feature().v % 16 != 0 || dispatchData.gws[1] % 16 != 0)) {
|
||||
auto bs_fsv16_local = GetLimitedOptimalLocalWorkGroupSizes({dispatchData.gws[1], dispatchData.gws[2], dispatchData.gws[0]},
|
||||
params.engineInfo, {16, 32, params.engineInfo.maxWorkGroupSize});
|
||||
dispatchData.lws[0] = bs_fsv16_local[2];
|
||||
dispatchData.lws[1] = bs_fsv16_local[0];
|
||||
dispatchData.lws[2] = bs_fsv16_local[1];
|
||||
auto bs_fsv16_local = GetLimitedOptimalLocalWorkGroupSizes({dispatchData.gws[2], dispatchData.gws[0], dispatchData.gws[1]},
|
||||
params.engineInfo, {32 * 16, 1024, 1024});
|
||||
dispatchData.lws[0] = bs_fsv16_local[1];
|
||||
dispatchData.lws[1] = bs_fsv16_local[2];
|
||||
dispatchData.lws[2] = bs_fsv16_local[0];
|
||||
} else {
|
||||
dispatchData.lws[0] = local[0];
|
||||
dispatchData.lws[1] = local[1];
|
||||
|
@ -32,18 +32,29 @@ bool EltwiseKernel_vload8::Validate(const Params& params, const optional_params&
|
||||
|
||||
const auto& ewParams = static_cast<const eltwise_params&>(params);
|
||||
|
||||
for (size_t i = 0; i < ewParams.inputs.size(); i++) {
|
||||
if ((ewParams.inputs[i].GetLayout() == DataLayout::b_fs_yx_fsv16 && ewParams.inputs[i].Feature().v % 16 != 0) ||
|
||||
(ewParams.inputs[i].GetLayout() == DataLayout::b_fs_zyx_fsv16 && ewParams.inputs[i].Feature().v % 16 != 0) ||
|
||||
(ewParams.inputs[i].GetLayout() == DataLayout::b_fs_yx_fsv4 && ewParams.inputs[i].Feature().v % 8 != 0) ||
|
||||
ewParams.inputs[i].GetLayout() == DataLayout::fs_b_yx_fsv32)
|
||||
for (size_t i = 0; i < ewParams.inputs.size(); i++) {
|
||||
const auto input_layout = ewParams.inputs[i].GetLayout();
|
||||
const auto batch_size = ewParams.inputs[i].Batch().v;
|
||||
const auto feature_size = ewParams.inputs[i].Feature().v;
|
||||
if ((input_layout == DataLayout::b_fs_yx_fsv16 && feature_size % 16 != 0) ||
|
||||
(input_layout == DataLayout::b_fs_yx_fsv32 && feature_size % 32 != 0) ||
|
||||
(input_layout == DataLayout::b_fs_zyx_fsv16 && feature_size % 16 != 0) ||
|
||||
(input_layout == DataLayout::b_fs_yx_fsv4 && feature_size % 8 != 0) ||
|
||||
input_layout == DataLayout::fs_b_yx_fsv32 ||
|
||||
(input_layout == DataLayout::bs_fs_yx_bsv32_fsv16 && (feature_size % 16 != 0 || batch_size % 32 != 0)) ||
|
||||
(input_layout == DataLayout::bs_fs_yx_bsv32_fsv32 && (feature_size % 32 != 0 || batch_size % 32 != 0)))
|
||||
return false;
|
||||
}
|
||||
if ((ewParams.output.GetLayout() == DataLayout::b_fs_yx_fsv16 && ewParams.output.Feature().v % 16 != 0) ||
|
||||
(ewParams.output.GetLayout() == DataLayout::b_fs_yx_fsv32 && ewParams.output.Feature().v % 32 != 0) ||
|
||||
(ewParams.output.GetLayout() == DataLayout::b_fs_zyx_fsv16 && ewParams.output.Feature().v % 16 != 0) ||
|
||||
(ewParams.output.GetLayout() == DataLayout::b_fs_yx_fsv4 && ewParams.output.Feature().v % 8 != 0) ||
|
||||
ewParams.output.GetLayout() == DataLayout::fs_b_yx_fsv32 ||
|
||||
(ewParams.output.GetLayout() == DataLayout::bs_fs_yx_bsv32_fsv16 &&
|
||||
(ewParams.output.Feature().v % 16 != 0 || ewParams.output.Batch().v % 32 != 0)) ||
|
||||
(ewParams.output.GetLayout() == DataLayout::bs_fs_yx_bsv32_fsv32 &&
|
||||
(ewParams.output.Feature().v % 32 != 0 || ewParams.output.Batch().v % 32 != 0)))
|
||||
return false;
|
||||
}
|
||||
if ((ewParams.output.GetLayout() == DataLayout::b_fs_yx_fsv16 && ewParams.output.Feature().v % 16 != 0) ||
|
||||
(ewParams.output.GetLayout() == DataLayout::b_fs_zyx_fsv16 && ewParams.output.Feature().v % 16 != 0) ||
|
||||
(ewParams.output.GetLayout() == DataLayout::b_fs_yx_fsv4 && ewParams.output.Feature().v % 8 != 0) ||
|
||||
ewParams.output.GetLayout() == DataLayout::fs_b_yx_fsv32)
|
||||
return false;
|
||||
|
||||
const auto& output = ewParams.output;
|
||||
const auto count = output.PhysicalSize();
|
||||
|
@ -425,7 +425,9 @@ bool CheckInputsOutputNoPitchSameDims(const base_params& params) {
|
||||
{DataLayout::bs_f_bsv16__af8, {16, 8}},
|
||||
{DataLayout::b_fs_yx_fsv4, {1, 4}},
|
||||
{DataLayout::fs_b_yx_fsv32, {1, 32}},
|
||||
{DataLayout::b_fs_yx_32fp, {1, 32}}
|
||||
{DataLayout::b_fs_yx_32fp, {1, 32}},
|
||||
{DataLayout::bs_fs_yx_bsv32_fsv16, {32, 16}},
|
||||
{DataLayout::bs_fs_yx_bsv32_fsv32, {32, 32}}
|
||||
};
|
||||
|
||||
if (params.inputs.size()) {
|
||||
|
@ -123,7 +123,7 @@ static void print_help_messages() {
|
||||
|
||||
GPU_DEBUG_COUT << "Supported environment variables for debugging" << std::endl;
|
||||
for (auto& p : message_list) {
|
||||
GPU_DEBUG_COUT << " - " << std::left << std::setw(name_width) << p.first + ": " << p.second << std::endl;
|
||||
GPU_DEBUG_COUT << " - " << std::left << std::setw(name_width) << p.first + " " << p.second << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user