[GPU] Num threads per eu update (#7823)

This commit is contained in:
Ilya Znamenskiy
2021-10-06 11:44:49 +03:00
committed by GitHub
parent b5499f6573
commit 17dc82a00a
3 changed files with 2 additions and 7 deletions

View File

@@ -30,8 +30,6 @@ struct gfx_version {
struct device_info {
uint32_t execution_units_count; ///< Number of available execution units.
uint32_t gpu_frequency; ///< Clock frequency in MHz.
uint32_t max_threads_per_execution_unit; ///< Number of available HW threads on EU.
uint32_t max_threads_per_device; ///< Maximum number of HW threads on device.
uint64_t max_work_group_size; ///< Maximum number of work-items in a work-group executing a kernel using the data parallel execution model.
uint64_t max_local_mem_size; ///< Maximum size of local memory arena in bytes.

View File

@@ -226,9 +226,6 @@ device_info init_device_info(const cl::Device& device) {
info.supports_imad = get_imad_support(device);
info.supports_immad = false;
info.max_threads_per_execution_unit = 7;
info.max_threads_per_device = static_cast<uint32_t>(info.execution_units_count * info.max_threads_per_execution_unit);
info.supports_usm = extensions.find("cl_intel_unified_shared_memory") != std::string::npos;
info.supports_local_block_io = extensions.find("cl_intel_subgroup_local_block_io") != std::string::npos &&

View File

@@ -832,8 +832,8 @@ void set_params(const program_node& node, kernel_selector::params& params) {
params.engineInfo.maxImage2dWidth = device_info.max_image2d_width;
params.engineInfo.maxImage2dHeight = device_info.max_image2d_height;
params.engineInfo.computeUnitsCount = device_info.execution_units_count;
params.engineInfo.maxThreadsPerExecutionUnit = device_info.max_threads_per_execution_unit;
params.engineInfo.maxThreadsPerDevice = device_info.max_threads_per_device;
params.engineInfo.maxThreadsPerExecutionUnit = device_info.num_threads_per_eu > 0 ? device_info.num_threads_per_eu : 7;
params.engineInfo.maxThreadsPerDevice = params.engineInfo.maxThreadsPerExecutionUnit * device_info.execution_units_count;
params.engineInfo.deviceCache = program.get_tuning_cache();
params.engineInfo.driverVersion = device_info.driver_version;