[GPU] Add CL_QUEUE_INDEX property and queue configuration refactoring (#7633)

This commit is contained in:
Sergey Shlyapnikov
2021-09-28 11:37:09 +03:00
committed by GitHub
parent a3dfa980ad
commit 6bd0873a40
6 changed files with 80 additions and 63 deletions

View File

@@ -48,6 +48,7 @@ struct device_info {
bool supports_subgroups_short; ///< Does engine support cl_intel_subgroups_short extension.
bool supports_subgroups_char; ///< Does engine support cl_intel_subgroups_char extension.
bool supports_local_block_io; ///< Does engine support cl_intel_subgroup_local_block_io extension.
bool supports_queue_families; ///< Does engine support cl_intel_command_queue_families extension.
bool supports_image; ///< Does engine support images (CL_DEVICE_IMAGE_SUPPORT cap).
bool supports_imad; ///< Does engine support int8 mad.

View File

@@ -12,79 +12,78 @@ namespace ocl {
command_queues_builder::command_queues_builder()
: _profiling(false),
_out_of_order(false),
_supports_queue_families(false),
_priority_mode(priority_mode_types::disabled),
_throttle_mode(throttle_mode_types::disabled) {}
cl_command_queue_properties command_queues_builder::get_properties() {
cl_command_queue_properties ret =
std::vector<cl_queue_properties> command_queues_builder::get_properties(const cl::Device& device, uint16_t stream_id) {
std::vector<cl_queue_properties> properties;
if (_priority_mode != priority_mode_types::disabled) {
unsigned cl_queue_priority_value = CL_QUEUE_PRIORITY_MED_KHR;
switch (_priority_mode) {
case priority_mode_types::high:
cl_queue_priority_value = CL_QUEUE_PRIORITY_HIGH_KHR;
break;
case priority_mode_types::low:
cl_queue_priority_value = CL_QUEUE_PRIORITY_LOW_KHR;
break;
default:
break;
}
properties.insert(properties.end(), {CL_QUEUE_PRIORITY_KHR, cl_queue_priority_value});
}
if (_throttle_mode != throttle_mode_types::disabled) {
unsigned cl_queue_throttle_value = CL_QUEUE_THROTTLE_MED_KHR;
switch (_throttle_mode) {
case throttle_mode_types::high:
cl_queue_throttle_value = CL_QUEUE_THROTTLE_HIGH_KHR;
break;
case throttle_mode_types::low:
cl_queue_throttle_value = CL_QUEUE_THROTTLE_LOW_KHR;
break;
default:
break;
}
properties.insert(properties.end(), {CL_QUEUE_THROTTLE_KHR, cl_queue_throttle_value});
}
if (_supports_queue_families) {
cl_uint num_queues = 0;
cl_uint family = 0;
std::vector<cl_queue_family_properties_intel> qfprops = device.getInfo<CL_DEVICE_QUEUE_FAMILY_PROPERTIES_INTEL>();
for (cl_uint q = 0; q < qfprops.size(); q++) {
if (qfprops[q].capabilities == CL_QUEUE_DEFAULT_CAPABILITIES_INTEL && qfprops[q].count > num_queues) {
family = q;
num_queues = qfprops[q].count;
}
}
if (num_queues)
properties.insert(properties.end(), {CL_QUEUE_FAMILY_INTEL, family,
CL_QUEUE_INDEX_INTEL, stream_id % num_queues});
}
cl_command_queue_properties cl_queue_properties =
((_profiling ? CL_QUEUE_PROFILING_ENABLE : 0) | (_out_of_order ? CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE : 0));
return ret;
properties.insert(properties.end(), {CL_QUEUE_PROPERTIES, cl_queue_properties, 0});
return properties;
}
ocl_queue_type command_queues_builder::build(const cl::Context& context, const cl::Device& device) {
auto properties = get_properties();
ocl_queue_type queue;
if (_priority_mode == priority_mode_types::disabled && _throttle_mode == throttle_mode_types::disabled) {
queue = ocl_queue_type(context, device, properties);
}
unsigned cl_queue_priority_value = CL_QUEUE_PRIORITY_MED_KHR;
switch (_priority_mode) {
case priority_mode_types::high:
cl_queue_priority_value = CL_QUEUE_PRIORITY_HIGH_KHR;
break;
case priority_mode_types::low:
cl_queue_priority_value = CL_QUEUE_PRIORITY_LOW_KHR;
break;
default:
break;
}
unsigned cl_queue_throttle_value = CL_QUEUE_THROTTLE_MED_KHR;
switch (_throttle_mode) {
case throttle_mode_types::high:
cl_queue_throttle_value = CL_QUEUE_THROTTLE_HIGH_KHR;
break;
case throttle_mode_types::low:
cl_queue_throttle_value = CL_QUEUE_THROTTLE_LOW_KHR;
break;
default:
break;
}
cl_int error_code = CL_SUCCESS;
static std::atomic<uint16_t> stream_id{0};
if (_priority_mode != priority_mode_types::disabled && _throttle_mode != throttle_mode_types::disabled) {
cl_queue_properties properties_low[] = {CL_QUEUE_PRIORITY_KHR,
cl_queue_priority_value,
CL_QUEUE_THROTTLE_KHR,
cl_queue_throttle_value,
CL_QUEUE_PROPERTIES,
properties,
0};
auto properties = get_properties(device, stream_id++);
queue = ocl_queue_type(clCreateCommandQueueWithProperties(context.get(), device.get(), properties_low, &error_code));
} else if (_priority_mode != priority_mode_types::disabled) {
cl_queue_properties properties_low[] = {CL_QUEUE_PRIORITY_KHR,
cl_queue_priority_value,
CL_QUEUE_PROPERTIES,
properties,
0};
queue = ocl_queue_type(clCreateCommandQueueWithProperties(context.get(), device.get(), properties_low, &error_code));
} else if (_throttle_mode != throttle_mode_types::disabled) {
cl_queue_properties properties_low[] = {CL_QUEUE_THROTTLE_KHR,
cl_queue_throttle_value,
CL_QUEUE_PROPERTIES,
properties,
0};
queue = ocl_queue_type(clCreateCommandQueueWithProperties(context.get(), device.get(), properties_low, &error_code));
}
queue = clCreateCommandQueueWithProperties(context.get(), device.get(), properties.data(), &error_code);
if (error_code != CL_SUCCESS) {
CLDNN_ERROR_MESSAGE("Command queues builders",
@@ -112,5 +111,9 @@ void command_queues_builder::set_throttle_mode(throttle_mode_types throttle, boo
}
_throttle_mode = throttle;
}
void command_queues_builder::set_supports_queue_families(bool extension_support) {
_supports_queue_families = extension_support;
}
} // namespace ocl
} // namespace cldnn

View File

@@ -18,14 +18,16 @@ public:
void set_priority_mode(priority_mode_types priority, bool extension_support);
void set_profiling(bool flag) { _profiling = flag; }
void set_out_of_order(bool flag) { _out_of_order = flag; }
void set_supports_queue_families(bool extension_support);
private:
bool _profiling;
bool _out_of_order;
bool _supports_queue_families;
priority_mode_types _priority_mode;
throttle_mode_types _throttle_mode;
cl_command_queue_properties get_properties();
std::vector<cl_queue_properties> get_properties(const cl::Device& device, uint16_t stream_id = 0);
};
} // namespace ocl

View File

@@ -234,6 +234,8 @@ device_info init_device_info(const cl::Device& device) {
info.supports_local_block_io = extensions.find("cl_intel_subgroup_local_block_io") != std::string::npos &&
is_local_block_io_supported(device);
info.supports_queue_families = extensions.find("cl_intel_command_queue_families") != std::string::npos;
bool device_attr_supported = extensions.find("cl_intel_device_attribute_query") != std::string::npos;
if (device_attr_supported) {

View File

@@ -35,6 +35,11 @@ typedef cl_bitfield cl_device_feature_capabilities_intel;
#define CL_DEVICE_FEATURE_FLAG_DP4A_INTEL (1 << 0)
#define CL_DEVICE_FEATURE_FLAG_DPAS_INTEL (1 << 1)
// X-macro listing the query parameters related to Intel command queue families.
// The argument F is applied once per entry as F(<info kind>, <parameter name>, <result type>):
//   - one cl_device_info entry whose result is a vector of cl_queue_family_properties_intel,
//   - two cl_command_queue_info entries (family and index) whose results are cl_uint.
// It is expanded with CL_HPP_DECLARE_PARAM_TRAITS_ inside cl::detail further down in this file.
#define CL_HPP_PARAM_NAME_CL_INTEL_COMMAND_QUEUE_FAMILIES_(F) \
F(cl_device_info, CL_DEVICE_QUEUE_FAMILY_PROPERTIES_INTEL, cl::vector<cl_queue_family_properties_intel>) \
\
F(cl_command_queue_info, CL_QUEUE_FAMILY_INTEL, cl_uint) \
F(cl_command_queue_info, CL_QUEUE_INDEX_INTEL, cl_uint)
namespace cl {
namespace detail {
@@ -45,6 +50,7 @@ CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_NUM_SUB_SLICES_PER_SLICE_
CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_NUM_EUS_PER_SUB_SLICE_INTEL, cl_uint)
CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_NUM_THREADS_PER_EU_INTEL, cl_uint)
CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_FEATURE_CAPABILITIES_INTEL, cl_device_feature_capabilities_intel)
CL_HPP_PARAM_NAME_CL_INTEL_COMMAND_QUEUE_FAMILIES_(CL_HPP_DECLARE_PARAM_TRAITS_)
} // namespace detail
} // namespace cl

View File

@@ -276,6 +276,9 @@ ocl_stream::ocl_stream(const ocl_engine& engine) : stream(engine.configuration()
bool throttle_extensions = engine.extension_supported("cl_khr_throttle_hints") && engine.extension_supported("cl_khr_create_command_queue");
queue_builder.set_throttle_mode(config.throttle_mode, throttle_extensions);
bool queue_families_extension = engine.get_device_info().supports_queue_families;
queue_builder.set_supports_queue_families(queue_families_extension);
_command_queue = queue_builder.build(context, device);
#ifdef ENABLE_ONEDNN_FOR_GPU
if (config.queue_type == queue_types::in_order) {