[GPU] Add CL_QUEUE_INDEX property and queue configuration refactoring (#7633)
This commit is contained in:
committed by
GitHub
parent
a3dfa980ad
commit
6bd0873a40
@@ -48,6 +48,7 @@ struct device_info {
|
||||
bool supports_subgroups_short; ///< Does engine support cl_intel_subgroups_short extension.
|
||||
bool supports_subgroups_char; ///< Does engine support cl_intel_subgroups_char extension.
|
||||
bool supports_local_block_io; ///< Does engine support cl_intel_subgroup_local_block_io extension.
|
||||
bool supports_queue_families; ///< Does engine support cl_intel_command_queue_families extension.
|
||||
bool supports_image; ///< Does engine support images (CL_DEVICE_IMAGE_SUPPORT cap).
|
||||
|
||||
bool supports_imad; ///< Does engine support int8 mad.
|
||||
|
||||
@@ -12,79 +12,78 @@ namespace ocl {
|
||||
// Creates a builder in its default state: profiling, out-of-order execution
// and queue-family support are all off, and both the priority and throttle
// modes are disabled. Callers change these through the set_* methods before
// building a queue.
command_queues_builder::command_queues_builder()
|
||||
: _profiling(false),
|
||||
_out_of_order(false),
|
||||
_supports_queue_families(false),
|
||||
_priority_mode(priority_mode_types::disabled),
|
||||
_throttle_mode(throttle_mode_types::disabled) {}
|
||||
|
||||
// NOTE(review): this span looks like a diff rendering in which lines of the
// previous get_properties() (no arguments, returning a
// cl_command_queue_properties bitfield through `ret`) are interleaved with the
// replacement that returns a property vector - confirm against the real file.
//
// Replacement behaviour, as far as the lines below show:
//   Assembles the zero-terminated list of (name, value) pairs that build()
//   hands to clCreateCommandQueueWithProperties().
//   device    - queried for its queue family properties when queue-family
//               support has been enabled on the builder.
//   stream_id - reduced modulo the number of queues in the selected family to
//               choose the queue index.
//   Returns the property list; the final entries are CL_QUEUE_PROPERTIES, its
//   bitfield value, and the terminating 0.
cl_command_queue_properties command_queues_builder::get_properties() {
|
||||
cl_command_queue_properties ret =
|
||||
std::vector<cl_queue_properties> command_queues_builder::get_properties(const cl::Device& device, uint16_t stream_id) {
|
||||
std::vector<cl_queue_properties> properties;
|
||||
|
||||
// Priority hint: only emitted when a priority mode was requested. Any mode
// other than high/low keeps the medium value.
if (_priority_mode != priority_mode_types::disabled) {
|
||||
unsigned cl_queue_priority_value = CL_QUEUE_PRIORITY_MED_KHR;
|
||||
switch (_priority_mode) {
|
||||
case priority_mode_types::high:
|
||||
cl_queue_priority_value = CL_QUEUE_PRIORITY_HIGH_KHR;
|
||||
break;
|
||||
case priority_mode_types::low:
|
||||
cl_queue_priority_value = CL_QUEUE_PRIORITY_LOW_KHR;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
properties.insert(properties.end(), {CL_QUEUE_PRIORITY_KHR, cl_queue_priority_value});
|
||||
}
|
||||
|
||||
// Throttle hint: same scheme as the priority hint above.
if (_throttle_mode != throttle_mode_types::disabled) {
|
||||
unsigned cl_queue_throttle_value = CL_QUEUE_THROTTLE_MED_KHR;
|
||||
switch (_throttle_mode) {
|
||||
case throttle_mode_types::high:
|
||||
cl_queue_throttle_value = CL_QUEUE_THROTTLE_HIGH_KHR;
|
||||
break;
|
||||
case throttle_mode_types::low:
|
||||
cl_queue_throttle_value = CL_QUEUE_THROTTLE_LOW_KHR;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
properties.insert(properties.end(), {CL_QUEUE_THROTTLE_KHR, cl_queue_throttle_value});
|
||||
}
|
||||
|
||||
// Queue family / index selection, used only when the builder was told the
// queue-families extension is available.
if (_supports_queue_families) {
|
||||
cl_uint num_queues = 0;
|
||||
cl_uint family = 0;
|
||||
|
||||
std::vector<cl_queue_family_properties_intel> qfprops = device.getInfo<CL_DEVICE_QUEUE_FAMILY_PROPERTIES_INTEL>();
|
||||
// Among families whose capabilities equal CL_QUEUE_DEFAULT_CAPABILITIES_INTEL,
// keep the one exposing the most queues (the first such family wins a tie,
// because the comparison is strict).
for (cl_uint q = 0; q < qfprops.size(); q++) {
|
||||
if (qfprops[q].capabilities == CL_QUEUE_DEFAULT_CAPABILITIES_INTEL && qfprops[q].count > num_queues) {
|
||||
family = q;
|
||||
num_queues = qfprops[q].count;
|
||||
}
|
||||
}
|
||||
|
||||
// num_queues stays 0 when no family matched; the check also guards the
// modulo below against division by zero.
if (num_queues)
|
||||
properties.insert(properties.end(), {CL_QUEUE_FAMILY_INTEL, family,
|
||||
CL_QUEUE_INDEX_INTEL, stream_id % num_queues});
|
||||
}
|
||||
|
||||
// Classic queue property bits (profiling / out-of-order execution).
cl_command_queue_properties cl_queue_properties =
|
||||
((_profiling ? CL_QUEUE_PROFILING_ENABLE : 0) | (_out_of_order ? CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE : 0));
|
||||
return ret;
|
||||
|
||||
// The trailing 0 terminates the property list.
properties.insert(properties.end(), {CL_QUEUE_PROPERTIES, cl_queue_properties, 0});
|
||||
|
||||
return properties;
|
||||
}
|
||||
|
||||
ocl_queue_type command_queues_builder::build(const cl::Context& context, const cl::Device& device) {
|
||||
auto properties = get_properties();
|
||||
|
||||
ocl_queue_type queue;
|
||||
|
||||
if (_priority_mode == priority_mode_types::disabled && _throttle_mode == throttle_mode_types::disabled) {
|
||||
queue = ocl_queue_type(context, device, properties);
|
||||
}
|
||||
|
||||
unsigned cl_queue_priority_value = CL_QUEUE_PRIORITY_MED_KHR;
|
||||
|
||||
switch (_priority_mode) {
|
||||
case priority_mode_types::high:
|
||||
cl_queue_priority_value = CL_QUEUE_PRIORITY_HIGH_KHR;
|
||||
break;
|
||||
case priority_mode_types::low:
|
||||
cl_queue_priority_value = CL_QUEUE_PRIORITY_LOW_KHR;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
unsigned cl_queue_throttle_value = CL_QUEUE_THROTTLE_MED_KHR;
|
||||
|
||||
switch (_throttle_mode) {
|
||||
case throttle_mode_types::high:
|
||||
cl_queue_throttle_value = CL_QUEUE_THROTTLE_HIGH_KHR;
|
||||
break;
|
||||
case throttle_mode_types::low:
|
||||
cl_queue_throttle_value = CL_QUEUE_THROTTLE_LOW_KHR;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
cl_int error_code = CL_SUCCESS;
|
||||
static std::atomic<uint16_t> stream_id{0};
|
||||
|
||||
if (_priority_mode != priority_mode_types::disabled && _throttle_mode != throttle_mode_types::disabled) {
|
||||
cl_queue_properties properties_low[] = {CL_QUEUE_PRIORITY_KHR,
|
||||
cl_queue_priority_value,
|
||||
CL_QUEUE_THROTTLE_KHR,
|
||||
cl_queue_throttle_value,
|
||||
CL_QUEUE_PROPERTIES,
|
||||
properties,
|
||||
0};
|
||||
auto properties = get_properties(device, stream_id++);
|
||||
|
||||
queue = ocl_queue_type(clCreateCommandQueueWithProperties(context.get(), device.get(), properties_low, &error_code));
|
||||
} else if (_priority_mode != priority_mode_types::disabled) {
|
||||
cl_queue_properties properties_low[] = {CL_QUEUE_PRIORITY_KHR,
|
||||
cl_queue_priority_value,
|
||||
CL_QUEUE_PROPERTIES,
|
||||
properties,
|
||||
0};
|
||||
|
||||
queue = ocl_queue_type(clCreateCommandQueueWithProperties(context.get(), device.get(), properties_low, &error_code));
|
||||
} else if (_throttle_mode != throttle_mode_types::disabled) {
|
||||
cl_queue_properties properties_low[] = {CL_QUEUE_THROTTLE_KHR,
|
||||
cl_queue_throttle_value,
|
||||
CL_QUEUE_PROPERTIES,
|
||||
properties,
|
||||
0};
|
||||
|
||||
queue = ocl_queue_type(clCreateCommandQueueWithProperties(context.get(), device.get(), properties_low, &error_code));
|
||||
}
|
||||
queue = clCreateCommandQueueWithProperties(context.get(), device.get(), properties.data(), &error_code);
|
||||
|
||||
if (error_code != CL_SUCCESS) {
|
||||
CLDNN_ERROR_MESSAGE("Command queues builders",
|
||||
@@ -112,5 +111,9 @@ void command_queues_builder::set_throttle_mode(throttle_mode_types throttle, boo
|
||||
}
|
||||
_throttle_mode = throttle;
|
||||
}
|
||||
|
||||
// Stores whether queue-family support is available. get_properties() reads
// this flag to decide whether CL_QUEUE_FAMILY_INTEL / CL_QUEUE_INDEX_INTEL
// entries are added to the queue property list.
//   extension_support - true when the extension is reported by the device.
void command_queues_builder::set_supports_queue_families(bool extension_support) {
|
||||
_supports_queue_families = extension_support;
|
||||
}
|
||||
} // namespace ocl
|
||||
} // namespace cldnn
|
||||
|
||||
@@ -18,14 +18,16 @@ public:
|
||||
void set_priority_mode(priority_mode_types priority, bool extension_support);
|
||||
void set_profiling(bool flag) { _profiling = flag; }
|
||||
void set_out_of_order(bool flag) { _out_of_order = flag; }
|
||||
void set_supports_queue_families(bool extension_support);
|
||||
|
||||
private:
|
||||
bool _profiling;
|
||||
bool _out_of_order;
|
||||
bool _supports_queue_families;
|
||||
priority_mode_types _priority_mode;
|
||||
throttle_mode_types _throttle_mode;
|
||||
|
||||
cl_command_queue_properties get_properties();
|
||||
std::vector<cl_queue_properties> get_properties(const cl::Device& device, uint16_t stream_id = 0);
|
||||
};
|
||||
|
||||
} // namespace ocl
|
||||
|
||||
@@ -234,6 +234,8 @@ device_info init_device_info(const cl::Device& device) {
|
||||
info.supports_local_block_io = extensions.find("cl_intel_subgroup_local_block_io") != std::string::npos &&
|
||||
is_local_block_io_supported(device);
|
||||
|
||||
info.supports_queue_families = extensions.find("cl_intel_command_queue_families") != std::string::npos;
|
||||
|
||||
bool device_attr_supported = extensions.find("cl_intel_device_attribute_query") != std::string::npos;
|
||||
|
||||
if (device_attr_supported) {
|
||||
|
||||
@@ -35,6 +35,11 @@ typedef cl_bitfield cl_device_feature_capabilities_intel;
|
||||
#define CL_DEVICE_FEATURE_FLAG_DP4A_INTEL (1 << 0)
|
||||
#define CL_DEVICE_FEATURE_FLAG_DPAS_INTEL (1 << 1)
|
||||
|
||||
// Lists the info queries related to command queue families as
// (info kind, parameter name, result type) triples and applies the macro F to
// each of them. It is expanded with CL_HPP_DECLARE_PARAM_TRAITS_ inside
// cl::detail so that getInfo<>() can be used with these parameter names.
#define CL_HPP_PARAM_NAME_CL_INTEL_COMMAND_QUEUE_FAMILIES_(F) \
|
||||
F(cl_device_info, CL_DEVICE_QUEUE_FAMILY_PROPERTIES_INTEL, cl::vector<cl_queue_family_properties_intel>) \
|
||||
\
|
||||
F(cl_command_queue_info, CL_QUEUE_FAMILY_INTEL, cl_uint) \
|
||||
F(cl_command_queue_info, CL_QUEUE_INDEX_INTEL, cl_uint)
|
||||
|
||||
namespace cl {
|
||||
namespace detail {
|
||||
@@ -45,6 +50,7 @@ CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_NUM_SUB_SLICES_PER_SLICE_
|
||||
CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_NUM_EUS_PER_SUB_SLICE_INTEL, cl_uint)
|
||||
CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_NUM_THREADS_PER_EU_INTEL, cl_uint)
|
||||
CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_FEATURE_CAPABILITIES_INTEL, cl_device_feature_capabilities_intel)
|
||||
CL_HPP_PARAM_NAME_CL_INTEL_COMMAND_QUEUE_FAMILIES_(CL_HPP_DECLARE_PARAM_TRAITS_)
|
||||
} // namespace detail
|
||||
} // namespace cl
|
||||
|
||||
|
||||
@@ -276,6 +276,9 @@ ocl_stream::ocl_stream(const ocl_engine& engine) : stream(engine.configuration()
|
||||
bool throttle_extensions = engine.extension_supported("cl_khr_throttle_hints") && engine.extension_supported("cl_khr_create_command_queue");
|
||||
queue_builder.set_throttle_mode(config.throttle_mode, throttle_extensions);
|
||||
|
||||
bool queue_families_extension = engine.get_device_info().supports_queue_families;
|
||||
queue_builder.set_supports_queue_families(queue_families_extension);
|
||||
|
||||
_command_queue = queue_builder.build(context, device);
|
||||
#ifdef ENABLE_ONEDNN_FOR_GPU
|
||||
if (config.queue_type == queue_types::in_order) {
|
||||
|
||||
Reference in New Issue
Block a user