[GPU] Adjust default num streams (#14308)
commit 48c0897b0f
parent b76168b32b
@@ -58,7 +58,7 @@ struct Config {
     uint32_t GetDefaultNStreamsForThroughputMode() const {
         return 2;
     }
-    void UpdateFromMap(const std::map<std::string, std::string>& configMap);
+    void UpdateFromMap(const std::map<std::string, std::string>& configMap, const cldnn::device_info& info);
     void adjustKeyMapValues();
     static bool isNewApiProperty(std::string property);
     static std::string ConvertPropertyToLegacy(const std::string& key, const std::string& value);
@@ -81,6 +81,7 @@ struct device_info {
     uint32_t num_sub_slices_per_slice;  ///< Number of subslices in a slice
     uint32_t num_eus_per_sub_slice;     ///< Number of execution units per subslice
     uint32_t num_threads_per_eu;        ///< Number of hardware threads per execution unit
+    uint32_t num_ccs;                   ///< Number of compute command streamers

     device_uuid uuid;                   ///< UUID of the gpu device
 };
@@ -61,7 +61,7 @@ static int getNumberOfCores(const IStreamsExecutor::Config::PreferredCoreType co
 }

 IE_SUPPRESS_DEPRECATED_START
-void Config::UpdateFromMap(const std::map<std::string, std::string>& configMap) {
+void Config::UpdateFromMap(const std::map<std::string, std::string>& configMap, const cldnn::device_info& info) {
     OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Config::UpdateFromMap");
     for (auto& kvp : configMap) {
         std::string key = kvp.first;
@@ -250,7 +250,7 @@ void Config::UpdateFromMap(const std::map<std::string, std::string>& configMap)
         } else if (key.compare(PluginConfigParams::KEY_GPU_THROUGHPUT_STREAMS) == 0 || key == ov::num_streams) {
             if (val.compare(PluginConfigParams::GPU_THROUGHPUT_AUTO) == 0 ||
                 val.compare(ov::util::to_string(ov::streams::AUTO)) == 0) {
-                throughput_streams = GetDefaultNStreamsForThroughputMode();
+                throughput_streams = std::max(GetDefaultNStreamsForThroughputMode(), info.num_ccs);
             } else {
                 int val_i;
                 try {
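Note: with this hunk, GPU_THROUGHPUT_AUTO / ov::streams::AUTO no longer maps to a fixed value of 2; it resolves to the larger of the legacy default and the device's compute command streamer (CCS) count. A minimal standalone sketch of that selection, assuming num_ccs has already been read from cldnn::device_info (the helper name below is illustrative, not part of the plugin):

    // Illustrative sketch only, not the plugin's code.
    #include <algorithm>
    #include <cstdint>

    uint32_t resolve_auto_streams(uint32_t num_ccs) {
        const uint32_t legacy_default = 2;         // what GetDefaultNStreamsForThroughputMode() returns
        return std::max(legacy_default, num_ccs);  // e.g. num_ccs == 1 -> 2 streams, num_ccs == 4 -> 4 streams
    }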
@@ -196,7 +196,7 @@ void Plugin::UpdateConfig(Config& conf, const InferenceEngine::CNNNetwork &netwo
     OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "Plugin::UpdateConfig");
     auto device_info = GetDeviceInfo(params);
     conf.enableInt8 = device_info.supports_imad || device_info.supports_immad;
-    conf.UpdateFromMap(params);
+    conf.UpdateFromMap(params, device_info);
     if (conf.enableDynamicBatch) {
         conf.max_dynamic_batch = static_cast<int>(network.getBatchSize());
     }
@@ -357,17 +357,27 @@ void Plugin::SetConfig(const std::map<std::string, std::string> &config) {
         config.find(CLDNNConfigParams::KEY_CLDNN_PLUGIN_THROTTLE) != config.end() ||
         config.find(ov::intel_gpu::hint::queue_throttle.name()) != config.end();
     std::string device_id;
+    cldnn::device_info device_info = device_map.begin()->second->get_info();
     if (config.find(PluginConfigInternalParams::KEY_CONFIG_DEVICE_ID) != config.end()) {
         device_id = config.at(PluginConfigInternalParams::KEY_CONFIG_DEVICE_ID);
-        _impl->m_configs.GetConfig(device_id).UpdateFromMap(config);
+        if (!device_id.empty() && device_map.find(device_id) != device_map.end()) {
+            device_info = device_map.at(device_id)->get_info();
+        }
+        _impl->m_configs.GetConfig(device_id).UpdateFromMap(config, device_info);
     } else {
         device_id = GetDeviceIDFromConfig(config);
         if (!device_id.empty()) {
+            if (device_map.find(device_id) != device_map.end()) {
+                device_info = device_map.at(device_id)->get_info();
+            }
             _impl->m_configs.SetDefaultDeviceID(device_id);
-            _impl->m_configs.GetConfig(device_id).UpdateFromMap(config);
+            _impl->m_configs.GetConfig(device_id).UpdateFromMap(config, device_info);
         } else {
             for (auto& conf : _impl->m_configs) {
-                conf.second.UpdateFromMap(config);
+                if (device_map.find(conf.first) != device_map.end()) {
+                    device_info = device_map.at(conf.first)->get_info();
+                }
+                conf.second.UpdateFromMap(config, device_info);
             }
         }
     }
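Note: callers request AUTO exactly as before; only the resolved value changes. A usage sketch with the OpenVINO 2.0 API (the model path is a placeholder), which exercises the same UpdateFromMap path as the legacy KEY_GPU_THROUGHPUT_STREAMS=GPU_THROUGHPUT_AUTO setting:

    #include <openvino/openvino.hpp>

    int main() {
        ov::Core core;
        auto model = core.read_model("model.xml");  // placeholder path
        // With this commit, AUTO resolves to max(2, num_ccs) inside the GPU plugin.
        auto compiled = core.compile_model(model, "GPU", ov::num_streams(ov::streams::AUTO));
        return 0;
    }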
@@ -835,7 +845,7 @@ Parameter Plugin::GetMetric(const std::string& name, const std::map<std::string,
                 n_streams_str != util::to_string(ov::streams::AUTO)) {
                 IE_THROW() << "[GPU_MAX_BATCH_SIZE] bad casting: GPU_THROUGHPUT_STREAMS should be either of uint32_t type or \"GPU_THROUGHPUT_AUTO\"";
             }
-            n_streams = config.GetDefaultNStreamsForThroughputMode();
+            n_streams = std::max(config.GetDefaultNStreamsForThroughputMode(), device_info.num_ccs);
         } else {
             IE_THROW() << "[GPU_MAX_BATCH_SIZE] bad casting: GPU_THROUGHPUT_STREAMS should be either of uint32_t type or \"GPU_THROUGHPUT_AUTO\"";
         }
@@ -230,6 +230,19 @@ device_info init_device_info(const cl::Device& device) {
         info.num_threads_per_eu = 0;
     }

+    info.num_ccs = 1;
+    if (info.supports_queue_families) {
+        cl_uint num_queues = 0;
+
+        std::vector<cl_queue_family_properties_intel> qfprops = device.getInfo<CL_DEVICE_QUEUE_FAMILY_PROPERTIES_INTEL>();
+        for (cl_uint q = 0; q < qfprops.size(); q++) {
+            if (qfprops[q].capabilities == CL_QUEUE_DEFAULT_CAPABILITIES_INTEL && qfprops[q].count > num_queues) {
+                num_queues = qfprops[q].count;
+            }
+        }
+        info.num_ccs = std::max<uint32_t>(num_queues, info.num_ccs);
+    }
+
     return info;
 }

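Note: num_ccs stays at its fallback of 1 unless the device exposes queue families, so devices without that support keep the previous default of 2 streams. An end-to-end illustration with assumed queue-family data (the struct and values below are hypothetical stand-ins, not the real OpenCL types):

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    struct fake_queue_family { bool default_caps; uint32_t count; };  // stand-in for cl_queue_family_properties_intel

    int main() {
        // assumed device: one render family with 1 queue, one compute family with 4 queues
        std::vector<fake_queue_family> qfprops = {{false, 1}, {true, 4}};
        uint32_t num_ccs = 1;                                      // fallback when queue families are unsupported
        for (const auto& q : qfprops)
            if (q.default_caps)
                num_ccs = std::max(num_ccs, q.count);
        uint32_t auto_streams = std::max<uint32_t>(2u, num_ccs);   // -> 4 streams on this hypothetical device
        return static_cast<int>(auto_streams);
    }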