Used new config for streams and threads (#10150)
* Used new config for streams and threads * Fixed review comments in ba * format fix * fixed hello_query_device * Added STL string io * fixed tests * Fixed test * Fixed build * fixed format * Fixed build * try fix win * other any io specialization * Fixed after merge * renamed streams * build fixed * fixed build * fixed format * fix for old mac build * Fixed type of exception * test fix
This commit is contained in:
@@ -124,12 +124,12 @@ Options:
|
||||
Also, using nstreams>1 is inherently throughput-oriented option, while for the best-latency
|
||||
estimations the number of streams should be set to 1.
|
||||
-nthreads "<integer>" Optional. Number of threads to use for inference on the CPU (including HETERO and MULTI cases).
|
||||
-enforcebf16="<true/false>" Optional. By default floating point operations execution in bfloat16 precision are enforced if supported by platform.
|
||||
-pin "YES"/"HYBRID_AWARE"/"NUMA"/"NO"
|
||||
-pin ("YES"|"CORE")/"HYBRID_AWARE"/"NUMA"/("NO"|"NONE")
|
||||
Optional. Explicit inference threads binding options (leave empty to let the OpenVINO to make a choice):
|
||||
enabling threads->cores pinning ("YES", which is already default for a conventional CPU),
|
||||
letting the runtime to decide on the threads->different core types ("HYBRID_AWARE", which is default on the hybrid CPUs)
|
||||
threads->(NUMA)nodes ("NUMA") or completely disable ("NO") CPU inference threads pinning.
|
||||
-infer_precision device_name:infer_precision1,device_name:infer_precision2 Optional. Hint that specifies the inference precision
|
||||
-ip "U8"/"FP16"/"FP32" Optional. Specifies precision for all input layers of the network.
|
||||
-op "U8"/"FP16"/"FP32" Optional. Specifies precision for all output layers of the network.
|
||||
-iop Optional. Specifies precision for input and output layers by name. Example: -iop "input:FP16, output:FP16". Notice that quotes are required.
|
||||
|
||||
@@ -191,8 +191,8 @@ static const char cache_dir_message[] = "Optional. Enables caching of loaded mod
|
||||
static const char load_from_file_message[] = "Optional. Loads model from file directly without ReadNetwork."
|
||||
" All CNNNetwork options (like re-shape) will be ignored";
|
||||
|
||||
// @brief message for quantization bits
|
||||
static const char gna_qb_message[] = "Optional. Weight bits for quantization: 8 or 16 (default)";
|
||||
// @brief help message for the -infer_precision option; it is passed to
// DEFINE_string(infer_precision, ...) and printed by show_usage()
static const char inference_precision_message[] = "Optional. Inference precision";
|
||||
|
||||
static constexpr char inputs_precision_message[] = "Optional. Specifies precision for all input layers of the network.";
|
||||
|
||||
@@ -275,9 +275,6 @@ DEFINE_string(nstreams, "", infer_num_streams_message);
|
||||
/// @brief The percentile which will be reported in latency metric
|
||||
DEFINE_uint32(latency_percentile, 50, infer_latency_percentile_message);
|
||||
|
||||
/// @brief Enforces bf16 execution with bfloat16 precision on systems having this capability
|
||||
DEFINE_bool(enforcebf16, false, enforce_bf16_message);
|
||||
|
||||
/// @brief Define parameter for batch size <br>
|
||||
/// Default is 0 (that means don't specify)
|
||||
DEFINE_uint32(b, 0, batch_size_message);
|
||||
@@ -329,8 +326,8 @@ DEFINE_string(data_shape, "", data_shape_message);
|
||||
/// @brief Define flag for layout shape <br>
|
||||
DEFINE_string(layout, "", layout_message);
|
||||
|
||||
/// @brief Define flag for quantization bits (default 16)
|
||||
DEFINE_int32(qb, 16, gna_qb_message);
|
||||
/// @brief Define flag for inference precision
|
||||
DEFINE_string(infer_precision, "f32", inference_precision_message);
|
||||
|
||||
/// @brief Specify precision for all input layers of the network
|
||||
DEFINE_string(ip, "", inputs_precision_message);
|
||||
@@ -391,7 +388,6 @@ static void show_usage() {
|
||||
std::cout << std::endl << " device-specific performance options:" << std::endl;
|
||||
std::cout << " -nstreams \"<integer>\" " << infer_num_streams_message << std::endl;
|
||||
std::cout << " -nthreads \"<integer>\" " << infer_num_threads_message << std::endl;
|
||||
std::cout << " -enforcebf16=<true/false> " << enforce_bf16_message << std::endl;
|
||||
std::cout << " -pin \"YES\"/\"HYBRID_AWARE\"/\"NO\"/\"NUMA\" " << infer_threads_pinning_message << std::endl;
|
||||
#ifdef HAVE_DEVICE_MEM_SUPPORT
|
||||
std::cout << " -use_device_mem " << use_device_mem_message << std::endl;
|
||||
@@ -405,7 +401,7 @@ static void show_usage() {
|
||||
std::cout << " -pcseq " << pcseq_message << std::endl;
|
||||
std::cout << " -dump_config " << dump_config_message << std::endl;
|
||||
std::cout << " -load_config " << load_config_message << std::endl;
|
||||
std::cout << " -qb " << gna_qb_message << std::endl;
|
||||
std::cout << " -infer_precision \"<element type>\"" << inference_precision_message << std::endl;
|
||||
std::cout << " -ip <value> " << inputs_precision_message << std::endl;
|
||||
std::cout << " -op <value> " << outputs_precision_message << std::endl;
|
||||
std::cout << " -iop \"<value>\" " << iop_message << std::endl;
|
||||
|
||||
@@ -157,7 +157,9 @@ int main(int argc, char* argv[]) {
|
||||
auto devices = parse_devices(device_name);
|
||||
|
||||
// Parse nstreams per device
|
||||
std::map<std::string, std::string> device_nstreams = parse_nstreams_value_per_device(devices, FLAGS_nstreams);
|
||||
std::map<std::string, std::string> device_nstreams = parse_value_per_device(devices, FLAGS_nstreams);
|
||||
std::map<std::string, std::string> device_infer_precision =
|
||||
parse_value_per_device(devices, FLAGS_infer_precision);
|
||||
|
||||
// Load device config file if specified
|
||||
std::map<std::string, ov::AnyMap> config;
|
||||
@@ -243,9 +245,7 @@ int main(int argc, char* argv[]) {
|
||||
bool perf_counts = false;
|
||||
// Update config per device according to command line parameters
|
||||
for (auto& device : devices) {
|
||||
if (!config.count(device))
|
||||
config[device] = {};
|
||||
auto& device_config = config.at(device);
|
||||
auto& device_config = config[device];
|
||||
|
||||
// high-level performance modes
|
||||
if (ov_perf_hint != ov::hint::PerformanceMode::UNDEFINED) {
|
||||
@@ -276,24 +276,28 @@ int main(int argc, char* argv[]) {
|
||||
}
|
||||
perf_counts = (device_config.at(ov::enable_profiling.name()).as<bool>()) ? true : perf_counts;
|
||||
|
||||
auto supported_properties = core.get_property(device, ov::supported_properties);
|
||||
|
||||
auto supported = [&](const std::string& key) {
|
||||
return std::find(std::begin(supported_properties), std::end(supported_properties), key) !=
|
||||
std::end(supported_properties);
|
||||
};
|
||||
// the rest are individual per-device settings (overriding the values set with perf modes)
|
||||
auto setThroughputStreams = [&]() {
|
||||
std::string key = getDeviceTypeFromName(device) + "_THROUGHPUT_STREAMS";
|
||||
if (device_nstreams.count(device)) {
|
||||
auto it_device_nstreams = device_nstreams.find(device);
|
||||
if (it_device_nstreams != device_nstreams.end()) {
|
||||
// set to user defined value
|
||||
auto supported_properties = core.get_property(device, ov::supported_properties);
|
||||
if (std::find(supported_properties.begin(), supported_properties.end(), key) !=
|
||||
supported_properties.end()) {
|
||||
device_config[key] = device_nstreams.at(device);
|
||||
} else if (std::find(supported_properties.begin(),
|
||||
supported_properties.end(),
|
||||
ov::streams::num.name()) != supported_properties.end()) {
|
||||
if (supported(key)) {
|
||||
device_config[key] = it_device_nstreams->second;
|
||||
} else if (supported(ov::num_streams.name())) {
|
||||
// Use API 2.0 key for streams
|
||||
key = ov::streams::num.name();
|
||||
device_config[key] = device_nstreams.at(device);
|
||||
key = ov::num_streams.name();
|
||||
device_config[key] = it_device_nstreams->second;
|
||||
} else {
|
||||
throw std::logic_error("Device " + device + " doesn't support config key '" + key + "' " +
|
||||
"and '" + ov::streams::num.name() + "'!" +
|
||||
"and '" + ov::num_streams.name() + "'!" +
|
||||
"Please specify -nstreams for correct devices in format "
|
||||
"<dev1>:<nstreams1>,<dev2>:<nstreams2>" +
|
||||
" or via configuration file.");
|
||||
@@ -309,45 +313,66 @@ int main(int argc, char* argv[]) {
|
||||
<< slog::endl;
|
||||
if (std::string::npos == device.find("MYRIAD")) { // MYRIAD sets the default number of
|
||||
// streams implicitly (without _AUTO)
|
||||
auto supported_properties = core.get_property(device, ov::supported_properties);
|
||||
if (std::find(supported_properties.begin(), supported_properties.end(), key) !=
|
||||
supported_properties.end()) {
|
||||
if (supported(key)) {
|
||||
device_config[key] = std::string(getDeviceTypeFromName(device) + "_THROUGHPUT_AUTO");
|
||||
} else if (std::find(supported_properties.begin(),
|
||||
supported_properties.end(),
|
||||
ov::streams::num.name()) != supported_properties.end()) {
|
||||
} else if (supported(ov::num_streams.name())) {
|
||||
// Use API 2.0 key for streams
|
||||
key = ov::streams::num.name();
|
||||
device_config[key] = std::to_string(ov::streams::AUTO);
|
||||
key = ov::num_streams.name();
|
||||
device_config[key] = ov::NumStreams::AUTO;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (device_config.count(key))
|
||||
device_nstreams[device] = device_config.at(key).as<std::string>();
|
||||
auto it_streams = device_config.find(ov::num_streams.name());
|
||||
if (it_streams != device_config.end())
|
||||
device_nstreams[device] = it_streams->second.as<std::string>();
|
||||
};
|
||||
|
||||
auto set_infer_precision = [&] {
|
||||
auto it_device_infer_precision = device_infer_precision.find(device);
|
||||
if (it_device_infer_precision != device_infer_precision.end()) {
|
||||
// set to user defined value
|
||||
if (!supported(ov::hint::inference_precision.name())) {
|
||||
throw std::logic_error("Device " + device + " doesn't support config key '" +
|
||||
ov::hint::inference_precision.name() + "'! " +
|
||||
"Please specify -infer_precision for correct devices in format "
|
||||
"<dev1>:<infer_precision1>,<dev2>:<infer_precision2>" +
|
||||
" or via configuration file.");
|
||||
}
|
||||
device_config.emplace(ov::hint::inference_precision(it_device_infer_precision->second));
|
||||
}
|
||||
};
|
||||
|
||||
auto fix_pin_option = [](const std::string& str) -> std::string {
|
||||
if (str == "NO")
|
||||
return "NONE";
|
||||
else if (str == "YES")
|
||||
return "CORE";
|
||||
else
|
||||
return str;
|
||||
};
|
||||
|
||||
if (supported(ov::inference_num_threads.name()) && isFlagSetInCommandLine("nthreads")) {
|
||||
device_config.emplace(ov::inference_num_threads(FLAGS_nthreads));
|
||||
}
|
||||
if (supported(ov::affinity.name()) && isFlagSetInCommandLine("pin")) {
|
||||
device_config.emplace(ov::affinity(fix_pin_option(FLAGS_pin)));
|
||||
}
|
||||
|
||||
if (device.find("CPU") != std::string::npos) { // CPU supports few special performance-oriented keys
|
||||
// limit threading for CPU portion of inference
|
||||
if (isFlagSetInCommandLine("nthreads"))
|
||||
device_config[CONFIG_KEY(CPU_THREADS_NUM)] = std::to_string(FLAGS_nthreads);
|
||||
|
||||
if (isFlagSetInCommandLine("enforcebf16"))
|
||||
device_config[CONFIG_KEY(ENFORCE_BF16)] = FLAGS_enforcebf16 ? CONFIG_VALUE(YES) : CONFIG_VALUE(NO);
|
||||
|
||||
if (isFlagSetInCommandLine("pin")) {
|
||||
// set to user defined value
|
||||
device_config[CONFIG_KEY(CPU_BIND_THREAD)] = FLAGS_pin;
|
||||
} else if (!device_config.count(CONFIG_KEY(CPU_BIND_THREAD))) {
|
||||
if ((device_name.find("MULTI") != std::string::npos) &&
|
||||
if (!isFlagSetInCommandLine("pin")) {
|
||||
auto it_affinity = device_config.find(ov::affinity.name());
|
||||
if (it_affinity != device_config.end() && (device_name.find("MULTI") != std::string::npos) &&
|
||||
(device_name.find("GPU") != std::string::npos)) {
|
||||
slog::warn << "Turn off threads pinning for " << device
|
||||
<< " device since multi-scenario with GPU device is used." << slog::endl;
|
||||
device_config[CONFIG_KEY(CPU_BIND_THREAD)] = CONFIG_VALUE(NO);
|
||||
it_affinity->second = ov::Affinity::NONE;
|
||||
}
|
||||
}
|
||||
|
||||
// for CPU execution, more throughput-oriented execution via streams
|
||||
setThroughputStreams();
|
||||
set_infer_precision();
|
||||
} else if (device.find("GPU") != std::string::npos) {
|
||||
// for GPU execution, more throughput-oriented execution via streams
|
||||
setThroughputStreams();
|
||||
@@ -365,25 +390,7 @@ int main(int argc, char* argv[]) {
|
||||
device_config.emplace(ov::log::level(ov::log::Level::WARNING));
|
||||
setThroughputStreams();
|
||||
} else if (device.find("GNA") != std::string::npos) {
|
||||
if (FLAGS_qb == 8)
|
||||
device_config[GNA_CONFIG_KEY(PRECISION)] = "I8";
|
||||
else
|
||||
device_config[GNA_CONFIG_KEY(PRECISION)] = "I16";
|
||||
} else {
|
||||
auto supported_properties = core.get_property(device, ov::supported_properties);
|
||||
auto supported = [&](const std::string& key) {
|
||||
return std::find(std::begin(supported_properties), std::end(supported_properties), key) !=
|
||||
std::end(supported_properties);
|
||||
};
|
||||
if (supported(CONFIG_KEY(CPU_THREADS_NUM)) && isFlagSetInCommandLine("nthreads")) {
|
||||
device_config[CONFIG_KEY(CPU_THREADS_NUM)] = std::to_string(FLAGS_nthreads);
|
||||
}
|
||||
if (supported(CONFIG_KEY(CPU_THROUGHPUT_STREAMS)) && isFlagSetInCommandLine("nstreams")) {
|
||||
device_config[CONFIG_KEY(CPU_THROUGHPUT_STREAMS)] = FLAGS_nstreams;
|
||||
}
|
||||
if (supported(CONFIG_KEY(CPU_BIND_THREAD)) && isFlagSetInCommandLine("pin")) {
|
||||
device_config[CONFIG_KEY(CPU_BIND_THREAD)] = FLAGS_pin;
|
||||
}
|
||||
set_infer_precision();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -669,7 +676,7 @@ int main(int argc, char* argv[]) {
|
||||
const std::string key = getDeviceTypeFromName(ds.first) + "_THROUGHPUT_STREAMS";
|
||||
device_nstreams[ds.first] = core.get_property(ds.first, key).as<std::string>();
|
||||
} catch (const ov::Exception&) {
|
||||
device_nstreams[ds.first] = core.get_property(ds.first, ov::streams::num.name()).as<std::string>();
|
||||
device_nstreams[ds.first] = core.get_property(ds.first, ov::num_streams.name()).as<std::string>();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -118,8 +118,8 @@ std::vector<std::string> parse_devices(const std::string& device_string) {
|
||||
return devices;
|
||||
}
|
||||
|
||||
std::map<std::string, std::string> parse_nstreams_value_per_device(const std::vector<std::string>& devices,
|
||||
const std::string& values_string) {
|
||||
std::map<std::string, std::string> parse_value_per_device(const std::vector<std::string>& devices,
|
||||
const std::string& values_string) {
|
||||
// Format: <device1>:<value1>,<device2>:<value2> or just <value>
|
||||
std::map<std::string, std::string> result;
|
||||
auto device_value_strings = split(values_string, ',');
|
||||
|
||||
@@ -56,8 +56,8 @@ using PartialShapes = std::map<std::string, ngraph::PartialShape>;
|
||||
|
||||
std::vector<std::string> parse_devices(const std::string& device_string);
|
||||
uint32_t device_default_device_duration_in_seconds(const std::string& device);
|
||||
std::map<std::string, std::string> parse_nstreams_value_per_device(const std::vector<std::string>& devices,
|
||||
const std::string& values_string);
|
||||
std::map<std::string, std::string> parse_value_per_device(const std::vector<std::string>& devices,
|
||||
const std::string& values_string);
|
||||
std::string get_shape_string(const ov::Shape& shape);
|
||||
std::string get_shapes_string(const benchmark_app::PartialShapes& shapes);
|
||||
size_t get_batch_size(const benchmark_app::InputsInfo& inputs_info);
|
||||
|
||||
Reference in New Issue
Block a user