Added processing of layout for speech sample (#10254)

* Added processing of layout for speech sample

* fixed notes

* some improvements

* Code style format

* changed NCC value for NullStatement

* improved batch processing

* added loading batch for imported model

* fixed notes

* fixed notes

* added layout parameter to azure tests
This commit is contained in:
Maxim Gordeev
2022-02-17 16:11:57 +03:00
committed by GitHub
parent ccd7104108
commit 3144c5fab8
6 changed files with 103 additions and 12 deletions

View File

@@ -107,7 +107,8 @@ Options:
-q "<mode>" Optional. Input quantization mode: static (default), dynamic, or user (use with -sf).
-qb "<integer>" Optional. Weight bits for quantization: 8 or 16 (default)
-sf "<double>" Optional. User-specified input scale factor for quantization (use with -q user). If the network contains multiple inputs, provide scale factors by separating them with commas.
-bs "<integer>" Optional. Batch size 1-8 (default 1)
-bs "<integer>" Optional. Batch size 1-8
-layout "<string>" Optional. Prompts how network layouts should be treated by application. For example, \"input1[NCHW],input2[NC]\" or \"[NCHW]\" in case of one input size.
-r "<path>" Optional. Read reference score file and compare scores. Example of usage: <reference.ark> or <reference.npz>
-rg "<path>" Read GNA model from file using path/filename provided (required if -m is missing).
-wg "<path>" Optional. Write GNA model to file using path/filename provided.

View File

@@ -83,7 +83,7 @@ int main(int argc, char* argv[]) {
// -------------------------------------
ov::Core core;
slog::info << "Loading model files:" << slog::endl << FLAGS_m << slog::endl;
uint32_t batchSize = (FLAGS_cw_r > 0 || FLAGS_cw_l > 0) ? 1 : (uint32_t)FLAGS_bs;
uint32_t batchSize = (FLAGS_cw_r > 0 || FLAGS_cw_l > 0 || !FLAGS_bs) ? 1 : (uint32_t)FLAGS_bs;
std::shared_ptr<ov::Model> model;
std::vector<std::string> outputs;
std::vector<size_t> ports;
@@ -115,16 +115,38 @@ int main(int argc, char* argv[]) {
}
}
check_number_of_inputs(model->inputs().size(), numInputFiles);
const ov::Layout tensor_layout{"NC"};
ov::preprocess::PrePostProcessor proc(model);
for (int i = 0; i < model->inputs().size(); i++) {
proc.input(i).tensor().set_element_type(ov::element::f32).set_layout(tensor_layout);
const auto& inputs = model->inputs();
std::map<std::string, std::string> custom_layouts;
if (!FLAGS_layout.empty()) {
custom_layouts = parse_input_layouts(FLAGS_layout, inputs);
}
for (const auto& input : inputs) {
const auto& item_name = input.get_any_name();
auto& in = proc.input(item_name);
in.tensor().set_element_type(ov::element::f32);
// Explicitly set inputs layout
if (custom_layouts.count(item_name) > 0) {
in.model().set_layout(ov::Layout(custom_layouts.at(item_name)));
}
}
for (int i = 0; i < model->outputs().size(); i++) {
proc.output(i).tensor().set_element_type(ov::element::f32);
}
model = proc.build();
ov::set_batch(model, batchSize);
if (FLAGS_bs) {
if (FLAGS_layout.empty() &&
std::any_of(inputs.begin(), inputs.end(), [](const ov::Output<ov::Node>& i) {
return ov::layout::get_layout(i).empty();
})) {
throw std::logic_error(
"-bs option is set to " + std::to_string(FLAGS_bs) +
" but model does not contain layout information for any input. Please "
"specify it explicitly using -layout option. For example, input1[NCHW], input2[NC] or [NC]");
} else {
ov::set_batch(model, batchSize);
}
}
}
// ------------------------------ Get Available Devices ------------------------------------------------------
auto isFeature = [&](const std::string xFeature) {
@@ -235,6 +257,22 @@ int main(int argc, char* argv[]) {
throw std::runtime_error("Cannot open model file " + FLAGS_rg);
}
executableNet = core.import_model(streamrq, deviceStr, genericPluginConfig);
// loading batch from exported model
const auto& imported_inputs = executableNet.inputs();
if (std::any_of(imported_inputs.begin(), imported_inputs.end(), [](const ov::Output<const ov::Node>& i) {
return ov::layout::get_layout(i).empty();
})) {
slog::warn << "No batch dimension was found at any input, assuming batch to be 1." << slog::endl;
batchSize = 1;
} else {
for (auto& info : imported_inputs) {
auto imported_layout = ov::layout::get_layout(info);
if (ov::layout::has_batch(imported_layout)) {
batchSize = (uint32_t)info.get_shape()[ov::layout::batch_idx(imported_layout)];
break;
}
}
}
}
// --------------------------- Exporting gna model using InferenceEngine AOT API---------------------
if (!FLAGS_wg.empty()) {
@@ -251,7 +289,8 @@ int main(int argc, char* argv[]) {
return 0;
}
// ---------------------------------------------------------------------------------------------------------
// --------------------------- Step 3. Create infer request --------------------------------------------------
// --------------------------- Step 3. Create infer request
// --------------------------------------------------
std::vector<InferRequestStruct> inferRequests(1);
for (auto& inferRequest : inferRequests) {
@@ -433,7 +472,8 @@ int main(int argc, char* argv[]) {
outputBlob =
inferRequest.inferRequest.get_tensor(executableNet.output(FLAGS_oname));
}
// locked memory holder should be alive all time while access to its buffer happens
// locked memory holder should be alive all time while access to its buffer
// happens
auto byteSize = numScoresPerFrame * sizeof(float);
std::memcpy(outputFrame, outputBlob.data<float>(), byteSize);
}

View File

@@ -121,6 +121,11 @@ static const char output_layer_names_message[] = "Optional. Layer names for outp
static const char input_layer_names_message[] = "Optional. Layer names for input blobs. "
"The names are separated with \",\" "
"Example: Input1,Input2 ";
/// @brief message for the -layout option (how input layouts should be treated)
static const char layout_message[] =
    "Optional. Prompts how network layouts should be treated by application. "
    "For example, \"input1[NCHW],input2[NC]\" or \"[NCHW]\" in case of one input size.";
/// @brief message for PWL max error percent
static const char pwl_max_error_percent_message[] = "Optional. The maximum percent of error for PWL function."
@@ -176,8 +181,8 @@ DEFINE_int32(qb, 16, quantization_bits_message);
/// @brief Scale factor for quantization
DEFINE_string(sf, "", scale_factor_message);
/// @brief Batch size (default 1)
DEFINE_int32(bs, 1, batch_size_message);
/// @brief Batch size (default 0)
DEFINE_int32(bs, 0, batch_size_message);
/// @brief Number of threads to use for inference on the CPU (also affects Hetero cases)
DEFINE_int32(nthreads, 1, infer_num_threads_message);
@@ -194,6 +199,9 @@ DEFINE_string(oname, "", output_layer_names_message);
/// @brief Input layer name
DEFINE_string(iname, "", input_layer_names_message);
/// @brief Input layer name
DEFINE_string(layout, "", layout_message);
/// @brief PWL max error percent
DEFINE_double(pwl_me, 1.0, pwl_max_error_percent_message);
@@ -223,6 +231,7 @@ static void show_usage() {
std::cout << " -cw_r \"<integer>\" " << context_window_message_r << std::endl;
std::cout << " -oname \"<string>\" " << output_layer_names_message << std::endl;
std::cout << " -iname \"<string>\" " << input_layer_names_message << std::endl;
std::cout << " -layout \"<string>\" " << layout_message << std::endl;
std::cout << " -pwl_me \"<double>\" " << pwl_max_error_percent_message << std::endl;
std::cout << " -exec_target \"<string>\" " << execution_target_message << std::endl;
std::cout << " -compile_target \"<string>\" " << compile_target_message << std::endl;
@@ -282,7 +291,7 @@ bool parse_and_check_command_line(int argc, char* argv[]) {
}
uint32_t batchSize = (uint32_t)FLAGS_bs;
if ((batchSize < 1) || (batchSize > 8)) {
if (batchSize && ((batchSize < 1) || (batchSize > 8))) {
throw std::logic_error("Batch size out of range (1..8).");
}

View File

@@ -488,3 +488,43 @@ std::vector<std::string> convert_str_to_vector(std::string str) {
}
return blobName;
}
/**
 * @brief Parse a layout specification of the form "input0[value0],input1[value1]",
 *        or "[value]" to apply a single layout to every input.
 * @param layout_string raw command-line layout string
 * @param input_info model inputs, used to expand an unnamed "[value]" group to all inputs
 * @return map from input name to its layout string
 * @throws std::logic_error if the string cannot be parsed completely
 */
std::map<std::string, std::string> parse_input_layouts(const std::string& layout_string,
                                                       const std::vector<ov::Output<ov::Node>>& input_info) {
    std::map<std::string, std::string> layouts;
    std::string rest = layout_string;
    auto open_br = rest.find_first_of('[');
    // Name seen before the first '['; empty when the string starts with '['.
    std::string name = rest.substr(0, open_br);
    while (open_br != std::string::npos) {
        const auto close_br = rest.find_first_of(']');
        if (close_br == std::string::npos)
            break;  // unmatched '[' — leftover text triggers the throw below
        // Only refresh the name when a new one precedes this bracket group;
        // a group starting directly with '[' keeps the previous name.
        if (open_br != 0)
            name = rest.substr(0, open_br);
        const auto value = rest.substr(open_br + 1, close_br - open_br - 1);
        if (name.empty()) {
            // Unnamed group: the layout applies to every model input.
            for (const auto& port : input_info)
                layouts[port.get_any_name()] = value;
        } else {
            layouts[name] = value;
        }
        rest.erase(0, close_br + 1);
        if (rest.empty())
            break;
        if (rest.front() == ',')
            rest.erase(0, 1);  // consume separator before the next group
        else if (rest.front() != '[')
            break;  // unexpected character — leftover text triggers the throw below
        open_br = rest.find_first_of('[');
    }
    if (!rest.empty())
        throw std::logic_error("Can't parse input parameter string: " + layout_string);
    return layouts;
}