Added processing of layout for speech sample (#10254)
* Added processing of layout for speech sample
* fixed notes
* some improvements
* Code style format
* changed NCC value for NullStatement
* improved batch processing
* added loading batch for imported model
* fixed notes
* fixed notes
* added layout parameter to azure tests
This commit is contained in:
@@ -107,7 +107,8 @@ Options:
|
||||
-q "<mode>" Optional. Input quantization mode: static (default), dynamic, or user (use with -sf).
|
||||
-qb "<integer>" Optional. Weight bits for quantization: 8 or 16 (default)
|
||||
-sf "<double>" Optional. User-specified input scale factor for quantization (use with -q user). If the network contains multiple inputs, provide scale factors by separating them with commas.
|
||||
-bs "<integer>" Optional. Batch size 1-8 (default 1)
|
||||
-bs "<integer>" Optional. Batch size 1-8
|
||||
-layout "<string>" Optional. Prompts how network layouts should be treated by application. For example, \"input1[NCHW],input2[NC]\" or \"[NCHW]\" in case of one input size.
|
||||
-r "<path>" Optional. Read reference score file and compare scores. Example of usage: <reference.ark> or <reference.npz>
|
||||
-rg "<path>" Read GNA model from file using path/filename provided (required if -m is missing).
|
||||
-wg "<path>" Optional. Write GNA model to file using path/filename provided.
|
||||
|
||||
@@ -83,7 +83,7 @@ int main(int argc, char* argv[]) {
|
||||
// -------------------------------------
|
||||
ov::Core core;
|
||||
slog::info << "Loading model files:" << slog::endl << FLAGS_m << slog::endl;
|
||||
uint32_t batchSize = (FLAGS_cw_r > 0 || FLAGS_cw_l > 0) ? 1 : (uint32_t)FLAGS_bs;
|
||||
uint32_t batchSize = (FLAGS_cw_r > 0 || FLAGS_cw_l > 0 || !FLAGS_bs) ? 1 : (uint32_t)FLAGS_bs;
|
||||
std::shared_ptr<ov::Model> model;
|
||||
std::vector<std::string> outputs;
|
||||
std::vector<size_t> ports;
|
||||
@@ -115,16 +115,38 @@ int main(int argc, char* argv[]) {
|
||||
}
|
||||
}
|
||||
check_number_of_inputs(model->inputs().size(), numInputFiles);
|
||||
const ov::Layout tensor_layout{"NC"};
|
||||
ov::preprocess::PrePostProcessor proc(model);
|
||||
for (int i = 0; i < model->inputs().size(); i++) {
|
||||
proc.input(i).tensor().set_element_type(ov::element::f32).set_layout(tensor_layout);
|
||||
const auto& inputs = model->inputs();
|
||||
std::map<std::string, std::string> custom_layouts;
|
||||
if (!FLAGS_layout.empty()) {
|
||||
custom_layouts = parse_input_layouts(FLAGS_layout, inputs);
|
||||
}
|
||||
for (const auto& input : inputs) {
|
||||
const auto& item_name = input.get_any_name();
|
||||
auto& in = proc.input(item_name);
|
||||
in.tensor().set_element_type(ov::element::f32);
|
||||
// Explicitly set inputs layout
|
||||
if (custom_layouts.count(item_name) > 0) {
|
||||
in.model().set_layout(ov::Layout(custom_layouts.at(item_name)));
|
||||
}
|
||||
}
|
||||
for (int i = 0; i < model->outputs().size(); i++) {
|
||||
proc.output(i).tensor().set_element_type(ov::element::f32);
|
||||
}
|
||||
model = proc.build();
|
||||
ov::set_batch(model, batchSize);
|
||||
if (FLAGS_bs) {
|
||||
if (FLAGS_layout.empty() &&
|
||||
std::any_of(inputs.begin(), inputs.end(), [](const ov::Output<ov::Node>& i) {
|
||||
return ov::layout::get_layout(i).empty();
|
||||
})) {
|
||||
throw std::logic_error(
|
||||
"-bs option is set to " + std::to_string(FLAGS_bs) +
|
||||
" but model does not contain layout information for any input. Please "
|
||||
"specify it explicitly using -layout option. For example, input1[NCHW], input2[NC] or [NC]");
|
||||
} else {
|
||||
ov::set_batch(model, batchSize);
|
||||
}
|
||||
}
|
||||
}
|
||||
// ------------------------------ Get Available Devices ------------------------------------------------------
|
||||
auto isFeature = [&](const std::string xFeature) {
|
||||
@@ -235,6 +257,22 @@ int main(int argc, char* argv[]) {
|
||||
throw std::runtime_error("Cannot open model file " + FLAGS_rg);
|
||||
}
|
||||
executableNet = core.import_model(streamrq, deviceStr, genericPluginConfig);
|
||||
// loading batch from exported model
|
||||
const auto& imported_inputs = executableNet.inputs();
|
||||
if (std::any_of(imported_inputs.begin(), imported_inputs.end(), [](const ov::Output<const ov::Node>& i) {
|
||||
return ov::layout::get_layout(i).empty();
|
||||
})) {
|
||||
slog::warn << "No batch dimension was found at any input, assuming batch to be 1." << slog::endl;
|
||||
batchSize = 1;
|
||||
} else {
|
||||
for (auto& info : imported_inputs) {
|
||||
auto imported_layout = ov::layout::get_layout(info);
|
||||
if (ov::layout::has_batch(imported_layout)) {
|
||||
batchSize = (uint32_t)info.get_shape()[ov::layout::batch_idx(imported_layout)];
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// --------------------------- Exporting gna model using InferenceEngine AOT API---------------------
|
||||
if (!FLAGS_wg.empty()) {
|
||||
@@ -251,7 +289,8 @@ int main(int argc, char* argv[]) {
|
||||
return 0;
|
||||
}
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
// --------------------------- Step 3. Create infer request --------------------------------------------------
|
||||
// --------------------------- Step 3. Create infer request
|
||||
// --------------------------------------------------
|
||||
std::vector<InferRequestStruct> inferRequests(1);
|
||||
|
||||
for (auto& inferRequest : inferRequests) {
|
||||
@@ -433,7 +472,8 @@ int main(int argc, char* argv[]) {
|
||||
outputBlob =
|
||||
inferRequest.inferRequest.get_tensor(executableNet.output(FLAGS_oname));
|
||||
}
|
||||
// locked memory holder should be alive all time while access to its buffer happens
|
||||
// locked memory holder should be alive all time while access to its buffer
|
||||
// happens
|
||||
auto byteSize = numScoresPerFrame * sizeof(float);
|
||||
std::memcpy(outputFrame, outputBlob.data<float>(), byteSize);
|
||||
}
|
||||
|
||||
@@ -121,6 +121,11 @@ static const char output_layer_names_message[] = "Optional. Layer names for outp
|
||||
static const char input_layer_names_message[] = "Optional. Layer names for input blobs. "
|
||||
"The names are separated with \",\" "
|
||||
"Example: Input1,Input2 ";
|
||||
/// @brief message for the inputs layout option (-layout)
static const char layout_message[] =
    "Optional. Prompts how network layouts should be treated by application. "
    "For example, \"input1[NCHW],input2[NC]\" or \"[NCHW]\" in case of one input size.";
|
||||
|
||||
/// @brief message for PWL max error percent
|
||||
static const char pwl_max_error_percent_message[] = "Optional. The maximum percent of error for PWL function."
|
||||
@@ -176,8 +181,8 @@ DEFINE_int32(qb, 16, quantization_bits_message);
|
||||
/// @brief Scale factor for quantization
|
||||
DEFINE_string(sf, "", scale_factor_message);
|
||||
|
||||
/// @brief Batch size (default 1)
|
||||
DEFINE_int32(bs, 1, batch_size_message);
|
||||
/// @brief Batch size (default 0)
|
||||
DEFINE_int32(bs, 0, batch_size_message);
|
||||
|
||||
/// @brief Number of threads to use for inference on the CPU (also affects Hetero cases)
|
||||
DEFINE_int32(nthreads, 1, infer_num_threads_message);
|
||||
@@ -194,6 +199,9 @@ DEFINE_string(oname, "", output_layer_names_message);
|
||||
/// @brief Input layer name
|
||||
DEFINE_string(iname, "", input_layer_names_message);
|
||||
|
||||
/// @brief Input layer name
|
||||
DEFINE_string(layout, "", layout_message);
|
||||
|
||||
/// @brief PWL max error percent
|
||||
DEFINE_double(pwl_me, 1.0, pwl_max_error_percent_message);
|
||||
|
||||
@@ -223,6 +231,7 @@ static void show_usage() {
|
||||
std::cout << " -cw_r \"<integer>\" " << context_window_message_r << std::endl;
|
||||
std::cout << " -oname \"<string>\" " << output_layer_names_message << std::endl;
|
||||
std::cout << " -iname \"<string>\" " << input_layer_names_message << std::endl;
|
||||
std::cout << " -layout \"<string>\" " << layout_message << std::endl;
|
||||
std::cout << " -pwl_me \"<double>\" " << pwl_max_error_percent_message << std::endl;
|
||||
std::cout << " -exec_target \"<string>\" " << execution_target_message << std::endl;
|
||||
std::cout << " -compile_target \"<string>\" " << compile_target_message << std::endl;
|
||||
@@ -282,7 +291,7 @@ bool parse_and_check_command_line(int argc, char* argv[]) {
|
||||
}
|
||||
|
||||
uint32_t batchSize = (uint32_t)FLAGS_bs;
|
||||
if ((batchSize < 1) || (batchSize > 8)) {
|
||||
if (batchSize && ((batchSize < 1) || (batchSize > 8))) {
|
||||
throw std::logic_error("Batch size out of range (1..8).");
|
||||
}
|
||||
|
||||
|
||||
@@ -488,3 +488,43 @@ std::vector<std::string> convert_str_to_vector(std::string str) {
|
||||
}
|
||||
return blobName;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Parse layout string like "input0[value0],input1[value1]" or "[value]" (applied to all inputs)
|
||||
* @param layout_string input names with layout values
|
||||
* @param input_info reference to vector of inputs
|
||||
* @return map of inputs with layout values
|
||||
*/
|
||||
std::map<std::string, std::string> parse_input_layouts(const std::string& layout_string,
|
||||
const std::vector<ov::Output<ov::Node>>& input_info) {
|
||||
// Parse parameter string like "input0[value0],input1[value1]" or "[value]" (applied to all
|
||||
// inputs)
|
||||
std::map<std::string, std::string> return_value;
|
||||
std::string search_string = layout_string;
|
||||
auto start_pos = search_string.find_first_of('[');
|
||||
auto input_name = search_string.substr(0, start_pos);
|
||||
while (start_pos != std::string::npos) {
|
||||
auto end_pos = search_string.find_first_of(']');
|
||||
if (end_pos == std::string::npos)
|
||||
break;
|
||||
if (start_pos)
|
||||
input_name = search_string.substr(0, start_pos);
|
||||
auto input_value = search_string.substr(start_pos + 1, end_pos - start_pos - 1);
|
||||
if (!input_name.empty()) {
|
||||
return_value[input_name] = input_value;
|
||||
} else {
|
||||
for (auto& item : input_info) {
|
||||
return_value[item.get_any_name()] = input_value;
|
||||
}
|
||||
}
|
||||
search_string = search_string.substr(end_pos + 1);
|
||||
if (search_string.empty() || (search_string.front() != ',' && search_string.front() != '['))
|
||||
break;
|
||||
if (search_string.front() == ',')
|
||||
search_string = search_string.substr(1);
|
||||
start_pos = search_string.find_first_of('[');
|
||||
}
|
||||
if (!search_string.empty())
|
||||
throw std::logic_error("Can't parse input parameter string: " + layout_string);
|
||||
return return_value;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user