Removed speech_sample (#21801)

This commit is contained in:
Ilya Lavrenov 2023-12-21 13:06:57 +04:00 committed by GitHub
parent 1a0f0ccd2a
commit 2463acc5b0
14 changed files with 0 additions and 2693 deletions

View File

@@ -1,38 +0,0 @@
# Copyright (C) 2018-2023 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
#
file (GLOB SRC ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp)
file (GLOB HDR ${CMAKE_CURRENT_SOURCE_DIR}/*.hpp
${CMAKE_CURRENT_SOURCE_DIR}/*.h)
# Required zlib and cnpy dependencies
if(NOT TARGET ZLIB::ZLIB)
if(EXISTS "${Samples_SOURCE_DIR}/thirdparty/zlib")
# OpenVINO package puts thirdparty to samples dir
add_subdirectory("${Samples_SOURCE_DIR}/thirdparty/zlib"
"${Samples_BINARY_DIR}/thirdparty/zlib" EXCLUDE_FROM_ALL)
elseif(EXISTS "${Samples_SOURCE_DIR}/../../thirdparty/zlib")
# Allow running samples CMakeLists.txt as stand alone from openvino sources
add_subdirectory("${Samples_SOURCE_DIR}/../../thirdparty/zlib"
"${Samples_BINARY_DIR}/thirdparty/zlib" EXCLUDE_FROM_ALL)
endif()
endif()
if(EXISTS "${Samples_SOURCE_DIR}/thirdparty/cnpy")
# OpenVINO package puts thirdparty to samples dir
add_subdirectory("${Samples_SOURCE_DIR}/thirdparty/cnpy"
"${Samples_BINARY_DIR}/thirdparty/cnpy" EXCLUDE_FROM_ALL)
elseif(EXISTS "${Samples_SOURCE_DIR}/../../thirdparty/cnpy" AND NOT TARGET cnpy)
# Allow running samples CMakeLists.txt as stand alone from openvino sources
add_subdirectory("${Samples_SOURCE_DIR}/../../thirdparty/cnpy"
"${Samples_BINARY_DIR}/thirdparty/cnpy" EXCLUDE_FROM_ALL)
endif()
# add sample
ov_add_sample(NAME speech_sample
SOURCES ${SRC}
HEADERS ${HDR}
DEPENDENCIES ${GFLAGS_TARGET} cnpy ie_samples_utils)

View File

@@ -1,38 +0,0 @@
# Automatic Speech Recognition C++ Sample
> **NOTE**: This sample is being deprecated and will no longer be maintained after OpenVINO 2023.2 (LTS). The main reasons are the outdated state of the sample and its extensive use of GNA, which will not be supported by OpenVINO beyond 2023.2.
This sample demonstrates how to execute asynchronous inference of an acoustic model based on Kaldi\* neural networks and speech feature vectors.
The sample works with Kaldi ARK or Numpy* uncompressed NPZ files, so it does not cover an end-to-end speech recognition (speech-to-text) scenario: additional preprocessing (feature extraction) is required to obtain a feature vector from a speech signal, and postprocessing (decoding) is required to produce text from scores.
For more detailed information on how this sample works, check the dedicated [article](https://docs.openvino.ai/2023.2/openvino_inference_engine_samples_speech_sample_README.html).
## Requirements

| Options           | Values                                                                                                                                    |
| ------------------| ------------------------------------------------------------------------------------------------------------------------------------------|
| Validated Models  | Acoustic model based on Kaldi\* neural networks (see the [Model Preparation](https://docs.openvino.ai/2023.2/openvino_inference_engine_samples_speech_sample_README.html) section) |
| Model Format      | OpenVINO™ toolkit Intermediate Representation (*.xml + *.bin)                                                                              |
| Supported devices | See the [Execution Modes](https://docs.openvino.ai/2023.2/openvino_inference_engine_samples_speech_sample_README.html#execution-modes) section below and the [List of Supported Devices](https://docs.openvino.ai/2023.2/openvino_docs_OV_UG_supported_plugins_Supported_Devices.html) |
The following C++ API is used in the application:

| Feature                 | API                                                                                                         | Description                                                               |
| ------------------------| ------------------------------------------------------------------------------------------------------------| ---------------------------------------------------------------------------|
| Available Devices       | ``ov::Core::get_available_devices``, ``ov::Core::get_property``                                             | Get information about the devices available for inference                  |
| Import/Export Model     | ``ov::Core::import_model``, ``ov::CompiledModel::export_model``                                             | The GNA plugin supports loading and saving of the GNA-optimized model      |
| Model Operations        | ``ov::set_batch``, ``ov::Model::add_output``, ``ov::CompiledModel::inputs``, ``ov::CompiledModel::outputs`` | Manage the model: configure the batch size and the input and output tensors |
| Node Operations         | ``ov::OutputVector::size``, ``ov::Output::get_shape``                                                       | Get node shape                                                              |
| Asynchronous Infer      | ``ov::InferRequest::start_async``, ``ov::InferRequest::wait``                                               | Run inference asynchronously and wait until the result becomes available   |
| InferRequest Operations | ``ov::InferRequest::query_state``, ``ov::VariableState::reset``                                             | Get and reset the CompiledModel state control                               |
| Tensor Operations       | ``ov::Tensor::get_size``, ``ov::Tensor::data``, ``ov::InferRequest::get_tensor``                            | Get a tensor, its size and data                                             |
| Profiling               | ``ov::InferRequest::get_profiling_info``                                                                    | Get infer request profiling info                                            |
Basic OpenVINO™ Runtime API is covered by [Hello Classification C++ sample](https://docs.openvino.ai/2023.2/openvino_inference_engine_samples_hello_classification_README.html).
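Below is a minimal, self-contained sketch of the asynchronous flow the table above refers to. It is illustrative only: the model path, device name, single-input assumption and zero-filled f32 input are placeholders, not part of this sample.

```cpp
#include <algorithm>

#include <openvino/openvino.hpp>

int main() {
    ov::Core core;
    // Read and compile a model (placeholder path and device).
    auto model = core.read_model("model.xml");
    ov::CompiledModel compiled = core.compile_model(model, "CPU");

    ov::InferRequest request = compiled.create_infer_request();
    // Reset memory state before the first utterance (no-op for stateless models).
    for (auto&& state : request.query_state())
        state.reset();

    // Fill the single input tensor with placeholder data, then infer asynchronously.
    ov::Tensor input = request.get_tensor(compiled.input());
    std::fill_n(input.data<float>(), input.get_size(), 0.0f);
    request.start_async();
    request.wait();

    // Scores for the first output are now available.
    ov::Tensor output = request.get_tensor(compiled.output());
    const float* scores = output.data<float>();
    (void)scores;
    return 0;
}
```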

View File

@@ -1,178 +0,0 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "fileutils.hpp"
void ArkFile::get_file_info(const char* fileName,
uint32_t numArrayToFindSize,
uint32_t* ptrNumArrays,
uint32_t* ptrNumMemoryBytes) {
uint32_t numArrays = 0;
uint32_t numMemoryBytes = 0;
std::ifstream in_file(fileName, std::ios::binary);
if (in_file.good()) {
while (!in_file.eof()) {
std::string line;
uint32_t numRows = 0u, numCols = 0u, num_bytes = 0u;
std::getline(in_file, line, '\0'); // read variable length name followed by space and NUL
std::getline(in_file, line, '\4'); // read "BFM" followed by space and control-D
if (line.compare("BFM ") != 0) {
break;
}
in_file.read(reinterpret_cast<char*>(&numRows), sizeof(uint32_t)); // read number of rows
std::getline(in_file, line, '\4'); // read control-D
in_file.read(reinterpret_cast<char*>(&numCols), sizeof(uint32_t)); // read number of columns
num_bytes = numRows * numCols * sizeof(float);
in_file.seekg(num_bytes, in_file.cur); // read data
if (numArrays == numArrayToFindSize) {
numMemoryBytes += num_bytes;
}
numArrays++;
}
in_file.close();
} else {
throw std::runtime_error(std::string("Failed to open %s for reading in get_file_info()!\n") + fileName);
}
if (ptrNumArrays != NULL)
*ptrNumArrays = numArrays;
if (ptrNumMemoryBytes != NULL)
*ptrNumMemoryBytes = numMemoryBytes;
}
void ArkFile::load_file(const char* fileName,
uint32_t arrayIndex,
std::string& ptrName,
std::vector<uint8_t>& memory,
uint32_t* ptrNumRows,
uint32_t* ptrNumColumns,
uint32_t* ptrNumBytesPerElement) {
std::ifstream in_file(fileName, std::ios::binary);
if (in_file.good()) {
uint32_t i = 0;
while (i < arrayIndex) {
std::string line;
uint32_t numRows = 0u, numCols = 0u;
std::getline(in_file, line, '\0'); // read variable length name followed by space and NUL
std::getline(in_file, line, '\4'); // read "BFM" followed by space and control-D
if (line.compare("BFM ") != 0) {
break;
}
in_file.read(reinterpret_cast<char*>(&numRows), sizeof(uint32_t)); // read number of rows
std::getline(in_file, line, '\4'); // read control-D
in_file.read(reinterpret_cast<char*>(&numCols), sizeof(uint32_t)); // read number of columns
in_file.seekg(numRows * numCols * sizeof(float), in_file.cur); // read data
i++;
}
if (!in_file.eof()) {
std::string line;
std::getline(in_file, ptrName, '\0'); // read variable length name followed by space and NUL
std::getline(in_file, line, '\4'); // read "BFM" followed by space and control-D
if (line.compare("BFM ") != 0) {
throw std::runtime_error(std::string("Cannot find array specifier in file %s in load_file()!\n") +
fileName);
}
in_file.read(reinterpret_cast<char*>(ptrNumRows), sizeof(uint32_t)); // read number of rows
std::getline(in_file, line, '\4'); // read control-D
in_file.read(reinterpret_cast<char*>(ptrNumColumns), sizeof(uint32_t)); // read number of columns
in_file.read(reinterpret_cast<char*>(&memory.front()),
*ptrNumRows * *ptrNumColumns * sizeof(float)); // read array data
}
in_file.close();
} else {
throw std::runtime_error(std::string("Failed to open %s for reading in load_file()!\n") + fileName);
}
*ptrNumBytesPerElement = sizeof(float);
}
void ArkFile::save_file(const char* fileName,
bool shouldAppend,
std::string name,
void* ptrMemory,
uint32_t numRows,
uint32_t numColumns) {
std::ios_base::openmode mode = std::ios::binary;
if (shouldAppend) {
mode |= std::ios::app;
}
std::ofstream out_file(fileName, mode);
if (out_file.good()) {
out_file.write(name.c_str(), name.length()); // write name
out_file.write("\0", 1);
out_file.write("BFM ", 4);
out_file.write("\4", 1);
out_file.write(reinterpret_cast<char*>(&numRows), sizeof(uint32_t));
out_file.write("\4", 1);
out_file.write(reinterpret_cast<char*>(&numColumns), sizeof(uint32_t));
out_file.write(reinterpret_cast<char*>(ptrMemory), numRows * numColumns * sizeof(float));
out_file.close();
} else {
throw std::runtime_error(std::string("Failed to open %s for writing in save_file()!\n") + fileName);
}
}
void NumpyFile::get_file_info(const char* fileName,
uint32_t numArrayToFindSize,
uint32_t* ptrNumArrays,
uint32_t* ptrNumMemoryBytes) {
uint32_t numArrays = 0;
uint32_t numMemoryBytes = 0;
cnpy::npz_t my_npz1 = cnpy::npz_load(fileName);
auto it = my_npz1.begin();
std::advance(it, numArrayToFindSize);
if (it != my_npz1.end()) {
numArrays = my_npz1.size();
cnpy::NpyArray my_npy = it->second;
numMemoryBytes = my_npy.data_holder->size();
if (ptrNumArrays != NULL)
*ptrNumArrays = numArrays;
if (ptrNumMemoryBytes != NULL)
*ptrNumMemoryBytes = numMemoryBytes;
} else {
throw std::runtime_error(std::string("Failed to get info %s get_file_info()!\n") + fileName);
}
}
void NumpyFile::load_file(const char* fileName,
uint32_t arrayIndex,
std::string& ptrName,
std::vector<uint8_t>& memory,
uint32_t* ptrNumRows,
uint32_t* ptrNumColumns,
uint32_t* ptrNumBytesPerElement) {
cnpy::npz_t my_npz1 = cnpy::npz_load(fileName);
auto it = my_npz1.begin();
std::advance(it, arrayIndex);
if (it != my_npz1.end()) {
ptrName = it->first;
cnpy::NpyArray my_npy = it->second;
*ptrNumRows = my_npy.shape[0];
*ptrNumColumns = my_npy.shape[1];
for (size_t i = 0; i < my_npy.data_holder->size(); i++) {
memory.at(i) = my_npy.data_holder->at(i);
}
*ptrNumBytesPerElement = sizeof(float);
} else {
throw std::runtime_error(std::string("Failed to open %s for reading in load_file()!\n") + fileName);
}
}
void NumpyFile::save_file(const char* fileName,
bool shouldAppend,
std::string name,
void* ptrMemory,
uint32_t numRows,
uint32_t numColumns) {
std::string mode = shouldAppend ? "a" : "w";
std::vector<size_t> shape{numRows, numColumns};
cnpy::npz_save(fileName, name, reinterpret_cast<float*>(ptrMemory), shape, mode);
}

View File

@@ -1,139 +0,0 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <cnpy.h>
#include <samples/common.hpp>
#include <samples/slog.hpp>
/// @brief Interface for working with input and output files
class BaseFile {
public:
virtual void load_file(const char* fileName,
uint32_t arrayIndex,
std::string& ptrName,
std::vector<uint8_t>& memory,
uint32_t* ptrNumRows,
uint32_t* ptrNumColumns,
uint32_t* ptrNumBytesPerElement) = 0;
virtual void save_file(const char* fileName,
bool shouldAppend,
std::string name,
void* ptrMemory,
uint32_t numRows,
uint32_t numColumns) = 0;
virtual void get_file_info(const char* fileName,
uint32_t numArrayToFindSize,
uint32_t* ptrNumArrays,
uint32_t* ptrNumMemoryBytes) = 0;
};
/// @brief Class responsible for working with .ark files
class ArkFile : public BaseFile {
public:
/**
* @brief Get info from Kaldi ARK speech feature vector file
* @param fileName .ark file name
* @param numArrayToFindSize index of the speech feature vector whose size should be reported
* @param ptrNumArrays pointer that receives the total number of arrays in the file
* @param ptrNumMemoryBytes pointer that receives the number of memory bytes required for the requested array
* @return none.
*/
void get_file_info(const char* fileName,
uint32_t numArrayToFindSize,
uint32_t* ptrNumArrays,
uint32_t* ptrNumMemoryBytes) override;
/**
* @brief Load Kaldi ARK speech feature vector file
* @param fileName .ark file name
* @param arrayIndex index of the speech feature vector in the file
* @param ptrName reference to the variable length name
* @param memory reference to the buffer that receives the speech feature vector
* @param ptrNumRows pointer to number of rows to read
* @param ptrNumColumns pointer to number of columns to read
* @param ptrNumBytesPerElement pointer to number bytes per element (size of float by default)
* @return none.
*/
void load_file(const char* fileName,
uint32_t arrayIndex,
std::string& ptrName,
std::vector<uint8_t>& memory,
uint32_t* ptrNumRows,
uint32_t* ptrNumColumns,
uint32_t* ptrNumBytesPerElement) override;
/**
* @brief Save Kaldi ARK speech feature vector file
* @param fileName .ark file name
* @param shouldAppend bool flag to rewrite or add to the end of file
* @param name reference to variable length name
* @param ptrMemory pointer to speech feature vector to save
* @param numRows number of rows
* @param numColumns number of columns
* @return none.
*/
void save_file(const char* fileName,
bool shouldAppend,
std::string name,
void* ptrMemory,
uint32_t numRows,
uint32_t numColumns) override;
};
/// @brief Class responsible for working with .npz files
class NumpyFile : public BaseFile {
public:
/**
* @brief Get info from Numpy* uncompressed NPZ speech feature vector file
* @param fileName .npz file name
* @param numArrayToFindSize index of the speech feature vector whose size should be reported
* @param ptrNumArrays pointer that receives the total number of arrays in the file
* @param ptrNumMemoryBytes pointer that receives the number of memory bytes required for the requested array
* @return none.
*/
void get_file_info(const char* fileName,
uint32_t numArrayToFindSize,
uint32_t* ptrNumArrays,
uint32_t* ptrNumMemoryBytes) override;
/**
* @brief Load Numpy* uncompressed NPZ speech feature vector file
* @param fileName .npz file name
* @param arrayIndex index of the speech feature vector in the file
* @param ptrName reference to the variable length name
* @param memory reference to the buffer that receives the speech feature vector
* @param ptrNumRows pointer to number of rows to read
* @param ptrNumColumns pointer to number of columns to read
* @param ptrNumBytesPerElement pointer to number bytes per element (size of float by default)
* @return none.
*/
void load_file(const char* fileName,
uint32_t arrayIndex,
std::string& ptrName,
std::vector<uint8_t>& memory,
uint32_t* ptrNumRows,
uint32_t* ptrNumColumns,
uint32_t* ptrNumBytesPerElement) override;
/**
* @brief Save Numpy* uncompressed NPZ speech feature vector file
* @param fileName .npz file name
* @param shouldAppend bool flag to rewrite or add to the end of file
* @param name reference to variable length name
* @param ptrMemory pointer to speech feature vector to save
* @param numRows number of rows
* @param numColumns number of columns
* @return none.
*/
void save_file(const char* fileName,
bool shouldAppend,
std::string name,
void* ptrMemory,
uint32_t numRows,
uint32_t numColumns) override;
};
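As a rough illustration of how the interface above is meant to be driven (this snippet is not part of the removed sources; the file name "features.ark" is a placeholder):

```cpp
#include "fileutils.hpp"

#include <iostream>
#include <vector>

int main() {
    ArkFile reader;
    uint32_t numArrays = 0, numBytes = 0;
    // Ask how many utterances the file contains and how many bytes the first one needs.
    reader.get_file_info("features.ark", 0, &numArrays, &numBytes);

    std::string name;
    std::vector<uint8_t> data(numBytes);
    uint32_t rows = 0, cols = 0, bytesPerElement = 0;
    // Load the first utterance as a rows x cols float matrix into the pre-sized buffer.
    reader.load_file("features.ark", 0, name, data, &rows, &cols, &bytesPerElement);

    std::cout << name << ": " << rows << " x " << cols
              << " (" << numArrays << " utterances in file)" << std::endl;
    return 0;
}
```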

View File

@@ -1,706 +0,0 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <time.h>
#include <chrono>
#include <fstream>
#include <functional>
#include <iomanip>
#include <iostream>
#include <limits>
#include <map>
#include <memory>
#include <random>
#include <string>
#include <thread>
#include <utility>
#include <vector>
// clang-format off
#include <openvino/openvino.hpp>
#include <openvino/runtime/intel_gna/properties.hpp>
#include <samples/args_helper.hpp>
#include <samples/slog.hpp>
#include "fileutils.hpp"
#include "speech_sample.hpp"
#include "utils.hpp"
// clang-format on
using namespace ov::preprocess;
/**
* @brief The entry point for OpenVINO Runtime automatic speech recognition sample
* @file speech_sample/main.cpp
* @example speech_sample/main.cpp
*/
int main(int argc, char* argv[]) {
try {
// ------------------------------ Get OpenVINO Runtime version ----------------------------------------------
slog::info << "OpenVINO runtime: " << ov::get_openvino_version() << slog::endl;
// ------------------------------ Parsing and validation of input arguments ---------------------------------
if (!parse_and_check_command_line(argc, argv)) {
return 0;
}
BaseFile* file;
BaseFile* fileOutput;
ArkFile arkFile;
NumpyFile numpyFile;
std::pair<std::string, std::vector<std::string>> input_data;
if (!FLAGS_i.empty())
input_data = parse_parameters(FLAGS_i);
auto extInputFile = fileExt(input_data.first);
if (extInputFile == "ark") {
file = &arkFile;
} else if (extInputFile == "npz") {
file = &numpyFile;
} else {
throw std::logic_error("Invalid input file");
}
std::vector<std::string> inputFiles;
std::vector<uint32_t> numBytesThisUtterance;
uint32_t numUtterances(0);
if (!input_data.first.empty()) {
std::string outStr;
std::istringstream stream(input_data.first);
uint32_t currentNumUtterances(0), currentNumBytesThisUtterance(0);
while (getline(stream, outStr, ',')) {
std::string filename(fileNameNoExt(outStr) + "." + extInputFile);
inputFiles.push_back(filename);
file->get_file_info(filename.c_str(), 0, &currentNumUtterances, &currentNumBytesThisUtterance);
if (numUtterances == 0) {
numUtterances = currentNumUtterances;
} else if (currentNumUtterances != numUtterances) {
throw std::logic_error(
"Incorrect input files. Number of utterance must be the same for all input files");
}
numBytesThisUtterance.push_back(currentNumBytesThisUtterance);
}
}
size_t numInputFiles(inputFiles.size());
// --------------------------- Step 1. Initialize OpenVINO Runtime core and read model
// -------------------------------------
ov::Core core;
try {
const auto& gnaLibraryVersion = core.get_property("GNA", ov::intel_gna::library_full_version);
slog::info << "Detected GNA Library: " << gnaLibraryVersion << slog::endl;
} catch (std::exception& e) {
slog::info << "Cannot detect GNA Library version, exception: " << e.what() << slog::endl;
}
slog::info << "Loading model files:" << slog::endl << FLAGS_m << slog::endl;
uint32_t batchSize = (FLAGS_cw_r > 0 || FLAGS_cw_l > 0 || !FLAGS_bs) ? 1 : (uint32_t)FLAGS_bs;
std::shared_ptr<ov::Model> model;
// --------------------------- Processing custom outputs ---------------------------------------------
const auto output_data = parse_parameters(FLAGS_o);
const auto reference_data = parse_parameters(FLAGS_r);
const auto outputs = get_first_non_empty(output_data.second, reference_data.second);
// ------------------------------ Preprocessing ------------------------------------------------------
// the preprocessing steps can be done only for a loaded model and are not applicable to an imported model
// (already compiled)
if (!FLAGS_m.empty()) {
const auto outputs_with_ports = parse_to_extract_port(outputs);
model = core.read_model(FLAGS_m);
for (const auto& output_with_port : outputs_with_ports) {
auto output = model->add_output(output_with_port.first, output_with_port.second);
output.set_names({output_with_port.first + ":" + std::to_string(output_with_port.second)});
}
check_number_of_inputs(model->inputs().size(), numInputFiles);
ov::preprocess::PrePostProcessor proc(model);
const auto& inputs = model->inputs();
std::map<std::string, std::string> custom_layouts;
if (!FLAGS_layout.empty()) {
custom_layouts = parse_input_layouts(FLAGS_layout, inputs);
}
for (const auto& input : inputs) {
const auto& item_name = input.get_any_name();
auto& in = proc.input(item_name);
in.tensor().set_element_type(ov::element::f32);
// Explicitly set inputs layout
if (custom_layouts.count(item_name) > 0) {
in.model().set_layout(ov::Layout(custom_layouts.at(item_name)));
}
}
for (size_t i = 0; i < model->outputs().size(); i++) {
proc.output(i).tensor().set_element_type(ov::element::f32);
}
model = proc.build();
if (FLAGS_bs) {
if (FLAGS_layout.empty() &&
std::any_of(inputs.begin(), inputs.end(), [](const ov::Output<ov::Node>& i) {
return ov::layout::get_layout(i).empty();
})) {
throw std::logic_error(
"-bs option is set to " + std::to_string(FLAGS_bs) +
" but model does not contain layout information for any input. Please "
"specify it explicitly using -layout option. For example, input1[NCHW], input2[NC] or [NC]");
} else {
ov::set_batch(model, batchSize);
}
}
}
// ------------------------------ Get Available Devices ------------------------------------------------------
auto isFeature = [&](const std::string xFeature) {
return FLAGS_d.find(xFeature) != std::string::npos;
};
bool useGna = isFeature("GNA");
bool useHetero = isFeature("HETERO");
std::string deviceStr = useHetero && useGna ? "HETERO:GNA,CPU" : FLAGS_d.substr(0, (FLAGS_d.find("_")));
// -----------------------------------------------------------------------------------------------------
// --------------------------- Set parameters and scale factors -------------------------------------
/** Setting parameter for per layer metrics **/
ov::AnyMap gnaPluginConfig;
ov::AnyMap genericPluginConfig;
if (useGna) {
std::string gnaDevice =
useHetero ? FLAGS_d.substr(FLAGS_d.find("GNA"), FLAGS_d.find(",") - FLAGS_d.find("GNA")) : FLAGS_d;
auto parse_gna_device = [&](const std::string& device) -> ov::intel_gna::ExecutionMode {
ov::intel_gna::ExecutionMode mode;
std::stringstream ss(device);
ss >> mode;
return mode;
};
gnaPluginConfig[ov::intel_gna::execution_mode.name()] = gnaDevice.find("_") == std::string::npos
? ov::intel_gna::ExecutionMode::AUTO
: parse_gna_device(gnaDevice);
}
if (FLAGS_pc) {
genericPluginConfig.emplace(ov::enable_profiling(true));
}
if (FLAGS_q.compare("user") == 0) {
if (!FLAGS_rg.empty()) {
std::string errMessage("Custom scale factor can not be set for imported gna model: " + FLAGS_rg);
throw std::logic_error(errMessage);
} else {
auto scale_factors_per_input = parse_scale_factors(model->inputs(), FLAGS_sf);
if (numInputFiles != scale_factors_per_input.size()) {
std::string errMessage("Incorrect command line for multiple inputs: " +
std::to_string(scale_factors_per_input.size()) +
" scale factors provided for " + std::to_string(numInputFiles) +
" input files.");
throw std::logic_error(errMessage);
}
for (auto&& sf : scale_factors_per_input) {
slog::info << "For input " << sf.first << " using scale factor of " << sf.second << slog::endl;
}
gnaPluginConfig[ov::intel_gna::scale_factors_per_input.name()] = scale_factors_per_input;
}
} else {
// "static" quantization with calculated scale factor
if (!FLAGS_rg.empty()) {
slog::info << "Using scale factor from provided imported gna model: " << FLAGS_rg << slog::endl;
} else {
std::map<std::string, float> scale_factors_per_input;
for (size_t i = 0; i < numInputFiles; i++) {
auto inputFileName = inputFiles[i].c_str();
std::string name;
std::vector<uint8_t> ptrFeatures;
uint32_t numArrays(0), numBytes(0), numFrames(0), numFrameElements(0), numBytesPerElement(0);
file->get_file_info(inputFileName, 0, &numArrays, &numBytes);
ptrFeatures.resize(numBytes);
file->load_file(inputFileName,
0,
name,
ptrFeatures,
&numFrames,
&numFrameElements,
&numBytesPerElement);
auto floatScaleFactor = scale_factor_for_quantization(ptrFeatures.data(),
MAX_VAL_2B_FEAT,
numFrames * numFrameElements);
slog::info << "Using scale factor of " << floatScaleFactor << " calculated from first utterance."
<< slog::endl;
scale_factors_per_input[strip_name(model->input(i).get_any_name())] = floatScaleFactor;
}
gnaPluginConfig[ov::intel_gna::scale_factors_per_input.name()] = scale_factors_per_input;
}
}
gnaPluginConfig[ov::hint::inference_precision.name()] = (FLAGS_qb == 8) ? ov::element::i8 : ov::element::i16;
const std::unordered_map<std::string, ov::intel_gna::HWGeneration> StringHWGenerationMap{
{"GNA_TARGET_1_0", ov::intel_gna::HWGeneration::GNA_1_0},
{"GNA_TARGET_1_0_E", ov::intel_gna::HWGeneration::GNA_1_0_E},
{"GNA_TARGET_2_0", ov::intel_gna::HWGeneration::GNA_2_0},
{"GNA_TARGET_3_0", ov::intel_gna::HWGeneration::GNA_3_0},
{"GNA_TARGET_3_1", ov::intel_gna::HWGeneration::GNA_3_1},
{"GNA_TARGET_3_5", ov::intel_gna::HWGeneration::GNA_3_5},
{"GNA_TARGET_3_5_E", ov::intel_gna::HWGeneration::GNA_3_5_E},
{"GNA_TARGET_3_6", ov::intel_gna::HWGeneration::GNA_3_6},
{"GNA_TARGET_4_0", ov::intel_gna::HWGeneration::GNA_4_0}};
auto parse_target = [&](const std::string& target) -> ov::intel_gna::HWGeneration {
auto hw_target = ov::intel_gna::HWGeneration::UNDEFINED;
const auto key_iter = StringHWGenerationMap.find(target);
if (key_iter != StringHWGenerationMap.end()) {
hw_target = key_iter->second;
} else if (!target.empty()) {
slog::warn << "Unsupported target: " << target << slog::endl;
}
return hw_target;
};
gnaPluginConfig[ov::intel_gna::execution_target.name()] = parse_target(FLAGS_exec_target);
gnaPluginConfig[ov::intel_gna::compile_target.name()] = parse_target(FLAGS_compile_target);
gnaPluginConfig[ov::intel_gna::memory_reuse.name()] = !FLAGS_memory_reuse_off;
gnaPluginConfig[ov::intel_gna::pwl_max_error_percent.name()] = FLAGS_pwl_me;
gnaPluginConfig[ov::log::level.name()] = FLAGS_log;
// -----------------------------------------------------------------------------------------------------
// --------------------------- Write model to file --------------------------------------------------
// Embedded GNA model dumping (for Intel(R) Speech Enabling Developer Kit)
if (!FLAGS_we.empty()) {
gnaPluginConfig[ov::intel_gna::firmware_model_image_path.name()] = FLAGS_we;
}
// -----------------------------------------------------------------------------------------------------
// --------------------------- Step 2. Loading model to the device ------------------------------------------
if (useGna) {
if (useHetero) {
genericPluginConfig.insert(ov::device::properties("GNA", gnaPluginConfig));
} else {
genericPluginConfig.insert(std::begin(gnaPluginConfig), std::end(gnaPluginConfig));
}
}
auto t0 = Time::now();
ov::CompiledModel executableNet;
if (!FLAGS_m.empty()) {
slog::info << "Loading model to the device " << FLAGS_d << slog::endl;
executableNet = core.compile_model(model, deviceStr, genericPluginConfig);
} else {
slog::info << "Importing model to the device" << slog::endl;
std::ifstream streamrq(FLAGS_rg, std::ios_base::binary | std::ios_base::in);
if (!streamrq.is_open()) {
throw std::runtime_error("Cannot open model file " + FLAGS_rg);
}
executableNet = core.import_model(streamrq, deviceStr, genericPluginConfig);
// loading batch from exported model
const auto& imported_inputs = executableNet.inputs();
if (std::any_of(imported_inputs.begin(), imported_inputs.end(), [](const ov::Output<const ov::Node>& i) {
return ov::layout::get_layout(i).empty();
})) {
slog::warn << "No batch dimension was found at any input, assuming batch to be 1." << slog::endl;
batchSize = 1;
} else {
for (auto& info : imported_inputs) {
auto imported_layout = ov::layout::get_layout(info);
if (ov::layout::has_batch(imported_layout)) {
batchSize = (uint32_t)info.get_shape()[ov::layout::batch_idx(imported_layout)];
break;
}
}
}
}
ms loadTime = std::chrono::duration_cast<ms>(Time::now() - t0);
slog::info << "Model loading time " << loadTime.count() << " ms" << slog::endl;
// --------------------------- Exporting gna model using OpenVINO API---------------------
if (!FLAGS_wg.empty()) {
slog::info << "Writing GNA Model to file " << FLAGS_wg << slog::endl;
t0 = Time::now();
std::ofstream streamwq(FLAGS_wg, std::ios_base::binary | std::ios::out);
executableNet.export_model(streamwq);
ms exportTime = std::chrono::duration_cast<ms>(Time::now() - t0);
slog::info << "Exporting time " << exportTime.count() << " ms" << slog::endl;
return 0;
}
if (!FLAGS_we.empty()) {
slog::info << "Exported GNA embedded model to file " << FLAGS_we << slog::endl;
if (!FLAGS_compile_target.empty()) {
slog::info << "GNA embedded model target: " << FLAGS_compile_target << slog::endl;
}
return 0;
}
// ---------------------------------------------------------------------------------------------------------
// --------------------------- Step 3. Create infer request
// --------------------------------------------------
std::vector<InferRequestStruct> inferRequests(1);
for (auto& inferRequest : inferRequests) {
inferRequest = {executableNet.create_infer_request(), -1, batchSize};
}
// --------------------------- Step 4. Configure input & output
// --------------------------------------------------
std::vector<ov::Tensor> ptrInputBlobs;
auto cInputInfo = executableNet.inputs();
check_number_of_inputs(cInputInfo.size(), numInputFiles);
if (!input_data.second.empty()) {
std::vector<std::string> inputNameBlobs = input_data.second;
if (inputNameBlobs.size() != cInputInfo.size()) {
std::string errMessage(std::string("Number of network inputs ( ") + std::to_string(cInputInfo.size()) +
" ) is not equal to the number of inputs entered in the -i argument ( " +
std::to_string(inputNameBlobs.size()) + " ).");
throw std::logic_error(errMessage);
}
for (const auto& input : inputNameBlobs) {
ov::Tensor blob = inferRequests.begin()->inferRequest.get_tensor(input);
if (!blob) {
std::string errMessage("No blob with name : " + input);
throw std::logic_error(errMessage);
}
ptrInputBlobs.push_back(blob);
}
} else {
for (const auto& input : cInputInfo) {
ptrInputBlobs.push_back(inferRequests.begin()->inferRequest.get_tensor(input));
}
}
std::vector<std::string> output_name_files;
std::vector<std::string> reference_name_files;
size_t count_file = 1;
if (!output_data.first.empty()) {
output_name_files = convert_str_to_vector(output_data.first);
if (output_name_files.size() != outputs.size() && outputs.size()) {
throw std::logic_error("The number of output files is not equal to the number of network outputs.");
}
count_file = output_name_files.size();
if (executableNet.outputs().size() > 1 && output_data.second.empty() && count_file == 1) {
throw std::logic_error("-o is ambiguous: the model has multiple outputs but only one file provided "
"without output name specification");
}
}
if (!reference_data.first.empty()) {
reference_name_files = convert_str_to_vector(reference_data.first);
if (reference_name_files.size() != outputs.size() && outputs.size()) {
throw std::logic_error("The number of reference files is not equal to the number of network outputs.");
}
count_file = reference_name_files.size();
if (executableNet.outputs().size() > 1 && reference_data.second.empty() && count_file == 1) {
throw std::logic_error("-r is ambiguous: the model has multiple outputs but only one file provided "
"without output name specification");
}
}
if (count_file > executableNet.outputs().size()) {
throw std::logic_error(
"The number of output/reference files is not equal to the number of network outputs.");
}
// -----------------------------------------------------------------------------------------------------
// --------------------------- Step 5. Do inference --------------------------------------------------------
std::vector<std::vector<uint8_t>> ptrUtterances;
const auto effective_outputs_size = outputs.size() ? outputs.size() : executableNet.outputs().size();
std::vector<std::vector<uint8_t>> vectorPtrScores(effective_outputs_size);
std::vector<uint16_t> numScoresPerOutput(effective_outputs_size);
std::vector<std::vector<uint8_t>> vectorPtrReferenceScores(reference_name_files.size());
std::vector<ScoreErrorT> vectorFrameError(reference_name_files.size()),
vectorTotalError(reference_name_files.size());
ptrUtterances.resize(inputFiles.size());
// initialize memory state before starting
for (auto&& state : inferRequests.begin()->inferRequest.query_state()) {
state.reset();
}
/** Work with each utterance **/
for (uint32_t utteranceIndex = 0; utteranceIndex < numUtterances; ++utteranceIndex) {
std::map<std::string, ov::ProfilingInfo> utterancePerfMap;
uint64_t totalNumberOfRunsOnHw = 0;
std::string uttName;
uint32_t numFrames(0), n(0);
std::vector<uint32_t> numFrameElementsInput;
std::vector<uint32_t> numFramesReference(reference_name_files.size()),
numFrameElementsReference(reference_name_files.size()),
numBytesPerElementReference(reference_name_files.size()),
numBytesReferenceScoreThisUtterance(reference_name_files.size());
/** Get information from input file for current utterance **/
numFrameElementsInput.resize(numInputFiles);
for (size_t i = 0; i < inputFiles.size(); i++) {
std::vector<uint8_t> ptrUtterance;
auto inputFilename = inputFiles[i].c_str();
uint32_t currentNumFrames(0), currentNumFrameElementsInput(0), currentNumBytesPerElementInput(0);
file->get_file_info(inputFilename, utteranceIndex, &n, &numBytesThisUtterance[i]);
ptrUtterance.resize(numBytesThisUtterance[i]);
file->load_file(inputFilename,
utteranceIndex,
uttName,
ptrUtterance,
&currentNumFrames,
&currentNumFrameElementsInput,
&currentNumBytesPerElementInput);
if (numFrames == 0) {
numFrames = currentNumFrames;
} else if (numFrames != currentNumFrames) {
std::string errMessage("Number of frames in input files is different: " +
std::to_string(numFrames) + " and " + std::to_string(currentNumFrames));
throw std::logic_error(errMessage);
}
ptrUtterances[i] = ptrUtterance;
numFrameElementsInput[i] = currentNumFrameElementsInput;
}
int i = 0;
for (auto& ptrInputBlob : ptrInputBlobs) {
if (ptrInputBlob.get_size() != numFrameElementsInput[i++] * batchSize) {
throw std::logic_error("network input size(" + std::to_string(ptrInputBlob.get_size()) +
") mismatch to input file size (" +
std::to_string(numFrameElementsInput[i - 1] * batchSize) + ")");
}
}
double totalTime = 0.0;
for (size_t errorIndex = 0; errorIndex < vectorFrameError.size(); errorIndex++) {
clear_score_error(&vectorTotalError[errorIndex]);
vectorTotalError[errorIndex].threshold = vectorFrameError[errorIndex].threshold = MAX_SCORE_DIFFERENCE;
}
std::vector<uint8_t*> inputFrame;
for (auto& ut : ptrUtterances) {
inputFrame.push_back(&ut.front());
}
std::map<std::string, ov::ProfilingInfo> callPerfMap;
size_t frameIndex = 0;
uint32_t numFramesFile = numFrames;
numFrames += FLAGS_cw_l + FLAGS_cw_r;
uint32_t numFramesThisBatch{batchSize};
auto t0 = Time::now();
auto t1 = t0;
BaseFile* fileReferenceScores;
std::string refUtteranceName;
if (!reference_data.first.empty()) {
/** Read file with reference scores **/
auto exReferenceScoresFile = fileExt(reference_data.first);
if (exReferenceScoresFile == "ark") {
fileReferenceScores = &arkFile;
} else if (exReferenceScoresFile == "npz") {
fileReferenceScores = &numpyFile;
} else {
throw std::logic_error("Invalid Reference Scores file");
}
for (size_t next_output = 0; next_output < count_file; next_output++) {
if (fileReferenceScores != nullptr) {
fileReferenceScores->get_file_info(reference_name_files[next_output].c_str(),
utteranceIndex,
&n,
&numBytesReferenceScoreThisUtterance[next_output]);
vectorPtrReferenceScores[next_output].resize(numBytesReferenceScoreThisUtterance[next_output]);
fileReferenceScores->load_file(reference_name_files[next_output].c_str(),
utteranceIndex,
refUtteranceName,
vectorPtrReferenceScores[next_output],
&numFramesReference[next_output],
&numFrameElementsReference[next_output],
&numBytesPerElementReference[next_output]);
}
}
}
while (frameIndex <= numFrames) {
if (frameIndex == numFrames) {
if (std::find_if(inferRequests.begin(), inferRequests.end(), [&](InferRequestStruct x) {
return (x.frameIndex != -1);
}) == inferRequests.end()) {
break;
}
}
bool inferRequestFetched = false;
/** Start inference loop **/
for (auto& inferRequest : inferRequests) {
if (frameIndex == numFrames) {
numFramesThisBatch = 1;
} else {
numFramesThisBatch =
(numFrames - frameIndex < batchSize) ? (numFrames - frameIndex) : batchSize;
}
/* waits until inference result becomes available */
if (inferRequest.frameIndex != -1) {
inferRequest.inferRequest.wait();
if (inferRequest.frameIndex >= 0)
for (size_t next_output = 0; next_output < count_file; next_output++) {
const auto output_name = outputs.size() > next_output
? outputs[next_output]
: executableNet.output(next_output).get_any_name();
auto dims = executableNet.output(output_name).get_shape();
numScoresPerOutput[next_output] = std::accumulate(std::begin(dims),
std::end(dims),
size_t{1},
std::multiplies<size_t>());
vectorPtrScores[next_output].resize(numFramesFile * numScoresPerOutput[next_output] *
sizeof(float));
if (!FLAGS_o.empty()) {
/* Prepare output data for save to file in future */
auto outputFrame = &vectorPtrScores[next_output].front() +
numScoresPerOutput[next_output] * sizeof(float) *
(inferRequest.frameIndex) / batchSize;
ov::Tensor outputBlob =
inferRequest.inferRequest.get_tensor(executableNet.output(output_name));
// locked memory holder should be alive all time while access to its buffer happens
auto byteSize = numScoresPerOutput[next_output] * sizeof(float);
std::memcpy(outputFrame, outputBlob.data<float>(), byteSize);
}
if (!FLAGS_r.empty()) {
/** Compare output data with reference scores **/
ov::Tensor outputBlob =
inferRequest.inferRequest.get_tensor(executableNet.output(output_name));
if (numScoresPerOutput[next_output] / numFrameElementsReference[next_output] ==
batchSize) {
compare_scores(
outputBlob.data<float>(),
&vectorPtrReferenceScores[next_output]
[inferRequest.frameIndex *
numFrameElementsReference[next_output] *
numBytesPerElementReference[next_output]],
&vectorFrameError[next_output],
inferRequest.numFramesThisBatch,
numFrameElementsReference[next_output]);
update_score_error(&vectorFrameError[next_output],
&vectorTotalError[next_output]);
} else {
throw std::logic_error("Number of output and reference frames does not match.");
}
}
if (FLAGS_pc) {
// retrieve new counters
get_performance_counters(inferRequest.inferRequest, callPerfMap);
// summarize retrieved counters with all previous
sum_performance_counters(callPerfMap, utterancePerfMap, totalNumberOfRunsOnHw);
}
}
// -----------------------------------------------------------------------------------------------------
}
if (frameIndex == numFrames) {
inferRequest.frameIndex = -1;
continue;
}
ptrInputBlobs.clear();
if (input_data.second.empty()) {
for (auto& input : cInputInfo) {
ptrInputBlobs.push_back(inferRequest.inferRequest.get_tensor(input));
}
} else {
std::vector<std::string> inputNameBlobs = input_data.second;
for (const auto& input : inputNameBlobs) {
ov::Tensor blob = inferRequests.begin()->inferRequest.get_tensor(input);
if (!blob) {
std::string errMessage("No blob with name : " + input);
throw std::logic_error(errMessage);
}
ptrInputBlobs.push_back(blob);
}
}
/** Iterate over all the input blobs **/
for (size_t i = 0; i < numInputFiles; ++i) {
ov::Tensor minput = ptrInputBlobs[i];
if (!minput) {
std::string errMessage("We expect ptrInputBlobs[" + std::to_string(i) +
"] to be inherited from Tensor, " +
"but in fact we were not able to cast input to Tensor");
throw std::logic_error(errMessage);
}
memcpy(minput.data(),
inputFrame[i],
numFramesThisBatch * numFrameElementsInput[i] * sizeof(float));
// Used to infer fewer frames than the batch size
if (batchSize != numFramesThisBatch) {
memset(minput.data<float>() + numFramesThisBatch * numFrameElementsInput[i],
0,
(batchSize - numFramesThisBatch) * numFrameElementsInput[i] * sizeof(float));
}
}
// -----------------------------------------------------------------------------------------------------
int index = static_cast<int>(frameIndex) - (FLAGS_cw_l + FLAGS_cw_r);
/* Starting inference in asynchronous mode*/
inferRequest.inferRequest.start_async();
inferRequest.frameIndex = index < 0 ? -2 : index;
inferRequest.numFramesThisBatch = numFramesThisBatch;
frameIndex += numFramesThisBatch;
for (size_t j = 0; j < inputFiles.size(); j++) {
if (FLAGS_cw_l > 0 || FLAGS_cw_r > 0) {
int idx = frameIndex - FLAGS_cw_l;
if (idx > 0 && idx < static_cast<int>(numFramesFile)) {
inputFrame[j] += sizeof(float) * numFrameElementsInput[j] * numFramesThisBatch;
} else if (idx >= static_cast<int>(numFramesFile)) {
inputFrame[j] = &ptrUtterances[j].front() + (numFramesFile - 1) * sizeof(float) *
numFrameElementsInput[j] *
numFramesThisBatch;
} else if (idx <= 0) {
inputFrame[j] = &ptrUtterances[j].front();
}
} else {
inputFrame[j] += sizeof(float) * numFrameElementsInput[j] * numFramesThisBatch;
}
}
inferRequestFetched |= true;
}
/** Inference was finished for current frame **/
if (!inferRequestFetched) {
std::this_thread::sleep_for(std::chrono::milliseconds(1));
continue;
}
}
t1 = Time::now();
fsec fs = t1 - t0;
ms d = std::chrono::duration_cast<ms>(fs);
totalTime += d.count();
// resetting state between utterances
for (auto&& state : inferRequests.begin()->inferRequest.query_state()) {
state.reset();
}
// -----------------------------------------------------------------------------------------------------
// --------------------------- Step 6. Process output
// -------------------------------------------------------
/** Show performance results **/
std::cout << "Utterance " << utteranceIndex << ": " << std::endl;
std::cout << "Total time in Infer (HW and SW):\t" << totalTime << " ms" << std::endl;
std::cout << "Frames in utterance:\t\t\t" << numFrames << " frames" << std::endl;
std::cout << "Average Infer time per frame:\t\t" << totalTime / static_cast<double>(numFrames) << " ms\n"
<< std::endl;
if (FLAGS_pc) {
// print performance results
print_performance_counters(utterancePerfMap,
frameIndex,
std::cout,
getFullDeviceName(core, FLAGS_d),
totalNumberOfRunsOnHw,
FLAGS_d);
}
for (size_t next_output = 0; next_output < count_file; next_output++) {
if (!FLAGS_o.empty()) {
auto exOutputScoresFile = fileExt(output_data.first);
if (exOutputScoresFile == "ark") {
fileOutput = &arkFile;
} else if (exOutputScoresFile == "npz") {
fileOutput = &numpyFile;
} else {
throw std::logic_error("Invalid Reference Scores file");
}
/* Save output data to file */
bool shouldAppend = (utteranceIndex != 0);
fileOutput->save_file(output_name_files[next_output].c_str(),
shouldAppend,
uttName,
&vectorPtrScores[next_output].front(),
numFramesFile,
numScoresPerOutput[next_output] / batchSize);
}
if (!FLAGS_r.empty()) {
// print statistical score error
const auto output_name = outputs.size() > next_output
? outputs[next_output]
: executableNet.output(next_output).get_any_name();
std::cout << "Output name: " << output_name << std::endl;
std::cout << "Number scores per frame: " << numScoresPerOutput[next_output] / batchSize << std::endl
<< std::endl;
print_reference_compare_results(vectorTotalError[next_output], numFrames, std::cout);
}
}
}
} catch (const std::exception& error) {
slog::err << error.what() << slog::endl;
return 1;
} catch (...) {
slog::err << "Unknown/internal exception happened" << slog::endl;
return 1;
}
slog::info << "Execution successful" << slog::endl;
return 0;
}

View File

@@ -1,310 +0,0 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <gflags/gflags.h>
#include <iostream>
#include <string>
#include <vector>
/// @brief message for help argument
static const char help_message[] = "Print a usage message.";
/// @brief message for input data argument
static const char input_message[] = "Required. Path(s) to input file(s). "
"Usage for a single file/layer: <input_file.ark> or <input_file.npz>. "
"Example of usage for several files/layers: "
"<layer1>:<port_num1>=<input_file1.ark>,<layer2>:<port_num2>=<input_file2.ark>.";
/// @brief message for model argument
static const char model_message[] = "Required. Path to an .xml file with a trained model (required if -rg is missing).";
/// @brief message for assigning calculation to device
static const char target_device_message[] =
"Optional. Specify a target device to infer on. CPU, GPU, NPU, GNA_AUTO, GNA_HW, "
"GNA_HW_WITH_SW_FBACK, GNA_SW_FP32, "
"GNA_SW_EXACT and HETERO with combination of GNA as the primary device and CPU"
" as a secondary (e.g. HETERO:GNA,CPU) are supported. "
"The sample will look for a suitable plugin for device specified.";
/// @brief message for execution target
static const char execution_target_message[] =
"Optional. Specify GNA execution target generation. "
"May be one of GNA_TARGET_2_0, GNA_TARGET_3_0. "
"By default, generation corresponds to the GNA HW available in the system "
"or the latest fully supported generation by the software. "
"See the GNA Plugin's GNA_EXEC_TARGET config option description.";
/// @brief message for compile target
static const char compile_target_message[] = "Optional. Specify GNA compile target generation. "
"May be one of GNA_TARGET_2_0, GNA_TARGET_3_0. "
"By default, generation corresponds to the GNA HW available in the system "
"or the latest fully supported generation by the software. "
"See the GNA Plugin's GNA_COMPILE_TARGET config option description.";
/// @brief message for enabling GNA log
static const char enable_log_message[] = "Optional. Enable GNA logging, which may give additional info "
"about potential issues found in network. "
"By default logging is disabled.";
/// @brief message for performance counters
static const char performance_counter_message[] = "Optional. Enables per-layer performance report.";
/// @brief message for disabling of compact (memory_reuse) mode
static const char memory_reuse_message[] = "Optional. Disables memory optimizations for compiled model.";
/// @brief message for user library argument
static const char custom_cpu_library_message[] = "Required for CPU plugin custom layers. "
"Absolute path to a shared library with the kernels implementations.";
/// @brief message for score output argument
static const char output_message[] = "Optional. Output file name(s) to save scores (inference results). "
"Usage for a single file/layer: <output_file.ark> or <output_file.npz>. "
"Example of usage for several files/layers: "
"<layer1>:<port_num1>=<output_file1.ark>,<layer2>:<port_num2>=<output_file2.ark>.";
/// @brief message for reference score file argument
static const char reference_score_message[] =
"Optional. Read reference score file(s) and compare inference results with reference scores. "
"Usage for a single file/layer: <reference_file.ark> or <reference_file.npz>. "
"Example of usage for several files/layers: "
"<layer1>:<port_num1>=<reference_file1.ark>,<layer2>:<port_num2>=<reference_file2.ark>.";
/// @brief message for read GNA model argument
static const char read_gna_model_message[] =
"Read GNA model from file using path/filename provided (required if -m is missing).";
/// @brief message for write GNA model argument
static const char write_gna_model_message[] = "Optional. Write GNA model to file using path/filename provided.";
/// @brief message for write GNA embedded model argument
static const char write_embedded_model_message[] =
"Optional. Write GNA embedded model to file using path/filename provided.";
/// @brief message for write GNA embedded model generation argument
static const char write_embedded_model_generation_message[] =
"Optional. GNA generation configuration string for embedded export."
"Can be GNA1 (default) or GNA3.";
/// @brief message for quantization argument
static const char quantization_message[] =
"Optional. Input quantization mode for GNA: static (default) or user defined (use with -sf).";
/// @brief message for quantization bits argument
static const char quantization_bits_message[] =
"Optional. Weight resolution in bits for GNA quantization: 8 or 16 (default)";
/// @brief message for scale factor argument
static const char scale_factor_message[] =
"Optional. User-specified input scale factor for GNA quantization (use with -q user). "
"If the model contains multiple inputs, provide scale factors by separating them with commas. "
"For example: <layer1>:<sf1>,<layer2>:<sf2> or just <sf> to be applied to all inputs.";
/// @brief message for batch size argument
static const char batch_size_message[] = "Optional. Batch size 1-8 (default 1)";
/// @brief message for left context window argument
static const char context_window_message_l[] =
"Optional. Number of frames for left context windows (default is 0). "
"Works only with context window networks."
" If you use the cw_l or cw_r flag, then batch size argument is ignored.";
/// @brief message for right context window argument
static const char context_window_message_r[] =
"Optional. Number of frames for right context windows (default is 0). "
"Works only with context window networks."
" If you use the cw_r or cw_l flag, then batch size argument is ignored.";
/// @brief message for inputs layer names
static const char layout_message[] =
"Optional. Prompts how network layouts should be treated by application. "
"For example, \"input1[NCHW],input2[NC]\" or \"[NCHW]\" in case of one input size.";
/// @brief message for PWL max error percent
static const char pwl_max_error_percent_message[] = "Optional. The maximum percent of error for PWL function. "
"The value must be in <0, 100> range. The default value is 1.0.";
/// \brief Define flag for showing help message <br>
DEFINE_bool(h, false, help_message);
/// \brief Define flag for disabling compact (memory_reuse) mode <br>
DEFINE_bool(memory_reuse_off, false, memory_reuse_message);
/// \brief Define parameter for set image file <br>
/// It is a required parameter
DEFINE_string(i, "", input_message);
/// \brief Define parameter for set model file <br>
/// It is a required parameter
DEFINE_string(m, "", model_message);
/// \brief device the target device to infer on (default CPU) <br>
DEFINE_string(d, "CPU", target_device_message);
/// \brief GNA execution target <br>
DEFINE_string(exec_target, "", execution_target_message);
/// \brief GNA compile target <br>
DEFINE_string(compile_target, "", compile_target_message);
/// \brief GNA log level (default LOG_NONE) <br>
DEFINE_string(log, "LOG_NONE", enable_log_message);
/// \brief Enable per-layer performance report
DEFINE_bool(pc, false, performance_counter_message);
/// @brief Write output file to save ark scores
DEFINE_string(o, "", output_message);
/// @brief Read reference score file
DEFINE_string(r, "", reference_score_message);
/// @brief Read GNA model from file (model.bin)
DEFINE_string(rg, "", read_gna_model_message);
/// @brief Write GNA model to file (model.bin)
DEFINE_string(wg, "", write_gna_model_message);
/// @brief Write GNA embedded model to file (model.bin)
DEFINE_string(we, "", write_embedded_model_message);
/// @brief Input quantization mode (default static)
DEFINE_string(q, "static", quantization_message);
/// @brief Weight resolution in bits (default 16)
DEFINE_int32(qb, 16, quantization_bits_message);
/// @brief Scale factor for quantization
DEFINE_string(sf, "", scale_factor_message);
/// @brief Batch size (default 0)
DEFINE_int32(bs, 0, batch_size_message);
/// @brief Right context window size (default 0)
DEFINE_int32(cw_r, 0, context_window_message_r);
/// @brief Left context window size (default 0)
DEFINE_int32(cw_l, 0, context_window_message_l);
/// @brief Input layer name
DEFINE_string(layout, "", layout_message);
/// @brief PWL max error percent
DEFINE_double(pwl_me, 1.0, pwl_max_error_percent_message);
/**
* \brief This function show a help message
*/
static void show_usage() {
std::cout << std::endl;
std::cout << "speech_sample [OPTION]" << std::endl;
std::cout << "Options:" << std::endl;
std::cout << std::endl;
std::cout << " -h " << help_message << std::endl;
std::cout << " -i \"<path>\" " << input_message << std::endl;
std::cout << " -m \"<path>\" " << model_message << std::endl;
std::cout << " -o \"<path>\" " << output_message << std::endl;
std::cout << " -d \"<device>\" " << target_device_message << std::endl;
std::cout << " -pc " << performance_counter_message << std::endl;
std::cout << " -q \"<mode>\" " << quantization_message << std::endl;
std::cout << " -qb \"<integer>\" " << quantization_bits_message << std::endl;
std::cout << " -sf \"<double>\" " << scale_factor_message << std::endl;
std::cout << " -bs \"<integer>\" " << batch_size_message << std::endl;
std::cout << " -r \"<path>\" " << reference_score_message << std::endl;
std::cout << " -rg \"<path>\" " << read_gna_model_message << std::endl;
std::cout << " -wg \"<path>\" " << write_gna_model_message << std::endl;
std::cout << " -we \"<path>\" " << write_embedded_model_message << std::endl;
std::cout << " -cw_l \"<integer>\" " << context_window_message_l << std::endl;
std::cout << " -cw_r \"<integer>\" " << context_window_message_r << std::endl;
std::cout << " -layout \"<string>\" " << layout_message << std::endl;
std::cout << " -pwl_me \"<double>\" " << pwl_max_error_percent_message << std::endl;
std::cout << " -exec_target \"<string>\" " << execution_target_message << std::endl;
std::cout << " -compile_target \"<string>\" " << compile_target_message << std::endl;
std::cout << " -memory_reuse_off " << memory_reuse_message << std::endl;
}
/**
* @brief Checks input arguments
* @param argc number of args
* @param argv list of input arguments
* @return bool status true(Success) or false(Fail)
*/
bool parse_and_check_command_line(int argc, char* argv[]) {
slog::info << "Parsing input parameters" << slog::endl;
gflags::ParseCommandLineNonHelpFlags(&argc, &argv, true);
if (FLAGS_h) {
show_usage();
showAvailableDevices();
return false;
}
bool isDumpMode = !FLAGS_wg.empty() || !FLAGS_we.empty();
// input not required only in dump mode and if external scale factor provided
if (FLAGS_i.empty() && (!isDumpMode || FLAGS_q.compare("user") != 0)) {
show_usage();
if (isDumpMode) {
throw std::logic_error("In model dump mode either static quantization is used (-i) or user scale"
" factor need to be provided. See -q user option");
}
throw std::logic_error("Input file not set. Please use -i.");
}
if (FLAGS_m.empty() && FLAGS_rg.empty()) {
show_usage();
throw std::logic_error("Either IR file (-m) or GNAModel file (-rg) need to be set.");
}
if ((!FLAGS_m.empty() && !FLAGS_rg.empty())) {
throw std::logic_error("Only one of -m and -rg is allowed.");
}
std::vector<std::string> supportedDevices = {"CPU",
"GPU",
"GNA_AUTO",
"GNA_HW",
"GNA_HW_WITH_SW_FBACK",
"GNA_SW_EXACT",
"GNA_SW_FP32",
"HETERO:GNA,CPU",
"HETERO:GNA_HW,CPU",
"HETERO:GNA_SW_EXACT,CPU",
"HETERO:GNA_SW_FP32,CPU",
"NPU"};
if (std::find(supportedDevices.begin(), supportedDevices.end(), FLAGS_d) == supportedDevices.end()) {
throw std::logic_error("Specified device is not supported.");
}
uint32_t batchSize = (uint32_t)FLAGS_bs;
if (batchSize && ((batchSize < 1) || (batchSize > 8))) {
throw std::logic_error("Batch size out of range (1..8).");
}
/** default is a static quantization **/
if ((FLAGS_q.compare("static") != 0) && (FLAGS_q.compare("user") != 0)) {
throw std::logic_error("Quantization mode not supported (static, user).");
}
if (FLAGS_qb != 16 && FLAGS_qb != 8) {
throw std::logic_error("Only 8 or 16 bits supported.");
}
if (FLAGS_cw_r < 0) {
throw std::logic_error("Invalid value for 'cw_r' argument. It must be greater than or equal to 0");
}
if (FLAGS_cw_l < 0) {
throw std::logic_error("Invalid value for 'cw_l' argument. It must be greater than or equal to 0");
}
if (FLAGS_pwl_me < 0.0 || FLAGS_pwl_me > 100.0) {
throw std::logic_error("Invalid value for 'pwl_me' argument. It must be greater than 0.0 and less than 100.0");
}
return true;
}

View File

@@ -1,542 +0,0 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <cnpy.h>
#include <samples/common.hpp>
#define MAX_SCORE_DIFFERENCE 0.0001f // max score difference for frame error threshold
#define MAX_VAL_2B_FEAT 16384 // max to find scale factor
typedef std::chrono::high_resolution_clock Time;
typedef std::chrono::duration<double, std::ratio<1, 1000>> ms;
typedef std::chrono::duration<float> fsec;
/**
* @brief struct to store score error
*/
struct ScoreErrorT {
uint32_t numScores;
uint32_t numErrors;
float threshold;
float maxError;
float rmsError;
float sumError;
float sumRmsError;
float sumSquaredError;
float maxRelError;
float sumRelError;
float sumSquaredRelError;
float maxAbsRefScore;
float sumAbsRefScore;
};
/**
* @brief struct to store infer request data per frame
*/
struct InferRequestStruct {
ov::InferRequest inferRequest;
int frameIndex;
uint32_t numFramesThisBatch;
};
/**
* @brief Check number of input files and model network inputs
* @param numInputs number model inputs
* @param numInputFiles number of input files
* @return none.
*/
void check_number_of_inputs(size_t numInputs, size_t numInputFiles) {
if (numInputs != numInputFiles) {
throw std::logic_error("Number of network inputs (" + std::to_string(numInputs) +
")"
" is not equal to number of input files (" +
std::to_string(numInputFiles) + ")");
}
}
/**
* @brief Get scale factor for quantization
* @param ptrFloatMemory pointer to float memory with speech feature vector
* @param targetMax max scale factor
* @param numElements number of elements in speech feature vector
* @return scale factor
*/
float scale_factor_for_quantization(void* ptrFloatMemory, float targetMax, uint32_t numElements) {
float* ptrFloatFeat = reinterpret_cast<float*>(ptrFloatMemory);
float max = 0.0;
float scaleFactor;
for (uint32_t i = 0; i < numElements; i++) {
if (fabs(ptrFloatFeat[i]) > max) {
max = fabs(ptrFloatFeat[i]);
}
}
if (max == 0) {
scaleFactor = 1.0;
} else {
scaleFactor = targetMax / max;
}
return (scaleFactor);
}
/**
* @brief Clean score error
* @param error pointer to score error struct
* @return none.
*/
void clear_score_error(ScoreErrorT* error) {
error->numScores = 0;
error->numErrors = 0;
error->maxError = 0.0;
error->rmsError = 0.0;
error->sumError = 0.0;
error->sumRmsError = 0.0;
error->sumSquaredError = 0.0;
error->maxRelError = 0.0;
error->sumRelError = 0.0;
error->sumSquaredRelError = 0.0;
error->maxAbsRefScore = 0.0;
error->sumAbsRefScore = 0.0;
}
/**
* @brief Update total score error
* @param error pointer to score error struct
* @param totalError pointer to total score error struct
* @return none.
*/
void update_score_error(ScoreErrorT* error, ScoreErrorT* totalError) {
totalError->numErrors += error->numErrors;
totalError->numScores += error->numScores;
totalError->sumRmsError += error->rmsError;
totalError->sumError += error->sumError;
totalError->sumAbsRefScore += error->sumAbsRefScore;
totalError->sumSquaredError += error->sumSquaredError;
if (error->maxError > totalError->maxError) {
totalError->maxError = error->maxError;
}
if (error->maxAbsRefScore > totalError->maxAbsRefScore) {
totalError->maxAbsRefScore = error->maxAbsRefScore;
}
totalError->sumRelError += error->sumRelError;
totalError->sumSquaredRelError += error->sumSquaredRelError;
if (error->maxRelError > totalError->maxRelError) {
totalError->maxRelError = error->maxRelError;
}
}
/**
* @brief Compare scores against reference scores; both arrays must have the same dimensions
* @param ptrScoreArray - pointer to the score array
* @param ptrRefScoreArray - pointer to the reference score array to compare against
* @param scoreError - pointer to the score error struct used to store the new error statistics
* @param numRows - number of rows in the score arrays
* @param numColumns - number of columns in the score arrays
* @return none.
*/
void compare_scores(float* ptrScoreArray,
void* ptrRefScoreArray,
ScoreErrorT* scoreError,
uint32_t numRows,
uint32_t numColumns) {
uint32_t numErrors = 0;
clear_score_error(scoreError);
float* A = ptrScoreArray;
float* B = reinterpret_cast<float*>(ptrRefScoreArray);
for (uint32_t i = 0; i < numRows; i++) {
for (uint32_t j = 0; j < numColumns; j++) {
float score = A[i * numColumns + j];
// std::cout << "score" << score << std::endl;
float refscore = B[i * numColumns + j];
float abs_refscore = fabs(refscore);
float error = fabs(refscore - score);
float rel_error = error / (static_cast<float>(abs_refscore) + 1e-20f);
float squared_error = error * error;
float squared_rel_error = rel_error * rel_error;
scoreError->numScores++;
scoreError->sumError += error;
scoreError->sumAbsRefScore += abs_refscore;
scoreError->sumSquaredError += squared_error;
if (abs_refscore > scoreError->maxAbsRefScore) {
scoreError->maxAbsRefScore = abs_refscore;
}
if (error > scoreError->maxError) {
scoreError->maxError = error;
}
scoreError->sumRelError += rel_error;
scoreError->sumSquaredRelError += squared_rel_error;
if (rel_error > scoreError->maxRelError) {
scoreError->maxRelError = rel_error;
}
if (error > scoreError->threshold) {
numErrors++;
}
}
}
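// root-mean-square error over all scores of the utterance (numRows * numColumns values)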
scoreError->rmsError = sqrt(scoreError->sumSquaredError / (numRows * numColumns));
scoreError->sumRmsError += scoreError->rmsError;
scoreError->numErrors = numErrors;
// std::cout << "rmsError=" << scoreError->rmsError << "sumRmsError="<<scoreError->sumRmsError;
}
/**
* @brief Get the standard deviation of the score error
* @param error score error struct
* @return error
*/
float std_dev_error(ScoreErrorT error) {
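// population standard deviation of the error: sqrt(mean of squared errors - (mean error)^2)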
return (sqrt(error.sumSquaredError / error.numScores -
(error.sumError / error.numScores) * (error.sumError / error.numScores)));
}
/**
* @brief Print a report on the statistical score error
* @param totalError reference to a total score error struct
* @param framesNum number of frames in utterance
* @param stream output stream
* @return none.
*/
void print_reference_compare_results(ScoreErrorT const& totalError, size_t framesNum, std::ostream& stream) {
stream << " max abs ref score: " << totalError.maxAbsRefScore << std::endl;
stream << " avg abs ref score: " << totalError.sumAbsRefScore / totalError.numScores << std::endl;
stream << " max error: " << totalError.maxError << std::endl;
stream << " avg error: " << totalError.sumError / totalError.numScores << std::endl;
stream << " avg rms error: " << totalError.sumRmsError / framesNum << std::endl;
stream << " stdev error: " << std_dev_error(totalError) << std::endl << std::endl;
stream << std::endl;
}
/**
* @brief Print a report on the performance counts
* @param utterancePerfMap reference to a map to store performance counters
* @param numberOfFrames number of frames
* @param stream output stream
* @param fullDeviceName full device name string
* @param numberOfFramesOnHw number of frames delivered to GNA HW
* @param FLAGS_d flag of device
* @return none.
*/
void print_performance_counters(std::map<std::string, ov::ProfilingInfo> const& utterancePerfMap,
size_t numberOfFrames,
std::ostream& stream,
std::string fullDeviceName,
const uint64_t numberOfFramesOnHw,
std::string FLAGS_d) {
#if !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__) && !defined(_M_ARM64)
std::ios::fmtflags fmt(std::cout.flags());
stream << std::endl << "Performance counts:" << std::endl;
stream << std::setw(10) << std::right << ""
<< "Counter descriptions";
stream << std::setw(22) << "Utt scoring time";
stream << std::setw(18) << "Avg infer time";
stream << std::endl;
stream << std::setw(46) << "(ms)";
stream << std::setw(24) << "(us per call)";
stream << std::endl;
// if GNA HW counters
for (const auto& it : utterancePerfMap) {
std::string const& counter_name = it.first;
float current_units_us = static_cast<float>(it.second.real_time.count());
float call_units_us = 0;
if (numberOfFrames == 0) {
throw std::logic_error("Number off frames = 0, division by zero.");
} else {
call_units_us = current_units_us / numberOfFrames;
}
if (FLAGS_d.find("GNA") != std::string::npos) {
stream << std::setw(30) << std::left << counter_name.substr(4, counter_name.size() - 1);
} else {
stream << std::setw(30) << std::left << counter_name;
}
stream << std::setw(16) << std::right << current_units_us / 1000;
stream << std::setw(21) << std::right << call_units_us;
stream << std::endl;
}
stream << std::endl;
std::cout << std::endl;
std::cout << "Full device name: " << fullDeviceName << std::endl;
std::cout << std::endl;
stream << "Number of frames delivered to GNA HW: " << numberOfFramesOnHw;
stream << "/" << numberOfFrames;
stream << std::endl;
std::cout.flags(fmt);
#endif
}
/**
* @brief Get performance counts
* @param request reference to infer request
* @param perfCounters reference to a map to save performance counters
* @return none.
*/
void get_performance_counters(ov::InferRequest& request, std::map<std::string, ov::ProfilingInfo>& perfCounters) {
auto retPerfCounters = request.get_profiling_info();
for (const auto& element : retPerfCounters) {
perfCounters[element.node_name] = element;
}
}
/**
* @brief Summarize performance counts and total number of frames executed on the GNA HW device
* @param perfCounters reference to a map to get performance counters
* @param totalPerfCounters reference to a map to save total performance counters
* @param totalRunsOnHw reference to a total number of frames computed on GNA HW
* @return none.
*/
void sum_performance_counters(std::map<std::string, ov::ProfilingInfo> const& perfCounters,
std::map<std::string, ov::ProfilingInfo>& totalPerfCounters,
uint64_t& totalRunsOnHw) {
auto runOnHw = false;
for (const auto& pair : perfCounters) {
totalPerfCounters[pair.first].real_time += pair.second.real_time;
runOnHw |= pair.second.real_time > std::chrono::microseconds(0); // if realTime is above zero, that means that
// a primitive was executed on the device
}
totalRunsOnHw += runOnHw;
}
/**
* @brief Split string by delimiter
* @param s input string
* @param delim delimiter
* @return vector of chunks
*/
std::vector<std::string> split(const std::string& s, char delim) {
std::vector<std::string> result;
std::stringstream ss(s);
std::string item;
while (getline(ss, item, delim)) {
result.push_back(item);
}
return result;
}
/**
* @brief Concatenate strings using a delimiter
* @param chunks input chunks
* @param delim delimiter
* @return concatenated string
*/
std::string concat(const std::vector<std::string>& chunks, char delim) {
std::stringstream ss;
for (auto&& chunk : chunks) {
if (!ss.str().empty()) {
ss << delim;
}
ss << chunk;
}
return ss.str();
}
/**
* @brief Check whether name is present in node vector
* @param nodes nodes
* @param node_name name
* @return false or true
*/
bool check_name(const ov::OutputVector& nodes, const std::string& node_name) {
std::vector<std::string> any_names;
bool count = false;
for (auto& node : nodes) {
any_names.push_back(node.get_any_name());
auto names = node.get_names();
count = std::count(names.begin(), names.end(), node_name);
if (count)
break;
}
if (!count) {
std::stringstream ss;
ss << "Incorrect node name '" + node_name << "'! ";
ss << "Try one of the following names: [ ";
for (auto&& name : any_names) {
ss << name << " ";
}
ss << "]";
throw std::logic_error(ss.str());
}
return count;
}
/**
* @brief Strip the name of the input to exclude ":port"
* @param name input name
* @return stripped input name
*/
std::string strip_name(const std::string& name) {
return {name, 0, name.rfind(':')};
}
/**
* @brief Parse scale factors per input
* Format: <input_name1>=<sf1>,<input_name2>=<sf2> or just <sf>
* @param inputs model inputs
* @param values_string input string with scale factor values
* @return map of scale factors per input
*/
std::map<std::string, float> parse_scale_factors(const ov::OutputVector& inputs, const std::string& values_string) {
auto get_sf = [&](const std::string& sf_string, const std::string& input_name = "") -> float {
float sf;
try {
sf = std::stof(sf_string);
} catch (...) {
throw std::logic_error("Can't get float scale factor from: " + sf_string);
}
if (sf <= 0.0f) {
throw std::logic_error("Scale factor for input '" + input_name +
"' (counting from zero) is out of range (must be positive).");
}
return sf;
};
std::map<std::string, float> result;
auto scale_factor_strings = split(values_string, ',');
for (auto& scale_factor_string : scale_factor_strings) {
auto values = split(scale_factor_string, '=');
if (values.size() == 1) {
if (scale_factor_strings.size() != 1) {
throw std::logic_error("Unrecognized scale factor format! "
"Please specify <input_name1>=<sf1>,<input_name2>=<sf2> or "
"just <sf> to be applied to all inputs");
}
auto scale_factor = get_sf(values.at(0));
for (auto& input : inputs) {
result[input.get_any_name()] = scale_factor;
}
} else if (values.size() > 0) {
auto sf_string = values.back();
values.pop_back();
auto input_name = values.back();
check_name(inputs, input_name);
result[input_name] = get_sf(sf_string, input_name);
}
}
return result;
}
/**
* @brief Parse string of file names separated by comma to save it to vector of file names
* @param str file names separated by comma
* @return vector of file names
*/
std::vector<std::string> convert_str_to_vector(std::string str) {
std::vector<std::string> blobName;
if (!str.empty()) {
size_t pos_last = 0;
size_t pos_next = 0;
while ((pos_next = str.find(",", pos_last)) != std::string::npos) {
blobName.push_back(str.substr(pos_last, pos_next - pos_last));
pos_last = pos_next + 1;
}
blobName.push_back(str.substr(pos_last));
}
return blobName;
}
/**
* @brief Parse layout string like "input0[value0],input1[value1]" or "[value]" (applied to all inputs)
* @param layout_string input names with layout values
* @param input_info reference to vector of inputs
* @return map of inputs with layout values
*/
std::map<std::string, std::string> parse_input_layouts(const std::string& layout_string,
const std::vector<ov::Output<ov::Node>>& input_info) {
// Parse parameter string like "input0[value0],input1[value1]" or "[value]" (applied to all
// inputs)
std::map<std::string, std::string> return_value;
std::string search_string = layout_string;
auto start_pos = search_string.find_first_of('[');
auto input_name = search_string.substr(0, start_pos);
while (start_pos != std::string::npos) {
auto end_pos = search_string.find_first_of(']');
if (end_pos == std::string::npos)
break;
if (start_pos)
input_name = search_string.substr(0, start_pos);
auto input_value = search_string.substr(start_pos + 1, end_pos - start_pos - 1);
if (!input_name.empty()) {
return_value[input_name] = input_value;
} else {
for (auto& item : input_info) {
return_value[item.get_any_name()] = input_value;
}
}
search_string = search_string.substr(end_pos + 1);
if (search_string.empty() || (search_string.front() != ',' && search_string.front() != '['))
break;
if (search_string.front() == ',')
search_string = search_string.substr(1);
start_pos = search_string.find_first_of('[');
}
if (!search_string.empty())
throw std::logic_error("Can't parse input parameter string: " + layout_string);
return return_value;
}
/**
* @brief Parse parameters for inputs/outputs/reference like as "<name1>=<file1.ark/.npz>,<name2>=<file2.ark/.npz>" or
* "<file.ark/.npz>" in case of one input/output/reference.
* @note Exemplary result for the given data: {"<file1.ark/.npz>,<file2.ark/.npz>", {"<name1>", "<name2>"}}
* @param file_paths_string input/output path
* @return pair of filename and vector of layers names
*/
std::pair<std::string, std::vector<std::string>> parse_parameters(const std::string& file_paths_string) {
auto search_string = file_paths_string;
char comma_delim = ',';
char equal_delim = '=';
std::string filename = "";
std::vector<std::string> layers_names;
std::vector<std::string> filenames;
if (!std::count(search_string.begin(), search_string.end(), comma_delim) &&
!std::count(search_string.begin(), search_string.end(), equal_delim)) {
return {search_string, layers_names};
}
search_string += comma_delim;
std::vector<std::string> splitted = split(search_string, comma_delim);
for (size_t j = 0; j < splitted.size(); j++) {
auto equal_delim_pos = splitted[j].find_first_of(equal_delim);
if (equal_delim_pos != std::string::npos) {
layers_names.push_back(splitted[j].substr(0, equal_delim_pos));
filenames.push_back(splitted[j].substr(equal_delim_pos + 1, std::string::npos));
}
}
for (std::vector<std::string>::const_iterator name = filenames.begin(); name != filenames.end(); ++name) {
filename += *name;
if (name != filenames.end() - 1)
filename += comma_delim;
}
return {filename, layers_names};
}
std::vector<std::pair<std::string, size_t>> parse_to_extract_port(const std::vector<std::string>& full_names) {
std::vector<std::pair<std::string, size_t>> result;
for (const auto& full_name : full_names) {
auto pos_layer = full_name.rfind(":");
if (pos_layer == std::string::npos) {
throw std::logic_error("Output " + full_name + " doesn't have a port");
}
const auto name = full_name.substr(0, pos_layer);
try {
const size_t port = std::stoul(full_name.substr(pos_layer + 1));
result.push_back({name, port});
} catch (const std::exception&) {
throw std::logic_error("Ports should have integer type");
}
}
return result;
}
const std::vector<std::string>& get_first_non_empty(const std::vector<std::string>& first,
const std::vector<std::string>& second) {
if (!first.empty())
return first;
return second;
}

View File

@ -1,43 +0,0 @@
# Automatic Speech Recognition Python Sample
> **NOTE**: This sample is being deprecated and will no longer be maintained after OpenVINO 2023.2 (LTS). The main reason for it is the outdated state of the sample and its extensive usage of GNA, which is not going to be supported by OpenVINO beyond 2023.2.
This sample demonstrates how to do a synchronous inference of an acoustic model based on Kaldi\* neural networks and speech feature vectors.
The sample works with Kaldi ARK or Numpy* uncompressed NPZ files, so it does not cover an end-to-end speech recognition scenario (speech to text), requiring additional preprocessing (feature extraction) to get a feature vector from a speech signal, as well as postprocessing (decoding) to produce text from scores.
For more detailed information on how this sample works, check the dedicated [article](https://docs.openvino.ai/2023.2/openvino_inference_engine_ie_bridges_python_sample_speech_sample_README.html)
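For example, a feature matrix prepared by an external front end can be stored in the uncompressed NPZ format that the sample accepts. The sketch below is illustrative only; the file name, array name, and shape are placeholders:

```python
import numpy as np

# One array per utterance; each key is treated as the utterance identifier
features = np.random.rand(100, 440).astype(np.float32)  # 100 frames x 440 features (illustrative)
np.savez('dev93_10.npz', utt_00=features)
```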
## Requirements
| Options | Values |
| ----------------------------| --------------------------------------------------------------------------------------------------------------------------------------------------------------|
| Validated Models | Acoustic model based on Kaldi* neural models (see |
| | [Model Preparation](https://docs.openvino.ai/2023.2/openvino_inference_engine_ie_bridges_python_sample_speech_sample_README.html#model-preparation) section) |
| Model Format | OpenVINO™ toolkit Intermediate Representation (.xml + .bin) |
| Supported devices | See [Execution Modes](https://docs.openvino.ai/2023.2/openvino_inference_engine_ie_bridges_python_sample_speech_sample_README.html#execution-modes) |
| | section below and [List Supported Devices](https://docs.openvino.ai/2023.2/openvino_docs_OV_UG_supported_plugins_Supported_Devices.html) |
| Other language realization | [C++](https://docs.openvino.ai/2023.2/openvino_inference_engine_samples_speech_sample_README.html) |
The Automatic Speech Recognition Python sample demonstrates how to use the following Python API:
| Feature | API | Description |
| -------------------------| ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-----------------------------------------------------------------------|
| Import/Export Model | [openvino.runtime.Core.import_model](https://docs.openvino.ai/2023.2/api/ie_python_api/_autosummary/openvino.runtime.Core.html#openvino.runtime.Core.import_model), | |
| | [openvino.runtime.CompiledModel.export_model](https://docs.openvino.ai/2023.2/api/ie_python_api/_autosummary/openvino.runtime.CompiledModel.html#openvino.runtime.CompiledModel.export_model) | The GNA plugin supports loading and saving of the GNA-optimized model |
| Model Operations | [openvino.runtime.Model.add_outputs](https://docs.openvino.ai/2023.2/api/ie_python_api/_autosummary/openvino.runtime.Model.html#openvino.runtime.Model.add_outputs) , | |
| | [openvino.runtime.set_batch](https://docs.openvino.ai/2023.2/api/ie_python_api/_autosummary/openvino.runtime.html#openvino.runtime.set_batch), | |
| | [openvino.runtime.CompiledModel.inputs](https://docs.openvino.ai/2023.2/api/ie_python_api/_autosummary/openvino.runtime.CompiledModel.html#openvino.runtime.CompiledModel.inputs), | |
| | [openvino.runtime.CompiledModel.outputs](https://docs.openvino.ai/2023.2/api/ie_python_api/_autosummary/openvino.runtime.CompiledModel.html#openvino.runtime.CompiledModel.outputs), | |
| | [openvino.runtime.ConstOutput.any_name](https://docs.openvino.ai/2023.2/api/ie_python_api/_autosummary/openvino.runtime.ConstOutput.html#openvino.runtime.ConstOutput.any_name) | Managing of model: configure batch_size, input and output tensors |
| Synchronous Infer | [openvino.runtime.CompiledModel.create_infer_request](https://docs.openvino.ai/2023.2/api/ie_python_api/_autosummary/openvino.runtime.CompiledModel.html#openvino.runtime.CompiledModel.create_infer_request), | |
| | [openvino.runtime.InferRequest.infer](https://docs.openvino.ai/2023.2/api/ie_python_api/_autosummary/openvino.runtime.InferRequest.html#openvino.runtime.InferRequest.infer) | Do synchronous inference |
| InferRequest Operations | [openvino.runtime.InferRequest.get_input_tensor](https://docs.openvino.ai/2023.2/api/ie_python_api/_autosummary/openvino.runtime.InferRequest.html#openvino.runtime.InferRequest.get_input_tensor), | |
| | [openvino.runtime.InferRequest.model_outputs](https://docs.openvino.ai/2023.2/api/ie_python_api/_autosummary/openvino.runtime.InferRequest.html#openvino.runtime.InferRequest.model_outputs), | |
| | [openvino.runtime.InferRequest.model_inputs](https://docs.openvino.ai/2023.2/api/ie_python_api/_autosummary/openvino.runtime.InferRequest.html#openvino.runtime.InferRequest.model_inputs), | Get info about model using infer request API |
| InferRequest Operations | [openvino.runtime.InferRequest.query_state](https://docs.openvino.ai/2023.2/api/ie_python_api/_autosummary/openvino.runtime.InferRequest.html#openvino.runtime.InferRequest.query_state), | |
| | [openvino.runtime.VariableState.reset](https://docs.openvino.ai/2023.2/api/ie_python_api/_autosummary/openvino.inference_engine.VariableState.html#openvino.inference_engine.VariableState.reset) | Gets and resets CompiledModel state control |
| Profiling | [openvino.runtime.InferRequest.profiling_info](https://docs.openvino.ai/2023.2/api/ie_python_api/_autosummary/openvino.runtime.InferRequest.html#openvino.runtime.InferRequest.profiling_info), | |
| | [openvino.runtime.ProfilingInfo.real_time](https://docs.openvino.ai/2023.2/api/ie_python_api/_autosummary/openvino.runtime.ProfilingInfo.html#openvino.runtime.ProfilingInfo.real_time) | Get infer request profiling info |
Basic OpenVINO™ Runtime API is covered by [Hello Classification Python* Sample](https://docs.openvino.ai/2023.2/openvino_inference_engine_ie_bridges_python_sample_hello_classification_README.html).
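As a minimal sketch of the import/export flow listed above (the model path, device, and configuration values are illustrative; the flow mirrors the one used by the sample):

```python
from io import BytesIO

import openvino as ov

core = ov.Core()
model = core.read_model('wsj_dnn5b.xml')  # illustrative IR path
compiled_model = core.compile_model(model, 'GNA', {'GNA_DEVICE_MODE': 'GNA_SW_EXACT'})

# Export the GNA-optimized model so it can be reloaded later without the original IR
with open('wsj_dnn5b.gna', 'wb') as f:
    f.write(compiled_model.export_model())

with open('wsj_dnn5b.gna', 'rb') as f:
    imported_model = core.import_model(BytesIO(f.read()), 'GNA', {})
```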

View File

@ -1,142 +0,0 @@
# -*- coding: utf-8 -*-
# Copyright (C) 2018-2023 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import argparse
import re
from typing import List, Tuple, Union
def build_arg_parser() -> argparse.ArgumentParser:
"""Create and return argument parser."""
parser = argparse.ArgumentParser(add_help=False)
args = parser.add_argument_group('Options')
model = parser.add_mutually_exclusive_group(required=True)
model.add_argument('-m', '--model', type=str,
help='Path to an .xml file with a trained model (required if -rg is missing).')
model.add_argument('-rg', '--import_gna_model', type=str,
help='Read GNA model from file using path/filename provided (required if -m is missing).')
args.add_argument('-h', '--help', action='help', help='Show this help message and exit.')
args.add_argument('-i', '--input', required=True, type=str,
help='Required. Path(s) to input file(s). '
'Usage for a single file/layer: <input_file.ark> or <input_file.npz>. '
'Example of usage for several files/layers: <layer1>:<port_num1>=<input_file1.ark>,<layer2>:<port_num2>=<input_file2.ark>.')
args.add_argument('-o', '--output', type=str,
help='Optional. Output file name(s) to save scores (inference results). '
'Usage for a single file/layer: <output_file.ark> or <output_file.npz>. '
'Example of usage for several files/layers: <layer1>:<port_num1>=<output_file1.ark>,<layer2>:<port_num2>=<output_file2.ark>.')
args.add_argument('-r', '--reference', type=str,
help='Optional. Read reference score file(s) and compare inference results with reference scores. '
'Usage for a single file/layer: <reference_file.ark> or <reference_file.npz>. '
'Example of usage for several files/layers: <layer1>:<port_num1>=<reference_file1.ark>,<layer2>:<port_num2>=<reference_file2.ark>.')
args.add_argument('-d', '--device', default='CPU', type=str,
help='Optional. Specify a target device to infer on. '
'CPU, GPU, NPU, GNA_AUTO, GNA_HW, GNA_SW_FP32, GNA_SW_EXACT and HETERO with combination of GNA'
' as the primary device and CPU as a secondary (e.g. HETERO:GNA,CPU) are supported. '
'The sample will look for a suitable plugin for device specified. Default value is CPU.')
args.add_argument('-bs', '--batch_size', type=int, choices=range(1, 9), metavar='[1-8]',
help='Optional. Batch size 1-8.')
args.add_argument('-layout', type=str,
help='Optional. Custom layout in format: "input0[value0],input1[value1]" or "[value]" (applied to all inputs)')
args.add_argument('-qb', '--quantization_bits', default=16, type=int, choices=(8, 16), metavar='[8, 16]',
help='Optional. Weight resolution in bits for GNA quantization: 8 or 16 (default 16).')
args.add_argument('-sf', '--scale_factor', type=str,
help='Optional. User-specified input scale factor for GNA quantization. '
'If the model contains multiple inputs, provide scale factors by separating them with commas. '
'For example: <layer1>:<sf1>,<layer2>:<sf2> or just <sf> to be applied to all inputs.')
args.add_argument('-wg', '--export_gna_model', type=str,
help='Optional. Write GNA model to file using path/filename provided.')
args.add_argument('-we', '--export_embedded_gna_model', type=str,
help='Optional. Write GNA embedded model to file using path/filename provided.')
args.add_argument('-we_gen', '--embedded_gna_configuration', default='GNA1', type=str, metavar='[GNA1, GNA3]',
help='Optional. GNA generation configuration string for embedded export. '
'Can be GNA1 (default) or GNA3.')
args.add_argument('--exec_target', default='', type=str, choices=('GNA_TARGET_2_0', 'GNA_TARGET_3_0'),
metavar='[GNA_TARGET_2_0, GNA_TARGET_3_0]',
help='Optional. Specify GNA execution target generation. '
'By default, generation corresponds to the GNA HW available in the system '
'or the latest fully supported generation by the software. '
"See the GNA Plugin's GNA_EXEC_TARGET config option description.")
args.add_argument('-pc', '--performance_counter', action='store_true',
help='Optional. Enables performance report (specify -a to ensure arch accurate results).')
args.add_argument('-a', '--arch', default='CORE', type=str.upper, choices=('CORE', 'ATOM'), metavar='[CORE, ATOM]',
help='Optional. Specify architecture. CORE, ATOM with the combination of -pc.')
args.add_argument('-cw_l', '--context_window_left', type=int, default=0,
help='Optional. Number of frames for left context windows (default is 0). '
'Works only with context window models. '
'If you use the cw_l or cw_r flag, then batch size argument is ignored.')
args.add_argument('-cw_r', '--context_window_right', type=int, default=0,
help='Optional. Number of frames for right context windows (default is 0). '
'Works only with context window models. '
'If you use the cw_l or cw_r flag, then batch size argument is ignored.')
args.add_argument('-pwl_me', type=float, default=1.0,
help='Optional. The maximum percent of error for PWL function. '
'The value must be in the [0, 100] range. The default value is 1.0.')
return parser
def parse_arg_with_names(arg_string: Union[str, None], separator: str = '=') -> Tuple[List[str], List[str]]:
keys = []
values = []
if isinstance(arg_string, str):
for parameter in re.split(', |,', arg_string):
if separator in parameter:
key, value = parameter.split(separator)
keys.append(key)
values.append(value)
else:
values.append(parameter)
return keys, values
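# Illustrative behaviour of parse_arg_with_names (not part of the original sample):
#   parse_arg_with_names('Parameter=input.ark,Input2=other.ark') -> (['Parameter', 'Input2'], ['input.ark', 'other.ark'])
#   parse_arg_with_names('input.ark')                            -> ([], ['input.ark'])
#   parse_arg_with_names(None)                                   -> ([], [])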
def check_arg_with_names(arg: Tuple[List[str], List[str]]) -> bool:
return len(arg[0]) == 0 and len(arg[1]) > 1
def parse_args(separator: str = '=') -> argparse.Namespace:
"""Parse and validate command-line arguments."""
parser = build_arg_parser()
args = parser.parse_args()
if args.context_window_left < 0:
parser.error('Invalid value for argument -cw_l/--context_window_left: Must be an integer >= 0.')
if args.context_window_right < 0:
parser.error('Invalid value for argument -cw_r/--context_window_right: Must be an integer >= 0.')
if args.pwl_me < 0.0 or args.pwl_me > 100.0:
parser.error('Invalid value for -pwl_me argument. It must be in the range from 0.0 to 100.0.')
args.input = parse_arg_with_names(args.input, separator)
if check_arg_with_names(args.input):
parser.error(
'Invalid format for -i/--input argument. Please specify the parameter like this '
f'<input_name1>{separator}<file1.ark/.npz>,<input_name2>{separator}<file2.ark/.npz> or just <file.ark/.npz> in case of one input.',
)
args.scale_factor = parse_arg_with_names(args.scale_factor, separator)
if check_arg_with_names(args.scale_factor):
parser.error(
'Invalid format for -sf/--scale_factor argument. Please specify the parameter like this '
f'<input_name1>{separator}<sf1>,<input_name2>{separator}<sf2> or just <sf> to be applied to all inputs.',
)
args.output = parse_arg_with_names(args.output, separator)
if check_arg_with_names(args.output):
parser.error(
'Invalid format for -o/--output argument. Please specify the parameter like this '
f'<output_name1>{separator}<output1.ark/.npz>,<output_name2>{separator}<output2.ark/.npz> or just <output.ark/.npz> in case of one output.',
)
args.reference = parse_arg_with_names(args.reference, separator)
if check_arg_with_names(args.reference):
parser.error(
'Invalid format for -r/--reference argument. Please specify the parameter like this '
f'<output_name1>{separator}<reference1.ark/.npz>,<output_name2>{separator}<reference2.ark/.npz> or <reference.ark/.npz> in case of one output.',
)
return args

View File

@ -1,112 +0,0 @@
# -*- coding: utf-8 -*-
# Copyright (C) 2018-2023 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import logging as log
import sys
from typing import IO, Any, List, NamedTuple
import numpy as np
class FileData(NamedTuple):
keys: List[str]
utterances: List[np.ndarray]
def read_ark_file(file_name: str) -> FileData:
"""Read utterance matrices from a .ark file."""
def read_key(input_file: IO[Any]) -> str:
"""Read a identifier of utterance matrix."""
key = ''
char = input_file.read(1).decode()
while char not in ('', ' '):
key += char
char = input_file.read(1).decode()
return key
def read_matrix(input_file: IO[Any]) -> np.ndarray:
"""Read a utterance matrix."""
header = input_file.read(5).decode()
if 'FM' in header:
num_of_bytes = 4
dtype = 'float32'
elif 'DM' in header:
num_of_bytes = 8
dtype = 'float64'
else:
log.error(f'The utterance header "{header}" does not contain information about the type of its elements.')
sys.exit(-7)
_, rows, _, cols = np.frombuffer(input_file.read(10), 'int8, int32, int8, int32')[0]
buffer = input_file.read(rows * cols * num_of_bytes)
vector = np.frombuffer(buffer, dtype)
matrix = np.reshape(vector, (rows, cols))
return matrix
keys = []
utterances = []
with open(file_name, 'rb') as input_file:
key = read_key(input_file)
while key:
utterances.append(read_matrix(input_file))
keys.append(key)
key = read_key(input_file)
return FileData(keys, utterances)
def write_ark_file(file_name: str, keys: List[str], utterances: List[np.ndarray]):
"""Write utterance matrices to a .ark file."""
with open(file_name, 'wb') as output_file:
for key, matrix in zip(keys, utterances):
# write an utterance key
output_file.write(key.encode())
output_file.write(' '.encode())
output_file.write('\0B'.encode())
# write the matrix precision
if matrix.dtype == 'float32':
output_file.write('FM '.encode())
elif matrix.dtype == 'float64':
output_file.write('DM '.encode())
# write the matrix shape
output_file.write('\04'.encode())
output_file.write(matrix.shape[0].to_bytes(4, byteorder='little', signed=False))
output_file.write('\04'.encode())
output_file.write(matrix.shape[1].to_bytes(4, byteorder='little', signed=False))
# write the matrix data
output_file.write(matrix.tobytes())
def read_utterance_file(file_name: str) -> FileData:
"""Read utterance matrices from a file."""
file_extension = file_name.split('.')[-1]
if file_extension == 'ark':
return read_ark_file(file_name)
elif file_extension == 'npz':
data = dict(np.load(file_name))
return FileData(list(data.keys()), list(data.values()))
else:
log.error(f'The file {file_name} cannot be read. The sample supports only .ark and .npz files.')
sys.exit(-1)
def write_utterance_file(file_name: str, keys: List[str], utterances: List[np.ndarray]):
"""Write utterance matrices to a file."""
file_extension = file_name.split('.')[-1]
if file_extension == 'ark':
write_ark_file(file_name, keys, utterances)
elif file_extension == 'npz':
np.savez(file_name, **dict(zip(keys, utterances)))
else:
log.error(f'The file {file_name} cannot be written. The sample supports only .ark and .npz files.')
sys.exit(-2)
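# Illustrative round trip through the helpers above (not part of the original sample):
#   write_utterance_file('scores.npz', ['utt_00'], [np.zeros((10, 40), dtype=np.float32)])
#   data = read_utterance_file('scores.npz')
#   data.keys                 -> ['utt_00']
#   data.utterances[0].shape  -> (10, 40)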

View File

@ -1,285 +0,0 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Copyright (C) 2018-2023 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import sys
from io import BytesIO
from timeit import default_timer
from typing import Dict
import numpy as np
import openvino as ov
from arg_parser import parse_args
from file_options import read_utterance_file, write_utterance_file
from utils import (GNA_ATOM_FREQUENCY, GNA_CORE_FREQUENCY,
calculate_scale_factor, compare_with_reference,
get_input_layouts, get_sorted_scale_factors, log,
set_scale_factors)
def do_inference(data: Dict[str, np.ndarray], infer_request: ov.InferRequest, cw_l: int = 0, cw_r: int = 0) -> np.ndarray:
"""Do a synchronous matrix inference."""
frames_to_infer = {}
result = {}
batch_size = infer_request.model_inputs[0].shape[0]
num_of_frames = next(iter(data.values())).shape[0]
for output in infer_request.model_outputs:
result[output.any_name] = np.ndarray((num_of_frames, np.prod(tuple(output.shape)[1:])))
for i in range(-cw_l, num_of_frames + cw_r, batch_size):
if i < 0:
index = 0
elif i >= num_of_frames:
index = num_of_frames - 1
else:
index = i
for _input in infer_request.model_inputs:
frames_to_infer[_input.any_name] = data[_input.any_name][index:index + batch_size]
num_of_frames_to_infer = len(frames_to_infer[_input.any_name])
# Add [batch_size - num_of_frames_to_infer] zero rows to 2d numpy array
# Used to infer fewer frames than the batch size
frames_to_infer[_input.any_name] = np.pad(
frames_to_infer[_input.any_name],
[(0, batch_size - num_of_frames_to_infer), (0, 0)],
)
frames_to_infer[_input.any_name] = frames_to_infer[_input.any_name].reshape(_input.tensor.shape)
frame_results = infer_request.infer(frames_to_infer)
if i - cw_r < 0:
continue
for output in frame_results.keys():
vector_result = frame_results[output].reshape((batch_size, result[output.any_name].shape[1]))
result[output.any_name][i - cw_r:i - cw_r + batch_size] = vector_result[:num_of_frames_to_infer]
return result
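# Example of the batching above (illustrative, not part of the original sample): with batch_size = 8,
# a 20-frame utterance and no context window, inference runs for frames 0-7, 8-15 and 16-19;
# the last call is zero-padded to 8 rows and only its first 4 output rows are kept.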
def main():
args = parse_args()
# --------------------------- Step 1. Initialize OpenVINO Runtime Core ------------------------------------------------
log.info('Creating OpenVINO Runtime Core')
core = ov.Core()
# --------------------------- Step 2. Read a model --------------------------------------------------------------------
if args.model:
log.info(f'Reading the model: {args.model}')
# (.xml and .bin files) or (.onnx file)
model = core.read_model(args.model)
# --------------------------- Step 3. Apply preprocessing -------------------------------------------------------------
model.add_outputs(args.output[0] + args.reference[0])
if args.layout:
layouts = get_input_layouts(args.layout, model.inputs)
ppp = ov.preprocess.PrePostProcessor(model)
for i in range(len(model.inputs)):
ppp.input(i).tensor().set_element_type(ov.Type.f32)
input_name = model.input(i).get_any_name()
if args.layout and input_name in layouts.keys():
ppp.input(i).tensor().set_layout(ov.Layout(layouts[input_name]))
ppp.input(i).model().set_layout(ov.Layout(layouts[input_name]))
for i in range(len(model.outputs)):
ppp.output(i).tensor().set_element_type(ov.Type.f32)
model = ppp.build()
if args.batch_size:
batch_size = args.batch_size if args.context_window_left == args.context_window_right == 0 else 1
if any((not _input.node.layout.empty for _input in model.inputs)):
ov.set_batch(model, batch_size)
else:
log.warning('Layout is not set for any input, so custom batch size is not set')
# ---------------------------Step 4. Configure plugin ---------------------------------------------------------
devices = args.device.replace('HETERO:', '').split(',')
plugin_config = {}
if 'GNA' in args.device:
gna_device_mode = devices[0] if '_' in devices[0] else 'GNA_AUTO'
devices[0] = 'GNA'
plugin_config['GNA_DEVICE_MODE'] = gna_device_mode
plugin_config['GNA_PRECISION'] = f'I{args.quantization_bits}'
plugin_config['GNA_EXEC_TARGET'] = args.exec_target
plugin_config['GNA_PWL_MAX_ERROR_PERCENT'] = str(args.pwl_me)
# Set a GNA scale factor
if args.import_gna_model:
if args.scale_factor[1]:
log.error(f'A custom scale factor cannot be set for an imported GNA model: {args.import_gna_model}')
return 1
else:
log.info(f'Using the scale factor from the imported GNA model: {args.import_gna_model}')
else:
if args.scale_factor[1]:
scale_factors = get_sorted_scale_factors(args.scale_factor, model.inputs)
else:
scale_factors = []
for file_name in args.input[1]:
_, utterances = read_utterance_file(file_name)
scale_factor = calculate_scale_factor(utterances[0])
log.info('Using scale factor(s) calculated from first utterance')
scale_factors.append(str(scale_factor))
set_scale_factors(plugin_config, scale_factors, model.inputs)
if args.export_embedded_gna_model:
plugin_config['GNA_FIRMWARE_MODEL_IMAGE'] = args.export_embedded_gna_model
plugin_config['GNA_FIRMWARE_MODEL_IMAGE_GENERATION'] = args.embedded_gna_configuration
if args.performance_counter:
plugin_config['PERF_COUNT'] = 'YES'
device_str = f'HETERO:{",".join(devices)}' if 'HETERO' in args.device else devices[0]
# --------------------------- Step 5. Loading model to the device -----------------------------------------------------
log.info('Loading the model to the plugin')
if args.model:
compiled_model = core.compile_model(model, device_str, plugin_config)
else:
with open(args.import_gna_model, 'rb') as f:
buf = BytesIO(f.read())
compiled_model = core.import_model(buf, device_str, plugin_config)
# --------------------------- Exporting GNA model using InferenceEngine AOT API ---------------------------------------
if args.export_gna_model:
log.info(f'Writing GNA Model to {args.export_gna_model}')
user_stream = compiled_model.export_model()
with open(args.export_gna_model, 'wb') as f:
f.write(user_stream)
return 0
if args.export_embedded_gna_model:
log.info(f'Exported GNA embedded model to file {args.export_embedded_gna_model}')
log.info(f'GNA embedded model export done for GNA generation {args.embedded_gna_configuration}')
return 0
# --------------------------- Step 6. Set up input --------------------------------------------------------------------
input_layer_names = args.input[0] if args.input[0] else [_input.any_name for _input in compiled_model.inputs]
input_file_names = args.input[1]
if len(input_layer_names) != len(input_file_names):
log.error(f'Number of model inputs ({len(compiled_model.inputs)}) is not equal '
f'to number of ark files ({len(input_file_names)})')
return 3
input_file_data = [read_utterance_file(file_name) for file_name in input_file_names]
infer_data = [
{
input_layer_names[j]: input_file_data[j].utterances[i]
for j in range(len(input_file_data))
}
for i in range(len(input_file_data[0].utterances))
]
output_layer_names = args.output[0] if args.output[0] else [compiled_model.outputs[0].any_name]
output_file_names = args.output[1]
reference_layer_names = args.reference[0] if args.reference[0] else [compiled_model.outputs[0].any_name]
reference_file_names = args.reference[1]
reference_file_data = [read_utterance_file(file_name) for file_name in reference_file_names]
references = [
{
reference_layer_names[j]: reference_file_data[j].utterances[i]
for j in range(len(reference_file_data))
}
for i in range(len(input_file_data[0].utterances))
]
# --------------------------- Step 7. Create infer request ------------------------------------------------------------
infer_request = compiled_model.create_infer_request()
# --------------------------- Step 8. Do inference --------------------------------------------------------------------
log.info('Starting inference in synchronous mode')
results = []
total_infer_time = 0
for i in range(len(infer_data)):
start_infer_time = default_timer()
# Reset states between utterance inferences so that stateful (memory) layers do not carry data over from the previous utterance
infer_request.reset_state()
results.append(do_inference(
infer_data[i],
infer_request,
args.context_window_left,
args.context_window_right,
))
infer_time = default_timer() - start_infer_time
total_infer_time += infer_time
num_of_frames = infer_data[i][input_layer_names[0]].shape[0]
avg_infer_time_per_frame = infer_time / num_of_frames
# --------------------------- Step 9. Process output ------------------------------------------------------------------
log.info('')
log.info(f'Utterance {i}:')
log.info(f'Total time in Infer (HW and SW): {infer_time * 1000:.2f}ms')
log.info(f'Frames in utterance: {num_of_frames}')
log.info(f'Average Infer time per frame: {avg_infer_time_per_frame * 1000:.2f}ms')
for name in set(reference_layer_names + output_layer_names):
log.info('')
log.info(f'Output layer name: {name}')
log.info(f'Number of scores per frame: {results[i][name].shape[1]}')
if name in references[i].keys():
log.info('')
compare_with_reference(results[i][name], references[i][name])
if args.performance_counter:
if 'GNA' in args.device:
total_cycles = infer_request.profiling_info[0].real_time.total_seconds()
stall_cycles = infer_request.profiling_info[1].real_time.total_seconds()
active_cycles = total_cycles - stall_cycles
frequency = 10**6
if args.arch == 'CORE':
frequency *= GNA_CORE_FREQUENCY
else:
frequency *= GNA_ATOM_FREQUENCY
total_inference_time = total_cycles / frequency
active_time = active_cycles / frequency
stall_time = stall_cycles / frequency
log.info('')
log.info('Performance Statistics of GNA Hardware')
log.info(f' Total Inference Time: {(total_inference_time * 1000):.4f} ms')
log.info(f' Active Time: {(active_time * 1000):.4f} ms')
log.info(f' Stall Time: {(stall_time * 1000):.4f} ms')
log.info('')
log.info(f'Total sample time: {total_infer_time * 1000:.2f}ms')
for i in range(len(output_file_names)):
log.info(f'Saving results from "{output_layer_names[i]}" layer to {output_file_names[i]}')
data = [results[j][output_layer_names[i]] for j in range(len(input_file_data[0].utterances))]
write_utterance_file(output_file_names[i], input_file_data[0].keys, data)
# ----------------------------------------------------------------------------------------------------------------------
log.info('This sample is an API example, '
'for any performance measurements please use the dedicated benchmark_app tool\n')
return 0
if __name__ == '__main__':
sys.exit(main())

View File

@ -1,74 +0,0 @@
# -*- coding: utf-8 -*-
# Copyright (C) 2018-2023 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import logging as log
import sys
from typing import Dict, List, Tuple
import numpy as np
from openvino.runtime import Output
# Operating frequency (MHz) of GNA HW devices for Core and Atom architectures
GNA_CORE_FREQUENCY = 400
GNA_ATOM_FREQUENCY = 200
log.basicConfig(format='[ %(levelname)s ] %(message)s', level=log.INFO, stream=sys.stdout)
def compare_with_reference(result: np.ndarray, reference: np.ndarray):
error_matrix = np.absolute(result - reference)
max_error = np.max(error_matrix)
sum_error = np.sum(error_matrix)
avg_error = sum_error / error_matrix.size
sum_square_error = np.sum(np.square(error_matrix))
avg_rms_error = np.sqrt(sum_square_error / error_matrix.size)
stdev_error = np.sqrt(sum_square_error / error_matrix.size - avg_error * avg_error)
log.info(f'max error: {max_error:.7f}')
log.info(f'avg error: {avg_error:.7f}')
log.info(f'avg rms error: {avg_rms_error:.7f}')
log.info(f'stdev error: {stdev_error:.7f}')
def calculate_scale_factor(matrix: np.ndarray) -> float:
"""Get scale factor for quantization using utterance matrix."""
# Max to find scale factor
target_max = 16384
max_val = np.max(matrix)
if max_val == 0:
return 1.0
else:
return target_max / max_val
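# Illustrative only (not part of the original sample): with target_max = 16384, an utterance
# whose largest value is 4.0 gets a scale factor of 16384 / 4.0 = 4096.0, e.g.
#   calculate_scale_factor(np.array([[1.0, 4.0], [2.0, 0.5]]))  -> 4096.0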
def set_scale_factors(plugin_config: Dict[str, str], scale_factors: List[str], inputs: List[Output]):
"""Set a scale factor provided for each input."""
for i in range(len(inputs)):
log.info(f'For input {inputs[i].get_any_name()} using scale factor of {scale_factors[i]}')
plugin_config[f'GNA_SCALE_FACTOR_{i}'] = scale_factors[i]
def get_input_layouts(layout_string: str, inputs: List[Output]) -> Dict[str, str]:
if layout_string[0] == '[':
return {_input.get_any_name(): layout_string[1:-1] for _input in inputs}
else:
sep = '],' if ',' in layout_string else ']'
tmp = [_input.split('[') for _input in layout_string[:-1].split(sep)]
return {_input[0]: _input[1] for _input in tmp}
def get_sorted_scale_factors(scale_factor_arg: Tuple[List[str], List[str]], inputs: List[Output]) -> List[str]:
if scale_factor_arg[0]:
res = ['1' for _ in range(len(inputs))]
input_names = [_input.get_any_name() for _input in inputs]
for i in range(len(scale_factor_arg[0])):
input_index = input_names.index(scale_factor_arg[0][i])
res[input_index] = scale_factor_arg[1][i]
return res
else:
return [scale_factor_arg[1][0] for _ in range(len(inputs))]

View File

@ -42,15 +42,6 @@ def shell(cmd, env=None, cwd=None, out_format="plain"):
return p.returncode, stdout, stderr
def parse_avg_err(speech_sample_out):
errors = []
for line in speech_sample_out:
if "avg error" in line:
errors.append(float(line.split(': ')[1]))
avg_error = round(np.mean(errors), 2)
return avg_error
def fix_path(path, env_name, root_path=None):
"""
Fix path: expand environment variables if any, make absolute path from

View File

@ -1,77 +0,0 @@
"""
Copyright (C) 2018-2023 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import os
import pytest
import platform
import sys
import logging as log
from common.samples_common_test_class import SamplesCommonTestClass
from common.samples_common_test_class import Environment
from common.samples_common_test_class import get_tests
from common.common_utils import parse_avg_err
log.basicConfig(format="[ %(levelname)s ] %(message)s", level=log.INFO, stream=sys.stdout)
test_data = get_tests(cmd_params={'i': [os.path.join('ark', 'dev93_10.ark')],
'm': [os.path.join('wsj', 'FP32', 'wsj_dnn5b.xml')],
'layout': ["[NC]"],
'bs': [1, 2],
'o': ['res_output.ark'],
'r': [os.path.join('ark', 'dev93_scores_10.ark')],
'qb': [8, 16],
'sf': ["2175.43"],
'q': ["static", "user"],
'd': ['GNA_SW_EXACT']},
use_device=False
)
new_format_test_data = get_tests(cmd_params={'i': ['Parameter=' + os.path.join(Environment.env['test_data'], 'ark', 'dev93_10.ark')],
'm': [os.path.join('wsj', 'FP32', 'wsj_dnn5b.xml')],
'layout': ["[NC]"],
'bs': [1],
'o': ['affinetransform14/Fused_Add_:0=' + os.path.join(Environment.env['test_data'], 'res_output.ark')],
'r': ['affinetransform14/Fused_Add_:0=' + os.path.join(Environment.env['test_data'], 'ark', 'dev93_scores_10.ark')],
'qb': [8],
'sf': ["Parameter=2175.43"],
'q': ["static"],
'memory_reuse_off': [" "],
'd': ['GNA_SW_EXACT']},
use_device=False
)
class TestSpeechSample(SamplesCommonTestClass):
@classmethod
def setup_class(cls):
cls.sample_name = 'speech_sample'
cls.threshold = 0.06
super().setup_class()
@pytest.mark.parametrize("param", test_data)
@pytest.mark.skipif(condition=platform.system() == 'Darwin' or platform.machine() == 'aarch64',
reason="GNA is not available on macOS or aarch64")
def test_speech_sample_nthreads(self, param):
stdout = self._test(param).split('\n')
avg_error = parse_avg_err(stdout)
log.info('Average scores diff: {}'.format(avg_error))
assert avg_error <= self.threshold
@pytest.mark.parametrize("param", new_format_test_data)
@pytest.mark.skipif(condition=platform.system() == 'Darwin' or platform.machine() == 'aarch64',
reason="GNA is not available on macOS or aarch64")
def test_speech_sample_new_format(self, param):
stdout = self._test(param, complete_path=False).split('\n')
avg_error = parse_avg_err(stdout)
log.info('Average scores diff: {}'.format(avg_error))
assert avg_error <= self.threshold