// Copyright (C) 2018-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "fileutils.hpp" #include "speech_sample.hpp" #define MAX_SCORE_DIFFERENCE 0.0001f // max score difference for frame error threshold #define MAX_VAL_2B_FEAT 16384 // max to find scale factor using namespace InferenceEngine; typedef std::chrono::high_resolution_clock Time; typedef std::chrono::duration> ms; typedef std::chrono::duration fsec; /** * @brief struct to store score error */ typedef struct { uint32_t numScores; uint32_t numErrors; float threshold; float maxError; float rmsError; float sumError; float sumRmsError; float sumSquaredError; float maxRelError; float sumRelError; float sumSquaredRelError; } score_error_t; /** * @brief struct to store infer request data per frame */ struct InferRequestStruct { InferRequest inferRequest; int frameIndex; uint32_t numFramesThisBatch; }; /** * @brief Check number of input files and model network inputs * @param numInputs number model inputs * @param numInputFiles number of input files * @return none. */ void CheckNumberOfInputs(size_t numInputs, size_t numInputFiles) { if (numInputs != numInputFiles) { throw std::logic_error("Number of network inputs (" + std::to_string(numInputs) + ")" " is not equal to number of input files (" + std::to_string(numInputFiles) + ")"); } } /** * @brief Get scale factor for quantization * @param ptrFloatMemory pointer to float memory with speech feature vector * @param targetMax max scale factor * @param numElements number of elements in speech feature vector * @return scale factor */ float ScaleFactorForQuantization(void* ptrFloatMemory, float targetMax, uint32_t numElements) { float* ptrFloatFeat = reinterpret_cast(ptrFloatMemory); float max = 0.0; float scaleFactor; for (uint32_t i = 0; i < numElements; i++) { if (fabs(ptrFloatFeat[i]) > max) { max = fabs(ptrFloatFeat[i]); } } if (max == 0) { scaleFactor = 1.0; } else { scaleFactor = targetMax / max; } return (scaleFactor); } /** * @brief Clean score error * @param error pointer to score error struct * @return none. */ void ClearScoreError(score_error_t* error) { error->numScores = 0; error->numErrors = 0; error->maxError = 0.0; error->rmsError = 0.0; error->sumError = 0.0; error->sumRmsError = 0.0; error->sumSquaredError = 0.0; error->maxRelError = 0.0; error->sumRelError = 0.0; error->sumSquaredRelError = 0.0; } /** * @brief Update total score error * @param error pointer to score error struct * @param totalError pointer to total score error struct * @return none. */ void UpdateScoreError(score_error_t* error, score_error_t* totalError) { totalError->numErrors += error->numErrors; totalError->numScores += error->numScores; totalError->sumRmsError += error->rmsError; totalError->sumError += error->sumError; totalError->sumSquaredError += error->sumSquaredError; if (error->maxError > totalError->maxError) { totalError->maxError = error->maxError; } totalError->sumRelError += error->sumRelError; totalError->sumSquaredRelError += error->sumSquaredRelError; if (error->maxRelError > totalError->maxRelError) { totalError->maxRelError = error->maxRelError; } } /** * @brief Compare score errors, array should be the same length * @param ptrScoreArray - pointer to score error struct array * @param ptrRefScoreArray - pointer to score error struct array to compare * @param scoreError - pointer to score error struct to save a new error * @param numRows - number rows in score error arrays * @param numColumns - number columns in score error arrays * @return none. */ void CompareScores(float* ptrScoreArray, void* ptrRefScoreArray, score_error_t* scoreError, uint32_t numRows, uint32_t numColumns) { uint32_t numErrors = 0; ClearScoreError(scoreError); float* A = ptrScoreArray; float* B = reinterpret_cast(ptrRefScoreArray); for (uint32_t i = 0; i < numRows; i++) { for (uint32_t j = 0; j < numColumns; j++) { float score = A[i * numColumns + j]; float refscore = B[i * numColumns + j]; float error = fabs(refscore - score); float rel_error = error / (static_cast(fabs(refscore)) + 1e-20f); float squared_error = error * error; float squared_rel_error = rel_error * rel_error; scoreError->numScores++; scoreError->sumError += error; scoreError->sumSquaredError += squared_error; if (error > scoreError->maxError) { scoreError->maxError = error; } scoreError->sumRelError += rel_error; scoreError->sumSquaredRelError += squared_rel_error; if (rel_error > scoreError->maxRelError) { scoreError->maxRelError = rel_error; } if (error > scoreError->threshold) { numErrors++; } } } scoreError->rmsError = sqrt(scoreError->sumSquaredError / (numRows * numColumns)); scoreError->sumRmsError += scoreError->rmsError; scoreError->numErrors = numErrors; } /** * @brief Get total stdev error * @param error pointer to score error struct * @return error */ float StdDevError(score_error_t error) { return (sqrt(error.sumSquaredError / error.numScores - (error.sumError / error.numScores) * (error.sumError / error.numScores))); } #if !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__) && !defined(_M_ARM64) # ifdef _WIN32 # include # include # else # include # endif inline void native_cpuid(unsigned int* eax, unsigned int* ebx, unsigned int* ecx, unsigned int* edx) { size_t level = *eax; # ifdef _WIN32 int regs[4] = {static_cast(*eax), static_cast(*ebx), static_cast(*ecx), static_cast(*edx)}; __cpuid(regs, level); *eax = static_cast(regs[0]); *ebx = static_cast(regs[1]); *ecx = static_cast(regs[2]); *edx = static_cast(regs[3]); # else __get_cpuid(level, eax, ebx, ecx, edx); # endif } /** * @brief Get GNA module frequency * @return GNA module frequency in MHz */ float getGnaFrequencyMHz() { uint32_t eax = 1; uint32_t ebx = 0; uint32_t ecx = 0; uint32_t edx = 0; uint32_t family = 0; uint32_t model = 0; const uint8_t sixth_family = 6; const uint8_t cannon_lake_model = 102; const uint8_t gemini_lake_model = 122; const uint8_t ice_lake_model = 126; const uint8_t tgl_model = 140; const uint8_t next_model = 151; native_cpuid(&eax, &ebx, &ecx, &edx); family = (eax >> 8) & 0xF; // model is the concatenation of two fields // | extended model | model | // copy extended model data model = (eax >> 16) & 0xF; // shift model <<= 4; // copy model data model += (eax >> 4) & 0xF; if (family == sixth_family) { switch (model) { case cannon_lake_model: case ice_lake_model: case tgl_model: case next_model: return 400; case gemini_lake_model: return 200; default: return 1; } } else { // counters not supported and we returns just default value return 1; } } #endif // if not ARM /** * @brief Print a report on the statistical score error * @param totalError reference to a total score error struct * @param framesNum number of frames in utterance * @param stream output stream * @return none. */ void printReferenceCompareResults(score_error_t const& totalError, size_t framesNum, std::ostream& stream) { stream << " max error: " << totalError.maxError << std::endl; stream << " avg error: " << totalError.sumError / totalError.numScores << std::endl; stream << " avg rms error: " << totalError.sumRmsError / framesNum << std::endl; stream << " stdev error: " << StdDevError(totalError) << std::endl << std::endl; stream << std::endl; } /** * @brief Print a report on the performance counts * @param utterancePerfMap reference to a map to store performance counters * @param numberOfFrames number of frames * @param stream output stream * @param fullDeviceName full device name string * @param numberOfFramesOnHw number of frames delivered to GNA HW * @return none. */ void printPerformanceCounters( std::map const& utterancePerfMap, size_t numberOfFrames, std::ostream& stream, std::string fullDeviceName, const uint64_t numberOfFramesOnHw) { #if !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__) && !defined(_M_ARM64) if (numberOfFrames == 0) throw std::runtime_error("printPerformanceCounters: numberOfFrames=0, incorrect input"); std::ios::fmtflags fmt(std::cout.flags()); stream << std::endl << "Performance counts:" << std::endl; stream << std::setw(10) << std::right << "" << "Counter descriptions"; stream << std::setw(22) << "Utt scoring time"; stream << std::setw(18) << "Avg infer time"; stream << std::endl; stream << std::setw(46) << "(ms)"; stream << std::setw(24) << "(us per call)"; stream << std::endl; // if GNA HW counters // get frequency of GNA module float freq = getGnaFrequencyMHz(); for (const auto& it : utterancePerfMap) { std::string const& counter_name = it.first; float current_units_us = static_cast(it.second.realTime_uSec) / freq; float call_units_us = current_units_us / numberOfFrames; if (FLAGS_d.find("GNA") != std::string::npos) { stream << std::setw(30) << std::left << counter_name.substr(4, counter_name.size() - 1); } else { stream << std::setw(30) << std::left << counter_name; } stream << std::setw(16) << std::right << current_units_us / 1000; stream << std::setw(21) << std::right << call_units_us; stream << std::endl; } stream << std::endl; std::cout << std::endl; std::cout << "Full device name: " << fullDeviceName << std::endl; std::cout << std::endl; stream << "Number of frames delivered to GNA HW: " << numberOfFramesOnHw; stream << "/" << numberOfFrames; stream << std::endl; std::cout.flags(fmt); #endif } /** * @brief Get performance counts * @param request reference to infer request * @param perfCounters reference to a map to save performance counters * @return none. */ void getPerformanceCounters(InferenceEngine::InferRequest& request, std::map& perfCounters) { auto retPerfCounters = request.GetPerformanceCounts(); for (const auto& pair : retPerfCounters) { perfCounters[pair.first] = pair.second; } } /** * @brief Summarize performance counts and total number of frames executed on the GNA HW device * @param perfCounters reference to a map to get performance counters * @param totalPerfCounters reference to a map to save total performance counters * @param totalRunsOnHw reference to a total number of frames computed on GNA HW * @return none. */ void sumPerformanceCounters(std::map const& perfCounters, std::map& totalPerfCounters, uint64_t& totalRunsOnHw) { auto runOnHw = false; for (const auto& pair : perfCounters) { totalPerfCounters[pair.first].realTime_uSec += pair.second.realTime_uSec; runOnHw |= pair.second.realTime_uSec > 0; // if realTime is above zero, that means that a primitive was executed on the device } totalRunsOnHw += runOnHw; } /** * @brief Parse scale factors * @param str reference to user-specified input scale factor for quantization, can be separated by comma * @return vector scale factors */ std::vector ParseScaleFactors(const std::string& str) { std::vector scaleFactorInput; if (!str.empty()) { std::string outStr; std::istringstream stream(str); int i = 0; while (getline(stream, outStr, ',')) { auto floatScaleFactor = std::stof(outStr); if (floatScaleFactor <= 0.0f) { throw std::logic_error("Scale factor for input #" + std::to_string(i) + " (counting from zero) is out of range (must be positive)."); } scaleFactorInput.push_back(outStr); i++; } } else { throw std::logic_error("Scale factor need to be specified via -sf option if you are using -q user"); } return scaleFactorInput; } /** * @brief Parse string of file names separated by comma to save it to vector of file names * @param str file names separated by comma * @return vector of file names */ std::vector ConvertStrToVector(std::string str) { std::vector blobName; if (!str.empty()) { size_t pos_last = 0; size_t pos_next = 0; while ((pos_next = str.find(",", pos_last)) != std::string::npos) { blobName.push_back(str.substr(pos_last, pos_next - pos_last)); pos_last = pos_next + 1; } blobName.push_back(str.substr(pos_last)); } return blobName; } /** * @brief Checks input arguments * @param argc number of args * @param argv list of input arguments * @return bool status true(Success) or false(Fail) */ bool ParseAndCheckCommandLine(int argc, char* argv[]) { slog::info << "Parsing input parameters" << slog::endl; gflags::ParseCommandLineNonHelpFlags(&argc, &argv, true); if (FLAGS_h) { showUsage(); showAvailableDevices(); return false; } bool isDumpMode = !FLAGS_wg.empty() || !FLAGS_we.empty(); // input not required only in dump mode and if external scale factor provided if (FLAGS_i.empty() && (!isDumpMode || FLAGS_q.compare("user") != 0)) { showUsage(); if (isDumpMode) { throw std::logic_error("In model dump mode either static quantization is used (-i) or user scale" " factor need to be provided. See -q user option"); } throw std::logic_error("Input file not set. Please use -i."); } if (FLAGS_m.empty() && FLAGS_rg.empty()) { showUsage(); throw std::logic_error("Either IR file (-m) or GNAModel file (-rg) need to be set."); } if ((!FLAGS_m.empty() && !FLAGS_rg.empty())) { throw std::logic_error("Only one of -m and -rg is allowed."); } std::vector supportedDevices = {"CPU", "GPU", "GNA_AUTO", "GNA_HW", "GNA_HW_WITH_SW_FBACK", "GNA_SW_EXACT", "GNA_SW", "GNA_SW_FP32", "HETERO:GNA,CPU", "HETERO:GNA_HW,CPU", "HETERO:GNA_SW_EXACT,CPU", "HETERO:GNA_SW,CPU", "HETERO:GNA_SW_FP32,CPU", "MYRIAD"}; if (std::find(supportedDevices.begin(), supportedDevices.end(), FLAGS_d) == supportedDevices.end()) { throw std::logic_error("Specified device is not supported."); } uint32_t batchSize = (uint32_t)FLAGS_bs; if ((batchSize < 1) || (batchSize > 8)) { throw std::logic_error("Batch size out of range (1..8)."); } /** default is a static quantization **/ if ((FLAGS_q.compare("static") != 0) && (FLAGS_q.compare("dynamic") != 0) && (FLAGS_q.compare("user") != 0)) { throw std::logic_error("Quantization mode not supported (static, dynamic, user)."); } if (FLAGS_q.compare("dynamic") == 0) { throw std::logic_error("Dynamic quantization not yet supported."); } if (FLAGS_qb != 16 && FLAGS_qb != 8) { throw std::logic_error("Only 8 or 16 bits supported."); } if (FLAGS_nthreads <= 0) { throw std::logic_error("Invalid value for 'nthreads' argument. It must be greater that or equal to 0"); } if (FLAGS_cw_r < 0) { throw std::logic_error("Invalid value for 'cw_r' argument. It must be greater than or equal to 0"); } if (FLAGS_cw_l < 0) { throw std::logic_error("Invalid value for 'cw_l' argument. It must be greater than or equal to 0"); } if (FLAGS_pwl_me < 0.0 || FLAGS_pwl_me > 100.0) { throw std::logic_error("Invalid value for 'pwl_me' argument. It must be greater than 0.0 and less than 100.0"); } return true; } /** * @brief The entry point for inference engine automatic speech recognition sample * @file speech_sample/main.cpp * @example speech_sample/main.cpp */ int main(int argc, char* argv[]) { try { // ------------------------------ Get Inference Engine version // ------------------------------------------------------ slog::info << "InferenceEngine: " << GetInferenceEngineVersion() << slog::endl; // ------------------------------ Parsing and validation of input arguments --------------------------------- if (!ParseAndCheckCommandLine(argc, argv)) { return 0; } BaseFile* file; BaseFile* fileOutput; ArkFile arkFile; NumpyFile numpyFile; auto extInputFile = fileExt(FLAGS_i); if (extInputFile == "ark") { file = &arkFile; } else if (extInputFile == "npz") { file = &numpyFile; } else { throw std::logic_error("Invalid input file"); } std::vector inputFiles; std::vector numBytesThisUtterance; uint32_t numUtterances(0); if (!FLAGS_i.empty()) { std::string outStr; std::istringstream stream(FLAGS_i); uint32_t currentNumUtterances(0), currentNumBytesThisUtterance(0); while (getline(stream, outStr, ',')) { std::string filename(fileNameNoExt(outStr) + "." + extInputFile); inputFiles.push_back(filename); file->GetFileInfo(filename.c_str(), 0, ¤tNumUtterances, ¤tNumBytesThisUtterance); if (numUtterances == 0) { numUtterances = currentNumUtterances; } else if (currentNumUtterances != numUtterances) { throw std::logic_error( "Incorrect input files. Number of utterance must be the same for all input files"); } numBytesThisUtterance.push_back(currentNumBytesThisUtterance); } } size_t numInputFiles(inputFiles.size()); // ----------------------------------------------------------------------------------------------------- // --------------------------- Step 1. Initialize inference engine core ------------------------------------- slog::info << "Loading Inference Engine" << slog::endl; Core ie; CNNNetwork network; ExecutableNetwork executableNet; // ------------------------------ Get Available Devices ------------------------------------------------------ auto isFeature = [&](const std::string xFeature) { return FLAGS_d.find(xFeature) != std::string::npos; }; bool useGna = isFeature("GNA"); bool useHetero = isFeature("HETERO"); std::string deviceStr = useHetero && useGna ? "HETERO:GNA,CPU" : FLAGS_d.substr(0, (FLAGS_d.find("_"))); slog::info << "Device info: " << slog::endl; slog::info << ie.GetVersions(deviceStr) << slog::endl; // ----------------------------------------------------------------------------------------------------- // --------------------------- Step 2. Read a model in OpenVINO Intermediate Representation (.xml and .bin // files) slog::info << "Loading network files:" << slog::endl << FLAGS_m << slog::endl; uint32_t batchSize = (FLAGS_cw_r > 0 || FLAGS_cw_l > 0) ? 1 : (uint32_t)FLAGS_bs; if (!FLAGS_m.empty()) { /** Read network model **/ network = ie.ReadNetwork(FLAGS_m); CheckNumberOfInputs(network.getInputsInfo().size(), numInputFiles); // ------------------------------------------------------------------------------------------------- // --------------------------- Set batch size --------------------------------------------------- /** Set batch size. Unlike in imaging, batching in time (rather than space) is done for speech recognition. * **/ network.setBatchSize(batchSize); slog::info << "Batch size is " << std::to_string(network.getBatchSize()) << slog::endl; } // ----------------------------------------------------------------------------------------------------- // --------------------------- Set parameters and scale factors ------------------------------------- /** Setting parameter for per layer metrics **/ std::map gnaPluginConfig; std::map genericPluginConfig; if (useGna) { std::string gnaDevice = useHetero ? FLAGS_d.substr(FLAGS_d.find("GNA"), FLAGS_d.find(",") - FLAGS_d.find("GNA")) : FLAGS_d; gnaPluginConfig[GNAConfigParams::KEY_GNA_DEVICE_MODE] = gnaDevice.find("_") == std::string::npos ? "GNA_AUTO" : gnaDevice; } if (FLAGS_pc) { genericPluginConfig[PluginConfigParams::KEY_PERF_COUNT] = PluginConfigParams::YES; } if (FLAGS_q.compare("user") == 0) { if (!FLAGS_rg.empty()) { slog::warn << "Custom scale factor will be used for imported gna model: " << FLAGS_rg << slog::endl; } auto scaleFactorInput = ParseScaleFactors(FLAGS_sf); if (numInputFiles != scaleFactorInput.size()) { std::string errMessage( "Incorrect command line for multiple inputs: " + std::to_string(scaleFactorInput.size()) + " scale factors provided for " + std::to_string(numInputFiles) + " input files."); throw std::logic_error(errMessage); } for (size_t i = 0; i < scaleFactorInput.size(); ++i) { slog::info << "For input " << i << " using scale factor of " << scaleFactorInput[i] << slog::endl; std::string scaleFactorConfigKey = GNA_CONFIG_KEY(SCALE_FACTOR) + std::string("_") + std::to_string(i); gnaPluginConfig[scaleFactorConfigKey] = scaleFactorInput[i]; } } else { // "static" quantization with calculated scale factor if (!FLAGS_rg.empty()) { slog::info << "Using scale factor from provided imported gna model: " << FLAGS_rg << slog::endl; } else { for (size_t i = 0; i < numInputFiles; i++) { auto inputFileName = inputFiles[i].c_str(); std::string name; std::vector ptrFeatures; uint32_t numArrays(0), numBytes(0), numFrames(0), numFrameElements(0), numBytesPerElement(0); file->GetFileInfo(inputFileName, 0, &numArrays, &numBytes); ptrFeatures.resize(numBytes); file->LoadFile(inputFileName, 0, name, ptrFeatures, &numFrames, &numFrameElements, &numBytesPerElement); auto floatScaleFactor = ScaleFactorForQuantization(ptrFeatures.data(), MAX_VAL_2B_FEAT, numFrames * numFrameElements); slog::info << "Using scale factor of " << floatScaleFactor << " calculated from first utterance." << slog::endl; std::string scaleFactorConfigKey = GNA_CONFIG_KEY(SCALE_FACTOR) + std::string("_") + std::to_string(i); gnaPluginConfig[scaleFactorConfigKey] = std::to_string(floatScaleFactor); } } } if (FLAGS_qb == 8) { gnaPluginConfig[GNAConfigParams::KEY_GNA_PRECISION] = "I8"; } else { gnaPluginConfig[GNAConfigParams::KEY_GNA_PRECISION] = "I16"; } gnaPluginConfig[GNAConfigParams::KEY_GNA_EXEC_TARGET] = FLAGS_exec_target; gnaPluginConfig[GNAConfigParams::KEY_GNA_COMPILE_TARGET] = FLAGS_compile_target; gnaPluginConfig[GNAConfigParams::KEY_GNA_LIB_N_THREADS] = std::to_string((FLAGS_cw_r > 0 || FLAGS_cw_l > 0) ? 1 : FLAGS_nthreads); gnaPluginConfig[GNA_CONFIG_KEY(COMPACT_MODE)] = CONFIG_VALUE(NO); gnaPluginConfig[GNA_CONFIG_KEY(PWL_MAX_ERROR_PERCENT)] = std::to_string(FLAGS_pwl_me); // ----------------------------------------------------------------------------------------------------- // --------------------------- Write model to file -------------------------------------------------- // Embedded GNA model dumping (for Intel(R) Speech Enabling Developer Kit) if (!FLAGS_we.empty()) { gnaPluginConfig[GNAConfigParams::KEY_GNA_FIRMWARE_MODEL_IMAGE] = FLAGS_we; gnaPluginConfig[GNAConfigParams::KEY_GNA_FIRMWARE_MODEL_IMAGE_GENERATION] = FLAGS_we_gen; } // ----------------------------------------------------------------------------------------------------- // --------------------------- Step 4. Loading model to the device ------------------------------------------ if (useGna) { genericPluginConfig.insert(std::begin(gnaPluginConfig), std::end(gnaPluginConfig)); } auto t0 = Time::now(); std::vector outputs; if (!FLAGS_oname.empty()) { std::vector output_names = ConvertStrToVector(FLAGS_oname); std::vector ports; for (const auto& outBlobName : output_names) { int pos_layer = outBlobName.rfind(":"); if (pos_layer == -1) { throw std::logic_error(std::string("Output ") + std::string(outBlobName) + std::string(" doesn't have a port")); } outputs.push_back(outBlobName.substr(0, pos_layer)); try { ports.push_back(std::stoi(outBlobName.substr(pos_layer + 1))); } catch (const std::exception&) { throw std::logic_error("Ports should have integer type"); } } if (!FLAGS_m.empty()) { for (size_t i = 0; i < outputs.size(); i++) { network.addOutput(outputs[i], ports[i]); } } } if (!FLAGS_m.empty()) { slog::info << "Loading model to the device" << slog::endl; executableNet = ie.LoadNetwork(network, deviceStr, genericPluginConfig); } else { slog::info << "Importing model to the device" << slog::endl; executableNet = ie.ImportNetwork(FLAGS_rg.c_str(), deviceStr, genericPluginConfig); } ms loadTime = std::chrono::duration_cast(Time::now() - t0); slog::info << "Model loading time " << loadTime.count() << " ms" << slog::endl; // --------------------------- Exporting gna model using InferenceEngine AOT API--------------------- if (!FLAGS_wg.empty()) { slog::info << "Writing GNA Model to file " << FLAGS_wg << slog::endl; t0 = Time::now(); executableNet.Export(FLAGS_wg); ms exportTime = std::chrono::duration_cast(Time::now() - t0); slog::info << "Exporting time " << exportTime.count() << " ms" << slog::endl; return 0; } if (!FLAGS_we.empty()) { slog::info << "Exported GNA embedded model to file " << FLAGS_we << slog::endl; if (!FLAGS_we_gen.empty()) { slog::info << "GNA embedded model export done for GNA generation: " << FLAGS_we_gen << slog::endl; } return 0; } // --------------------------------------------------------------------------------------------------------- // --------------------------- Step 5. Create infer request -------------------------------------------------- std::vector inferRequests((FLAGS_cw_r > 0 || FLAGS_cw_l > 0) ? 1 : FLAGS_nthreads); for (auto& inferRequest : inferRequests) { inferRequest = {executableNet.CreateInferRequest(), -1, batchSize}; } // --------------------------------------------------------------------------------------------------------- // --------------------------- Step 3. Configure input & output // -------------------------------------------------- This step executed after creating infer request to check // input/output layers mentioned via -iname and -oname args // --------------------------- Prepare input blobs ----------------------------------------------------- /** Taking information about all topology inputs **/ ConstInputsDataMap cInputInfo = executableNet.GetInputsInfo(); CheckNumberOfInputs(cInputInfo.size(), numInputFiles); /** Stores all input blobs data **/ std::vector ptrInputBlobs; if (!FLAGS_iname.empty()) { std::vector inputNameBlobs = ConvertStrToVector(FLAGS_iname); if (inputNameBlobs.size() != cInputInfo.size()) { std::string errMessage(std::string("Number of network inputs ( ") + std::to_string(cInputInfo.size()) + " ) is not equal to the number of inputs entered in the -iname argument ( " + std::to_string(inputNameBlobs.size()) + " )."); throw std::logic_error(errMessage); } for (const auto& input : inputNameBlobs) { Blob::Ptr blob = inferRequests.begin()->inferRequest.GetBlob(input); if (!blob) { std::string errMessage("No blob with name : " + input); throw std::logic_error(errMessage); } ptrInputBlobs.push_back(blob); } } else { for (const auto& input : cInputInfo) { ptrInputBlobs.push_back(inferRequests.begin()->inferRequest.GetBlob(input.first)); } } InputsDataMap inputInfo; if (!FLAGS_m.empty()) { inputInfo = network.getInputsInfo(); } /** Configure input precision if model is loaded from IR **/ for (auto& item : inputInfo) { Precision inputPrecision = Precision::FP32; // specify Precision::I16 to provide quantized inputs item.second->setPrecision(inputPrecision); } // --------------------------------------------------------------------- // ------------------------------ Prepare output blobs ------------------------------------------------- ConstOutputsDataMap cOutputInfo(executableNet.GetOutputsInfo()); OutputsDataMap outputInfo; if (!FLAGS_m.empty()) { outputInfo = network.getOutputsInfo(); } std::vector ptrOutputBlob; if (!outputs.empty()) { for (const auto& output : outputs) { Blob::Ptr blob = inferRequests.begin()->inferRequest.GetBlob(output); if (!blob) { std::string errMessage("No blob with name : " + output); throw std::logic_error(errMessage); } ptrOutputBlob.push_back(blob); } } else { for (auto& output : cOutputInfo) { ptrOutputBlob.push_back(inferRequests.begin()->inferRequest.GetBlob(output.first)); } } for (auto& item : outputInfo) { DataPtr outData = item.second; if (!outData) { throw std::logic_error("output data pointer is not valid"); } Precision outputPrecision = Precision::FP32; // specify Precision::I32 to retrieve quantized outputs outData->setPrecision(outputPrecision); } std::vector output_name_files; std::vector reference_name_files; size_t count_file = 1; if (!FLAGS_o.empty()) { output_name_files = ConvertStrToVector(FLAGS_o); if (output_name_files.size() != outputs.size() && !outputs.empty()) { throw std::logic_error("The number of output files is not equal to the number of network outputs."); } count_file = output_name_files.empty() ? 1 : output_name_files.size(); } if (!FLAGS_r.empty()) { reference_name_files = ConvertStrToVector(FLAGS_r); if (reference_name_files.size() != outputs.size() && !outputs.empty()) { throw std::logic_error("The number of reference files is not equal to the number of network outputs."); } count_file = reference_name_files.empty() ? 1 : reference_name_files.size(); } // --------------------------------------------------------------------- // ----------------------------------------------------------------------------------------------------- // --------------------------- Step 7. Do inference -------------------------------------------------------- for (size_t next_output = 0; next_output < count_file; next_output++) { std::vector> ptrUtterances; std::vector ptrScores; std::vector ptrReferenceScores; score_error_t frameError, totalError; ptrUtterances.resize(inputFiles.size()); // initialize memory state before starting for (auto&& state : inferRequests.begin()->inferRequest.QueryState()) { state.Reset(); } /** Work with each utterance **/ for (uint32_t utteranceIndex = 0; utteranceIndex < numUtterances; ++utteranceIndex) { std::map utterancePerfMap; uint64_t totalNumberOfRunsOnHw = 0; std::string uttName; uint32_t numFrames(0), n(0); std::vector numFrameElementsInput; uint32_t numFramesReference(0), numFrameElementsReference(0), numBytesPerElementReference(0), numBytesReferenceScoreThisUtterance(0); auto dims = outputs.empty() ? cOutputInfo.rbegin()->second->getDims() : cOutputInfo[outputs[next_output]]->getDims(); const auto numScoresPerFrame = std::accumulate(std::begin(dims), std::end(dims), size_t{1}, std::multiplies()); slog::info << "Number scores per frame : " << numScoresPerFrame << slog::endl; /** Get information from input file for current utterance **/ numFrameElementsInput.resize(numInputFiles); for (size_t i = 0; i < inputFiles.size(); i++) { std::vector ptrUtterance; auto inputFilename = inputFiles[i].c_str(); uint32_t currentNumFrames(0), currentNumFrameElementsInput(0), currentNumBytesPerElementInput(0); file->GetFileInfo(inputFilename, utteranceIndex, &n, &numBytesThisUtterance[i]); ptrUtterance.resize(numBytesThisUtterance[i]); file->LoadFile(inputFilename, utteranceIndex, uttName, ptrUtterance, ¤tNumFrames, ¤tNumFrameElementsInput, ¤tNumBytesPerElementInput); if (numFrames == 0) { numFrames = currentNumFrames; } else if (numFrames != currentNumFrames) { std::string errMessage("Number of frames in input files is different: " + std::to_string(numFrames) + " and " + std::to_string(currentNumFrames)); throw std::logic_error(errMessage); } ptrUtterances[i] = ptrUtterance; numFrameElementsInput[i] = currentNumFrameElementsInput; } int i = 0; for (auto& ptrInputBlob : ptrInputBlobs) { if (ptrInputBlob->size() != numFrameElementsInput[i++] * batchSize) { throw std::logic_error("network input size(" + std::to_string(ptrInputBlob->size()) + ") mismatch to input file size (" + std::to_string(numFrameElementsInput[i - 1] * batchSize) + ")"); } } ptrScores.resize(numFrames * numScoresPerFrame * sizeof(float)); if (!FLAGS_r.empty()) { /** Read file with reference scores **/ BaseFile* fileReferenceScores; auto exReferenceScoresFile = fileExt(FLAGS_r); if (exReferenceScoresFile == "ark") { fileReferenceScores = &arkFile; } else if (exReferenceScoresFile == "npz") { fileReferenceScores = &numpyFile; } else { throw std::logic_error("Invalid Reference Scores file"); } std::string refUtteranceName; fileReferenceScores->GetFileInfo(reference_name_files[next_output].c_str(), utteranceIndex, &n, &numBytesReferenceScoreThisUtterance); ptrReferenceScores.resize(numBytesReferenceScoreThisUtterance); fileReferenceScores->LoadFile(reference_name_files[next_output].c_str(), utteranceIndex, refUtteranceName, ptrReferenceScores, &numFramesReference, &numFrameElementsReference, &numBytesPerElementReference); } double totalTime = 0.0; std::cout << "Utterance " << utteranceIndex << ": " << std::endl; ClearScoreError(&totalError); totalError.threshold = frameError.threshold = MAX_SCORE_DIFFERENCE; auto outputFrame = &ptrScores.front(); std::vector inputFrame; for (auto& ut : ptrUtterances) { inputFrame.push_back(&ut.front()); } std::map callPerfMap; size_t frameIndex = 0; uint32_t numFramesFile = numFrames; numFrames += FLAGS_cw_l + FLAGS_cw_r; uint32_t numFramesThisBatch{batchSize}; auto t0 = Time::now(); auto t1 = t0; while (frameIndex <= numFrames) { if (frameIndex == numFrames) { if (std::find_if(inferRequests.begin(), inferRequests.end(), [&](InferRequestStruct x) { return (x.frameIndex != -1); }) == inferRequests.end()) { break; } } bool inferRequestFetched = false; /** Start inference loop **/ for (auto& inferRequest : inferRequests) { if (frameIndex == numFrames) { numFramesThisBatch = 1; } else { numFramesThisBatch = (numFrames - frameIndex < batchSize) ? (numFrames - frameIndex) : batchSize; } /* waits until inference result becomes available */ if (inferRequest.frameIndex != -1) { StatusCode code = inferRequest.inferRequest.Wait(InferenceEngine::InferRequest::WaitMode::RESULT_READY); if (code != StatusCode::OK) { if (!useHetero) continue; if (code != StatusCode::INFER_NOT_STARTED) continue; } // --------------------------- Step 8. Process output part 1 // ------------------------------------------------------- ConstOutputsDataMap newOutputInfo; if (inferRequest.frameIndex >= 0) { if (!FLAGS_o.empty()) { /* Prepare output data for save to file in future */ outputFrame = &ptrScores.front() + numScoresPerFrame * sizeof(float) * (inferRequest.frameIndex); if (!outputs.empty()) { newOutputInfo[outputs[next_output]] = cOutputInfo[outputs[next_output]]; } else { newOutputInfo = cOutputInfo; } Blob::Ptr outputBlob = inferRequest.inferRequest.GetBlob(newOutputInfo.rbegin()->first); MemoryBlob::CPtr moutput = as(outputBlob); if (!moutput) { throw std::logic_error( "We expect output to be inherited from MemoryBlob, " "but in fact we were not able to cast output to MemoryBlob"); } // locked memory holder should be alive all time while access to its buffer happens auto moutputHolder = moutput->rmap(); auto byteSize = numScoresPerFrame * sizeof(float); std::memcpy(outputFrame, moutputHolder.as(), byteSize); } if (!FLAGS_r.empty()) { /** Compare output data with reference scores **/ if (!outputs.empty()) { newOutputInfo[outputs[next_output]] = cOutputInfo[outputs[next_output]]; } else { newOutputInfo = cOutputInfo; } Blob::Ptr outputBlob = inferRequest.inferRequest.GetBlob(newOutputInfo.rbegin()->first); MemoryBlob::CPtr moutput = as(outputBlob); if (!moutput) { throw std::logic_error( "We expect output to be inherited from MemoryBlob, " "but in fact we were not able to cast output to MemoryBlob"); } // locked memory holder should be alive all time while access to its buffer happens auto moutputHolder = moutput->rmap(); CompareScores( moutputHolder.as(), &ptrReferenceScores[inferRequest.frameIndex * numFrameElementsReference * numBytesPerElementReference], &frameError, inferRequest.numFramesThisBatch, numFrameElementsReference); UpdateScoreError(&frameError, &totalError); } if (FLAGS_pc) { // retrieve new counters getPerformanceCounters(inferRequest.inferRequest, callPerfMap); // summarize retrieved counters with all previous sumPerformanceCounters(callPerfMap, utterancePerfMap, totalNumberOfRunsOnHw); } } // ----------------------------------------------------------------------------------------------------- } if (frameIndex == numFrames) { inferRequest.frameIndex = -1; continue; } // --------------------------- Step 6. Prepare input // -------------------------------------------------------- ptrInputBlobs.clear(); if (FLAGS_iname.empty()) { for (auto& input : cInputInfo) { ptrInputBlobs.push_back(inferRequest.inferRequest.GetBlob(input.first)); } } else { std::vector inputNameBlobs = ConvertStrToVector(FLAGS_iname); for (const auto& input : inputNameBlobs) { Blob::Ptr blob = inferRequests.begin()->inferRequest.GetBlob(input); if (!blob) { std::string errMessage("No blob with name : " + input); throw std::logic_error(errMessage); } ptrInputBlobs.push_back(blob); } } /** Iterate over all the input blobs **/ for (size_t i = 0; i < numInputFiles; ++i) { MemoryBlob::Ptr minput = as(ptrInputBlobs[i]); if (!minput) { std::string errMessage("We expect ptrInputBlobs[" + std::to_string(i) + "] to be inherited from MemoryBlob, " + "but in fact we were not able to cast input blob to MemoryBlob"); throw std::logic_error(errMessage); } // locked memory holder should be alive all time while access to its buffer happens auto minputHolder = minput->wmap(); std::memcpy(minputHolder.as(), inputFrame[i], minput->byteSize()); } // ----------------------------------------------------------------------------------------------------- int index = static_cast(frameIndex) - (FLAGS_cw_l + FLAGS_cw_r); /* Starting inference in asynchronous mode*/ inferRequest.inferRequest.StartAsync(); inferRequest.frameIndex = index < 0 ? -2 : index; inferRequest.numFramesThisBatch = numFramesThisBatch; frameIndex += numFramesThisBatch; for (size_t j = 0; j < inputFiles.size(); j++) { if (FLAGS_cw_l > 0 || FLAGS_cw_r > 0) { int idx = frameIndex - FLAGS_cw_l; if (idx > 0 && idx < static_cast(numFramesFile)) { inputFrame[j] += sizeof(float) * numFrameElementsInput[j] * numFramesThisBatch; } else if (idx >= static_cast(numFramesFile)) { inputFrame[j] = &ptrUtterances[j].front() + (numFramesFile - 1) * sizeof(float) * numFrameElementsInput[j] * numFramesThisBatch; } else if (idx <= 0) { inputFrame[j] = &ptrUtterances[j].front(); } } else { inputFrame[j] += sizeof(float) * numFrameElementsInput[j] * numFramesThisBatch; } } inferRequestFetched |= true; } /** Inference was finished for current frame **/ if (!inferRequestFetched) { std::this_thread::sleep_for(std::chrono::milliseconds(1)); continue; } } t1 = Time::now(); fsec fs = t1 - t0; ms d = std::chrono::duration_cast(fs); totalTime += d.count(); // resetting state between utterances for (auto&& state : inferRequests.begin()->inferRequest.QueryState()) { state.Reset(); } // ----------------------------------------------------------------------------------------------------- // --------------------------- Step 8. Process output part 2 // ------------------------------------------------------- if (!FLAGS_o.empty()) { auto exOutputScoresFile = fileExt(FLAGS_o); if (exOutputScoresFile == "ark") { fileOutput = &arkFile; } else if (exOutputScoresFile == "npz") { fileOutput = &numpyFile; } else { throw std::logic_error("Invalid Reference Scores file"); } /* Save output data to file */ bool shouldAppend = (utteranceIndex == 0) ? false : true; fileOutput->SaveFile(output_name_files[next_output].c_str(), shouldAppend, uttName, &ptrScores.front(), numFramesFile, numScoresPerFrame); } /** Show performance results **/ std::cout << "Total time in Infer (HW and SW):\t" << totalTime << " ms" << std::endl; std::cout << "Frames in utterance:\t\t\t" << numFrames << " frames" << std::endl; std::cout << "Average Infer time per frame:\t\t" << totalTime / static_cast(numFrames) << " ms" << std::endl; if (FLAGS_pc) { // print performance results printPerformanceCounters(utterancePerfMap, frameIndex, std::cout, getFullDeviceName(ie, FLAGS_d), totalNumberOfRunsOnHw); } if (!FLAGS_r.empty()) { // print statistical score error printReferenceCompareResults(totalError, numFrames, std::cout); } std::cout << "End of Utterance " << utteranceIndex << std::endl << std::endl; // ----------------------------------------------------------------------------------------------------- } } // ----------------------------------------------------------------------------------------------------- } catch (const std::exception& error) { slog::err << error.what() << slog::endl; return 1; } catch (...) { slog::err << "Unknown/internal exception happened" << slog::endl; return 1; } slog::info << "Execution successful" << slog::endl; return 0; }