[CPP Speech Sample] Improve -o and -oname flags (#10321)
* Improve `-o` and `-oname` flags
* Apply clang-format tool
* fix saving output files
* Apply clang-format
* Fix error when `-oname` not specified
* apply clang format
* Fix error `-oname`
* Use output name with port to find model output
* fix comment line breaking
* fix comparison with reference for multiple outputs
* Fix output name printing error
* try to fix clang format
* fix problem with bs > 1
* minimal change to rerun test pipeline
* clang format
* Revert "Fix error `-oname`"
This reverts commit c33d5f16e8.
This commit is contained in:
@@ -1,6 +1,6 @@
|
||||
# Automatic Speech Recognition C++ Sample {#openvino_inference_engine_samples_speech_sample_README}
|
||||
|
||||
This sample demonstrates how to execute an Asynchronous Inference of acoustic model based on Kaldi\* neural networks and speech feature vectors.
|
||||
This sample demonstrates how to execute an Asynchronous Inference of acoustic model based on Kaldi\* neural networks and speech feature vectors.
|
||||
|
||||
The sample works with Kaldi ARK or Numpy* uncompressed NPZ files, so it does not cover an end-to-end speech recognition scenario (speech to text), requiring additional preprocessing (feature extraction) to get a feature vector from a speech signal, as well as postprocessing (decoding) to produce text from scores.
|
||||
|
||||
|
||||
@@ -86,10 +86,11 @@ int main(int argc, char* argv[]) {
|
||||
uint32_t batchSize = (FLAGS_cw_r > 0 || FLAGS_cw_l > 0 || !FLAGS_bs) ? 1 : (uint32_t)FLAGS_bs;
|
||||
std::shared_ptr<ov::Model> model;
|
||||
std::vector<std::string> outputs;
|
||||
std::vector<std::string> output_names;
|
||||
std::vector<size_t> ports;
|
||||
// --------------------------- Processing custom outputs ---------------------------------------------
|
||||
if (!FLAGS_oname.empty()) {
|
||||
std::vector<std::string> output_names = convert_str_to_vector(FLAGS_oname);
|
||||
output_names = convert_str_to_vector(FLAGS_oname);
|
||||
for (const auto& output_name : output_names) {
|
||||
auto pos_layer = output_name.rfind(":");
|
||||
if (pos_layer == std::string::npos) {
|
||||
@@ -248,10 +249,9 @@ int main(int argc, char* argv[]) {
|
||||
auto t0 = Time::now();
|
||||
ms loadTime = std::chrono::duration_cast<ms>(Time::now() - t0);
|
||||
slog::info << "Model loading time " << loadTime.count() << " ms" << slog::endl;
|
||||
slog::info << "Loading model to the device " << FLAGS_d << slog::endl;
|
||||
ov::CompiledModel executableNet;
|
||||
if (!FLAGS_m.empty()) {
|
||||
slog::info << "Loading model to the device" << slog::endl;
|
||||
slog::info << "Loading model to the device " << FLAGS_d << slog::endl;
|
||||
executableNet = core.compile_model(model, deviceStr, genericPluginConfig);
|
||||
} else {
|
||||
slog::info << "Importing model to the device" << slog::endl;
|
||||
@@ -344,157 +344,184 @@ int main(int argc, char* argv[]) {
|
||||
}
|
||||
// -----------------------------------------------------------------------------------------------------
|
||||
// --------------------------- Step 5. Do inference --------------------------------------------------------
|
||||
for (size_t next_output = 0; next_output < count_file; next_output++) {
|
||||
std::vector<std::vector<uint8_t>> ptrUtterances;
|
||||
std::vector<uint8_t> ptrScores;
|
||||
std::vector<uint8_t> ptrReferenceScores;
|
||||
ScoreErrorT frameError, totalError;
|
||||
ptrUtterances.resize(inputFiles.size());
|
||||
// initialize memory state before starting
|
||||
for (auto&& state : inferRequests.begin()->inferRequest.query_state()) {
|
||||
state.reset();
|
||||
}
|
||||
/** Work with each utterance **/
|
||||
for (uint32_t utteranceIndex = 0; utteranceIndex < numUtterances; ++utteranceIndex) {
|
||||
std::map<std::string, ov::ProfilingInfo> utterancePerfMap;
|
||||
uint64_t totalNumberOfRunsOnHw = 0;
|
||||
std::string uttName;
|
||||
uint32_t numFrames(0), n(0);
|
||||
std::vector<uint32_t> numFrameElementsInput;
|
||||
uint32_t numFramesReference(0), numFrameElementsReference(0), numBytesPerElementReference(0),
|
||||
numBytesReferenceScoreThisUtterance(0);
|
||||
auto dims = executableNet.outputs()[0].get_shape();
|
||||
const auto numScoresPerFrame =
|
||||
std::accumulate(std::begin(dims), std::end(dims), size_t{1}, std::multiplies<size_t>());
|
||||
slog::info << "Number scores per frame : " << numScoresPerFrame << slog::endl;
|
||||
/** Get information from input file for current utterance **/
|
||||
numFrameElementsInput.resize(numInputFiles);
|
||||
for (size_t i = 0; i < inputFiles.size(); i++) {
|
||||
std::vector<uint8_t> ptrUtterance;
|
||||
auto inputFilename = inputFiles[i].c_str();
|
||||
uint32_t currentNumFrames(0), currentNumFrameElementsInput(0), currentNumBytesPerElementInput(0);
|
||||
file->get_file_info(inputFilename, utteranceIndex, &n, &numBytesThisUtterance[i]);
|
||||
ptrUtterance.resize(numBytesThisUtterance[i]);
|
||||
file->load_file(inputFilename,
|
||||
utteranceIndex,
|
||||
uttName,
|
||||
ptrUtterance,
|
||||
&currentNumFrames,
|
||||
&currentNumFrameElementsInput,
|
||||
&currentNumBytesPerElementInput);
|
||||
if (numFrames == 0) {
|
||||
numFrames = currentNumFrames;
|
||||
} else if (numFrames != currentNumFrames) {
|
||||
std::string errMessage("Number of frames in input files is different: " +
|
||||
std::to_string(numFrames) + " and " + std::to_string(currentNumFrames));
|
||||
throw std::logic_error(errMessage);
|
||||
}
|
||||
ptrUtterances[i] = ptrUtterance;
|
||||
numFrameElementsInput[i] = currentNumFrameElementsInput;
|
||||
}
|
||||
int i = 0;
|
||||
for (auto& ptrInputBlob : ptrInputBlobs) {
|
||||
if (ptrInputBlob.get_size() != numFrameElementsInput[i++] * batchSize) {
|
||||
throw std::logic_error("network input size(" + std::to_string(ptrInputBlob.get_size()) +
|
||||
") mismatch to input file size (" +
|
||||
std::to_string(numFrameElementsInput[i - 1] * batchSize) + ")");
|
||||
}
|
||||
}
|
||||
ptrScores.resize(numFrames * numScoresPerFrame * sizeof(float));
|
||||
if (!FLAGS_r.empty()) {
|
||||
/** Read file with reference scores **/
|
||||
BaseFile* fileReferenceScores;
|
||||
auto exReferenceScoresFile = fileExt(FLAGS_r);
|
||||
if (exReferenceScoresFile == "ark") {
|
||||
fileReferenceScores = &arkFile;
|
||||
} else if (exReferenceScoresFile == "npz") {
|
||||
fileReferenceScores = &numpyFile;
|
||||
} else {
|
||||
throw std::logic_error("Invalid Reference Scores file");
|
||||
}
|
||||
std::string refUtteranceName;
|
||||
fileReferenceScores->get_file_info(reference_name_files[next_output].c_str(),
|
||||
utteranceIndex,
|
||||
&n,
|
||||
&numBytesReferenceScoreThisUtterance);
|
||||
ptrReferenceScores.resize(numBytesReferenceScoreThisUtterance);
|
||||
fileReferenceScores->load_file(reference_name_files[next_output].c_str(),
|
||||
utteranceIndex,
|
||||
refUtteranceName,
|
||||
ptrReferenceScores,
|
||||
&numFramesReference,
|
||||
&numFrameElementsReference,
|
||||
&numBytesPerElementReference);
|
||||
}
|
||||
double totalTime = 0.0;
|
||||
std::cout << "Utterance " << utteranceIndex << ": " << std::endl;
|
||||
clear_score_error(&totalError);
|
||||
totalError.threshold = frameError.threshold = MAX_SCORE_DIFFERENCE;
|
||||
auto outputFrame = &ptrScores.front();
|
||||
std::vector<uint8_t*> inputFrame;
|
||||
for (auto& ut : ptrUtterances) {
|
||||
inputFrame.push_back(&ut.front());
|
||||
}
|
||||
std::map<std::string, ov::ProfilingInfo> callPerfMap;
|
||||
size_t frameIndex = 0;
|
||||
uint32_t numFramesFile = numFrames;
|
||||
numFrames += FLAGS_cw_l + FLAGS_cw_r;
|
||||
uint32_t numFramesThisBatch{batchSize};
|
||||
auto t0 = Time::now();
|
||||
auto t1 = t0;
|
||||
while (frameIndex <= numFrames) {
|
||||
if (frameIndex == numFrames) {
|
||||
if (std::find_if(inferRequests.begin(), inferRequests.end(), [&](InferRequestStruct x) {
|
||||
return (x.frameIndex != -1);
|
||||
}) == inferRequests.end()) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
bool inferRequestFetched = false;
|
||||
/** Start inference loop **/
|
||||
for (auto& inferRequest : inferRequests) {
|
||||
if (frameIndex == numFrames) {
|
||||
numFramesThisBatch = 1;
|
||||
} else {
|
||||
numFramesThisBatch =
|
||||
(numFrames - frameIndex < batchSize) ? (numFrames - frameIndex) : batchSize;
|
||||
}
|
||||
std::vector<std::vector<uint8_t>> ptrUtterances;
|
||||
std::vector<std::vector<uint8_t>> vectorPtrScores((outputs.size() == 0) ? 1 : outputs.size());
|
||||
std::vector<uint16_t> numScoresPerOutput((outputs.size() == 0) ? 1 : outputs.size());
|
||||
std::vector<std::vector<uint8_t>> vectorPtrReferenceScores(reference_name_files.size());
|
||||
std::vector<ScoreErrorT> vectorFrameError(reference_name_files.size()),
|
||||
vectorTotalError(reference_name_files.size());
|
||||
ptrUtterances.resize(inputFiles.size());
|
||||
// initialize memory state before starting
|
||||
for (auto&& state : inferRequests.begin()->inferRequest.query_state()) {
|
||||
state.reset();
|
||||
}
|
||||
/** Work with each utterance **/
|
||||
for (uint32_t utteranceIndex = 0; utteranceIndex < numUtterances; ++utteranceIndex) {
|
||||
std::map<std::string, ov::ProfilingInfo> utterancePerfMap;
|
||||
uint64_t totalNumberOfRunsOnHw = 0;
|
||||
std::string uttName;
|
||||
uint32_t numFrames(0), n(0);
|
||||
std::vector<uint32_t> numFrameElementsInput;
|
||||
std::vector<uint32_t> numFramesReference(reference_name_files.size()),
|
||||
numFrameElementsReference(reference_name_files.size()),
|
||||
numBytesPerElementReference(reference_name_files.size()),
|
||||
numBytesReferenceScoreThisUtterance(reference_name_files.size());
|
||||
|
||||
/** Get information from input file for current utterance **/
|
||||
numFrameElementsInput.resize(numInputFiles);
|
||||
for (size_t i = 0; i < inputFiles.size(); i++) {
|
||||
std::vector<uint8_t> ptrUtterance;
|
||||
auto inputFilename = inputFiles[i].c_str();
|
||||
uint32_t currentNumFrames(0), currentNumFrameElementsInput(0), currentNumBytesPerElementInput(0);
|
||||
file->get_file_info(inputFilename, utteranceIndex, &n, &numBytesThisUtterance[i]);
|
||||
ptrUtterance.resize(numBytesThisUtterance[i]);
|
||||
file->load_file(inputFilename,
|
||||
utteranceIndex,
|
||||
uttName,
|
||||
ptrUtterance,
|
||||
&currentNumFrames,
|
||||
&currentNumFrameElementsInput,
|
||||
&currentNumBytesPerElementInput);
|
||||
if (numFrames == 0) {
|
||||
numFrames = currentNumFrames;
|
||||
} else if (numFrames != currentNumFrames) {
|
||||
std::string errMessage("Number of frames in input files is different: " +
|
||||
std::to_string(numFrames) + " and " + std::to_string(currentNumFrames));
|
||||
throw std::logic_error(errMessage);
|
||||
}
|
||||
ptrUtterances[i] = ptrUtterance;
|
||||
numFrameElementsInput[i] = currentNumFrameElementsInput;
|
||||
}
|
||||
int i = 0;
|
||||
for (auto& ptrInputBlob : ptrInputBlobs) {
|
||||
if (ptrInputBlob.get_size() != numFrameElementsInput[i++] * batchSize) {
|
||||
throw std::logic_error("network input size(" + std::to_string(ptrInputBlob.get_size()) +
|
||||
") mismatch to input file size (" +
|
||||
std::to_string(numFrameElementsInput[i - 1] * batchSize) + ")");
|
||||
}
|
||||
}
|
||||
|
||||
double totalTime = 0.0;
|
||||
|
||||
for (size_t errorIndex = 0; errorIndex < vectorFrameError.size(); errorIndex++) {
|
||||
clear_score_error(&vectorTotalError[errorIndex]);
|
||||
vectorTotalError[errorIndex].threshold = vectorFrameError[errorIndex].threshold = MAX_SCORE_DIFFERENCE;
|
||||
}
|
||||
|
||||
std::vector<uint8_t*> inputFrame;
|
||||
for (auto& ut : ptrUtterances) {
|
||||
inputFrame.push_back(&ut.front());
|
||||
}
|
||||
std::map<std::string, ov::ProfilingInfo> callPerfMap;
|
||||
size_t frameIndex = 0;
|
||||
uint32_t numFramesFile = numFrames;
|
||||
numFrames += FLAGS_cw_l + FLAGS_cw_r;
|
||||
uint32_t numFramesThisBatch{batchSize};
|
||||
auto t0 = Time::now();
|
||||
auto t1 = t0;
|
||||
|
||||
BaseFile* fileReferenceScores;
|
||||
std::string refUtteranceName;
|
||||
|
||||
if (!FLAGS_r.empty()) {
|
||||
/** Read file with reference scores **/
|
||||
auto exReferenceScoresFile = fileExt(FLAGS_r);
|
||||
if (exReferenceScoresFile == "ark") {
|
||||
fileReferenceScores = &arkFile;
|
||||
} else if (exReferenceScoresFile == "npz") {
|
||||
fileReferenceScores = &numpyFile;
|
||||
} else {
|
||||
throw std::logic_error("Invalid Reference Scores file");
|
||||
}
|
||||
for (size_t next_output = 0; next_output < count_file; next_output++) {
|
||||
if (fileReferenceScores != nullptr) {
|
||||
fileReferenceScores->get_file_info(reference_name_files[next_output].c_str(),
|
||||
utteranceIndex,
|
||||
&n,
|
||||
&numBytesReferenceScoreThisUtterance[next_output]);
|
||||
vectorPtrReferenceScores[next_output].resize(numBytesReferenceScoreThisUtterance[next_output]);
|
||||
fileReferenceScores->load_file(reference_name_files[next_output].c_str(),
|
||||
utteranceIndex,
|
||||
refUtteranceName,
|
||||
vectorPtrReferenceScores[next_output],
|
||||
&numFramesReference[next_output],
|
||||
&numFrameElementsReference[next_output],
|
||||
&numBytesPerElementReference[next_output]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
while (frameIndex <= numFrames) {
|
||||
if (frameIndex == numFrames) {
|
||||
if (std::find_if(inferRequests.begin(), inferRequests.end(), [&](InferRequestStruct x) {
|
||||
return (x.frameIndex != -1);
|
||||
}) == inferRequests.end()) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
bool inferRequestFetched = false;
|
||||
/** Start inference loop **/
|
||||
for (auto& inferRequest : inferRequests) {
|
||||
if (frameIndex == numFrames) {
|
||||
numFramesThisBatch = 1;
|
||||
} else {
|
||||
numFramesThisBatch =
|
||||
(numFrames - frameIndex < batchSize) ? (numFrames - frameIndex) : batchSize;
|
||||
}
|
||||
|
||||
/* waits until inference result becomes available */
|
||||
if (inferRequest.frameIndex != -1) {
|
||||
inferRequest.inferRequest.wait();
|
||||
if (inferRequest.frameIndex >= 0)
|
||||
for (size_t next_output = 0; next_output < count_file; next_output++) {
|
||||
std::string outputName = (outputs.size() == 0) ? executableNet.output(0).get_any_name()
|
||||
: output_names[next_output];
|
||||
auto dims = executableNet.output(outputName).get_shape();
|
||||
numScoresPerOutput[next_output] = std::accumulate(std::begin(dims),
|
||||
std::end(dims),
|
||||
size_t{1},
|
||||
std::multiplies<size_t>());
|
||||
|
||||
vectorPtrScores[next_output].resize(numFramesFile * numScoresPerOutput[next_output] *
|
||||
sizeof(float));
|
||||
|
||||
/* waits until inference result becomes available */
|
||||
if (inferRequest.frameIndex != -1) {
|
||||
inferRequest.inferRequest.wait();
|
||||
if (inferRequest.frameIndex >= 0) {
|
||||
if (!FLAGS_o.empty()) {
|
||||
/* Prepare output data for save to file in future */
|
||||
outputFrame = &ptrScores.front() +
|
||||
numScoresPerFrame * sizeof(float) * (inferRequest.frameIndex);
|
||||
auto outputFrame =
|
||||
&vectorPtrScores[next_output].front() +
|
||||
numScoresPerOutput[next_output] * sizeof(float) * (inferRequest.frameIndex);
|
||||
|
||||
ov::Tensor outputBlob =
|
||||
inferRequest.inferRequest.get_tensor(executableNet.outputs()[0]);
|
||||
inferRequest.inferRequest.get_tensor(executableNet.output(outputName));
|
||||
if (!outputs.empty()) {
|
||||
outputBlob =
|
||||
inferRequest.inferRequest.get_tensor(executableNet.output(FLAGS_oname));
|
||||
inferRequest.inferRequest.get_tensor(executableNet.output(outputName));
|
||||
}
|
||||
// locked memory holder should be alive all time while access to its buffer
|
||||
// happens
|
||||
auto byteSize = numScoresPerFrame * sizeof(float);
|
||||
// locked memory holder should be alive all time while access to its buffer happens
|
||||
auto byteSize = numScoresPerOutput[next_output] * sizeof(float);
|
||||
std::memcpy(outputFrame, outputBlob.data<float>(), byteSize);
|
||||
}
|
||||
if (!FLAGS_r.empty()) {
|
||||
/** Compare output data with reference scores **/
|
||||
ov::Tensor outputBlob =
|
||||
inferRequest.inferRequest.get_tensor(executableNet.outputs()[0]);
|
||||
if (!FLAGS_oname.empty())
|
||||
outputBlob =
|
||||
inferRequest.inferRequest.get_tensor(executableNet.output(FLAGS_oname));
|
||||
compare_scores(
|
||||
outputBlob.data<float>(),
|
||||
&ptrReferenceScores[inferRequest.frameIndex * numFrameElementsReference *
|
||||
numBytesPerElementReference],
|
||||
&frameError,
|
||||
inferRequest.numFramesThisBatch,
|
||||
numFrameElementsReference);
|
||||
update_score_error(&frameError, &totalError);
|
||||
inferRequest.inferRequest.get_tensor(executableNet.output(outputName));
|
||||
|
||||
if (numScoresPerOutput[next_output] / numFrameElementsReference[next_output] ==
|
||||
batchSize) {
|
||||
compare_scores(
|
||||
outputBlob.data<float>(),
|
||||
&vectorPtrReferenceScores[next_output]
|
||||
[inferRequest.frameIndex *
|
||||
numFrameElementsReference[next_output] *
|
||||
numBytesPerElementReference[next_output]],
|
||||
&vectorFrameError[next_output],
|
||||
inferRequest.numFramesThisBatch,
|
||||
numFrameElementsReference[next_output]);
|
||||
update_score_error(&vectorFrameError[next_output],
|
||||
&vectorTotalError[next_output]);
|
||||
} else {
|
||||
throw std::logic_error("Number of output and reference frames does not match.");
|
||||
}
|
||||
}
|
||||
if (FLAGS_pc) {
|
||||
// retrieve new counters
|
||||
@@ -503,90 +530,108 @@ int main(int argc, char* argv[]) {
|
||||
sum_performance_counters(callPerfMap, utterancePerfMap, totalNumberOfRunsOnHw);
|
||||
}
|
||||
}
|
||||
// -----------------------------------------------------------------------------------------------------
|
||||
}
|
||||
if (frameIndex == numFrames) {
|
||||
inferRequest.frameIndex = -1;
|
||||
continue;
|
||||
}
|
||||
ptrInputBlobs.clear();
|
||||
if (FLAGS_iname.empty()) {
|
||||
for (auto& input : cInputInfo) {
|
||||
ptrInputBlobs.push_back(inferRequest.inferRequest.get_tensor(input));
|
||||
}
|
||||
} else {
|
||||
std::vector<std::string> inputNameBlobs = convert_str_to_vector(FLAGS_iname);
|
||||
for (const auto& input : inputNameBlobs) {
|
||||
ov::Tensor blob = inferRequests.begin()->inferRequest.get_tensor(input);
|
||||
if (!blob) {
|
||||
std::string errMessage("No blob with name : " + input);
|
||||
throw std::logic_error(errMessage);
|
||||
}
|
||||
ptrInputBlobs.push_back(blob);
|
||||
}
|
||||
}
|
||||
|
||||
/** Iterate over all the input blobs **/
|
||||
for (size_t i = 0; i < numInputFiles; ++i) {
|
||||
ov::Tensor minput = ptrInputBlobs[i];
|
||||
if (!minput) {
|
||||
std::string errMessage("We expect ptrInputBlobs[" + std::to_string(i) +
|
||||
"] to be inherited from Tensor, " +
|
||||
"but in fact we were not able to cast input to Tensor");
|
||||
throw std::logic_error(errMessage);
|
||||
}
|
||||
memcpy(minput.data<float>(), inputFrame[i], minput.get_byte_size());
|
||||
// Used to infer fewer frames than the batch size
|
||||
if (batchSize != numFramesThisBatch) {
|
||||
memset(minput.data<float>() + numFramesThisBatch * numFrameElementsInput[i],
|
||||
0,
|
||||
(batchSize - numFramesThisBatch) * numFrameElementsInput[i]);
|
||||
}
|
||||
}
|
||||
// -----------------------------------------------------------------------------------------------------
|
||||
int index = static_cast<int>(frameIndex) - (FLAGS_cw_l + FLAGS_cw_r);
|
||||
/* Starting inference in asynchronous mode*/
|
||||
inferRequest.inferRequest.start_async();
|
||||
inferRequest.frameIndex = index < 0 ? -2 : index;
|
||||
inferRequest.numFramesThisBatch = numFramesThisBatch;
|
||||
frameIndex += numFramesThisBatch;
|
||||
for (size_t j = 0; j < inputFiles.size(); j++) {
|
||||
if (FLAGS_cw_l > 0 || FLAGS_cw_r > 0) {
|
||||
int idx = frameIndex - FLAGS_cw_l;
|
||||
if (idx > 0 && idx < static_cast<int>(numFramesFile)) {
|
||||
inputFrame[j] += sizeof(float) * numFrameElementsInput[j] * numFramesThisBatch;
|
||||
} else if (idx >= static_cast<int>(numFramesFile)) {
|
||||
inputFrame[j] = &ptrUtterances[j].front() + (numFramesFile - 1) * sizeof(float) *
|
||||
numFrameElementsInput[j] *
|
||||
numFramesThisBatch;
|
||||
} else if (idx <= 0) {
|
||||
inputFrame[j] = &ptrUtterances[j].front();
|
||||
}
|
||||
} else {
|
||||
inputFrame[j] += sizeof(float) * numFrameElementsInput[j] * numFramesThisBatch;
|
||||
}
|
||||
}
|
||||
inferRequestFetched |= true;
|
||||
}
|
||||
/** Inference was finished for current frame **/
|
||||
if (!inferRequestFetched) {
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(1));
|
||||
if (frameIndex == numFrames) {
|
||||
inferRequest.frameIndex = -1;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
t1 = Time::now();
|
||||
fsec fs = t1 - t0;
|
||||
ms d = std::chrono::duration_cast<ms>(fs);
|
||||
totalTime += d.count();
|
||||
// resetting state between utterances
|
||||
for (auto&& state : inferRequests.begin()->inferRequest.query_state()) {
|
||||
state.reset();
|
||||
}
|
||||
// -----------------------------------------------------------------------------------------------------
|
||||
ptrInputBlobs.clear();
|
||||
if (FLAGS_iname.empty()) {
|
||||
for (auto& input : cInputInfo) {
|
||||
ptrInputBlobs.push_back(inferRequest.inferRequest.get_tensor(input));
|
||||
}
|
||||
} else {
|
||||
std::vector<std::string> inputNameBlobs = convert_str_to_vector(FLAGS_iname);
|
||||
for (const auto& input : inputNameBlobs) {
|
||||
ov::Tensor blob = inferRequests.begin()->inferRequest.get_tensor(input);
|
||||
if (!blob) {
|
||||
std::string errMessage("No blob with name : " + input);
|
||||
throw std::logic_error(errMessage);
|
||||
}
|
||||
ptrInputBlobs.push_back(blob);
|
||||
}
|
||||
}
|
||||
|
||||
// --------------------------- Step 6. Process output
|
||||
// -------------------------------------------------------
|
||||
/** Iterate over all the input blobs **/
|
||||
for (size_t i = 0; i < numInputFiles; ++i) {
|
||||
ov::Tensor minput = ptrInputBlobs[i];
|
||||
if (!minput) {
|
||||
std::string errMessage("We expect ptrInputBlobs[" + std::to_string(i) +
|
||||
"] to be inherited from Tensor, " +
|
||||
"but in fact we were not able to cast input to Tensor");
|
||||
throw std::logic_error(errMessage);
|
||||
}
|
||||
memcpy(minput.data<float>(), inputFrame[i], minput.get_byte_size());
|
||||
// Used to infer fewer frames than the batch size
|
||||
if (batchSize != numFramesThisBatch) {
|
||||
memset(minput.data<float>() + numFramesThisBatch * numFrameElementsInput[i],
|
||||
0,
|
||||
(batchSize - numFramesThisBatch) * numFrameElementsInput[i]);
|
||||
}
|
||||
}
|
||||
// -----------------------------------------------------------------------------------------------------
|
||||
int index = static_cast<int>(frameIndex) - (FLAGS_cw_l + FLAGS_cw_r);
|
||||
/* Starting inference in asynchronous mode*/
|
||||
inferRequest.inferRequest.start_async();
|
||||
inferRequest.frameIndex = index < 0 ? -2 : index;
|
||||
inferRequest.numFramesThisBatch = numFramesThisBatch;
|
||||
frameIndex += numFramesThisBatch;
|
||||
for (size_t j = 0; j < inputFiles.size(); j++) {
|
||||
if (FLAGS_cw_l > 0 || FLAGS_cw_r > 0) {
|
||||
int idx = frameIndex - FLAGS_cw_l;
|
||||
if (idx > 0 && idx < static_cast<int>(numFramesFile)) {
|
||||
inputFrame[j] += sizeof(float) * numFrameElementsInput[j] * numFramesThisBatch;
|
||||
} else if (idx >= static_cast<int>(numFramesFile)) {
|
||||
inputFrame[j] = &ptrUtterances[j].front() + (numFramesFile - 1) * sizeof(float) *
|
||||
numFrameElementsInput[j] *
|
||||
numFramesThisBatch;
|
||||
} else if (idx <= 0) {
|
||||
inputFrame[j] = &ptrUtterances[j].front();
|
||||
}
|
||||
} else {
|
||||
inputFrame[j] += sizeof(float) * numFrameElementsInput[j] * numFramesThisBatch;
|
||||
}
|
||||
}
|
||||
inferRequestFetched |= true;
|
||||
}
|
||||
/** Inference was finished for current frame **/
|
||||
if (!inferRequestFetched) {
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(1));
|
||||
continue;
|
||||
}
|
||||
}
|
||||
t1 = Time::now();
|
||||
fsec fs = t1 - t0;
|
||||
ms d = std::chrono::duration_cast<ms>(fs);
|
||||
totalTime += d.count();
|
||||
// resetting state between utterances
|
||||
for (auto&& state : inferRequests.begin()->inferRequest.query_state()) {
|
||||
state.reset();
|
||||
}
|
||||
// -----------------------------------------------------------------------------------------------------
|
||||
|
||||
// --------------------------- Step 6. Process output
|
||||
// -------------------------------------------------------
|
||||
|
||||
/** Show performance results **/
|
||||
std::cout << "Utterance " << utteranceIndex << ": " << std::endl;
|
||||
std::cout << "Total time in Infer (HW and SW):\t" << totalTime << " ms" << std::endl;
|
||||
std::cout << "Frames in utterance:\t\t\t" << numFrames << " frames" << std::endl;
|
||||
std::cout << "Average Infer time per frame:\t\t" << totalTime / static_cast<double>(numFrames) << " ms\n"
|
||||
<< std::endl;
|
||||
|
||||
if (FLAGS_pc) {
|
||||
// print performance results
|
||||
print_performance_counters(utterancePerfMap,
|
||||
frameIndex,
|
||||
std::cout,
|
||||
getFullDeviceName(core, FLAGS_d),
|
||||
totalNumberOfRunsOnHw,
|
||||
FLAGS_d);
|
||||
}
|
||||
|
||||
for (size_t next_output = 0; next_output < count_file; next_output++) {
|
||||
if (!FLAGS_o.empty()) {
|
||||
auto exOutputScoresFile = fileExt(FLAGS_o);
|
||||
if (exOutputScoresFile == "ark") {
|
||||
@@ -601,33 +646,21 @@ int main(int argc, char* argv[]) {
|
||||
fileOutput->save_file(output_name_files[next_output].c_str(),
|
||||
shouldAppend,
|
||||
uttName,
|
||||
&ptrScores.front(),
|
||||
&vectorPtrScores[next_output].front(),
|
||||
numFramesFile,
|
||||
numScoresPerFrame);
|
||||
}
|
||||
/** Show performance results **/
|
||||
std::cout << "Total time in Infer (HW and SW):\t" << totalTime << " ms" << std::endl;
|
||||
std::cout << "Frames in utterance:\t\t\t" << numFrames << " frames" << std::endl;
|
||||
std::cout << "Average Infer time per frame:\t\t" << totalTime / static_cast<double>(numFrames) << " ms"
|
||||
<< std::endl;
|
||||
if (FLAGS_pc) {
|
||||
// print performance results
|
||||
print_performance_counters(utterancePerfMap,
|
||||
frameIndex,
|
||||
std::cout,
|
||||
getFullDeviceName(core, FLAGS_d),
|
||||
totalNumberOfRunsOnHw,
|
||||
FLAGS_d);
|
||||
numScoresPerOutput[next_output]);
|
||||
}
|
||||
if (!FLAGS_r.empty()) {
|
||||
// print statistical score error
|
||||
print_reference_compare_results(totalError, numFrames, std::cout);
|
||||
std::string outputName =
|
||||
(outputs.size() == 0) ? executableNet.output(0).get_any_name() : output_names[next_output];
|
||||
std::cout << "Output name: " << outputName << std::endl;
|
||||
std::cout << "Number scores per frame: " << numScoresPerOutput[next_output] / batchSize << std::endl
|
||||
<< std::endl;
|
||||
print_reference_compare_results(vectorTotalError[next_output], numFrames, std::cout);
|
||||
}
|
||||
std::cout << "End of Utterance " << utteranceIndex << std::endl << std::endl;
|
||||
// -----------------------------------------------------------------------------------------------------
|
||||
}
|
||||
}
|
||||
// -----------------------------------------------------------------------------------------------------
|
||||
} catch (const std::exception& error) {
|
||||
slog::err << error.what() << slog::endl;
|
||||
return 1;
|
||||
|
||||
Reference in New Issue
Block a user