Files
openvino/inference-engine/tools/vpu/vpu_compile/vpu_tools_common.cpp
Ilya Lavrenov 07f94ea602 Minimized usage of IE_MAIN_SOURCE_DIR (#6422)
* Minimized IE_MAIN_SOURCE_DIR hardcode

* Used COMPILE_OPTIONS instead of COMPILE_FLAGS

* Fixed compilation

* Enabled NGRAPH_DYNAMIC_COMPONENTS_ENABLE by default

* Fixes

* Moved GNA related file

* Fixed backend compilation

* Fixed Windows for ngraph

* Fixed <root>/tests build

* More updates

* Fixed scripts install

* Fixed Windows

* Fixed Windows
2021-06-30 18:15:41 +03:00

300 lines
11 KiB
C++

// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
/* on windows min and max already defined that makes using numeric_limits impossible */
#ifdef _WIN32
# define NOMINMAX
#endif
#include <sys/stat.h>
#include <algorithm>
#include <map>
#include <unordered_map>
#include <utility>
#include <vector>
#include <string>
#include <limits>
#include "vpu_tools_common.hpp"
#include <vpu/utils/string.hpp>
#include <samples/os/windows/w_dirent.h>
#include <samples/common.hpp>
#include "precision_utils.h"
InferenceEngine::CNNNetwork readNetwork(const std::string &xmlFileName) {
return InferenceEngine::Core().ReadNetwork(xmlFileName);
}
bool isFP16(InferenceEngine::Precision precision) {
return precision == InferenceEngine::Precision::FP16;
}
bool isFP32(InferenceEngine::Precision precision) {
return precision == InferenceEngine::Precision::FP32;
}
bool isU8(InferenceEngine::Precision precision) {
return precision == InferenceEngine::Precision::U8;
}
bool isFloat(InferenceEngine::Precision precision) {
return isFP16(precision) || isFP32(precision);
}
void setPrecisions(const InferenceEngine::CNNNetwork &network) {
for (auto &&layer : network.getInputsInfo()) {
if (isFP32(layer.second->getPrecision())) {
layer.second->setPrecision(InferenceEngine::Precision::FP16);
}
}
for (auto &&layer : network.getOutputsInfo()) {
if (isFP32(layer.second->getPrecision())) {
layer.second->setPrecision(InferenceEngine::Precision::FP16);
}
}
}
BitMap::BitMap(const std::string &filename) {
BmpHeader header;
BmpInfoHeader infoHeader;
std::ifstream input(filename, std::ios::binary);
if (!input) {
return;
}
input.read(reinterpret_cast<char *>(&header.type), 2);
if (header.type != 'M'*256+'B') {
std::cerr << "[BMP] file is not bmp type\n";
return;
}
input.read(reinterpret_cast<char *>(&header.size), 4);
input.read(reinterpret_cast<char *>(&header.reserved), 4);
input.read(reinterpret_cast<char *>(&header.offset), 4);
input.read(reinterpret_cast<char *>(&infoHeader), sizeof(BmpInfoHeader));
bool rowsReversed = infoHeader.height < 0;
_width = static_cast<std::size_t>(infoHeader.width);
_height = static_cast<std::size_t>(std::abs(infoHeader.height));
if (infoHeader.bits != 24) {
std::cerr << "[BMP] 24bpp only supported. But input has:" << infoHeader.bits << "\n";
return;
}
if (infoHeader.compression != 0) {
std::cerr << "[BMP] compression not supported\n";
}
auto padSize = _width & 3;
char pad[3];
size_t size = _width * _height * 3;
_data.reset(new unsigned char[size], std::default_delete<unsigned char[]>());
input.seekg(header.offset, std::ios::beg);
// reading by rows in invert vertically
for (uint32_t i = 0; i < _height; i++) {
uint32_t storeAt = rowsReversed ? i : (uint32_t)_height - 1 - i;
input.read(reinterpret_cast<char *>(_data.get()) + _width * 3 * storeAt, _width * 3);
input.read(pad, padSize);
}
}
void loadImage(const std::string &imageFilename, InferenceEngine::Blob::Ptr &blob) {
InferenceEngine::TensorDesc tensDesc = blob->getTensorDesc();
if (tensDesc.getPrecision() != InferenceEngine::Precision::FP16) {
throw std::invalid_argument("Input must have FP16 precision");
}
BitMap reader(imageFilename);
const auto dims = tensDesc.getDims();
auto numBlobChannels = dims[1];
size_t batch = dims[0];
size_t w = dims[3];
size_t h = dims[2];
size_t img_w = reader.width();
size_t img_h = reader.height();
size_t numImageChannels = reader.size() / (reader.width() * reader.height());
if (numBlobChannels != numImageChannels && numBlobChannels != 1) {
throw std::invalid_argument("Input channels mismatch: image channels " + std::to_string(numImageChannels) +
", network channels " + std::to_string(numBlobChannels) +
", expecting count of image channels are equal to count if network channels"
"or count of network channels are equal to 1");
}
int16_t *blobDataPtr = std::dynamic_pointer_cast<InferenceEngine::TBlob<int16_t>>(blob)->data();
auto nPixels = w * h;
unsigned char *RGB8 = reader.getData().get();
float xscale = 1.0f * img_w / w;
float yscale = 1.0f * img_h / h;
for (std::size_t n = 0; n != batch; n++) {
for (std::size_t i = 0; i < h; ++i) {
int y = static_cast<int>(std::floor((i + 0.5f) * yscale));
for (std::size_t j = 0; j < w; ++j) {
int x = static_cast<int>(std::floor((j + 0.5f) * xscale));
for (std::size_t k = 0; k < numBlobChannels; k++) {
float src = 1.0f * RGB8[(y * img_w + x) * numImageChannels + k];
if (tensDesc.getLayout() == InferenceEngine::NHWC) {
blobDataPtr[n * h * w * numBlobChannels + (i * w + j) * numBlobChannels + k] =
InferenceEngine::PrecisionUtils::f32tof16(src);
} else {
blobDataPtr[n * h * w * numBlobChannels + (i * w + j) + k * nPixels] =
InferenceEngine::PrecisionUtils::f32tof16(src);
}
}
}
}
}
}
void printPerformanceCounts(const std::map<std::string, InferenceEngine::InferenceEngineProfileInfo>& perfMap, const std::string report) {
std::vector<std::pair<std::string, InferenceEngine::InferenceEngineProfileInfo>> perfVec(perfMap.begin(),
perfMap.end());
std::sort(perfVec.begin(), perfVec.end(),
[=](const std::pair<std::string, InferenceEngine::InferenceEngineProfileInfo> &pair1,
const std::pair<std::string, InferenceEngine::InferenceEngineProfileInfo> &pair2) -> bool {
return pair1.second.execution_index < pair2.second.execution_index;
});
size_t maxLayerName = 0u, maxExecType = 0u;
for (auto &&entry : perfVec) {
maxLayerName = std::max(maxLayerName, entry.first.length());
maxExecType = std::max(maxExecType, std::strlen(entry.second.exec_type));
}
size_t indexWidth = 7, nameWidth = maxLayerName + 5, typeWidth = maxExecType + 5, timeWidth = 10;
size_t totalWidth = indexWidth + nameWidth + typeWidth + timeWidth;
std::cout << std::endl << "Detailed " << report << " Profile" << std::endl;
for (size_t i = 0; i < totalWidth; i++)
std::cout << "=";
std::cout << std::endl;
std::cout << std::setw(static_cast<int>(indexWidth)) << std::left << "Index"
<< std::setw(static_cast<int>(nameWidth)) << std::left << "Name"
<< std::setw(static_cast<int>(typeWidth)) << std::left << "Type"
<< std::setw(static_cast<int>(timeWidth)) << std::right << "Time (ms)"
<< std::endl;
for (size_t i = 0; i < totalWidth; i++)
std::cout << "-";
std::cout << std::endl;
long long totalTime = 0;
for (const auto& p : perfVec) {
const auto& stageName = p.first;
const auto& info = p.second;
if (info.status == InferenceEngine::InferenceEngineProfileInfo::EXECUTED) {
std::cout << std::setw(static_cast<int>(indexWidth)) << std::left << info.execution_index
<< std::setw(static_cast<int>(nameWidth)) << std::left << stageName
<< std::setw(static_cast<int>(typeWidth)) << std::left << info.exec_type
<< std::setw(static_cast<int>(timeWidth)) << std::right << info.realTime_uSec / 1000.0
<< std::endl;
totalTime += info.realTime_uSec;
}
}
for (std::size_t i = 0; i < totalWidth; i++)
std::cout << "-";
std::cout << std::endl;
std::cout << std::setw(static_cast<int>(totalWidth / 2)) << std::right << "Total inference time:"
<< std::setw(static_cast<int>(totalWidth / 2 + 1)) << std::right << totalTime / 1000.0
<< std::endl;
for (std::size_t i = 0; i < totalWidth; i++)
std::cout << "-";
std::cout << std::endl;
}
std::vector<std::string> extractFilesByExtension(const std::string& directory, const std::string& extension) {
return extractFilesByExtension(directory, extension, std::numeric_limits<std::size_t>::max());
}
std::vector<std::string> extractFilesByExtension(const std::string& directory, const std::string& extension,
std::size_t max_size) {
if (max_size == 0) {
return {};
}
std::vector<std::string> files;
DIR* dir = opendir(directory.c_str());
if (!dir) {
throw std::invalid_argument("Can not open " + directory);
}
auto getExtension = [](const std::string& name) {
auto extensionPosition = name.rfind('.', name.size());
return extensionPosition == std::string::npos ? "" : name.substr(extensionPosition + 1, name.size() - 1);
};
dirent* ent = nullptr;
while ((ent = readdir(dir)) && files.size() < max_size) {
std::string file_name = ent->d_name;
if (getExtension(file_name) != extension) {
continue;
}
std::string full_file_name = directory + "/" + file_name;
struct stat st = {};
if (stat(full_file_name.c_str(), &st) != 0) {
continue;
}
bool is_directory = (st.st_mode & S_IFDIR) != 0;
if (is_directory) {
continue;
}
files.emplace_back(full_file_name);
}
closedir(dir);
return files;
}
void loadBinaryTensor(const std::string &binaryFileName, InferenceEngine::Blob::Ptr& blob) {
InferenceEngine::TensorDesc tensDesc = blob->getTensorDesc();
if (tensDesc.getPrecision() != InferenceEngine::Precision::FP16) {
throw std::invalid_argument("Input must have FP16 precision");
}
std::ifstream binaryFile(binaryFileName, std::ios_base::binary | std::ios_base::ate);
if (!binaryFile) {
throw std::invalid_argument("Can not open \"" + binaryFileName + "\"");
}
auto fileSize = static_cast<std::size_t>(binaryFile.tellg());
binaryFile.seekg(0, std::ios_base::beg);
if (!binaryFile.good()) {
throw std::invalid_argument("Can not read \"" + binaryFileName + "\"");
}
auto expected_size = blob->byteSize();
if (fileSize != expected_size) {
throw std::invalid_argument("File \"" + binaryFileName + "\" contains " + std::to_string(fileSize) + " bytes "
"but network expects " + std::to_string(expected_size));
}
/* try to read 32 bits data */
std::int16_t *blobDataPtr = std::dynamic_pointer_cast<InferenceEngine::TBlob<std::int16_t>>(blob)->data();
for (std::size_t i = 0; i < blob->size(); i++) {
float tmp = 0.f;
binaryFile.read(reinterpret_cast<char *>(&tmp), sizeof(float));
blobDataPtr[i] = InferenceEngine::PrecisionUtils::f32tof16(tmp);
}
}