Set proper precision for added output (#9496)

This commit is contained in:
Krzysztof Bruniecki 2022-02-03 16:34:55 +01:00 committed by GitHub
parent 5c9b6915dc
commit 6677079821
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 102 additions and 102 deletions

View File

@ -85,11 +85,35 @@ int main(int argc, char* argv[]) {
slog::info << "Loading model files:" << slog::endl << FLAGS_m << slog::endl;
uint32_t batchSize = (FLAGS_cw_r > 0 || FLAGS_cw_l > 0) ? 1 : (uint32_t)FLAGS_bs;
std::shared_ptr<ov::Model> model;
std::vector<std::string> outputs;
std::vector<size_t> ports;
// --------------------------- Processing custom outputs ---------------------------------------------
if (!FLAGS_oname.empty()) {
std::vector<std::string> output_names = convert_str_to_vector(FLAGS_oname);
for (const auto& output_name : output_names) {
auto pos_layer = output_name.rfind(":");
if (pos_layer == std::string::npos) {
throw std::logic_error("Output " + output_name + " doesn't have a port");
}
outputs.push_back(output_name.substr(0, pos_layer));
try {
ports.push_back(std::stoi(output_name.substr(pos_layer + 1)));
} catch (const std::exception&) {
throw std::logic_error("Ports should have integer type");
}
}
}
// ------------------------------ Preprocessing ------------------------------------------------------
// the preprocessing steps can be done only for loaded network and are not applicable for the imported network
// (already compiled)
if (!FLAGS_m.empty()) {
model = core.read_model(FLAGS_m);
if (!outputs.empty()) {
for (size_t i = 0; i < outputs.size(); i++) {
auto output = model->add_output(outputs[i], ports[i]);
output.set_names({outputs[i] + ":" + std::to_string(ports[i])});
}
}
check_number_of_inputs(model->inputs().size(), numInputFiles);
const ov::Layout tensor_layout{"NC"};
ov::preprocess::PrePostProcessor proc(model);
@ -195,29 +219,6 @@ int main(int argc, char* argv[]) {
genericPluginConfig.insert(std::begin(gnaPluginConfig), std::end(gnaPluginConfig));
}
auto t0 = Time::now();
std::vector<std::string> outputs;
if (!FLAGS_oname.empty()) {
std::vector<std::string> output_names = convert_str_to_vector(FLAGS_oname);
std::vector<size_t> ports;
for (const auto& outBlobName : output_names) {
int pos_layer = outBlobName.rfind(":");
if (pos_layer == -1) {
throw std::logic_error(std::string("Output ") + std::string(outBlobName) +
std::string(" doesn't have a port"));
}
outputs.push_back(outBlobName.substr(0, pos_layer));
try {
ports.push_back(std::stoi(outBlobName.substr(pos_layer + 1)));
} catch (const std::exception&) {
throw std::logic_error("Ports should have integer type");
}
}
if (!FLAGS_m.empty()) {
for (size_t i = 0; i < outputs.size(); i++) {
model->add_output(outputs[i], ports[i]);
}
}
}
ms loadTime = std::chrono::duration_cast<ms>(Time::now() - t0);
slog::info << "Model loading time " << loadTime.count() << " ms" << slog::endl;
slog::info << "Loading model to the device " << FLAGS_d << slog::endl;
@ -426,9 +427,10 @@ int main(int argc, char* argv[]) {
ov::Tensor outputBlob =
inferRequest.inferRequest.get_tensor(executableNet.outputs()[0]);
if (!FLAGS_oname.empty())
if (!outputs.empty()) {
outputBlob =
inferRequest.inferRequest.get_tensor(executableNet.output(FLAGS_oname));
}
// locked memory holder should be alive all time while access to its buffer happens
auto byteSize = numScoresPerFrame * sizeof(float);
std::memcpy(outputFrame, outputBlob.data<float>(), byteSize);

View File

@ -33,23 +33,41 @@ struct GnaDesc {
// gna specific properties
double scale_factor = GNAPluginNS::kScaleFactorDefault;
intel_dnn_orientation_t orientation = kDnnUnknownOrientation;
uint32_t num_bytes_per_element = 0;
uint32_t num_elements = 0;
uint32_t allocated_size = 0;
std::vector<void *> ptrs = {}; // ptr per each infer request
// help methods
uint32_t get_required_size() {
return num_elements * num_bytes_per_element;
return num_elements * tensor_precision.size();
}
uint32_t get_allocated_size() {
return allocated_size;
}
void set_precision(InferenceEngine::Precision precision) {
void set_precision(InferenceEngine::Precision::ePrecision precision) {
this->tensor_precision = precision;
this->num_bytes_per_element = precision.size();
}
// Maps a raw byte width onto an explicit precision and stores it via the
// precision-taking overload. GNA layers describe tensors by num_bytes rather
// than by precision value, so this adapter keeps both representations in sync.
// Widths other than 1, 2 or 4 bytes fall back to UNSPECIFIED.
void set_precision(uint32_t num_bytes) {
    if (num_bytes == sizeof(int8_t)) {
        set_precision(InferenceEngine::Precision::I8);
    } else if (num_bytes == sizeof(int16_t)) {
        set_precision(InferenceEngine::Precision::I16);
    } else if (num_bytes == sizeof(int32_t)) {
        set_precision(InferenceEngine::Precision::I32);
    } else {
        set_precision(InferenceEngine::Precision::UNSPECIFIED);
    }
}
InferenceEngine::DataPtr to_ie_data() {
@ -69,7 +87,6 @@ struct InputDesc : GnaDesc {
this->model_layout = inputInfo->getLayout();
this->dims = inputInfo->getTensorDesc().getDims();
this->name = inputInfo->name();
this->num_bytes_per_element = tensor_precision.size();
this->num_elements = InferenceEngine::details::product(dims.begin(), dims.end());
}
@ -92,7 +109,6 @@ struct OutputDesc : GnaDesc {
this->model_layout = outputData->getLayout();
this->dims = outputData->getTensorDesc().getDims();
this->name = outputData->getName();
this->num_bytes_per_element = tensor_precision.size();
this->num_elements = InferenceEngine::details::product(dims.begin(), dims.end());
}
};

View File

@ -2382,9 +2382,9 @@ GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer,
auto quantized = getInjectedData<QuantizedLayerParams>(prevLayer);
if (quantized) {
if (quantized->lowPrecision) {
inputs_ptr_->at(prevLayer->name).set_precision(Precision::I8);
inputs_ptr_->at(prevLayer->name).set_precision(InferenceEngine::Precision::I8);
} else {
inputs_ptr_->at(prevLayer->name).set_precision(Precision::I16);
inputs_ptr_->at(prevLayer->name).set_precision(InferenceEngine::Precision::I16);
}
}
if (0 == inputs_ptr_->at(prevLayer->name).get_allocated_size()) {

View File

@ -403,7 +403,7 @@ void GNAModelSerial::Export(void * basePointer, size_t gnaGraphSize, std::ostrea
HeaderLatest::RuntimeEndPoint ep;
ep.elements_count = desc.num_elements;
ep.scaleFactor = desc.scale_factor;
ep.element_size = desc.num_bytes_per_element;
ep.element_size = desc.tensor_precision.size();
ep.layout = desc.model_layout;
ep.precision = desc.model_precision;
ep.orientation = desc.orientation;
@ -538,10 +538,9 @@ void GNAModelSerial::ImportInputs(std::istream &is, void* basePtr, GNAPluginNS::
input.ptrs.push_back(reinterpret_cast<float*>(reinterpret_cast<uint8_t *> (basePtr) + ep.descriptor_offset));
input.orientation = ep.orientation;
input.num_elements = ep.elements_count;
input.num_bytes_per_element = ep.element_size;
input.scale_factor = ep.scaleFactor;
input.model_precision = InferenceEngine::Precision(static_cast<InferenceEngine::Precision::ePrecision>(ep.precision));
input.tensor_precision = InferenceEngine::Precision(static_cast<InferenceEngine::Precision::ePrecision>(ep.precision));
input.set_precision(ep.element_size);
input.model_layout = static_cast<InferenceEngine::Layout>(ep.layout);
input.allocated_size = input.get_required_size();
@ -565,10 +564,9 @@ void GNAModelSerial::ImportOutputs(std::istream &is, void* basePtr, GNAPluginNS:
output.ptrs.push_back(reinterpret_cast<float*>(reinterpret_cast<uint8_t *> (basePtr) + ep.descriptor_offset));
output.orientation = ep.orientation;
output.num_elements = ep.elements_count;
output.num_bytes_per_element = ep.element_size;
output.scale_factor = ep.scaleFactor;
output.set_precision(ep.element_size);
output.model_precision = InferenceEngine::Precision(static_cast<InferenceEngine::Precision::ePrecision>(ep.precision));
output.tensor_precision = InferenceEngine::Precision(static_cast<InferenceEngine::Precision::ePrecision>(ep.precision));
output.model_layout = static_cast<InferenceEngine::Layout>(ep.layout);
output.allocated_size = output.get_required_size();

View File

@ -194,65 +194,56 @@ void GNAPlugin::ExportScores(void *ptr_dst,
uint32_t num_vector_elements,
uint32_t num_active_elements,
uint32_t num_vector_stride,
uint32_t num_bytes_per_element_input,
uint32_t num_bytes_per_element) {
Precision precision_in,
Precision precision_out) {
if (precision_out != Precision::I32 && precision_out != Precision::FP32) {
THROW_GNA_EXCEPTION << "Unsupported target precision for infer : " << precision_out.name();
}
// source scores are possibly padded to multiple of 8 and possibly interleaved
// rotate if necessary and only copy actual scores (not padding)
if (orientation == kDnnInterleavedOrientation) {
if (num_bytes_per_element == 2) {
int16_t *dst = reinterpret_cast<int16_t *>(ptr_dst);
const int16_t *src = reinterpret_cast<const int16_t *>(ptr_src);
for (uint32_t i = 0; i < num_frames; i++) {
for (uint32_t j = 0; j < num_active_elements; j++) {
dst[i * num_vector_elements + j] = src[j * num_group + i];
}
for (uint32_t j = num_active_elements; j < num_vector_elements; j++) {
dst[i * num_vector_elements + j] = 0;
}
}
} else if (num_bytes_per_element == 4) { // should work for both int and float
int32_t *dst = reinterpret_cast<int32_t *>(ptr_dst);
const int8_t *src = reinterpret_cast<const int8_t*>(ptr_src);
for (uint32_t i = 0; i < num_frames; i++) {
for (uint32_t j = 0; j < num_active_elements; j++) {
auto input_ptr = src + (j * num_group + i) * num_bytes_per_element_input;
auto dst_ptr = dst + (i * num_vector_elements + j);
int32_t *dst = reinterpret_cast<int32_t *>(ptr_dst);
const int8_t *src = reinterpret_cast<const int8_t*>(ptr_src);
for (uint32_t i = 0; i < num_frames; i++) {
for (uint32_t j = 0; j < num_active_elements; j++) {
auto input_ptr = src + (j * num_group + i) * precision_in.size();
auto dst_ptr = dst + (i * num_vector_elements + j);
switch (num_bytes_per_element_input) {
case 1: {
*dst_ptr = static_cast<int32_t>(*reinterpret_cast<const int8_t*>(input_ptr));
break;
}
case 2 : {
*dst_ptr = static_cast<int32_t>(*reinterpret_cast<const int16_t*>(input_ptr));
break;
}
case 4 : {
*dst_ptr = *reinterpret_cast<const int32_t *>(input_ptr);
break;
}
default:
THROW_GNA_EXCEPTION << "Unsupported output layer precision: " << num_bytes_per_element_input << "bytes";
switch (precision_in) {
case Precision::I8 : {
*dst_ptr = static_cast<int32_t>(*reinterpret_cast<const int8_t*>(input_ptr));
break;
}
}
for (uint32_t j = num_active_elements; j < num_vector_elements; j++) {
dst[i * num_vector_elements + j] = 0;
case Precision::I16 : {
*dst_ptr = static_cast<int32_t>(*reinterpret_cast<const int16_t*>(input_ptr));
break;
}
case Precision::I32 : {
*dst_ptr = *reinterpret_cast<const int32_t *>(input_ptr);
break;
}
default:
THROW_GNA_EXCEPTION << "Unsupported output layer precision: " << precision_in.name();
}
}
} else {
THROW_GNA_EXCEPTION << "Unsupported target precision for infer : " << num_bytes_per_element << "bytes";
for (uint32_t j = num_active_elements; j < num_vector_elements; j++) {
dst[i * num_vector_elements + j] = 0;
}
}
} else {
if (num_bytes_per_element == 2) {
for (uint32_t i = 0; i < num_frames; i++) {
auto ptr_dst_vec = reinterpret_cast<uint8_t *>(ptr_dst) + i * num_vector_elements * sizeof(int16_t);
auto ptr_src_vec = reinterpret_cast<const uint8_t *>(ptr_src) + i * num_vector_stride * sizeof(int16_t);
memset(ptr_dst_vec, 0, num_vector_elements * sizeof(int16_t));
ie_memcpy(ptr_dst_vec, num_active_elements * sizeof(int16_t),
ptr_src_vec, num_active_elements * sizeof(int16_t));
switch (precision_in) {
case Precision::I8 :
case Precision::I32 : {
for (uint32_t i = 0; i < num_frames; i++) {
void* ptr_dst_vec = reinterpret_cast<uint8_t*>(ptr_dst) + i * num_vector_elements * precision_out.size();
const void* ptr_src_vec = reinterpret_cast<const uint8_t*>(ptr_src) + i * num_vector_stride * precision_in.size();
memset(ptr_dst_vec, 0, num_vector_elements * precision_out.size());
ie_memcpy(ptr_dst_vec, num_active_elements * precision_out.size(),
ptr_src_vec, num_active_elements * precision_in.size());
}
break;
}
} else if (num_bytes_per_element == 4) { // should work for both int and float
if (num_bytes_per_element_input == 2) {
case Precision::I16 : {
for (uint32_t i = 0; i < num_frames; i++) {
auto ptr_dst_vec = reinterpret_cast<int32_t*>(ptr_dst) + i * num_vector_elements;
auto ptr_src_vec = reinterpret_cast<const int16_t*>(ptr_src) + i * num_vector_stride;
@ -260,17 +251,10 @@ void GNAPlugin::ExportScores(void *ptr_dst,
ptr_dst_vec[j] = ptr_src_vec[j];
}
}
} else {
for (uint32_t i = 0; i < num_frames; i++) {
void* ptr_dst_vec = reinterpret_cast<uint8_t*>(ptr_dst) + i * num_vector_elements * sizeof(float);
const void* ptr_src_vec = reinterpret_cast<const uint8_t*>(ptr_src) + i * num_vector_stride * sizeof(float);
memset(ptr_dst_vec, 0, num_vector_elements * sizeof(float));
ie_memcpy(ptr_dst_vec, num_active_elements * sizeof(float),
ptr_src_vec, num_active_elements * sizeof(float));
}
break;
}
} else {
THROW_GNA_EXCEPTION << "Unsupported target precision for infer : " << num_bytes_per_element << "bytes";
default:
THROW_GNA_EXCEPTION << "Unsupported output layer precision: " << precision_in.name();
}
}
}
@ -494,7 +478,7 @@ bool GNAPlugin::TryToInitOutput(const std::string &portName, InferenceEngine::CN
outputs_.at(portName).ptrs.resize(gnaFlags->num_requests);
outputs_.at(portName).orientation = orientation;
outputs_.at(portName).num_bytes_per_element = numBytesPerElem;
outputs_.at(portName).set_precision(numBytesPerElem);
outputs_.at(portName).scale_factor = quantized != nullptr ? quantized->_dst_quant.GetScale() : GNAPluginNS::kScaleFactorDefault;
outputs_.at(portName).num_elements = numElem;
@ -1350,7 +1334,7 @@ GnaWaitStatus GNAPlugin::WaitFor(uint32_t request_idx, int64_t millisTimeout) {
THROW_GNA_EXCEPTION << "Transposed data size (" << transposed_data_size
<< ") do not match output buffer length of " << elementsPerBatch;
}
ConvertTensorFromNCHWToNHWC(outputDesc.num_bytes_per_element,
ConvertTensorFromNCHWToNHWC(outputDesc.tensor_precision.size(),
batchSize,
elementsPerBatch,
reinterpret_cast<uint8_t*>(outputDesc.ptrs[request_idx]),
@ -1366,8 +1350,8 @@ GnaWaitStatus GNAPlugin::WaitFor(uint32_t request_idx, int64_t millisTimeout) {
elementsPerBatch,
elementsPerBatch,
elementsPerBatch,
outputDesc.num_bytes_per_element,
sizeof(float));
outputDesc.tensor_precision,
outputDesc.model_precision);
if (gnadevice) {
#ifdef PLOT

View File

@ -188,8 +188,8 @@ class GNAPlugin : public InferenceEngine::IInferencePlugin {
uint32_t num_vector_elements,
uint32_t num_active_elements,
uint32_t num_vector_stride,
uint32_t num_bytes_per_element_input,
uint32_t num_bytes_per_element);
InferenceEngine::Precision precision_in,
InferenceEngine::Precision precision_out);
template <typename T, typename U>
void copyInputData(T *dst,