Set proper precision for added output (#9496)

parent: 5c9b6915dc
commit: 6677079821
@@ -85,11 +85,35 @@ int main(int argc, char* argv[]) {
    slog::info << "Loading model files:" << slog::endl << FLAGS_m << slog::endl;
    uint32_t batchSize = (FLAGS_cw_r > 0 || FLAGS_cw_l > 0) ? 1 : (uint32_t)FLAGS_bs;
    std::shared_ptr<ov::Model> model;
    std::vector<std::string> outputs;
    std::vector<size_t> ports;
    // --------------------------- Processing custom outputs ---------------------------------------------
    if (!FLAGS_oname.empty()) {
        std::vector<std::string> output_names = convert_str_to_vector(FLAGS_oname);
        for (const auto& output_name : output_names) {
            auto pos_layer = output_name.rfind(":");
            if (pos_layer == std::string::npos) {
                throw std::logic_error("Output " + output_name + " doesn't have a port");
            }
            outputs.push_back(output_name.substr(0, pos_layer));
            try {
                ports.push_back(std::stoi(output_name.substr(pos_layer + 1)));
            } catch (const std::exception&) {
                throw std::logic_error("Ports should have integer type");
            }
        }
    }
    // ------------------------------ Preprocessing ------------------------------------------------------
    // the preprocessing steps can be done only for loaded network and are not applicable for the imported network
    // (already compiled)
    if (!FLAGS_m.empty()) {
        model = core.read_model(FLAGS_m);
        if (!outputs.empty()) {
            for (size_t i = 0; i < outputs.size(); i++) {
                auto output = model->add_output(outputs[i], ports[i]);
                output.set_names({outputs[i] + ":" + std::to_string(ports[i])});
            }
        }
        check_number_of_inputs(model->inputs().size(), numInputFiles);
        const ov::Layout tensor_layout{"NC"};
        ov::preprocess::PrePostProcessor proc(model);
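The custom-outputs block above splits each -oname entry at the last ':' into a layer name and an integer port before adding it to the model. A minimal standalone sketch of that parsing, assuming the same "LayerName:port" convention (the helper name parse_output_spec is illustrative, not part of the sample):

    #include <stdexcept>
    #include <string>
    #include <utility>

    // Split "LayerName:port" into the layer name and the numeric port,
    // rejecting entries without a port or with a non-integer port.
    std::pair<std::string, size_t> parse_output_spec(const std::string& output_name) {
        const auto pos_layer = output_name.rfind(':');
        if (pos_layer == std::string::npos) {
            throw std::logic_error("Output " + output_name + " doesn't have a port");
        }
        try {
            return {output_name.substr(0, pos_layer),
                    static_cast<size_t>(std::stoi(output_name.substr(pos_layer + 1)))};
        } catch (const std::exception&) {
            throw std::logic_error("Ports should have integer type");
        }
    }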
@@ -195,29 +219,6 @@ int main(int argc, char* argv[]) {
        genericPluginConfig.insert(std::begin(gnaPluginConfig), std::end(gnaPluginConfig));
    }
    auto t0 = Time::now();
    std::vector<std::string> outputs;
    if (!FLAGS_oname.empty()) {
        std::vector<std::string> output_names = convert_str_to_vector(FLAGS_oname);
        std::vector<size_t> ports;
        for (const auto& outBlobName : output_names) {
            int pos_layer = outBlobName.rfind(":");
            if (pos_layer == -1) {
                throw std::logic_error(std::string("Output ") + std::string(outBlobName) +
                                       std::string(" doesn't have a port"));
            }
            outputs.push_back(outBlobName.substr(0, pos_layer));
            try {
                ports.push_back(std::stoi(outBlobName.substr(pos_layer + 1)));
            } catch (const std::exception&) {
                throw std::logic_error("Ports should have integer type");
            }
        }
        if (!FLAGS_m.empty()) {
            for (size_t i = 0; i < outputs.size(); i++) {
                model->add_output(outputs[i], ports[i]);
            }
        }
    }
    ms loadTime = std::chrono::duration_cast<ms>(Time::now() - t0);
    slog::info << "Model loading time " << loadTime.count() << " ms" << slog::endl;
    slog::info << "Loading model to the device " << FLAGS_d << slog::endl;
@@ -426,9 +427,10 @@ int main(int argc, char* argv[]) {

                ov::Tensor outputBlob =
                    inferRequest.inferRequest.get_tensor(executableNet.outputs()[0]);
                if (!FLAGS_oname.empty())
                if (!outputs.empty()) {
                    outputBlob =
                        inferRequest.inferRequest.get_tensor(executableNet.output(FLAGS_oname));
                }
                // locked memory holder should be alive all time while access to its buffer happens
                auto byteSize = numScoresPerFrame * sizeof(float);
                std::memcpy(outputFrame, outputBlob.data<float>(), byteSize);
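Because each added output is registered under the tensor name "<layer>:<port>" when the model is read, the inference loop above can fetch it from the compiled model by that same name. A minimal sketch of that flow with the OpenVINO 2.0 API; the model path, device name, and the "layer_name"/port values are placeholders:

    #include <openvino/openvino.hpp>
    #include <string>

    int main() {
        ov::Core core;
        auto model = core.read_model("model.xml");            // placeholder path

        // Register an extra output and name its tensor "<layer>:<port>",
        // mirroring what the sample does for every -oname entry.
        auto output = model->add_output("layer_name", 0);      // placeholder layer/port
        output.set_names({std::string("layer_name") + ":" + std::to_string(0)});

        auto compiled = core.compile_model(model, "CPU");       // placeholder device
        auto request = compiled.create_infer_request();
        request.infer();

        // The added output is now addressable by its tensor name.
        ov::Tensor scores = request.get_tensor(compiled.output("layer_name:0"));
        return scores.get_size() > 0 ? 0 : 1;
    }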
@@ -33,23 +33,41 @@ struct GnaDesc {
    // gna specific properties
    double scale_factor = GNAPluginNS::kScaleFactorDefault;
    intel_dnn_orientation_t orientation = kDnnUnknownOrientation;
    uint32_t num_bytes_per_element = 0;
    uint32_t num_elements = 0;
    uint32_t allocated_size = 0;
    std::vector<void *> ptrs = {}; // ptr per each infer request

    // help methods
    uint32_t get_required_size() {
        return num_elements * num_bytes_per_element;
        return num_elements * tensor_precision.size();
    }

    uint32_t get_allocated_size() {
        return allocated_size;
    }

    void set_precision(InferenceEngine::Precision precision) {
    void set_precision(InferenceEngine::Precision::ePrecision precision) {
        this->tensor_precision = precision;
        this->num_bytes_per_element = precision.size();
    }

    // helps to get the precision for gna layers, because they use num_bytes instead of precision values
    void set_precision(uint32_t num_bytes) {
        switch (num_bytes) {
        case sizeof(int8_t) : {
            set_precision(InferenceEngine::Precision::I8);
            break;
        }
        case sizeof(int16_t) : {
            set_precision(InferenceEngine::Precision::I16);
            break;
        }
        case sizeof(int32_t) : {
            set_precision(InferenceEngine::Precision::I32);
            break;
        }
        default :
            set_precision(InferenceEngine::Precision::UNSPECIFIED);
        }
    }

    InferenceEngine::DataPtr to_ie_data() {
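GnaDesc now stores an InferenceEngine::Precision and derives byte counts from it rather than caching num_bytes_per_element separately; the set_precision(uint32_t) overload exists because GNA layers report raw byte counts. A small standalone sketch of that mapping, assuming only the public ie_precision.hpp API (the helper names are illustrative):

    #include <cstdint>
    #include <ie_precision.hpp>

    // Map a raw element size in bytes to an IE precision, mirroring the
    // switch in GnaDesc::set_precision(uint32_t).
    inline InferenceEngine::Precision precision_from_bytes(uint32_t num_bytes) {
        switch (num_bytes) {
        case sizeof(int8_t):  return InferenceEngine::Precision::I8;
        case sizeof(int16_t): return InferenceEngine::Precision::I16;
        case sizeof(int32_t): return InferenceEngine::Precision::I32;
        default:              return InferenceEngine::Precision::UNSPECIFIED;
        }
    }

    // With the precision stored, the required buffer size follows from it directly,
    // which is what get_required_size() now computes.
    inline uint32_t required_size(uint32_t num_elements, const InferenceEngine::Precision& precision) {
        return num_elements * precision.size();
    }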
@@ -69,7 +87,6 @@ struct InputDesc : GnaDesc {
        this->model_layout = inputInfo->getLayout();
        this->dims = inputInfo->getTensorDesc().getDims();
        this->name = inputInfo->name();
        this->num_bytes_per_element = tensor_precision.size();
        this->num_elements = InferenceEngine::details::product(dims.begin(), dims.end());
    }

@@ -92,7 +109,6 @@ struct OutputDesc : GnaDesc {
        this->model_layout = outputData->getLayout();
        this->dims = outputData->getTensorDesc().getDims();
        this->name = outputData->getName();
        this->num_bytes_per_element = tensor_precision.size();
        this->num_elements = InferenceEngine::details::product(dims.begin(), dims.end());
    }
};
@@ -2382,9 +2382,9 @@ GNAPluginNS::ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer,
        auto quantized = getInjectedData<QuantizedLayerParams>(prevLayer);
        if (quantized) {
            if (quantized->lowPrecision) {
                inputs_ptr_->at(prevLayer->name).set_precision(Precision::I8);
                inputs_ptr_->at(prevLayer->name).set_precision(InferenceEngine::Precision::I8);
            } else {
                inputs_ptr_->at(prevLayer->name).set_precision(Precision::I16);
                inputs_ptr_->at(prevLayer->name).set_precision(InferenceEngine::Precision::I16);
            }
        }
        if (0 == inputs_ptr_->at(prevLayer->name).get_allocated_size()) {
@@ -403,7 +403,7 @@ void GNAModelSerial::Export(void * basePointer, size_t gnaGraphSize, std::ostrea
    HeaderLatest::RuntimeEndPoint ep;
    ep.elements_count = desc.num_elements;
    ep.scaleFactor = desc.scale_factor;
    ep.element_size = desc.num_bytes_per_element;
    ep.element_size = desc.tensor_precision.size();
    ep.layout = desc.model_layout;
    ep.precision = desc.model_precision;
    ep.orientation = desc.orientation;

@@ -538,10 +538,9 @@ void GNAModelSerial::ImportInputs(std::istream &is, void* basePtr, GNAPluginNS::
    input.ptrs.push_back(reinterpret_cast<float*>(reinterpret_cast<uint8_t *> (basePtr) + ep.descriptor_offset));
    input.orientation = ep.orientation;
    input.num_elements = ep.elements_count;
    input.num_bytes_per_element = ep.element_size;
    input.scale_factor = ep.scaleFactor;
    input.model_precision = InferenceEngine::Precision(static_cast<InferenceEngine::Precision::ePrecision>(ep.precision));
    input.tensor_precision = InferenceEngine::Precision(static_cast<InferenceEngine::Precision::ePrecision>(ep.precision));
    input.set_precision(ep.element_size);
    input.model_layout = static_cast<InferenceEngine::Layout>(ep.layout);
    input.allocated_size = input.get_required_size();

@@ -565,10 +564,9 @@ void GNAModelSerial::ImportOutputs(std::istream &is, void* basePtr, GNAPluginNS:
    output.ptrs.push_back(reinterpret_cast<float*>(reinterpret_cast<uint8_t *> (basePtr) + ep.descriptor_offset));
    output.orientation = ep.orientation;
    output.num_elements = ep.elements_count;
    output.num_bytes_per_element = ep.element_size;
    output.scale_factor = ep.scaleFactor;
    output.set_precision(ep.element_size);
    output.model_precision = InferenceEngine::Precision(static_cast<InferenceEngine::Precision::ePrecision>(ep.precision));
    output.tensor_precision = InferenceEngine::Precision(static_cast<InferenceEngine::Precision::ePrecision>(ep.precision));
    output.model_layout = static_cast<InferenceEngine::Layout>(ep.layout);
    output.allocated_size = output.get_required_size();
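In the serializer above, element_size is now written as tensor_precision.size() and, on import, fed back through set_precision(ep.element_size). A tiny self-contained check of the round-trip invariant this relies on for the 1/2/4-byte integer precisions GNA uses (a sketch, not plugin code):

    #include <cassert>
    #include <cstdint>
    #include <ie_precision.hpp>

    int main() {
        using InferenceEngine::Precision;
        // Same byte-count-to-precision mapping the import path applies.
        auto from_bytes = [](uint32_t num_bytes) {
            switch (num_bytes) {
            case sizeof(int8_t):  return Precision(Precision::I8);
            case sizeof(int16_t): return Precision(Precision::I16);
            case sizeof(int32_t): return Precision(Precision::I32);
            default:              return Precision(Precision::UNSPECIFIED);
            }
        };
        // element_size written by Export() restores the same precision in Import*().
        for (auto p : {Precision(Precision::I8), Precision(Precision::I16), Precision(Precision::I32)}) {
            assert(from_bytes(static_cast<uint32_t>(p.size())) == p);
        }
        return 0;
    }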
@@ -194,65 +194,56 @@ void GNAPlugin::ExportScores(void *ptr_dst,
    uint32_t num_vector_elements,
    uint32_t num_active_elements,
    uint32_t num_vector_stride,
    uint32_t num_bytes_per_element_input,
    uint32_t num_bytes_per_element) {
    Precision precision_in,
    Precision precision_out) {
    if (precision_out != Precision::I32 && precision_out != Precision::FP32) {
    THROW_GNA_EXCEPTION << "Unsupported target precision for infer : " << precision_out.name();
    }
    // source scores are possibly padded to multiple of 8 and possibly interleaved
    // rotate if necessary and only copy actual scores (not padding)
    if (orientation == kDnnInterleavedOrientation) {
    if (num_bytes_per_element == 2) {
    int16_t *dst = reinterpret_cast<int16_t *>(ptr_dst);
    const int16_t *src = reinterpret_cast<const int16_t *>(ptr_src);
    for (uint32_t i = 0; i < num_frames; i++) {
    for (uint32_t j = 0; j < num_active_elements; j++) {
    dst[i * num_vector_elements + j] = src[j * num_group + i];
    }
    for (uint32_t j = num_active_elements; j < num_vector_elements; j++) {
    dst[i * num_vector_elements + j] = 0;
    }
    }
    } else if (num_bytes_per_element == 4) { // should work for both int and float
    int32_t *dst = reinterpret_cast<int32_t *>(ptr_dst);
    const int8_t *src = reinterpret_cast<const int8_t*>(ptr_src);
    for (uint32_t i = 0; i < num_frames; i++) {
    for (uint32_t j = 0; j < num_active_elements; j++) {
    auto input_ptr = src + (j * num_group + i) * num_bytes_per_element_input;
    auto dst_ptr = dst + (i * num_vector_elements + j);
    int32_t *dst = reinterpret_cast<int32_t *>(ptr_dst);
    const int8_t *src = reinterpret_cast<const int8_t*>(ptr_src);
    for (uint32_t i = 0; i < num_frames; i++) {
    for (uint32_t j = 0; j < num_active_elements; j++) {
    auto input_ptr = src + (j * num_group + i) * precision_in.size();
    auto dst_ptr = dst + (i * num_vector_elements + j);

    switch (num_bytes_per_element_input) {
    case 1: {
    *dst_ptr = static_cast<int32_t>(*reinterpret_cast<const int8_t*>(input_ptr));
    break;
    }
    case 2 : {
    *dst_ptr = static_cast<int32_t>(*reinterpret_cast<const int16_t*>(input_ptr));
    break;
    }
    case 4 : {
    *dst_ptr = *reinterpret_cast<const int32_t *>(input_ptr);
    break;
    }
    default:
    THROW_GNA_EXCEPTION << "Unsupported output layer precision: " << num_bytes_per_element_input << "bytes";
    switch (precision_in) {
    case Precision::I8 : {
    *dst_ptr = static_cast<int32_t>(*reinterpret_cast<const int8_t*>(input_ptr));
    break;
    }
    }
    for (uint32_t j = num_active_elements; j < num_vector_elements; j++) {
    dst[i * num_vector_elements + j] = 0;
    case Precision::I16 : {
    *dst_ptr = static_cast<int32_t>(*reinterpret_cast<const int16_t*>(input_ptr));
    break;
    }
    case Precision::I32 : {
    *dst_ptr = *reinterpret_cast<const int32_t *>(input_ptr);
    break;
    }
    default:
    THROW_GNA_EXCEPTION << "Unsupported output layer precision: " << precision_in.name();
    }
    }
    } else {
    THROW_GNA_EXCEPTION << "Unsupported target precision for infer : " << num_bytes_per_element << "bytes";
    for (uint32_t j = num_active_elements; j < num_vector_elements; j++) {
    dst[i * num_vector_elements + j] = 0;
    }
    }
    } else {
    if (num_bytes_per_element == 2) {
    for (uint32_t i = 0; i < num_frames; i++) {
    auto ptr_dst_vec = reinterpret_cast<uint8_t *>(ptr_dst) + i * num_vector_elements * sizeof(int16_t);
    auto ptr_src_vec = reinterpret_cast<const uint8_t *>(ptr_src) + i * num_vector_stride * sizeof(int16_t);
    memset(ptr_dst_vec, 0, num_vector_elements * sizeof(int16_t));
    ie_memcpy(ptr_dst_vec, num_active_elements * sizeof(int16_t),
    ptr_src_vec, num_active_elements * sizeof(int16_t));
    switch (precision_in) {
    case Precision::I8 :
    case Precision::I32 : {
    for (uint32_t i = 0; i < num_frames; i++) {
    void* ptr_dst_vec = reinterpret_cast<uint8_t*>(ptr_dst) + i * num_vector_elements * precision_out.size();
    const void* ptr_src_vec = reinterpret_cast<const uint8_t*>(ptr_src) + i * num_vector_stride * precision_in.size();
    memset(ptr_dst_vec, 0, num_vector_elements * precision_out.size());
    ie_memcpy(ptr_dst_vec, num_active_elements * precision_out.size(),
    ptr_src_vec, num_active_elements * precision_in.size());
    }
    break;
    }
    } else if (num_bytes_per_element == 4) { // should work for both int and float
    if (num_bytes_per_element_input == 2) {
    case Precision::I16 : {
    for (uint32_t i = 0; i < num_frames; i++) {
    auto ptr_dst_vec = reinterpret_cast<int32_t*>(ptr_dst) + i * num_vector_elements;
    auto ptr_src_vec = reinterpret_cast<const int16_t*>(ptr_src) + i * num_vector_stride;
@@ -260,17 +251,10 @@ void GNAPlugin::ExportScores(void *ptr_dst,
    ptr_dst_vec[j] = ptr_src_vec[j];
    }
    }
    } else {
    for (uint32_t i = 0; i < num_frames; i++) {
    void* ptr_dst_vec = reinterpret_cast<uint8_t*>(ptr_dst) + i * num_vector_elements * sizeof(float);
    const void* ptr_src_vec = reinterpret_cast<const uint8_t*>(ptr_src) + i * num_vector_stride * sizeof(float);
    memset(ptr_dst_vec, 0, num_vector_elements * sizeof(float));
    ie_memcpy(ptr_dst_vec, num_active_elements * sizeof(float),
    ptr_src_vec, num_active_elements * sizeof(float));
    }
    break;
    }
    } else {
    THROW_GNA_EXCEPTION << "Unsupported target precision for infer : " << num_bytes_per_element << "bytes";
    default:
    THROW_GNA_EXCEPTION << "Unsupported output layer precision: " << precision_in.name();
    }
    }
    }
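ExportScores above now dispatches on InferenceEngine::Precision values (I8/I16/I32 in, I32 or FP32 out) instead of raw byte counts. A simplified standalone sketch of the interleaved case for an int32 destination, without the plugin's padding/zero-fill details and exception macros (the function name and layout assumptions are illustrative):

    #include <cstdint>
    #include <cstring>
    #include <stdexcept>
    #include <string>
    #include <ie_precision.hpp>

    using InferenceEngine::Precision;

    // Widen interleaved scores (src laid out as [element][frame]) of precision_in
    // to int32 and deinterleave them into dst laid out as [frame][element].
    void export_scores_i32(int32_t* dst, const void* src,
                           uint32_t num_frames, uint32_t num_elements,
                           Precision precision_in) {
        const auto* bytes = reinterpret_cast<const uint8_t*>(src);
        for (uint32_t frame = 0; frame < num_frames; ++frame) {
            for (uint32_t elem = 0; elem < num_elements; ++elem) {
                const uint8_t* in = bytes + (elem * num_frames + frame) * precision_in.size();
                int32_t* out = dst + frame * num_elements + elem;
                if (precision_in == Precision::I8) {
                    int8_t v;
                    std::memcpy(&v, in, sizeof(v));
                    *out = v;
                } else if (precision_in == Precision::I16) {
                    int16_t v;
                    std::memcpy(&v, in, sizeof(v));
                    *out = v;
                } else if (precision_in == Precision::I32) {
                    std::memcpy(out, in, sizeof(int32_t));
                } else {
                    throw std::logic_error(std::string("Unsupported input precision: ") + precision_in.name());
                }
            }
        }
    }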
@@ -494,7 +478,7 @@ bool GNAPlugin::TryToInitOutput(const std::string &portName, InferenceEngine::CN

    outputs_.at(portName).ptrs.resize(gnaFlags->num_requests);
    outputs_.at(portName).orientation = orientation;
    outputs_.at(portName).num_bytes_per_element = numBytesPerElem;
    outputs_.at(portName).set_precision(numBytesPerElem);
    outputs_.at(portName).scale_factor = quantized != nullptr ? quantized->_dst_quant.GetScale() : GNAPluginNS::kScaleFactorDefault;
    outputs_.at(portName).num_elements = numElem;
@@ -1350,7 +1334,7 @@ GnaWaitStatus GNAPlugin::WaitFor(uint32_t request_idx, int64_t millisTimeout) {
        THROW_GNA_EXCEPTION << "Transposed data size (" << transposed_data_size
                            << ") do not match output buffer length of " << elementsPerBatch;
    }
    ConvertTensorFromNCHWToNHWC(outputDesc.num_bytes_per_element,
    ConvertTensorFromNCHWToNHWC(outputDesc.tensor_precision.size(),
                                batchSize,
                                elementsPerBatch,
                                reinterpret_cast<uint8_t*>(outputDesc.ptrs[request_idx]),
@@ -1366,8 +1350,8 @@ GnaWaitStatus GNAPlugin::WaitFor(uint32_t request_idx, int64_t millisTimeout) {
                 elementsPerBatch,
                 elementsPerBatch,
                 elementsPerBatch,
                 outputDesc.num_bytes_per_element,
                 sizeof(float));
                 outputDesc.tensor_precision,
                 outputDesc.model_precision);

    if (gnadevice) {
#ifdef PLOT
@@ -188,8 +188,8 @@ class GNAPlugin : public InferenceEngine::IInferencePlugin {
                      uint32_t num_vector_elements,
                      uint32_t num_active_elements,
                      uint32_t num_vector_stride,
                      uint32_t num_bytes_per_element_input,
                      uint32_t num_bytes_per_element);
                      InferenceEngine::Precision precision_in,
                      InferenceEngine::Precision precision_out);

    template <typename T, typename U>
    void copyInputData(T *dst,