[GNA] Fixed export/import functionality (#5963)

* Rebase master

* [GNA] Fixed export/import functionality

* Extended import log

* Added logs

* Fixed importing issue for the old models

* Revert "Added logs"

This reverts commit 39a3882d56.

* Revert "Extended import log"

This reverts commit 59eb9d6fba.

* Reverted precision import

* Extended tests

* Enabled skipped tests

* Included gna2-common-api header

* Replaced included header

* Centos7 build fix
This commit is contained in:
Mikhail Ryzhov
2021-06-09 20:39:05 +03:00
committed by GitHub
parent 3bedd051dc
commit 1a6392eb53
8 changed files with 301 additions and 35 deletions

View File

@@ -17,6 +17,7 @@
#include <mm_malloc.h>
#include <serial/headers/2dot2/gna_model_header.hpp>
#include <serial/headers/2dot5/gna_model_header.hpp>
#include <serial/headers/2dot6/gna_model_header.hpp>
#endif
@@ -133,10 +134,11 @@ GNAPluginNS::HeaderLatest::ModelHeader GNAModelSerial::ReadHeader(std::istream &
}
case 5:
case 6:
case 7:
readNBytes(&header, sizeof(HeaderLatest::ModelHeader), is);
break;
default:
THROW_GNA_EXCEPTION << "Imported file unsupported. minor version should have values in range 1 to 4 and is: " << header.version.minor;
THROW_GNA_EXCEPTION << "Imported file unsupported. minor version should have values in range 1 to 7 and is: " << header.version.minor;
}
break;
default:
@@ -154,6 +156,40 @@ GNAPluginNS::HeaderLatest::ModelHeader GNAModelSerial::ReadHeader(std::istream &
return header;
}
// Reads one RuntimeEndPoint record from the stream, dispatching on the model
// header version parsed earlier by ReadHeader().
// Versions 2.1-2.6 are stored in the smaller Header2dot6 layout (no
// shape/layout/precision fields) and are widened to the latest layout using the
// header's nGroup to reconstruct a 2D shape; version 2.7 is read directly as
// the latest layout. Any other major/minor version throws.
GNAPluginNS::HeaderLatest::RuntimeEndPoint GNAModelSerial::ReadEndPoint(std::istream &is) {
    // Fail fast on any stream error instead of silently consuming garbage.
    is.exceptions(std::istream::failbit);
    HeaderLatest::RuntimeEndPoint endPoint;
    switch (modelHeader.version.major) {
        case 2:
            switch (modelHeader.version.minor) {
                case 1:
                case 2:
                case 3:
                case 4:
                case 5:
                case 6:
                {
                    // Old on-disk layout: read then convert to the latest in-memory form.
                    // NOTE(review): assumes minors 1-5 share the 2.6 endpoint layout — confirm.
                    Header2dot6::RuntimeEndPoint tempEndPoint2dot6;
                    readBits(tempEndPoint2dot6, is);
                    endPoint = HeaderLatest::RuntimeEndPoint(tempEndPoint2dot6, modelHeader.nGroup);
                    break;
                }
                case 7:
                    // Current layout: a raw byte copy matches the serialized form.
                    readNBytes(&endPoint, sizeof(HeaderLatest::RuntimeEndPoint), is);
                    break;
                default:
                    THROW_GNA_EXCEPTION << "Imported file unsupported. minor version should have values in range 1 to 7 and is: " << modelHeader.version.minor;
            }
            break;
        default:
            THROW_GNA_EXCEPTION << "Imported file unsupported. Import for files with major version equal to: "
                << modelHeader.version.major << " is not implemented";
    }
    return endPoint;
}
#define offsetFromBase(field)\
getOffsetFromBase(field, #field)
@@ -324,18 +360,6 @@ void GNAModelSerial::Import(void *basePointer,
is.read(reinterpret_cast<char*>(basePointer), gnaGraphSize);
}
// Derives the grouping level (batch) from the first operand of the first
// operation: the smaller of its first two dimensions. Throws when the model
// has no operation/operand with at least a 2D shape to inspect.
uint32_t guessGrouping(Gna2Model const& model) {
    const bool shapeUnavailable = model.NumberOfOperations == 0 ||
                                  model.Operations == nullptr ||
                                  model.Operations[0].Operands == nullptr ||
                                  model.Operations[0].NumberOfOperands == 0 ||
                                  model.Operations[0].Operands[0]->Shape.NumberOfDimensions < 2;
    if (shapeUnavailable) {
        THROW_GNA_EXCEPTION << "Can not guess grouping";
    }
    const auto& dims = model.Operations[0].Operands[0]->Shape.Dimensions;
    return (std::min)(dims[0], dims[1]);
}
void GNAModelSerial::Export(void * basePointer, size_t gnaGraphSize, std::ostream & os) const {
os.exceptions(std::ostream::failbit);
@@ -366,6 +390,9 @@ void GNAModelSerial::Export(void * basePointer, size_t gnaGraphSize, std::ostrea
out.descriptor_offset = offsetFromBase(ep.descriptor_ptr);
out.scaleFactor = ep.scaleFactor;
out.element_size = ep.element_size;
out.shape = ep.shape;
out.layout = ep.layout;
out.precision = ep.precision;
out.orientation = ep.orientation;
return out;
};
@@ -381,7 +408,7 @@ void GNAModelSerial::Export(void * basePointer, size_t gnaGraphSize, std::ostrea
header.headerSize = sizeof(HeaderLatest::ModelHeader);
header.gnaMemSize = gnaGraphSize;
header.layersCount = layers.size();
header.nGroup = guessGrouping(*gna2Model);
header.nGroup = 1; // just to support the old models
header.nInputs = inputs.size();
header.nOutputs = outputs.size();
header.nTransposeInputs = transposeInputsInfo.size();
@@ -796,13 +823,22 @@ std::vector<HeaderLatest::RuntimeEndPoint> GNAModelSerial::serializeOutputs(cons
std::size_t outputIndex = 0;
for (auto const &output : outputsDataMap) {
auto outputName = output.first;
auto inputDims = output.second->getTensorDesc().getDims();
uint32_t elementsCount = static_cast<uint32_t>(InferenceEngine::details::product(inputDims.begin(), inputDims.end()));
auto outputDims = output.second->getTensorDesc().getDims();
HeaderLatest::RuntimeEndPoint::Shape outputShape;
outputShape.NumberOfDimensions = outputDims.size();
for (size_t i=0; i < outputShape.NumberOfDimensions; ++i) {
outputShape.Dimensions[i] = static_cast<uint32_t>(outputDims[i]);
}
uint32_t elementsCount = static_cast<uint32_t>(InferenceEngine::details::product(outputDims.begin(), outputDims.end()));
InferenceEngine::Layout outputLayout = output.second->getLayout();
InferenceEngine::Precision::ePrecision outputPrecision = InferenceEngine::Precision::FP32;
HeaderLatest::RuntimeEndPoint endPoint(outputsDesc[outputIndex].scale_factor,
outputsDesc[outputIndex].ptrs[0],
outputsDesc[outputIndex].num_bytes_per_element,
elementsCount,
outputShape,
outputLayout,
outputPrecision,
outputsDesc[outputIndex].orientation);
endPoints.push_back(endPoint);
outputIndex++;
@@ -818,18 +854,26 @@ std::vector<HeaderLatest::RuntimeEndPoint> GNAModelSerial::serializeInputs(const
for (auto const& input : inputsDataMap) {
auto inputName = input.first;
auto inputDims = input.second->getTensorDesc().getDims();
HeaderLatest::RuntimeEndPoint::Shape inputShape;
inputShape.NumberOfDimensions = inputDims.size();
for (size_t i=0; i < inputShape.NumberOfDimensions; ++i) {
inputShape.Dimensions[i] = static_cast<uint32_t>(inputDims[i]);
}
double scaleFactor = inputDesc->getScaleFactor(inputIndex);
std::vector<void *> descriptor_ptr = inputDesc->getPtrInputsGlobal(inputName);
IE_ASSERT(descriptor_ptr.size() > 0);
uint32_t element_size = 2u;
uint32_t elementsCount = static_cast<uint32_t>(InferenceEngine::details::product(inputDims.begin(), inputDims.end()));
intel_dnn_orientation_t orientation = inputDesc->getOrientation(inputName);
InferenceEngine::Layout inputLayout = input.second->getLayout();
InferenceEngine::Precision::ePrecision inputPrecision = InferenceEngine::Precision::FP32;
HeaderLatest::RuntimeEndPoint endPoint(scaleFactor,
descriptor_ptr[0],
element_size,
elementsCount,
inputShape,
inputLayout,
inputPrecision,
orientation);
endPoints.push_back(endPoint);
inputIndex++;
@@ -846,20 +890,24 @@ void GNAModelSerial::ImportInputs(std::istream &is,
for (uint32_t inputIndex = 0; inputIndex < modelHeader.nInputs; inputIndex++) {
const std::string& name = (modelHeader.version.major == 2 && modelHeader.version.minor >= 3)
? inputNames.at(inputIndex) : std::string("input" + std::to_string(inputIndex));
HeaderLatest::RuntimeEndPoint input;
is.read(reinterpret_cast<char *>(&input), sizeof(input));
HeaderLatest::RuntimeEndPoint input = ReadEndPoint(is);
inputsDesc->getPtrInputsGlobal(name).push_back(reinterpret_cast<float*>(reinterpret_cast<uint8_t *> (basePtr) + input.descriptor_offset));
inputsDesc->orientation_in[name] = input.orientation;
inputsDesc->bytes_allocated_for_input[name] = input.element_size * input.elements_count;
auto inputDims = InferenceEngine::SizeVector({modelHeader.nGroup, input.elements_count / modelHeader.nGroup});
auto inputDims = InferenceEngine::SizeVector();
for (auto i = 0; i < input.shape.NumberOfDimensions; ++i) {
inputDims.push_back(input.shape.Dimensions[i]);
}
InferenceEngine::Layout inputLayout = static_cast<InferenceEngine::Layout>(input.layout);
InferenceEngine::Precision inputPresicion = InferenceEngine::Precision(static_cast<InferenceEngine::Precision::ePrecision>(input.precision));
dataMap[name] = std::make_shared<InferenceEngine::InputInfo>();
dataMap[name]->setInputData(std::make_shared<InferenceEngine::Data>(name,
InferenceEngine::TensorDesc(
InferenceEngine::Precision::FP32,
inputPresicion,
inputDims,
InferenceEngine::Layout::NC)));
inputLayout)));
inputsDesc->inputScaleFactors.push_back(input.scaleFactor);
}
}
@@ -875,8 +923,8 @@ void GNAModelSerial::ImportOutputs(std::istream &is,
for (uint32_t outputIndex = 0; outputIndex < modelHeader.nOutputs; outputIndex++) {
const std::string& name = (modelHeader.version.major == 2 && modelHeader.version.minor >= 3)
? outputNames.at(outputIndex) : std::string("output" + std::to_string(outputIndex));
HeaderLatest::RuntimeEndPoint output;
is.read(reinterpret_cast<char *>(&output), sizeof(output));
HeaderLatest::RuntimeEndPoint output = ReadEndPoint(is);
OutputDesc description;
description.ptrs.push_back(reinterpret_cast<float*>(reinterpret_cast<uint8_t *> (basePtr) + output.descriptor_offset));
description.orientation = kDnnInterleavedOrientation;
@@ -884,12 +932,17 @@ void GNAModelSerial::ImportOutputs(std::istream &is,
description.num_bytes_per_element = output.element_size;
description.scale_factor = output.scaleFactor;
auto outputDims = InferenceEngine::SizeVector({modelHeader.nGroup, output.elements_count / modelHeader.nGroup});
auto outputDims = InferenceEngine::SizeVector();
for (auto i = 0; i < output.shape.NumberOfDimensions; ++i) {
outputDims.push_back(output.shape.Dimensions[i]);
}
InferenceEngine::Layout outputLayout = static_cast<InferenceEngine::Layout>(output.layout);
InferenceEngine::Precision outputPresicion = InferenceEngine::Precision(static_cast<InferenceEngine::Precision::ePrecision>(output.precision));
dataMap[name] = std::make_shared<InferenceEngine::Data>(name,
InferenceEngine::TensorDesc(
InferenceEngine::Precision::FP32,
outputPresicion,
outputDims,
InferenceEngine::Layout::NC));
outputLayout));
desc.at(outputIndex) = description;
}
}

View File

@@ -138,6 +138,8 @@ private:
*/
static GNAPluginNS::HeaderLatest::ModelHeader ReadHeader(std::istream &is);
GNAPluginNS::HeaderLatest::RuntimeEndPoint ReadEndPoint(std::istream &is);
/**
* @brief Import model from FS into preallocated buffer,
* buffers for pLayers, and pStructs are allocated here and required manual deallocation using mm_free

View File

@@ -0,0 +1,197 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <cstdint>
#include <map>
#include "backend/dnn_types.h"
#include "serial/headers/2dot4/gna_model_header.hpp"
#include "serial/headers/2dot6/gna_model_header.hpp"
#include "serial/headers/latest/gna_model_header.hpp"
#include "gna_data_types.hpp"
#pragma pack(push, 1)
namespace GNAPluginNS {
namespace Header2dot7 {
/**
Maximal number of supported shape dimensions.
*/
#define GNA_SHAPE_MAXIMUM_NUMBER_OF_DIMENSIONS 8
/**
 * @brief Model header, serialization format version 2.7.
 * The struct is packed (see pragma pack(push, 1) above) and written/read as raw
 * bytes, so field order, types, and sizes must never change within this version.
 */
struct ModelHeader {
    /**
     * @brief Magic number: "GNAM" in ASCII, equals hex 0x474e414d.
     */
    char gnam[4] = {};
    /**
     * @brief If header size is not equal to sizeof(ModelHeader), reserved data
     * is appended at the end of the header — usually an indicator that the model
     * was produced by a different version of the export function.
     */
    uint32_t headerSize = 0u;
    struct Version {
        /**
         * @details Major format version, e.g. 0x0001.
         * Every change to the header or layer definitions must be reflected in a
         * version change; new parsers can read older models with some restrictions.
         */
        uint16_t major = 2u;
        /**
         * @details Minor format version (corresponds to build revision, for example).
         * Minor version changes do not affect the model layout.
         */
        uint32_t minor = 7u;
    } version;
    /**
     * @brief Memory required to be allocated using GNAAlloc().
     */
    uint64_t gnaMemSize = 0ull;
    /**
     * @brief Number of GNA layers.
     */
    uint64_t layersCount = 0ull;
    /**
     * @brief Grouping level.
     * Deprecated field, kept only for importing old models (version <= 2.6);
     * newer models carry per-endpoint shapes instead.
     */
    uint32_t nGroup = 0u;
    /**
     * Convolution-related settings — they affect the input transformation.
     */
    uint32_t nRotateRows = 0u;
    uint32_t nRotateColumns = 0u;
    bool doRotateInput = false;
    uint32_t nInputs = 0u;
    uint32_t nOutputs = 0u;
    /**
     * Convolution-related settings — they affect the output transformation.
     */
    uint32_t nRotateOutputRows = 0u;
    uint32_t nRotateOutputColumns = 0u;
    bool doRotateOutput = false;
    uint32_t nTransposeInputs = 0u;
    uint32_t nTransposeOutputs = 0u;
    /**
     * Reserved data might follow here (see headerSize above).
     */
    ModelHeader() = default;
    // Upgrade constructor from the 2.1 header; fields absent in 2.1 keep their defaults.
    ModelHeader(GNAPluginNS::Header2dot1::ModelHeader const &old) {
        gnaMemSize = old.gnaMemSize;
        layersCount = old.layersCount;
        nGroup = old.nGroup;
        nRotateRows = old.nRotateRows;
        nRotateColumns = old.nRotateColumns;
        nInputs = old.nInputs;
        nOutputs = old.nOutputs;
        version.minor = old.version.minor;
    }
    // Upgrade constructor from the 2.4 header; preserves the original minor version
    // so readers can still dispatch on the true on-disk layout.
    ModelHeader(GNAPluginNS::Header2dot4::ModelHeader const &old) {
        gnaMemSize = old.gnaMemSize;
        layersCount = old.layersCount;
        nGroup = old.nGroup;
        nRotateRows = old.nRotateRows;
        nRotateColumns = old.nRotateColumns;
        nInputs = old.nInputs;
        nOutputs = old.nOutputs;
        nRotateOutputRows = old.nRotateOutputRows;
        nRotateOutputColumns = old.nRotateOutputColumns;
        doRotateOutput = old.doRotateOutput;
        version.minor = old.version.minor;
    }
};
#pragma pack(pop)
/*
 * Runtime endpoint (input/output) descriptor for format version 2.7.
 * Mostly the same as the serialized form, except the descriptor field is an
 * in-memory pointer here while the file stores descriptor_offset.
 * NOTE(review): this struct sits after #pragma pack(pop), so unlike ModelHeader
 * it is serialized with natural padding — confirm this is intentional, since it
 * is read/written via sizeof() raw copies (ReadEndPoint case 7).
 */
struct RuntimeEndPoint {
    /**
     * If the scale factor differs from the one passed into infer,
     * the network might need to be requantized.
     */
    float scaleFactor = 0;
    /**
     * Pointer descriptor (in-memory only; the file stores descriptor_offset).
     */
    void* descriptor_ptr = nullptr;
    /**
     * Endpoint resolution in bytes.
     */
    uint32_t element_size = 0;
    /**
     * Number of elements.
     */
    uint32_t elements_count = 0;
    /**
     * Offset in bytes of the pointer descriptor.
     */
    uint64_t descriptor_offset = 0ull;
    /**
     * Shape specifying dimension values.
     */
    struct Shape {
        /**
         * Number of dimensions (rank/order).
         */
        uint32_t NumberOfDimensions = 0;
        /**
         * Value of each dimension; all zeros for scalars.
         */
        uint32_t Dimensions[GNA_SHAPE_MAXIMUM_NUMBER_OF_DIMENSIONS] = {0};
    } shape;
    /**
     * Blob layout (InferenceEngine::Layout stored as a byte).
     */
    uint8_t layout = InferenceEngine::Layout::NC;
    /**
     * Blob precision (InferenceEngine::Precision::ePrecision stored as a byte).
     */
    uint8_t precision = InferenceEngine::Precision::FP32;
    intel_dnn_orientation_t orientation = kDnnUnknownOrientation;
    RuntimeEndPoint() = default;
    /**
     * Upgrade constructor: widens a 2.6 endpoint to the 2.7 layout.
     * The old format carried no shape, so a 2D {ngroup, elements_count / ngroup}
     * shape is reconstructed from the header's grouping level.
     * NOTE(review): assumes ngroup != 0 for old models — confirm against callers.
     */
    RuntimeEndPoint(const GNAPluginNS::Header2dot6::RuntimeEndPoint &old, uint32_t ngroup) {
        scaleFactor = old.scaleFactor;
        descriptor_ptr = old.descriptor_ptr;
        element_size = old.element_size;
        elements_count = old.elements_count;
        orientation = old.orientation;
        layout = InferenceEngine::Layout::NC;
        precision = InferenceEngine::Precision::FP32;
        descriptor_offset = old.descriptor_offset;
        InferenceEngine::SizeVector dims = {ngroup, elements_count / ngroup};
        shape.NumberOfDimensions = static_cast<uint32_t>(dims.size());
        // size_t index fixes the signed/unsigned comparison against dims.size();
        // explicit cast documents the intentional size_t -> uint32_t narrowing.
        for (size_t i = 0; i < dims.size(); i++) {
            shape.Dimensions[i] = static_cast<uint32_t>(dims[i]);
        }
    }
    RuntimeEndPoint(double scaleFactor,
                    void* descriptor_ptr,
                    uint32_t element_size,
                    uint32_t elements_count,
                    Shape shape,
                    uint8_t layout,
                    uint8_t precision,
                    intel_dnn_orientation_t orientation) : scaleFactor(scaleFactor),
                    descriptor_ptr(descriptor_ptr),
                    element_size(element_size),
                    elements_count(elements_count),
                    shape(shape),
                    layout(layout),
                    precision(precision),
                    orientation(orientation) { }
};
} // namespace Header2dot7
} // namespace GNAPluginNS

View File

@@ -4,11 +4,11 @@
#pragma once
#include "serial/headers/2dot6/gna_model_header.hpp"
#include "serial/headers/2dot7/gna_model_header.hpp"
namespace GNAPluginNS {
namespace HeaderLatest {
using ModelHeader = GNAPluginNS::Header2dot6::ModelHeader;
using RuntimeEndPoint = GNAPluginNS::Header2dot6::RuntimeEndPoint;
using ModelHeader = GNAPluginNS::Header2dot7::ModelHeader;
using RuntimeEndPoint = GNAPluginNS::Header2dot7::RuntimeEndPoint;
}
}

View File

@@ -60,8 +60,6 @@ std::vector<std::string> disabledTestPatterns() {
R"(.*ConstantResultSubgraphTest.*inPrc=(U8|I8|I32|U64|I64|BOOL).*)",
// TODO: Issue 51528
R"(.*CachingSupport.*_(u8|i16)_.*)",
// TODO: Issue 51527
R"(.*CachingSupport.*_batch2_.*)",
// TODO: Issue 51525
R"(.*CachingSupport.*KSOFunction.*)",
// TODO: Issue 57363 (Param -> Result subgraphs)

View File

@@ -69,13 +69,16 @@ void ImportNetworkTestBase::Run() {
for (const auto& next_input : importedExecNetwork.GetInputsInfo()) {
ASSERT_NO_THROW(compiledExecNetwork.GetInputsInfo()[next_input.first]);
Compare(next_input.second->getTensorDesc(), compiledExecNetwork.GetInputsInfo()[next_input.first]->getTensorDesc());
}
for (const auto& next_output : importedExecNetwork.GetOutputsInfo()) {
ASSERT_NO_THROW(compiledExecNetwork.GetOutputsInfo()[next_output.first]);
}
auto importedOutputs = GetOutputs();
ASSERT_EQ(actualOutputs.size(), importedOutputs.size());
for (size_t i = 0; i < actualOutputs.size(); i++) {
Compare(actualOutputs[i]->getTensorDesc(), importedOutputs[i]->getTensorDesc());
Compare(actualOutputs[i], importedOutputs[i]);
}
}

View File

@@ -72,6 +72,8 @@ public:
virtual void Compare(const InferenceEngine::Blob::Ptr &expected, const InferenceEngine::Blob::Ptr &actual);
virtual void Compare(const InferenceEngine::TensorDesc &actualDesc, const InferenceEngine::TensorDesc &expectedDesc);
virtual void SetRefMode(RefMode mode);
std::shared_ptr<ngraph::Function> GetFunction();

View File

@@ -274,6 +274,17 @@ void LayerTestsCommon::Compare(const InferenceEngine::Blob::Ptr &expected, const
}
}
// Compares two tensor descriptors for equality of dims, layout, and precision.
// Fix: the original assigned `expectedDims` from actualDesc and `actualDims`
// from expectedDesc — the equality checks still passed, but any failure message
// reported the values under swapped labels. Also bind by const reference to
// avoid copying the dims vectors.
void LayerTestsCommon::Compare(const InferenceEngine::TensorDesc &actualDesc, const InferenceEngine::TensorDesc &expectedDesc) {
    const auto& actualDims = actualDesc.getDims();
    const auto& expectedDims = expectedDesc.getDims();
    ASSERT_EQ(actualDims.size(), expectedDims.size());
    for (size_t j = 0; j < actualDims.size(); ++j) {
        ASSERT_EQ(actualDims.at(j), expectedDims.at(j));
    }
    ASSERT_EQ(actualDesc.getLayout(), expectedDesc.getLayout());
    ASSERT_EQ(actualDesc.getPrecision(), expectedDesc.getPrecision());
}
void LayerTestsCommon::ConfigureNetwork() {
for (const auto &in : cnnNetwork.getInputsInfo()) {
if (inLayout != InferenceEngine::Layout::ANY) {