[GNA] Fixed export/import functionality (#5963)

* Rebase master

* [GNA] Fixed export/import functionality

* Extended import log

* Added logs

* Fixed importing issue for the old models

* Revert "Added logs"

This reverts commit 39a3882d56.

* Revert "Extended import log"

This reverts commit 59eb9d6fba.

* Reverted precision import

* Extended tests

* Enabled skipped tests

* Included gna2-common-api header

* Replaced included header

* Centos7 build fix
This commit is contained in:
Mikhail Ryzhov
2021-06-09 20:39:05 +03:00
committed by GitHub
parent 3bedd051dc
commit 1a6392eb53
8 changed files with 301 additions and 35 deletions

View File

@@ -17,6 +17,7 @@
#include <mm_malloc.h>
#include <serial/headers/2dot2/gna_model_header.hpp>
#include <serial/headers/2dot5/gna_model_header.hpp>
#include <serial/headers/2dot6/gna_model_header.hpp>
#endif
@@ -133,10 +134,11 @@ GNAPluginNS::HeaderLatest::ModelHeader GNAModelSerial::ReadHeader(std::istream &
}
case 5:
case 6:
case 7:
readNBytes(&header, sizeof(HeaderLatest::ModelHeader), is);
break;
default:
THROW_GNA_EXCEPTION << "Imported file unsupported. minor version should have values in range 1 to 4 and is: " << header.version.minor;
THROW_GNA_EXCEPTION << "Imported file unsupported. minor version should have values in range 1 to 7 and is: " << header.version.minor;
}
break;
default:
@@ -154,6 +156,40 @@ GNAPluginNS::HeaderLatest::ModelHeader GNAModelSerial::ReadHeader(std::istream &
return header;
}
// Reads one RuntimeEndPoint record from the stream, dispatching on the model
// header version parsed earlier by ReadHeader().
// Versions 2.1-2.6 are stored in the smaller Header2dot6 layout (no
// shape/layout/precision fields) and are widened to the latest layout using the
// header's nGroup to reconstruct a 2D shape; version 2.7 is read directly as
// the latest layout. Any other major/minor version throws.
GNAPluginNS::HeaderLatest::RuntimeEndPoint GNAModelSerial::ReadEndPoint(std::istream &is) {
    // Fail fast on any stream error instead of silently consuming garbage.
    is.exceptions(std::istream::failbit);
    HeaderLatest::RuntimeEndPoint endPoint;
    switch (modelHeader.version.major) {
        case 2:
            switch (modelHeader.version.minor) {
                case 1:
                case 2:
                case 3:
                case 4:
                case 5:
                case 6:
                {
                    // Old on-disk layout: read then convert to the latest in-memory form.
                    // NOTE(review): assumes minors 1-5 share the 2.6 endpoint layout — confirm.
                    Header2dot6::RuntimeEndPoint tempEndPoint2dot6;
                    readBits(tempEndPoint2dot6, is);
                    endPoint = HeaderLatest::RuntimeEndPoint(tempEndPoint2dot6, modelHeader.nGroup);
                    break;
                }
                case 7:
                    // Current layout: a raw byte copy matches the serialized form.
                    readNBytes(&endPoint, sizeof(HeaderLatest::RuntimeEndPoint), is);
                    break;
                default:
                    THROW_GNA_EXCEPTION << "Imported file unsupported. minor version should have values in range 1 to 7 and is: " << modelHeader.version.minor;
            }
            break;
        default:
            THROW_GNA_EXCEPTION << "Imported file unsupported. Import for files with major version equal to: "
                << modelHeader.version.major << " is not implemented";
    }
    return endPoint;
}
#define offsetFromBase(field)\
getOffsetFromBase(field, #field)
@@ -324,18 +360,6 @@ void GNAModelSerial::Import(void *basePointer,
is.read(reinterpret_cast<char*>(basePointer), gnaGraphSize);
}
// Derives the grouping level (batch) from the first operand of the first
// operation: the smaller of its first two dimensions. Throws when the model
// has no operation/operand with at least a 2D shape to inspect.
uint32_t guessGrouping(Gna2Model const& model) {
    const bool shapeUnavailable = model.NumberOfOperations == 0 ||
                                  model.Operations == nullptr ||
                                  model.Operations[0].Operands == nullptr ||
                                  model.Operations[0].NumberOfOperands == 0 ||
                                  model.Operations[0].Operands[0]->Shape.NumberOfDimensions < 2;
    if (shapeUnavailable) {
        THROW_GNA_EXCEPTION << "Can not guess grouping";
    }
    const auto& dims = model.Operations[0].Operands[0]->Shape.Dimensions;
    return (std::min)(dims[0], dims[1]);
}
void GNAModelSerial::Export(void * basePointer, size_t gnaGraphSize, std::ostream & os) const {
os.exceptions(std::ostream::failbit);
@@ -366,6 +390,9 @@ void GNAModelSerial::Export(void * basePointer, size_t gnaGraphSize, std::ostrea
out.descriptor_offset = offsetFromBase(ep.descriptor_ptr);
out.scaleFactor = ep.scaleFactor;
out.element_size = ep.element_size;
out.shape = ep.shape;
out.layout = ep.layout;
out.precision = ep.precision;
out.orientation = ep.orientation;
return out;
};
@@ -381,7 +408,7 @@ void GNAModelSerial::Export(void * basePointer, size_t gnaGraphSize, std::ostrea
header.headerSize = sizeof(HeaderLatest::ModelHeader);
header.gnaMemSize = gnaGraphSize;
header.layersCount = layers.size();
header.nGroup = guessGrouping(*gna2Model);
header.nGroup = 1; // just to support the old models
header.nInputs = inputs.size();
header.nOutputs = outputs.size();
header.nTransposeInputs = transposeInputsInfo.size();
@@ -796,13 +823,22 @@ std::vector<HeaderLatest::RuntimeEndPoint> GNAModelSerial::serializeOutputs(cons
std::size_t outputIndex = 0;
for (auto const &output : outputsDataMap) {
auto outputName = output.first;
auto inputDims = output.second->getTensorDesc().getDims();
uint32_t elementsCount = static_cast<uint32_t>(InferenceEngine::details::product(inputDims.begin(), inputDims.end()));
auto outputDims = output.second->getTensorDesc().getDims();
HeaderLatest::RuntimeEndPoint::Shape outputShape;
outputShape.NumberOfDimensions = outputDims.size();
for (size_t i=0; i < outputShape.NumberOfDimensions; ++i) {
outputShape.Dimensions[i] = static_cast<uint32_t>(outputDims[i]);
}
uint32_t elementsCount = static_cast<uint32_t>(InferenceEngine::details::product(outputDims.begin(), outputDims.end()));
InferenceEngine::Layout outputLayout = output.second->getLayout();
InferenceEngine::Precision::ePrecision outputPrecision = InferenceEngine::Precision::FP32;
HeaderLatest::RuntimeEndPoint endPoint(outputsDesc[outputIndex].scale_factor,
outputsDesc[outputIndex].ptrs[0],
outputsDesc[outputIndex].num_bytes_per_element,
elementsCount,
outputShape,
outputLayout,
outputPrecision,
outputsDesc[outputIndex].orientation);
endPoints.push_back(endPoint);
outputIndex++;
@@ -818,18 +854,26 @@ std::vector<HeaderLatest::RuntimeEndPoint> GNAModelSerial::serializeInputs(const
for (auto const& input : inputsDataMap) {
auto inputName = input.first;
auto inputDims = input.second->getTensorDesc().getDims();
HeaderLatest::RuntimeEndPoint::Shape inputShape;
inputShape.NumberOfDimensions = inputDims.size();
for (size_t i=0; i < inputShape.NumberOfDimensions; ++i) {
inputShape.Dimensions[i] = static_cast<uint32_t>(inputDims[i]);
}
double scaleFactor = inputDesc->getScaleFactor(inputIndex);
std::vector<void *> descriptor_ptr = inputDesc->getPtrInputsGlobal(inputName);
IE_ASSERT(descriptor_ptr.size() > 0);
uint32_t element_size = 2u;
uint32_t elementsCount = static_cast<uint32_t>(InferenceEngine::details::product(inputDims.begin(), inputDims.end()));
intel_dnn_orientation_t orientation = inputDesc->getOrientation(inputName);
InferenceEngine::Layout inputLayout = input.second->getLayout();
InferenceEngine::Precision::ePrecision inputPrecision = InferenceEngine::Precision::FP32;
HeaderLatest::RuntimeEndPoint endPoint(scaleFactor,
descriptor_ptr[0],
element_size,
elementsCount,
inputShape,
inputLayout,
inputPrecision,
orientation);
endPoints.push_back(endPoint);
inputIndex++;
@@ -846,20 +890,24 @@ void GNAModelSerial::ImportInputs(std::istream &is,
for (uint32_t inputIndex = 0; inputIndex < modelHeader.nInputs; inputIndex++) {
const std::string& name = (modelHeader.version.major == 2 && modelHeader.version.minor >= 3)
? inputNames.at(inputIndex) : std::string("input" + std::to_string(inputIndex));
HeaderLatest::RuntimeEndPoint input;
is.read(reinterpret_cast<char *>(&input), sizeof(input));
HeaderLatest::RuntimeEndPoint input = ReadEndPoint(is);
inputsDesc->getPtrInputsGlobal(name).push_back(reinterpret_cast<float*>(reinterpret_cast<uint8_t *> (basePtr) + input.descriptor_offset));
inputsDesc->orientation_in[name] = input.orientation;
inputsDesc->bytes_allocated_for_input[name] = input.element_size * input.elements_count;
auto inputDims = InferenceEngine::SizeVector({modelHeader.nGroup, input.elements_count / modelHeader.nGroup});
auto inputDims = InferenceEngine::SizeVector();
for (auto i = 0; i < input.shape.NumberOfDimensions; ++i) {
inputDims.push_back(input.shape.Dimensions[i]);
}
InferenceEngine::Layout inputLayout = static_cast<InferenceEngine::Layout>(input.layout);
InferenceEngine::Precision inputPresicion = InferenceEngine::Precision(static_cast<InferenceEngine::Precision::ePrecision>(input.precision));
dataMap[name] = std::make_shared<InferenceEngine::InputInfo>();
dataMap[name]->setInputData(std::make_shared<InferenceEngine::Data>(name,
InferenceEngine::TensorDesc(
InferenceEngine::Precision::FP32,
inputPresicion,
inputDims,
InferenceEngine::Layout::NC)));
inputLayout)));
inputsDesc->inputScaleFactors.push_back(input.scaleFactor);
}
}
@@ -875,8 +923,8 @@ void GNAModelSerial::ImportOutputs(std::istream &is,
for (uint32_t outputIndex = 0; outputIndex < modelHeader.nOutputs; outputIndex++) {
const std::string& name = (modelHeader.version.major == 2 && modelHeader.version.minor >= 3)
? outputNames.at(outputIndex) : std::string("output" + std::to_string(outputIndex));
HeaderLatest::RuntimeEndPoint output;
is.read(reinterpret_cast<char *>(&output), sizeof(output));
HeaderLatest::RuntimeEndPoint output = ReadEndPoint(is);
OutputDesc description;
description.ptrs.push_back(reinterpret_cast<float*>(reinterpret_cast<uint8_t *> (basePtr) + output.descriptor_offset));
description.orientation = kDnnInterleavedOrientation;
@@ -884,12 +932,17 @@ void GNAModelSerial::ImportOutputs(std::istream &is,
description.num_bytes_per_element = output.element_size;
description.scale_factor = output.scaleFactor;
auto outputDims = InferenceEngine::SizeVector({modelHeader.nGroup, output.elements_count / modelHeader.nGroup});
auto outputDims = InferenceEngine::SizeVector();
for (auto i = 0; i < output.shape.NumberOfDimensions; ++i) {
outputDims.push_back(output.shape.Dimensions[i]);
}
InferenceEngine::Layout outputLayout = static_cast<InferenceEngine::Layout>(output.layout);
InferenceEngine::Precision outputPresicion = InferenceEngine::Precision(static_cast<InferenceEngine::Precision::ePrecision>(output.precision));
dataMap[name] = std::make_shared<InferenceEngine::Data>(name,
InferenceEngine::TensorDesc(
InferenceEngine::Precision::FP32,
outputPresicion,
outputDims,
InferenceEngine::Layout::NC));
outputLayout));
desc.at(outputIndex) = description;
}
}

View File

@@ -138,6 +138,8 @@ private:
*/
static GNAPluginNS::HeaderLatest::ModelHeader ReadHeader(std::istream &is);
GNAPluginNS::HeaderLatest::RuntimeEndPoint ReadEndPoint(std::istream &is);
/**
* @brief Import model from FS into preallocated buffer,
* buffers for pLayers, and pStructs are allocated here and required manual deallocation using mm_free

View File

@@ -0,0 +1,197 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <cstdint>
#include <map>
#include "backend/dnn_types.h"
#include "serial/headers/2dot4/gna_model_header.hpp"
#include "serial/headers/2dot6/gna_model_header.hpp"
#include "serial/headers/latest/gna_model_header.hpp"
#include "gna_data_types.hpp"
#pragma pack(push, 1)
namespace GNAPluginNS {
namespace Header2dot7 {
/**
Maximal number of supported shape dimensions.
*/
#define GNA_SHAPE_MAXIMUM_NUMBER_OF_DIMENSIONS 8
/**
 * @brief Model header, serialization format version 2.7.
 * The struct is packed (see pragma pack(push, 1) above) and written/read as raw
 * bytes, so field order, types, and sizes must never change within this version.
 */
struct ModelHeader {
    /**
     * @brief Magic number: "GNAM" in ASCII, equals hex 0x474e414d.
     */
    char gnam[4] = {};
    /**
     * @brief If header size is not equal to sizeof(ModelHeader), reserved data
     * is appended at the end of the header — usually an indicator that the model
     * was produced by a different version of the export function.
     */
    uint32_t headerSize = 0u;
    struct Version {
        /**
         * @details Major format version, e.g. 0x0001.
         * Every change to the header or layer definitions must be reflected in a
         * version change; new parsers can read older models with some restrictions.
         */
        uint16_t major = 2u;
        /**
         * @details Minor format version (corresponds to build revision, for example).
         * Minor version changes do not affect the model layout.
         */
        uint32_t minor = 7u;
    } version;
    /**
     * @brief Memory required to be allocated using GNAAlloc().
     */
    uint64_t gnaMemSize = 0ull;
    /**
     * @brief Number of GNA layers.
     */
    uint64_t layersCount = 0ull;
    /**
     * @brief Grouping level.
     * Deprecated field, kept only for importing old models (version <= 2.6);
     * newer models carry per-endpoint shapes instead.
     */
    uint32_t nGroup = 0u;
    /**
     * Convolution-related settings — they affect the input transformation.
     */
    uint32_t nRotateRows = 0u;
    uint32_t nRotateColumns = 0u;
    bool doRotateInput = false;
    uint32_t nInputs = 0u;
    uint32_t nOutputs = 0u;
    /**
     * Convolution-related settings — they affect the output transformation.
     */
    uint32_t nRotateOutputRows = 0u;
    uint32_t nRotateOutputColumns = 0u;
    bool doRotateOutput = false;
    uint32_t nTransposeInputs = 0u;
    uint32_t nTransposeOutputs = 0u;
    /**
     * Reserved data might follow here (see headerSize above).
     */
    ModelHeader() = default;
    // Upgrade constructor from the 2.1 header; fields absent in 2.1 keep their defaults.
    ModelHeader(GNAPluginNS::Header2dot1::ModelHeader const &old) {
        gnaMemSize = old.gnaMemSize;
        layersCount = old.layersCount;
        nGroup = old.nGroup;
        nRotateRows = old.nRotateRows;
        nRotateColumns = old.nRotateColumns;
        nInputs = old.nInputs;
        nOutputs = old.nOutputs;
        version.minor = old.version.minor;
    }
    // Upgrade constructor from the 2.4 header; preserves the original minor version
    // so readers can still dispatch on the true on-disk layout.
    ModelHeader(GNAPluginNS::Header2dot4::ModelHeader const &old) {
        gnaMemSize = old.gnaMemSize;
        layersCount = old.layersCount;
        nGroup = old.nGroup;
        nRotateRows = old.nRotateRows;
        nRotateColumns = old.nRotateColumns;
        nInputs = old.nInputs;
        nOutputs = old.nOutputs;
        nRotateOutputRows = old.nRotateOutputRows;
        nRotateOutputColumns = old.nRotateOutputColumns;
        doRotateOutput = old.doRotateOutput;
        version.minor = old.version.minor;
    }
};
#pragma pack(pop)
/*
 * Runtime endpoint (input/output) descriptor for format version 2.7.
 * Mostly the same as the serialized form, except the descriptor field is an
 * in-memory pointer here while the file stores descriptor_offset.
 * NOTE(review): this struct sits after #pragma pack(pop), so unlike ModelHeader
 * it is serialized with natural padding — confirm this is intentional, since it
 * is read/written via sizeof() raw copies (ReadEndPoint case 7).
 */
struct RuntimeEndPoint {
    /**
     * If the scale factor differs from the one passed into infer,
     * the network might need to be requantized.
     */
    float scaleFactor = 0;
    /**
     * Pointer descriptor (in-memory only; the file stores descriptor_offset).
     */
    void* descriptor_ptr = nullptr;
    /**
     * Endpoint resolution in bytes.
     */
    uint32_t element_size = 0;
    /**
     * Number of elements.
     */
    uint32_t elements_count = 0;
    /**
     * Offset in bytes of the pointer descriptor.
     */
    uint64_t descriptor_offset = 0ull;
    /**
     * Shape specifying dimension values.
     */
    struct Shape {
        /**
         * Number of dimensions (rank/order).
         */
        uint32_t NumberOfDimensions = 0;
        /**
         * Value of each dimension; all zeros for scalars.
         */
        uint32_t Dimensions[GNA_SHAPE_MAXIMUM_NUMBER_OF_DIMENSIONS] = {0};
    } shape;
    /**
     * Blob layout (InferenceEngine::Layout stored as a byte).
     */
    uint8_t layout = InferenceEngine::Layout::NC;
    /**
     * Blob precision (InferenceEngine::Precision::ePrecision stored as a byte).
     */
    uint8_t precision = InferenceEngine::Precision::FP32;
    intel_dnn_orientation_t orientation = kDnnUnknownOrientation;
    RuntimeEndPoint() = default;
    /**
     * Upgrade constructor: widens a 2.6 endpoint to the 2.7 layout.
     * The old format carried no shape, so a 2D {ngroup, elements_count / ngroup}
     * shape is reconstructed from the header's grouping level.
     * NOTE(review): assumes ngroup != 0 for old models — confirm against callers.
     */
    RuntimeEndPoint(const GNAPluginNS::Header2dot6::RuntimeEndPoint &old, uint32_t ngroup) {
        scaleFactor = old.scaleFactor;
        descriptor_ptr = old.descriptor_ptr;
        element_size = old.element_size;
        elements_count = old.elements_count;
        orientation = old.orientation;
        layout = InferenceEngine::Layout::NC;
        precision = InferenceEngine::Precision::FP32;
        descriptor_offset = old.descriptor_offset;
        InferenceEngine::SizeVector dims = {ngroup, elements_count / ngroup};
        shape.NumberOfDimensions = static_cast<uint32_t>(dims.size());
        // size_t index fixes the signed/unsigned comparison against dims.size();
        // explicit cast documents the intentional size_t -> uint32_t narrowing.
        for (size_t i = 0; i < dims.size(); i++) {
            shape.Dimensions[i] = static_cast<uint32_t>(dims[i]);
        }
    }
    RuntimeEndPoint(double scaleFactor,
                    void* descriptor_ptr,
                    uint32_t element_size,
                    uint32_t elements_count,
                    Shape shape,
                    uint8_t layout,
                    uint8_t precision,
                    intel_dnn_orientation_t orientation) : scaleFactor(scaleFactor),
                    descriptor_ptr(descriptor_ptr),
                    element_size(element_size),
                    elements_count(elements_count),
                    shape(shape),
                    layout(layout),
                    precision(precision),
                    orientation(orientation) { }
};
} // namespace Header2dot7
} // namespace GNAPluginNS

View File

@@ -4,11 +4,11 @@
#pragma once
#include "serial/headers/2dot6/gna_model_header.hpp"
#include "serial/headers/2dot7/gna_model_header.hpp"
namespace GNAPluginNS {
namespace HeaderLatest {
using ModelHeader = GNAPluginNS::Header2dot6::ModelHeader;
using RuntimeEndPoint = GNAPluginNS::Header2dot6::RuntimeEndPoint;
using ModelHeader = GNAPluginNS::Header2dot7::ModelHeader;
using RuntimeEndPoint = GNAPluginNS::Header2dot7::RuntimeEndPoint;
}
}

View File

@@ -60,8 +60,6 @@ std::vector<std::string> disabledTestPatterns() {
R"(.*ConstantResultSubgraphTest.*inPrc=(U8|I8|I32|U64|I64|BOOL).*)",
// TODO: Issue 51528
R"(.*CachingSupport.*_(u8|i16)_.*)",
// TODO: Issue 51527
R"(.*CachingSupport.*_batch2_.*)",
// TODO: Issue 51525
R"(.*CachingSupport.*KSOFunction.*)",
// TODO: Issue 57363 (Param -> Result subgraphs)

View File

@@ -69,13 +69,16 @@ void ImportNetworkTestBase::Run() {
for (const auto& next_input : importedExecNetwork.GetInputsInfo()) {
ASSERT_NO_THROW(compiledExecNetwork.GetInputsInfo()[next_input.first]);
Compare(next_input.second->getTensorDesc(), compiledExecNetwork.GetInputsInfo()[next_input.first]->getTensorDesc());
}
for (const auto& next_output : importedExecNetwork.GetOutputsInfo()) {
ASSERT_NO_THROW(compiledExecNetwork.GetOutputsInfo()[next_output.first]);
}
auto importedOutputs = GetOutputs();
ASSERT_EQ(actualOutputs.size(), importedOutputs.size());
for (size_t i = 0; i < actualOutputs.size(); i++) {
Compare(actualOutputs[i]->getTensorDesc(), importedOutputs[i]->getTensorDesc());
Compare(actualOutputs[i], importedOutputs[i]);
}
}

View File

@@ -72,6 +72,8 @@ public:
virtual void Compare(const InferenceEngine::Blob::Ptr &expected, const InferenceEngine::Blob::Ptr &actual);
virtual void Compare(const InferenceEngine::TensorDesc &actualDesc, const InferenceEngine::TensorDesc &expectedDesc);
virtual void SetRefMode(RefMode mode);
std::shared_ptr<ngraph::Function> GetFunction();

View File

@@ -274,6 +274,17 @@ void LayerTestsCommon::Compare(const InferenceEngine::Blob::Ptr &expected, const
}
}
// Compares two tensor descriptors for equality of dims, layout, and precision.
// Fix: the original assigned `expectedDims` from actualDesc and `actualDims`
// from expectedDesc — the equality checks still passed, but any failure message
// reported the values under swapped labels. Also bind by const reference to
// avoid copying the dims vectors.
void LayerTestsCommon::Compare(const InferenceEngine::TensorDesc &actualDesc, const InferenceEngine::TensorDesc &expectedDesc) {
    const auto& actualDims = actualDesc.getDims();
    const auto& expectedDims = expectedDesc.getDims();
    ASSERT_EQ(actualDims.size(), expectedDims.size());
    for (size_t j = 0; j < actualDims.size(); ++j) {
        ASSERT_EQ(actualDims.at(j), expectedDims.at(j));
    }
    ASSERT_EQ(actualDesc.getLayout(), expectedDesc.getLayout());
    ASSERT_EQ(actualDesc.getPrecision(), expectedDesc.getPrecision());
}
void LayerTestsCommon::ConfigureNetwork() {
for (const auto &in : cnnNetwork.getInputsInfo()) {
if (inLayout != InferenceEngine::Layout::ANY) {