[GNA] Move old headers (#2002)

parent 6b2ac800aa
commit 166ec394bd
@@ -22,6 +22,7 @@
 #include "dnn.hpp"
 #include "am_intel_dnn.hpp"
 #include "dnn_types.h"
+#include "gna_types.h"
 
 #if GNA_LIB_VER == 2
 #include <gna2-model-api.h>
@@ -288,7 +289,7 @@ void GNAPluginNS::backend::AMIntelDNN::InitPiecewiseLinearComponentPrivate(intel
 float input_scale_factor,
 void *&ptr_inputs,
 void *&ptr_outputs,
-intel_pwl_segment_t *ptr_segments,
+gna_pwl_segment_t *ptr_segments,
 bool postInitMem) {
 comp.num_rows_in = num_rows;
 comp.num_columns_in = num_columns;
@@ -313,8 +314,8 @@ void GNAPluginNS::backend::AMIntelDNN::InitPiecewiseLinearComponentPrivate(intel
 ptr_inputs = &comp.ptr_inputs;
 ptr_outputs = &comp.ptr_outputs;
 if (ptr_segments != nullptr) {
-*reinterpret_cast<intel_pwl_segment_t **>(ptr_segments) =
-    reinterpret_cast<intel_pwl_segment_t *>(& comp.op.pwl.ptr_segments);
+*reinterpret_cast<gna_pwl_segment_t **>(ptr_segments) =
+    reinterpret_cast<gna_pwl_segment_t *>(& comp.op.pwl.ptr_segments);
 }
 }
 }
@@ -823,7 +824,7 @@ void GNAPluginNS::backend::AMIntelDNN::WriteDnnText(const char *filename, intel_
 
 if (num_bytes_per_weight == 1) {
 int8_t *ptr_weight = reinterpret_cast<int8_t *>(component[i].op.affine.ptr_weights);
-intel_compound_bias_t *ptr_bias = reinterpret_cast<intel_compound_bias_t *>(component[i].op.affine.ptr_biases);
+gna_compound_bias_t *ptr_bias = reinterpret_cast<gna_compound_bias_t *>(component[i].op.affine.ptr_biases);
 #ifdef DUMP_WB
 for (uint32_t row = 0; row < num_weight_rows; row++) {
 for (uint32_t col = 0; col < num_weight_columns; col++) {
@@ -871,8 +872,8 @@ void GNAPluginNS::backend::AMIntelDNN::WriteDnnText(const char *filename, intel_
 }
 if (compute_precision_ == kDnnInt) {
 if (num_bytes_per_weight == 1) {
-intel_compound_bias_t
-*ptr_biases = reinterpret_cast<intel_compound_bias_t *>(component[i].op.affine.ptr_biases);
+gna_compound_bias_t
+*ptr_biases = reinterpret_cast<gna_compound_bias_t *>(component[i].op.affine.ptr_biases);
 #ifdef DUMP_WB
 for (uint32_t row = 0; row < num_rows_out; row++) {
 if (logging_precision == kDnnInt) {
@@ -952,7 +953,7 @@ void GNAPluginNS::backend::AMIntelDNN::WriteDnnText(const char *filename, intel_
 
 if (num_bytes_per_weight == 1) {
 int8_t *ptr_weight = reinterpret_cast<int8_t *>(component[i].op.conv1D.ptr_filters);
-intel_compound_bias_t *ptr_bias = reinterpret_cast<intel_compound_bias_t *>(component[i].op.conv1D.ptr_biases);
+gna_compound_bias_t *ptr_bias = reinterpret_cast<gna_compound_bias_t *>(component[i].op.conv1D.ptr_biases);
 #ifdef DUMP_WB
 for (uint32_t row = 0; row < num_filters; row++) {
 for (uint32_t col = 0; col < num_filter_coefficients; col++) {
@@ -1001,8 +1002,8 @@ void GNAPluginNS::backend::AMIntelDNN::WriteDnnText(const char *filename, intel_
 if (compute_precision_ == kDnnInt) {
 if (logging_precision == kDnnInt) {
 if (num_bytes_per_weight == 1) {
-intel_compound_bias_t
-*ptr_biases = reinterpret_cast<intel_compound_bias_t *>(component[i].op.conv1D.ptr_biases);
+gna_compound_bias_t
+*ptr_biases = reinterpret_cast<gna_compound_bias_t *>(component[i].op.conv1D.ptr_biases);
 #ifdef DUMP_WB
 for (uint32_t row = 0; row < num_filters; row++) {
 out_bfile << "0x" << std::setfill('0') << std::setw(8) << std::hex
@@ -1073,8 +1074,8 @@ void GNAPluginNS::backend::AMIntelDNN::WriteDnnText(const char *filename, intel_
 << GNAPluginNS::memory::MemoryOffset(component[i].op.recurrent.ptr_feedbacks, ptr_dnn_memory_) << "\n";
 if (num_bytes_per_weight == 1) {
 int8_t *ptr_weight = reinterpret_cast<int8_t *>(component[i].op.recurrent.ptr_weights);
-intel_compound_bias_t
-*ptr_bias = reinterpret_cast<intel_compound_bias_t *>(component[i].op.recurrent.ptr_biases);
+gna_compound_bias_t
+*ptr_bias = reinterpret_cast<gna_compound_bias_t *>(component[i].op.recurrent.ptr_biases);
 #ifdef DUMP_WB
 for (uint32_t row = 0; row < num_weight_rows; row++) {
 out_file << "<weight_row> ";
@@ -1128,8 +1129,8 @@ void GNAPluginNS::backend::AMIntelDNN::WriteDnnText(const char *filename, intel_
 if (compute_precision_ == kDnnInt) {
 if (logging_precision == kDnnInt) {
 if (num_bytes_per_weight == 1) {
-intel_compound_bias_t
-*ptr_biases = reinterpret_cast<intel_compound_bias_t *>(component[i].op.recurrent.ptr_biases);
+gna_compound_bias_t
+*ptr_biases = reinterpret_cast<gna_compound_bias_t *>(component[i].op.recurrent.ptr_biases);
 out_file << "<compound_bias>" << " ";
 #ifdef DUMP_WB
 for (uint32_t col = 0; col < num_columns_out; col++) {
@@ -1182,7 +1183,7 @@ void GNAPluginNS::backend::AMIntelDNN::WriteDnnText(const char *filename, intel_
 }
 break;
 case kDnnPiecewiselinearOp: {
-intel_pwl_segment_t *ptr_segment = component[i].op.pwl.ptr_segments;
+gna_pwl_segment_t *ptr_segment = component[i].op.pwl.ptr_segments;
 DnnActivationType func_id = component[i].op.pwl.func_id.type;
 uint32_t num_segments = component[i].op.pwl.num_segments;
 float output_scale_factor = component[i].output_scale_factor;
@@ -9,6 +9,7 @@
 #include <vector>
 
 #include "dnn_types.h"
+#include "gna_types.h"
 
 #include "gna_plugin_log.hpp"
 
@@ -176,7 +177,7 @@ public:
 float input_scale_factor,
 A *&ptr_inputs,
 B *&ptr_outputs,
-intel_pwl_segment_t *ptr_segments) {
+gna_pwl_segment_t *ptr_segments) {
 InitPiecewiseLinearComponentPrivate(cmp,
 function_id,
 orientation,
@@ -381,7 +382,7 @@ private:
 float input_scale_factor,
 void *&ptr_inputs,
 void *&ptr_outputs,
-intel_pwl_segment_t *ptr_segments,
+gna_pwl_segment_t *ptr_segments,
 bool postInitMem);
 
 static void InitInterleaveComponentPrivate(intel_dnn_component_t &cmp,
@@ -6,7 +6,7 @@
 
 #include <cstdint>
 #include <type_traits>
-#include <gna-api-types-xnn.h>
+#include "gna_types.h"
 
 #include "gna_plugin_log.hpp"
 
@@ -183,7 +183,7 @@ typedef struct {
 typedef struct {
 DnnActivation func_id;       // identifies function being approximated
 uint32_t num_segments;
-intel_pwl_segment_t *ptr_segments;
+gna_pwl_segment_t *ptr_segments;
 } intel_piecewiselinear_t;
 
 typedef struct {
inference-engine/src/gna_plugin/backend/gna_types.h (new file, 164 lines)
@@ -0,0 +1,164 @@
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#if GNA_LIB_VER == 1
+#include "gna_lib_ver_selector.hpp"
+#else
+#include <cstdint>
+
+/** PWL Segment - as read directly by the accelerator */
+typedef struct _pwl_segment_t {
+    int32_t xBase;    // X component of segment starting point, with scaling encoded if needed.
+    int16_t yBase;    // Y component of segment starting point.
+    int16_t slope;    // Slope of linear function.
+} gna_pwl_segment_t;
+static_assert(8 == sizeof(gna_pwl_segment_t), "Invalid size of gna_pwl_segment_t");
+
+/** Piecewise-linear activation function (PWL) details */
+typedef struct _pwl_func_t {
+    uint32_t nSegments;            // Number of segments, set to 0 to disable activation function.
+    gna_pwl_segment_t* pSegments;  // Activation function segments data or NULL if disabled.
+} gna_pwl_func_t;
+
+/**
+ * Compound bias
+ * Used for nBytesPerWeight=GNA_INT8 and nBytesPerBias=GNA_INT16 only.
+ * As read directly by the accelerator.
+ */
+typedef struct _compound_bias_t {
+    int32_t bias;          // 4B signed integer bias (constant) value.
+    uint8_t multiplier;    // Scaling factor that weight elements are multiplied by.
+    uint8_t reserved[3];   // Not used.
+} gna_compound_bias_t;
+static_assert(8 == sizeof(gna_compound_bias_t), "Invalid size of gna_compound_bias_t");
+
+/**
+ * Layer operation type.
+ * Defines the type of a layer's "core" operation.
+ * All nodes/cells within a layer are of the same type,
+ * e.g. affine transform cell, convolutional cell, recurrent cell.
+ * Affine, convolutional and recurrent layers are in fact "fused operation" layers:
+ * the "core" operation is fused with activation and/or pooling functions.
+ * NOTE: Operation types are exclusive.
+ */
+typedef enum _layer_operation {
+    // Fully connected affine transform (deep feed forward) with activation function. Cast pLayerStruct to intel_affine_layer_t.
+    INTEL_AFFINE,
+    // Fully connected affine transform (matrix x vector) (deep feed forward) with activation function. Cast pLayerStruct to intel_affine_layer_t.
+    INTEL_AFFINE_DIAGONAL,
+    /*
+     * Fully connected affine transform (with grouped bias vectors) (deep feed forward) with activation function.
+     * Cast pLayerStruct to intel_affine_multibias_layer_t.
+     */
+    INTEL_AFFINE_MULTIBIAS,
+    INTEL_CONVOLUTIONAL,       // Convolutional transform with activation function and pooling. Cast pLayerStruct to intel_convolutional_layer_t.
+    INTEL_CONVOLUTIONAL_2D,    // Convolutional transform with activation function and pooling. Cast pLayerStruct to nn_layer_cnn2d.
+    INTEL_COPY,                // Auxiliary data copy operation. Cast pLayerStruct to intel_copy_layer_t.
+    INTEL_DEINTERLEAVE,        // Auxiliary 2D tensor transpose operation (interleave to flat). No casting, always set pLayerStruct to null.
+    INTEL_GMM,                 // Gaussian Mixture Model operation. Cast pLayerStruct to intel_gmm_layer_t.
+    INTEL_INTERLEAVE,          // Auxiliary 2D tensor transpose operation (flat to interleave). No casting, always set pLayerStruct to null.
+    INTEL_RECURRENT,           // Fully connected affine transform with recurrence and activation function. Cast pLayerStruct to intel_recurrent_layer_t.
+    GNA_LAYER_CNN_2D_POOLING,
+    LAYER_OPERATION_TYPE_COUT,
+} gna_layer_operation;
+
+typedef enum _layer_mode {
+    INTEL_INPUT,           // Layer serves as model input layer (usually the first layer).
+    INTEL_OUTPUT,          // Layer serves as model output layer (usually the last layer).
+    INTEL_INPUT_OUTPUT,    // Layer serves as both model input and output layer (usually in a single-layer topology).
+    INTEL_HIDDEN,          // Layer serves as a model hidden layer (layers between input and output layers).
+    LAYER_MODE_COUNT       // Number of layer modes.
+} gna_layer_mode;
+
+/** Layer common configuration descriptor */
+typedef struct _nnet_layer_t {
+    gna_layer_operation operation;   // Layer operation type.
+    gna_layer_mode mode;             // Layer connection mode.
+    uint32_t nInputColumns;          // Number of input columns.
+    uint32_t nInputRows;             // Number of input rows.
+    uint32_t nOutputColumns;         // Number of output columns.
+    uint32_t nOutputRows;            // Number of output rows.
+    uint32_t nBytesPerInput;         // Precision/mode of input node, use a value from gna_data_mode. Valid values: {GNA_INT8, GNA_INT16, GNA_DATA_DISABLED}.
+    // Precision/activation mode of output node, use a value from gna_data_mode. Valid values: {GNA_INT8, GNA_INT16, GNA_INT32, GNA_DATA_ACTIVATION_DISABLED}.
+    uint32_t nBytesPerOutput;
+    uint32_t nBytesPerIntermediateOutput;   // Number of bytes per intermediate output node, always set to GNA_INT32.
+    void* pLayerStruct;              // Layer detailed configuration, cast to intel_[LAYER_KIND]_layer_t.
+    void* pInputs;                   // Signed integer NN or GMM input buffer.
+    void* pOutputsIntermediate;      // 4B signed integer auxiliary output buffer.
+    void* pOutputs;                  // Signed integer output buffer.
+} gna_nnet_layer_t;
+
+/** GNA Network descriptor */
+typedef struct _nnet_type_t {
+    uint32_t nLayers;            // The number of layers in the network.
+    uint32_t nGroup;             // Input vector grouping level.
+    gna_nnet_layer_t *pLayers;   // Layer configurations.
+} gna_nnet_type_t;
+
+/** Affine function details */
+typedef struct _affine_func_t {
+    uint32_t nBytesPerWeight;    // Precision/mode of weight element, use a value from gna_data_mode.
+    uint32_t nBytesPerBias;      // Precision/mode of bias (constant) element, use a value from gna_data_mode.
+    void* pWeights;              // Signed integer weights data buffer.
+    void* pBiases;               // Biases (constants) data buffer. Signed integer biases or gna_compound_bias_t.
+} gna_affine_func_t;
+
+/** Fully connected affine layer detailed descriptor */
+typedef struct _affine_layer_t {
+    gna_affine_func_t affine;    // Affine function details.
+    gna_pwl_func_t pwl;          // Activation function details.
+} gna_affine_layer_t;
+
+/** Pooling function types */
+typedef enum _pool_type_t {
+    INTEL_NO_POOLING = 0,        // Pooling function disabled.
+    INTEL_MAX_POOLING = 1,       // Max Pooling function.
+    INTEL_SUM_POOLING = 2,       // Sum Pooling function.
+    NUM_POOLING_TYPES            // Number of Pooling function types.
+} gna_pool_type_t;
+
+/** Convolutional Layer detailed descriptor */
+typedef struct _convolutional_layer_t {
+    uint32_t nFilters;                 // Number of filters.
+    uint32_t nFilterCoefficients;      // Number of filter elements, including 0-padding if necessary.
+    uint32_t nFilterRows;              // Number of rows in each filter.
+    uint32_t nBytesFilterCoefficient;  // Precision/mode of filter coefficient element, use a value from gna_data_mode.
+    uint32_t nBytesBias;               // Precision/mode of bias (constant) element, use a value from gna_data_mode.
+    uint32_t nFeatureMaps;             // Number of feature maps.
+    uint32_t nFeatureMapRows;          // Number of rows in each feature map.
+    uint32_t nFeatureMapColumns;       // Number of columns in each feature map.
+    void* pFilters;                    // Signed integer filters data buffer, filters stored one after the other.
+    void* pBiases;                     // Signed integer biases (constants) data buffer, biases are specified per kernel/filter.
+    gna_pool_type_t poolType;          // Pooling function type.
+    uint32_t nPoolSize;                // Pool size, set 1 to disable pooling.
+    uint32_t nPoolStride;              // Pool stride.
+    gna_pwl_func_t pwl;                // Activation function details.
+} gna_convolutional_layer_t;
+
+/**
+ * The list of processing acceleration modes.
+ * Availability of the acceleration modes depends on the CPU type.
+ * Available modes are detected by GNA.
+ *
+ * NOTE:
+ * - GNA_HARDWARE: in some GNA hardware generations, model components unsupported
+ *   by hardware will be processed using software acceleration.
+ *   When software inference is used, the "fast" algorithm is used by default
+ *   and results may not be bit-exact with those produced by the hardware device.
+ */
+typedef enum _acceleration {
+    GNA_HARDWARE = static_cast<int>(0xFFFFFFFE),   // GNA hardware acceleration enforcement
+    GNA_AUTO     = 0x3,    // GNA selects the best available acceleration
+    GNA_SOFTWARE = 0x5,    // GNA selects the best available software acceleration
+    GNA_GENERIC  = 0x7,    // Enforce the usage of generic software mode
+    GNA_SSE4_2   = 0x9,    // Enforce the usage of SSE 4.2 CPU instruction set
+    GNA_AVX1     = 0xB,    // Enforce the usage of AVX1 CPU instruction set
+    GNA_AVX2     = 0xD     // Enforce the usage of AVX2 CPU instruction set
+} gna_acceleration;
+
+static_assert(4 == sizeof(gna_acceleration), "Invalid size of gna_acceleration");
+
+#endif
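For orientation, a minimal sketch (my illustration, not part of the commit) of how a gna_compound_bias_t row is consumed when weights are stored as int8: the per-row multiplier rescales the weights and bias supplies the row's constant term; saturation handling is omitted.

    #include <cstdint>
    #include "backend/gna_types.h"

    // Effective affine row under compound bias: acc = bias + sum_i (w[i] * multiplier) * x[i]
    int32_t affine_row_int8(const int8_t* w, const int16_t* x, uint32_t n,
                            const gna_compound_bias_t& cb) {
        int32_t acc = cb.bias;
        for (uint32_t i = 0; i < n; ++i)
            acc += static_cast<int32_t>(w[i]) * cb.multiplier * x[i];
        return acc;
    }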
@@ -8,6 +8,7 @@
 #include <runtime/pwl.h>
 #include <gna_slope_scale.h>
 #include "dnn_types.h"
+#include "backend/gna_types.h"
 #include "round_float_define.hpp"
 
 void make_gna_pwl(const DnnActivation fun,
@@ -16,7 +17,7 @@ void make_gna_pwl(const DnnActivation fun,
 const double u_bound,
 const double in_scale,
 const double out_scale,
-std::vector<intel_pwl_segment_t> &gna_pwl) {
+std::vector<gna_pwl_segment_t> &gna_pwl) {
 pwl_gna_slope_scale_t s;
 uint32_t pwl_size = static_cast<int32_t>(pwl.size());
 gnalog() << "make_gna_pwl\n";
@@ -6,6 +6,7 @@
 
 #include <vector>
 #include <runtime/pwl.h>
+#include "backend/gna_types.h"
 
 
 void make_gna_pwl(const DnnActivation fun,
@@ -14,4 +15,4 @@ void make_gna_pwl(const DnnActivation fun,
 const double u_bound,
 const double in_scale,
 const double out_scale,
-std::vector<intel_pwl_segment_t> &gna_pwl);
+std::vector<gna_pwl_segment_t> &gna_pwl);
@@ -8,7 +8,7 @@
 #include <utility>
 #include <cmath>
 
-#include <gna-api-types-xnn.h>
+#include "backend/gna_types.h"
 #include "gna_plugin_log.hpp"
 #include "quantized_layer_params.hpp"
 #include "quantization.h"
@@ -77,7 +77,7 @@ struct QuantI16 : public QuantDescTmpl<PRECISION_TYPE(I16, I32, I16, I32, MIXED)
 _Np = InferenceEngine::Precision::MIXED;
 }
 };
-struct QuantI8 : public QuantDescTmpl<P_TYPE(I16), P_TYPE(I32), P_TYPE(I8), intel_compound_bias_t, P_TYPE(MIXED)> {
+struct QuantI8 : public QuantDescTmpl<P_TYPE(I16), P_TYPE(I32), P_TYPE(I8), gna_compound_bias_t, P_TYPE(MIXED)> {
 QuantI8() {
 _Np = InferenceEngine::Precision::MIXED;
 }
@@ -102,7 +102,7 @@ inline bool shouldAlwaysAllocate() {
 }
 
 template <>
-inline bool shouldAlwaysAllocate<intel_compound_bias_t>() {
+inline bool shouldAlwaysAllocate<gna_compound_bias_t>() {
 return true;
 }
 
@@ -5,6 +5,7 @@
 #include <cstring>
 #include <iostream>
 #include <details/ie_exception.hpp>
+#include "backend/gna_types.h"
 #include "quantization.h"
 
 void QuantizeAffine16(float *ptr_float_weights,
@@ -149,7 +150,7 @@ void QuantizeVector16(float *ptr_float_memory, int16_t *ptr_int_memory, uint32_t
 }
 
 void QuantizeAffine8(float *ptr_float_weights, float *ptr_float_biases,
-int8_t *ptr_int_weights, intel_compound_bias_t *ptr_int_biases,
+int8_t *ptr_int_weights, gna_compound_bias_t *ptr_int_biases,
 float input_scale_factor, float *ptr_weight_scale_factor,
 float *ptr_output_scale_factor, uint32_t num_rows, uint32_t num_columns,
 uint32_t num_rows_padded, uint32_t num_columns_padded) {
@@ -4,12 +4,12 @@
 
 #pragma once
 
-#include <math.h>
-#include <stdio.h>
-#include <stdlib.h>
+#include <cmath>
+#include <cstdio>
+#include <cstdlib>
 #include <vector>
 #include <cstdint>
-#include <gna-api-types-xnn.h>
+#include "backend/gna_types.h"
 
 #define MAX_OUT_MULTIPLIER 230
 #define MAX_VAL_1B_WEIGHT 127
@@ -35,6 +35,6 @@ void QuantizeAffine16(float *ptr_float_weights,
 uint32_t num_columns_padded);
 float ScaleFactorForQuantization(void *ptr_float_memory, float target_max, size_t num_elements);
 void QuantizeVector16(float *ptr_float_memory, int16_t *ptr_int_memory, uint32_t num_elements, float scale_factor);
-void QuantizeAffine8(float *ptr_float_weights, float *ptr_float_biases, int8_t *ptr_int_weights, intel_compound_bias_t *ptr_int_biases,
+void QuantizeAffine8(float *ptr_float_weights, float *ptr_float_biases, int8_t *ptr_int_weights, gna_compound_bias_t *ptr_int_biases,
 float input_scale_factor, float *ptr_weight_scale_factor, float *ptr_output_scale_factor,
 uint32_t num_rows, uint32_t num_columns, uint32_t num_rows_padded, uint32_t num_columns_padded);
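A hedged usage sketch for the re-typed QuantizeAffine8 declaration above; the buffer sizing (one gna_compound_bias_t per output row) and the way the scale factors are seeded are my assumptions for illustration, not code from the commit.

    #include <vector>
    #include "frontend/quantization.h"

    void quantize_affine_example(std::vector<float>& weights, std::vector<float>& biases,
                                 uint32_t rows, uint32_t cols) {
        std::vector<int8_t> w_q(rows * cols);
        std::vector<gna_compound_bias_t> b_q(rows);  // one compound bias per output row
        float weight_sf = ScaleFactorForQuantization(weights.data(), MAX_VAL_1B_WEIGHT, weights.size());
        float output_sf = 0.0f;
        QuantizeAffine8(weights.data(), biases.data(), w_q.data(), b_q.data(),
                        /*input_scale_factor=*/16384.0f, &weight_sf, &output_sf,
                        rows, cols, /*num_rows_padded=*/rows, /*num_columns_padded=*/cols);
    }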
@@ -9,7 +9,7 @@
 #else
 #include <mm_malloc.h>
 #endif
-#include <gna-api-types-xnn.h>
+#include "backend/gna_types.h"
 #include "gna_plugin_log.hpp"
 
 #if GNA_LIB_VER == 2
@@ -81,7 +81,7 @@ class CPPWrapper<Gna2Model> {
 };
 #else
 template <>
-class CPPWrapper<intel_nnet_type_t> {
+class CPPWrapper<gna_nnet_type_t> {
 public:
 intel_nnet_type_t obj;
 
@@ -15,7 +15,6 @@
 #include <limits>
 
 #include <legacy/ie_layers.h>
-#include <gna-api-types-xnn.h>
 #include <ie_algorithm.hpp>
 #include <debug.h>
 
@@ -25,7 +24,6 @@
 #include "layers/gna_layer_info.hpp"
 #include "ie_memcpy.h"
 #include "caseless.hpp"
-#include "gna-api.h"
 #include "backend/am_intel_dnn.hpp"
 #include "runtime/pwl.h"
 #include "gna_graph_tools.hpp"
@@ -539,7 +537,7 @@ void GNAGraphCompiler::PowerPrimitive(InferenceEngine::CNNLayerPtr layer) {
 }
 } else {
 //use PWL to calculate power
-std::vector<intel_pwl_segment_t> ptr_pwl_segments;
+std::vector<gna_pwl_segment_t> ptr_pwl_segments;
 
 auto orientation = kDnnInterleavedOrientation;
 
@@ -550,7 +548,7 @@ void GNAGraphCompiler::PowerPrimitive(InferenceEngine::CNNLayerPtr layer) {
 
 auto& pwlComponent = dnnComponents.addComponent(layer->name, "power");
 
-intel_pwl_segment_t* ptr_pwl_segments_target = nullptr;
+gna_pwl_segment_t* ptr_pwl_segments_target = nullptr;
 
 float output_pwl_scale_factor = quantized != nullptr ? quantized->_dst_quant.scale : 1.0f;
 float input_pwl_scale_factor = quantized != nullptr ? quantized->_src_quant.scale : 1.0f;
@@ -576,7 +574,7 @@ void GNAGraphCompiler::PowerPrimitive(InferenceEngine::CNNLayerPtr layer) {
 }
 }
 
-ptr_pwl_segments_target = reinterpret_cast<intel_pwl_segment_t*>(&ptr_pwl_segments_target);
+ptr_pwl_segments_target = reinterpret_cast<gna_pwl_segment_t*>(&ptr_pwl_segments_target);
 
 void* ptr_pwl_input = nullptr;
 void* ptr_pwl_outputs = nullptr;
@@ -600,7 +598,7 @@ void GNAGraphCompiler::PowerPrimitive(InferenceEngine::CNNLayerPtr layer) {
 if (ptr_pwl_segments_target != nullptr) {
 gnamem->readonly().push_local_ptr(ptr_pwl_segments_target,
 &ptr_pwl_segments.front(),
-ptr_pwl_segments.size() * sizeof(intel_pwl_segment_t),
+ptr_pwl_segments.size() * sizeof(gna_pwl_segment_t),
 64);
 }
 }
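A reading of the self-referential cast used in PowerPrimitive above (my paraphrase, not commit code): ptr_pwl_segments_target is first pointed at itself; InitPiecewiseLinearComponent then stores the address of the component's op.pwl.ptr_segments field through it, and push_local_ptr registers that field to be patched once the segment table receives its final address in GNA memory.

    gna_pwl_segment_t* target = nullptr;
    target = reinterpret_cast<gna_pwl_segment_t*>(&target);  // points at itself
    // dnn->InitPiecewiseLinearComponent(..., target, ...) then executes, in effect:
    //   *reinterpret_cast<gna_pwl_segment_t**>(target) =
    //       reinterpret_cast<gna_pwl_segment_t*>(&comp.op.pwl.ptr_segments);
    // so target now holds &comp.op.pwl.ptr_segments, ready for gnamem patching.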
@@ -1466,7 +1464,7 @@ void GNAGraphCompiler::AffineFilterPrimitive(InferenceEngine::CNNLayerPtr layer)
 void GNAGraphCompiler::PWLPrimitive(InferenceEngine::CNNLayerPtr layer) {
 auto* generic = dynamic_cast<GenericLayer*>(layer.get());
 std::string type;
-std::vector<intel_pwl_segment_t> ptr_pwl_segments;
+std::vector<gna_pwl_segment_t> ptr_pwl_segments;
 uint32_t num_rows;
 uint32_t num_columns;
 void* ptr_inputs = nullptr;
@@ -1582,8 +1580,7 @@ case name:\
 #endif
 
 auto& currentComponent = dnnComponents.addComponent(layer->name, actName);
-
-intel_pwl_segment_t* ptr_pwl_segments_target = nullptr;
+gna_pwl_segment_t* ptr_pwl_segments_target = nullptr;
 
 if (!gnaFlags->sw_fp32) {
 // TODO: generalize activation function code
@@ -1618,7 +1615,7 @@ case name:\
 input_pwl_scale_factor,
 output_pwl_scale_factor);
 }
-ptr_pwl_segments_target = reinterpret_cast<intel_pwl_segment_t*>(&ptr_pwl_segments_target);
+ptr_pwl_segments_target = reinterpret_cast<gna_pwl_segment_t*>(&ptr_pwl_segments_target);
 }
 
 dnn->InitPiecewiseLinearComponent(currentComponent,
@@ -1641,7 +1638,7 @@ case name:\
 if (ptr_pwl_segments_target != nullptr) {
 gnamem->readonly().push_local_ptr(ptr_pwl_segments_target,
 &ptr_pwl_segments.front(),
-ptr_pwl_segments.size() * sizeof(intel_pwl_segment_t),
+ptr_pwl_segments.size() * sizeof(gna_pwl_segment_t),
 64);
 }
 }
@@ -4,15 +4,13 @@
 // gna_helper.cpp : various GNA-related utility functions
 //
 
-#define PROFILE
-
 #include <cstdint>
 #include <cstdio>
 #include <fstream>
 #include <vector>
 #include <sstream>
 #include <string>
-#include <gna-api-types-xnn.h>
+#include "backend/gna_types.h"
 #include "gna_plugin_log.hpp"
 
 #include "gna_lib_ver_selector.hpp"
@@ -48,21 +46,6 @@ void PrintMatrixFloat32(char *ptr_name, float *ptr_matrix, int num_rows, int num
 }
 }
 
-void PrintGnaNetwork(intel_nnet_type_t *ptr_nnet) {
-PrintMatrixInt16("input", reinterpret_cast<int16_t*>(ptr_nnet->pLayers[0].pInputs),
-ptr_nnet->pLayers[0].nInputRows, ptr_nnet->pLayers[0].nInputColumns, ptr_nnet->pLayers[0].nInputColumns, 1.0);
-for (uint32_t i = 0; i < ptr_nnet->nLayers; i++) {
-char name[256];
-snprintf(name, sizeof(name), "output %d", i);
-if (ptr_nnet->pLayers[i].nBytesPerOutput == 2) {
-PrintMatrixInt16(name, reinterpret_cast<int16_t*>(ptr_nnet->pLayers[i].pOutputs),
-ptr_nnet->pLayers[i].nOutputRows, ptr_nnet->pLayers[i].nOutputColumns, ptr_nnet->pLayers[i].nOutputColumns, 1.0);
-} else {
-PrintMatrixInt32(name, reinterpret_cast<int32_t*>(ptr_nnet->pLayers[i].pOutputs),
-ptr_nnet->pLayers[i].nOutputRows, ptr_nnet->pLayers[i].nOutputColumns, ptr_nnet->pLayers[i].nOutputColumns, 1.0);
-}
-}
-}
-
 typedef struct {
 std::string sName;
@@ -146,7 +129,7 @@ uint32_t BufferOffsetFromAddress(std::vector<intel_memory_region_t> &vBuffer, vo
 return (nOffsetBytes);
 }
 
-std::string LayerName(intel_nnet_layer_t *pLayer) {
+std::string LayerName(gna_nnet_layer_t *pLayer) {
 const auto nKind = pLayer->nLayerKind;
 std::string sKind;
 if (nKind == INTEL_AFFINE) {
@@ -164,7 +147,7 @@ std::string LayerName(intel_nnet_layer_t *pLayer) {
 return (sKind);
 }
 
-uint32_t NumInputs(intel_nnet_layer_t *pLayer) {
+uint32_t NumInputs(gna_nnet_layer_t *pLayer) {
 const auto nKind = pLayer->nLayerKind;
 uint32_t nInputs;
 if ((nKind == INTEL_AFFINE) || (nKind == INTEL_AFFINE_DIAGONAL)) {
@@ -180,7 +163,7 @@ uint32_t NumInputs(intel_nnet_layer_t *pLayer) {
 return (nInputs);
 }
 
-uint32_t NumOutputs(intel_nnet_layer_t *pLayer) {
+uint32_t NumOutputs(gna_nnet_layer_t *pLayer) {
 const auto nKind = pLayer->nLayerKind;
 uint32_t nOutputs;
 if ((nKind == INTEL_AFFINE) || (nKind == INTEL_AFFINE_DIAGONAL)) {
@@ -196,7 +179,7 @@ uint32_t NumOutputs(intel_nnet_layer_t *pLayer) {
 return (nOutputs);
 }
 
-uint32_t NumGroupSize(intel_nnet_layer_t *pLayer) {
+uint32_t NumGroupSize(gna_nnet_layer_t *pLayer) {
 const auto nKind = pLayer->nLayerKind;
 uint32_t nGroupSize;
 if ((nKind == INTEL_AFFINE) || (nKind == INTEL_AFFINE_DIAGONAL)) {
@@ -7,9 +7,39 @@
 #if GNA_LIB_VER == 2
 
 #include <cstdint>
-#include <gna-api-types-xnn.h>
 
 #define nLayerKind operation
 #define intel_layer_kind_t gna_layer_operation
 #define intel_gna_proc_t uint32_t
 
+
+/**
+ * Rounds a number up, to the nearest multiple of significance
+ * Used for calculating the memory sizes of GNA data buffers
+ *
+ * @param number Memory size or a number to round up.
+ * @param significance Informs the function how to round up. The function "ceils"
+ *                     the number to the lowest possible value divisible by "significance".
+ * @return Rounded integer value.
+ */
+#define ALIGN(number, significance) ((((number) + (significance) - 1) / (significance)) * (significance))
+
+/**
+ * Rounds a number up, to the nearest multiple of 64
+ * Used for calculating memory sizes of GNA data arrays
+ */
+#define ALIGN64(number) ALIGN(number, 64)
+
+#else
+
+#include <gna-api.h>
+#include <gna-api-types-xnn.h>
+
+#define gna_pwl_segment_t intel_pwl_segment_t
+#define gna_compound_bias_t intel_compound_bias_t
+#define gna_nnet_layer_t intel_nnet_layer_t
+#define gna_nnet_type_t intel_nnet_type_t
+#define gna_affine_func_t intel_affine_func_t
+#define gna_affine_layer_t intel_affine_layer_t
+#define gna_convolutional_layer_t intel_convolutional_layer_t
 #endif
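Illustrative note (mine, not commit code): on GNA_LIB_VER == 1 builds the gna_* names above are plain macro aliases for the legacy intel_* types from gna-api-types-xnn.h, so shared plugin code can be written once against the gna_* spelling. The worked values below show what the ALIGN helpers evaluate to; on v1 builds ALIGN comes from the legacy gna-api.h instead, which is assumed here to behave identically.

    #include "gna_lib_ver_selector.hpp"
    #include "backend/gna_types.h"

    static_assert(ALIGN(100, 64) == 128, "100 rounds up to the next multiple of 64");
    static_assert(ALIGN64(64) == 64, "already-aligned values are unchanged");

    gna_pwl_segment_t seg{};  // expands to intel_pwl_segment_t when GNA_LIB_VER == 1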
@@ -8,7 +8,6 @@
 #include <vector>
 #include <utility>
 
-#include <gna-api.h>
 #include "descriptions/gna_input_desc.hpp"
 #include "descriptions/gna_output_desc.hpp"
 #include "gna_plugin_log.hpp"
@@ -10,8 +10,9 @@
 #include "ie_layers.h"
 #include "caseless.hpp"
 #include "ie_algorithm.hpp"
-#include "gna-api.h"
+#include "backend/gna_types.h"
 #include "gna_permute.hpp"
 #include "gna_lib_ver_selector.hpp"
+
 
 namespace GNAPluginNS {
@@ -13,8 +13,7 @@
 #include <list>
 #include <algorithm>
 #include <functional>
 
-#include <gna-api.h>
 #include "gna_lib_ver_selector.hpp"
 
 namespace GNAPluginNS {
 namespace memory {
@@ -16,7 +16,6 @@
 #include <iomanip>
 
 #include <legacy/graph_transformer.h>
-#include <gna-api.h>
 #include <blob_factory.hpp>
 #include <ie_memcpy.h>
 #include <ie_algorithm.hpp>
@@ -9,6 +9,7 @@
 #include <limits>
 #include <cstdint>
 #include <algorithm>
+#include "backend/gna_types.h"
 
 #ifdef _NO_MKL_
 #include <cmath>
@@ -497,7 +498,7 @@ std::vector<pwl_t> pwl_search(const DnnActivation& activation_type,
 
 
 void PwlDesignOpt16(const DnnActivation activation_type,
-std::vector<intel_pwl_segment_t> &ptr_segment,
+std::vector<gna_pwl_segment_t> &ptr_segment,
 const float scale_in,
 const float scale_out) {
 std::vector<pwl_t> pwl;
@@ -588,7 +589,7 @@ void PwlDesignOpt16(const DnnActivation activation_type,
 }
 
 void PwlDesign16(const DnnActivation activation_type,
-intel_pwl_segment_t *ptr_segment,
+gna_pwl_segment_t *ptr_segment,
 const uint32_t num_segments,
 const float scale_in,
 const float scale_out) {
@@ -869,7 +870,7 @@ void PwlApply16(intel_dnn_component_t *component,
 uint32_t num_saturate = 0;
 uint32_t num_segments = component->op.pwl.num_segments;
 if (num_segments > 0) {
-intel_pwl_segment_t *ptr_segment = component->op.pwl.ptr_segments;
+gna_pwl_segment_t *ptr_segment = component->op.pwl.ptr_segments;
 for (int i = num_row_start; i <= num_row_end; i++) {
 int32_t *ptr_input = reinterpret_cast<int32_t *>(component->ptr_inputs) + i * component->num_columns_in;
 int16_t *ptr_output = reinterpret_cast<int16_t *>(component->ptr_outputs) + i * component->num_columns_in;
@@ -8,6 +8,7 @@
 #include <cstdint>
 
 #include "backend/dnn_types.h"
+#include "backend/gna_types.h"
 
 #define SIGMOID_NUM_SEGMENTS 65
 #define SIGMOID_DOMAIN 10.0f  // portion of input to be approximated (-10,10)
@@ -95,11 +96,11 @@ void PwlApply32(intel_dnn_component_t *component,
 const uint32_t num_col_start,
 const uint32_t num_col_end);
 void PwlDesign16(const DnnActivation activation_type,
-intel_pwl_segment_t *ptr_segment,
+gna_pwl_segment_t *ptr_segment,
 const uint32_t num_segments,
 const float scale_in,
 const float scale_out);
 void PwlDesignOpt16(const DnnActivation activation_type,
-std::vector<intel_pwl_segment_t> &ptr_segment,
+std::vector<gna_pwl_segment_t> &ptr_segment,
 const float scale_in,
 const float scale_out);
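A simplified evaluation sketch for the gna_pwl_segment_t tables that PwlDesign16/PwlDesignOpt16 produce (my illustration: real GNA hardware additionally encodes a slope shift in the low bits of xBase, which is ignored here). The idea is to pick the last segment whose xBase is at or below x and extrapolate linearly from its base point.

    #include <cstdint>
    #include "backend/gna_types.h"

    int16_t pwl_eval(const gna_pwl_segment_t* seg, uint32_t n, int32_t x) {
        uint32_t i = 0;
        while (i + 1 < n && seg[i + 1].xBase <= x) ++i;  // segments sorted by xBase
        int64_t y = seg[i].yBase +
                    static_cast<int64_t>(x - seg[i].xBase) * seg[i].slope;
        return static_cast<int16_t>(y);  // saturation omitted
    }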
@@ -46,7 +46,7 @@ TEST_F(GNAAOTTests, DISABLED_AffineWith2AffineOutputs_canbe_imported_verify_stru
 #if GNA_LIB_VER == 1
 GTEST_SKIP();
 #endif
-auto & nnet_type = storage<intel_nnet_type_t>();
+auto & nnet_type = storage<gna_nnet_type_t>();
 
 // saving pointer to nnet - todo probably deep copy required
 save_args().onInferModel(AffineWith2AffineOutputsModel())
@@ -120,7 +120,7 @@ TEST_F(GNAAOTTests, PoolingModel_canbe_export_imported) {
 
 TEST_F(GNAAOTTests, DISABLED_CanConvertFromAOTtoSueModel) {
 
-auto & nnet_type = storage<intel_nnet_type_t>();
+auto & nnet_type = storage<gna_nnet_type_t>();
 
 // saving pointer to nnet - todo probably deep copy required
 save_args().onInferModel(AffineWith2AffineOutputsModel())
@@ -29,7 +29,7 @@ TEST_F(GNAHWPrecisionTest, canPassInt8Precision) {
 nnet_input_precision(Precision::I16).
 nnet_ouput_precision(Precision::I32).
 nnet_weights_precision(Precision::I8).
-nnet_biases_precision(Precision::fromType<intel_compound_bias_t>());
+nnet_biases_precision(Precision::fromType<gna_compound_bias_t>());
 }
 
 TEST_F(GNAHWPrecisionTest, canPassInt16Precision) {
@@ -4,7 +4,7 @@
 
 #include "gna_matcher.hpp"
 #include <gna/gna_config.hpp>
-#include <gna-api-types-xnn.h>
+#include "backend/gna_types.h"
 #include <gna_executable_network.hpp>
 #include "gna_plugin.hpp"
 #include "gna_mock_api.hpp"
@@ -25,7 +25,7 @@
 
 #include <backend/dnn_types.h>
 #include <gna_plugin_policy.hpp>
-#include <gna-api.h>
+#include <backend/gna_types.h>
 #include <gna/gna_config.hpp>
 #include <gna_plugin.hpp>
 #include <gna_lib_ver_selector.hpp>
@@ -292,7 +292,7 @@ class GNAPropagateMatcher : public GNATestConfigurability<GNAPropagateMatcher> {
 return *this;
 }
 
-GNAPropagateMatcher & exact_nnet_structure(intel_nnet_type_t * pNet) {
+GNAPropagateMatcher & exact_nnet_structure(gna_nnet_type_t * pNet) {
 
 getMatcher().type = GnaPluginTestEnvironment::exactNNetStructure;
 original_nnet = pNet;
@@ -415,7 +415,7 @@ class GNAPropagateMatcher : public GNATestConfigurability<GNAPropagateMatcher> {
 return * this;
 }
 
-GNAPropagateMatcher & to(intel_nnet_type_t *savedNet) {
+GNAPropagateMatcher & to(gna_nnet_type_t *savedNet) {
 this->savedNet = savedNet;
 return *this;
 }
@@ -427,8 +427,8 @@ class GNAPropagateMatcher : public GNATestConfigurability<GNAPropagateMatcher> {
 
 protected:
 void match();
-intel_nnet_type_t * original_nnet = nullptr;
-intel_nnet_type_t * savedNet = nullptr;
+gna_nnet_type_t * original_nnet = nullptr;
+gna_nnet_type_t * savedNet = nullptr;
 };
 
@@ -5,7 +5,7 @@
 #include <vector>
 #include <gtest/gtest.h>
 #include <legacy/layer_transform.hpp>
-#include <gna-api-types-xnn.h>
+#include "backend/gna_types.h"
 #include "frontend/model_quantizer.hpp"
 #include "frontend/layer_quantizer.hpp"
 #include "gna_matcher.hpp"
@@ -4,22 +4,22 @@
 
 #pragma once
 
-#include"gna-api.h"
+#include "backend/gna_types.h"
 #include "nnet_base_matcher.hpp"
 #include "frontend/quantization.h"
 
-class ConvoluionLayerMatcher : public ::testing::MatcherInterface<const intel_nnet_type_t*> {
+class ConvoluionLayerMatcher : public ::testing::MatcherInterface<const gna_nnet_type_t*> {
 bool matchInserted;
 int matchQuantity;
 public:
 ConvoluionLayerMatcher(bool matchInserted, int matchQuantity) : matchInserted(matchInserted), matchQuantity(matchQuantity) {}
-bool MatchAndExplain(const intel_nnet_type_t *foo, ::testing::MatchResultListener *listener) const override {
+bool MatchAndExplain(const gna_nnet_type_t *foo, ::testing::MatchResultListener *listener) const override {
 if (foo == nullptr)
 return false;
 for(int i = 0; i < foo->nLayers; i++) {
 if (foo->pLayers[i].nLayerKind != INTEL_CONVOLUTIONAL) continue;
 
-auto conv = (intel_convolutional_layer_t*)foo->pLayers[i].pLayerStruct;
+auto conv = (gna_convolutional_layer_t*)foo->pLayers[i].pLayerStruct;
 
 return matchInserted;
 }
@@ -3,14 +3,17 @@
 //
 
 #pragma once
 
 #include "nnet_base_matcher.hpp"
-class CopyLayerMatcher : public ::testing::MatcherInterface<const intel_nnet_type_t*> {
+#include "backend/gna_types.h"
+
+class CopyLayerMatcher : public ::testing::MatcherInterface<const gna_nnet_type_t*> {
 bool matchInserted;
 const int matchQuantity;
 mutable int actualNumberOfCopyLayers;
 public:
 CopyLayerMatcher(bool matchInserted, int matchQuantity) : matchInserted(matchInserted), matchQuantity(matchQuantity) {}
-bool MatchAndExplain(const intel_nnet_type_t *foo, ::testing::MatchResultListener *listener) const override {
+bool MatchAndExplain(const gna_nnet_type_t *foo, ::testing::MatchResultListener *listener) const override {
 if (foo == nullptr)
 return false;
 actualNumberOfCopyLayers = 0;
@@ -40,7 +43,7 @@ class CopyLayerMatcher : public ::testing::MatcherInterface<const intel_nnet_typ
 }
 };
 
-inline ::testing::Matcher<const intel_nnet_type_t*> HasCopyLayer(bool matchInserted = false, int matchQuantity = -1) {
+inline ::testing::Matcher<const gna_nnet_type_t*> HasCopyLayer(bool matchInserted = false, int matchQuantity = -1) {
 std::unique_ptr<NNetComponentMatcher> c (new NNetComponentMatcher());
 c->add(new CopyLayerMatcher(matchInserted, matchQuantity));
 return ::testing::MakeMatcher(c.release());
@@ -3,17 +3,18 @@
 //
 
 #pragma once
-#include"gna-api.h"
+
+#include "backend/gna_types.h"
 #include "nnet_base_matcher.hpp"
 #include "frontend/quantization.h"
 
-class DiagLayerMatcher : public ::testing::MatcherInterface<const intel_nnet_type_t*> {
+class DiagLayerMatcher : public ::testing::MatcherInterface<const gna_nnet_type_t*> {
 bool matchInserted;
 int matchQuantity;
 mutable int actualQuantity;
 public:
 DiagLayerMatcher(bool matchInserted, int matchQuantity) : matchInserted(matchInserted), matchQuantity(matchQuantity) {}
-bool MatchAndExplain(const intel_nnet_type_t *foo, ::testing::MatchResultListener *listener) const override {
+bool MatchAndExplain(const gna_nnet_type_t *foo, ::testing::MatchResultListener *listener) const override {
 if (foo == nullptr)
 return false;
 actualQuantity = 0;
@@ -21,7 +22,7 @@ public:
 if (foo->pLayers[i].nLayerKind != INTEL_AFFINE_DIAGONAL) continue;
 // diagonal layer has to have 1 for weights and 0 for biases
 
-auto diag = (intel_affine_func_t*)foo->pLayers[i].pLayerStruct;
+auto diag = (gna_affine_func_t*)foo->pLayers[i].pLayerStruct;
 bool bWeightsOK = true;
 
 int beforePadding = 0;
@@ -69,7 +70,7 @@ public:
 }
 };
 
-inline ::testing::Matcher<const intel_nnet_type_t*> HasDiagonalLayer(bool matchInserted = false, int matchQuantity = -1) {
+inline ::testing::Matcher<const gna_nnet_type_t*> HasDiagonalLayer(bool matchInserted = false, int matchQuantity = -1) {
 std::unique_ptr<NNetComponentMatcher> c (new NNetComponentMatcher());
 c->add(new DiagLayerMatcher(matchInserted, matchQuantity));
 return ::testing::MakeMatcher(c.release());
@@ -4,8 +4,9 @@
 
 #pragma once
 
+#include "backend/gna_types.h"
 
-class OutputFiller : public ::testing::MatcherInterface<const intel_nnet_type_t*> {
+class OutputFiller : public ::testing::MatcherInterface<const gna_nnet_type_t*> {
 mutable std::stringstream reason;
 int32_t fill32BValue;
 int16_t fill16BValue;
@@ -14,7 +15,7 @@ class OutputFiller : public ::testing::MatcherInterface<const intel_nnet_type_t*
 OutputFiller(int32_t fill32BValue, int16_t fill16BValue) : fill32BValue(fill32BValue), fill16BValue(fill16BValue) {}
 
 
-bool MatchAndExplain(const intel_nnet_type_t* foo, ::testing::MatchResultListener* listener) const override {
+bool MatchAndExplain(const gna_nnet_type_t* foo, ::testing::MatchResultListener* listener) const override {
 if (foo == nullptr)
 return false;
 reason.str("");
@@ -6,15 +6,16 @@
 #pragma once
 
 #include <gmock/gmock-matchers.h>
+#include "backend/gna_types.h"
 #include "nnet_base_matcher.hpp"
 
-class InputDataMatcher : public ::testing::MatcherInterface<const intel_nnet_type_t *> {
+class InputDataMatcher : public ::testing::MatcherInterface<const gna_nnet_type_t *> {
 std::vector<int16_t> refInput;
 public:
 
 explicit InputDataMatcher(const std::vector<int16_t> &_refInput) : refInput(_refInput) {}
 
-bool MatchAndExplain(const intel_nnet_type_t *foo, ::testing::MatchResultListener *listener) const override {
+bool MatchAndExplain(const gna_nnet_type_t *foo, ::testing::MatchResultListener *listener) const override {
 if (foo->pLayers == nullptr) {
 *listener << "Address of the first layer descriptor is NULL";
 return false;
@@ -3,23 +3,25 @@
 //
 
 #pragma once
 
+#include "backend/gna_types.h"
+#include "gna_lib_ver_selector.hpp"
 
-class NNetComponentMatcher : public ::testing::MatcherInterface<const intel_nnet_type_t*> {
-std::vector<std::shared_ptr<::testing::MatcherInterface<const intel_nnet_type_t*>>> matchers;
+class NNetComponentMatcher : public ::testing::MatcherInterface<const gna_nnet_type_t*> {
+std::vector<std::shared_ptr<::testing::MatcherInterface<const gna_nnet_type_t*>>> matchers;
 mutable int failIdx = -1;
 mutable std::stringstream reason;
 int bitness;
 public:
 NNetComponentMatcher(int bitness = 16) : bitness(bitness) {}
-NNetComponentMatcher& add(::testing::MatcherInterface<const intel_nnet_type_t*> * p) {
-matchers.push_back(std::shared_ptr<::testing::MatcherInterface<const intel_nnet_type_t*>>(p));
+NNetComponentMatcher& add(::testing::MatcherInterface<const gna_nnet_type_t*> * p) {
+matchers.push_back(std::shared_ptr<::testing::MatcherInterface<const gna_nnet_type_t*>>(p));
 return *this;
 }
 bool empty() const {
 return matchers.empty();
 }
-bool MatchAndExplain(const intel_nnet_type_t* foo, ::testing::MatchResultListener* listener) const override {
+bool MatchAndExplain(const gna_nnet_type_t* foo, ::testing::MatchResultListener* listener) const override {
 if (foo == nullptr)
 return false;
 reason.str("");
@@ -48,7 +50,7 @@ class NNetComponentMatcher : public ::testing::MatcherInterface<const intel_nnet
 }
 if (foo->pLayers[j].nLayerKind == INTEL_AFFINE ||
 foo->pLayers[j].nLayerKind == INTEL_AFFINE_DIAGONAL) {
-auto pAffine = reinterpret_cast<intel_affine_func_t*>(foo->pLayers[j].pLayerStruct);
+auto pAffine = reinterpret_cast<gna_affine_func_t*>(foo->pLayers[j].pLayerStruct);
 
 if (pAffine->pWeights == foo->pLayers[i].pOutputs) {
 reason << "numberOfBytes per output int pLayers[" << i << "] should be " << (bitness/8) << ", but was "
@@ -4,24 +4,24 @@
 
 #pragma once
 
-#include"gna-api.h"
+#include "backend/gna_types.h"
 #include "nnet_base_matcher.hpp"
 #include "frontend/quantization.h"
 
-class PoolingLayerMatcher : public ::testing::MatcherInterface<const intel_nnet_type_t*> {
+class PoolingLayerMatcher : public ::testing::MatcherInterface<const gna_nnet_type_t*> {
 bool matchInserted;
 int matchQuantity;
 bool bMaxPool;
 public:
 PoolingLayerMatcher(bool matchInserted, int matchQuantity, bool bMaxPool)
 : matchInserted(matchInserted), matchQuantity(matchQuantity), bMaxPool(bMaxPool) {}
-bool MatchAndExplain(const intel_nnet_type_t *foo, ::testing::MatchResultListener *listener) const override {
+bool MatchAndExplain(const gna_nnet_type_t *foo, ::testing::MatchResultListener *listener) const override {
 if (foo == nullptr)
 return false;
 for(int i = 0; i < foo->nLayers; i++) {
 if (foo->pLayers[i].nLayerKind != INTEL_CONVOLUTIONAL) continue;
 
-auto conv = (intel_convolutional_layer_t*)foo->pLayers[i].pLayerStruct;
+auto conv = (gna_convolutional_layer_t*)foo->pLayers[i].pLayerStruct;
 if (conv->poolType != INTEL_MAX_POOLING) continue;
 
 return matchInserted;
@@ -4,15 +4,16 @@
 
 #pragma once
 #include "nnet_base_matcher.hpp"
+#include "backend/gna_types.h"
 #include "gna_lib_ver_selector.hpp"
 
-class NNetPrecisionMatcher : public ::testing::MatcherInterface<const intel_nnet_type_t*> {
+class NNetPrecisionMatcher : public ::testing::MatcherInterface<const gna_nnet_type_t*> {
 GnaPluginTestEnvironment::NnetPrecision nnetPrecision;
 intel_layer_kind_t layerKind = (intel_layer_kind_t)-1;
 public:
 explicit NNetPrecisionMatcher(GnaPluginTestEnvironment::NnetPrecision nnetPrecision,
 intel_layer_kind_t layerKind = (intel_layer_kind_t)-1) : nnetPrecision(nnetPrecision), layerKind(layerKind) {}
-bool MatchAndExplain(const intel_nnet_type_t* foo, ::testing::MatchResultListener* listener) const override {
+bool MatchAndExplain(const gna_nnet_type_t* foo, ::testing::MatchResultListener* listener) const override {
 
 auto ioPrecision = (foo->pLayers->nBytesPerInput == nnetPrecision.input_precision.size()) &&
 (foo->pLayers->nBytesPerOutput== nnetPrecision.output_precision.size());
@@ -25,7 +26,7 @@ class NNetPrecisionMatcher : public ::testing::MatcherInterface<const intel_nnet
 }
 switch (layerKind) {
 case INTEL_AFFINE : {
-auto affine = (intel_affine_layer_t *) (foo->pLayers->pLayerStruct);
+auto affine = (gna_affine_layer_t *) (foo->pLayers->pLayerStruct);
 
 return affine->affine.nBytesPerBias == nnetPrecision.biases_precision.size() &&
 affine->affine.nBytesPerWeight == nnetPrecision.weights_precision.size();
@@ -47,7 +48,7 @@ class NNetPrecisionMatcher : public ::testing::MatcherInterface<const intel_nnet
 }
 };
 
-inline ::testing::Matcher<const intel_nnet_type_t*> BitnessOfNNetEq(GnaPluginTestEnvironment::NnetPrecision nnetPrecision,
+inline ::testing::Matcher<const gna_nnet_type_t*> BitnessOfNNetEq(GnaPluginTestEnvironment::NnetPrecision nnetPrecision,
 intel_layer_kind_t component) {
 std::unique_ptr<NNetComponentMatcher> c (new NNetComponentMatcher());
 c->add(new NNetPrecisionMatcher(nnetPrecision, component));
@@ -10,7 +10,7 @@
 
 extern void PwlApply16(intel_dnn_component_t *component, uint32_t num_subset_size);
 
-class PWLMatcher : public ::testing::MatcherInterface<const intel_nnet_type_t*> {
+class PWLMatcher : public ::testing::MatcherInterface<const gna_nnet_type_t*> {
 bool matchInserted;
 int matchQuantity;
 mutable int timesInserted = 0;
@@ -25,7 +25,7 @@ class PWLMatcher : public ::testing::MatcherInterface<const intel_nnet_type_t*>
 : matchInserted(inserted), matchQuantity(matchQuantity), activationsToLookFor(particularActivations) {
 }
 
-bool MatchAndExplain(const intel_nnet_type_t *foo, ::testing::MatchResultListener *listener) const override {
+bool MatchAndExplain(const gna_nnet_type_t *foo, ::testing::MatchResultListener *listener) const override {
 if (foo == nullptr)
 return false;
 timesInserted = 0;
@@ -35,7 +35,7 @@ class PWLMatcher : public ::testing::MatcherInterface<const intel_nnet_type_t*>
 if (foo->pLayers[i].nLayerKind != INTEL_AFFINE &&
 foo->pLayers[i].nLayerKind != INTEL_AFFINE_DIAGONAL &&
 foo->pLayers[i].nLayerKind != INTEL_CONVOLUTIONAL) continue;
-auto affine = reinterpret_cast<intel_affine_layer_t*>(foo->pLayers[i].pLayerStruct);
+auto affine = reinterpret_cast<gna_affine_layer_t*>(foo->pLayers[i].pLayerStruct);
 if (affine == nullptr) continue;
 
 bool hasPwl = affine->pwl.nSegments != 0 && affine->pwl.pSegments != nullptr;
@@ -73,7 +73,7 @@ class PWLMatcher : public ::testing::MatcherInterface<const intel_nnet_type_t*>
 return timesInserted == 0;
 };
 
-DnnActivationType detectPwlType(intel_nnet_layer_t *layer) const {
+DnnActivationType detectPwlType(gna_nnet_layer_t *layer) const {
 
 intel_dnn_component_t comp;
 comp.ptr_outputs = layer->pOutputs;
@@ -82,11 +82,11 @@ class PWLMatcher : public ::testing::MatcherInterface<const intel_nnet_type_t*>
 
 if (layer->nLayerKind == INTEL_AFFINE ||
 layer->nLayerKind == INTEL_AFFINE_DIAGONAL) {
-auto pAffineLayer = reinterpret_cast<intel_affine_layer_t *>(layer->pLayerStruct);
+auto pAffineLayer = reinterpret_cast<gna_affine_layer_t *>(layer->pLayerStruct);
 comp.op.pwl.num_segments = pAffineLayer->pwl.nSegments;
 comp.op.pwl.ptr_segments = pAffineLayer->pwl.pSegments;
 } else if (layer->nLayerKind == INTEL_CONVOLUTIONAL) {
-auto pConvolutionalLayer = reinterpret_cast<intel_convolutional_layer_t *>(layer->pLayerStruct);
+auto pConvolutionalLayer = reinterpret_cast<gna_convolutional_layer_t *>(layer->pLayerStruct);
 comp.op.pwl.num_segments = pConvolutionalLayer->pwl.nSegments;
 comp.op.pwl.ptr_segments = pConvolutionalLayer->pwl.pSegments;
 } else {
@@ -10,10 +10,11 @@
 #include <iostream>
 
 #include <runtime/pwl.h>
+#include <backend/gna_types.h>
 
 #include "nnet_base_matcher.hpp"
 
-class PWLQuantizationMetricsMatcher : public ::testing::MatcherInterface<const intel_nnet_type_t*> {
+class PWLQuantizationMetricsMatcher : public ::testing::MatcherInterface<const gna_nnet_type_t*> {
 const float rmse_threshold;
 const uint32_t activation_type;
 const uint16_t segment_threshold;
@@ -23,7 +24,7 @@ class PWLQuantizationMetricsMatcher : public ::testing::MatcherInterface<const i
 rmse_threshold(precision_threshold),
 segment_threshold(segments) {}
 
-bool MatchAndExplain(const intel_nnet_type_t *nnet, ::testing::MatchResultListener *listener) const override {
+bool MatchAndExplain(const gna_nnet_type_t *nnet, ::testing::MatchResultListener *listener) const override {
 float rmse = 0.f;
 const float test_arg_scale_factor = 16384;
 
@@ -35,7 +36,7 @@ class PWLQuantizationMetricsMatcher : public ::testing::MatcherInterface<const i
 nnet->pLayers[i].nLayerKind != INTEL_AFFINE_DIAGONAL &&
 nnet->pLayers[i].nLayerKind != INTEL_CONVOLUTIONAL) continue;
 
-auto affine = reinterpret_cast<intel_affine_layer_t*>(nnet->pLayers[i].pLayerStruct);
+auto affine = reinterpret_cast<gna_affine_layer_t*>(nnet->pLayers[i].pLayerStruct);
 
 if (affine == nullptr ||
 affine->pwl.nSegments == 0 ||
@@ -85,7 +86,7 @@ class PWLQuantizationMetricsMatcher : public ::testing::MatcherInterface<const i
 }
 
 std::vector<double> y_diviation(2*domain);
-std::vector<intel_pwl_segment_t*> segments_vector(affine->pwl.nSegments);
+std::vector<gna_pwl_segment_t*> segments_vector(affine->pwl.nSegments);
 std::iota(segments_vector.begin(), segments_vector.begin()+affine->pwl.nSegments,
 affine->pwl.pSegments);
 
@@ -132,7 +133,7 @@ class PWLQuantizationMetricsMatcher : public ::testing::MatcherInterface<const i
 }
 };
 
-inline ::testing::Matcher<const intel_nnet_type_t*> PrecisionOfQuantizedPwlMetrics(uint32_t type,
+inline ::testing::Matcher<const gna_nnet_type_t*> PrecisionOfQuantizedPwlMetrics(uint32_t type,
 float threshold,
 uint16_t segments) {
 std::unique_ptr<NNetComponentMatcher> c (new NNetComponentMatcher());
@@ -3,7 +3,7 @@
 //
 
 #pragma once
-#include"gna-api.h"
+#include "backend/gna_types.h"
 #include "nnet_base_matcher.hpp"
 #include "frontend/quantization.h"
 
@@ -72,7 +72,7 @@ class TranspozeIterator {
 }
 };
 
-class WeightsMatcher : public ::testing::MatcherInterface<const intel_nnet_type_t*> {
+class WeightsMatcher : public ::testing::MatcherInterface<const gna_nnet_type_t*> {
 enum HowMatch{
 eNone,
 eEq,
@@ -92,7 +92,7 @@ class WeightsMatcher : public ::testing::MatcherInterface<const intel_nnet_type_
 eMatchKind = eEq;
 }
 }
-bool MatchAndExplain(const intel_nnet_type_t *foo, ::testing::MatchResultListener *listener) const override {
+bool MatchAndExplain(const gna_nnet_type_t *foo, ::testing::MatchResultListener *listener) const override {
 if (foo == nullptr)
 return false;
 iterator.reset();
@@ -101,7 +101,7 @@ class WeightsMatcher : public ::testing::MatcherInterface<const intel_nnet_type_
 if (foo->pLayers[i].nLayerKind != INTEL_AFFINE &&
 foo->pLayers[i].nLayerKind != INTEL_AFFINE_DIAGONAL) continue;
 
-auto affine = (intel_affine_func_t*)foo->pLayers[i].pLayerStruct;
+auto affine = (gna_affine_func_t*)foo->pLayers[i].pLayerStruct;
 
 auto affineWeightsSize = foo->pLayers[i].nOutputRows *
 (foo->pLayers[i].nLayerKind == INTEL_AFFINE_DIAGONAL ? 1 : foo->pLayers[i].nInputRows);
@@ -136,7 +136,7 @@ class WeightsMatcher : public ::testing::MatcherInterface<const intel_nnet_type_
 }
 };
 
-class WeightsSizeMatcher : public ::testing::MatcherInterface<const intel_nnet_type_t*> {
+class WeightsSizeMatcher : public ::testing::MatcherInterface<const gna_nnet_type_t*> {
 enum HowMatch{
 eNone,
 eEqAffine,
@@ -150,7 +150,7 @@ class WeightsSizeMatcher : public ::testing::MatcherInterface<const intel_nnet_t
 eMatchKind(eEqAffine),
 expected_weights_size(data_len){
 }
-bool MatchAndExplain(const intel_nnet_type_t *foo, ::testing::MatchResultListener *listener) const override {
+bool MatchAndExplain(const gna_nnet_type_t *foo, ::testing::MatchResultListener *listener) const override {
 if (foo == nullptr)
 return false;
 
@@ -180,20 +180,20 @@ class WeightsSizeMatcher : public ::testing::MatcherInterface<const intel_nnet_t
 };
 
 
-class WeightsSaver: public ::testing::MatcherInterface<const intel_nnet_type_t*> {
+class WeightsSaver: public ::testing::MatcherInterface<const gna_nnet_type_t*> {
 mutable TranspozeIterator iterator;
 std::vector<uint16_t>* weights;
 public:
 explicit WeightsSaver(TranspozedData data) :
 weights(std::get<0>(data)), iterator(data) {
 }
-bool MatchAndExplain(const intel_nnet_type_t *foo, ::testing::MatchResultListener *listener) const override {
+bool MatchAndExplain(const gna_nnet_type_t *foo, ::testing::MatchResultListener *listener) const override {
 if (foo == nullptr)
 return false;
 for(int i = 0; i < foo->nLayers; i++) {
 if (foo->pLayers[i].nLayerKind != INTEL_AFFINE) continue;
 
-auto affine = (intel_affine_func_t*)foo->pLayers[i].pLayerStruct;
+auto affine = (gna_affine_func_t*)foo->pLayers[i].pLayerStruct;
 
 auto affineWeightsSize = foo->pLayers[i].nOutputRows * foo->pLayers[i].nInputRows;
 auto pWeights = reinterpret_cast<uint16_t *>(affine->pWeights);