[GNA] Plugin transition to the library v3.0 Master (#7386)

* GNA Plugin transition to the library v3.0 Master

* [GNA] Enable 2D convolution decomposition on GNA 3.0

* [GNA] Fix cpplint

* [GNA] Fix cpplint

Co-authored-by: Szymon Irzabek <szymon.jakub.irzabek@intel.com>
Andrey Sapozhnikov 2021-09-21 10:59:35 +03:00 committed by GitHub
parent d725a17561
commit 2349caa482
22 changed files with 318 additions and 143 deletions

View File

@ -277,8 +277,8 @@ if(ENABLE_GNA)
set(GNA_HASH "cc954e67525006bf8bd353a6682e38bf208f6d74e973e0fc292850e721f17452")
endif()
if(GNA_LIBRARY_VERSION STREQUAL "GNA2")
set(GNA_VERSION "02.00.00.1226")
set(GNA_HASH "d5450af15c993e264c25ac4591a7dab44722e10d15fca4f222a1b84429d4e5b6")
set(GNA_VERSION "03.00.00.1377")
set(GNA_HASH "d45fb48994d8c2803a16e88e29ae48851066325b97c1c6c4a5bf4f4573d55c65")
endif()
set(FILES_TO_EXTRACT_LIST gna_${GNA_VERSION}/include)

View File

@ -201,18 +201,6 @@ enum OvGnaType {
OvGnaTypePwl = 8,
};
#if GNA_LIB_VER == 2
enum OvGnaMode {
OvGnaModeDefault = 0,
OvGnaModeDisabled = -1
};
struct OvGnaTensor {
std::vector<uint32_t> dimensions;
OvGnaType type;
OvGnaMode mode;
};
template <class T>
OvGnaType OvGnaTypeIntFromBytes(T bytesPerElement) {
static const std::map<T, OvGnaType> m = {
@ -227,6 +215,18 @@ OvGnaType OvGnaTypeIntFromBytes(T bytesPerElement) {
return r->second;
}
#if GNA_LIB_VER == 2
enum OvGnaMode {
OvGnaModeDefault = 0,
OvGnaModeDisabled = -1
};
struct OvGnaTensor {
std::vector<uint32_t> dimensions;
OvGnaType type;
OvGnaMode mode;
};
inline std::string OvGnaTypeToString(OvGnaType type) {
static const std::map<OvGnaType, std::string> typeToString = {
{OvGnaTypeInt8, "OvGnaTypeInt8"},

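Note on the hunk above: OvGnaTypeIntFromBytes maps an element size in bytes to the matching OvGnaType through a static lookup table. A minimal self-contained sketch of that pattern follows; the enum values, the byte-to-type pairs and the error handling are assumptions for illustration, not the plugin's definitions.

#include <cstdint>
#include <map>
#include <sstream>
#include <stdexcept>

// Local stand-in for OvGnaType; the numeric values are assumed for the sketch.
enum class GnaElementType : int { Int8 = 1, Int16 = 2, Int32 = 4 };

template <class T>
GnaElementType TypeFromBytesSketch(T bytesPerElement) {
    // One entry per supported integer precision.
    static const std::map<T, GnaElementType> m = {
        {1, GnaElementType::Int8},
        {2, GnaElementType::Int16},
        {4, GnaElementType::Int32},
    };
    const auto r = m.find(bytesPerElement);
    if (r == m.end()) {
        std::ostringstream msg;
        msg << "unsupported element size: " << bytesPerElement << " bytes";
        throw std::runtime_error(msg.str());  // error reporting is simplified here
    }
    return r->second;
}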
View File

@ -94,23 +94,36 @@ std::string VectorOrSquareLimitByChannelsAndPrecision::GetErrorOrEmpty(const uin
return GetByPrecision(precision).GetErrorOrEmpty(h, w, channels, what);
}
void Validator::ValidateCnn2D(std::string name, const uint32_t inHeight, const uint32_t inWidth,
const uint32_t inChannels, const uint32_t kH, const uint32_t kW, const uint32_t kN,
const uint32_t strideH, const uint32_t strideW, OvGnaType inPrecision) const {
bool Validator::ValidateCnn2D(std::string name, const uint32_t inHeight, const uint32_t inWidth,
const uint32_t inChannels, const uint32_t kernelH, const uint32_t kernelW, const uint32_t kernelN,
const uint32_t strideH, const uint32_t strideW, const uint32_t dilationH, const uint32_t dilationW,
OvGnaType inPrecision, bool exception) const {
const std::string prefix = "Layer Convolution2D: " + name + ":";
auto error = inputHWLimit.GetErrorOrEmpty(inHeight, inWidth);
error += kernelNumberLimit.GetErrorOrEmpty(kN);
error += kernelNumberLimit.GetErrorOrEmpty(kernelN);
error += inputChannelsNumberLimit.GetErrorOrEmpty(inChannels);
error += kernelLimit.GetErrorOrEmpty(kH, kW, inPrecision, inChannels, "kernel");
error += kernelLimit.GetErrorOrEmpty(kernelH, kernelW, inPrecision, inChannels, "kernel");
error += strideLimit.GetErrorOrEmpty(strideH, strideW, inPrecision, inChannels, "convolution stride");
ThrowIfNotEmpty(prefix, error);
const RangeLimit kernelStrideHLimit{1, kernelH, "kernel stride height (must be up to kernel height)"};
const RangeLimit kernelStrideWLimit{1, kernelW, "kernel stride width (must be up to kernel width)"};
error += kernelStrideHLimit.GetErrorOrEmpty(strideH);
error += kernelStrideWLimit.GetErrorOrEmpty(strideW);
error += dilationLimit.GetErrorOrEmpty(dilationH, dilationW);
if (exception)
ThrowIfNotEmpty(prefix, error);
return error.empty() ? true : false;
}
void Validator::ValidatePooling2D(std::string name,
bool Validator::ValidatePooling2D(std::string name,
const uint32_t windowH, const uint32_t windowW,
const uint32_t strideH, const uint32_t strideW) const {
const uint32_t strideH, const uint32_t strideW,
bool exception) const {
const std::string prefix = "Layer Pooling2D: " + name + ":";
auto error = poolingWindowLimit.GetErrorOrEmpty(windowH, windowW, "pooling window");
@ -120,7 +133,10 @@ void Validator::ValidatePooling2D(std::string name,
error += poolingStrideHLimit.GetErrorOrEmpty(strideH);
error += poolingStrideWLimit.GetErrorOrEmpty(strideW);
ThrowIfNotEmpty(prefix, error);
if (exception)
ThrowIfNotEmpty(prefix, error);
return error.empty() ? true : false;
}
void Validator::ThrowIfNotEmpty(const std::string prefix, const std::string error) {

View File

@ -16,6 +16,8 @@ constexpr uint32_t bufferMaxSize = 65528;
constexpr uint32_t convMinFiltersNum = 4;
constexpr uint32_t convMaxFiltersNum = 65532;
constexpr uint32_t convDilationHeight = 1;
constexpr uint32_t convDilationWidth = 1;
constexpr uint32_t convFiltersNumDivider = 4;
constexpr uint32_t convFilterSizeDivider = 8;
constexpr uint32_t convFilterMaxSize = 768;
@ -97,19 +99,24 @@ class Validator {
{ 240, { 3, 7, 3 }, { 2, 7, 2 } },
{ 120, { 3, 7, 3 }, { 1, 7, 1 } } };
VectorOrSquareLimitByChannelsAndPrecision& strideLimit = kernelLimit;
RangeLimit2D dilationLimit{ {convDilationHeight, convDilationHeight, "dilation height" },
{ convDilationWidth, convDilationWidth, "dilation width" } };
const VectorOrSquareLimit poolingWindowLimit{ 3, 1, 1 };
static void ThrowIfNotEmpty(const std::string prefix, const std::string error);
public:
Validator() = default;
void ValidateCnn2D(std::string name, const uint32_t inHeight, const uint32_t inWidth,
const uint32_t inChannels, const uint32_t kH, const uint32_t kW, const uint32_t kN,
const uint32_t strideH, const uint32_t strideW, OvGnaType inPrecision) const;
bool ValidateCnn2D(std::string name, const uint32_t inHeight, const uint32_t inWidth,
const uint32_t inChannels, const uint32_t kernelH, const uint32_t kernelW, const uint32_t kernelN,
const uint32_t strideH, const uint32_t strideW, const uint32_t dilationH, const uint32_t dilationW,
OvGnaType inPrecision, bool exception = true) const;
void ValidatePooling2D(std::string name,
bool ValidatePooling2D(std::string name,
const uint32_t windowH, const uint32_t windowW,
const uint32_t strideH, const uint32_t strideW) const;
const uint32_t strideH, const uint32_t strideW,
bool exception = true) const;
};
} // namespace Cnn2D
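With these declarations, ValidateCnn2D and ValidatePooling2D return a bool and take an exception flag (defaulting to true), so callers can keep the old throw-on-error behaviour or probe support silently, as the decomposition pass does further down in this change. A stripped-down sketch of that validate-and-optionally-throw shape, with a placeholder limit check and illustrative names:

#include <cstdint>
#include <stdexcept>
#include <string>

// Placeholder limit check; the real validator aggregates many such checks.
static std::string CheckKernelSketch(uint32_t kernelH, uint32_t kernelW) {
    return (kernelH <= 7 && kernelW <= 7) ? "" : "kernel exceeds 7x7; ";
}

// Returns true when the parameters pass; throws only when 'exception' is set.
bool ValidateCnn2DSketch(const std::string& name, uint32_t kernelH, uint32_t kernelW,
                         bool exception = true) {
    const std::string prefix = "Layer Convolution2D: " + name + ":";
    const std::string error = CheckKernelSketch(kernelH, kernelW);
    if (exception && !error.empty())
        throw std::runtime_error(prefix + " " + error);
    return error.empty();
}

// Probing use, mirroring how GNA30SupportedConv later asks whether a convolution
// is natively supported without aborting compilation:
//   const bool supported = ValidateCnn2DSketch("conv1", 3, 3, false);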

View File

@ -9,19 +9,21 @@
#include "gna2-model-export-api.h"
#include "gna2-model-suecreek-header.h"
#include "gna_api_wrapper.hpp"
#include "gna2-device-api.h"
#include <cstdint>
#include <fstream>
void * ExportSueLegacyUsingGnaApi2(
uint32_t modelId,
uint32_t deviceIndex,
Gna2ModelSueCreekHeader* modelHeader) {
uint32_t exportConfig;
auto status = Gna2ModelExportConfigCreate(gnaUserAllocatorAlignedPage, &exportConfig);
GNADeviceHelper::checkGna2Status(status, "Gna2ModelExportConfigCreate");
status = Gna2ModelExportConfigSetSource(exportConfig, 0, modelId);
status = Gna2ModelExportConfigSetSource(exportConfig, deviceIndex, modelId);
GNADeviceHelper::checkGna2Status(status, "Gna2ModelExportConfigSetSource");
status = Gna2ModelExportConfigSetTarget(exportConfig, Gna2DeviceVersionEmbedded1_0);
GNADeviceHelper::checkGna2Status(status, "Gna2ModelExportConfigSetTarget");

View File

@ -14,6 +14,7 @@
void * ExportSueLegacyUsingGnaApi2(
uint32_t modelId,
uint32_t deviceIndex,
Gna2ModelSueCreekHeader* modelHeader);
void ExportLdForDeviceVersion(

View File

@ -170,13 +170,9 @@ void GNADeviceHelper::releaseModel(const uint32_t model_id) {
bool GNADeviceHelper::enforceLegacyCnnNeeded() const {
const auto compileTargetDevice = getTargetDevice(false);
return isGnaLibVersion2_1 && isUpTo20HwGnaDevice(compileTargetDevice);
return (isGnaLibVersion3_0 || isGnaLibVersion2_1) && isUpTo20HwGnaDevice(compileTargetDevice);
}
namespace {
const volatile auto Gna2DeviceVersion3_0 = static_cast<Gna2DeviceVersion>(0x30);
} // namespace
Gna2DeviceVersion GNADeviceHelper::parseDeclaredTarget(std::string target, const bool execTarget) const {
auto parsed = Gna2DeviceVersion2_0;
auto throwUnsupportedGnaTarget = [&](std::string extraSuffix) {
@ -184,7 +180,7 @@ Gna2DeviceVersion GNADeviceHelper::parseDeclaredTarget(std::string target, const
THROW_GNA_EXCEPTION << "Unsupported " << key << " = \"" << target << "\"" << extraSuffix;
};
if (target == InferenceEngine::GNAConfigParams::GNA_TARGET_3_0) {
if (!isGnaLibVersion2_1)
if (!isGnaLibVersion2_1 && !isGnaLibVersion3_0)
throwUnsupportedGnaTarget(", when GNA Library version is 2.0.X.Y");
parsed = Gna2DeviceVersion3_0;
} else if (target != InferenceEngine::GNAConfigParams::GNA_TARGET_2_0) {
@ -195,7 +191,7 @@ Gna2DeviceVersion GNADeviceHelper::parseDeclaredTarget(std::string target, const
Gna2DeviceVersion GNADeviceHelper::getDefaultTarget() const {
if (detectedGnaDevVersion == Gna2DeviceVersionSoftwareEmulation)
return isGnaLibVersion2_1 ? Gna2DeviceVersion3_0 : Gna2DeviceVersion2_0;
return (isGnaLibVersion3_0 || isGnaLibVersion2_1) ? Gna2DeviceVersion3_0 : Gna2DeviceVersion2_0;
return detectedGnaDevVersion;
}
@ -213,14 +209,6 @@ uint32_t GNADeviceHelper::createRequestConfig(const uint32_t model_id) {
auto status = Gna2RequestConfigCreate(model_id, &reqConfId);
checkGna2Status(status, "Gna2RequestConfigCreate");
// When the GNA_SW_EXACT mode is chosen inference results should be computed exactly the same way
// (bit exactly) as on the selected GNA execution target generation.
// See the GNA Plugin's GNA_EXEC_TARGET config option description.
if (swExactMode) {
const auto consistentDevice = getTargetDevice(true);
status = Gna2RequestConfigEnableHardwareConsistency(reqConfId, consistentDevice);
checkGna2Status(status, "Gna2RequestConfigEnableHardwareConsistency(" + std::to_string(static_cast<long>(consistentDevice)) + ")");
}
status = Gna2InstrumentationConfigAssignToRequestConfig(instrumentationConfigId, reqConfId);
checkGna2Status(status, "Gna2InstrumentationConfigAssignToRequestConfig");
@ -454,7 +442,7 @@ GNADeviceHelper::DumpResult GNADeviceHelper::dumpXnn(const uint32_t modelId) {
checkStatus();
#else
r.model.reset(
ExportSueLegacyUsingGnaApi2(modelId, &r.header),
ExportSueLegacyUsingGnaApi2(modelId, nGnaDeviceIndex, &r.header),
gnaUserFree);
#endif
@ -473,7 +461,7 @@ void GNADeviceHelper::dumpXnnForDeviceVersion(
const Gna2DeviceVersion targetDeviceVersion) {
Gna2ModelSueCreekHeader sueHeader;
auto ptr = ExportSueLegacyUsingGnaApi2(modelId, &sueHeader);
auto ptr = ExportSueLegacyUsingGnaApi2(modelId, nGnaDeviceIndex, &sueHeader);
gnaUserFree(ptr);
ExportGnaDescriptorPartiallyFilled(sueHeader.NumberOfLayers, outStream);
@ -506,8 +494,15 @@ void GNADeviceHelper::open(uint8_t n_threads) {
#else
auto status = Gna2DeviceGetVersion(nGnaDeviceIndex, &detectedGnaDevVersion);
checkGna2Status(status, "Gna2DeviceGetVersion");
status = Gna2DeviceOpen(nGnaDeviceIndex);
checkGna2Status(status, "Gna2DeviceOpen");
if (useDeviceEmbeddedExport) {
status = Gna2DeviceCreateForExport(exportGeneration, &nGnaDeviceIndex);
GNADeviceHelper::checkGna2Status(status, "Gna2DeviceCreateForExport");
} else {
status = Gna2DeviceOpen(nGnaDeviceIndex);
checkGna2Status(status, "Gna2DeviceOpen");
}
// TODO: GNA2: uncomment when scratchpad repaired
// status = Gna2DeviceSetNumberOfThreads(nGnaDeviceIndex, n_threads);
// checkGna2Status(status);

View File

@ -55,7 +55,10 @@ class GNADeviceHelper {
Gna2DeviceVersion detectedGnaDevVersion = Gna2DeviceVersionSoftwareEmulation;
std::string executionTarget;
std::string compileTarget;
bool useDeviceEmbeddedExport = false;
Gna2DeviceVersion exportGeneration = Gna2DeviceVersionEmbedded1_0;
bool isGnaLibVersion2_1 = false;
bool isGnaLibVersion3_0 = false;
static const uint32_t TotalGna2InstrumentationPoints = 2;
Gna2InstrumentationPoint gna2InstrumentationPoints[TotalGna2InstrumentationPoints] = {
@ -82,12 +85,16 @@ public:
bool swExactModeIn = false,
uint8_t lib_async_n_threads = 1,
bool use_openmp = false,
bool isPerformanceMeasuring = false) :
bool isPerformanceMeasuring = false,
bool deviceEmbedded = false,
int deviceVersionParsed = 0) :
swExactMode(swExactModeIn),
executionTarget(executionTargetIn),
compileTarget(compileTargetIn),
isPerformanceMeasuring(isPerformanceMeasuring),
nGnaDeviceIndex{selectGnaDevice()} {
nGnaDeviceIndex{selectGnaDevice()},
useDeviceEmbeddedExport(deviceEmbedded),
exportGeneration(static_cast<Gna2DeviceVersion>(deviceVersionParsed)) {
#endif
open(lib_async_n_threads);
initGnaPerfCounters();
@ -98,6 +105,9 @@ public:
if (gnaLibVersion.rfind("2.1", 0) == 0) {
isGnaLibVersion2_1 = true;
}
if (gnaLibVersion.rfind("3.0", 0) == 0) {
isGnaLibVersion3_0 = true;
}
#endif
if (use_openmp) {

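The isGnaLibVersion2_1 / isGnaLibVersion3_0 flags above rely on std::string::rfind(prefix, 0) == 0, which acts as a "starts with" test: a backwards search constrained to position 0 can only succeed at the very beginning of the string. A tiny self-contained illustration; the version string is an example value only.

#include <cassert>
#include <string>

int main() {
    const std::string gnaLibVersion = "3.0.0.1377";  // example value
    const bool isGnaLibVersion3_0 = gnaLibVersion.rfind("3.0", 0) == 0;  // true: "3.0" found at index 0
    const bool isGnaLibVersion2_1 = gnaLibVersion.rfind("2.1", 0) == 0;  // false: no match at index 0
    assert(isGnaLibVersion3_0 && !isGnaLibVersion2_1);
    return 0;
}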
View File

@ -580,7 +580,8 @@ void GNAGraphCompiler::finalizeConvolution2DPrimitive(InferenceEngine::CNNLayerP
cnn2dValidator.ValidateCnn2D(layer->name,
in_height, in_width, in_channels,
convolution._kernel_y, convolution._kernel_x, filter_n, convolution._stride_y, convolution._stride_x, inputPrec);
convolution._kernel_y, convolution._kernel_x, filter_n, convolution._stride_y, convolution._stride_x,
convolution._dilation_y, convolution._dilation_x, inputPrec);
float weight_scale_factor = getScaleFactor(layer, QuantizedDataType::weights);
float output_scale_factor = getScaleFactor(layer, QuantizedDataType::output);
@ -2577,4 +2578,4 @@ GNAGraphCompiler::transposeMatrix(uint8_t* ptr_matrix, size_t element_size, uint
}
}
return temp_buffer;
}
}

View File

@ -420,7 +420,9 @@ void GNAPlugin::InitGNADevice() {
config.swExactMode,
gnaFlags->gna_lib_async_threads_num,
gnaFlags->gna_openmp_multithreading,
gnaFlags->performance_counting);
gnaFlags->performance_counting,
!config.dumpXNNPath.empty(),
GetDeviceVersionFromString(config.dumpXNNGeneration));
#endif
size_t page_size_bytes = 4096;
gnamem = std::make_shared<gna_memory_type>(memory::make_polymorph<memory::GNAAllocator>(gnadevice), page_size_bytes);
@ -706,6 +708,11 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) {
InitGNADevice();
}
std::string effectiveGnaCompileTarget = config.gnaCompileTarget;
if (gnadevice) {
effectiveGnaCompileTarget = gnadevice->getEffectiveGnaCompileTarget();
}
bool isNgraphPassesUsed = false;
if (_network.getFunction()) {
@ -719,11 +726,9 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) {
manager.register_pass<ngraph::pass::LSTMCellDecomposition>();
manager.register_pass<ConvertDWSCToScaleShifts>();
manager.register_pass<ConvertPaddedToValidConv>();
if (config.gnaCompileTarget == InferenceEngine::GNAConfigParams::GNA_TARGET_2_0) {
manager.register_pass<Decompose2DConvTransposedWithBiasAF>();
manager.register_pass<Decompose2DConvTransposedWithBias>();
manager.register_pass<Decompose2DConv>();
}
manager.register_pass<Decompose2DConvTransposedWithBiasAF>(effectiveGnaCompileTarget, config.gnaPrecision);
manager.register_pass<Decompose2DConvTransposedWithBias>(effectiveGnaCompileTarget, config.gnaPrecision);
manager.register_pass<Decompose2DConv>(effectiveGnaCompileTarget, config.gnaPrecision);
// TODO enable this transformation for networks with convolutions
if (!ngraph::op::util::has_op_with_type<ngraph::opset7::Convolution>(graph)) {
manager.register_pass<ConvertMatmulWithFqToPointWiseConvolution>();
@ -1036,10 +1041,7 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) {
#else
nnets.emplace_back(make_shared<CPPWrapper<intel_nnet_type_t>>(), -1, InferenceEngine::BlobMap());
#endif
std::string effectiveGnaCompileTarget = config.gnaCompileTarget;
if (gnadevice) {
effectiveGnaCompileTarget = gnadevice->getEffectiveGnaCompileTarget();
}
if (!gnaFlags->sw_fp32 && !graphCompiler.dnnComponents.components.empty()) {
// number of layer gets calculated inside that InitGNAStruct function
#if GNA_LIB_VER == 2

View File

@ -201,20 +201,25 @@ ConvertPaddedToValidConv::ConvertPaddedToValidConv() {
ngraph::pattern::consumers_count(1));
auto af1 = ngraph::pattern::wrap_type<ngraph::opset7::Relu, ngraph::opset7::Sigmoid,
ngraph::opset7::Tanh, ngraph::opset7::Abs, ngraph::opset7::Log, ngraph::opset7::Exp,
ngraph::opset7::Sign, ngraph::opset7::Clamp>({bias}, ngraph::pattern::consumers_count(1));
ngraph::opset7::Sign, ngraph::opset7::Clamp>({conv}, ngraph::pattern::consumers_count(1));
auto af2 = ngraph::pattern::wrap_type<ngraph::opset7::Relu, ngraph::opset7::Sigmoid,
ngraph::opset7::Tanh, ngraph::opset7::Abs, ngraph::opset7::Log, ngraph::opset7::Exp,
ngraph::opset7::Sign, ngraph::opset7::Clamp>({fq_bias}, ngraph::pattern::consumers_count(1));
ngraph::opset7::Sign, ngraph::opset7::Clamp>({bias}, ngraph::pattern::consumers_count(1));
auto af3 = ngraph::pattern::wrap_type<ngraph::opset7::Relu, ngraph::opset7::Sigmoid,
ngraph::opset7::Tanh, ngraph::opset7::Abs, ngraph::opset7::Log, ngraph::opset7::Exp,
ngraph::opset7::Sign, ngraph::opset7::Clamp>({max_pool1}, ngraph::pattern::consumers_count(1));
ngraph::opset7::Sign, ngraph::opset7::Clamp>({fq_bias}, ngraph::pattern::consumers_count(1));
auto af4 = ngraph::pattern::wrap_type<ngraph::opset7::Relu, ngraph::opset7::Sigmoid,
ngraph::opset7::Tanh, ngraph::opset7::Abs, ngraph::opset7::Log, ngraph::opset7::Exp,
ngraph::opset7::Sign, ngraph::opset7::Clamp>({max_pool1}, ngraph::pattern::consumers_count(1));
auto af5 = ngraph::pattern::wrap_type<ngraph::opset7::Relu, ngraph::opset7::Sigmoid,
ngraph::opset7::Tanh, ngraph::opset7::Abs, ngraph::opset7::Log, ngraph::opset7::Exp,
ngraph::opset7::Sign, ngraph::opset7::Clamp>({max_pool2}, ngraph::pattern::consumers_count(1));
auto fq_af = ngraph::pattern::wrap_type<ngraph::opset7::FakeQuantize>({af4, const_input, const_input, const_input, const_input},
auto fq_af1 = ngraph::pattern::wrap_type<ngraph::opset7::FakeQuantize>({af3, const_input, const_input, const_input, const_input},
ngraph::pattern::consumers_count(1));
auto fq_af2 = ngraph::pattern::wrap_type<ngraph::opset7::FakeQuantize>({af5, const_input, const_input, const_input, const_input},
ngraph::pattern::consumers_count(1));
auto transpose_input =
std::make_shared<ngraph::pattern::op::Or>(ngraph::OutputVector{conv, bias, max_pool1, max_pool2, fq_bias, af1, af2, af3, af4, fq_af});
std::make_shared<ngraph::pattern::op::Or>(ngraph::OutputVector{conv, bias, max_pool1, max_pool2, fq_bias, af1, af2, af3, af4, af5, fq_af1, fq_af2});
auto trailing_transpose = ngraph::pattern::wrap_type<ngraph::opset7::Transpose>({transpose_input, const_input},
consumers_and_rank(1, 4));

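The extra af1..af5 and fq_af1/fq_af2 patterns above enumerate the alternative producers an activation function (or its FakeQuantize) may follow, and the Or node lets the trailing Transpose match whichever branch actually exists; the same layout is repeated in Decompose2DConv below. A reduced sketch of that matcher-building style, reusing the same ngraph pattern helpers; the function name and the node set are illustrative only.

#include <memory>
#include <ngraph/opsets/opset7.hpp>
#include <ngraph/pattern/matcher.hpp>
#include <ngraph/pattern/op/or.hpp>
#include <ngraph/pattern/op/wrap_type.hpp>

std::shared_ptr<ngraph::pattern::Matcher> MakeConvReluTransposeMatcherSketch() {
    auto const_input = ngraph::pattern::wrap_type<ngraph::opset7::Constant>();
    auto leading_transpose = ngraph::pattern::wrap_type<ngraph::opset7::Transpose>();
    auto conv = ngraph::pattern::wrap_type<ngraph::opset7::Convolution>(
        {leading_transpose, const_input}, ngraph::pattern::consumers_count(1));
    // The activation may sit directly after the convolution...
    auto af = ngraph::pattern::wrap_type<ngraph::opset7::Relu>({conv}, ngraph::pattern::consumers_count(1));
    // ...so the trailing transpose accepts either branch: Conv or Relu(Conv).
    auto transpose_input = std::make_shared<ngraph::pattern::op::Or>(ngraph::OutputVector{conv, af});
    auto trailing_transpose =
        ngraph::pattern::wrap_type<ngraph::opset7::Transpose>({transpose_input, const_input});
    return std::make_shared<ngraph::pattern::Matcher>(trailing_transpose, "ConvReluTransposeSketch");
}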
View File

@ -12,8 +12,8 @@
#include <ngraph/pattern/op/or.hpp>
#include <ngraph/rt_info.hpp>
#include <ngraph/pass/manager.hpp>
#include <ie_common.h>
#include "utils/transformation_helper.hpp"
#include <gna/gna_config.hpp>
#include "backend/gna_limitations.hpp"
#include "layers/gna_convolution_layer.hpp"
@ -71,10 +71,12 @@ static bool VerifyMaxPool(GraphData& graph_data, std::shared_ptr<ngraph::opset7:
auto pool_strides = max_pool->get_strides();
// Check Max Pool padding and limitations
// Allow only Max Pool 1D (2D is currently not supported by this transformation)
if ((max_pool->get_auto_pad() != ngraph::op::PadType::VALID &&
(max_pool->get_auto_pad() != ngraph::op::PadType::EXPLICIT ||
max_pool->get_pads_begin() != ngraph::Shape({0, 0}) || max_pool->get_pads_end() != ngraph::Shape({0, 0}))) ||
pool_filter.size() != 2 || pool_strides.size() != 2 ||
pool_filter[0] > 1 || pool_strides[0] > 1 ||
pool_filter[0] > GNALimitations::maxPoolMaxWindowSize)
return false;
@ -83,6 +85,25 @@ static bool VerifyMaxPool(GraphData& graph_data, std::shared_ptr<ngraph::opset7:
return true;
}
static bool GNA30SupportedConv(const std::string& gnaCompileTarget, const InferenceEngine::Precision& gnaPrecision,
const GraphData& graph_data, const ConvData& conv_data) {
const GNALimitations::Cnn2D::Validator cnn2dValidator;
if (gnaCompileTarget == InferenceEngine::GNAConfigParams::GNA_TARGET_3_0 &&
cnn2dValidator.ValidateCnn2D(graph_data.conv->get_friendly_name(),
conv_data.input_height, conv_data.input_width, conv_data.input_channel_count,
conv_data.filter_height, conv_data.filter_width, conv_data.filter_channel_count,
conv_data.filter_stride_height, conv_data.filter_stride_width, conv_data.filter_dilation_height, conv_data.filter_dilation_width,
OvGnaTypeIntFromBytes(gnaPrecision.size()), false) &&
(!graph_data.max_pool || cnn2dValidator.ValidatePooling2D(graph_data.conv->get_friendly_name(),
graph_data.max_pool->get_kernel()[0], graph_data.max_pool->get_kernel()[1],
graph_data.max_pool->get_strides()[0], graph_data.max_pool->get_strides()[1],
false)))
return true;
return false;
}
static size_t CalculateConvCount(const ConvData& conv_data) {
// Check if split of plane due to GNA HW limitations of 768 filter elements is possible
size_t conv_count = 1;
@ -349,7 +370,7 @@ static std::shared_ptr<ngraph::Node> CreateDecomposedConv(const GraphData& graph
// We need to calculate some parameters in case horizontal stride > 1 is used, because if we use the ones available from the original convolution
// we won't take into account the fact horizontal strides will be supported by the newly created 1D convolution, and not by decomposition
size_t filter_dilation_width = conv_data.filter_width > 1 ? conv_data.filter_dilation_width : 1;
size_t output_width = (conv_data.input_width - (conv_data.filter_width + filter_dilation_width - 2));
size_t output_width = (conv_data.input_width - (filter_dilation_width * (conv_data.filter_width - 1)));
if (conv_data.filter_width > 1) {
for (size_t filter_width = 0; filter_width < conv_data.filter_width; filter_width++) {
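The corrected output_width expression a few lines above matches the VALID-padding relation for a dilated kernel at stride 1: the kernel spans dilation * (width - 1) + 1 input columns, so the number of output columns is input_width - dilation * (width - 1). The old and new expressions agree when dilation is 1 and only diverge for real dilation; a quick check with assumed numbers:

#include <cassert>
#include <cstddef>

int main() {
    // Hypothetical sizes: 16-column input, 3-wide kernel, horizontal dilation 2, stride 1, VALID padding.
    const size_t input_width = 16, filter_width = 3, filter_dilation_width = 2;
    const size_t output_width = input_width - (filter_dilation_width * (filter_width - 1));  // corrected formula
    assert(output_width == 12);  // kernel spans 2*(3-1)+1 = 5 columns, so 16-5+1 = 12 positions
    assert(input_width - (filter_width + filter_dilation_width - 2) == 13);  // previous expression over-counted by 1
    return 0;
}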
@ -442,7 +463,9 @@ static void Decompose(const GraphData& graph_data, ConvData& conv_data) {
conv_result->set_friendly_name(conv_result_name);
}
static bool Convert(std::shared_ptr<ngraph::Node> leading_transpose,
static bool Convert(const std::string& gnaCompileTarget,
const InferenceEngine::Precision& gnaPrecision,
std::shared_ptr<ngraph::Node> leading_transpose,
std::shared_ptr<ngraph::Node> fq_conv,
std::shared_ptr<ngraph::Node> conv,
std::shared_ptr<ngraph::Node> trailing_transpose,
@ -468,6 +491,13 @@ static bool Convert(std::shared_ptr<ngraph::Node> leading_transpose,
if (!VerifyAndGetConvData(std::dynamic_pointer_cast<ngraph::opset7::Convolution>(conv), conv_data))
return false;
if (max_pool && !VerifyMaxPool(graph_data, std::dynamic_pointer_cast<ngraph::opset7::MaxPool>(max_pool)))
return false;
// If compile target is GNA 3.0 and the convolution is supported on it, then skip decomposition
if (GNA30SupportedConv(gnaCompileTarget, gnaPrecision, graph_data, conv_data))
return false;
// We are looking for Transpose(NHWC->NCHW) => Conv => Transpose(NCHW->NHWC)
// or similar cases, so required network must be in NHWC order like in TF
if (!TransposeOrderMatches(std::dynamic_pointer_cast<ngraph::opset7::Transpose>(leading_transpose), {0, 3, 1, 2}))
@ -476,9 +506,6 @@ static bool Convert(std::shared_ptr<ngraph::Node> leading_transpose,
if (!TransposeOrderMatches(std::dynamic_pointer_cast<ngraph::opset7::Transpose>(trailing_transpose), {0, 2, 3, 1}))
return false;
if (max_pool && !VerifyMaxPool(graph_data, std::dynamic_pointer_cast<ngraph::opset7::MaxPool>(max_pool)))
return false;
if (!ShouldDecompose(graph_data, conv_data))
return false;
@ -488,7 +515,7 @@ static bool Convert(std::shared_ptr<ngraph::Node> leading_transpose,
return true;
}
Decompose2DConv::Decompose2DConv() {
Decompose2DConv::Decompose2DConv(const std::string& gnaCompileTarget, const InferenceEngine::Precision& gnaPrecision) {
MATCHER_SCOPE(Decompose2DConv);
auto const_input = ngraph::pattern::wrap_type<ngraph::opset7::Constant>();
@ -510,20 +537,25 @@ Decompose2DConv::Decompose2DConv() {
ngraph::pattern::consumers_count(1));
auto af1 = ngraph::pattern::wrap_type<ngraph::opset7::Relu, ngraph::opset7::Sigmoid,
ngraph::opset7::Tanh, ngraph::opset7::Abs, ngraph::opset7::Log, ngraph::opset7::Exp,
ngraph::opset7::Sign, ngraph::opset7::Clamp>({bias}, ngraph::pattern::consumers_count(1));
ngraph::opset7::Sign, ngraph::opset7::Clamp>({conv}, ngraph::pattern::consumers_count(1));
auto af2 = ngraph::pattern::wrap_type<ngraph::opset7::Relu, ngraph::opset7::Sigmoid,
ngraph::opset7::Tanh, ngraph::opset7::Abs, ngraph::opset7::Log, ngraph::opset7::Exp,
ngraph::opset7::Sign, ngraph::opset7::Clamp>({fq_bias}, ngraph::pattern::consumers_count(1));
ngraph::opset7::Sign, ngraph::opset7::Clamp>({bias}, ngraph::pattern::consumers_count(1));
auto af3 = ngraph::pattern::wrap_type<ngraph::opset7::Relu, ngraph::opset7::Sigmoid,
ngraph::opset7::Tanh, ngraph::opset7::Abs, ngraph::opset7::Log, ngraph::opset7::Exp,
ngraph::opset7::Sign, ngraph::opset7::Clamp>({max_pool1}, ngraph::pattern::consumers_count(1));
ngraph::opset7::Sign, ngraph::opset7::Clamp>({fq_bias}, ngraph::pattern::consumers_count(1));
auto af4 = ngraph::pattern::wrap_type<ngraph::opset7::Relu, ngraph::opset7::Sigmoid,
ngraph::opset7::Tanh, ngraph::opset7::Abs, ngraph::opset7::Log, ngraph::opset7::Exp,
ngraph::opset7::Sign, ngraph::opset7::Clamp>({max_pool1}, ngraph::pattern::consumers_count(1));
auto af5 = ngraph::pattern::wrap_type<ngraph::opset7::Relu, ngraph::opset7::Sigmoid,
ngraph::opset7::Tanh, ngraph::opset7::Abs, ngraph::opset7::Log, ngraph::opset7::Exp,
ngraph::opset7::Sign, ngraph::opset7::Clamp>({max_pool2}, ngraph::pattern::consumers_count(1));
auto fq_af = ngraph::pattern::wrap_type<ngraph::opset7::FakeQuantize>({af4, const_input, const_input, const_input, const_input},
auto fq_af1 = ngraph::pattern::wrap_type<ngraph::opset7::FakeQuantize>({af3, const_input, const_input, const_input, const_input},
ngraph::pattern::consumers_count(1));
auto fq_af2 = ngraph::pattern::wrap_type<ngraph::opset7::FakeQuantize>({af5, const_input, const_input, const_input, const_input},
ngraph::pattern::consumers_count(1));
auto transpose_input =
std::make_shared<ngraph::pattern::op::Or>(ngraph::OutputVector{conv, bias, max_pool1, max_pool2, fq_bias, af1, af2, af3, af4, fq_af});
std::make_shared<ngraph::pattern::op::Or>(ngraph::OutputVector{conv, bias, max_pool1, max_pool2, fq_bias, af1, af2, af3, af4, fq_af1, fq_af2});
auto trailing_transpose = ngraph::pattern::wrap_type<ngraph::opset7::Transpose>({transpose_input, const_input},
consumers_and_rank(1, 4));
@ -540,8 +572,10 @@ Decompose2DConv::Decompose2DConv() {
auto fq_bias_it = pattern_map.find(fq_bias);
auto fq_bias_node = (fq_bias_it == std::end(pattern_map) ? nullptr : fq_bias_it->second.get_node_shared_ptr());
auto fq_af_it = pattern_map.find(fq_af);
auto fq_af_node = (fq_af_it == std::end(pattern_map) ? nullptr : fq_af_it->second.get_node_shared_ptr());
auto fq_af1_it = pattern_map.find(fq_af1);
auto fq_af2_it = pattern_map.find(fq_af2);
auto fq_af_node = (fq_af1_it == std::end(pattern_map) ?
((fq_af2_it == std::end(pattern_map) ? nullptr : fq_af2_it->second.get_node_shared_ptr())) : fq_af1_it->second.get_node_shared_ptr());
auto max_pool1_it = pattern_map.find(max_pool1);
auto max_pool2_it = pattern_map.find(max_pool2);
auto max_pool_node = (max_pool1_it == std::end(pattern_map) ?
@ -557,7 +591,8 @@ Decompose2DConv::Decompose2DConv() {
}
}
return Convert(pattern_map.at(leading_transpose).get_node_shared_ptr(), fq_conv_node, pattern_map.at(conv).get_node_shared_ptr(),
return Convert(gnaCompileTarget, gnaPrecision,
pattern_map.at(leading_transpose).get_node_shared_ptr(), fq_conv_node, pattern_map.at(conv).get_node_shared_ptr(),
pattern_map.at(trailing_transpose).get_node_shared_ptr(), bias_node, bias_const_node, fq_bias_node, max_pool_node, af_node, fq_af_node,
pattern_map.at(trailing_transpose).get_node_shared_ptr());
};
@ -566,7 +601,7 @@ Decompose2DConv::Decompose2DConv() {
this->register_matcher(m, callback);
}
Decompose2DConvTransposedWithBias::Decompose2DConvTransposedWithBias() {
Decompose2DConvTransposedWithBias::Decompose2DConvTransposedWithBias(const std::string& gnaCompileTarget, const InferenceEngine::Precision& gnaPrecision) {
MATCHER_SCOPE(Decompose2DConvTransposedWithBias);
auto const_input_i64 = ngraph::pattern::wrap_type<ngraph::opset7::Constant>(ngraph::pattern::type_matches(ngraph::element::i64));
@ -588,7 +623,8 @@ Decompose2DConvTransposedWithBias::Decompose2DConvTransposedWithBias() {
if (!(bias_const_node = VerifyBiasGetConst(pattern_map.at(conv).get_node_shared_ptr(), pattern_map.at(bias).get_node_shared_ptr())))
return false;
return Convert(pattern_map.at(leading_transpose).get_node_shared_ptr(), nullptr, pattern_map.at(conv).get_node_shared_ptr(),
return Convert(gnaCompileTarget, gnaPrecision,
pattern_map.at(leading_transpose).get_node_shared_ptr(), nullptr, pattern_map.at(conv).get_node_shared_ptr(),
pattern_map.at(trailing_transpose).get_node_shared_ptr(), pattern_map.at(bias).get_node_shared_ptr(), bias_const_node, nullptr, nullptr,
nullptr, nullptr, pattern_map.at(bias).get_node_shared_ptr());
};
@ -597,7 +633,7 @@ Decompose2DConvTransposedWithBias::Decompose2DConvTransposedWithBias() {
this->register_matcher(m, callback);
}
Decompose2DConvTransposedWithBiasAF::Decompose2DConvTransposedWithBiasAF() {
Decompose2DConvTransposedWithBiasAF::Decompose2DConvTransposedWithBiasAF(const std::string& gnaCompileTarget, const InferenceEngine::Precision& gnaPrecision) {
MATCHER_SCOPE(Decompose2DConvTransposedWithBiasAF);
auto const_input_i64 = ngraph::pattern::wrap_type<ngraph::opset7::Constant>(ngraph::pattern::type_matches(ngraph::element::i64));
@ -623,7 +659,8 @@ Decompose2DConvTransposedWithBiasAF::Decompose2DConvTransposedWithBiasAF() {
if (!(bias_const_node = VerifyBiasGetConst(pattern_map.at(conv).get_node_shared_ptr(), pattern_map.at(bias).get_node_shared_ptr())))
return false;
return Convert(pattern_map.at(leading_transpose).get_node_shared_ptr(), nullptr, pattern_map.at(conv).get_node_shared_ptr(),
return Convert(gnaCompileTarget, gnaPrecision,
pattern_map.at(leading_transpose).get_node_shared_ptr(), nullptr, pattern_map.at(conv).get_node_shared_ptr(),
pattern_map.at(trailing_transpose).get_node_shared_ptr(), pattern_map.at(bias).get_node_shared_ptr(), bias_const_node, nullptr,
nullptr, pattern_map.at(af).get_node_shared_ptr(), nullptr, pattern_map.at(af).get_node_shared_ptr());
};

View File

@ -5,6 +5,7 @@
#pragma once
#include <ngraph/pass/graph_rewrite.hpp>
#include <ie_precision.hpp>
namespace GNAPluginNS {
@ -30,7 +31,7 @@ namespace GNAPluginNS {
class Decompose2DConv : public ngraph::pass::MatcherPass {
public:
NGRAPH_RTTI_DECLARATION;
Decompose2DConv();
Decompose2DConv(const std::string& gnaCompileTarget, const InferenceEngine::Precision& gnaPrecision);
};
/**
@ -51,7 +52,7 @@ public:
class Decompose2DConvTransposedWithBias : public ngraph::pass::MatcherPass {
public:
NGRAPH_RTTI_DECLARATION;
Decompose2DConvTransposedWithBias();
Decompose2DConvTransposedWithBias(const std::string& gnaCompileTarget, const InferenceEngine::Precision& gnaPrecision);
};
/**
@ -74,7 +75,7 @@ public:
class Decompose2DConvTransposedWithBiasAF : public ngraph::pass::MatcherPass {
public:
NGRAPH_RTTI_DECLARATION;
Decompose2DConvTransposedWithBiasAF();
Decompose2DConvTransposedWithBiasAF(const std::string& gnaCompileTarget, const InferenceEngine::Precision& gnaPrecision);
};
} // namespace GNAPluginNS

View File

@ -73,8 +73,8 @@ std::shared_ptr<ngraph::opset7::StridedSlice> FlatCrop(ngraph::Output<ngraph::No
std::vector<int64_t>{1, 0}); // end mask
}
std::shared_ptr<ngraph::Node> VerifyBiasGetConst(std::shared_ptr<ngraph::Node> conv, std::shared_ptr<ngraph::Node> bias) {
auto add_const = std::dynamic_pointer_cast<ngraph::opset7::Constant>(bias->input_value(1).get_node_shared_ptr());
std::shared_ptr<ngraph::Node> VerifyBiasGetConst(std::shared_ptr<ngraph::Node> conv, std::shared_ptr<ngraph::Node> add) {
auto add_const = std::dynamic_pointer_cast<ngraph::opset7::Constant>(add->input_value(1).get_node_shared_ptr());
// Check if it's really a bias and not just addition
if (add_const) {

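VerifyBiasGetConst, renamed here to take an "add" node, checks that the Add following the convolution really is a per-channel bias and, if so, returns its Constant input. A reduced sketch of that contract; the shape test below (one value per output channel, NCHW layout) is an assumption for illustration, not necessarily the exact condition the helper applies.

#include <memory>
#include <ngraph/opsets/opset7.hpp>

std::shared_ptr<ngraph::Node> VerifyBiasGetConstSketch(std::shared_ptr<ngraph::Node> conv,
                                                       std::shared_ptr<ngraph::Node> add) {
    // The bias candidate is the second input of the Add.
    auto add_const = std::dynamic_pointer_cast<ngraph::opset7::Constant>(
        add->input_value(1).get_node_shared_ptr());
    if (!add_const)
        return nullptr;  // plain addition with a non-constant operand
    // Assumed bias criterion: exactly one value per convolution output channel.
    const auto bias_elements = ngraph::shape_size(add_const->get_output_shape(0));
    const auto out_channels = conv->get_output_shape(0)[1];
    if (bias_elements != out_channels)
        return nullptr;
    return add_const;
}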
View File

@ -65,13 +65,13 @@ std::shared_ptr<ngraph::opset7::StridedSlice> FlatCrop(ngraph::Output<ngraph::No
/**
* @brief checks whether an add present after convolution is a bias and gets its const input
* @param conv convolution layer preceding potential bias
* @param bias potential bias layer passed from ngraph matcher
* @param add potential bias layer passed from ngraph matcher
* @return bias const if the add layer present after convolution is a bias, nullptr otherwise
*/
std::shared_ptr<ngraph::Node> VerifyBiasGetConst(std::shared_ptr<ngraph::Node> conv, std::shared_ptr<ngraph::Node> bias);
std::shared_ptr<ngraph::Node> VerifyBiasGetConst(std::shared_ptr<ngraph::Node> conv, std::shared_ptr<ngraph::Node> add);
/**
* @brief inserts a new fake quantize layer (if it exists) copied from an existing fake quantize layer and conncts it to the output of a given layer
* @brief inserts a new fake quantize layer copied from an existing one and connects it to the output of a given layer
* @param fq_layer existing fake quantize layer to be copied
* @param last_node the node to which output the new fake quantize layer will be connected
* @return new fake quantize layer or the last node

View File

@ -25,10 +25,11 @@ namespace LayerTestsDefinitions {
enum class modelType {
TranspConvTransp = 0, /* Transpose(NHWC->NCHW) => Conv => Transpose(NCHW->NHWC) */
TranspConvBcastAddTransp, /* Transpose(NHWC->NCHW) => Conv => Broadcasted Add (Bias) => Transpose(NCHW->NHWC) */
TranspConvActTransp, /* Transpose(NHWC->NCHW) => Conv => Activation Function => Transpose(NCHW->NHWC) */
TranspConvBcastAddMaxPoolTransp, /* Transpose(NHWC->NCHW) => Conv => Broadcasted Add (Bias) => MaxPooling => Transpose(NCHW->NHWC) (2D Max Pool case) */
TranspConvBcastAddActTransp, /* Transpose(NHWC->NCHW) => Conv => Broadcasted Add (Bias) => Activation Function => Transpose(NCHW->NHWC) */
TranspConvBcastAddMaxPoolActTransp, /* Transpose(NHWC->NCHW) => Conv => Broadcasted Add (Bias) => MaxPool => Activation Function => Transpose(NCHW->NHWC) */
TranspConvTranspBcastAdd, /* Transpose(NHWC->NCHW) => conv => Transpose(NCHW->NHWC) => Bias */
TranspConvTranspBcastAdd, /* Transpose(NHWC->NCHW) => Conv => Transpose(NCHW->NHWC) => Bias */
TranspConvTranspBcastAddAct /* Transpose(NHWC->NCHW) => Conv => Transpose(NCHW->NHWC) => Bias => Activation Function */
};
@ -142,6 +143,13 @@ protected:
}
break;
case modelType::TranspConvActTransp:
{
auto activation = std::make_shared<Relu>(conv);
lastOp = std::make_shared<Transpose>(activation, transposeOutOrder);
}
break;
case modelType::TranspConvBcastAddMaxPoolTransp:
{
auto bcastAdd = std::make_shared<Add>(conv, biasConst);
@ -257,6 +265,7 @@ const std::vector<op::PadType> padTypes = {
const std::vector<modelType> models = {
modelType::TranspConvTransp,
modelType::TranspConvBcastAddTransp,
modelType::TranspConvActTransp,
modelType::TranspConvBcastAddActTransp,
modelType::TranspConvTranspBcastAdd,
modelType::TranspConvTranspBcastAddAct,
@ -277,8 +286,8 @@ const std::vector<std::vector<size_t >> maxpool1DPools = {{1, 2}};
const std::vector<std::vector<size_t >> maxpool1DStrides = {{1, 1}};
const std::vector<std::vector<size_t>> input2DNHWC = {{1, 16, 16, 32}};
const std::vector<std::vector<size_t >> kernels2D = {{2, 2}, {4, 1}, {1, 3}};
const std::vector<std::vector<size_t >> strides2D = {{1, 1}, {1, 2}, {2, 1}, {2, 2}};
const std::vector<std::vector<size_t >> kernels2D = {{2, 2}, {4, 1}};
const std::vector<std::vector<size_t >> strides2D = {{1, 1}, {2, 1}};
const std::vector<std::vector<ptrdiff_t>> padBegins2D = {{1, 2}};
const std::vector<std::vector<ptrdiff_t>> padEnds2D = {{3, 1}};
const std::vector<std::vector<size_t >> dilations2D = {{1, 1}};

View File

@ -24,6 +24,7 @@ namespace LayerTestsDefinitions {
enum class modelType {
TranspConvTransp = 0, /* Transpose(NHWC->NCHW) => Conv => Transpose(NCHW->NHWC) */
TranspConvBcastAddTransp, /* Transpose(NHWC->NCHW) => Conv => Broadcasted Add (Bias) => Transpose(NCHW->NHWC) */
TranspConvActTransp, /* Transpose(NHWC->NCHW) => Conv => Activation Function => Transpose(NCHW->NHWC) */
TranspConvBcastAddMaxPoolTransp, /* Transpose(NHWC->NCHW) => Conv => Broadcasted Add (Bias) => MaxPooling => Transpose(NCHW->NHWC) (2D Max Pool case) */
TranspConvBcastAddActTransp, /* Transpose(NHWC->NCHW) => Conv => Broadcasted Add (Bias) => Activation Function => Transpose(NCHW->NHWC) */
TranspConvBcastAddMaxPoolActTransp, /* Transpose(NHWC->NCHW) => Conv => Broadcasted Add (Bias) => MaxPool => Activation Function => Transpose(NCHW->NHWC) */
@ -141,13 +142,20 @@ protected:
}
break;
case modelType::TranspConvActTransp:
{
auto activation = std::make_shared<Relu>(conv);
lastOp = std::make_shared<Transpose>(activation, transposeOutOrder);
}
break;
case modelType::TranspConvBcastAddMaxPoolTransp:
{
auto bcastAdd = std::make_shared<Add>(conv, biasConst);
auto maxpool = std::make_shared<MaxPool>(bcastAdd, maxpoolStrides, Shape{0, 0}, Shape{0, 0}, maxpoolShape,
op::RoundingType::FLOOR, op::PadType::VALID);
auto transpose = std::make_shared<Transpose>(maxpool, transposeOutOrder);
auto lastOp = std::make_shared<Relu>(transpose);
lastOp = std::make_shared<Relu>(transpose);
}
break;
@ -221,6 +229,7 @@ const std::vector<op::PadType> padTypes = {
const std::vector<modelType> models = {
modelType::TranspConvTransp,
modelType::TranspConvBcastAddTransp,
modelType::TranspConvActTransp,
modelType::TranspConvBcastAddActTransp,
modelType::TranspConvTranspBcastAdd,
modelType::TranspConvTranspBcastAddAct,
@ -236,9 +245,9 @@ const std::vector<std::vector<ptrdiff_t>> padEnds2D = {{3, 1}};
const std::vector<std::vector<size_t >> dilations2D = {{1, 1}, {1, 2}, {2, 1}, {2, 2}};
const std::vector<size_t> numOutChannels2D = {4};
const std::vector<std::vector<size_t >> biases2D = {{1, 4, 1, 1}};
const std::vector<std::vector<size_t >> transp_biases2D = {{1, 1, 1, 4}};
const std::vector<std::vector<size_t >> maxpool1D_pools = {{1, 2}};
const std::vector<std::vector<size_t >> maxpool1D_strides = {{1, 1}};
const std::vector<std::vector<size_t >> transpBiases2D = {{1, 1, 1, 4}};
const std::vector<std::vector<size_t >> maxpool1DPools = {{1, 2}};
const std::vector<std::vector<size_t >> maxpool1DStrides = {{1, 1}};
const auto conv2DParams = ::testing::Combine(
::testing::ValuesIn(kernels2D),
@ -252,9 +261,9 @@ const auto conv2DParams = ::testing::Combine(
const auto miscParams = ::testing::Combine(
::testing::ValuesIn(biases2D),
::testing::ValuesIn(transp_biases2D),
::testing::ValuesIn(maxpool1D_pools),
::testing::ValuesIn(maxpool1D_strides)
::testing::ValuesIn(transpBiases2D),
::testing::ValuesIn(maxpool1DPools),
::testing::ValuesIn(maxpool1DStrides)
);
INSTANTIATE_TEST_CASE_P(smoke_Decompose2DConv, Decompose2DConvTest,
@ -313,4 +322,60 @@ INSTANTIATE_TEST_CASE_P(smoke_Decompose2DConvStridesDilations, Decompose2DConvTe
::testing::ValuesIn(modelsStrides)),
Decompose2DConvTest::getTestCaseName);
/* ============= GNA 3.0 Supported Convolutions Combination ============= */
const std::vector<std::map<std::string, std::string>> configsGNA30 = {
{
{"GNA_DEVICE_MODE", "GNA_SW_EXACT"},
{"GNA_SCALE_FACTOR_0", "1"},
{"GNA_EXEC_TARGET", "GNA_TARGET_3_0"}
}
};
const std::vector<op::PadType> padTypesGNA30 = {
op::PadType::VALID,
};
const std::vector<modelType> modelsGNA30 = {
modelType::TranspConvBcastAddMaxPoolTransp,
};
const std::vector<std::vector<size_t>> input2DNHWCGNA30 = {{1, 16, 16, 32}};
const std::vector<std::vector<size_t >> kernels2DGNA30 = {{1, 2}, {1, 4}};
const std::vector<std::vector<size_t >> strides2DGNA30 = {{1, 1}};
const std::vector<std::vector<size_t >> dilations2DGNA30 = {{1, 1}, {1, 2}};
const std::vector<size_t> numOutChannels2DGNA30 = {8};
const std::vector<std::vector<size_t >> biases2DGNA30 = {{1, 8, 1, 1}};
const std::vector<std::vector<size_t >> transpBiases2DGNA30 = {{1, 1, 1, 8}};
const std::vector<std::vector<size_t >> maxpool2DPoolsGNA30 = {{1, 1}, {1, 2}};
const std::vector<std::vector<size_t >> maxpoo2DStridesGNA30 = {{1, 1}};
const auto conv2DParamsGNA30 = ::testing::Combine(
::testing::ValuesIn(kernels2DGNA30),
::testing::ValuesIn(strides2DGNA30),
::testing::ValuesIn(padBegins2D),
::testing::ValuesIn(padEnds2D),
::testing::ValuesIn(dilations2DGNA30),
::testing::ValuesIn(numOutChannels2DGNA30),
::testing::ValuesIn(padTypesGNA30)
);
const auto miscParamsGNA30 = ::testing::Combine(
::testing::ValuesIn(biases2DGNA30),
::testing::ValuesIn(transpBiases2DGNA30),
::testing::ValuesIn(maxpool2DPoolsGNA30),
::testing::ValuesIn(maxpoo2DStridesGNA30)
);
INSTANTIATE_TEST_CASE_P(smoke_Decompose2DConvGNA30, Decompose2DConvTest,
::testing::Combine(
conv2DParamsGNA30,
miscParamsGNA30,
::testing::ValuesIn(netPrecisions),
::testing::Values(CommonTestUtils::DEVICE_GNA),
::testing::ValuesIn(configsGNA30),
::testing::ValuesIn(input2DNHWCGNA30),
::testing::ValuesIn(modelsGNA30)),
Decompose2DConvTest::getTestCaseName);
} // namespace LayerTestsDefinitions
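The configsGNA30 map above is the set of plugin options a client would pass when it wants compilation for the GNA 3.0 execution target. A hedged usage sketch with an assumed model path; the Core / ReadNetwork / LoadNetwork calls are the standard Inference Engine API, not something introduced by this change.

#include <ie_core.hpp>
#include <map>
#include <string>

int main() {
    InferenceEngine::Core core;
    auto network = core.ReadNetwork("model.xml");  // placeholder path, not a file from this change
    const std::map<std::string, std::string> config = {
        {"GNA_DEVICE_MODE", "GNA_SW_EXACT"},
        {"GNA_SCALE_FACTOR_0", "1"},
        {"GNA_EXEC_TARGET", "GNA_TARGET_3_0"},
    };
    auto executableNetwork = core.LoadNetwork(network, "GNA", config);
    auto inferRequest = executableNetwork.CreateInferRequest();
    return 0;
}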

View File

@ -45,6 +45,12 @@ GNA2_API enum Gna2Status Gna2MemoryAlloc(
return Gna2StatusSuccess;
}
GNA2_API enum Gna2Status Gna2DeviceCreateForExport(
Gna2DeviceVersion targetDeviceVersion,
uint32_t * deviceIndex) {
*deviceIndex = 1;
return Gna2StatusSuccess;
}
GNA2_API enum Gna2Status Gna2DeviceOpen(
uint32_t deviceIndex) {
return Gna2StatusSuccess;
@ -109,12 +115,6 @@ GNA2_API enum Gna2Status Gna2RequestConfigEnableActiveList(
return Gna2StatusSuccess;
}
GNA2_API enum Gna2Status Gna2RequestConfigEnableHardwareConsistency(
uint32_t requestConfigId,
enum Gna2DeviceVersion deviceVersion) {
return Gna2StatusSuccess;
}
GNA2_API enum Gna2Status Gna2RequestConfigSetAccelerationMode(
uint32_t requestConfigId,
enum Gna2AccelerationMode accelerationMode) {

View File

@ -20,6 +20,7 @@ namespace {
enum class modelType {
TranspConvTransp = 0, /* Transpose(NHWC->NCHW) => Conv => Transpose(NCHW->NHWC) */
TranspConvBcastAddTransp, /* Transpose(NHWC->NCHW) => Conv => Broadcasted Add (Bias) => Transpose(NCHW->NHWC) */
TranspConvActTransp, /* Transpose(NHWC->NCHW) => Conv => Activation Function => Transpose(NCHW->NHWC) */
TranspConvBcastAddMaxPoolTransp, /* Transpose(NHWC->NCHW) => Conv => Broadcasted Add (Bias) => MaxPooling => Transpose(NCHW->NHWC) (2D Max Pool case) */
TranspConvBcastAddActTransp, /* Transpose(NHWC->NCHW) => Conv => Broadcasted Add (Bias) => Activation Function => Transpose(NCHW->NHWC) */
TranspConvBcastAddMaxPoolActTransp, /* Transpose(NHWC->NCHW) => Conv => Broadcasted Add (Bias) => MaxPool => Activation Function => Transpose(NCHW->NHWC) */
@ -66,7 +67,7 @@ void GetConvParams(std::shared_ptr<ngraph::opset7::Convolution> conv, ConvData&
conv_data.pads_end_width = conv->get_pads_end()[1];
}
std::shared_ptr<ngraph::opset7::FakeQuantize> createFQ(ngraph::Output<ngraph::Node>& in_node) {
std::shared_ptr<ngraph::opset7::FakeQuantize> createFQ(std::shared_ptr<ngraph::Node>& in_node) {
auto input_low = ngraph::opset7::Constant::create(ngraph::element::f32, ngraph::Shape{1}, {1});
auto input_high = ngraph::opset7::Constant::create(ngraph::element::f32, ngraph::Shape{1}, {5});
auto output_low = ngraph::opset7::Constant::create(ngraph::element::f32, ngraph::Shape{1}, {0});
@ -76,7 +77,7 @@ std::shared_ptr<ngraph::opset7::FakeQuantize> createFQ(ngraph::Output<ngraph::No
ngraph::Output<ngraph::Node> createBiasFQ(const ngraph::Output<ngraph::Node>& in_node,
std::shared_ptr<ngraph::opset7::Constant>& bias_const, const bool& fq) {
ngraph::Output<ngraph::Node> bcast_add = std::make_shared<ngraph::opset7::Add>(in_node, bias_const);
std::shared_ptr<ngraph::Node> bcast_add = std::make_shared<ngraph::opset7::Add>(in_node, bias_const);
if (fq) {
bcast_add = createFQ(bcast_add);
@ -100,7 +101,7 @@ std::shared_ptr<ngraph::opset7::Result> createFunction(const bool& fq,
ConvData* conv_data) {
auto transpose_in_order = std::make_shared<ngraph::opset7::Constant>(ngraph::element::i64, ngraph::Shape{4}, std::vector<int64_t>{0, 3, 1, 2});
auto transpose_in = std::make_shared<ngraph::opset7::Transpose>(input_node, transpose_in_order);
ngraph::Output<ngraph::Node> filters = std::make_shared<ngraph::opset7::Constant>(ngraph::element::i64,
std::shared_ptr<ngraph::Node> filters = std::make_shared<ngraph::opset7::Constant>(ngraph::element::i64,
ngraph::Shape{4, input_node.get_shape()[3], filters_shape[0], filters_shape[1]});
if (fq) {
@ -123,6 +124,19 @@ std::shared_ptr<ngraph::opset7::Result> createFunction(const bool& fq,
}
break;
case modelType::TranspConvActTransp:
{
auto bcast_add = createBiasFQ(conv, bias_const, fq);
std::shared_ptr<ngraph::Node> activation = std::make_shared<ngraph::opset7::Relu>(bcast_add);
if (fq) {
activation = createFQ(activation);
}
last_op = std::make_shared<ngraph::opset7::Transpose>(activation, transpose_out_order);
}
break;
case modelType::TranspConvBcastAddMaxPoolTransp:
{
auto bcast_add = createBiasFQ(conv, bias_const, fq);
@ -428,6 +442,9 @@ INSTANTIATE_TEST_SUITE_P(ConvertPaddedToValidConvTestSuite, ConvertPaddedToValid
std::make_tuple(modelType::TranspConvBcastAddTransp, ngraph::PartialShape{1, 1, 16, 8}, ngraph::Shape{1, 2}, ngraph::Strides{1, 1},
ngraph::CoordinateDiff{0, 2}, ngraph::CoordinateDiff{0, 3}, ngraph::Strides{1, 1},
ngraph::Shape{1, 4, 1, 1}, ngraph::Strides{1, 1}, ngraph::Shape{1, 2}, ngraph::op::PadType::EXPLICIT),
std::make_tuple(modelType::TranspConvActTransp, ngraph::PartialShape{1, 1, 16, 8}, ngraph::Shape{1, 2}, ngraph::Strides{1, 1},
ngraph::CoordinateDiff{0, 2}, ngraph::CoordinateDiff{0, 3}, ngraph::Strides{1, 1},
ngraph::Shape{1, 4, 1, 1}, ngraph::Strides{1, 1}, ngraph::Shape{1, 2}, ngraph::op::PadType::EXPLICIT),
std::make_tuple(modelType::TranspConvBcastAddMaxPoolTransp, ngraph::PartialShape{1, 1, 16, 8}, ngraph::Shape{1, 2}, ngraph::Strides{1, 1},
ngraph::CoordinateDiff{0, 2}, ngraph::CoordinateDiff{0, 3}, ngraph::Strides{1, 1},
ngraph::Shape{1, 4, 1, 1}, ngraph::Strides{1, 1}, ngraph::Shape{1, 2}, ngraph::op::PadType::EXPLICIT),
@ -459,6 +476,9 @@ INSTANTIATE_TEST_SUITE_P(ConvertPaddedToValidConvInvalidTestSuite, ConvertPadded
std::make_tuple(modelType::TranspConvBcastAddTransp, ngraph::PartialShape{2, 1, 16, 8}, ngraph::Shape{1, 2}, ngraph::Strides{1, 1},
ngraph::CoordinateDiff{0, 2}, ngraph::CoordinateDiff{0, 3}, ngraph::Strides{1, 1},
ngraph::Shape{1, 4, 1, 1}, ngraph::Strides{1, 1}, ngraph::Shape{1, 2}, ngraph::op::PadType::EXPLICIT),
std::make_tuple(modelType::TranspConvActTransp, ngraph::PartialShape{2, 1, 16, 8}, ngraph::Shape{1, 2}, ngraph::Strides{1, 1},
ngraph::CoordinateDiff{0, 2}, ngraph::CoordinateDiff{0, 3}, ngraph::Strides{1, 1},
ngraph::Shape{1, 4, 1, 1}, ngraph::Strides{1, 1}, ngraph::Shape{1, 2}, ngraph::op::PadType::EXPLICIT),
std::make_tuple(modelType::TranspConvBcastAddMaxPoolTransp, ngraph::PartialShape{2, 16, 16, 8}, ngraph::Shape{1, 2}, ngraph::Strides{1, 1},
ngraph::CoordinateDiff{0, 2}, ngraph::CoordinateDiff{0, 3}, ngraph::Strides{1, 1},
ngraph::Shape{1, 4, 1, 1}, ngraph::Strides{1, 1}, ngraph::Shape{5, 1}, ngraph::op::PadType::EXPLICIT),

View File

@ -22,6 +22,7 @@ namespace {
enum class modelType {
TranspConvTransp = 0, /* Transpose(NHWC->NCHW) => Conv => Transpose(NCHW->NHWC) */
TranspConvBcastAddTransp, /* Transpose(NHWC->NCHW) => Conv => Broadcasted Add (Bias) => Transpose(NCHW->NHWC) */
TranspConvActTransp, /* Transpose(NHWC->NCHW) => Conv => Activation Function => Transpose(NCHW->NHWC) */
TranspConvBcastAddMaxPoolTransp, /* Transpose(NHWC->NCHW) => Conv => Broadcasted Add (Bias) => MaxPooling => Transpose(NCHW->NHWC) (2D Max Pool case) */
TranspConvBcastAddActTransp, /* Transpose(NHWC->NCHW) => Conv => Broadcasted Add (Bias) => Activation Function => Transpose(NCHW->NHWC) */
TranspConvBcastAddMaxPoolActTransp, /* Transpose(NHWC->NCHW) => Conv => Broadcasted Add (Bias) => MaxPool => Activation Function => Transpose(NCHW->NHWC) */
@ -157,6 +158,19 @@ std::shared_ptr<ngraph::opset7::Result> createFunction(const bool& fq,
}
break;
case modelType::TranspConvActTransp:
{
fq_bias = createBiasFQ(conv, bias_const, bias, fq);
std::shared_ptr<ngraph::Node> activation = std::make_shared<ngraph::opset7::Relu>(fq_bias);
if (fq) {
activation = createFQ(activation);
}
last_op = std::make_shared<ngraph::opset7::Transpose>(activation, transpose_out_order);
}
break;
case modelType::TranspConvBcastAddMaxPoolTransp:
{
fq_bias = createBiasFQ(conv, bias_const, bias, fq);
@ -555,7 +569,7 @@ std::shared_ptr<ngraph::Node> CreateDeomposedConv(const GraphData& graph_data, C
// We need to calculate some parameters in case horizontal stride > 1 is used, because if we use the ones available from the original convolution
// we won't take into account the fact horizontal strides will be supported by the newly created 1D convolution, and not by decomposition
size_t filter_dilation_width = conv_params.filter_width > 1 ? conv_params.filter_dilation_width : 1;
size_t output_width = (conv_params.input_width - (conv_params.filter_width + filter_dilation_width - 2));
size_t output_width = (conv_params.input_width - (filter_dilation_width * (conv_params.filter_width - 1)));
if (conv_params.filter_width > 1) {
for (size_t filter_width = 0; filter_width < conv_params.filter_width; filter_width++) {
@ -695,21 +709,23 @@ std::shared_ptr<ngraph::Function> Decompose2DConvTestFixture::get_reference(cons
void execute_test(modelType model, std::shared_ptr<ngraph::Function> function, std::shared_ptr<ngraph::Function> reference_function) {
ngraph::pass::Manager manager;
manager.register_pass<ngraph::pass::InitNodeInfo>();
InferenceEngine::Precision gnaPrecision = InferenceEngine::Precision::I16;
switch (model) {
default:
case modelType::TranspConvTransp:
case modelType::TranspConvBcastAddTransp:
case modelType::TranspConvActTransp:
case modelType::TranspConvBcastAddMaxPoolTransp:
case modelType::TranspConvBcastAddActTransp:
case modelType::TranspConvBcastAddMaxPoolActTransp:
manager.register_pass<GNAPluginNS::Decompose2DConv>();
manager.register_pass<GNAPluginNS::Decompose2DConv>("", gnaPrecision);
break;
case modelType::TranspConvTranspBcastAdd:
manager.register_pass<GNAPluginNS::Decompose2DConvTransposedWithBias>();
manager.register_pass<GNAPluginNS::Decompose2DConvTransposedWithBias>("", gnaPrecision);
break;
case modelType::TranspConvTranspBcastAddAct:
manager.register_pass<GNAPluginNS::Decompose2DConvTransposedWithBiasAF>();
manager.register_pass<GNAPluginNS::Decompose2DConvTransposedWithBiasAF>("", gnaPrecision);
break;
}
@ -732,6 +748,8 @@ INSTANTIATE_TEST_SUITE_P(Decompose2DConvTestSuite, Decompose2DConvTestFixture,
ngraph::Strides{1, 1}, ngraph::Shape{1, 4, 1, 1}, ngraph::Strides{1, 1}, ngraph::Shape{1, 1}),
std::make_tuple(modelType::TranspConvBcastAddTransp, ngraph::PartialShape{1, 4, 4, 32}, ngraph::Shape{1, 2}, ngraph::Strides{1, 1},
ngraph::Strides{1, 1}, ngraph::Shape{1, 4, 1, 1}, ngraph::Strides{1, 1}, ngraph::Shape{1, 1}),
std::make_tuple(modelType::TranspConvActTransp, ngraph::PartialShape{1, 4, 4, 32}, ngraph::Shape{1, 2}, ngraph::Strides{1, 1},
ngraph::Strides{1, 1}, ngraph::Shape{1, 4, 1, 1}, ngraph::Strides{1, 1}, ngraph::Shape{1, 1}),
std::make_tuple(modelType::TranspConvBcastAddMaxPoolTransp, ngraph::PartialShape{1, 4, 4, 32}, ngraph::Shape{1, 2}, ngraph::Strides{1, 1},
ngraph::Strides{1, 1}, ngraph::Shape{1, 4, 1, 1}, ngraph::Strides{1, 1}, ngraph::Shape{1, 1}),
std::make_tuple(modelType::TranspConvBcastAddActTransp, ngraph::PartialShape{1, 4, 4, 32}, ngraph::Shape{1, 2}, ngraph::Strides{1, 1},
@ -756,6 +774,8 @@ INSTANTIATE_TEST_SUITE_P(Decompose2DConvInvalidTestSuite, Decompose2DConvTestInv
ngraph::Strides{1, 1}, ngraph::Shape{1, 4, 1, 1}, ngraph::Strides{1, 1}, ngraph::Shape{1, 2}),
std::make_tuple(modelType::TranspConvBcastAddTransp, ngraph::PartialShape{2, 4, 4, 32}, ngraph::Shape{1, 2}, ngraph::Strides{1, 1},
ngraph::Strides{1, 1}, ngraph::Shape{1, 4, 1, 1}, ngraph::Strides{1, 1}, ngraph::Shape{1, 2}),
std::make_tuple(modelType::TranspConvActTransp, ngraph::PartialShape{2, 4, 4, 32}, ngraph::Shape{1, 2}, ngraph::Strides{1, 1},
ngraph::Strides{1, 1}, ngraph::Shape{1, 4, 1, 1}, ngraph::Strides{1, 1}, ngraph::Shape{1, 2}),
std::make_tuple(modelType::TranspConvBcastAddMaxPoolTransp, ngraph::PartialShape{1, 16, 16, 128}, ngraph::Shape{5, 5}, ngraph::Strides{1, 1},
ngraph::Strides{1, 1}, ngraph::Shape{1, 4, 1, 1}, ngraph::Strides{1, 1}, ngraph::Shape{2, 2}),
std::make_tuple(modelType::TranspConvBcastAddActTransp, ngraph::PartialShape{2, 4, 4, 32}, ngraph::Shape{1, 2}, ngraph::Strides{1, 1},

View File

@ -44,6 +44,13 @@ GNA2_API enum Gna2Status Gna2MemoryAlloc(
return Gna2StatusSuccess;
}
GNA2_API enum Gna2Status Gna2DeviceCreateForExport(
Gna2DeviceVersion targetDeviceVersion,
uint32_t * deviceIndex) {
*deviceIndex = 1;
return Gna2StatusSuccess;
}
GNA2_API enum Gna2Status Gna2DeviceOpen(
uint32_t deviceIndex) {
if (current != nullptr) {
@ -139,15 +146,6 @@ GNA2_API enum Gna2Status Gna2RequestConfigEnableActiveList(
return Gna2StatusSuccess;
}
GNA2_API enum Gna2Status Gna2RequestConfigEnableHardwareConsistency(
uint32_t requestConfigId,
enum Gna2DeviceVersion deviceVersion) {
if (current != nullptr) {
return current->Gna2RequestConfigEnableHardwareConsistency(requestConfigId, deviceVersion);
}
return Gna2StatusSuccess;
}
GNA2_API enum Gna2Status Gna2RequestConfigSetAccelerationMode(
uint32_t requestConfigId,
enum Gna2AccelerationMode accelerationMode) {

View File

@ -66,7 +66,7 @@ public:
}
};
#if GNA_LIB_VER == 2
void expect_enqueue_calls(GNACppApi &mockApi, bool enableHardwareConsistency = true){
void expect_enqueue_calls(GNACppApi &mockApi){
EXPECT_CALL(mockApi, Gna2ModelCreate(_,_,_)).Times(AtLeast(1)).WillRepeatedly(Invoke([](
uint32_t deviceIndex,
struct Gna2Model const * model,
@ -82,10 +82,6 @@ void expect_enqueue_calls(GNACppApi &mockApi, bool enableHardwareConsistency = t
return Gna2StatusSuccess;
}));
if (enableHardwareConsistency) {
EXPECT_CALL(mockApi, Gna2RequestConfigEnableHardwareConsistency(_,_)).Times(AtLeast(1)).WillRepeatedly(Return(Gna2StatusSuccess));
}
EXPECT_CALL(mockApi, Gna2RequestConfigSetAccelerationMode(_,_)).Times(AtLeast(1)).WillRepeatedly(Return(Gna2StatusSuccess));
EXPECT_CALL(mockApi, Gna2InstrumentationConfigAssignToRequestConfig(_,_)).Times(AtLeast(1)).WillRepeatedly(Return(Gna2StatusSuccess));
@ -352,11 +348,7 @@ void GNAPropagateMatcher :: match() {
EXPECT_CALL(mockApi, GNAPropagateForward(_, _, _, _, _, Eq(_env.proc_type)))
.WillOnce(Return(GNA_NOERROR));
#elif GNA_LIB_VER == 2
if(_env.proc_type == (GNA_SOFTWARE & GNA_HARDWARE)) {
expect_enqueue_calls(mockApi);
} else {
expect_enqueue_calls(mockApi, false);
}
expect_enqueue_calls(mockApi);
#endif
break;
case GnaPluginTestEnvironment::matchPwlInserted :
@ -574,8 +566,6 @@ void GNAPluginAOTMatcher :: match() {
return Gna2StatusSuccess;
}));
EXPECT_CALL(mockApi, Gna2RequestConfigEnableHardwareConsistency(_,_)).Times(AtLeast(1)).WillRepeatedly(Return(Gna2StatusSuccess));
EXPECT_CALL(mockApi, Gna2InstrumentationConfigAssignToRequestConfig(_,_)).Times(AtLeast(1)).WillRepeatedly(Return(Gna2StatusSuccess));
#else
#error "Not supported GNA_LIB_VER"
@ -703,8 +693,6 @@ void GNADumpXNNMatcher::match() {
ON_CALL(mockApi, Gna2RequestConfigSetAccelerationMode(_,_)).WillByDefault(Return(Gna2StatusSuccess));
ON_CALL(mockApi, Gna2RequestConfigEnableHardwareConsistency(_,_)).WillByDefault(Return(Gna2StatusSuccess));
ON_CALL(mockApi, Gna2InstrumentationConfigAssignToRequestConfig(_,_)).WillByDefault(Return(Gna2StatusSuccess));
}
#else
@ -808,8 +796,6 @@ void GNAQueryStateMatcher :: match() {
return Gna2StatusSuccess;
}));
EXPECT_CALL(mockApi, Gna2RequestConfigEnableHardwareConsistency(_,_)).Times(AtLeast(1)).WillRepeatedly(Return(Gna2StatusSuccess));
EXPECT_CALL(mockApi, Gna2InstrumentationConfigAssignToRequestConfig(_,_)).Times(AtLeast(1)).WillRepeatedly(Return(Gna2StatusSuccess));
#endif
IE_SUPPRESS_DEPRECATED_START