[GNA]: Cleanup policy (#6216)
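This change removes the GNAPluginNS::Policy class (gna_plugin_policy.hpp) and everything that carried it around: GNAGraphCompiler::setPolicy, GNAPlugin::SetPolicy, PassManagerSettings::policy, IPassManager::getPolicy and the test-side withPolicy hooks. The former default policy values become the only behaviour: concat alignment always takes the FAST filter path, NHWC-to-NCHW permutations are always removed (ReversePermutationsPass is dropped), trivial-concat flattening stays enabled, and the eltwise split threshold is hard-coded. The gist of the change, with lines taken from the hunks below:

    // before: the policy object was threaded through the pass manager and the graph compiler
    auto passes = make_shared<PassManager>(PassManagerSettings{policy, runBeforeCopy, lowPrecision}, network);
    graphCompiler.setPolicy(policy);

    // after: no policy argument; the former defaults are inlined at the use sites,
    // e.g. in EltwiseSplitOverChannelsPass (GNA limits this to 0xFFFF)
    auto passes = make_shared<PassManager>(PassManagerSettings{runBeforeCopy, lowPrecision}, network);
    auto maxAffineElements = 65536 - 64;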
@@ -34,7 +34,6 @@
#include "layers/gna_crop_layer.hpp"
#include "layers/gna_fake_quantize_layer.hpp"
#include "round_float_define.hpp"
-#include "gna_plugin_policy.hpp"
#include "gna_groups.hpp"
#include "backend/gna_limitations.hpp"

@@ -62,10 +61,6 @@ void GNAGraphCompiler::setGNAFlagsPtr(std::shared_ptr<GNAPluginNS::GNAFlags> gna
this->gnaFlags = std::move(gnaFlagsPtr);
}

-void GNAGraphCompiler::setPolicy(GNAPluginNS::Policy policyToSet) {
-this->policy = policyToSet;
-}
-
intel_dnn_component_t * GNAGraphCompiler::find_first_unused_input(InferenceEngine::CNNLayerPtr current) {
if (current->insData.empty())
return nullptr;
@@ -547,10 +542,7 @@ void GNAGraphCompiler::finalizeConvolution2DPrimitive(InferenceEngine::CNNLayerP
auto effectiveInputWidth = in_width;
auto effectiveInputHeight = in_height;

-if (policy.cnn2dInputPaddingSupported) {
-effectiveInputWidth += convolution._padding_x * 2;
-effectiveInputHeight += convolution._padding_y * 2;
-} else if (convolution._padding_x != 0 || convolution._padding_y != 0 ||
+if (convolution._padding_x != 0 || convolution._padding_y != 0 ||
convolution._pads_end.at(X_AXIS) != 0 || convolution._pads_end.at(Y_AXIS) != 0) {
THROW_GNA_LAYER_EXCEPTION(layer) << "Convolution's input padding is not supported";
}
@@ -1641,7 +1633,7 @@ void GNAGraphCompiler::ConcatAlignFilterPrimitive(InferenceEngine::CNNLayerPtr l
uint32_t num_rows_copied = 0;
// in case of left alignment succeed, but due to number of elements not multiple of 8 we need to insert align_filter
// we are improving it by inserting copy layer of size that covers most of elements - remained max of 32x31 affine filter
-if (policy.ConcatAlignmentPolicy == Policy::ConcatAlignment::FAST && 0 == numRowsPadded && ALIGN(num_rows_in, 32) > 32) {
+if (0 == numRowsPadded && ALIGN(num_rows_in, 32) > 32) {
// can we use copy at all
num_rows_copied = ALIGN(num_rows_in, 32) - 32;

@@ -27,7 +27,6 @@
#include "backend/gna_limitations.hpp"
#include "gna_device.hpp"
#include "gna_data_types.hpp"
-#include "gna_plugin_policy.hpp"

namespace GNAPluginNS {
class GNAGraphCompiler {
@@ -36,7 +35,6 @@ private:
std::shared_ptr<GNAPluginNS::gna_memory_type> gnamem;
std::shared_ptr<GNAPluginNS::InputDesc> inputDesc;
std::shared_ptr<GNAPluginNS::GNAFlags> gnaFlags;
-Policy policy;

// layers with extra storage for connections and additional
// non trivial processing
@@ -64,7 +62,6 @@ public:
void setDNNPtr(std::shared_ptr<GNAPluginNS::backend::AMIntelDNN> dnnPtr);
void setInputDescPtr(std::shared_ptr<GNAPluginNS::InputDesc> inputDescPtr);
void setGNAFlagsPtr(std::shared_ptr<GNAPluginNS::GNAFlags> gnaFlagsPtr);
-void setPolicy(GNAPluginNS::Policy policy);

void fillMemoryConnections(std::unordered_map<std::string,
std::vector<InferenceEngine::CNNLayerPtr>> &memoryPairs);

@@ -738,7 +738,7 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) {
// network optimisation phases
int passIdx = 0;
auto run_passes = [&] (const CNNNetwork& network, bool runBeforeCopy, bool lowPrecision) {
-auto passes = make_shared<PassManager>(PassManagerSettings{policy, runBeforeCopy, lowPrecision}, network);
+auto passes = make_shared<PassManager>(PassManagerSettings{runBeforeCopy, lowPrecision}, network);
passes->registerPass<RemoveConstPass>();
passes->registerPass<UnrollTIPass>();
passes->registerPass<RemoveConstPass>();
@@ -765,13 +765,7 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) {
passes->registerPass<FlattenTrivialConcatPass>();
passes->registerPass<InsertConcatAligningFilterPass>();
passes->registerPass<ReorderConcatInputsPass>();
-if (policy.PermutePolicy != Policy::Permute::DISABLED) {
-passes->registerPass<ReversePermutationsPass>();
-}
-if (policy.NHWCToNCHWPolicy != Policy::NHWCToNCHW::DISABLED) {
-passes->registerPass<RemovePermutationsNHWCToNCHWPass>();
-}
-
+passes->registerPass<RemovePermutationsNHWCToNCHWPass>();
passes->registerPass<InsertIdentityLayerPass>();
passes->registerPass<BreakFusingOfOutputLayersPass>();
passes->registerPass<InsertDiagonalLayerPass>();
@@ -840,9 +834,6 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) {

auto sortedNet = CNNNetSortTopologicallyEx(newNet, make_fuzed_order);

-// passing policy to compiler
-graphCompiler.setPolicy(policy);
-
if (sortedNet.empty()) {
THROW_GNA_EXCEPTION << "Sorted network is empty";
}

@@ -21,7 +21,6 @@
#include "backend/am_intel_dnn.hpp"
#include "gna_data_types.hpp"
#include "gna_graph_compiler.hpp"
-#include "gna_plugin_policy.hpp"
#include "gna_plugin_log.hpp"
#include "gna_plugin_config.hpp"
#include <legacy/ie_util_internal.hpp>
@@ -69,8 +68,6 @@ class GNAPlugin : public InferenceEngine::IInferencePlugin {

intel_dnn_number_type_t output_type = kDnnInt;

-GNAPluginNS::Policy policy;
-
#if GNA_LIB_VER == 2
void createRequestConfigsForGnaModels();
#endif
@@ -158,11 +155,6 @@ class GNAPlugin : public InferenceEngine::IInferencePlugin {
INFERENCE_ENGINE_DEPRECATED("Use InferRequest::QueryState instead")
std::vector<InferenceEngine::IVariableStateInternal::Ptr> QueryState();

-/**
-* test-wise API
-*/
-void SetPolicy(GNAPluginNS::Policy p) {policy = p;}
-
/**
* QueryMetrics API
*/

@@ -1,96 +0,0 @@
-// Copyright (C) 2018-2021 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#pragma once
-
-#include <ostream>
-namespace GNAPluginNS {
-/**
-* @brief policy agregates various settings that cannot be tweak using configuration options right now,
-* and essential to keep test coverage for options both in on and off cases
-*/
-class Policy {
-public:
-/**
-* @brief for scaleshift substitution, weight tiling simplify final graph but have extra weights overhead
-* if not defined scaleshift broadcast will result in creating multiple diagonal layers instead of weight tiling
-*/
-enum class ScaleShift {
-WEIGHTS_TILING,
-/**
-* GNA has limited amount of batch so even existed topologies cannot be substituted with only batching,
-* this option combines batch and weights tiling
-*/
-BATCH_AND_WEIGHTS_TILING,
-DIAGLAYER_TILING
-} ScaleShiftPolicy = ScaleShift::WEIGHTS_TILING;
-
-/**
-* Policy on whether to substitute permute layers or not
-*/
-enum class Permute {
-DISABLED,
-AUTO_PERMUTE
-} PermutePolicy = Permute::DISABLED;
-
-enum class FlattenTrivialConcatConversion {
-DISABLED,
-ENABLED
-} ConcatConversionPolicy = FlattenTrivialConcatConversion::ENABLED;
-
-enum class ConcatAlignment {
-DISABLED,
-DISABLED_FOR_FP32,
-ENABLED,
-FAST
-} ConcatAlignmentPolicy = ConcatAlignment::FAST;
-
-/**
-* Policy to support --disable_nhwc_to_nchw option in MO
-*/
-enum class NHWCToNCHW {
-DISABLED,
-REMOVE_LAST,
-REMOVE_ALL
-} NHWCToNCHWPolicy = NHWCToNCHW::REMOVE_ALL;
-
-/**
-* @brief trim of gna diagonal affine layer maximum elements number
-*/
-class GNAAffineDiagonal {
-public:
-enum : uint32_t {
-UNLIMIT,
-// gna limit this to be OxFFFF
-LIMITED_TO_DEFAULT_GNA2_65536 = 65536 - 64
-};
-uint32_t limitedTo = LIMITED_TO_DEFAULT_GNA2_65536;
-} GNAAffineDiagonalPolicy;
-
-bool cnn2dInputPaddingSupported = false;
-};
-
-inline std::ostream& operator<<(std::ostream& os, Policy::ScaleShift policy) {
-switch (policy) {
-case Policy::ScaleShift::WEIGHTS_TILING : os << "WEIGHTS_TILING"; break;
-case Policy::ScaleShift::BATCH_AND_WEIGHTS_TILING: os << "BATCH_AND_WEIGHTS_TILING"; break;
-case Policy::ScaleShift::DIAGLAYER_TILING : os << "DIAGLAYER_TILING"; break;
-default : os.setstate(std::ios_base::failbit);
-}
-return os;
-}
-
-inline std::ostream& operator<<(std::ostream& os, Policy::ConcatAlignment policy) {
-switch (policy) {
-case Policy::ConcatAlignment::DISABLED : os << "DISABLED"; break;
-case Policy::ConcatAlignment::DISABLED_FOR_FP32 : os << "DISABLED_FOR_FP32"; break;
-case Policy::ConcatAlignment::ENABLED : os << "ENABLED"; break;
-case Policy::ConcatAlignment::FAST : os << "FAST"; break;
-default : os.setstate(std::ios_base::failbit);
-}
-return os;
-}
-
-
-} // namespace GNAPluginNS

@@ -2,7 +2,6 @@
// SPDX-License-Identifier: Apache-2.0
//

-#include "gna_plugin_policy.hpp"
#include <vector>
#include <string>
#include <memory>
@@ -613,99 +612,6 @@ void SubstitutePReluPass::run() {
}
}

-void ReversePermutationsPass::run() {
-OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "ReversePermutationsPass");
-std::function<CNNLayerPtr(CNNLayerPtr, std::function<bool(CNNLayerPtr)>)> prevLayerSkipCertain
-= [&prevLayerSkipCertain](CNNLayerPtr layer, std::function<bool(CNNLayerPtr)> shouldSkip) -> CNNLayerPtr {
-if (CNNNetHasPrevLayer(layer.get())) {
-return nullptr;
-}
-auto prev = CNNNetPrevLayer(layer);
-
-if (!shouldSkip(prev)) return prevLayerSkipCertain(prev, shouldSkip);
-
-return prev;
-};
-
-std::function<CNNLayerPtr(CNNLayerPtr)> nextLayerSkipReshape = [&nextLayerSkipReshape](CNNLayerPtr layer) -> CNNLayerPtr {
-if (layer->outData.empty()) {
-return nullptr;
-}
-if (getInputTo(layer->outData.front()).size() != 1) {
-return nullptr;
-}
-auto next = getInputTo(layer->outData.front()).begin()->second;
-
-if (LayerInfo(next).isNonFunctional()) return nextLayerSkipReshape(next);
-
-return next;
-};
-
-auto prevConv = [&prevLayerSkipCertain](CNNLayerPtr layer) -> CNNLayerPtr {
-return prevLayerSkipCertain(layer, [] (CNNLayerPtr l2) {
-return
-LayerInfo(l2).isNonFunctional() ||
-LayerInfo(l2).isPooling() ||
-LayerInfo(l2).isActivation();
-});
-};
-
-std::unordered_set<std::string> affineWithPermutedWeights;
-std::list<CNNLayerPtr> permutationstoRemove;
-
-for (auto & l : *pLayers) {
-if (!LayerInfo(l).isPermute()) {
-continue;
-}
-
-auto layerOrder = l->GetParamAsInts("order");
-
-if (layerOrder != std::vector<int>({0, 3, 2, 1})) {
-THROW_GNA_EXCEPTION << "Unsupported permute layer: " << l->name << ", order: was " << l->GetParamAsString("order") <<
-", but support order is 0,3,2,1";
-}
-
-// search for it's input convolution
-auto prev = prevConv(l);
-
-// pooling no used in speech models without convolution
-if (!prev) {
-THROW_GNA_EXCEPTION << "Unsupported permute layer: " << l->name << " no valid input to that layer";
-}
-
-// we can remove that permutation if it is input to ScaleShift or FC layer
-auto next = nextLayerSkipReshape(l);
-if (!next || !LayerInfo(next).isFullyConnected()) {
-THROW_GNA_EXCEPTION << "Unsupported permute layer: " << l->name << " no valid output of that layer";
-}
-
-permutationstoRemove.push_back(l);
-
-// removing that permutation layer and saving information about affine
-affineWithPermutedWeights.insert(next->name);
-}
-
-for (auto && toRemove : permutationstoRemove) {
-CNNNetworkRemoveLayer(toRemove);
-}
-
-// search for conv->affine sequences
-for (auto & l : *pLayers) {
-if (!LayerInfo(l).isFullyConnected() || 0 != affineWithPermutedWeights.count(l->name)) {
-continue;
-}
-// found an affine layer that not involved in permutations removing
-// searching whether it has direct input from convolution
-auto prevConvLayer = prevConv(l);
-if (!prevConvLayer) continue;
-
-auto directPrev = CNNNetPrevLayer(l);
-
-// TODO : make new permute
-CNNNetworkInsertLayer(l, directPrev, CNNLayerPtr(nullptr));
-}
-}
-
void RemovePermutationsNHWCToNCHWPass::run() {
OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "RemovePermutationsNHWCToNCHWPass");
std::set<CNNLayerPtr> permutations_to_remove;
@@ -720,7 +626,7 @@ void RemovePermutationsNHWCToNCHWPass::run() {

if (prev == nullptr || next == nullptr) continue;

-if (LayerInfo(prev).isPermute() && getPassManager()->getPolicy().NHWCToNCHWPolicy == Policy::NHWCToNCHW::REMOVE_ALL) {
+if (LayerInfo(prev).isPermute()) {
permutations_to_remove.insert(prev);
}

@@ -1040,9 +946,6 @@ void FlattenTrivialConcatPass::run() {
// 1, 1, 5, 3 then for axis 0, 1, 2 the change will be made and inputs will be reshaped to 1, 15,
// but for shape 2, 1, 5, 3 only axis 0 is valid and inputs will reshape to 1, 30
auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(pLayers->front());
-if (getPassManager()->getPolicy().ConcatConversionPolicy == Policy::FlattenTrivialConcatConversion::DISABLED) return;
-if (getPassManager()->getPolicy().ConcatAlignmentPolicy == Policy::ConcatAlignment::DISABLED) return;
-if (getPassManager()->getPolicy().ConcatAlignmentPolicy == Policy::ConcatAlignment::DISABLED_FOR_FP32 && !quantized) return;

auto getLayerByIndex = [](int idx, ConcatLayer* concatLayer) {
auto input = concatLayer->insData[idx];
@@ -1118,14 +1021,6 @@ void FlattenTrivialConcatPass::run() {
void InsertConcatAligningFilterPass::run() {
OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "InsertConcatAligningFilterPass");
auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(pLayers->front());
-
-if (getPassManager()->getPolicy().ConcatAlignmentPolicy == Policy::ConcatAlignment::DISABLED) {
-return;
-}
-// aligning specific not required in fp32 mode
-if (getPassManager()->getPolicy().ConcatAlignmentPolicy == Policy::ConcatAlignment::DISABLED_FOR_FP32 && !quantized) {
-return;
-}
// currently concat layer only supports 2 bytes in int16 and int8 mode. In fp32 mode this no necessary but usefull for testing
const int bytesPerConcatElement = 2;

@@ -1244,10 +1139,6 @@ void InsertConcatAligningFilterPass::run() {
void ReorderConcatInputsPass::run() {
OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "ReorderConcatInputsPass");
auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(pLayers->front());
-// aligning specific not required in fp32 mode
-if (getPassManager()->getPolicy().ConcatAlignmentPolicy == Policy::ConcatAlignment::DISABLED_FOR_FP32 && !quantized) {
-return;
-}
int numOfLinkLayers = 0;

for (auto& l : *pLayers) {
@@ -1461,9 +1352,6 @@ static InferenceEngine::Blob::Ptr tileBlob(Blob::Ptr& blob, size_t TileTo) {

void EltwiseSplitOverChannelsPass::run() {
OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "EltwiseSplitOverChannelsPass");
-if (getPassManager()->getPolicy().GNAAffineDiagonalPolicy.limitedTo == Policy::GNAAffineDiagonal::UNLIMIT) {
-return;
-}

for (auto & l : *pLayers) {
if (!LayerInfo(l).isEltwise()) {
@@ -1478,7 +1366,8 @@ void EltwiseSplitOverChannelsPass::run() {
auto oData = l->outData.front();
auto out_width = GetDataDimSize(oData, DataDimName::W);
auto totalElementsForOutput = details::product(oData->getDims().begin(), oData->getDims().end());
-auto maxAffineElements = getPassManager()->getPolicy().GNAAffineDiagonalPolicy.limitedTo;
+// gna limit this to be OxFFFF
+auto maxAffineElements = 65536 - 64;
if (totalElementsForOutput <= maxAffineElements) {
continue;
}
@@ -1629,31 +1518,25 @@ void SubstituteScaleShiftBroadCastPass::run() {
}

gnalog() << "Substitution ScaleShift broadcast for layer: " << l->name << "\n";
// approach 1 - weights tiling
-if (getPassManager()->getPolicy().ScaleShiftPolicy == Policy::ScaleShift::WEIGHTS_TILING) {
-if (nElements % scaleShift->_weights->size()) {
-THROW_GNA_EXCEPTION << "Cannot tile weights for layer: " << l->name << ", due to weights size not GCD of dims product";
-}
-scaleShift->_weights = tileBlob(scaleShift->_weights, nElements);
-if (scaleShift->_biases) {
-if (nElements % scaleShift->_biases->size()) {
-THROW_GNA_EXCEPTION << "Cannot tile biases for layer: " << l->name << ", due to biases size not GCD of dims product";
-}
-scaleShift->_biases = tileBlob(scaleShift->_biases, nElements);
-}
-
-auto tensor = InferenceEngine::TensorDesc(insData->getTensorDesc());
-tensor.reshape(SizeVector{ batchSize, nElements }, Layout::NC);
-auto reshapeName = scaleShift->name + "_input_" + std::to_string(0) + "_reshape";
-auto reshape = CNNNetworkCreateReshape(tensor, reshapeName, quantized);
-auto layer_before_scale_shift = getCreatorLayer(insData);
-
-CNNNetworkInsertLayer(layer_before_scale_shift.lock(), l, reshape);
-gnalog() << "\tInserted " << reshapeName << " between " << layer_before_scale_shift.lock()->name << " and " << l->name << std::endl;
-} else {
-THROW_GNA_EXCEPTION << "Not implemented substitution of scaleshift broadcast policy of "
-<< getPassManager()->getPolicy().ScaleShiftPolicy << "using layers tiling, layer: " << l->name;
+if (nElements % scaleShift->_weights->size()) {
+THROW_GNA_EXCEPTION << "Cannot tile weights for layer: " << l->name << ", due to weights size not GCD of dims product";
+}
+scaleShift->_weights = tileBlob(scaleShift->_weights, nElements);
+if (scaleShift->_biases) {
+if (nElements % scaleShift->_biases->size()) {
+THROW_GNA_EXCEPTION << "Cannot tile biases for layer: " << l->name << ", due to biases size not GCD of dims product";
+}
+scaleShift->_biases = tileBlob(scaleShift->_biases, nElements);
+}
+
+auto tensor = InferenceEngine::TensorDesc(insData->getTensorDesc());
+tensor.reshape(SizeVector{ batchSize, nElements }, Layout::NC);
+auto reshapeName = scaleShift->name + "_input_" + std::to_string(0) + "_reshape";
+auto reshape = CNNNetworkCreateReshape(tensor, reshapeName, quantized);
+auto layer_before_scale_shift = getCreatorLayer(insData);
+
+CNNNetworkInsertLayer(layer_before_scale_shift.lock(), l, reshape);
+gnalog() << "\tInserted " << reshapeName << " between " << layer_before_scale_shift.lock()->name << " and " << l->name << std::endl;
}
}

@@ -8,7 +8,6 @@
#include <string>
#include <map>
#include <ie_common.h>
-#include "gna_plugin_policy.hpp"

namespace GNAPluginNS {
/**
@@ -29,7 +28,6 @@ class IPassManager {
public:
virtual ~IPassManager() = default;
virtual int &getIntVar(std::string name) = 0;
-virtual const Policy &getPolicy() const = 0;
virtual const bool& isLowPrecision() const = 0;
virtual InferenceEngine::CNNNetwork &getNetwork() = 0;
};
@@ -75,17 +73,6 @@ DECL_PASS(InsertIdentityLayer);
*/
DECL_PASS(SubstituteScaleShiftBroadCast);

-/**
-* @brief GNA convolution layers have deinterleaved layout, while affine one doesn't
-* so between convolution and affine layers permute layers need to be inserted,
-* current MO approach is to insert such permutations
-* since GNA-HW already support conv->affine in permuted for, this pass inverses MO behavior
-* so its remove permutations of certain form conv->conv, and between conv->affine
-* and insert permutation between conv->affine if they are missed in IR
-* @param layers
-*/
-DECL_PASS(ReversePermutations);
-
/**
* @brief Pass support --disable_nhwc_to_nchw option in MO
* @param layers
@@ -220,7 +207,6 @@ DECL_PASS(MoveFakeQuantizeLayerIntoQuantParams);
DECL_PASS(TransposeWeightsFromNCHWToNHWC);

struct PassManagerSettings {
-Policy policy;
/// @brief whether to run passes before copy
bool runBeforeCopy;
bool lowPrecision;
@@ -245,9 +231,6 @@ public:
int & getIntVar(std::string name) override {
return intMap[name];
}
-const Policy & getPolicy() const override {
-return settings.policy;
-}
const bool& isLowPrecision() const override {
return settings.lowPrecision;
}

@@ -107,7 +107,6 @@ void GNAPropagateMatcher :: match() {
try {
// matching gna propagate forward call.
GNAPlugin plugin(_env.config);
-plugin.SetPolicy(_env.policy);
size_t inputSize = 10;
size_t outputSize = 10;
InputsDataMap inputsInfo;
@@ -508,7 +507,7 @@ void GNAPluginCreationMatcher :: match() {
void GNAPluginAOTMatcher :: match() {
// matching gna_propagate forward call.
MockICNNNetwork net;

size_t weightsSize = 656384;
auto weights = make_shared_blob<uint8_t >({ Precision::U8, {weightsSize}, Layout::C });
weights->allocate();
@@ -744,7 +743,7 @@ void GNAQueryStateMatcher :: match() {
auto weights = make_shared_blob<uint8_t>({ Precision::U8, {weightsSize}, Layout::C });
weights->allocate();
fillWeights(weights);

InferenceEngine::Core core;
InferenceEngine::CNNNetwork network;
ASSERT_NO_THROW_IE_EXCEPTION(network = core.ReadNetwork(_env.model, weights));

@@ -23,7 +23,6 @@
#include <cpp/ie_cnn_network.h>

#include <backend/dnn_types.h>
-#include <gna_plugin_policy.hpp>
#include <backend/gna_types.h>
#include <gna/gna_config.hpp>
#include <gna_plugin.hpp>
@@ -95,7 +94,6 @@ class GnaPluginTestEnvironment {
bool exportNetworkOnly = false;
std::function<void (InferenceEngine::CNNNetwork &)> cb;
std::map<std::string, std::string> config;
-GNAPluginNS::Policy policy;
bool matchThrows = false;
uint32_t proc_type = static_cast<intel_gna_proc_t>(GNA_SOFTWARE & GNA_HARDWARE);
std::string importedModelFileName;
@@ -168,10 +166,6 @@ class GNATestConfigurability : public GNATestBase{
_env.config[GNA_CONFIG_KEY(DEVICE_MODE)] = GNA_CONFIG_VALUE(SW_FP32);
return *dynamic_cast<T*>(this);
}
-T & withPolicy(GNAPluginNS::Policy::ConcatAlignment concatAlignmentPolicy) {
-_env.policy.ConcatAlignmentPolicy = concatAlignmentPolicy;
-return *dynamic_cast<T*>(this);
-}
T & withGNADeviceMode(std::string value) {
_env.config[GNA_CONFIG_KEY(DEVICE_MODE)] = value;
return *dynamic_cast<T*>(this);

@@ -11,7 +11,7 @@
#include <ie_precision.hpp>
#include "../gna_matcher.hpp"

-using GNAAlignFilterTestParams = std::tuple<InferenceEngine::Precision, GNAPluginNS::Policy::ConcatAlignment, std::size_t, std::size_t>;
+using GNAAlignFilterTestParams = std::tuple<InferenceEngine::Precision, std::size_t, std::size_t>;
using namespace GNAPluginNS;

class GNAAlignFilterTest : public GNATest<>,
@@ -20,11 +20,9 @@ class GNAAlignFilterTest : public GNATest<>,

static std::string getTestName(const testing::TestParamInfo<GNAAlignFilterTestParams>& params) {
std::string test_name;
-if (std::get<1>(params.param) == GNAPluginNS::Policy::ConcatAlignment::FAST) {
-test_name += "fast_";
-}
-test_name += "concat_of(" + std::to_string(std::get<2>(params.param));
-test_name += "_" + std::to_string(std::get<3>(params.param));
+test_name += "fast_";
+test_name += "concat_of(" + std::to_string(std::get<1>(params.param));
+test_name += "_" + std::to_string(std::get<2>(params.param));
test_name += ")_on_";
test_name += std::get<0>(params.param).name();
return test_name;
@@ -34,10 +32,9 @@ class GNAAlignFilterTest : public GNATest<>,

InferenceEngine::Precision precision = InferenceEngine::Precision::FP32;
std::size_t concat_inputs[2];
-GNAPluginNS::Policy::ConcatAlignment alignmentPolicy;

void SetUp() override {
-std::tie(precision, alignmentPolicy, concat_inputs[0], concat_inputs[1]) = GetParam();
+std::tie(precision, concat_inputs[0], concat_inputs[1]) = GetParam();
}

std::shared_ptr<ngraph::Function> getNgraphModel() {
@@ -83,40 +80,18 @@ TEST_P(GNAAlignFilterTest, concatWith_2_Inputs_Small_mem_footprint) {
return getFastAffineFilterParams(sz).second;
};

-switch(alignmentPolicy) {
-case Policy::ConcatAlignment::ENABLED : {
-//align first input by 8
-auto firstFilter = ALIGN(concat_inputs[0], 8) * concat_inputs[0];
-//align first input by 8
-auto extraLeftElementsForSecond = concat_inputs[0] + 32 - ALIGN(concat_inputs[0], 32);
+expected_copy_layers = getNumCopyElements(concat_inputs[0]);
+expected_affine_size = getsNumFilterWeights(concat_inputs[0]);

-auto secondFilter = ALIGN(concat_inputs[1], 8) * (extraLeftElementsForSecond + concat_inputs[1]);
+// calculation size for second filter
+auto offset = ALIGN(concat_inputs[0], 32) - 32;
+auto zerolen = concat_inputs[0] - offset;
+auto second_output_len = zerolen + concat_inputs[1];

-expected_affine_size = firstFilter + secondFilter;
-break;
-}
-case Policy::ConcatAlignment::FAST : {
-
-expected_copy_layers = getNumCopyElements(concat_inputs[0]);
-expected_affine_size = getsNumFilterWeights(concat_inputs[0]);
-
-// calculation size for second filter
-auto offset = ALIGN(concat_inputs[0], 32) - 32;
-auto zerolen = concat_inputs[0] - offset;
-auto second_output_len = zerolen + concat_inputs[1];
-
-expected_affine_size += second_output_len * ALIGN(concat_inputs[1], 8);
-break;
-}
-
-default : {
-FAIL() << "unsupported align policy: " << alignmentPolicy;
-}
-}
+expected_affine_size += second_output_len * ALIGN(concat_inputs[1], 8);

assert_that().onInferNgraphModel(ngraf)
.inNotCompactMode()
-.withPolicy(alignmentPolicy)
.withGNAConfig(std::string(GNA_CONFIG_KEY(SCALE_FACTOR)) + "_0", 1.0f)
.withGNAConfig(std::string(GNA_CONFIG_KEY(SCALE_FACTOR)) + "_1", 1.0f)
.withGNAConfig(GNA_CONFIG_KEY(PRECISION), precision.name())
@@ -152,7 +127,6 @@ TEST_P(GNAAlignFilterTest, concatWith_2_Inputs_accurate) {
.gna()
.propagate_forward()
.onCPU()
-.withPolicy(alignmentPolicy)
.called_with()
.input(ngraf->get_parameters().at(0)->get_name(), input_data[0])
.input(ngraf->get_parameters().at(1)->get_name(), input_data[1])
@@ -161,7 +135,6 @@ TEST_P(GNAAlignFilterTest, concatWith_2_Inputs_accurate) {
assert_that().onInferNgraphModel(ngraf)
.inNotCompactMode()
.gna()
-.withPolicy(alignmentPolicy)
.withGNAConfig(std::string(GNA_CONFIG_KEY(SCALE_FACTOR)) + "_0", 1.0f)
.withGNAConfig(std::string(GNA_CONFIG_KEY(SCALE_FACTOR)) + "_1", 1.0f)
.withGNAConfig(GNA_CONFIG_KEY(PRECISION), "I16")
@@ -175,8 +148,6 @@ INSTANTIATE_TEST_SUITE_P(
GNAAlignFilterTest,
testing::Combine(
testing::Values(InferenceEngine::Precision::FP32, InferenceEngine::Precision::I16),
-//fast or not fast alignment policy
-testing::Values(GNAPluginNS::Policy::ConcatAlignment::FAST, GNAPluginNS::Policy::ConcatAlignment::ENABLED),
// Size of first Split layer output
testing::Values(31, 49),
// Size of second Split layer output