[GNA] Limitations refactoring (#16957)

* Limitations refactoring

* fix CI builds/tests

* changes after review

* Move GraphCompiler initialization to constructor

* resolve conflicts after rebase

* update after review

* resolve problem with double initialization for Limitations
Author: Tomasz Adamowicz, 2023-05-29 10:03:58 +02:00, committed by GitHub
parent 3300543eac
commit cccbf7ce7e
38 changed files with 1371 additions and 1073 deletions
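
The last commit-message bullet refers to guarding Limitations against being initialized twice. Below is a rough, standalone sketch of the thread-local singleton pattern the new Limitations class uses; the re-initialization guard and the m_target field are illustrative assumptions, not the exact code from this change.

#include <memory>

enum class DeviceVersion { GNA3_0, GNA3_5 };  // stand-in for target::DeviceVersion

class Limitations {
public:
    static void init(const DeviceVersion& compile_target) {
        // Assumed guard: a second init() with the same target keeps the existing
        // instance instead of rebuilding it (this PR resolves a double-init issue).
        if (k_instance && k_instance->m_target == compile_target) {
            return;
        }
        k_instance = std::shared_ptr<Limitations>(new Limitations(compile_target));
    }

    static std::shared_ptr<Limitations> get_instance() {
        // Callers are expected to run init() first, e.g. from the plugin constructor.
        return k_instance;
    }

private:
    explicit Limitations(const DeviceVersion& target) : m_target(target) {}

    DeviceVersion m_target;  // illustrative field, not taken from the diff
    // One instance per thread, so plugins created for different compile targets
    // in different threads do not overwrite each other's limits.
    static thread_local std::shared_ptr<Limitations> k_instance;
};

thread_local std::shared_ptr<Limitations> Limitations::k_instance{nullptr};

The real class additionally owns the cnn2d validator and the memory-alignment value for the selected target (m_cnn_validator and m_mem_alignment in the .cpp changes below).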


@@ -48,6 +48,8 @@
 using ov::intel_gna::gna_convolution_layer::outputFromConv;
 using ov::intel_gna::gna_convolution_layer::outputFromPooling;
+using namespace ov::intel_gna::limitations;
 namespace ov {
 namespace intel_gna {
 namespace backend {
@@ -180,8 +182,8 @@ void AMIntelDNN::InitConvolutional1DComponentPrivate(intel_dnn_component_t& comp
        THROW_GNA_EXCEPTION << "Number of inputs to Convolutional1DComponent (" << num_columns_in
                            << ") is not a multiply by 8";
    }
-    if (num_filters < limitations::convMinFiltersNum || num_filters > limitations::convMaxFiltersNum ||
-        num_filters % limitations::convFiltersNumDivider != 0) {
+    if (num_filters < Limitations::kConvMinFiltersNum || num_filters > Limitations::kConvMaxFiltersNum ||
+        num_filters % Limitations::kConvFiltersNumDivider != 0) {
        THROW_GNA_EXCEPTION << "Unsupported number of filters in Convolutional1DComponent: " << num_filters;
    }
    auto max_number_of_out_elements = outputFromConv(num_columns_in, num_filter_coefficients, convStride);


@@ -37,267 +37,62 @@ namespace intel_gna {
using namespace target;
namespace limitations {
class SupportedElementTypes {
public:
static bool IsParameterTypeSupported(ov::element::Type type, bool is_exception_allowed = false);
static bool IsConstantTypeSupported(ov::element::Type type, bool is_exception_allowed = false);
private:
static const std::set<ov::element::Type> supported_parameter_types;
static const std::set<ov::element::Type> supported_constant_types;
};
const std::set<ov::element::Type> SupportedElementTypes::supported_parameter_types = {ov::element::u8,
                                                                                      ov::element::i16,
                                                                                      ov::element::f32};
size_t getMemoryAlignmentBytes(target::DeviceVersion target) {
static const std::unordered_map<target::DeviceVersion, size_t> mem_alignment_map{
{target::DeviceVersion::GNA1_0, 64},
{target::DeviceVersion::GNA2_0, 64},
{target::DeviceVersion::GNA3_0, 64},
{target::DeviceVersion::GNA3_1, 64},
{target::DeviceVersion::GNA3_5, 64},
{target::DeviceVersion::GNAEmbedded3_5, 64},
{target::DeviceVersion::GNA3_6, 16},
{target::DeviceVersion::GNA4_0, 16}};
return common::GetValueForKey<target::DeviceVersion, size_t>(target, mem_alignment_map);
}
bool SupportedElementTypes::is_parameter_type_supported(ov::element::Type elem_type, bool is_exception_allowed) {
if (supported_parameter_types.count(elem_type) == 0) {
if (is_exception_allowed) {
THROW_GNA_EXCEPTION << "The plugin does not support input precision with " << elem_type.get_type_name()
<< " format. Supported precisions " << supported_parameter_types << "\n";
}
return false;
}
return true;
}
const std::set<ov::element::Type> SupportedElementTypes::supported_constant_types = {ov::element::i8,
ov::element::u8,
ov::element::i16,
ov::element::u16,
ov::element::i32,
ov::element::f32,
ov::element::f64};
bool SupportedElementTypes::is_constant_type_supported(ov::element::Type elem_type, bool is_exception_allowed) {
if (supported_constant_types.count(elem_type) == 0) {
if (is_exception_allowed) {
THROW_GNA_EXCEPTION << "The plugin does not support constant precision with " << elem_type.get_type_name()
<< " format. Supported precisions " << supported_constant_types << "\n";
}
return false;
}
return true;
}
bool is_transpose_supported(const std::shared_ptr<const ov::Node>& node) {
OPENVINO_ASSERT(node, "Transpose node is empty!");
const ov::Shape squeezed_shape = graph_utils::squeeze_shape(node->get_input_shape(0));
const size_t min_input_dim = std::min(squeezed_shape[0], squeezed_shape[1]);
const size_t max_input_dim = std::max(squeezed_shape[0], squeezed_shape[1]);
// GNA transpose limitations:
// - supports 2d transposes only
// - smaller dimension should be less or equal to 8
// - bigger dimension should be a multiple of limitations::noOfInputsDivisor
if (squeezed_shape.size() == 2 && min_input_dim <= 8 &&
ALIGN(max_input_dim, limitations::noOfInputsDivisor) == max_input_dim) {
return true;
}
return false;
}
bool is_conv_supported(const std::shared_ptr<ngraph::op::ConvolutionIE>& conv_ie,
const DeviceVersion& effective_compile_target,
const InferenceEngine::Precision gna_precision,
bool is_exception_allowed) {
OPENVINO_ASSERT(conv_ie, "ConvolutionIE node is empty!");
size_t batch_size = conv_ie->input_value(0).get_shape()[0];
if (batch_size != 1) {
if (is_exception_allowed) {
THROW_GNA_EXCEPTION << "topology with layer: " + conv_ie->get_friendly_name() +
", type: " + conv_ie->get_type_name() + ", and batch size(" +
std::to_string(batch_size) + ") != 1 not supported";
}
return false;
}
auto check_dilation = [&](size_t filter_dilation_height, size_t filter_stride_width) -> bool {
cnn2d::RangeLimit2D dilation_limit{{convDilationHeight, convDilationHeight, "dilation height"},
{convDilationWidth, convDilationWidth, "dilation width"}};
std::string error = dilation_limit.GetErrorOrEmpty(filter_dilation_height, filter_stride_width);
return cnn2d::AbstractValidator::ValidationSuccesful(is_exception_allowed,
error,
conv_ie->get_friendly_name(),
conv_ie->get_type_name());
};
auto input_shape = conv_ie->input_value(0).get_shape();
auto filter_shape = conv_ie->input_value(1).get_shape();
if ((4 == filter_shape.size() && filter_shape[2] > 1 && filter_shape[3] > 1) ||
(4 == input_shape.size() && input_shape[2] > 1 && input_shape[3] > 1)) {
pass::helper::ConvData conv_data;
pass::helper::GetConvData(conv_ie, conv_data);
if (gna_convolution_layer::isMappableFrom2DTo1D(conv_data.input_height,
conv_data.input_width,
conv_data.input_channel_count,
conv_data.filter_height,
conv_data.filter_width,
conv_data.filter_stride_height,
conv_data.filter_stride_width)) {
return check_dilation(conv_data.filter_dilation_height, conv_data.filter_dilation_width);
}
const auto cnn2dValidatorPtr = cnn2d::AbstractValidator::Create(effective_compile_target);
if (cnn2dValidatorPtr) {
return cnn2dValidatorPtr->ValidateCnn2D(conv_ie->get_friendly_name(),
conv_data.input_height,
conv_data.input_width,
conv_data.input_channel_count,
conv_data.filter_height,
conv_data.filter_width,
conv_data.filter_channel_count,
conv_data.filter_stride_height,
conv_data.filter_stride_width,
conv_data.filter_dilation_height,
conv_data.filter_dilation_width,
OvGnaTypeIntFromBytes(gna_precision.size()),
is_exception_allowed);
}
}
return check_dilation(conv_ie->get_dilations()[0], conv_ie->get_dilations()[1]);
}
bool is_pooling_supported(const std::shared_ptr<ngraph::opset7::MaxPool> max_pool,
const DeviceVersion& effective_compile_target,
bool is_exception_allowed) {
OPENVINO_ASSERT(max_pool, "MaxPool node is empty!");
auto kernels = max_pool->get_kernel();
if (2 == kernels.size() && kernels[0] > 1 && kernels[1] > 1) {
const auto cnn2dValidatorPtr = cnn2d::AbstractValidator::Create(effective_compile_target);
if (cnn2dValidatorPtr) {
auto strides = max_pool->get_strides();
return cnn2dValidatorPtr->ValidatePooling2D(max_pool->get_friendly_name(),
kernels[0],
kernels[1],
strides[0],
strides[1],
is_exception_allowed);
}
}
return true;
}
bool is_fc_supported(const std::shared_ptr<ngraph::op::FullyConnected>& fully_connected, bool is_exception_allowed) {
OPENVINO_ASSERT(fully_connected, "FullyConnected node is empty!");
size_t output_batch_size = fully_connected->get_output_shape(0)[0];
if (output_batch_size > 8) {
if (is_exception_allowed) {
THROW_GNA_EXCEPTION << "topology with layer: " + fully_connected->get_friendly_name() +
", type: " + fully_connected->get_type_name() + ", and batch size(" +
std::to_string(output_batch_size) + ") not supported";
}
return false;
}
return true;
}
bool is_split_supported(const std::shared_ptr<ov::Node>& node, bool is_exception_allowed) {
OPENVINO_ASSERT(node, "Split node is empty!");
bool is_aligned = true;
for (size_t i = 0; i < node->get_output_size(); i++) {
is_aligned &= ov::intel_gna::graph_utils::is_aligned_split(node, i);
}
return is_aligned;
}
bool is_op_supported(const std::shared_ptr<ov::Node>& node,
const DeviceVersion& effective_compile_target,
const InferenceEngine::Precision gna_precision,
bool is_exception_allowed) {
if (ov::op::util::is_parameter(node)) {
return SupportedElementTypes::is_parameter_type_supported(node->get_element_type(), is_exception_allowed);
} else if (ov::op::util::is_constant(node)) {
return SupportedElementTypes::is_constant_type_supported(node->get_element_type(), is_exception_allowed);
} else if (auto conv_ie = std::dynamic_pointer_cast<ngraph::op::ConvolutionIE>(node)) {
return is_conv_supported(conv_ie, effective_compile_target, gna_precision, is_exception_allowed);
} else if (auto fully_connected = std::dynamic_pointer_cast<ngraph::op::FullyConnected>(node)) {
return is_fc_supported(fully_connected, is_exception_allowed);
} else if (ov::intel_gna::graph_utils::is_pooling(node)) {
return is_pooling_supported(std::dynamic_pointer_cast<ngraph::opset7::MaxPool>(node),
effective_compile_target,
is_exception_allowed);
} else if (ov::op::util::is_output(node) || ov::op::util::is_sink(node) ||
ov::intel_gna::graph_utils::is_eltwise_add(node) || ov::intel_gna::graph_utils::is_eltwise_mul(node) ||
ov::intel_gna::graph_utils::is_crop_affined(node) ||
ov::intel_gna::graph_utils::is_activation(node.get()) ||
ov::intel_gna::graph_utils::is_gna_precision_agnostic(
node) || // check concat/split are aligned when transformations will be moved to ngraph
(std::dynamic_pointer_cast<ov::op::util::ReadValueBase>(node) != nullptr) ||
(std::dynamic_pointer_cast<ngraph::op::ScaleShiftIE>(node) != nullptr) ||
(std::dynamic_pointer_cast<ngraph::op::PowerIE>(node) != nullptr) ||
(std::dynamic_pointer_cast<ngraph::opset9::MatMul>(node) != nullptr)) {
return true;
} else if (ov::intel_gna::graph_utils::is_gna_precision_agnostic(node)) {
if ((std::dynamic_pointer_cast<ngraph::opset9::Split>(node) != nullptr) ||
(std::dynamic_pointer_cast<ngraph::opset9::VariadicSplit>(node) != nullptr)) {
return is_split_supported(node, is_exception_allowed);
}
// TODO check concat are aligned when transformation will be moved to ngraph
return true;
}
return false;
}
void check_all_ops_supported(const std::shared_ptr<ov::Model>& model,
const DeviceVersion& effective_compile_target,
const InferenceEngine::Precision gna_precision) {
std::stringstream error;
// Walk through the transformed model
for (auto& op : model->get_ops()) {
if (!is_op_supported(op, effective_compile_target, gna_precision, true)) {
error << "The plugin does not support layer " << op->get_friendly_name() << " (type " << op->get_type_name()
<< ")!" << std::endl;
}
}
if (!error.str().empty()) {
THROW_GNA_EXCEPTION << error.str();
}
}
namespace cnn2d {
-bool IsEqualToLimit::isValid(const uint32_t val) const {
+bool IsEqualToLimit::IsValid(const uint32_t val) const {
    return val == compared_value;
}
std::string IsEqualToLimit::GetErrorOrEmpty(const uint32_t val) const {
    std::ostringstream out;
-    if (!isValid(val)) {
+    if (!IsValid(val)) {
        out << "Unsupported " << what << ", actual value: " << val << ", but should be equal to " << compared_value
            << "\n";
    }
    return out.str();
}
-bool IsLessThanLimit ::isValid(const uint32_t val) const {
+bool IsLessThanLimit::IsValid(const uint32_t val) const {
    return val < compared_value;
}
-std::string IsLessThanLimit ::GetErrorOrEmpty(const uint32_t val) const {
+std::string IsLessThanLimit::GetErrorOrEmpty(const uint32_t val) const {
    std::ostringstream out;
-    if (!isValid(val)) {
+    if (!IsValid(val)) {
        out << "Unsupported " << what << ", actual value: " << val << ", but should be less than " << compared_value
            << "\n";
    }
    return out.str();
}
-bool RangeLimit::isValid(const uint32_t val) const {
+bool RangeLimit::IsValid(const uint32_t val) const {
    return val >= min && val <= max;
}
std::string RangeLimit::GetErrorOrEmpty(const uint32_t val) const {
    std::ostringstream out;
-    if (!isValid(val)) {
+    if (!IsValid(val)) {
        out << "Unsupported " << what << ", actual value: " << val << ", valid range [" << min << ", " << max << "]\n";
    }
    return out.str();
}
-bool RangeLimit2D::isValid(const uint32_t h, const uint32_t w) const {
-    return hLimit.isValid(h) && wLimit.isValid(w);
+bool RangeLimit2D::IsValid(const uint32_t h, const uint32_t w) const {
+    return hLimit.IsValid(h) && wLimit.IsValid(w);
}
std::string RangeLimit2D::GetErrorOrEmpty(const uint32_t h, const uint32_t w) const {
@@ -308,8 +103,8 @@ RangeMultipleLimit::RangeMultipleLimit(RangeLimit rlIn, uint32_t multiplierIn)
    : RangeLimit(rlIn),
      multiplier(multiplierIn) {}
-bool RangeMultipleLimit::isValid(const uint32_t val) const {
-    return RangeLimit::isValid(val) && (val % multiplier == 0);
+bool RangeMultipleLimit::IsValid(const uint32_t val) const {
+    return RangeLimit::IsValid(val) && (val % multiplier == 0);
}
std::string RangeMultipleLimit::GetErrorOrEmpty(const uint32_t val) const {
@@ -321,7 +116,7 @@ std::string RangeMultipleLimit::GetErrorOrEmpty(const uint32_t val) const {
    return e + out.str();
}
-bool VectorOrSquareLimit::isValid(const uint32_t h, const uint32_t w) const {
+bool VectorOrSquareLimit::IsValid(const uint32_t h, const uint32_t w) const {
    if (w == 1 && h >= 1 && h <= maxVectorHeight)
        return true;
    if (h == 1 && w >= 1 && w <= maxVectorWidth)
@@ -333,7 +128,7 @@ bool VectorOrSquareLimit::isValid(const uint32_t h, const uint32_t w) const {
std::string VectorOrSquareLimit::GetErrorOrEmpty(const uint32_t h, const uint32_t w, std::string what) const {
    std::ostringstream out;
-    if (!isValid(h, w)) {
+    if (!IsValid(h, w)) {
        out << "Unsupported " << what << " shape, actual HxW: " << h << "x" << w << ", only vertical vector up to "
            << maxVectorHeight << "x1, horizontal up to 1x" << maxVectorWidth << " or square up to " << maxSquare << "x"
            << maxSquare << " are valid\n";
@@ -341,7 +136,7 @@ std::string VectorOrSquareLimit::GetErrorOrEmpty(const uint32_t h, const uint32_
    return out.str();
}
-bool RectLimit::isValid(const uint32_t h, const uint32_t w) const {
+bool RectLimit::IsValid(const uint32_t h, const uint32_t w) const {
    if (h >= 1 && h <= maxVectorHeight && w >= 1 && w <= maxVectorWidth)
        return true;
    return false;
@@ -349,7 +144,7 @@ bool RectLimit::isValid(const uint32_t h, const uint32_t w) const {
std::string RectLimit::GetErrorOrEmpty(const uint32_t h, const uint32_t w, std::string what) const {
    std::ostringstream out;
-    if (!isValid(h, w)) {
+    if (!IsValid(h, w)) {
        out << "Unsupported " << what << " shape, actual HxW: " << h << "x" << w << ", only rectangular shapes up to "
            << maxVectorHeight << "x" << maxVectorWidth << " are valid\n";
    }
@@ -365,8 +160,8 @@ RectLimit RectLimitByChannels::GetByChannels(const uint32_t channels) const {
    return RectLimit{0, 0};
}
-bool RectLimitByChannels::isValid(const uint32_t h, const uint32_t w, const uint32_t channels) const {
-    return GetByChannels(channels).isValid(h, w);
+bool RectLimitByChannels::IsValid(const uint32_t h, const uint32_t w, const uint32_t channels) const {
+    return GetByChannels(channels).IsValid(h, w);
}
std::string RectLimitByChannels::GetErrorOrEmpty(const uint32_t h,
@@ -380,11 +175,11 @@ RectLimitByChannels RectLimitByChannelsAndPrecision::GetByPrecision(const OvGnaT
    return precision == OvGnaTypeInt8 ? limit_for_int8 : limit_for_int16;
}
-bool RectLimitByChannelsAndPrecision::isValid(const uint32_t h,
+bool RectLimitByChannelsAndPrecision::IsValid(const uint32_t h,
                                              const uint32_t w,
                                              const OvGnaType precision,
                                              const uint32_t channels) const {
-    return GetByPrecision(precision).isValid(h, w, channels);
+    return GetByPrecision(precision).IsValid(h, w, channels);
}
std::string RectLimitByChannelsAndPrecision::GetErrorOrEmpty(const uint32_t h,
@@ -395,6 +190,66 @@ std::string RectLimitByChannelsAndPrecision::GetErrorOrEmpty(const uint32_t h,
    return GetByPrecision(precision).GetErrorOrEmpty(h, w, channels, what);
}
class Validator_30 : public AbstractValidator {
static const RangeLimit2D kInputHWLimit;
static const RangeMultipleLimit kInputChannelsNumberLimit;
static const RangeMultipleLimit kKernelNumberLimit;
static const RectLimitByChannelsAndPrecision kKernelLimit;
static const RangeLimit2D kDilationLimit;
static const VectorOrSquareLimit kPoolingWindowLimit;
public:
Validator_30() = default;
bool ValidateCnn2D(const std::string& name,
const uint32_t inHeight,
const uint32_t inWidth,
const uint32_t inChannels,
const uint32_t kH,
const uint32_t kW,
const uint32_t kN,
const uint32_t strideH,
const uint32_t strideW,
const uint32_t dilationH,
const uint32_t dilationW,
OvGnaType inPrecision,
bool exception = true) const override;
bool ValidatePooling2D(const std::string& name,
const uint32_t windowH,
const uint32_t windowW,
const uint32_t strideH,
const uint32_t strideW,
bool exception = true) const override;
bool ValidateInputPadding(const std::string& name,
const uint32_t pad_h_begin,
const uint32_t pad_h_end,
const uint32_t pad_w_begin,
const uint32_t pad_w_end,
const uint32_t kernel_h,
const uint32_t kernel_w,
const bool throwOnError = true) const override;
bool ShouldUseOnlyConv2DGnaIface() const override;
bool ValidateCnn1D(const std::string& name,
const uint32_t inHeight,
const uint32_t inWidth,
const uint32_t inChannels,
const uint32_t kH,
const uint32_t kW,
const uint32_t kN,
const uint32_t strideH,
const uint32_t strideW,
const uint32_t dilationH,
const uint32_t dilationW,
OvGnaType inPrecision,
bool exception = true) const override;
};
const RangeLimit2D Validator_30::kInputHWLimit{{16, 384, "input height"}, {16, 240, "input width"}};
const RangeMultipleLimit Validator_30::kInputChannelsNumberLimit{{8, 384, "number of input channels"}, 8};
@@ -404,8 +259,8 @@ const RectLimitByChannelsAndPrecision Validator_30::kKernelLimit{
    {{{48, {7, 7}}, {64, {7, 5}}, {80, {7, 4}}, {120, {7, 3}}, {384, {7, 1}}}},
};
-const RangeLimit2D Validator_30::kDilationLimit{{convDilationHeight, convDilationHeight, "dilation height"},
-                                                {convDilationWidth, convDilationWidth, "dilation width"}};
+const RangeLimit2D Validator_30::kDilationLimit{
+    {Limitations::kConvDilationHeight, Limitations::kConvDilationHeight, "dilation height"},
+    {Limitations::kConvDilationWidth, Limitations::kConvDilationWidth, "dilation width"}};
bool Validator_30::ValidateCnn2D(const std::string& name,
                                 const uint32_t inHeight,
@@ -493,6 +349,95 @@ bool Validator_30::ShouldUseOnlyConv2DGnaIface() const {
    return false;
}
class Validator_35 : public AbstractValidator {
struct CnnLimits {
const RangeLimit2D kInputHWLimit;
const RangeLimit kInputChannelsNumberLimit1B;
const RangeLimit kInputChannelsNumberLimit2B;
const RangeLimit kKernelNumberLimit;
const RangeLimit2D kKerneHWlLimit1B;
const RangeLimit2D kKerneHWlLimit2B;
const RangeLimit2D kStrideHWLimit1B;
const RangeLimit2D kStrideHWLimit2B;
const RangeLimit2D kDilationLimit;
const RangeLimit2D kPoolingWindowHWLimit;
const RangeLimit2D kPoolingStrideHWLimit;
};
static const CnnLimits kCnn2DLimits;
static const CnnLimits kCnn1DLimits;
std::string ValidateCnn(const CnnLimits& limits,
const std::string& name,
const uint32_t inHeight,
const uint32_t inWidth,
const uint32_t inChannels,
const uint32_t kH,
const uint32_t kW,
const uint32_t kN,
const uint32_t strideH,
const uint32_t strideW,
const uint32_t dilationH,
const uint32_t dilationW,
OvGnaType inPrecision) const;
std::string ValidatePooling(const CnnLimits& limits,
const std::string& name,
const uint32_t windowH,
const uint32_t windowW,
const uint32_t strideH,
const uint32_t strideW) const;
public:
Validator_35() = default;
bool ValidateCnn2D(const std::string& name,
const uint32_t inHeight,
const uint32_t inWidth,
const uint32_t inChannels,
const uint32_t kH,
const uint32_t kW,
const uint32_t kN,
const uint32_t strideH,
const uint32_t strideW,
const uint32_t dilationH,
const uint32_t dilationW,
OvGnaType inPrecision,
bool exception = true) const override;
bool ValidatePooling2D(const std::string& name,
const uint32_t windowH,
const uint32_t windowW,
const uint32_t strideH,
const uint32_t strideW,
bool exception = true) const override;
bool ValidateInputPadding(const std::string& name,
const uint32_t pad_h_begin,
const uint32_t pad_h_end,
const uint32_t pad_w_begin,
const uint32_t pad_w_end,
const uint32_t kernel_h,
const uint32_t kernel_w,
const bool throwOnError = true) const override;
bool ShouldUseOnlyConv2DGnaIface() const override;
bool ValidateCnn1D(const std::string& name,
const uint32_t inHeight,
const uint32_t inWidth,
const uint32_t inChannels,
const uint32_t kH,
const uint32_t kW,
const uint32_t kN,
const uint32_t strideH,
const uint32_t strideW,
const uint32_t dilationH,
const uint32_t dilationW,
OvGnaType inPrecision,
bool exception = true) const override;
};
const Validator_35::CnnLimits Validator_35::kCnn2DLimits{
    {{1, 65535, "input height"}, {1, 65535, "input width"}},  // kInputHWLimit
    {1, 2048, "number of input channels"},                    // kInputChannelsNumberLimit1B
@@ -502,8 +447,8 @@ const Validator_35::CnnLimits Validator_35::kCnn2DLimits{
    {{1, 255, "kernel height"}, {1, 256, "kernel width"}},                          // kKerneHWlLimit2B
    {{1, 255, "convolution stride height"}, {1, 256, "convolution stride width"}},  // kStrideHWLimit1B
    {{1, 255, "convolution stride height"}, {1, 256, "convolution stride width"}},  // kStrideHWLimit2B
-    {{convDilationHeight, convDilationHeight, "dilation height"},  // kDilationLimit
-     {convDilationWidth, convDilationWidth, "dilation width"}},
+    {{Limitations::kConvDilationHeight, Limitations::kConvDilationHeight, "dilation height"},  // kDilationLimit
+     {Limitations::kConvDilationWidth, Limitations::kConvDilationWidth, "dilation width"}},
    {{1, 255, "pooling window height"}, {1, 255, "pooling window width"}},  // kPoolingWindowHWLimit
    {{1, 255, "pooling stride height"}, {1, 255, "pooling stride width"}}   // kPoolingStrideHWLimit
};
@@ -517,8 +462,8 @@ const Validator_35::CnnLimits Validator_35::kCnn1DLimits{
    {{1, 1, "kernel height"}, {1, 2048, "kernel width"}},                          // kKerneHWlLimit2B
    {{1, 1, "convolution stride height"}, {1, 4096, "convolution stride width"}},  // kStrideHWLimit1B
    {{1, 1, "convolution stride height"}, {1, 2048, "convolution stride width"}},  // kStrideHWLimit2B
-    {{convDilationHeight, convDilationHeight, "dilation height"},  // kDilationLimit
-     {convDilationWidth, convDilationWidth, "dilation width"}},
+    {{Limitations::kConvDilationHeight, Limitations::kConvDilationHeight, "dilation height"},  // kDilationLimit
+     {Limitations::kConvDilationWidth, Limitations::kConvDilationWidth, "dilation width"}},
    {{1, 1, "pooling window height"}, {1, 255, "pooling window width"}},  // kPoolingWindowHWLimit
    {{1, 1, "pooling stride height"}, {1, 255, "pooling stride width"}}   // kPoolingStrideHWLimit
};
@@ -672,16 +617,16 @@ bool Validator_35::ShouldUseOnlyConv2DGnaIface() const {
    return true;
}
-std::unique_ptr<AbstractValidator> AbstractValidator::Create(const DeviceVersion& target) {
+std::shared_ptr<AbstractValidator> AbstractValidator::Create(const DeviceVersion& target) {
    switch (target) {
    case DeviceVersion::GNA3_0:
    case DeviceVersion::GNA3_1:
-        return tools::make_unique<Validator_30>();
+        return std::make_shared<Validator_30>();
    case DeviceVersion::GNA3_5:
    case DeviceVersion::GNAEmbedded3_5:
    case DeviceVersion::GNA3_6:
    case DeviceVersion::GNA4_0:
-        return tools::make_unique<Validator_35>();
+        return std::make_shared<Validator_35>();
    default:
        return nullptr;
    }
@@ -705,15 +650,280 @@ bool AbstractValidator::ValidationSuccesful(const bool throwOnError,
    return error.empty();
}
bool UseOnly16BitConvolutionWeights(const DeviceVersion& compile_target) {
return compile_target == DeviceVersion::GNA1_0 || compile_target == DeviceVersion::GNA2_0 ||
compile_target == DeviceVersion::GNA3_0 || compile_target == DeviceVersion::GNA3_1;
}
}  // namespace cnn2d
constexpr uint32_t Limitations::kBufferMaxSize;
constexpr uint32_t Limitations::kConvMinFiltersNum;
constexpr uint32_t Limitations::kConvMaxFiltersNum;
constexpr uint32_t Limitations::kConvDilationHeight;
constexpr uint32_t Limitations::kConvDilationWidth;
constexpr uint32_t Limitations::kConvFiltersNumDivider;
constexpr uint32_t Limitations::kConvFilterSizeDivider;
constexpr uint32_t Limitations::kConvFilterMaxSize;
constexpr uint32_t Limitations::kConvEachKernelByteAlignment;
constexpr uint32_t Limitations::kInputByteAlignment;
constexpr uint32_t Limitations::kNoOfInputsDivisor;
constexpr uint32_t Limitations::kNoOfInputsLowPrecDivisor;
constexpr uint32_t Limitations::kAffineMaxBatchSize;
constexpr uint32_t Limitations::kMaxPoolMaxWindowSize;
constexpr uint32_t Limitations::kCopyMaxGrouping;
constexpr uint32_t Limitations::kTransposeMaxSize;
constexpr uint32_t Limitations::kMaxLayersCountGNA1_0;
constexpr uint32_t Limitations::kMaxLayersCountGNA2_0;
constexpr uint32_t Limitations::kMaxLayersCountGNA3_X;
constexpr uint32_t Limitations::kBytesPerSplitElement;
constexpr uint32_t Limitations::kBytesPerCropElement;
constexpr uint32_t Limitations::kMemoryPageSize;
thread_local std::shared_ptr<Limitations> Limitations::k_instance{nullptr};
Limitations::Limitations(const DeviceVersion& target) {
m_use_only_16bit_conv_weights = (target == DeviceVersion::GNA1_0 || target == DeviceVersion::GNA2_0 ||
target == DeviceVersion::GNA3_0 || target == DeviceVersion::GNA3_1);
m_mem_alignment = get_memory_alignment_bytes(target);
m_cnn_validator = cnn2d::AbstractValidator::Create(target);
}
void Limitations::init(const DeviceVersion& compile_target) {
k_instance = std::shared_ptr<Limitations>(new Limitations(compile_target));
}
bool Limitations::is_transpose_2d(const std::vector<size_t>& shape) {
return std::count_if(std::begin(shape), std::end(shape), [](size_t dim) {
return dim != 1;
}) == 2;
}
bool Limitations::is_transpose_supported(const std::vector<size_t>& shape) {
if (!is_transpose_2d(shape))
return false;
auto shape_no_1 = shape;
shape_no_1.erase(std::remove(shape_no_1.begin(), shape_no_1.end(), 1), shape_no_1.end());
size_t min, max;
std::tie(min, max) = std::minmax(shape_no_1[0], shape_no_1[1]);
return min <= 8 && max % 8 == 0 && max >= 8 && max <= kTransposeMaxSize;
}
size_t Limitations::get_min_batch_to_fit_in_buffer(InferenceEngine::DataPtr input) {
auto total_size = InferenceEngine::details::product(std::begin(input->getDims()), std::end(input->getDims()));
return total_size / kBufferMaxSize + 1;
}
size_t Limitations::get_memory_alignment_bytes(const DeviceVersion& target) const {
static const std::unordered_map<DeviceVersion, size_t> mem_alignment_map{{DeviceVersion::GNA1_0, 64},
{DeviceVersion::GNA2_0, 64},
{DeviceVersion::GNA3_0, 64},
{DeviceVersion::GNA3_1, 64},
{DeviceVersion::GNA3_5, 64},
{DeviceVersion::GNAEmbedded3_5, 64},
{DeviceVersion::GNA3_6, 16},
{DeviceVersion::GNA4_0, 16}};
return common::GetValueForKey<DeviceVersion, size_t>(target, mem_alignment_map);
}
bool SupportedElementTypes::IsParameterTypeSupported(ov::element::Type elem_type, bool is_exception_allowed) {
if (supported_parameter_types.count(elem_type) == 0) {
if (is_exception_allowed) {
THROW_GNA_EXCEPTION << "The plugin does not support input precision with " << elem_type.get_type_name()
<< " format. Supported precisions " << supported_parameter_types << "\n";
}
return false;
}
return true;
}
const std::set<ov::element::Type> SupportedElementTypes::supported_constant_types = {ov::element::i8,
ov::element::u8,
ov::element::i16,
ov::element::u16,
ov::element::i32,
ov::element::f32,
ov::element::f64};
bool SupportedElementTypes::IsConstantTypeSupported(ov::element::Type elem_type, bool is_exception_allowed) {
if (supported_constant_types.count(elem_type) == 0) {
if (is_exception_allowed) {
THROW_GNA_EXCEPTION << "The plugin does not support constant precision with " << elem_type.get_type_name()
<< " format. Supported precisions " << supported_constant_types << "\n";
}
return false;
}
return true;
}
bool Limitations::is_transpose_supported(const std::shared_ptr<const ov::Node>& node) {
OPENVINO_ASSERT(node, "Transpose node is empty!");
const ov::Shape squeezed_shape = graph_utils::squeeze_shape(node->get_input_shape(0));
const size_t min_input_dim = std::min(squeezed_shape[0], squeezed_shape[1]);
const size_t max_input_dim = std::max(squeezed_shape[0], squeezed_shape[1]);
// GNA transpose limitations:
// - supports 2d transposes only
// - smaller dimension should be less or equal to 8
// - bigger dimension should be a multiple of Limitations::kNoOfInputsDivisor
if (squeezed_shape.size() == 2 && min_input_dim <= 8 && ALIGN(max_input_dim, kNoOfInputsDivisor) == max_input_dim) {
return true;
}
return false;
}
bool Limitations::is_conv_supported(const std::shared_ptr<ngraph::op::ConvolutionIE>& conv_ie,
const InferenceEngine::Precision gna_precision,
bool is_exception_allowed) {
OPENVINO_ASSERT(conv_ie, "ConvolutionIE node is empty!");
size_t batch_size = conv_ie->input_value(0).get_shape()[0];
if (batch_size != 1) {
if (is_exception_allowed) {
THROW_GNA_EXCEPTION << "topology with layer: " + conv_ie->get_friendly_name() +
", type: " + conv_ie->get_type_name() + ", and batch size(" +
std::to_string(batch_size) + ") != 1 not supported";
}
return false;
}
auto check_dilation = [&](size_t filter_dilation_height, size_t filter_stride_width) -> bool {
cnn2d::RangeLimit2D dilation_limit{{kConvDilationHeight, kConvDilationHeight, "dilation height"},
{kConvDilationWidth, kConvDilationWidth, "dilation width"}};
std::string error = dilation_limit.GetErrorOrEmpty(filter_dilation_height, filter_stride_width);
return cnn2d::AbstractValidator::ValidationSuccesful(is_exception_allowed,
error,
conv_ie->get_friendly_name(),
conv_ie->get_type_name());
};
auto input_shape = conv_ie->input_value(0).get_shape();
auto filter_shape = conv_ie->input_value(1).get_shape();
if ((4 == filter_shape.size() && filter_shape[2] > 1 && filter_shape[3] > 1) ||
(4 == input_shape.size() && input_shape[2] > 1 && input_shape[3] > 1)) {
pass::helper::ConvData conv_data;
pass::helper::GetConvData(conv_ie, conv_data);
if (gna_convolution_layer::isMappableFrom2DTo1D(conv_data.input_height,
conv_data.input_width,
conv_data.input_channel_count,
conv_data.filter_height,
conv_data.filter_width,
conv_data.filter_stride_height,
conv_data.filter_stride_width)) {
return check_dilation(conv_data.filter_dilation_height, conv_data.filter_dilation_width);
}
if (m_cnn_validator) {
return m_cnn_validator->ValidateCnn2D(conv_ie->get_friendly_name(),
conv_data.input_height,
conv_data.input_width,
conv_data.input_channel_count,
conv_data.filter_height,
conv_data.filter_width,
conv_data.filter_channel_count,
conv_data.filter_stride_height,
conv_data.filter_stride_width,
conv_data.filter_dilation_height,
conv_data.filter_dilation_width,
OvGnaTypeIntFromBytes(gna_precision.size()),
is_exception_allowed);
}
}
return check_dilation(conv_ie->get_dilations()[0], conv_ie->get_dilations()[1]);
}
bool Limitations::is_pooling_supported(const std::shared_ptr<ngraph::opset7::MaxPool> max_pool,
bool is_exception_allowed) {
OPENVINO_ASSERT(max_pool, "MaxPool node is empty!");
auto kernels = max_pool->get_kernel();
if (2 == kernels.size() && kernels[0] > 1 && kernels[1] > 1) {
if (m_cnn_validator) {
auto strides = max_pool->get_strides();
return m_cnn_validator->ValidatePooling2D(max_pool->get_friendly_name(),
kernels[0],
kernels[1],
strides[0],
strides[1],
is_exception_allowed);
}
}
return true;
}
bool Limitations::is_fc_supported(const std::shared_ptr<ngraph::op::FullyConnected>& fully_connected,
bool is_exception_allowed) {
OPENVINO_ASSERT(fully_connected, "FullyConnected node is empty!");
size_t output_batch_size = fully_connected->get_output_shape(0)[0];
if (output_batch_size > 8) {
if (is_exception_allowed) {
THROW_GNA_EXCEPTION << "topology with layer: " + fully_connected->get_friendly_name() +
", type: " + fully_connected->get_type_name() + ", and batch size(" +
std::to_string(output_batch_size) + ") not supported";
}
return false;
}
return true;
}
bool Limitations::is_split_supported(const std::shared_ptr<ov::Node>& node, bool is_exception_allowed) {
OPENVINO_ASSERT(node, "Split node is empty!");
bool is_aligned = true;
for (size_t i = 0; i < node->get_output_size(); i++) {
is_aligned &= ov::intel_gna::graph_utils::is_aligned_split(node, i);
}
return is_aligned;
}
bool Limitations::is_op_supported(const std::shared_ptr<ov::Node>& node,
const InferenceEngine::Precision gna_precision,
bool is_exception_allowed) {
if (ov::op::util::is_parameter(node)) {
return SupportedElementTypes::IsParameterTypeSupported(node->get_element_type(), is_exception_allowed);
} else if (ov::op::util::is_constant(node)) {
return SupportedElementTypes::IsConstantTypeSupported(node->get_element_type(), is_exception_allowed);
} else if (auto conv_ie = std::dynamic_pointer_cast<ngraph::op::ConvolutionIE>(node)) {
return is_conv_supported(conv_ie, gna_precision, is_exception_allowed);
} else if (auto fully_connected = std::dynamic_pointer_cast<ngraph::op::FullyConnected>(node)) {
return is_fc_supported(fully_connected, is_exception_allowed);
} else if (ov::intel_gna::graph_utils::is_pooling(node)) {
return is_pooling_supported(std::dynamic_pointer_cast<ngraph::opset7::MaxPool>(node), is_exception_allowed);
} else if (ov::op::util::is_output(node) || ov::op::util::is_sink(node) ||
ov::intel_gna::graph_utils::is_eltwise_add(node) || ov::intel_gna::graph_utils::is_eltwise_mul(node) ||
ov::intel_gna::graph_utils::is_crop_affined(node) ||
ov::intel_gna::graph_utils::is_activation(node.get()) ||
ov::intel_gna::graph_utils::is_gna_precision_agnostic(
node) || // check concat/split are aligned when transformations will be moved to ngraph
(std::dynamic_pointer_cast<ov::op::util::ReadValueBase>(node) != nullptr) ||
(std::dynamic_pointer_cast<ngraph::op::ScaleShiftIE>(node) != nullptr) ||
(std::dynamic_pointer_cast<ngraph::op::PowerIE>(node) != nullptr) ||
(std::dynamic_pointer_cast<ngraph::opset9::MatMul>(node) != nullptr)) {
return true;
} else if (ov::intel_gna::graph_utils::is_gna_precision_agnostic(node)) {
if ((std::dynamic_pointer_cast<ngraph::opset9::Split>(node) != nullptr) ||
(std::dynamic_pointer_cast<ngraph::opset9::VariadicSplit>(node) != nullptr)) {
return is_split_supported(node, is_exception_allowed);
}
// TODO check concat are aligned when transformation will be moved to ngraph
return true;
}
return false;
}
void Limitations::check_all_ops_supported(const std::shared_ptr<ov::Model>& model,
const InferenceEngine::Precision gna_precision) {
std::stringstream error;
// Walk through the transformed model
for (auto& op : model->get_ops()) {
if (!is_op_supported(op, gna_precision, true)) {
error << "The plugin does not support layer " << op->get_friendly_name() << " (type " << op->get_type_name()
<< ")!" << std::endl;
}
}
if (!error.str().empty()) {
THROW_GNA_EXCEPTION << error.str();
}
}
bool Limitations::use_only_16bit_convolution_weights() const {
return m_use_only_16bit_conv_weights;
}
IE_SUPPRESS_DEPRECATED_START
-static bool ValidateConcatAxis(const InferenceEngine::CNNLayerPtr layer, std::string& errMessage) {
+bool Limitations::validate_concat_axis(const InferenceEngine::CNNLayerPtr layer, std::string& errMessage) {
    LayerInfo info(layer);
    auto concat_layer = info.as<InferenceEngine::ConcatLayer*>();
    IE_ASSERT(concat_layer);
@@ -747,7 +957,8 @@ static bool ValidateConcatAxis(const InferenceEngine::CNNLayerPtr layer, std::st
    // when all transformations are migrated to ngraph
    bool is_not_trivial_concat = false;
-    // Concatentaion of consts and input parameters only is supported, even if first dimentsion of input parameter >
+    // Concatentaion of consts and input parameters only is supported, even if first dimentsion of input
+    // parameter >
    // 1
    bool concat_all_const_or_inputs = false;
@@ -846,7 +1057,7 @@ static bool ValidateConcatAxis(const InferenceEngine::CNNLayerPtr layer, std::st
    return true;
}
-bool ValidateConvConcatAxis(const InferenceEngine::ConcatLayer* concat_layer) {
+bool Limitations::validate_conv_concat_axis(const InferenceEngine::ConcatLayer* concat_layer) {
    IE_ASSERT(concat_layer);
    auto dims_size = concat_layer->insData[0].lock()->getDims().size();
@@ -898,7 +1109,7 @@ bool ValidateConvConcatAxis(const InferenceEngine::ConcatLayer* concat_layer) {
    return true;
}
-bool AreLayersSupported(InferenceEngine::CNNNetwork& network, std::string& errMessage) {
+bool Limitations::are_layers_supported(InferenceEngine::CNNNetwork& network, std::string& errMessage) {
    IE_SUPPRESS_DEPRECATED_START
    InferenceEngine::InputsDataMap inputs = network.getInputsInfo();
    std::unordered_set<InferenceEngine::CNNLayer*> allLayers;
@@ -909,7 +1120,7 @@ bool AreLayersSupported(InferenceEngine::CNNNetwork& network, std::string& errMe
        // If there are no inputs start search from an output
        startLayer = getCreatorLayer(outputs.begin()->second).lock();
    } else {
-        SupportedElementTypes::is_parameter_type_supported(
+        SupportedElementTypes::IsParameterTypeSupported(
            InferenceEngine::details::convertPrecision(inputs.begin()->second->getPrecision()),
            true);
@@ -944,7 +1155,7 @@ bool AreLayersSupported(InferenceEngine::CNNNetwork& network, std::string& errMe
                check_result = false;
            }
        } else if (info.isConcat()) {
-            if (!ValidateConcatAxis(layer, errMessage)) {
+            if (!validate_concat_axis(layer, errMessage)) {
                THROW_GNA_EXCEPTION << errMessage;
            }
        }
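
Aside from the diff itself: the transpose restriction that now lives in Limitations::is_transpose_supported (defined in this file) can be read as a small standalone check. The constant and the logic below mirror the code above; the example shapes and the main() driver are only illustrative.

#include <algorithm>
#include <cstddef>
#include <iostream>
#include <tuple>
#include <vector>

constexpr size_t kTransposeMaxSize = 65528;  // same value as Limitations::kTransposeMaxSize

// A shape is a 2D transpose candidate when exactly two dimensions differ from 1.
bool is_transpose_2d(const std::vector<size_t>& shape) {
    return std::count_if(shape.begin(), shape.end(), [](size_t dim) {
               return dim != 1;
           }) == 2;
}

// Supported when the smaller non-unit dimension is <= 8 and the bigger one is a
// multiple of 8 that does not exceed kTransposeMaxSize.
bool is_transpose_supported(const std::vector<size_t>& shape) {
    if (!is_transpose_2d(shape))
        return false;
    auto shape_no_1 = shape;
    shape_no_1.erase(std::remove(shape_no_1.begin(), shape_no_1.end(), 1), shape_no_1.end());
    size_t min, max;
    std::tie(min, max) = std::minmax(shape_no_1[0], shape_no_1[1]);
    return min <= 8 && max % 8 == 0 && max >= 8 && max <= kTransposeMaxSize;
}

int main() {
    // {1, 8, 64}: squeezes to 8x64 -> min 8 <= 8, max 64 is a multiple of 8 -> supported
    // {1, 5, 7}:  squeezes to 5x7  -> 7 is not a multiple of 8              -> rejected
    // {2, 3, 4}:  three non-unit dimensions, not a 2D transpose             -> rejected
    std::cout << is_transpose_supported({1, 8, 64}) << " " << is_transpose_supported({1, 5, 7}) << " "
              << is_transpose_supported({2, 3, 4}) << "\n";  // prints: 1 0 0
}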


@@ -9,6 +9,8 @@
#include <cstdint>
#include <ie_algorithm.hpp>
+#include <memory>
+#include <thread>
#include "common/gna_target.hpp"
#include "common/misc_utils.hpp"
@@ -23,158 +25,19 @@ namespace ov {
namespace intel_gna {
namespace limitations {
constexpr uint32_t bufferMaxSize = 65528;
constexpr uint32_t convMinFiltersNum = 4;
constexpr uint32_t convMaxFiltersNum = 65532;
constexpr uint32_t convDilationHeight = 1;
constexpr uint32_t convDilationWidth = 1;
constexpr uint32_t convFiltersNumDivider = 4;
constexpr uint32_t convFilterSizeDivider = 8;
constexpr uint32_t convFilterMaxSize = 768;
constexpr uint32_t convEachKernelByteAlignment = 16;
constexpr uint32_t inputByteAlignment = 64;
constexpr uint32_t noOfInputsDivisor = 8;
constexpr uint32_t noOfInputsLowPrecDivisor = 16;
constexpr uint32_t affineMaxBatchSize = 8;
constexpr uint32_t maxPoolMaxWindowSize = 6;
constexpr uint32_t copyMaxGrouping = 8;
constexpr uint32_t transposeMaxSize = 65528;
// TODO In the future there should be created class/struct representing all limitations for specific device versions.
constexpr uint32_t kMaxLayersCountGNA1_0 = 1023;
constexpr uint32_t kMaxLayersCountGNA2_0 = 4096;
constexpr uint32_t kMaxLayersCountGNA3_X = 8192;
// Currently split layer only supports 2 bytes in int16 and int8 mode.
// In fp32 mode this is not necessary but is useful for testing
constexpr uint32_t bytesPerSplitElement = 2;
// Currently crop layer only supports 2 bytes in int16 and int8 mode.
// In fp32 mode this is not necessary but is useful for testing
constexpr uint32_t bytesPerCropElement = 2;
constexpr uint32_t kMemoryPageSize = 4096;
inline bool isCropAffinedOffset(size_t numberOfElements) {
const auto cropOffset = numberOfElements * bytesPerCropElement;
return (ALIGN64(cropOffset) != cropOffset);
}
inline bool IsTranspose2d(const std::vector<size_t>& shape) {
return std::count_if(std::begin(shape), std::end(shape), [](size_t dim) {
return dim != 1;
}) == 2;
}
inline bool IsTransposeSupported(const std::vector<size_t>& shape) {
if (!IsTranspose2d(shape))
return false;
auto shape_no_1 = shape;
shape_no_1.erase(std::remove(shape_no_1.begin(), shape_no_1.end(), 1), shape_no_1.end());
size_t min, max;
std::tie(min, max) = std::minmax(shape_no_1[0], shape_no_1[1]);
return min <= 8 && max % 8 == 0 && max >= 8 && max <= transposeMaxSize;
}
size_t getMemoryAlignmentBytes(target::DeviceVersion target);
class SupportedElementTypes {
public:
static bool is_parameter_type_supported(ov::element::Type type, bool is_exception_allowed = false);
static bool is_constant_type_supported(ov::element::Type type, bool is_exception_allowed = false);
private:
static const std::set<ov::element::Type> supported_parameter_types;
static const std::set<ov::element::Type> supported_constant_types;
};
/**
* @brief Validates if transpose is supported by GNA
* @param node transpose
* @return true if supported
*/
bool is_transpose_supported(const std::shared_ptr<const ov::Node>& node);
/**
* @brief Validates if legacy convolution is supported by GNA
* @param conv_ie convolution
* @param effective_compile_target GNA compile targets
* @param gna_precision GNA inference precision
* @param is_exception_allowed flag specifies whether exception is allowed
* @return true if supported
*/
bool is_conv_supported(const std::shared_ptr<ngraph::op::ConvolutionIE>& conv_ie,
const target::DeviceVersion& effective_compile_target,
const InferenceEngine::Precision gna_precision,
bool is_exception_allowed = false);
/**
* @brief Validates if max pooling is supported by GNA
* @param max_pool max pooling
* @param effective_compile_target GNA compile targets
* @param supported_types list of supported types
* @param is_exception_allowed flag specifies whether exception is allowed
* @return true if precision is found in supported
*/
bool is_pooling_supported(const std::shared_ptr<ngraph::opset7::MaxPool> max_pool,
const target::DeviceVersion& effective_compile_target,
bool is_exception_allowed = false);
/**
* @brief Validates if fully connected is supported by GNA
* @param fully_connected fully connected
* @param is_exception_allowed flag specifies whether exception is allowed
* @return true if supported
*/
bool is_fc_supported(const std::shared_ptr<ngraph::op::FullyConnected>& fully_connected,
bool is_exception_allowed = false);
/**
* @brief Validates if split is supported by GNA
* @param node split
* @param is_exception_allowed flag specifies whether exception is allowed
* @return true if supported
*/
bool is_split_supported(const std::shared_ptr<ov::Node>& node, bool is_exception_allowed = false);
/**
* @brief Validates if operation is supported by GNA
* @param node operation
* @param gna_compile_target GNA compile target
* @param gna_precision GNA inference precision
* @param is_exception_allowed flag specifies whether exception is allowed
* @return true if supported
*/
bool is_op_supported(const std::shared_ptr<ov::Node>& node,
const target::DeviceVersion& effective_compile_target,
const InferenceEngine::Precision gna_precision,
bool is_exception_allowed = false);
/**
* @brief Check if all operations are supported by GNA
* @param model ngraph model
* @param gna_compile_target GNA compile target
* @param gna_precision GNA inference precision
*/
void check_all_ops_supported(const std::shared_ptr<ov::Model>& model,
const target::DeviceVersion& effective_compile_target,
const InferenceEngine::Precision gna_precision);
namespace cnn2d {
struct IsEqualToLimit {
    uint32_t compared_value;
    std::string what;
-    bool isValid(const uint32_t val) const;
+    bool IsValid(const uint32_t val) const;
    std::string GetErrorOrEmpty(const uint32_t val) const;
};
struct IsLessThanLimit {
    uint32_t compared_value;
    std::string what;
-    bool isValid(const uint32_t val) const;
+    bool IsValid(const uint32_t val) const;
    std::string GetErrorOrEmpty(const uint32_t val) const;
};
@@ -182,28 +45,28 @@ struct RangeLimit {
    uint32_t min;
    uint32_t max;
    std::string what;
-    bool isValid(const uint32_t val) const;
+    bool IsValid(const uint32_t val) const;
    std::string GetErrorOrEmpty(const uint32_t val) const;
};
struct RangeLimit2D {
    RangeLimit hLimit;
    RangeLimit wLimit;
-    bool isValid(const uint32_t h, const uint32_t w) const;
+    bool IsValid(const uint32_t h, const uint32_t w) const;
    std::string GetErrorOrEmpty(const uint32_t h, const uint32_t w) const;
};
struct RangeMultipleLimit : public RangeLimit {
    uint32_t multiplier;
    RangeMultipleLimit(RangeLimit rlIn, uint32_t multiplierIn);
-    bool isValid(const uint32_t val) const;
+    bool IsValid(const uint32_t val) const;
    std::string GetErrorOrEmpty(const uint32_t val) const;
};
struct RectLimit {
    uint32_t maxVectorHeight;
    uint32_t maxVectorWidth;
-    bool isValid(const uint32_t h, const uint32_t w) const;
+    bool IsValid(const uint32_t h, const uint32_t w) const;
    std::string GetErrorOrEmpty(const uint32_t h, const uint32_t w, std::string what) const;
};
@@ -211,14 +74,14 @@ struct VectorOrSquareLimit {
    uint32_t maxSquare;
    uint32_t maxVectorHeight;
    uint32_t maxVectorWidth;
-    bool isValid(const uint32_t h, const uint32_t w) const;
+    bool IsValid(const uint32_t h, const uint32_t w) const;
    std::string GetErrorOrEmpty(const uint32_t h, const uint32_t w, std::string what) const;
};
struct RectLimitByChannels {
    std::vector<std::pair<uint32_t, RectLimit>> limitPerChannel;
    RectLimit GetByChannels(const uint32_t channels) const;
-    bool isValid(const uint32_t h, const uint32_t w, const uint32_t channels) const;
+    bool IsValid(const uint32_t h, const uint32_t w, const uint32_t channels) const;
    std::string GetErrorOrEmpty(const uint32_t h, const uint32_t w, const uint32_t channels, std::string what) const;
};
@@ -226,7 +89,7 @@ struct RectLimitByChannelsAndPrecision {
    RectLimitByChannels limit_for_int8;
    RectLimitByChannels limit_for_int16;
    RectLimitByChannels GetByPrecision(const OvGnaType precision) const;
-    bool isValid(const uint32_t h, const uint32_t w, const OvGnaType precision, const uint32_t channels) const;
+    bool IsValid(const uint32_t h, const uint32_t w, const OvGnaType precision, const uint32_t channels) const;
    std::string GetErrorOrEmpty(const uint32_t h,
                                const uint32_t w,
                                const OvGnaType precision,
@@ -291,177 +154,168 @@ public:
                       OvGnaType inPrecision,
                       bool exception = true) const = 0;
-    static std::unique_ptr<AbstractValidator> Create(const target::DeviceVersion& target);
+    static std::shared_ptr<AbstractValidator> Create(const target::DeviceVersion& target);
};
class Validator_30 : public AbstractValidator {
static const RangeLimit2D kInputHWLimit;
static const RangeMultipleLimit kInputChannelsNumberLimit;
static const RangeMultipleLimit kKernelNumberLimit;
static const RectLimitByChannelsAndPrecision kKernelLimit;
static const RangeLimit2D kDilationLimit;
static const VectorOrSquareLimit kPoolingWindowLimit;
public:
Validator_30() = default;
bool ValidateCnn2D(const std::string& name,
const uint32_t inHeight,
const uint32_t inWidth,
const uint32_t inChannels,
const uint32_t kH,
const uint32_t kW,
const uint32_t kN,
const uint32_t strideH,
const uint32_t strideW,
const uint32_t dilationH,
const uint32_t dilationW,
OvGnaType inPrecision,
bool exception = true) const override;
bool ValidatePooling2D(const std::string& name,
const uint32_t windowH,
const uint32_t windowW,
const uint32_t strideH,
const uint32_t strideW,
bool exception = true) const override;
bool ValidateInputPadding(const std::string& name,
const uint32_t pad_h_begin,
const uint32_t pad_h_end,
const uint32_t pad_w_begin,
const uint32_t pad_w_end,
const uint32_t kernel_h,
const uint32_t kernel_w,
const bool throwOnError = true) const override;
bool ShouldUseOnlyConv2DGnaIface() const override;
bool ValidateCnn1D(const std::string& name,
const uint32_t inHeight,
const uint32_t inWidth,
const uint32_t inChannels,
const uint32_t kH,
const uint32_t kW,
const uint32_t kN,
const uint32_t strideH,
const uint32_t strideW,
const uint32_t dilationH,
const uint32_t dilationW,
OvGnaType inPrecision,
bool exception = true) const override;
};
class Validator_35 : public AbstractValidator {
struct CnnLimits {
const RangeLimit2D kInputHWLimit;
const RangeLimit kInputChannelsNumberLimit1B;
const RangeLimit kInputChannelsNumberLimit2B;
const RangeLimit kKernelNumberLimit;
const RangeLimit2D kKerneHWlLimit1B;
const RangeLimit2D kKerneHWlLimit2B;
const RangeLimit2D kStrideHWLimit1B;
const RangeLimit2D kStrideHWLimit2B;
const RangeLimit2D kDilationLimit;
const RangeLimit2D kPoolingWindowHWLimit;
const RangeLimit2D kPoolingStrideHWLimit;
};
static const CnnLimits kCnn2DLimits;
static const CnnLimits kCnn1DLimits;
std::string ValidateCnn(const CnnLimits& limits,
const std::string& name,
const uint32_t inHeight,
const uint32_t inWidth,
const uint32_t inChannels,
const uint32_t kH,
const uint32_t kW,
const uint32_t kN,
const uint32_t strideH,
const uint32_t strideW,
const uint32_t dilationH,
const uint32_t dilationW,
OvGnaType inPrecision) const;
std::string ValidatePooling(const CnnLimits& limits,
const std::string& name,
const uint32_t windowH,
const uint32_t windowW,
const uint32_t strideH,
const uint32_t strideW) const;
public:
Validator_35() = default;
bool ValidateCnn2D(const std::string& name,
const uint32_t inHeight,
const uint32_t inWidth,
const uint32_t inChannels,
const uint32_t kH,
const uint32_t kW,
const uint32_t kN,
const uint32_t strideH,
const uint32_t strideW,
const uint32_t dilationH,
const uint32_t dilationW,
OvGnaType inPrecision,
bool exception = true) const override;
bool ValidatePooling2D(const std::string& name,
const uint32_t windowH,
const uint32_t windowW,
const uint32_t strideH,
const uint32_t strideW,
bool exception = true) const override;
bool ValidateInputPadding(const std::string& name,
const uint32_t pad_h_begin,
const uint32_t pad_h_end,
const uint32_t pad_w_begin,
const uint32_t pad_w_end,
const uint32_t kernel_h,
const uint32_t kernel_w,
const bool throwOnError = true) const override;
bool ShouldUseOnlyConv2DGnaIface() const override;
bool ValidateCnn1D(const std::string& name,
const uint32_t inHeight,
const uint32_t inWidth,
const uint32_t inChannels,
const uint32_t kH,
const uint32_t kW,
const uint32_t kN,
const uint32_t strideH,
const uint32_t strideW,
const uint32_t dilationH,
const uint32_t dilationW,
OvGnaType inPrecision,
bool exception = true) const override;
};
bool UseOnly16BitConvolutionWeights(const target::DeviceVersion& compile_target);
} // namespace cnn2d

class Limitations {
public:
/**
* @brief Create instance of the Limitations class. Due to Limitations being a singleton, multiple instances of the
* plugin with different compilation targets cannot exist at the same time
* @param compile_target GNA compile target
*/
static void init(const target::DeviceVersion& compile_target);
/**
* @brief Returns the instance of the Limitations object. Requires init() to be called before the first use
*/
static inline std::shared_ptr<Limitations> get_instance();
static bool is_transpose_2d(const std::vector<size_t>& shape);
static bool is_transpose_supported(const std::vector<size_t>& shape);
static size_t get_min_batch_to_fit_in_buffer(InferenceEngine::DataPtr input);
/**
* @brief Validates if concat layer axis is supported by GNA
* @param layer concat layer
* @return true if concat layer axis is valid
*/
IE_SUPPRESS_DEPRECATED_START
static bool validate_conv_concat_axis(const InferenceEngine::ConcatLayer* concatLayer);
static bool are_layers_supported(InferenceEngine::CNNNetwork& network, std::string& errMessage);
IE_SUPPRESS_DEPRECATED_END
/**
* @brief Validates if fully connected is supported by GNA
* @param fully_connected fully connected
* @param is_exception_allowed flag specifies whether exception is allowed
* @return true if supported
*/
static bool is_fc_supported(const std::shared_ptr<ngraph::op::FullyConnected>& fully_connected,
bool is_exception_allowed = false);
/**
* @brief Validates if split is supported by GNA
* @param node split
* @param is_exception_allowed flag specifies whether exception is allowed
* @return true if supported
*/
static bool is_split_supported(const std::shared_ptr<ov::Node>& node, bool is_exception_allowed = false);
/**
* @brief Validates if transpose is supported by GNA
* @param node transpose
* @return true if supported
*/
static bool is_transpose_supported(const std::shared_ptr<const ov::Node>& node);
/**
* @brief Validates if legacy convolution is supported by GNA
* @param conv_ie convolution
* @param gna_precision GNA inference precision
* @param is_exception_allowed flag specifies whether exception is allowed
* @return true if supported
*/
bool is_conv_supported(const std::shared_ptr<ngraph::op::ConvolutionIE>& conv_ie,
const InferenceEngine::Precision gna_precision,
bool is_exception_allowed = false);
/**
* @brief Validates if max pooling is supported by GNA
* @param max_pool max pooling
* @param is_exception_allowed flag specifies whether exception is allowed
* @return true if precision is found in supported
*/
bool is_pooling_supported(const std::shared_ptr<ngraph::opset7::MaxPool> max_pool,
bool is_exception_allowed = false);
/**
* @brief Validates if operation is supported by GNA
* @param node operation
* @param gna_precision GNA inference precision
* @param is_exception_allowed flag specifies whether exception is allowed
* @return true if supported
*/
bool is_op_supported(const std::shared_ptr<ov::Node>& node,
const InferenceEngine::Precision gna_precision,
bool is_exception_allowed = false);
/**
* @brief Check if all operations are supported by GNA
* @param model ngraph model
* @param gna_precision GNA inference precision
*/
void check_all_ops_supported(const std::shared_ptr<ov::Model>& model,
const InferenceEngine::Precision gna_precision);
bool use_only_16bit_convolution_weights() const;
bool is_crop_affined_offset(size_t numberOfElements) const;
size_t get_memory_alignment() const;
std::shared_ptr<cnn2d::AbstractValidator> get_cnn_validator() const;
constexpr static uint32_t kBufferMaxSize = 65528;
constexpr static uint32_t kConvMinFiltersNum = 4;
constexpr static uint32_t kConvMaxFiltersNum = 65532;
constexpr static uint32_t kConvDilationHeight = 1;
constexpr static uint32_t kConvDilationWidth = 1;
constexpr static uint32_t kConvFiltersNumDivider = 4;
constexpr static uint32_t kConvFilterSizeDivider = 8;
constexpr static uint32_t kConvFilterMaxSize = 768;
constexpr static uint32_t kConvEachKernelByteAlignment = 16;
constexpr static uint32_t kInputByteAlignment = 64;
constexpr static uint32_t kNoOfInputsDivisor = 8;
constexpr static uint32_t kNoOfInputsLowPrecDivisor = 16;
constexpr static uint32_t kAffineMaxBatchSize = 8;
constexpr static uint32_t kMaxPoolMaxWindowSize = 6;
constexpr static uint32_t kCopyMaxGrouping = 8;
constexpr static uint32_t kTransposeMaxSize = 65528;
constexpr static uint32_t kMaxLayersCountGNA1_0 = 1023;
constexpr static uint32_t kMaxLayersCountGNA2_0 = 4096;
constexpr static uint32_t kMaxLayersCountGNA3_X = 8192;
// Currently split layer only supports 2 bytes in int16 and int8 mode.
// In fp32 mode this is not necessary but is useful for testing
constexpr static uint32_t kBytesPerSplitElement = 2;
// Currently crop layer only supports 2 bytes in int16 and int8 mode.
// In fp32 mode this is not necessary but is useful for testing
constexpr static uint32_t kBytesPerCropElement = 2;
constexpr static uint32_t kMemoryPageSize = 4096;
private:
Limitations(const target::DeviceVersion& target);
Limitations(const Limitations&) = delete;
Limitations& operator=(const Limitations&) = delete;
size_t get_memory_alignment_bytes(const target::DeviceVersion& target) const;
IE_SUPPRESS_DEPRECATED_START
static bool validate_concat_axis(const InferenceEngine::CNNLayerPtr layer, std::string& errMessage);
IE_SUPPRESS_DEPRECATED_END
bool m_use_only_16bit_conv_weights = false;
size_t m_mem_alignment = 0;
std::shared_ptr<cnn2d::AbstractValidator> m_cnn_validator;
static thread_local std::shared_ptr<Limitations> k_instance;
};
inline std::shared_ptr<Limitations> Limitations::get_instance() {
if (!k_instance) {
THROW_GNA_EXCEPTION << "Limitations instance is not initialized.\n";
}
return k_instance;
}
inline bool Limitations::is_crop_affined_offset(size_t numberOfElements) const {
const auto cropOffset = numberOfElements * kBytesPerCropElement;
return (ALIGN64(cropOffset) != cropOffset);
}
inline size_t Limitations::get_memory_alignment() const {
return m_mem_alignment;
}
inline std::shared_ptr<cnn2d::AbstractValidator> Limitations::get_cnn_validator() const {
return m_cnn_validator;
}
} // namespace limitations
} // namespace intel_gna
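Read on its own, the refactored header above reduces to a small init-then-query protocol. Below is a minimal usage sketch; it is illustrative only — the free function is hypothetical, the include and the calls mirror the ones GNAPlugin makes later in this commit, and because k_instance is thread_local, init() has to run in every thread that later calls get_instance().

#include "backend/gna_limitations.hpp"

using namespace ov::intel_gna;
using namespace ov::intel_gna::limitations;

// Hypothetical helper, not part of the commit.
void illustrate_limitations_usage(const target::DeviceVersion& compile_target) {
    Limitations::init(compile_target);  // must precede any get_instance() call

    const auto limits = Limitations::get_instance();
    const size_t alignment = limits->get_memory_alignment();               // target-dependent, feeds GNA memory allocation
    const auto cnn_validator = limits->get_cnn_validator();                // Validator_30 / Validator_35 family
    const bool wide_weights = limits->use_only_16bit_convolution_weights();
    (void)alignment;
    (void)cnn_validator;
    (void)wide_weights;
}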

View File

@@ -84,7 +84,7 @@ inline bool is_aligned_split(const std::shared_ptr<ngraph::Node> input_op, size_
 std::dynamic_pointer_cast<ngraph::opset8::VariadicSplit>(input_op)) {
 for (size_t index = 0; index < input_op_out_index; index++) {
 size_t outputSize = ngraph::shape_size(input_op->get_output_shape(index));
-    offset += outputSize * limitations::bytesPerSplitElement;
+    offset += outputSize * limitations::Limitations::kBytesPerSplitElement;
 }
 }
 return (offset == ALIGN64(offset));
@@ -93,7 +93,7 @@ inline bool is_aligned_split(const std::shared_ptr<ngraph::Node> input_op, size_
 inline bool is_crop_affined(std::shared_ptr<ngraph::Node> node) {
 auto crop = std::dynamic_pointer_cast<ngraph::op::CropIE>(node);
 if (crop != nullptr && !crop->offset.empty()) {
-    return limitations::isCropAffinedOffset(crop->offset.back());
+    return limitations::Limitations::get_instance()->is_crop_affined_offset(crop->offset.back());
 }
 return false;
 }
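The two hunks above reroute the split and crop helpers through the new constants and singleton, but the underlying check is plain byte arithmetic: at 2 bytes per element, an offset is "free" only when it lands on a 64-byte boundary, otherwise an affine filter has to be inserted. A self-contained sketch, where align_up_64 is a hypothetical stand-in for the plugin's ALIGN64 macro:

#include <cstddef>
#include <cstdio>

constexpr std::size_t kBytesPerCropElement = 2;  // same value as Limitations::kBytesPerCropElement

// Round a byte count up to the next multiple of 64 (stand-in for ALIGN64).
constexpr std::size_t align_up_64(std::size_t bytes) {
    return (bytes + 63) & ~std::size_t{63};
}

constexpr bool crop_needs_affine(std::size_t number_of_elements) {
    const std::size_t crop_offset = number_of_elements * kBytesPerCropElement;
    return align_up_64(crop_offset) != crop_offset;
}

int main() {
    std::printf("96 elements  -> %s\n", crop_needs_affine(96) ? "affine filter" : "plain crop");   // 192 bytes, aligned
    std::printf("100 elements -> %s\n", crop_needs_affine(100) ? "affine filter" : "plain crop");  // 200 bytes, not aligned
    return 0;
}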

View File

@@ -11,6 +11,7 @@
 namespace ov {
 namespace intel_gna {
+using namespace limitations;
 namespace frontend {
 template <class T>
@@ -352,7 +353,7 @@ InferenceEngine::Precision GetWeightsPrecision(const LayerInfo& layer_info,
 const QuantizedLayerParams& quant_layer_params,
 const Config& gna_config) {
 if (((layer_info.isConvolution() || layer_info.isConvolutionFilter()) &&
-     limitations::cnn2d::UseOnly16BitConvolutionWeights(gna_config.target->get_effective_compile_target())) ||
+     Limitations::get_instance()->use_only_16bit_convolution_weights()) ||
 layer_info.isScaleShift()) {
 return InferenceEngine::Precision::I16;
 }

View File

@@ -38,8 +38,7 @@ GNADeviceHelper::GNADeviceHelper(std::shared_ptr<Target> targetIn, bool isPerfor
 : target(targetIn),
 nGnaDeviceIndex{selectGnaDevice()},
 useDeviceEmbeddedExport(deviceEmbedded),
-      isPerformanceMeasuring(isPerformanceMeasuring),
-      m_mem_alignment(limitations::getMemoryAlignmentBytes(targetIn->get_effective_compile_target())) {
+      isPerformanceMeasuring(isPerformanceMeasuring) {
 per_request_diagnostics = log::get_log_level() >= ov::log::Level::TRACE;
 per_model_diagnostics = log::get_log_level() >= ov::log::Level::DEBUG;
 open();
@@ -573,7 +572,7 @@ uint32_t GNADeviceHelper::retrieveMaxLayersCount() {
 switch (target->get_effective_execution_target()) {
 case DeviceVersion::GNA1_0:
 case DeviceVersion::GNA2_0:
-    return kMaxLayersCountGNA2_0;
+    return Limitations::kMaxLayersCountGNA2_0;
 case DeviceVersion::GNA3_0:
 case DeviceVersion::GNA3_1:
 case DeviceVersion::GNA3_5:
@@ -581,7 +580,7 @@ uint32_t GNADeviceHelper::retrieveMaxLayersCount() {
 case DeviceVersion::GNA3_6:
 case DeviceVersion::GNA4_0:
 default:
-    return kMaxLayersCountGNA3_X;
+    return Limitations::kMaxLayersCountGNA3_X;
 }
 }
 } // namespace intel_gna

View File

@@ -67,7 +67,6 @@ class GNADeviceHelper : public GNADevice {
 uint64_t debugLogIndexRequestWait = 0;
 static constexpr const char* kDumpExt = ".bin";
 static constexpr const char* kDumpDelimiter = ".";
-    const size_t m_mem_alignment;
 public:
 explicit GNADeviceHelper(std::shared_ptr<target::Target> target = std::make_shared<target::Target>(),
@@ -128,10 +127,6 @@ public:
 return allAllocations;
 }
-    size_t getMemAlignment() const {
-        return m_mem_alignment;
-    }
 /**
 * @see GNADevice::createModel()
 */

View File

@ -49,6 +49,7 @@ namespace intel_gna {
using namespace frontend; using namespace frontend;
using namespace common; using namespace common;
using namespace memory; using namespace memory;
using namespace limitations;
static bool CheckIFLastComponentIsPrecededByConv2D(const backend::DnnComponents::storage_type& components, static bool CheckIFLastComponentIsPrecededByConv2D(const backend::DnnComponents::storage_type& components,
bool verify_with_pooling = true) { bool verify_with_pooling = true) {
@ -81,20 +82,22 @@ static uint32_t count_conv2D_input_width_for_expected_output_width(uint32_t expe
return (expected_ouput_width - 1) * stride_width - 2 * padding_width + kernel_width; return (expected_ouput_width - 1) * stride_width - 2 * padding_width + kernel_width;
}; };
GNAGraphCompiler::GNAGraphCompiler(const Config& gna_config) : gna_config(gna_config) {} GNAGraphCompiler::GNAGraphCompiler(const Config& gna_config,
std::shared_ptr<backend::AMIntelDNN> dnn_ptr,
std::shared_ptr<GnaInputs> inputs_ptr,
std::shared_ptr<limitations::cnn2d::AbstractValidator> cnn2d_validator_ptr,
std::shared_ptr<gna_memory_type> gna_mem_ptr)
: gna_config(gna_config) {
dnn = std::move(dnn_ptr);
inputs_ptr_ = std::move(inputs_ptr);
m_cnn2d_validator = std::move(cnn2d_validator_ptr);
gnamem = std::move(gna_mem_ptr);
}
void GNAGraphCompiler::setGNAMemoryPtr(std::shared_ptr<gna_memory_type> gnaMemPtr) { void GNAGraphCompiler::setGNAMemoryPtr(std::shared_ptr<gna_memory_type> gnaMemPtr) {
this->gnamem = std::move(gnaMemPtr); this->gnamem = std::move(gnaMemPtr);
} }
void GNAGraphCompiler::setDNNPtr(std::shared_ptr<backend::AMIntelDNN> dnnPtr) {
this->dnn = std::move(dnnPtr);
}
void GNAGraphCompiler::setInputsPtr(std::shared_ptr<GnaInputs> inputsPtr) {
this->inputs_ptr_ = std::move(inputsPtr);
}
intel_dnn_component_t* GNAGraphCompiler::find_first_unused_input(InferenceEngine::CNNLayerPtr current) { intel_dnn_component_t* GNAGraphCompiler::find_first_unused_input(InferenceEngine::CNNLayerPtr current) {
if (current->insData.empty()) if (current->insData.empty())
return nullptr; return nullptr;
@ -228,13 +231,8 @@ void GNAGraphCompiler::fillSplitConnections(InferenceEngine::CNNLayerPtr layer)
split_connection.emplace(id, layerInfoItem); split_connection.emplace(id, layerInfoItem);
} }
void GNAGraphCompiler::SetValidatorTarget(const target::DeviceVersion& target) {
auto temp = limitations::cnn2d::AbstractValidator::Create(target);
cnn2dValidator.reset(temp.release());
}
bool GNAGraphCompiler::ShouldUseOnlyConv2DGnaIface() const { bool GNAGraphCompiler::ShouldUseOnlyConv2DGnaIface() const {
return cnn2dValidator && cnn2dValidator->ShouldUseOnlyConv2DGnaIface(); return m_cnn2d_validator && m_cnn2d_validator->ShouldUseOnlyConv2DGnaIface();
} }
void GNAGraphCompiler::ValidateCnn2D(const std::string& name, void GNAGraphCompiler::ValidateCnn2D(const std::string& name,
@ -249,8 +247,8 @@ void GNAGraphCompiler::ValidateCnn2D(const std::string& name,
const uint32_t dilH, const uint32_t dilH,
const uint32_t dilW, const uint32_t dilW,
OvGnaType inPrecision) const { OvGnaType inPrecision) const {
if (cnn2dValidator) { if (m_cnn2d_validator) {
if (cnn2dValidator->ValidateCnn1D(name, if (m_cnn2d_validator->ValidateCnn1D(name,
inHeight, inHeight,
inWidth, inWidth,
inChannels, inChannels,
@ -265,7 +263,7 @@ void GNAGraphCompiler::ValidateCnn2D(const std::string& name,
false)) { false)) {
return; return;
} }
cnn2dValidator m_cnn2d_validator
->ValidateCnn2D(name, inHeight, inWidth, inChannels, kH, kW, kN, strideH, strideW, dilH, dilW, inPrecision); ->ValidateCnn2D(name, inHeight, inWidth, inChannels, kH, kW, kN, strideH, strideW, dilH, dilW, inPrecision);
} else { } else {
THROW_GNA_EXCEPTION << "No Cnn2D validator found for layer " << name; THROW_GNA_EXCEPTION << "No Cnn2D validator found for layer " << name;
@ -277,8 +275,8 @@ void GNAGraphCompiler::ValidatePooling2D(const std::string& name,
const uint32_t windowW, const uint32_t windowW,
const uint32_t strideH, const uint32_t strideH,
const uint32_t strideW) const { const uint32_t strideW) const {
if (cnn2dValidator) { if (m_cnn2d_validator) {
cnn2dValidator->ValidatePooling2D(name, windowH, windowW, strideH, strideW); m_cnn2d_validator->ValidatePooling2D(name, windowH, windowW, strideH, strideW);
} else { } else {
THROW_GNA_EXCEPTION << "No Pooling2D validator found for layer " << name; THROW_GNA_EXCEPTION << "No Pooling2D validator found for layer " << name;
} }
@ -684,11 +682,11 @@ void GNAGraphCompiler::finalizeConvolution2DPrimitive(InferenceEngine::CNNLayerP
// TODO add function // TODO add function
// printConvolution2DLayer(convolution); // printConvolution2DLayer(convolution);
if (!cnn2dValidator) { if (!m_cnn2d_validator) {
THROW_GNA_EXCEPTION << "No Cnn2D validator found for layer " << convolution.name; THROW_GNA_EXCEPTION << "No Cnn2D validator found for layer " << convolution.name;
} }
cnn2dValidator->ValidateInputPadding(convolution.name, m_cnn2d_validator->ValidateInputPadding(convolution.name,
convolution._padding_y, convolution._padding_y,
convolution._pads_end_y, convolution._pads_end_y,
convolution._padding_x, convolution._padding_x,
@ -713,7 +711,7 @@ void GNAGraphCompiler::finalizeConvolution2DPrimitive(InferenceEngine::CNNLayerP
// have to pad input to let last kernel meets it's corresponding input // have to pad input to let last kernel meets it's corresponding input
const auto num_inputs = in_batch * effective_input_width * in_height * in_channels; const auto num_inputs = in_batch * effective_input_width * in_height * in_channels;
uint32_t num_input_padding = ALIGN(num_inputs, limitations::noOfInputsDivisor) - num_inputs; uint32_t num_input_padding = ALIGN(num_inputs, Limitations::kNoOfInputsDivisor) - num_inputs;
const uint32_t filter_n = convolution._out_depth; const uint32_t filter_n = convolution._out_depth;
@ -813,7 +811,7 @@ void GNAGraphCompiler::finalizeConvolution2DPrimitive(InferenceEngine::CNNLayerP
// Kernel is extended only for 1D case which allows to add 0-s at the end of the kernel. // Kernel is extended only for 1D case which allows to add 0-s at the end of the kernel.
const auto kernel_pad = const auto kernel_pad =
ALIGN(effective_single_kernel_size, limitations::convEachKernelByteAlignment) - effective_single_kernel_size; ALIGN(effective_single_kernel_size, Limitations::kConvEachKernelByteAlignment) - effective_single_kernel_size;
for (uint32_t k = 0; k < convolution._out_depth; k++) { for (uint32_t k = 0; k < convolution._out_depth; k++) {
uint8_t* ptr_filt_current = convolution._weights->cbuffer().as<uint8_t*>() + k * single_kernel_size; uint8_t* ptr_filt_current = convolution._weights->cbuffer().as<uint8_t*>() + k * single_kernel_size;
auto transposed_part = transposeMatrix(ptr_filt_current, convolution_precision, in_channels, kernelHW); auto transposed_part = transposeMatrix(ptr_filt_current, convolution_precision, in_channels, kernelHW);
@ -846,14 +844,15 @@ void GNAGraphCompiler::PowerPrimitive(InferenceEngine::CNNLayerPtr layer) {
auto input = layer->insData[0].lock(); auto input = layer->insData[0].lock();
auto outputs = *layer->outData.begin(); auto outputs = *layer->outData.begin();
auto reshaped_dims = Get2DReshapedData(input, limitations::GetMinBatchToFitInBuffer(input), 8)->getDims(); auto reshaped_dims = Get2DReshapedData(input, Limitations::get_min_batch_to_fit_in_buffer(input), 8)->getDims();
const uint32_t noOfInputsDivisor = gna_config.gnaFlags.input_low_precision ? limitations::noOfInputsLowPrecDivisor const uint32_t num_of_inputs_divisor = gna_config.gnaFlags.input_low_precision
: limitations::noOfInputsDivisor; ? Limitations::kNoOfInputsLowPrecDivisor
: Limitations::kNoOfInputsDivisor;
uint32_t num_rows_in = reshaped_dims[1]; uint32_t num_rows_in = reshaped_dims[1];
uint32_t num_columns_in = reshaped_dims[0]; uint32_t num_columns_in = reshaped_dims[0];
uint32_t num_rows_out = num_rows_in; uint32_t num_rows_out = num_rows_in;
uint32_t num_columns_out = num_columns_in; uint32_t num_columns_out = num_columns_in;
uint32_t num_padding = ALIGN(num_rows_in, noOfInputsDivisor) - num_rows_in; uint32_t num_padding = ALIGN(num_rows_in, num_of_inputs_divisor) - num_rows_in;
size_t num_data_bytes_out = num_columns_out * (num_rows_out + num_padding) * outputs->getPrecision().size(); size_t num_data_bytes_out = num_columns_out * (num_rows_out + num_padding) * outputs->getPrecision().size();
size_t num_data_bytes_in = num_columns_in * (num_rows_in + num_padding) * input->getPrecision().size(); size_t num_data_bytes_in = num_columns_in * (num_rows_in + num_padding) * input->getPrecision().size();
@ -1097,7 +1096,7 @@ void GNAGraphCompiler::CopyPrimitive(InferenceEngine::CNNLayerPtr layer) {
auto inputs = layer->insData.begin()->lock(); auto inputs = layer->insData.begin()->lock();
auto outputs = *layer->outData.begin(); auto outputs = *layer->outData.begin();
auto reshaped_dims = Get2DReshapedData(inputs, limitations::GetMinBatchToFitInBuffer(inputs), 8)->getDims(); auto reshaped_dims = Get2DReshapedData(inputs, Limitations::get_min_batch_to_fit_in_buffer(inputs), 8)->getDims();
uint32_t num_rows_in = reshaped_dims[1]; uint32_t num_rows_in = reshaped_dims[1];
uint32_t num_columns_in = reshaped_dims[0]; uint32_t num_columns_in = reshaped_dims[0];
uint32_t num_rows_out = num_rows_in; uint32_t num_rows_out = num_rows_in;
@ -1159,7 +1158,7 @@ void GNAGraphCompiler::ConcatPrimitive(InferenceEngine::CNNLayerPtr layer) {
} }
// Concat axis validation // Concat axis validation
if (!limitations::ValidateConvConcatAxis(concatLayer)) { if (!Limitations::validate_conv_concat_axis(concatLayer)) {
std::ostringstream in_dims_oss; std::ostringstream in_dims_oss;
auto in_dims = concatLayer->insData[0].lock()->getDims(); auto in_dims = concatLayer->insData[0].lock()->getDims();
std::copy(in_dims.begin(), in_dims.end(), std::ostream_iterator<size_t>(in_dims_oss, ",")); std::copy(in_dims.begin(), in_dims.end(), std::ostream_iterator<size_t>(in_dims_oss, ","));
@ -1270,10 +1269,10 @@ void GNAGraphCompiler::CropPrimitive(InferenceEngine::CNNLayerPtr layer) {
uint32_t num_columns_in = 1; uint32_t num_columns_in = 1;
uint32_t num_rows_out = InferenceEngine::details::product(begin(outputs->getDims()), end(outputs->getDims())); uint32_t num_rows_out = InferenceEngine::details::product(begin(outputs->getDims()), end(outputs->getDims()));
const uint32_t noOfInputsDivisor = gna_config.gnaFlags.input_low_precision const uint32_t num_of_inputs_divisor = gna_config.gnaFlags.input_low_precision
? limitations::noOfInputsLowPrecDivisor ? Limitations::kNoOfInputsLowPrecDivisor
: limitations::noOfInputsDivisor; : Limitations::kNoOfInputsDivisor;
uint32_t num_padding = ALIGN(num_rows_in, noOfInputsDivisor) - num_rows_in; uint32_t num_padding = ALIGN(num_rows_in, num_of_inputs_divisor) - num_rows_in;
void* ptr_inputs = nullptr; void* ptr_inputs = nullptr;
void* ptr_outputs = nullptr; void* ptr_outputs = nullptr;
@ -1303,7 +1302,7 @@ void GNAGraphCompiler::CropPrimitive(InferenceEngine::CNNLayerPtr layer) {
InferenceEngine::details::product(begin(outputs->getDims()), end(outputs->getDims())) * 4; InferenceEngine::details::product(begin(outputs->getDims()), end(outputs->getDims())) * 4;
size_t num_data_bytes_in = size_t num_data_bytes_in =
num_columns_in * ALIGN(num_rows_in, noOfInputsDivisor) * inputs->getPrecision().size(); num_columns_in * ALIGN(num_rows_in, num_of_inputs_divisor) * inputs->getPrecision().size();
connectInput(layer, ptr_inputs, num_data_bytes_in, 0, 0); connectInput(layer, ptr_inputs, num_data_bytes_in, 0, 0);
connectOutput(layer, ptr_outputs, num_data_bytes_out); connectOutput(layer, ptr_outputs, num_data_bytes_out);
@ -1326,8 +1325,9 @@ void GNAGraphCompiler::SlicePrimitive(InferenceEngine::CNNLayerPtr layer) {
void GNAGraphCompiler::EltwisePrimitive(InferenceEngine::CNNLayerPtr layer) { void GNAGraphCompiler::EltwisePrimitive(InferenceEngine::CNNLayerPtr layer) {
auto& eltwise = dynamic_cast<EltwiseLayer&>(*layer.get()); auto& eltwise = dynamic_cast<EltwiseLayer&>(*layer.get());
auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer); auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
const uint32_t noOfInputsDivisor = gna_config.gnaFlags.input_low_precision ? limitations::noOfInputsLowPrecDivisor const uint32_t num_of_inputs_divisor = gna_config.gnaFlags.input_low_precision
: limitations::noOfInputsDivisor; ? Limitations::kNoOfInputsLowPrecDivisor
: Limitations::kNoOfInputsDivisor;
// for eltwise sum/sub in 16-bit precision one input should be 4 bytes and one 2 bytes - detecting that below // for eltwise sum/sub in 16-bit precision one input should be 4 bytes and one 2 bytes - detecting that below
// the names of variables are left for clarity although not always reflecting the real precision/size // the names of variables are left for clarity although not always reflecting the real precision/size
@ -1409,7 +1409,7 @@ void GNAGraphCompiler::EltwisePrimitive(InferenceEngine::CNNLayerPtr layer) {
uint32_t num_columns_in = 1; uint32_t num_columns_in = 1;
uint32_t num_rows_out = num_rows_in; uint32_t num_rows_out = num_rows_in;
uint32_t num_columns_out = num_columns_in; uint32_t num_columns_out = num_columns_in;
uint32_t num_padding = ALIGN(num_rows_in, noOfInputsDivisor) - num_rows_in; uint32_t num_padding = ALIGN(num_rows_in, num_of_inputs_divisor) - num_rows_in;
void* ptr_inputs = nullptr; void* ptr_inputs = nullptr;
void* ptr_outputs = nullptr; void* ptr_outputs = nullptr;
@ -1518,7 +1518,6 @@ void GNAGraphCompiler::GemmPrimitive(InferenceEngine::CNNLayerPtr layer) {
auto outputs = *layer->outData.begin(); auto outputs = *layer->outData.begin();
auto input1_precision = quantized ? Precision(Precision::I16) : input_1->getPrecision(); auto input1_precision = quantized ? Precision(Precision::I16) : input_1->getPrecision();
auto input2_precision = quantized ? Precision(Precision::I16) : input_2->getPrecision(); auto input2_precision = quantized ? Precision(Precision::I16) : input_2->getPrecision();
uint32_t noOfInputsDivisor = limitations::noOfInputsDivisor;
auto in_dims = input_1->getDims(); auto in_dims = input_1->getDims();
auto batch_size = (in_dims.size() == 1) ? 1 : in_dims.front(); auto batch_size = (in_dims.size() == 1) ? 1 : in_dims.front();
@ -1527,7 +1526,7 @@ void GNAGraphCompiler::GemmPrimitive(InferenceEngine::CNNLayerPtr layer) {
const auto out_dims = outputs->getDims(); const auto out_dims = outputs->getDims();
const auto out_dims_size = ngraph::shape_size(out_dims); const auto out_dims_size = ngraph::shape_size(out_dims);
uint32_t num_rows_out = InferenceEngine::GetDimFromBack(out_dims, 1); uint32_t num_rows_out = InferenceEngine::GetDimFromBack(out_dims, 1);
uint32_t num_padding = ALIGN(num_rows_in, noOfInputsDivisor) - num_rows_in; uint32_t num_padding = ALIGN(num_rows_in, Limitations::kNoOfInputsDivisor) - num_rows_in;
// Gemm gets two inputs // Gemm gets two inputs
void* ptr_input_1 = nullptr; // the first input void* ptr_input_1 = nullptr; // the first input
@ -1578,7 +1577,7 @@ void GNAGraphCompiler::AffinePrimitive(InferenceEngine::CNNLayerPtr layer, bool
auto outputs = *layer->outData.begin(); auto outputs = *layer->outData.begin();
const auto out_dims = outputs->getDims(); const auto out_dims = outputs->getDims();
Precision inputPrecision; Precision inputPrecision;
uint32_t noOfInputsDivisor = limitations::noOfInputsDivisor; uint32_t num_of_inputs_divisor = Limitations::kNoOfInputsDivisor;
if (!quantized) { if (!quantized) {
inputPrecision = inputs->getPrecision(); inputPrecision = inputs->getPrecision();
@ -1586,11 +1585,11 @@ void GNAGraphCompiler::AffinePrimitive(InferenceEngine::CNNLayerPtr layer, bool
inputPrecision = Precision(Precision::I16); inputPrecision = Precision(Precision::I16);
} else { } else {
inputPrecision = Precision(Precision::I8); inputPrecision = Precision(Precision::I8);
noOfInputsDivisor = limitations::noOfInputsLowPrecDivisor; num_of_inputs_divisor = Limitations::kNoOfInputsLowPrecDivisor;
} }
auto input_data = HasTo2DReshapeData(layer) auto input_data = HasTo2DReshapeData(layer)
? Get2DReshapedData(inputs, limitations::GetMinBatchToFitInBuffer(inputs), 8) ? Get2DReshapedData(inputs, Limitations::get_min_batch_to_fit_in_buffer(inputs), 8)
: inputs; : inputs;
auto in_dims = input_data->getDims(); auto in_dims = input_data->getDims();
auto batch_size = (in_dims.size() == 1) ? 1 : in_dims.front(); auto batch_size = (in_dims.size() == 1) ? 1 : in_dims.front();
@ -1598,7 +1597,7 @@ void GNAGraphCompiler::AffinePrimitive(InferenceEngine::CNNLayerPtr layer, bool
uint32_t num_columns_in = batch_size; uint32_t num_columns_in = batch_size;
uint32_t num_rows_out = isDiag ? num_rows_in : InferenceEngine::GetDimFromBack(out_dims, 1); uint32_t num_rows_out = isDiag ? num_rows_in : InferenceEngine::GetDimFromBack(out_dims, 1);
uint32_t num_columns_out = num_columns_in; uint32_t num_columns_out = num_columns_in;
uint32_t num_padding = ALIGN(num_rows_in, noOfInputsDivisor) - num_rows_in; uint32_t num_padding = ALIGN(num_rows_in, num_of_inputs_divisor) - num_rows_in;
uint32_t num_padding_out = isDiag ? num_padding : 0; uint32_t num_padding_out = isDiag ? num_padding : 0;
void* ptr_inputs = nullptr; void* ptr_inputs = nullptr;
@ -1803,12 +1802,13 @@ void GNAGraphCompiler::ConcatAlignFilterPrimitive(InferenceEngine::CNNLayerPtr l
auto outputs = *layer->outData.begin(); auto outputs = *layer->outData.begin();
auto inputs = layer->insData.begin()->lock(); auto inputs = layer->insData.begin()->lock();
const uint32_t noOfInputsDivisor = gna_config.gnaFlags.input_low_precision ? limitations::noOfInputsLowPrecDivisor const uint32_t num_of_inputs_divisor = gna_config.gnaFlags.input_low_precision
: limitations::noOfInputsDivisor; ? Limitations::kNoOfInputsLowPrecDivisor
: Limitations::kNoOfInputsDivisor;
uint32_t num_columns_in = GetDimFromBack(inputs->getDims(), 2); uint32_t num_columns_in = GetDimFromBack(inputs->getDims(), 2);
uint32_t num_rows_out = GetDimFromBack(outputs->getDims(), 1); uint32_t num_rows_out = GetDimFromBack(outputs->getDims(), 1);
uint32_t num_rows_in = filterLayer->_weights->size() / num_rows_out; uint32_t num_rows_in = filterLayer->_weights->size() / num_rows_out;
uint32_t num_padding = ALIGN(num_rows_in, noOfInputsDivisor) - num_rows_in; uint32_t num_padding = ALIGN(num_rows_in, num_of_inputs_divisor) - num_rows_in;
auto numRowsPadded = filterLayer->GetParamAsInt("num_rows_padded"); auto numRowsPadded = filterLayer->GetParamAsInt("num_rows_padded");
// number of rows we handled by inserting copy layer // number of rows we handled by inserting copy layer
@ -1877,7 +1877,8 @@ void GNAGraphCompiler::ConcatAlignFilterPrimitive(InferenceEngine::CNNLayerPtr l
false); false);
size_t num_data_bytes_out = num_rows_out * num_columns_in * outputs->getPrecision().size(); size_t num_data_bytes_out = num_rows_out * num_columns_in * outputs->getPrecision().size();
size_t num_data_bytes_in = num_columns_in * ALIGN(num_rows_in, noOfInputsDivisor) * inputs->getPrecision().size(); size_t num_data_bytes_in =
num_columns_in * ALIGN(num_rows_in, num_of_inputs_divisor) * inputs->getPrecision().size();
connectInput(layer, ptr_inputs, num_data_bytes_in, num_rows_copied * inputs->getPrecision().size(), 0); connectInput(layer, ptr_inputs, num_data_bytes_in, num_rows_copied * inputs->getPrecision().size(), 0);
connectOutput(layer, ptr_outputs, num_data_bytes_out); connectOutput(layer, ptr_outputs, num_data_bytes_out);
@ -1940,8 +1941,8 @@ void GNAGraphCompiler::ConvolutionFilterPrimitive(InferenceEngine::CNNLayerPtr l
auto outputs = *layer->outData.begin(); auto outputs = *layer->outData.begin();
auto inputs = layer->insData.begin()->lock(); auto inputs = layer->insData.begin()->lock();
const auto noOfInputsDivisor = gna_config.gnaFlags.input_low_precision ? limitations::noOfInputsLowPrecDivisor const auto num_of_inputs_divisor = gna_config.gnaFlags.input_low_precision ? Limitations::kNoOfInputsLowPrecDivisor
: limitations::noOfInputsDivisor; : Limitations::kNoOfInputsDivisor;
const uint32_t orginalInputSize = const uint32_t orginalInputSize =
InferenceEngine::details::product(std::next(inputs->getDims().begin()), inputs->getDims().end()); InferenceEngine::details::product(std::next(inputs->getDims().begin()), inputs->getDims().end());
const uint32_t orginalOutputSize = const uint32_t orginalOutputSize =
@ -1956,7 +1957,7 @@ void GNAGraphCompiler::ConvolutionFilterPrimitive(InferenceEngine::CNNLayerPtr l
const auto filterWidth = filterLayer->_kernel_x; const auto filterWidth = filterLayer->_kernel_x;
const auto minOutputsPerFilter = ALIGN(orginalOutputSize, numberOfFilters) / numberOfFilters; const auto minOutputsPerFilter = ALIGN(orginalOutputSize, numberOfFilters) / numberOfFilters;
const auto minInputsNeeded = (minOutputsPerFilter - 1) * convolutionStride + filterWidth; const auto minInputsNeeded = (minOutputsPerFilter - 1) * convolutionStride + filterWidth;
const auto numInputsFullyPadedAndAligned = ALIGN(minInputsNeeded, noOfInputsDivisor); const auto numInputsFullyPadedAndAligned = ALIGN(minInputsNeeded, num_of_inputs_divisor);
auto numOutputs = auto numOutputs =
gna_convolution_layer::outputFromConv(numInputsFullyPadedAndAligned, filterWidth, convolutionStride); gna_convolution_layer::outputFromConv(numInputsFullyPadedAndAligned, filterWidth, convolutionStride);
@ -2278,14 +2279,15 @@ void GNAGraphCompiler::PermutePrimitive(InferenceEngine::CNNLayerPtr layer) {
<< std::min(squeezedInputOrder[0], squeezedInputOrder[1]) << " > 8)"; << std::min(squeezedInputOrder[0], squeezedInputOrder[1]) << " > 8)";
} }
const uint32_t noOfInputsDivisor = gna_config.gnaFlags.input_low_precision ? limitations::noOfInputsLowPrecDivisor const uint32_t num_of_inputs_divisor = gna_config.gnaFlags.input_low_precision
: limitations::noOfInputsDivisor; ? Limitations::kNoOfInputsLowPrecDivisor
: Limitations::kNoOfInputsDivisor;
// now this can be run on GNA // now this can be run on GNA
if (squeezedInputOrder[0] < squeezedInputOrder[1]) { // interleave case if (squeezedInputOrder[0] < squeezedInputOrder[1]) { // interleave case
if (ALIGN(squeezedInputOrder[1], noOfInputsDivisor) != squeezedInputOrder[1]) { if (ALIGN(squeezedInputOrder[1], num_of_inputs_divisor) != squeezedInputOrder[1]) {
THROW_GNA_LAYER_EXCEPTION(layer) THROW_GNA_LAYER_EXCEPTION(layer)
<< "unsupported permute (row size not a multiple of " << noOfInputsDivisor << ")"; << "unsupported permute (row size not a multiple of " << num_of_inputs_divisor << ")";
} else { } else {
auto& currentComponent = dnnComponents.addComponent(layer->name, "interleave"); auto& currentComponent = dnnComponents.addComponent(layer->name, "interleave");
dnn->InitInterleaveComponent(currentComponent, dnn->InitInterleaveComponent(currentComponent,
@ -2299,9 +2301,9 @@ void GNAGraphCompiler::PermutePrimitive(InferenceEngine::CNNLayerPtr layer) {
} }
} else { // deinterleave case } else { // deinterleave case
if (ALIGN(squeezedInputOrder[0], noOfInputsDivisor) != squeezedInputOrder[0]) { if (ALIGN(squeezedInputOrder[0], num_of_inputs_divisor) != squeezedInputOrder[0]) {
THROW_GNA_LAYER_EXCEPTION(layer) THROW_GNA_LAYER_EXCEPTION(layer)
<< "[GNA plugin] unsupported permute (column size not a multiple of " << noOfInputsDivisor << ")"; << "[GNA plugin] unsupported permute (column size not a multiple of " << num_of_inputs_divisor << ")";
} else { } else {
auto& currentComponent = dnnComponents.addComponent(layer->name, "deinterleave"); auto& currentComponent = dnnComponents.addComponent(layer->name, "deinterleave");
dnn->InitDeinterleaveComponent(currentComponent, dnn->InitDeinterleaveComponent(currentComponent,
@ -2317,7 +2319,7 @@ void GNAGraphCompiler::PermutePrimitive(InferenceEngine::CNNLayerPtr layer) {
size_t num_data_bytes_out = size_t num_data_bytes_out =
ALIGN(InferenceEngine::details::product(begin(outputs->getDims()), end(outputs->getDims())), ALIGN(InferenceEngine::details::product(begin(outputs->getDims()), end(outputs->getDims())),
noOfInputsDivisor) * num_of_inputs_divisor) *
outputs->getPrecision().size(); outputs->getPrecision().size();
size_t num_data_bytes_in = squeezedInputOrder[0] * squeezedInputOrder[1] * inputs->getPrecision().size(); size_t num_data_bytes_in = squeezedInputOrder[0] * squeezedInputOrder[1] * inputs->getPrecision().size();
@ -2610,12 +2612,12 @@ ConnectionDetails GNAGraphCompiler::connectInput(CNNLayerPtr layer,
// if request for allocation less that realTensorInput - we need to extend request // if request for allocation less that realTensorInput - we need to extend request
auto minInput = inputs_ptr_->at(prevLayer->name).get_required_size(); auto minInput = inputs_ptr_->at(prevLayer->name).get_required_size();
if (num_data_bytes_in < minInput) { if (num_data_bytes_in < minInput) {
const uint32_t noOfInputsDivisor = gna_config.gnaFlags.input_low_precision const uint32_t num_of_inputs_divisor = gna_config.gnaFlags.input_low_precision
? limitations::noOfInputsLowPrecDivisor ? Limitations::kNoOfInputsLowPrecDivisor
: limitations::noOfInputsDivisor; : Limitations::kNoOfInputsDivisor;
log::debug() << "[INPUT] : requested bytes: " << num_data_bytes_in << ", extended to" log::debug() << "[INPUT] : requested bytes: " << num_data_bytes_in << ", extended to"
<< ALIGN(minInput, noOfInputsDivisor); << ALIGN(minInput, num_of_inputs_divisor);
num_data_bytes_in = ALIGN(minInput, noOfInputsDivisor); num_data_bytes_in = ALIGN(minInput, num_of_inputs_divisor);
} }
// real allocation pointer will be kept in ptr not in ptr_inputs_global // real allocation pointer will be kept in ptr not in ptr_inputs_global
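Two bits of arithmetic recur behind the renamed num_of_inputs_divisor variables and the Get2DReshapedData calls in this file; they are restated below in isolation. Two assumptions are baked in: align_up is a stand-in for the plugin's ALIGN macro, and the relocated Limitations::get_min_batch_to_fit_in_buffer() is assumed to keep the total_size / kBufferMaxSize + 1 formula of the inline helper it replaces.

#include <cstdint>

constexpr uint32_t kNoOfInputsDivisor = 8;          // regular 16-bit input path
constexpr uint32_t kNoOfInputsLowPrecDivisor = 16;  // low-precision (8-bit) input path
constexpr uint32_t kBufferMaxSize = 65528;          // same value as Limitations::kBufferMaxSize

// Stand-in for the ALIGN macro: round value up to a multiple of step.
constexpr uint32_t align_up(uint32_t value, uint32_t step) {
    return ((value + step - 1) / step) * step;
}

// num_padding as computed for the affine/eltwise/crop primitives above.
constexpr uint32_t input_row_padding(uint32_t num_rows_in, bool input_low_precision) {
    const uint32_t divisor = input_low_precision ? kNoOfInputsLowPrecDivisor : kNoOfInputsDivisor;
    return align_up(num_rows_in, divisor) - num_rows_in;
}

// Assumed formula for Limitations::get_min_batch_to_fit_in_buffer().
constexpr uint32_t min_batch_to_fit_in_buffer(uint32_t total_elements) {
    return total_elements / kBufferMaxSize + 1;
}

static_assert(input_row_padding(10, false) == 6, "10 rows are padded up to 16 with the 8-row divisor");
static_assert(input_row_padding(30, true) == 2, "30 rows are padded up to 32 with the 16-row divisor");
static_assert(min_batch_to_fit_in_buffer(65527) == 1, "fits into a single buffer pass");
static_assert(min_batch_to_fit_in_buffer(65529) == 2, "has to be split across two batches");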

View File

@@ -54,20 +54,22 @@ private:
 uint32_t num_rows,
 uint32_t num_cols);
-    std::unique_ptr<const limitations::cnn2d::AbstractValidator> cnn2dValidator;
 bool ShouldUseOnlyConv2DGnaIface() const;
+    std::shared_ptr<limitations::cnn2d::AbstractValidator> m_cnn2d_validator;
 public:
 backend::DnnComponents dnnComponents;
 MemoryConnection memory_connection;
 ConcatConnection concat_connection;
 ConstConnections const_connections;
-    GNAGraphCompiler(const Config& gna_config);
+    GNAGraphCompiler(const Config& gna_config,
+                     std::shared_ptr<backend::AMIntelDNN> dnn_ptr,
+                     std::shared_ptr<GnaInputs> inputs_ptr,
+                     std::shared_ptr<limitations::cnn2d::AbstractValidator> cnn2d_validator,
+                     std::shared_ptr<gna_memory_type> gna_mem_ptr);
 void setGNAMemoryPtr(std::shared_ptr<gna_memory_type> gnaMemPtr);
-    void setDNNPtr(std::shared_ptr<backend::AMIntelDNN> dnnPtr);
-    void setInputsPtr(std::shared_ptr<GnaInputs> inputsPtr);
 void fillMemoryConnections(std::unordered_map<std::string, std::vector<InferenceEngine::CNNLayerPtr>>& memoryPairs);
@@ -93,8 +95,6 @@ public:
 const uint32_t strideH,
 const uint32_t strideW) const;
-    void SetValidatorTarget(const target::DeviceVersion& target);
 /**
 * Connects either memory output, or generic output to a layer
 * @param layer - layer pointer

View File

@ -29,6 +29,7 @@
#include <vector> #include <vector>
#include "backend/am_intel_dnn.hpp" #include "backend/am_intel_dnn.hpp"
#include "backend/gna_limitations.hpp"
#include "common/gna_target.hpp" #include "common/gna_target.hpp"
#include "frontend/model_quantizer.hpp" #include "frontend/model_quantizer.hpp"
#include "frontend/scale_factor_calc.hpp" #include "frontend/scale_factor_calc.hpp"
@ -55,6 +56,7 @@
#include "scale_factor_helper.hpp" #include "scale_factor_helper.hpp"
#include "serial/gna_model_serial.hpp" #include "serial/gna_model_serial.hpp"
using namespace ov::intel_gna::limitations;
using namespace ov::intel_gna::graph_utils; using namespace ov::intel_gna::graph_utils;
inline uint32_t ToByteSize(const Gna2DataType type) { inline uint32_t ToByteSize(const Gna2DataType type) {
@ -357,17 +359,23 @@ void GNAPlugin::PrePostProcess(InferenceEngine::Blob::Ptr input_blob,
} }
} }
GNAPlugin::GNAPlugin() : graphCompiler(config) { GNAPlugin::GNAPlugin() {
Init(); Init();
UpdateFieldsFromConfig(); UpdateFieldsFromConfig();
InitGNADevice(); InitGNADevice();
Limitations::init(config.target->get_effective_compile_target());
InitGNAMemory();
InitGraphCompiler();
} }
GNAPlugin::GNAPlugin(const std::map<std::string, std::string>& configMap) : graphCompiler(config) { GNAPlugin::GNAPlugin(const std::map<std::string, std::string>& configMap) {
Init(); Init();
SetConfig(configMap); SetConfig(configMap);
log::set_log_level(gnaFlags->log_level); log::set_log_level(gnaFlags->log_level);
InitGNADevice(); InitGNADevice();
Limitations::init(config.target->get_effective_compile_target());
InitGNAMemory();
InitGraphCompiler();
} }
void GNAPlugin::Init() { void GNAPlugin::Init() {
@ -376,27 +384,36 @@ void GNAPlugin::Init() {
gnaFlags = std::make_shared<GNAFlags>(GNAFlags()); gnaFlags = std::make_shared<GNAFlags>(GNAFlags());
inputs_ptr_ = std::make_shared<GnaInputs>(GnaInputs()); inputs_ptr_ = std::make_shared<GnaInputs>(GnaInputs());
outputs_ = GnaOutputs(); outputs_ = GnaOutputs();
graphCompiler.setDNNPtr(dnn);
graphCompiler.setInputsPtr(inputs_ptr_);
requestWorkerPool_ = std::make_shared<request::WorkerPoolImpl>(); requestWorkerPool_ = std::make_shared<request::WorkerPoolImpl>();
} }
void GNAPlugin::InitGNADevice() { void GNAPlugin::InitGNADevice() {
OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "InitGNADevice"); OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "InitGNADevice");
if (gnaFlags->sw_fp32) {
gnamem.reset(new gna_memory_float(memory::GNAFloatAllocator{})); if (!gnaFlags->sw_fp32) {
} else {
gnadevice = std::make_shared<GNADeviceHelper>(config.target, gnadevice = std::make_shared<GNADeviceHelper>(config.target,
gnaFlags->performance_counting, gnaFlags->performance_counting,
!config.embedded_export_path.empty()); !config.embedded_export_path.empty());
gnamem = std::make_shared<gna_memory_device>(memory::GNAAllocator(gnadevice),
gnadevice->getMemAlignment(),
limitations::kMemoryPageSize);
} }
graphCompiler.setGNAMemoryPtr(gnamem); }
void GNAPlugin::InitGNAMemory() {
OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "InitGNAMemory");
if (gnaFlags->sw_fp32) {
gnamem.reset(new gna_memory_float(memory::GNAFloatAllocator{}));
} else {
gnamem = std::make_shared<gna_memory_device>(memory::GNAAllocator(gnadevice),
Limitations::get_instance()->get_memory_alignment(),
Limitations::kMemoryPageSize);
}
}
void GNAPlugin::InitGraphCompiler() {
OV_ITT_SCOPED_TASK(itt::domains::GNA_LT, "InitGraphCompiler");
m_graph_compiler = std::make_shared<GNAGraphCompiler>(
GNAGraphCompiler(config, dnn, inputs_ptr_, Limitations::get_instance()->get_cnn_validator(), gnamem));
} }
void GNAPlugin::UpdateInputScaleFromNetwork(InferenceEngine::CNNNetwork& network) { void GNAPlugin::UpdateInputScaleFromNetwork(InferenceEngine::CNNNetwork& network) {
@ -428,8 +445,7 @@ void GNAPlugin::UpdateInputScaleFromNetwork(InferenceEngine::CNNNetwork& network
GNAFakeQuantizeLayer fqLayer(next_layer); GNAFakeQuantizeLayer fqLayer(next_layer);
auto inputRange = fqLayer.getInputRange(); auto inputRange = fqLayer.getInputRange();
auto outputRange = fqLayer.getOutputRange(); auto outputRange = fqLayer.getOutputRange();
if (inputRange.second.size() != 1 || inputRange.second.size() != 1 || outputRange.second.size() != 1 || if (inputRange.second.size() != 1 || outputRange.second.size() != 1) {
outputRange.second.size() != 1) {
THROW_GNA_LAYER_EXCEPTION(next_layer) THROW_GNA_LAYER_EXCEPTION(next_layer)
<< "unsupported, per-channel quantization for input layer : " << input.second->name(); << "unsupported, per-channel quantization for input layer : " << input.second->name();
} }
@ -552,12 +568,12 @@ bool GNAPlugin::TryToInitOutput(const std::string& portName, InferenceEngine::CN
}; };
// probing gna_primitives // probing gna_primitives
auto irLayerAvatar = std::find_if(graphCompiler.dnnComponents.components.begin(), auto irLayerAvatar = std::find_if(m_graph_compiler->dnnComponents.components.begin(),
graphCompiler.dnnComponents.components.end(), m_graph_compiler->dnnComponents.components.end(),
[&layer](const backend::DnnComponents::storage_type::value_type& value) { [&layer](const backend::DnnComponents::storage_type::value_type& value) {
return value.name == layer->name; return value.name == layer->name;
}); });
if (irLayerAvatar != graphCompiler.dnnComponents.components.end()) { if (irLayerAvatar != m_graph_compiler->dnnComponents.components.end()) {
initOutput(irLayerAvatar->dnnComponent.orientation_out, initOutput(irLayerAvatar->dnnComponent.orientation_out,
irLayerAvatar->dnnComponent.num_bytes_per_output, irLayerAvatar->dnnComponent.num_bytes_per_output,
irLayerAvatar->dnnComponent.num_rows_out, irLayerAvatar->dnnComponent.num_rows_out,
@ -567,8 +583,8 @@ bool GNAPlugin::TryToInitOutput(const std::string& portName, InferenceEngine::CN
// probing concatInfo // probing concatInfo
if (LayerInfo(layer).isConcat()) { if (LayerInfo(layer).isConcat()) {
auto concatConnection = graphCompiler.concat_connection.find(layer->name); auto concatConnection = m_graph_compiler->concat_connection.find(layer->name);
if (concatConnection != graphCompiler.concat_connection.end()) { if (concatConnection != m_graph_compiler->concat_connection.end()) {
auto precision = layer->outData.front()->getPrecision().size(); auto precision = layer->outData.front()->getPrecision().size();
initOutput(kDnnInterleavedOrientation, initOutput(kDnnInterleavedOrientation,
precision, precision,
@ -581,8 +597,8 @@ bool GNAPlugin::TryToInitOutput(const std::string& portName, InferenceEngine::CN
// probing a constant info, for constant trivial networks support // probing a constant info, for constant trivial networks support
if (LayerInfo(layer).isConst()) { if (LayerInfo(layer).isConst()) {
auto const_blob = layer->blobs["custom"]; auto const_blob = layer->blobs["custom"];
auto constConnection = graphCompiler.const_connections.find(layer->name); auto constConnection = m_graph_compiler->const_connections.find(layer->name);
if (constConnection != graphCompiler.const_connections.end()) { if (constConnection != m_graph_compiler->const_connections.end()) {
initOutput(kDnnInterleavedOrientation, initOutput(kDnnInterleavedOrientation,
layer->outData.front()->getPrecision().size(), layer->outData.front()->getPrecision().size(),
const_blob->size(), const_blob->size(),
@ -696,16 +712,13 @@ void GNAPlugin::LoadNetwork(const CNNNetwork& _network) {
_network_name = _network.getName(); _network_name = _network.getName();
std::shared_ptr<InferenceEngine::details::CNNNetworkImpl> convertedNetwork; std::shared_ptr<InferenceEngine::details::CNNNetworkImpl> convertedNetwork;
const auto effectiveCompileTarget = config.target->get_effective_compile_target();
graphCompiler.SetValidatorTarget(effectiveCompileTarget);
auto transformer = TransformationsPipeline(config); auto transformer = TransformationsPipeline(config);
if (_network.getFunction()) { if (_network.getFunction()) {
CNNNetwork clonedNetwork = InferenceEngine::cloneNetwork(_network); CNNNetwork clonedNetwork = InferenceEngine::cloneNetwork(_network);
auto model = clonedNetwork.getFunction(); auto model = clonedNetwork.getFunction();
transformer.apply(model, &m_input_output_subgraphs); transformer.apply(model, &m_input_output_subgraphs);
limitations::check_all_ops_supported(model, effectiveCompileTarget, config.gnaPrecision); Limitations::get_instance()->check_all_ops_supported(model, config.gnaPrecision);
convertedNetwork = InferenceEngine::details::convertFunctionToICNNNetwork(model, clonedNetwork); convertedNetwork = InferenceEngine::details::convertFunctionToICNNNetwork(model, clonedNetwork);
} }
IE_SUPPRESS_DEPRECATED_START IE_SUPPRESS_DEPRECATED_START
@ -717,7 +730,7 @@ void GNAPlugin::LoadNetwork(const CNNNetwork& _network) {
// Check the network // Check the network
std::string error; std::string error;
if (!limitations::AreLayersSupported(network, error)) { if (!Limitations::are_layers_supported(network, error)) {
THROW_GNA_EXCEPTION << error.c_str(); THROW_GNA_EXCEPTION << error.c_str();
} }
@ -805,17 +818,17 @@ void GNAPlugin::LoadNetwork(const CNNNetwork& _network) {
memoryPairs[id][generic->GetParamAsInt("index")] = layer; memoryPairs[id][generic->GetParamAsInt("index")] = layer;
continue; continue;
} else if (layerInfo.isConcat()) { } else if (layerInfo.isConcat()) {
graphCompiler.fillConcatConnections(layer); m_graph_compiler->fillConcatConnections(layer);
} else if (layerInfo.isSplit() || layerInfo.isSlice()) { } else if (layerInfo.isSplit() || layerInfo.isSlice()) {
graphCompiler.fillSplitConnections(layer); m_graph_compiler->fillSplitConnections(layer);
} }
sortedNoMem.push_back(layer); sortedNoMem.push_back(layer);
} }
// fill in extra storage with memory layers // fill in extra storage with memory layers
graphCompiler.fillMemoryConnections(memoryPairs); m_graph_compiler->fillMemoryConnections(memoryPairs);
if (!graphCompiler.memory_connection.empty() && gnaFlags->num_requests != 1) { if (!m_graph_compiler->memory_connection.empty() && gnaFlags->num_requests != 1) {
gnaFlags->num_requests = 1; gnaFlags->num_requests = 1;
} }
@ -837,17 +850,17 @@ void GNAPlugin::LoadNetwork(const CNNNetwork& _network) {
// Creating Layer primitives // Creating Layer primitives
for (auto& layer : sortedNoMem) { for (auto& layer : sortedNoMem) {
graphCompiler.CreateLayerPrimitive(layer); m_graph_compiler->CreateLayerPrimitive(layer);
} }
for (auto& inputLayer : inputLayers) { for (auto& inputLayer : inputLayers) {
auto layerInfo = LayerInfo(inputLayer); auto layerInfo = LayerInfo(inputLayer);
if (layerInfo.isInput() && 0 == inputs_ptr_->at(inputLayer->name).get_allocated_size()) { if (layerInfo.isInput() && 0 == inputs_ptr_->at(inputLayer->name).get_allocated_size()) {
graphCompiler.connectOutput(inputLayer, &inputs_ptr_->at(inputLayer->name).ptrs.front(), 0); m_graph_compiler->connectOutput(inputLayer, &inputs_ptr_->at(inputLayer->name).ptrs.front(), 0);
} }
} }
if (graphCompiler.dnnComponents.components.empty()) { if (m_graph_compiler->dnnComponents.components.empty()) {
log::warning() << "No GNA primitives created based on topology. This might indicate trivial topology\n"; log::warning() << "No GNA primitives created based on topology. This might indicate trivial topology\n";
trivialTopology = true; trivialTopology = true;
} }
@ -861,7 +874,7 @@ void GNAPlugin::LoadNetwork(const CNNNetwork& _network) {
// Memory layers are not dnnComponents hence we need to make switch with identity layer // Memory layers are not dnnComponents hence we need to make switch with identity layer
if (outLayer->type == "Memory") { if (outLayer->type == "Memory") {
// traverse memory connection to find corresponding output_memory // traverse memory connection to find corresponding output_memory
for (auto&& memConnection : graphCompiler.memory_connection) { for (auto&& memConnection : m_graph_compiler->memory_connection) {
if (memConnection.second.getInput()->name == outLayer->name) { if (memConnection.second.getInput()->name == outLayer->name) {
// if connection is found, replace memory input layer with memory output layer // if connection is found, replace memory input layer with memory output layer
outLayer = memConnection.second.getOutput(); outLayer = memConnection.second.getOutput();
@ -909,11 +922,11 @@ void GNAPlugin::LoadNetwork(const CNNNetwork& _network) {
dnn->Init(gnamem.get(), gnaFlags->sw_fp32 ? kDnnFloat : kDnnInt, 1); dnn->Init(gnamem.get(), gnaFlags->sw_fp32 ? kDnnFloat : kDnnInt, 1);
// TODO: this copy is unneeded; in fact, we can directly create gna structs from list // TODO: this copy is unneeded; in fact, we can directly create gna structs from list
auto execOrder = graphCompiler.dnnComponents.getExecutionOrder(); auto execOrder = m_graph_compiler->dnnComponents.getExecutionOrder();
dnn->component.insert(dnn->component.begin(), execOrder.begin(), execOrder.end()); dnn->component.insert(dnn->component.begin(), execOrder.begin(), execOrder.end());
// in fp32 mode last PWL cannot be computed without that // in fp32 mode last PWL cannot be computed without that
if (!graphCompiler.dnnComponents.components.empty()) { if (!m_graph_compiler->dnnComponents.components.empty()) {
dnn->InitActiveList(NULL); dnn->InitActiveList(NULL);
} }
@ -965,7 +978,7 @@ void GNAPlugin::LoadNetwork(const CNNNetwork& _network) {
for (auto& inputLayer : inputLayers) { for (auto& inputLayer : inputLayers) {
if (LayerInfo(inputLayer).isInput()) { if (LayerInfo(inputLayer).isInput()) {
ov::intel_gna::helpers::updateModelInputOrientationWithoutConvolution(*inputLayer, ov::intel_gna::helpers::updateModelInputOrientationWithoutConvolution(*inputLayer,
graphCompiler.dnnComponents, m_graph_compiler->dnnComponents,
*inputs_ptr_); *inputs_ptr_);
} }
} }
@ -976,7 +989,7 @@ void GNAPlugin::LoadNetwork(const CNNNetwork& _network) {
if (outLayer && LayerInfo(outLayer).isOutput()) { if (outLayer && LayerInfo(outLayer).isOutput()) {
ov::intel_gna::helpers::updateModelOutputOrientation(outPort.first, ov::intel_gna::helpers::updateModelOutputOrientation(outPort.first,
outLayer->name, outLayer->name,
graphCompiler.dnnComponents, m_graph_compiler->dnnComponents,
outputs_); outputs_);
} }
} }
@ -1101,7 +1114,7 @@ void GNAPlugin::DumpXNNToFile() const {
uint32_t GNAPlugin::QueueInference(const InferenceEngine::BlobMap& inputs, InferenceEngine::BlobMap& result) { uint32_t GNAPlugin::QueueInference(const InferenceEngine::BlobMap& inputs, InferenceEngine::BlobMap& result) {
auto freeWorker = requestWorkerPool_->findFreeModelWorker(); auto freeWorker = requestWorkerPool_->findFreeModelWorker();
if (freeWorker == nullptr) { if (freeWorker == nullptr) {
if (!graphCompiler.memory_connection.empty()) { if (!m_graph_compiler->memory_connection.empty()) {
Wait(requestWorkerPool_->firstWorker().representingIndex()); Wait(requestWorkerPool_->firstWorker().representingIndex());
freeWorker = requestWorkerPool_->findFreeModelWorker(); freeWorker = requestWorkerPool_->findFreeModelWorker();
if (freeWorker == nullptr) { if (freeWorker == nullptr) {
@ -1412,7 +1425,7 @@ RequestStatus GNAPlugin::WaitFor(uint32_t request_idx, int64_t millisTimeout) {
} }
void GNAPlugin::Reset() { void GNAPlugin::Reset() {
graphCompiler.Reset(); m_graph_compiler->Reset();
} }
bool GNAPlugin::Infer(const InferenceEngine::Blob& input, InferenceEngine::Blob& output) { bool GNAPlugin::Infer(const InferenceEngine::Blob& input, InferenceEngine::Blob& output) {
@ -1479,9 +1492,9 @@ Blob::Ptr GNAPlugin::GetInputBlob(const std::string& name, InferenceEngine::Prec
} }
std::vector<InferenceEngine::IVariableStateInternal::Ptr> GNAPlugin::QueryState() { std::vector<InferenceEngine::IVariableStateInternal::Ptr> GNAPlugin::QueryState() {
if (memoryStates.size() != graphCompiler.memory_connection.size()) { if (memoryStates.size() != m_graph_compiler->memory_connection.size()) {
memoryStates.clear(); memoryStates.clear();
for (auto& connection : graphCompiler.memory_connection) { for (auto& connection : m_graph_compiler->memory_connection) {
auto state = auto state =
std::make_shared<memory::GNAVariableState>(connection.first, std::make_shared<memory::GNAVariableState>(connection.first,
std::make_shared<GNAMemoryLayer>(connection.second)); std::make_shared<GNAMemoryLayer>(connection.second));
@ -1575,7 +1588,7 @@ InferenceEngine::IExecutableNetworkInternal::Ptr GNAPlugin::ImportNetwork(std::i
GNAMemoryLayer memoryLayer(nullptr, nullptr, gnaFlags->sw_fp32 ? 4 : 2); GNAMemoryLayer memoryLayer(nullptr, nullptr, gnaFlags->sw_fp32 ? 4 : 2);
std::string name; std::string name;
std::tie(memoryLayer.gna_ptr, memoryLayer.reserved_size, name, memoryLayer.scale_factor) = memory; std::tie(memoryLayer.gna_ptr, memoryLayer.reserved_size, name, memoryLayer.scale_factor) = memory;
graphCompiler.memory_connection.emplace_back(make_pair(name, memoryLayer)); m_graph_compiler->memory_connection.emplace_back(make_pair(name, memoryLayer));
} }
// TODO update documenation to allow exporting tlv with importing cep only for sue creek // TODO update documenation to allow exporting tlv with importing cep only for sue creek
@ -1607,7 +1620,7 @@ void GNAPlugin::Export(std::ostream& outStream) {
.SetInputRotation(transpose_inputs_info) .SetInputRotation(transpose_inputs_info)
.SetOutputRotation(transpose_outputs_info); .SetOutputRotation(transpose_outputs_info);
for (auto&& memoryConnection : graphCompiler.memory_connection) { for (auto&& memoryConnection : m_graph_compiler->memory_connection) {
auto state = auto state =
std::make_shared<memory::GNAVariableState>(memoryConnection.first, std::make_shared<memory::GNAVariableState>(memoryConnection.first,
std::make_shared<GNAMemoryLayer>(memoryConnection.second)); std::make_shared<GNAMemoryLayer>(memoryConnection.second));
@ -1691,7 +1704,6 @@ InferenceEngine::QueryNetworkResult GNAPlugin::QueryNetwork(
Config qn_config(config); Config qn_config(config);
qn_config.UpdateFromMap(config_map); qn_config.UpdateFromMap(config_map);
const auto effectiveCompileTarget = qn_config.target->get_effective_compile_target();
auto model = network.getFunction(); auto model = network.getFunction();
if (model) { if (model) {
auto supported = GetSupportedNodes( auto supported = GetSupportedNodes(
@ -1700,7 +1712,8 @@ InferenceEngine::QueryNetworkResult GNAPlugin::QueryNetwork(
TransformationsPipeline(qn_config).apply(model); TransformationsPipeline(qn_config).apply(model);
}, },
[&](const std::shared_ptr<ngraph::Node>& op) { [&](const std::shared_ptr<ngraph::Node>& op) {
return limitations::is_op_supported(op, effectiveCompileTarget, qn_config.gnaPrecision); const auto res = Limitations::get_instance()->is_op_supported(op, qn_config.gnaPrecision);
return res;
}); });
for (auto&& op_name : supported) { for (auto&& op_name : supported) {
res.supportedLayersMap.emplace(op_name, GetName()); res.supportedLayersMap.emplace(op_name, GetName());
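A minimal sketch of how the refactored Limitations singleton is queried after this change (illustration only, not part of the commit); it uses only calls visible in the diff (init, get_instance, is_op_supported, get_memory_alignment), `op` and `qn_config` stand for the objects already in scope in QueryNetwork, and the device version is a placeholder:

// Illustration: initialize the per-target limits once, then query them through the
// singleton instead of threading a compile target through every pass.
using ov::intel_gna::limitations::Limitations;
Limitations::init(ov::intel_gna::target::DeviceVersion::Default);            // done once (see the updated test SetUp())
const auto limits = Limitations::get_instance();
const bool supported = limits->is_op_supported(op, qn_config.gnaPrecision);  // op: std::shared_ptr<ngraph::Node>
const size_t alignment = limits->get_memory_alignment();                     // target-dependent byte alignment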


@ -47,8 +47,7 @@ protected:
std::shared_ptr<gna_memory_type> gnamem; std::shared_ptr<gna_memory_type> gnamem;
std::shared_ptr<GnaInputs> inputs_ptr_; std::shared_ptr<GnaInputs> inputs_ptr_;
GnaOutputs outputs_; GnaOutputs outputs_;
std::shared_ptr<GNAGraphCompiler> m_graph_compiler;
GNAGraphCompiler graphCompiler;
uint32_t activeLayerIndex = 0xffffffff; uint32_t activeLayerIndex = 0xffffffff;
// TODO: transpose_inputs_info and transpose_outputs_info should be moved to GNAModelSerial class when ngraph // TODO: transpose_inputs_info and transpose_outputs_info should be moved to GNAModelSerial class when ngraph
@ -189,6 +188,8 @@ protected:
void Init(); void Init();
void InitGNADevice(); void InitGNADevice();
void InitGNAMemory();
void InitGraphCompiler();
void DumpXNNToFile() const; void DumpXNNToFile() const;
/** /**


@ -83,11 +83,9 @@ void TransformationsPipeline::apply(const std::shared_ptr<ov::Model>& model,
manager.register_pass<ov::pass::LSTMCellDecomposition>(); manager.register_pass<ov::pass::LSTMCellDecomposition>();
manager.register_pass<ov::intel_gna::pass::ConvertDWSCToScaleShifts>(); manager.register_pass<ov::intel_gna::pass::ConvertDWSCToScaleShifts>();
manager.register_pass<ov::intel_gna::pass::ConvertPaddedToValidConv>(); manager.register_pass<ov::intel_gna::pass::ConvertPaddedToValidConv>();
manager.register_pass<ov::intel_gna::pass::Decompose2DConvTransposedWithBiasAF>(effective_compile_target, manager.register_pass<ov::intel_gna::pass::Decompose2DConvTransposedWithBiasAF>(config.gnaPrecision);
config.gnaPrecision); manager.register_pass<ov::intel_gna::pass::Decompose2DConvTransposedWithBias>(config.gnaPrecision);
manager.register_pass<ov::intel_gna::pass::Decompose2DConvTransposedWithBias>(effective_compile_target, manager.register_pass<ov::intel_gna::pass::Decompose2DConv>(config.gnaPrecision);
config.gnaPrecision);
manager.register_pass<ov::intel_gna::pass::Decompose2DConv>(effective_compile_target, config.gnaPrecision);
if (!has_convolution) { if (!has_convolution) {
manager.register_pass<ov::intel_gna::pass::ConvertMatmulWithFqToPointWiseConvolution>(); manager.register_pass<ov::intel_gna::pass::ConvertMatmulWithFqToPointWiseConvolution>();
manager.register_pass<ov::intel_gna::pass::ConvertMatmulWithBiasToPointWiseConvolution>(); manager.register_pass<ov::intel_gna::pass::ConvertMatmulWithBiasToPointWiseConvolution>();


@ -385,7 +385,7 @@ public:
auto cropLayer = dynamic_cast<InferenceEngine::CropLayer*>(layer); auto cropLayer = dynamic_cast<InferenceEngine::CropLayer*>(layer);
if (cropLayer != nullptr && !cropLayer->offset.empty()) { if (cropLayer != nullptr && !cropLayer->offset.empty()) {
const auto crop_params = GetCropParams(cropLayer); const auto crop_params = GetCropParams(cropLayer);
return limitations::isCropAffinedOffset(crop_params.start_offset); return limitations::Limitations::get_instance()->is_crop_affined_offset(crop_params.start_offset);
} }
return false; return false;
} }


@ -50,7 +50,7 @@ public:
// @brief Returns sizes of split outputs to split the input tensor to aligned parts not greater than the specified size // @brief Returns sizes of split outputs to split the input tensor to aligned parts not greater than the specified size
inline std::vector<uint32_t> GetAlignedSplitSizes(uint32_t totalSize, inline std::vector<uint32_t> GetAlignedSplitSizes(uint32_t totalSize,
uint32_t maxSplitSize, uint32_t maxSplitSize,
uint32_t alignment = limitations::inputByteAlignment) { uint32_t alignment = limitations::Limitations::kInputByteAlignment) {
std::vector<uint32_t> splitSizes; std::vector<uint32_t> splitSizes;
uint32_t maxAlignedSplitSize = std::max(maxSplitSize - maxSplitSize % alignment, alignment); uint32_t maxAlignedSplitSize = std::max(maxSplitSize - maxSplitSize % alignment, alignment);
uint32_t usedSize = 0; uint32_t usedSize = 0;
@ -73,7 +73,7 @@ inline std::pair<int64_t, std::vector<uint32_t>> AlignedSplitSizesPerAxis(Infere
IE_ASSERT(firstValuableDim != std::end(dims)); IE_ASSERT(firstValuableDim != std::end(dims));
auto splittedElementsSize = *firstValuableDim; auto splittedElementsSize = *firstValuableDim;
auto splittedDimIx = std::distance(std::begin(dims), firstValuableDim); auto splittedDimIx = std::distance(std::begin(dims), firstValuableDim);
auto alignment = limitations::inputByteAlignment; auto alignment = limitations::Limitations::kInputByteAlignment;
// Split output size should be multiple by 64 to avoid align filters insertion, // Split output size should be multiple by 64 to avoid align filters insertion,
// but we need to check if our input size to split exceeds 64; if not we can always // but we need to check if our input size to split exceeds 64; if not we can always
@ -85,8 +85,9 @@ inline std::pair<int64_t, std::vector<uint32_t>> AlignedSplitSizesPerAxis(Infere
return {splittedDimIx, splitSizes}; return {splittedDimIx, splitSizes};
} }
} }
splitSizes = GetAlignedSplitSizes(splittedElementsSize, splitSizes =
limitations::bufferMaxSize * splittedElementsSize / totalElementsSize, GetAlignedSplitSizes(splittedElementsSize,
limitations::Limitations::kBufferMaxSize * splittedElementsSize / totalElementsSize,
alignment); alignment);
return {splittedDimIx, splitSizes}; return {splittedDimIx, splitSizes};
} }
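An illustration of GetAlignedSplitSizes above (not part of the diff); the expected result assumes the remainder of the helper keeps appending chunks of maxAlignedSplitSize until totalSize is covered, which matches the prologue shown here:

// Hypothetical numbers: split 200 elements into 64-aligned chunks of at most 100.
// maxAlignedSplitSize = max(100 - 100 % 64, 64) = 64, so the expected output is
// {64, 64, 64, 8} (the final chunk carries the remainder).
const std::vector<uint32_t> sizes = GetAlignedSplitSizes(200, 100, /*alignment=*/64);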


@ -15,11 +15,14 @@
#include <string> #include <string>
#include <vector> #include <vector>
#include "backend/gna_limitations.hpp"
#include "gna2-model-api.h" #include "gna2-model-api.h"
#include "gna2_model_helper.hpp" #include "gna2_model_helper.hpp"
#include "gna_device.hpp" #include "gna_device.hpp"
#include "log.hpp" #include "log.hpp"
using namespace ov::intel_gna::limitations;
namespace ov { namespace ov {
namespace intel_gna { namespace intel_gna {
namespace dump { namespace dump {
@ -486,8 +489,9 @@ void DumpGna2Model(const Gna2Model& gnaModel,
} }
dumpFile << "\tOperand " << j << " (" << GetOperandName(operation.Type, j) << ")" dumpFile << "\tOperand " << j << " (" << GetOperandName(operation.Type, j) << ")"
<< " type: " << GetOperandType(operand.Type) << " shape: " << GetSimpleString(operand.Shape) << " type: " << GetOperandType(operand.Type) << " shape: " << GetSimpleString(operand.Shape)
<< " tag: " << foundName << " offset: " << offset << " tag: " << foundName << " offset: " << offset << " size: "
<< " size: " << Gna2RoundUpTo64(GetGnaShapeSize(operand.Shape, GetTypeByteSize(operand.Type))) << Gna2RoundUp(GetGnaShapeSize(operand.Shape, GetTypeByteSize(operand.Type)),
Limitations::get_instance()->get_memory_alignment())
<< " data: " << operand.Data << " baseAlloc: " << foundPtr << " layout: "; << " data: " << operand.Data << " baseAlloc: " << foundPtr << " layout: ";
DumpCharArray(dumpFile, operand.Layout, GNA2_SHAPE_MAXIMUM_NUMBER_OF_DIMENSIONS); DumpCharArray(dumpFile, operand.Layout, GNA2_SHAPE_MAXIMUM_NUMBER_OF_DIMENSIONS);
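A short sketch of the size now reported by the dump (illustration only), assuming Gna2RoundUp(size, align) rounds size up to the nearest multiple of align:

// Hypothetical 100-byte operand: the dump reports the operand size rounded up to the
// alignment of the current compile target instead of a hard-coded 64-byte boundary.
const auto align = Limitations::get_instance()->get_memory_alignment();
const auto reported = Gna2RoundUp(100u, align);   // 128 when align == 64, 112 when align == 16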


@ -50,6 +50,7 @@ using namespace InferenceEngine::details;
using namespace ov::intel_gna::frontend; using namespace ov::intel_gna::frontend;
using namespace ov::intel_gna::common; using namespace ov::intel_gna::common;
using namespace ov::intel_gna::pre_post_processing; using namespace ov::intel_gna::pre_post_processing;
using namespace ov::intel_gna::limitations;
namespace ov { namespace ov {
namespace intel_gna { namespace intel_gna {
@ -149,10 +150,11 @@ static void insertDiagonalLayerBetween(InferenceEngine::CNNLayerPtr prevLayer,
return LayerInfo(ptr).isNonValuesChangable(); return LayerInfo(ptr).isNonValuesChangable();
}); });
IE_ASSERT(inputLayer != nullptr); IE_ASSERT(inputLayer != nullptr);
size_t weightsSize = size_t weightsSize = LayerInfo(prevLayer).has32BOutput()
LayerInfo(prevLayer).has32BOutput()
? nextLayer->outData[0]->getDims().back() ? nextLayer->outData[0]->getDims().back()
: Get2DReshapedData(nextLayer->outData[0], limitations::GetMinBatchToFitInBuffer(nextLayer->outData[0]), 8) : Get2DReshapedData(nextLayer->outData[0],
Limitations::get_min_batch_to_fit_in_buffer(nextLayer->outData[0]),
8)
->getDims()[1]; ->getDims()[1];
std::vector<float> weightsValues(weightsSize, fillValue); std::vector<float> weightsValues(weightsSize, fillValue);
IE_ASSERT(diagLayer != nullptr); IE_ASSERT(diagLayer != nullptr);
@ -1531,19 +1533,19 @@ void InsertSplitAligningFilterPass::run() {
// encodes offset to beginning of split layer input // encodes offset to beginning of split layer input
filterLayer->params["offset"] = filterLayer->params["offset"] =
std::to_string(aligned64_offset / limitations::bytesPerSplitElement); std::to_string(aligned64_offset / Limitations::kBytesPerSplitElement);
auto dims = splitOutput->getTensorDesc().getDims(); auto dims = splitOutput->getTensorDesc().getDims();
if (dims.size() > 3) { if (dims.size() > 3) {
THROW_GNA_EXCEPTION << "unsupported split layer dims size: " << dims.size(); THROW_GNA_EXCEPTION << "unsupported split layer dims size: " << dims.size();
} }
const auto offsetOfUnalignment = const auto offsetOfUnalignment =
(currentOffset - aligned64_offset) / limitations::bytesPerSplitElement; (currentOffset - aligned64_offset) / Limitations::kBytesPerSplitElement;
// TODO consider using a different number of filters to decrease the number of trailing zeros // TODO consider using a different number of filters to decrease the number of trailing zeros
// (additionalPaddingOfFilter) // (additionalPaddingOfFilter)
const auto numberOfFilters = limitations::convMinFiltersNum; const auto numberOfFilters = Limitations::kConvMinFiltersNum;
const auto filterSize = const auto filterSize =
ALIGN(offsetOfUnalignment + numberOfFilters, limitations::convFilterSizeDivider); ALIGN(offsetOfUnalignment + numberOfFilters, Limitations::kConvFilterSizeDivider);
// filterWeights: numberOfFilters X (offsetOfUnalignment + additionalPaddingOfFilter + // filterWeights: numberOfFilters X (offsetOfUnalignment + additionalPaddingOfFilter +
// numberOfFilters) offsetOfUnalignment - the leading zeros in the filter // numberOfFilters) offsetOfUnalignment - the leading zeros in the filter
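A tiny worked example of the filter sizing above (illustration only); the constant values are assumptions standing in for Limitations::kConvMinFiltersNum and kConvFilterSizeDivider, not the actual limits:

#include <cstdint>
#include <iostream>
#define ALIGN(x, a) (((x) + (a) - 1) / (a) * (a))   // round x up to a multiple of a

int main() {
    const uint32_t offsetOfUnalignment = 3;  // hypothetical leading zeros in the filter
    const uint32_t numberOfFilters = 4;      // stand-in for Limitations::kConvMinFiltersNum
    const uint32_t filterSizeDivider = 16;   // stand-in for Limitations::kConvFilterSizeDivider
    // filterSize: leading zeros plus the filters, padded up to a multiple of the divider.
    std::cout << ALIGN(offsetOfUnalignment + numberOfFilters, filterSizeDivider) << "\n";  // prints 16
    return 0;
}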
@ -1598,7 +1600,7 @@ void InsertSplitAligningFilterPass::run() {
} }
// search data that starts from unaligned location // search data that starts from unaligned location
currentOffset += outputSize * limitations::bytesPerSplitElement; currentOffset += outputSize * Limitations::kBytesPerSplitElement;
splitOutIndex++; splitOutIndex++;
} }
} }
@ -1636,7 +1638,7 @@ void EltwiseSplitOverChannelsPass::run() {
auto oData = l->outData.front(); auto oData = l->outData.front();
auto oDims = oData->getDims(); auto oDims = oData->getDims();
auto totalElementsSize = details::product(std::begin(oDims), std::end(oDims)); auto totalElementsSize = details::product(std::begin(oDims), std::end(oDims));
if (totalElementsSize <= limitations::bufferMaxSize) { if (totalElementsSize <= Limitations::kBufferMaxSize) {
continue; continue;
} }
auto splitSizesPerAxis = AlignedSplitSizesPerAxis(oDims); auto splitSizesPerAxis = AlignedSplitSizesPerAxis(oDims);
@ -1747,8 +1749,9 @@ void SubstituteScaleShiftBroadCastPass::run() {
if (was_reshaped) { if (was_reshaped) {
dataDims = reshaped_data[insData->getName()]; dataDims = reshaped_data[insData->getName()];
} else { } else {
dataDims = HasTo2DReshapeData(l) dataDims =
? Get2DReshapedData(insData, limitations::GetMinBatchToFitInBuffer(insData), 8)->getDims() HasTo2DReshapeData(l)
? Get2DReshapedData(insData, Limitations::get_min_batch_to_fit_in_buffer(insData), 8)->getDims()
: insData->getDims(); : insData->getDims();
} }


@ -17,6 +17,7 @@
#include "log/debug.hpp" #include "log/debug.hpp"
using namespace ov::intel_gna::gna_convolution_layer; using namespace ov::intel_gna::gna_convolution_layer;
using namespace ov::intel_gna::limitations;
void CNNFilter32(intel_dnn_component_t* component) { void CNNFilter32(intel_dnn_component_t* component) {
auto filters = reinterpret_cast<float*>(component->op.conv1D.ptr_filters); auto filters = reinterpret_cast<float*>(component->op.conv1D.ptr_filters);
@ -306,7 +307,7 @@ void CNN2DFilter32(intel_dnn_component_t* component) {
} }
} }
// kernel padded to 16B = 4 * sizeof(float) // kernel padded to 16B = 4 * sizeof(float)
kernelIndex += ALIGN(kh * kw * kc, ov::intel_gna::limitations::convEachKernelByteAlignment / sizeof(float)); kernelIndex += ALIGN(kh * kw * kc, Limitations::kConvEachKernelByteAlignment / sizeof(float));
} }
} }


@ -15,6 +15,7 @@
using namespace ov::intel_gna; using namespace ov::intel_gna;
using namespace ov::intel_gna::pass; using namespace ov::intel_gna::pass;
using namespace ov::intel_gna::limitations;
static bool BiasValidation(const ngraph::Output<ngraph::Node>& output) { static bool BiasValidation(const ngraph::Output<ngraph::Node>& output) {
auto bias_output_shape = output.get_node()->get_output_shape(0); auto bias_output_shape = output.get_node()->get_output_shape(0);
@ -49,9 +50,9 @@ static std::tuple<bool, uint32_t, uint32_t, uint32_t> VerifyAndGetConvParams(
const uint32_t width = input1_shape.front(); const uint32_t width = input1_shape.front();
const uint32_t in_channels = input2_shape.back(); const uint32_t in_channels = input2_shape.back();
const uint32_t out_channels = input2_shape.front(); const uint32_t out_channels = input2_shape.front();
if (input1_shape.front() <= limitations::affineMaxBatchSize || if (input1_shape.front() <= Limitations::kAffineMaxBatchSize ||
out_channels % limitations::convFiltersNumDivider != 0 || out_channels > limitations::convMaxFiltersNum || out_channels % Limitations::kConvFiltersNumDivider != 0 || out_channels > Limitations::kConvMaxFiltersNum ||
in_channels > limitations::convFilterMaxSize) { in_channels > Limitations::kConvFilterMaxSize) {
return std::make_tuple(false, 0, 0, 0); return std::make_tuple(false, 0, 0, 0);
} }


@ -20,6 +20,7 @@
namespace ov { namespace ov {
namespace intel_gna { namespace intel_gna {
using namespace target; using namespace target;
using namespace limitations;
namespace pass { namespace pass {
using namespace helper; using namespace helper;
@ -55,7 +56,7 @@ static bool VerifyAndGetConvData(std::shared_ptr<ngraph::opset7::Convolution> co
size_t filter_height = filters.get_shape()[2]; size_t filter_height = filters.get_shape()[2];
size_t filter_width = filters.get_shape()[3]; size_t filter_width = filters.get_shape()[3];
if (filter_width > limitations::copyMaxGrouping || filter_height > limitations::copyMaxGrouping) { if (filter_width > Limitations::kCopyMaxGrouping || filter_height > Limitations::kCopyMaxGrouping) {
return false; return false;
} }
@ -76,7 +77,7 @@ static bool VerifyMaxPool(GraphData& graph_data, std::shared_ptr<ngraph::opset7:
(max_pool->get_auto_pad() != ngraph::op::PadType::EXPLICIT || (max_pool->get_auto_pad() != ngraph::op::PadType::EXPLICIT ||
max_pool->get_pads_begin() != ngraph::Shape({0, 0}) || max_pool->get_pads_end() != ngraph::Shape({0, 0}))) || max_pool->get_pads_begin() != ngraph::Shape({0, 0}) || max_pool->get_pads_end() != ngraph::Shape({0, 0}))) ||
pool_filter.size() != 2 || pool_strides.size() != 2 || pool_filter[0] > 1 || pool_strides[0] > 1 || pool_filter.size() != 2 || pool_strides.size() != 2 || pool_filter[0] > 1 || pool_strides[0] > 1 ||
pool_filter[0] > limitations::maxPoolMaxWindowSize) pool_filter[0] > Limitations::kMaxPoolMaxWindowSize)
return false; return false;
graph_data.pool_size_width = pool_filter[1]; graph_data.pool_size_width = pool_filter[1];
@ -84,16 +85,15 @@ static bool VerifyMaxPool(GraphData& graph_data, std::shared_ptr<ngraph::opset7:
return true; return true;
} }
static bool GNA30SupportedConv(const DeviceVersion& compile_target, static bool GNA30SupportedConv(const InferenceEngine::Precision& gnaPrecision,
const InferenceEngine::Precision& gnaPrecision,
const GraphData& graph_data, const GraphData& graph_data,
const ConvData& conv_data) { const ConvData& conv_data) {
const auto cnn2dValidatorPtr = limitations::cnn2d::AbstractValidator::Create(compile_target); const auto cnn2dValidatorPtr = Limitations::get_instance()->get_cnn_validator();
if (!cnn2dValidatorPtr) { if (!cnn2dValidatorPtr) {
return false; return false;
} }
const auto& cnn2dValidator = *cnn2dValidatorPtr; const auto cnnIsValid = cnn2dValidatorPtr->ValidateCnn2D(graph_data.conv->get_friendly_name(),
const auto cnnIsValid = cnn2dValidator.ValidateCnn2D(graph_data.conv->get_friendly_name(),
conv_data.input_height, conv_data.input_height,
conv_data.input_width, conv_data.input_width,
conv_data.input_channel_count, conv_data.input_channel_count,
@ -112,7 +112,7 @@ static bool GNA30SupportedConv(const DeviceVersion& compile_target,
if (!graph_data.max_pool) { if (!graph_data.max_pool) {
return true; return true;
} }
const auto poolingValid = cnn2dValidator.ValidatePooling2D(graph_data.conv->get_friendly_name(), const auto poolingValid = cnn2dValidatorPtr->ValidatePooling2D(graph_data.conv->get_friendly_name(),
graph_data.max_pool->get_kernel()[0], graph_data.max_pool->get_kernel()[0],
graph_data.max_pool->get_kernel()[1], graph_data.max_pool->get_kernel()[1],
graph_data.max_pool->get_strides()[0], graph_data.max_pool->get_strides()[0],
@ -126,7 +126,7 @@ static size_t CalculateConvCount(const ConvData& conv_data) {
size_t conv_count = 1; size_t conv_count = 1;
size_t total_factorized_conv_channel_count = size_t total_factorized_conv_channel_count =
(conv_data.input_channel_count * conv_data.filter_height * conv_data.filter_width); (conv_data.input_channel_count * conv_data.filter_height * conv_data.filter_width);
while (total_factorized_conv_channel_count / conv_count > limitations::convFilterMaxSize || while (total_factorized_conv_channel_count / conv_count > Limitations::kConvFilterMaxSize ||
total_factorized_conv_channel_count % conv_count != 0 || conv_data.filter_channel_count % conv_count != 0) total_factorized_conv_channel_count % conv_count != 0 || conv_data.filter_channel_count % conv_count != 0)
conv_count++; conv_count++;
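A standalone sketch of the factorization rule in CalculateConvCount above (illustration only); kConvFilterMaxSizeAssumed is a hypothetical stand-in for Limitations::kConvFilterMaxSize, chosen just to make the numbers concrete:

#include <cstddef>
#include <iostream>

int main() {
    const size_t kConvFilterMaxSizeAssumed = 768;  // hypothetical HW filter-size limit
    const size_t in_channels = 128, filter_h = 3, filter_w = 3, filter_channels = 128;
    const size_t total = in_channels * filter_h * filter_w;  // 1152 factorized channels
    size_t conv_count = 1;
    // Same loop as above: grow conv_count until the factorized channels fit the limit
    // and divide evenly between the split convolutions.
    while (total / conv_count > kConvFilterMaxSizeAssumed || total % conv_count != 0 ||
           filter_channels % conv_count != 0)
        ++conv_count;
    std::cout << conv_count << "\n";  // prints 2: each split convolution gets 576 channels
    return 0;
}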
@ -139,7 +139,7 @@ static bool ShouldDecompose(GraphData& graph_data, const ConvData& conv_data) {
// Concat (copy) layer limitation allows to split up to a certain limit // Concat (copy) layer limitation allows to split up to a certain limit
// Currently we are able to split only convolutions without pooling in horizontal dimension // Currently we are able to split only convolutions without pooling in horizontal dimension
if (graph_data.conv_count > limitations::copyMaxGrouping || if (graph_data.conv_count > Limitations::kCopyMaxGrouping ||
((graph_data.pool_size_width > 1 || graph_data.pool_stride_width > 1) && graph_data.conv_count > 1)) ((graph_data.pool_size_width > 1 || graph_data.pool_stride_width > 1) && graph_data.conv_count > 1))
return false; return false;
@ -561,8 +561,7 @@ static void Decompose(const GraphData& graph_data, ConvData& conv_data) {
conv_result->set_friendly_name(conv_result_name); conv_result->set_friendly_name(conv_result_name);
} }
static bool Convert(const DeviceVersion& compile_target, static bool Convert(const InferenceEngine::Precision& gnaPrecision,
const InferenceEngine::Precision& gnaPrecision,
std::shared_ptr<ngraph::Node> leading_transpose, std::shared_ptr<ngraph::Node> leading_transpose,
std::shared_ptr<ngraph::Node> fq_filters, std::shared_ptr<ngraph::Node> fq_filters,
std::shared_ptr<ngraph::Node> conv, std::shared_ptr<ngraph::Node> conv,
@ -598,7 +597,7 @@ static bool Convert(const DeviceVersion& compile_target,
return false; return false;
// If compile target is GNA 3.0 and the convolution is supported on it, then skip decomposition // If compile target is GNA 3.0 and the convolution is supported on it, then skip decomposition
if (GNA30SupportedConv(compile_target, gnaPrecision, graph_data, conv_data)) if (GNA30SupportedConv(gnaPrecision, graph_data, conv_data))
return false; return false;
// We are looking for Transpose(NHWC->NCHW) => Conv => Transpose(NCHW->NHWC) // We are looking for Transpose(NHWC->NCHW) => Conv => Transpose(NCHW->NHWC)
@ -618,7 +617,7 @@ static bool Convert(const DeviceVersion& compile_target,
return true; return true;
} }
Decompose2DConv::Decompose2DConv(const DeviceVersion& compile_target, const InferenceEngine::Precision& gnaPrecision) { Decompose2DConv::Decompose2DConv(const InferenceEngine::Precision& gnaPrecision) {
MATCHER_SCOPE(Decompose2DConv); MATCHER_SCOPE(Decompose2DConv);
auto const_input = ngraph::pattern::wrap_type<ngraph::opset7::Constant>(); auto const_input = ngraph::pattern::wrap_type<ngraph::opset7::Constant>();
@ -735,8 +734,7 @@ Decompose2DConv::Decompose2DConv(const DeviceVersion& compile_target, const Infe
} }
} }
return Convert(compile_target, return Convert(gnaPrecision,
gnaPrecision,
pattern_map.at(leading_transpose).get_node_shared_ptr(), pattern_map.at(leading_transpose).get_node_shared_ptr(),
fq_filters_node, fq_filters_node,
pattern_map.at(conv).get_node_shared_ptr(), pattern_map.at(conv).get_node_shared_ptr(),
@ -755,8 +753,7 @@ Decompose2DConv::Decompose2DConv(const DeviceVersion& compile_target, const Infe
this->register_matcher(m, callback); this->register_matcher(m, callback);
} }
Decompose2DConvTransposedWithBias::Decompose2DConvTransposedWithBias(const DeviceVersion& compile_target, Decompose2DConvTransposedWithBias::Decompose2DConvTransposedWithBias(const InferenceEngine::Precision& gnaPrecision) {
const InferenceEngine::Precision& gnaPrecision) {
MATCHER_SCOPE(Decompose2DConvTransposedWithBias); MATCHER_SCOPE(Decompose2DConvTransposedWithBias);
auto const_input_i64 = auto const_input_i64 =
@ -781,8 +778,7 @@ Decompose2DConvTransposedWithBias::Decompose2DConvTransposedWithBias(const Devic
pattern_map.at(bias).get_node_shared_ptr()))) pattern_map.at(bias).get_node_shared_ptr())))
return false; return false;
return Convert(compile_target, return Convert(gnaPrecision,
gnaPrecision,
pattern_map.at(leading_transpose).get_node_shared_ptr(), pattern_map.at(leading_transpose).get_node_shared_ptr(),
nullptr, nullptr,
pattern_map.at(conv).get_node_shared_ptr(), pattern_map.at(conv).get_node_shared_ptr(),
@ -802,7 +798,6 @@ Decompose2DConvTransposedWithBias::Decompose2DConvTransposedWithBias(const Devic
} }
Decompose2DConvTransposedWithBiasAF::Decompose2DConvTransposedWithBiasAF( Decompose2DConvTransposedWithBiasAF::Decompose2DConvTransposedWithBiasAF(
const DeviceVersion& compile_target,
const InferenceEngine::Precision& gnaPrecision) { const InferenceEngine::Precision& gnaPrecision) {
MATCHER_SCOPE(Decompose2DConvTransposedWithBiasAF); MATCHER_SCOPE(Decompose2DConvTransposedWithBiasAF);
@ -836,8 +831,7 @@ Decompose2DConvTransposedWithBiasAF::Decompose2DConvTransposedWithBiasAF(
pattern_map.at(bias).get_node_shared_ptr()))) pattern_map.at(bias).get_node_shared_ptr())))
return false; return false;
return Convert(compile_target, return Convert(gnaPrecision,
gnaPrecision,
pattern_map.at(leading_transpose).get_node_shared_ptr(), pattern_map.at(leading_transpose).get_node_shared_ptr(),
nullptr, nullptr,
pattern_map.at(conv).get_node_shared_ptr(), pattern_map.at(conv).get_node_shared_ptr(),


@ -35,7 +35,7 @@ namespace pass {
class Decompose2DConv : public ngraph::pass::MatcherPass { class Decompose2DConv : public ngraph::pass::MatcherPass {
public: public:
OPENVINO_RTTI("Decompose2DConv", "0"); OPENVINO_RTTI("Decompose2DConv", "0");
Decompose2DConv(const target::DeviceVersion& compile_target, const InferenceEngine::Precision& gnaPrecision); Decompose2DConv(const InferenceEngine::Precision& gnaPrecision);
}; };
/** /**
@ -56,8 +56,7 @@ public:
class Decompose2DConvTransposedWithBias : public ngraph::pass::MatcherPass { class Decompose2DConvTransposedWithBias : public ngraph::pass::MatcherPass {
public: public:
OPENVINO_RTTI("Decompose2DConvTransposedWithBias", "0"); OPENVINO_RTTI("Decompose2DConvTransposedWithBias", "0");
Decompose2DConvTransposedWithBias(const target::DeviceVersion& compile_target, Decompose2DConvTransposedWithBias(const InferenceEngine::Precision& gnaPrecision);
const InferenceEngine::Precision& gnaPrecision);
}; };
/** /**
@ -80,8 +79,7 @@ public:
class Decompose2DConvTransposedWithBiasAF : public ngraph::pass::MatcherPass { class Decompose2DConvTransposedWithBiasAF : public ngraph::pass::MatcherPass {
public: public:
OPENVINO_RTTI("Decompose2DConvTransposedWithBiasAF", "0"); OPENVINO_RTTI("Decompose2DConvTransposedWithBiasAF", "0");
Decompose2DConvTransposedWithBiasAF(const target::DeviceVersion& compile_target, Decompose2DConvTransposedWithBiasAF(const InferenceEngine::Precision& gnaPrecision);
const InferenceEngine::Precision& gnaPrecision);
}; };
} // namespace pass } // namespace pass


@ -13,6 +13,7 @@
#include "backend/gna_limitations.hpp" #include "backend/gna_limitations.hpp"
using namespace ngraph; using namespace ngraph;
using namespace ov::intel_gna::limitations;
namespace ov { namespace ov {
namespace intel_gna { namespace intel_gna {
@ -81,7 +82,7 @@ static bool GetVerifiedMVNData(const std::shared_ptr<opset8::MVN> mvn, MVNData&
// Check if average must be split // Check if average must be split
mvn_data.num_parts = 1; mvn_data.num_parts = 1;
while (mvn_data.W / mvn_data.num_parts > limitations::convFilterMaxSize) { while (mvn_data.W / mvn_data.num_parts > Limitations::kConvFilterMaxSize) {
mvn_data.num_parts *= 2; mvn_data.num_parts *= 2;
} }


@ -16,6 +16,7 @@
#include "backend/gna_limitations.hpp" #include "backend/gna_limitations.hpp"
using namespace ov::intel_gna::pass; using namespace ov::intel_gna::pass;
using namespace ov::intel_gna::limitations;
namespace { namespace {
@ -160,7 +161,7 @@ HandleTransposeBeforeMatMul::HandleTransposeBeforeMatMul() {
} }
if (prev_node) { if (prev_node) {
if (limitations::IsTransposeSupported(prev_node->get_output_shape(0))) { if (Limitations::is_transpose_supported(prev_node->get_output_shape(0))) {
InsertTranspose(prev_node, matmul_node->get_friendly_name(), true); InsertTranspose(prev_node, matmul_node->get_friendly_name(), true);
} }
} }
@ -170,7 +171,7 @@ HandleTransposeBeforeMatMul::HandleTransposeBeforeMatMul() {
auto iter = pattern_map.find(fq); auto iter = pattern_map.find(fq);
if (iter != pattern_map.end() || (iter = pattern_map.find(constant)) != pattern_map.end()) { if (iter != pattern_map.end() || (iter = pattern_map.find(constant)) != pattern_map.end()) {
auto prev_node = iter->second.get_node_shared_ptr(); auto prev_node = iter->second.get_node_shared_ptr();
if (limitations::IsTranspose2d(prev_node->get_output_shape(0))) { if (Limitations::is_transpose_2d(prev_node->get_output_shape(0))) {
InsertTranspose(prev_node, prev_node->get_friendly_name(), true); InsertTranspose(prev_node, prev_node->get_friendly_name(), true);
} }
} }
@ -187,7 +188,7 @@ HandleTransposeBeforeMatMul::HandleTransposeBeforeMatMul() {
} }
if (prev_node) { if (prev_node) {
if (limitations::IsTransposeSupported(prev_node->get_output_shape(0))) { if (Limitations::is_transpose_supported(prev_node->get_output_shape(0))) {
InsertTranspose(prev_node, matmul_node->get_friendly_name(), true); InsertTranspose(prev_node, matmul_node->get_friendly_name(), true);
} }
} }
@ -243,7 +244,7 @@ HandleTransposeAfterMatMul::HandleTransposeAfterMatMul() {
ReplaceTransposeWithReshape(transpose_it->second.get_node_shared_ptr()); ReplaceTransposeWithReshape(transpose_it->second.get_node_shared_ptr());
} else { } else {
auto reshape_node = pattern_map.at(reshape).get_node_shared_ptr(); auto reshape_node = pattern_map.at(reshape).get_node_shared_ptr();
if (!limitations::IsTransposeSupported(reshape_node->get_input_shape(0))) if (!Limitations::is_transpose_supported(reshape_node->get_input_shape(0)))
return false; return false;
auto iter = pattern_map.find(act); auto iter = pattern_map.find(act);
if (iter == pattern_map.end() && (iter = pattern_map.find(fq2)) == pattern_map.end() && if (iter == pattern_map.end() && (iter = pattern_map.find(fq2)) == pattern_map.end() &&


@ -4,6 +4,7 @@
#include "transformations/remove_in_out_processing.hpp" #include "transformations/remove_in_out_processing.hpp"
#include "backend/gna_limitations.hpp"
#include "common/graph_utils.hpp" #include "common/graph_utils.hpp"
#include "openvino/cc/pass/itt.hpp" #include "openvino/cc/pass/itt.hpp"
#include "openvino/opsets/opset1.hpp" #include "openvino/opsets/opset1.hpp"
@ -17,6 +18,7 @@
using namespace ov::opset10; using namespace ov::opset10;
using namespace ov::intel_gna::pass; using namespace ov::intel_gna::pass;
using namespace ov::intel_gna::limitations;
namespace { namespace {
@ -29,7 +31,7 @@ inline bool is_preprocessing_layer_not_supported(std::shared_ptr<ov::Node>& laye
// Verify that transpose layer cannot be executed on GNA // Verify that transpose layer cannot be executed on GNA
if (std::dynamic_pointer_cast<ov::opset1::Transpose>(layer)) { if (std::dynamic_pointer_cast<ov::opset1::Transpose>(layer)) {
return !limitations::is_transpose_supported(layer); return !Limitations::is_transpose_supported(layer);
} }
return false; return false;


@ -14,6 +14,8 @@
#include "layers/gna_convolution_layer.hpp" #include "layers/gna_convolution_layer.hpp"
#include "layers/gna_split_layer.hpp" #include "layers/gna_split_layer.hpp"
using namespace ov::intel_gna::limitations;
namespace ov { namespace ov {
namespace intel_gna { namespace intel_gna {
namespace pass { namespace pass {
@ -56,13 +58,13 @@ static bool Convert(std::shared_ptr<ngraph::Node> conv,
std::end(conv->get_input_shape(0)), std::end(conv->get_input_shape(0)),
size_t(1), size_t(1),
std::multiplies<size_t>()); std::multiplies<size_t>());
if (input_size <= limitations::bufferMaxSize) { if (input_size <= Limitations::kBufferMaxSize) {
return false; return false;
} }
auto& input = conv->get_input_shape(0); auto& input = conv->get_input_shape(0);
uint32_t width = input.back(); uint32_t width = input.back();
uint32_t in_channels = input.at(1); uint32_t in_channels = input.at(1);
auto split_sizes = GetAlignedSplitSizes(width, limitations::bufferMaxSize / in_channels); auto split_sizes = GetAlignedSplitSizes(width, Limitations::kBufferMaxSize / in_channels);
IE_ASSERT(split_sizes.size() > 1); IE_ASSERT(split_sizes.size() > 1);
std::vector<int64_t> split_sizes_casted(split_sizes.size()); std::vector<int64_t> split_sizes_casted(split_sizes.size());
std::transform(std::begin(split_sizes), std::end(split_sizes), std::begin(split_sizes_casted), [](uint32_t size) { std::transform(std::begin(split_sizes), std::end(split_sizes), std::begin(split_sizes_casted), [](uint32_t size) {


@ -15,6 +15,8 @@
#include "legacy/ngraph_ops/eltwise.hpp" #include "legacy/ngraph_ops/eltwise.hpp"
#include "log/log.hpp" #include "log/log.hpp"
using namespace ov::intel_gna::limitations;
namespace ov { namespace ov {
namespace intel_gna { namespace intel_gna {
namespace pass { namespace pass {
@ -25,7 +27,7 @@ inline bool is_eltwise_has_to_be_splitted(const ngraph::Output<ngraph::Node>& no
return false; return false;
auto o_dims = eltwise->get_output_shape(0); auto o_dims = eltwise->get_output_shape(0);
auto total_elem_size = std::accumulate(std::begin(o_dims), std::end(o_dims), 1, std::multiplies<size_t>()); auto total_elem_size = std::accumulate(std::begin(o_dims), std::end(o_dims), 1, std::multiplies<size_t>());
return (total_elem_size > limitations::bufferMaxSize); return (total_elem_size > Limitations::kBufferMaxSize);
} }
static std::shared_ptr<ngraph::opset9::VariadicSplit> split_input( static std::shared_ptr<ngraph::opset9::VariadicSplit> split_input(


@ -11,7 +11,7 @@ namespace intel_gna {
namespace pass { namespace pass {
/** /**
* @brief Split over channels for Eltwise to avoid GNA-HW bufferMaxSize limitation per eltwise * @brief Split over channels for Eltwise to avoid GNA-HW kBufferMaxSize limitation per eltwise
*/ */
class SplitEltwise : public ov::pass::MatcherPass { class SplitEltwise : public ov::pass::MatcherPass {
public: public:


@ -2,20 +2,24 @@
// SPDX-License-Identifier: Apache-2.0 // SPDX-License-Identifier: Apache-2.0
// //
#include <vector>
#include <gtest/gtest.h> #include <gtest/gtest.h>
#include <legacy/layer_transform.hpp>
#include "frontend/model_quantizer.hpp"
#include "frontend/layer_quantizer.hpp"
#include "gna_matcher.hpp"
#include <ie_core.hpp> #include <ie_core.hpp>
#include <legacy/layer_transform.hpp>
#include <vector>
#include "backend/gna_limitations.hpp"
#include "frontend/layer_quantizer.hpp"
#include "frontend/model_quantizer.hpp"
#include "gna_matcher.hpp"
using namespace InferenceEngine; using namespace InferenceEngine;
using namespace ov::intel_gna::limitations;
using namespace ov::intel_gna::frontend; using namespace ov::intel_gna::frontend;
using namespace GNATestIRs; using namespace GNATestIRs;
class I8QuantisationTest : public GNATest<> { class I8QuantisationTest : public GNATest<> {
protected: protected:
InferenceEngine::CNNLayerPtr quantize(InferenceEngine::CNNLayerPtr lp) { InferenceEngine::CNNLayerPtr quantize(InferenceEngine::CNNLayerPtr lp) {
auto newLayer = InferenceEngine::injectData<QuantizedLayerParams>(lp); auto newLayer = InferenceEngine::injectData<QuantizedLayerParams>(lp);
Config gna_config; Config gna_config;
@ -26,7 +30,8 @@ class I8QuantisationTest : public GNATest<> {
return newLayer; return newLayer;
}; };
InferenceEngine::CNNNetwork quantize_single_input_model(const InferenceEngine::CNNNetwork& model, float scale_factor) const { InferenceEngine::CNNNetwork quantize_single_input_model(const InferenceEngine::CNNNetwork& model,
float scale_factor) const {
auto scale_factors = std::vector<float>({scale_factor}); auto scale_factors = std::vector<float>({scale_factor});
GnaInputs inputs; GnaInputs inputs;
@ -41,30 +46,30 @@ class I8QuantisationTest : public GNATest<> {
auto transformer = ov::intel_gna::TransformationsPipeline(gna_config); auto transformer = ov::intel_gna::TransformationsPipeline(gna_config);
return ModelQuantizer(transformer).quantize( return ModelQuantizer(transformer).quantize(model, inputs);
model,
inputs);
} }
void SetUp() override {} void SetUp() override {
Limitations::init(target::DeviceVersion::Default);
}
}; };
// TODO: add test for FC weights after quantization // TODO: add test for FC weights after quantization
TEST_F(I8QuantisationTest, canQuantizeFCLayer){ TEST_F(I8QuantisationTest, canQuantizeFCLayer) {
auto fc = std::make_shared<FullyConnectedLayer>(LayerParams{"name", "type", Precision::FP32}); auto fc = std::make_shared<FullyConnectedLayer>(LayerParams{"name", "type", Precision::FP32});
fc->_out_num = 9; fc->_out_num = 9;
auto weights = make_shared_blob<float>({ Precision::FP32, {1, 1}, Layout::NC }); auto weights = make_shared_blob<float>({Precision::FP32, {1, 1}, Layout::NC});
fc->_weights = weights; fc->_weights = weights;
fc->_biases = make_shared_blob<float>({ Precision::FP32, {1, 1}, Layout::NC }); fc->_biases = make_shared_blob<float>({Precision::FP32, {1, 1}, Layout::NC});
fc->_weights->allocate(); fc->_weights->allocate();
fc->_biases->allocate(); fc->_biases->allocate();
std::shared_ptr<Data> outData = std::make_shared<Data>("data", TensorDesc(Precision::FP32, SizeVector({ 1, 1 }), Layout::NC)); std::shared_ptr<Data> outData =
std::make_shared<Data>("data", TensorDesc(Precision::FP32, SizeVector({1, 1}), Layout::NC));
fc->outData.push_back(outData); fc->outData.push_back(outData);
fc->insData.push_back(outData); fc->insData.push_back(outData);
// actual quantisation algorithm is involved // actual quantisation algorithm is involved
for (auto && w : *weights) { for (auto&& w : *weights) {
w = MAX_OUT_MULTIPLIER * MAX_VAL_1B_WEIGHT; w = MAX_OUT_MULTIPLIER * MAX_VAL_1B_WEIGHT;
} }
@ -73,17 +78,16 @@ TEST_F(I8QuantisationTest, canQuantizeFCLayer){
ASSERT_NO_THROW(quantize(fc)); ASSERT_NO_THROW(quantize(fc));
} }
TEST_F(I8QuantisationTest, canQuantizeActivation){ TEST_F(I8QuantisationTest, canQuantizeActivation) {
auto sigmoid = std::make_shared<GenericLayer>(LayerParams{"name", "type", Precision::FP32});
auto sigmoid = std::make_shared<GenericLayer >(LayerParams{"name", "type", Precision::FP32});
sigmoid->params["value"] = 2; sigmoid->params["value"] = 2;
sigmoid->type = "Activation"; sigmoid->type = "Activation";
ASSERT_NO_THROW(quantize(sigmoid)); ASSERT_NO_THROW(quantize(sigmoid));
} }
TEST_F(I8QuantisationTest, inputPrecisionIs16Bits){ TEST_F(I8QuantisationTest, inputPrecisionIs16Bits) {
auto weights = make_shared_blob<uint8_t >({ Precision::U8, {440}, C }); auto weights = make_shared_blob<uint8_t>({Precision::U8, {440}, C});
weights->allocate(); weights->allocate();
fillWeights(weights); fillWeights(weights);
@ -92,13 +96,15 @@ TEST_F(I8QuantisationTest, inputPrecisionIs16Bits){
auto newNet = quantize_single_input_model(network, 1000); auto newNet = quantize_single_input_model(network, 1000);
InputsDataMap inputs = newNet.getInputsInfo(); InputsDataMap inputs = newNet.getInputsInfo();
auto inputLayer = getCreatorLayer(getInputTo(inputs.begin()->second->getInputData()).begin()->second->insData.front().lock()).lock(); auto inputLayer =
getCreatorLayer(getInputTo(inputs.begin()->second->getInputData()).begin()->second->insData.front().lock())
.lock();
ASSERT_EQ(inputLayer->precision, Precision::I16); ASSERT_EQ(inputLayer->precision, Precision::I16);
} }
TEST_F(I8QuantisationTest, FCDimensionIs1){ TEST_F(I8QuantisationTest, FCDimensionIs1) {
auto weights = make_shared_blob<uint8_t >({ Precision::U8, {440}, C }); auto weights = make_shared_blob<uint8_t>({Precision::U8, {440}, C});
weights->allocate(); weights->allocate();
fillWeights(weights); fillWeights(weights);
@ -108,8 +114,8 @@ TEST_F(I8QuantisationTest, FCDimensionIs1){
ASSERT_NO_THROW(quantize_single_input_model(network, 1000)); ASSERT_NO_THROW(quantize_single_input_model(network, 1000));
} }
TEST_F(I8QuantisationTest, outputAffinePrecisionIs32Bits){ TEST_F(I8QuantisationTest, outputAffinePrecisionIs32Bits) {
auto weights = make_shared_blob<uint8_t >({ Precision::U8, {440}, C }); auto weights = make_shared_blob<uint8_t>({Precision::U8, {440}, C});
weights->allocate(); weights->allocate();
fillWeights(weights); fillWeights(weights);
@ -124,7 +130,7 @@ TEST_F(I8QuantisationTest, outputAffinePrecisionIs32Bits){
} }
TEST_F(I8QuantisationTest, fp16tofp32_on_fullyConnected_model) { TEST_F(I8QuantisationTest, fp16tofp32_on_fullyConnected_model) {
auto weights = make_shared_blob<uint8_t>({ Precision::U8, {220}, Layout::C }); auto weights = make_shared_blob<uint8_t>({Precision::U8, {220}, Layout::C});
weights->allocate(); weights->allocate();
fillWeights(weights); fillWeights(weights);
@ -135,7 +141,7 @@ TEST_F(I8QuantisationTest, fp16tofp32_on_fullyConnected_model) {
} }
TEST_F(I8QuantisationTest, LSTMCell_quantize) { TEST_F(I8QuantisationTest, LSTMCell_quantize) {
auto weights = make_shared_blob<uint8_t>({ Precision::U8, {33664}, C }); auto weights = make_shared_blob<uint8_t>({Precision::U8, {33664}, C});
weights->allocate(); weights->allocate();
fillWeights(weights); fillWeights(weights);
@ -146,7 +152,7 @@ TEST_F(I8QuantisationTest, LSTMCell_quantize) {
} }
TEST_F(I8QuantisationTest, LSTMCell_unaligned_quantize) { TEST_F(I8QuantisationTest, LSTMCell_unaligned_quantize) {
auto weights = make_shared_blob<uint8_t>({ Precision::U8, {3480}, C }); auto weights = make_shared_blob<uint8_t>({Precision::U8, {3480}, C});
weights->allocate(); weights->allocate();
fillWeights(weights); fillWeights(weights);
@ -157,7 +163,7 @@ TEST_F(I8QuantisationTest, LSTMCell_unaligned_quantize) {
} }
TEST_F(I8QuantisationTest, TI_quantize) { TEST_F(I8QuantisationTest, TI_quantize) {
auto weights = make_shared_blob<uint8_t>({ Precision::U8, {249748}, C }); auto weights = make_shared_blob<uint8_t>({Precision::U8, {249748}, C});
weights->allocate(); weights->allocate();
fillWeights(weights); fillWeights(weights);


@ -2,23 +2,27 @@
// SPDX-License-Identifier: Apache-2.0 // SPDX-License-Identifier: Apache-2.0
// //
#include <vector>
#include <gtest/gtest.h> #include <gtest/gtest.h>
#include <legacy/layer_transform.hpp>
#include "backend/gna_types.hpp"
#include "frontend/model_quantizer.hpp"
#include "frontend/layer_quantizer.hpp"
#include "gna_matcher.hpp"
#include <ie_core.hpp> #include <ie_core.hpp>
#include <legacy/layer_transform.hpp>
#include <vector>
#include "backend/gna_limitations.hpp"
#include "backend/gna_types.hpp"
#include "frontend/layer_quantizer.hpp"
#include "frontend/model_quantizer.hpp"
#include "gna_matcher.hpp"
#include "ngraph_functions/builders.hpp" #include "ngraph_functions/builders.hpp"
using namespace InferenceEngine; using namespace InferenceEngine;
using namespace ov::intel_gna::limitations;
using namespace ov::intel_gna::frontend; using namespace ov::intel_gna::frontend;
using namespace GNATestIRs; using namespace GNATestIRs;
class I16QuantisationTest : public GNATest<> { class I16QuantisationTest : public GNATest<> {
protected: protected:
InferenceEngine::CNNLayerPtr quantize (InferenceEngine::CNNLayerPtr lp) { InferenceEngine::CNNLayerPtr quantize(InferenceEngine::CNNLayerPtr lp) {
auto newLayer = InferenceEngine::injectData<QuantizedLayerParams>(lp); auto newLayer = InferenceEngine::injectData<QuantizedLayerParams>(lp);
Config gna_config; Config gna_config;
gna_config.gnaPrecision = InferenceEngine::Precision::I16; gna_config.gnaPrecision = InferenceEngine::Precision::I16;
@ -28,7 +32,8 @@ class I16QuantisationTest : public GNATest<> {
return newLayer; return newLayer;
}; };
InferenceEngine::CNNNetwork quantize_single_input_model(const InferenceEngine::CNNNetwork& model, float scale_factor) const { InferenceEngine::CNNNetwork quantize_single_input_model(const InferenceEngine::CNNNetwork& model,
float scale_factor) const {
auto scale_factors = std::vector<float>({scale_factor}); auto scale_factors = std::vector<float>({scale_factor});
GnaInputs inputs; GnaInputs inputs;
@ -43,21 +48,20 @@ class I16QuantisationTest : public GNATest<> {
auto transformer = ov::intel_gna::TransformationsPipeline(gna_config); auto transformer = ov::intel_gna::TransformationsPipeline(gna_config);
return ModelQuantizer(transformer).quantize( return ModelQuantizer(transformer).quantize(model, inputs);
model,
inputs);
} }
void SetUp() override { void SetUp() override {
Limitations::init(target::DeviceVersion::Default);
} }
}; };
template <class T> template <class T>
T setWeights(T blob) { T setWeights(T blob) {
blob->allocate(); blob->allocate();
// actual quantisation algorithm is involved - we need to provide weights that will be quantized with scale factor of 1 // actual quantisation algorithm is involved - we need to provide weights that will be quantized with scale factor
for (auto && w : *blob) { // of 1
for (auto&& w : *blob) {
w = MAX_VAL_2B_WEIGHT; w = MAX_VAL_2B_WEIGHT;
} }
return blob; return blob;
@ -75,36 +79,34 @@ TBlob<uint8_t>::Ptr setWeights(TBlob<uint8_t>::Ptr blob) {
return blob; return blob;
} }
// TODO: add test for FC weights after quantization // TODO: add test for FC weights after quantization
TEST_F(I16QuantisationTest, canQuantizeFCLayer){ TEST_F(I16QuantisationTest, canQuantizeFCLayer) {
auto fc = std::make_shared<FullyConnectedLayer>(LayerParams{"name", "type", Precision::FP32}); auto fc = std::make_shared<FullyConnectedLayer>(LayerParams{"name", "type", Precision::FP32});
fc->_out_num = 9; fc->_out_num = 9;
fc->_weights = setWeights(make_shared_blob<float>({ Precision::FP32, {1, 1}, Layout::NC })); fc->_weights = setWeights(make_shared_blob<float>({Precision::FP32, {1, 1}, Layout::NC}));
fillWeights(fc->_weights); fillWeights(fc->_weights);
fc->_biases = make_shared_blob<float>({ Precision::FP32, {1, 1}, Layout::NC }); fc->_biases = make_shared_blob<float>({Precision::FP32, {1, 1}, Layout::NC});
fc->_biases->allocate(); fc->_biases->allocate();
fillWeights(fc->_biases); fillWeights(fc->_biases);
std::shared_ptr<Data> outData = std::make_shared<Data>("data", TensorDesc(Precision::FP32, SizeVector({1, 1}), Layout::NC)); std::shared_ptr<Data> outData =
std::make_shared<Data>("data", TensorDesc(Precision::FP32, SizeVector({1, 1}), Layout::NC));
fc->outData.push_back(outData); fc->outData.push_back(outData);
fc->insData.push_back(outData); fc->insData.push_back(outData);
ASSERT_NO_THROW(quantize(fc)); ASSERT_NO_THROW(quantize(fc));
} }
TEST_F(I16QuantisationTest, canQuantizeActivation){ TEST_F(I16QuantisationTest, canQuantizeActivation) {
auto sigmoid = std::make_shared<GenericLayer>(LayerParams{"name", "type", Precision::FP32});
auto sigmoid = std::make_shared<GenericLayer >(LayerParams{"name", "type", Precision::FP32});
sigmoid->params["value"] = 2; sigmoid->params["value"] = 2;
sigmoid->type = "Activation"; sigmoid->type = "Activation";
ASSERT_NO_THROW(quantize(sigmoid)); ASSERT_NO_THROW(quantize(sigmoid));
} }
TEST_F(I16QuantisationTest, outputAffinePrecisionIs32Bits){ TEST_F(I16QuantisationTest, outputAffinePrecisionIs32Bits) {
auto weights = make_shared_blob<uint8_t>({ Precision::U8, {440}, C }); auto weights = make_shared_blob<uint8_t>({Precision::U8, {440}, C});
weights->allocate(); weights->allocate();
fillWeights(weights); fillWeights(weights);
@ -118,10 +120,9 @@ TEST_F(I16QuantisationTest, outputAffinePrecisionIs32Bits){
ASSERT_EQ(affineDataPtr->getTensorDesc().getPrecision(), Precision::I32); ASSERT_EQ(affineDataPtr->getTensorDesc().getPrecision(), Precision::I32);
} }
TEST_F(I16QuantisationTest, canQuantizeLstmLikeTopology) { TEST_F(I16QuantisationTest, canQuantizeLstmLikeTopology) {
auto weights = setWeights(make_shared_blob<uint8_t >({ Precision::U8, {440}, C })); auto weights = setWeights(make_shared_blob<uint8_t>({Precision::U8, {440}, C}));
//std::fill_n(weights->buffer().as<float*>(), weights->byteSize()/sizeof(float), 0); // std::fill_n(weights->buffer().as<float*>(), weights->byteSize()/sizeof(float), 0);
Core ie; Core ie;
auto network = ie.ReadNetwork(affineToMemoryModel(), weights); auto network = ie.ReadNetwork(affineToMemoryModel(), weights);
@ -129,13 +130,13 @@ TEST_F(I16QuantisationTest, canQuantizeLstmLikeTopology) {
ASSERT_NO_THROW(quantize_single_input_model(network, 1000)); ASSERT_NO_THROW(quantize_single_input_model(network, 1000));
} }
TEST_F(I16QuantisationTest, DISABLED_outputScaleFactorForAffineIsCorrect){ TEST_F(I16QuantisationTest, DISABLED_outputScaleFactorForAffineIsCorrect) {
const float inputScaleFactorTest = 1000; const float inputScaleFactorTest = 1000;
const float weightValueTest = 100; const float weightValueTest = 100;
auto weights = make_shared_blob<uint8_t >({ Precision::U8, {440}, C }); auto weights = make_shared_blob<uint8_t>({Precision::U8, {440}, C});
weights->allocate(); weights->allocate();
fillWeights(weights, { weightValueTest }); fillWeights(weights, {weightValueTest});
Core ie; Core ie;
auto network = ie.ReadNetwork(Fc2DOutputModel(), weights); auto network = ie.ReadNetwork(Fc2DOutputModel(), weights);
@ -153,51 +154,70 @@ TEST_F(I16QuantisationTest, DISABLED_outputScaleFactorForAffineIsCorrect){
TEST_F(I16QuantisationTest, OnlyAffine_NoActivationInsertion) { TEST_F(I16QuantisationTest, OnlyAffine_NoActivationInsertion) {
assert_that() assert_that()
.onInferModel(Fc2DOutputModel()) .onInferModel(Fc2DOutputModel())
.inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f) .inNotCompactMode()
.gna().propagate_forward().called_without().pwl_inserted_into_nnet(); .withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
.gna()
.propagate_forward()
.called_without()
.pwl_inserted_into_nnet();
} }
TEST_F(I16QuantisationTest, OnlyAffine_NoActivationInsertion_ProfilingEnabled) { TEST_F(I16QuantisationTest, OnlyAffine_NoActivationInsertion_ProfilingEnabled) {
assert_that() assert_that()
.onInferModel(Fc2DOutputModel()) .onInferModel(Fc2DOutputModel())
.inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f) .inNotCompactMode()
.gna().propagate_forward().called_without().pwl_inserted_into_nnet().profiling_counters(); .withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
.gna()
.propagate_forward()
.called_without()
.pwl_inserted_into_nnet()
.profiling_counters();
} }
TEST_F(I16QuantisationTest, OnlyAffineWithNanScaleFactorFails) { TEST_F(I16QuantisationTest, OnlyAffineWithNanScaleFactorFails) {
gna() gna().onInferModel(Fc2DOutputModel()).withNanScaleFactor().propagate_forward().throws();
.onInferModel(Fc2DOutputModel())
.withNanScaleFactor()
.propagate_forward().throws();
} }
TEST_F(I16QuantisationTest, OnlyAffineWithInfScaleFactorFails) { TEST_F(I16QuantisationTest, OnlyAffineWithInfScaleFactorFails) {
gna() gna().onInferModel(Fc2DOutputModel()).withInfScaleFactor().propagate_forward().throws();
.onInferModel(Fc2DOutputModel())
.withInfScaleFactor()
.propagate_forward().throws();
} }
TEST_F(I16QuantisationTest, AffineToMemoryWillResultInActivationInsertion) { TEST_F(I16QuantisationTest, AffineToMemoryWillResultInActivationInsertion) {
assert_that() assert_that()
.onInferModel(affineToMemoryModel()) .onInferModel(affineToMemoryModel())
.inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f) .inNotCompactMode()
.gna().propagate_forward().called_with().pwl_inserted_into_nnet(); .withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
.gna()
.propagate_forward()
.called_with()
.pwl_inserted_into_nnet();
} }
TEST_F(I16QuantisationTest, EltwiseToMemoryWithNoOutputActivationInsertion) { TEST_F(I16QuantisationTest, EltwiseToMemoryWithNoOutputActivationInsertion) {
assert_that().inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f) assert_that()
.onInferModel(eltwiseToMemoryModelNoOutput(), [](CNNNetwork & net){ .inNotCompactMode()
.withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
.onInferModel(eltwiseToMemoryModelNoOutput(),
[](CNNNetwork& net) {
net.addOutput("Eltwise_8"); net.addOutput("Eltwise_8");
}).gna().propagate_forward().called_with().pwl_inserted_into_nnet(); })
.gna()
.propagate_forward()
.called_with()
.pwl_inserted_into_nnet();
} }
TEST_F(I16QuantisationTest, EltwiseToMemory_ActivationInsertion) { TEST_F(I16QuantisationTest, EltwiseToMemory_ActivationInsertion) {
assert_that().onInferModel(eltwiseToMemoryModel()).withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f) assert_that()
.inNotCompactMode().gna().propagate_forward().called_with().pwl_inserted_into_nnet(); .onInferModel(eltwiseToMemoryModel())
.withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
.inNotCompactMode()
.gna()
.propagate_forward()
.called_with()
.pwl_inserted_into_nnet();
} }
TEST_F(I16QuantisationTest, SplitFollowedByActivation_DummyDiagonalAffineInsertion) { TEST_F(I16QuantisationTest, SplitFollowedByActivation_DummyDiagonalAffineInsertion) {
auto input_params = std::make_shared<ngraph::opset8::Parameter>(ngraph::element::f32, ngraph::Shape{1, 20}); auto input_params = std::make_shared<ngraph::opset8::Parameter>(ngraph::element::f32, ngraph::Shape{1, 20});
const auto axis_node = ngraph::opset8::Constant::create(ngraph::element::i64, ngraph::Shape{}, {1}); const auto axis_node = ngraph::opset8::Constant::create(ngraph::element::i64, ngraph::Shape{}, {1});
@ -205,41 +225,73 @@ TEST_F(I16QuantisationTest, SplitFollowedByActivation_DummyDiagonalAffineInserti
auto tanh = std::make_shared<ngraph::opset8::Tanh>(split->outputs()[0]); auto tanh = std::make_shared<ngraph::opset8::Tanh>(split->outputs()[0]);
auto add = std::make_shared<ngraph::opset8::Add>(split->outputs()[1], tanh); auto add = std::make_shared<ngraph::opset8::Add>(split->outputs()[1], tanh);
auto result = std::make_shared<ngraph::opset8::Result>(add); auto result = std::make_shared<ngraph::opset8::Result>(add);
auto function = std::make_shared<ngraph::Function>(ngraph::ResultVector{result}, ngraph::ParameterVector{input_params}); auto function =
assert_that().onInferNgraphModel(function) std::make_shared<ngraph::Function>(ngraph::ResultVector{result}, ngraph::ParameterVector{input_params});
.inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f) assert_that()
.gna().propagate_forward().called_with().diagonal_inserted_into_nnet(); .onInferNgraphModel(function)
.inNotCompactMode()
.withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
.gna()
.propagate_forward()
.called_with()
.diagonal_inserted_into_nnet();
} }
TEST_F(I16QuantisationTest, SliceFollowedBy2FCsAnd2Eltwises_AlignedFilterInsertion) { TEST_F(I16QuantisationTest, SliceFollowedBy2FCsAnd2Eltwises_AlignedFilterInsertion) {
assert_that().onInferModel(twoFCWithPaddingAfterSliceModel()) assert_that()
.inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f) .onInferModel(twoFCWithPaddingAfterSliceModel())
.gna().propagate_forward().called_with().diagonal_inserted_into_nnet(); .inNotCompactMode()
.withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
.gna()
.propagate_forward()
.called_with()
.diagonal_inserted_into_nnet();
} }
// ToDo requires implementation of aligning filter for concat inputs and improvement of // ToDo requires implementation of aligning filter for concat inputs and improvement of
// quantization/scaling algorithm for concat // quantization/scaling algorithm for concat
TEST_F(I16QuantisationTest, DISABLED_DoubleConcatPropageteForwardWithSuccess_AlignedFilterInsertion) { TEST_F(I16QuantisationTest, DISABLED_DoubleConcatPropageteForwardWithSuccess_AlignedFilterInsertion) {
assert_that().onInferModel(doubleConcatModel()).withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f) assert_that()
.inNotCompactMode().gna().propagate_forward().called_with().diagonal_inserted_into_nnet(); .onInferModel(doubleConcatModel())
.withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
.inNotCompactMode()
.gna()
.propagate_forward()
.called_with()
.diagonal_inserted_into_nnet();
} }
TEST_F(I16QuantisationTest, EltwiseSumm_onlyOneIdentityInsertion) { TEST_F(I16QuantisationTest, EltwiseSumm_onlyOneIdentityInsertion) {
assert_that().onInferModel(eltwiseSummModel()).withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f) assert_that()
.inNotCompactMode().gna().propagate_forward().called_with().pwl_inserted_into_nnet().once(); .onInferModel(eltwiseSummModel())
.withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
.inNotCompactMode()
.gna()
.propagate_forward()
.called_with()
.pwl_inserted_into_nnet()
.once();
} }
TEST_F(I16QuantisationTest, canDetectLeakyRelu) { TEST_F(I16QuantisationTest, canDetectLeakyRelu) {
assert_that().onInferModel(TFLeakyReluModel()) assert_that()
.inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f) .onInferModel(TFLeakyReluModel())
.gna().propagate_forward().called_with().pwl_inserted_into_nnet(); .inNotCompactMode()
.withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
.gna()
.propagate_forward()
.called_with()
.pwl_inserted_into_nnet();
} }
TEST_F(I16QuantisationTest, MaxPool_followedAfterActivation) { TEST_F(I16QuantisationTest, MaxPool_followedAfterActivation) {
assert_that().onInferModel(maxpoolAfterRelu()) assert_that()
.inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f) .onInferModel(maxpoolAfterRelu())
.gna().propagate_forward().called_with() .inNotCompactMode()
.withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
.gna()
.propagate_forward()
.called_with()
.convolution_inserted_into_nnet() .convolution_inserted_into_nnet()
.And() .And()
.pwl_inserted_into_nnet() .pwl_inserted_into_nnet()
@ -248,28 +300,53 @@ TEST_F(I16QuantisationTest, MaxPool_followedAfterActivation) {
} }
TEST_F(I16QuantisationTest, EltwiseMull_willInsertTwoIdentities) { TEST_F(I16QuantisationTest, EltwiseMull_willInsertTwoIdentities) {
assert_that().onInferModel(eltwiseMulModel()) assert_that()
.inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f) .onInferModel(eltwiseMulModel())
.gna().propagate_forward().called_with().pwl_inserted_into_nnet().twice(); .inNotCompactMode()
.withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
.gna()
.propagate_forward()
.called_with()
.pwl_inserted_into_nnet()
.twice();
} }
TEST_F(I16QuantisationTest, multiple_inputs_supported) { TEST_F(I16QuantisationTest, multiple_inputs_supported) {
std::string configKey = GNA_CONFIG_KEY(SCALE_FACTOR) + std::string("_"); std::string configKey = GNA_CONFIG_KEY(SCALE_FACTOR) + std::string("_");
assert_that().onInferModel(two_inputs_to_affine()) assert_that()
.inNotCompactMode().withGNAConfig(configKey + std::to_string(0), 1.0f) .onInferModel(two_inputs_to_affine())
.withGNAConfig(configKey + std::to_string(1), 2.0f).gna().propagate_forward() .inNotCompactMode()
.called_with().pwl_inserted_into_nnet().once(); .withGNAConfig(configKey + std::to_string(0), 1.0f)
.withGNAConfig(configKey + std::to_string(1), 2.0f)
.gna()
.propagate_forward()
.called_with()
.pwl_inserted_into_nnet()
.once();
} }
TEST_F(I16QuantisationTest, DISABLED_multiple_inputs_into_concat_supported) { TEST_F(I16QuantisationTest, DISABLED_multiple_inputs_into_concat_supported) {
assert_that().onInferModel(two_inputs_to_concat()) assert_that()
.inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f).gna().propagate_forward().called_with().pwl_inserted_into_nnet().once(); .onInferModel(two_inputs_to_concat())
.inNotCompactMode()
.withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
.gna()
.propagate_forward()
.called_with()
.pwl_inserted_into_nnet()
.once();
} }
TEST_F(I16QuantisationTest, ScaleShift_Affine_WillResultInIdentityInsertion) { TEST_F(I16QuantisationTest, ScaleShift_Affine_WillResultInIdentityInsertion) {
assert_that().onInferModel(scaleShiftAffineModel()) assert_that()
.inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f) .onInferModel(scaleShiftAffineModel())
.gna().propagate_forward().called_with().pwl_inserted_into_nnet().once(); .inNotCompactMode()
.withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
.gna()
.propagate_forward()
.called_with()
.pwl_inserted_into_nnet()
.once();
} }
TEST_F(I16QuantisationTest, ClampFollowedByTanh_ResultInDiagonalInsertion) { TEST_F(I16QuantisationTest, ClampFollowedByTanh_ResultInDiagonalInsertion) {
@ -277,10 +354,17 @@ TEST_F(I16QuantisationTest, ClampFollowedByTanh_ResultInDiagonalInsertion) {
auto clamp = std::make_shared<ngraph::opset8::Clamp>(input_params, -50, 50); auto clamp = std::make_shared<ngraph::opset8::Clamp>(input_params, -50, 50);
auto tanh = std::make_shared<ngraph::opset8::Tanh>(clamp); auto tanh = std::make_shared<ngraph::opset8::Tanh>(clamp);
auto result = std::make_shared<ngraph::opset8::Result>(tanh); auto result = std::make_shared<ngraph::opset8::Result>(tanh);
auto function = std::make_shared<ngraph::Function>(ngraph::ResultVector{result}, ngraph::ParameterVector{input_params}); auto function =
assert_that().onInferNgraphModel(function) std::make_shared<ngraph::Function>(ngraph::ResultVector{result}, ngraph::ParameterVector{input_params});
.inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f) assert_that()
.gna().propagate_forward().called_with().diagonal_inserted_into_nnet().twice(); .onInferNgraphModel(function)
.inNotCompactMode()
.withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
.gna()
.propagate_forward()
.called_with()
.diagonal_inserted_into_nnet()
.twice();
} }
TEST_F(I16QuantisationTest, EltwiseWithMemoryAndActivationInput_ResultInTwoDiagonalsInsertion) { TEST_F(I16QuantisationTest, EltwiseWithMemoryAndActivationInput_ResultInTwoDiagonalsInsertion) {
@ -296,76 +380,127 @@ TEST_F(I16QuantisationTest, EltwiseWithMemoryAndActivationInput_ResultInTwoDiago
auto result = std::make_shared<ngraph::opset8::Result>(add); auto result = std::make_shared<ngraph::opset8::Result>(add);
mem_w->add_control_dependency(mem_r); mem_w->add_control_dependency(mem_r);
result->add_control_dependency(mem_w); result->add_control_dependency(mem_w);
auto function = std::make_shared<ngraph::Function>(ngraph::ResultVector{result}, ngraph::ParameterVector{input_params}); auto function =
assert_that().onInferNgraphModel(function) std::make_shared<ngraph::Function>(ngraph::ResultVector{result}, ngraph::ParameterVector{input_params});
.inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f) assert_that()
.gna().propagate_forward().called_with().diagonal_inserted_into_nnet().twice(); .onInferNgraphModel(function)
.inNotCompactMode()
.withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
.gna()
.propagate_forward()
.called_with()
.diagonal_inserted_into_nnet()
.twice();
} }
TEST_F(I16QuantisationTest, AffineWith2AffineOutputs_ResultInOnlyOneIdentityInsertion) { TEST_F(I16QuantisationTest, AffineWith2AffineOutputs_ResultInOnlyOneIdentityInsertion) {
// one Identity activation from first FC, and one Identity activation for eltwise // one Identity activation from first FC, and one Identity activation for eltwise
assert_that().onInferModel(AffineWith2AffineOutputsModel()) assert_that()
.inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f) .onInferModel(AffineWith2AffineOutputsModel())
.gna().propagate_forward().called_with().pwl_inserted_into_nnet().twice(); .inNotCompactMode()
.withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
.gna()
.propagate_forward()
.called_with()
.pwl_inserted_into_nnet()
.twice();
} }
TEST_F(I16QuantisationTest, ScaleShiftWithBroadcast_ResultInDiagonalInsertion) { TEST_F(I16QuantisationTest, ScaleShiftWithBroadcast_ResultInDiagonalInsertion) {
auto& affineWeights = storage<std::vector<uint16_t>>();
auto & affineWeights = storage<std::vector<uint16_t>>();
affineWeights = { affineWeights = {
2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384, 2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384, 2048, 4096, 6144, 8192, 10240, 12288,
2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384, 14336, 16384, 2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384, 2048, 4096, 6144, 8192,
2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384, 10240, 12288, 14336, 16384, 2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384,
2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384,
2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384,
}; };
assert_that().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f).onInferModel(ScaleShift3DModel()) assert_that()
.withWeigthsPattern({1.0f,2.0f,3.0f,4.0f,5.0f,6.0f,7.0f,8.0f}) .withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
.inNotCompactMode().gna().propagate_forward().called_with().called_with().affine_weights_eq(affineWeights); .onInferModel(ScaleShift3DModel())
.withWeigthsPattern({1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f})
.inNotCompactMode()
.gna()
.propagate_forward()
.called_with()
.called_with()
.affine_weights_eq(affineWeights);
} }
TEST_F(I16QuantisationTest, MemoryAfterConcat_ResultInCopyInsertion) { TEST_F(I16QuantisationTest, MemoryAfterConcat_ResultInCopyInsertion) {
assert_that().onInferModel(MemoryAfterConcatModel()).inNotCompactMode().gna().propagate_forward(). assert_that()
called_with().copy_inserted_into_nnet(); .onInferModel(MemoryAfterConcatModel())
.inNotCompactMode()
.gna()
.propagate_forward()
.called_with()
.copy_inserted_into_nnet();
} }
TEST_F(I16QuantisationTest, MemoryAndConcatAfterOneNode_ResultInCopyInsertion) { TEST_F(I16QuantisationTest, MemoryAndConcatAfterOneNode_ResultInCopyInsertion) {
assert_that().onInferModel(MemoryAndConcatAfterOneNode()).inNotCompactMode().gna().propagate_forward(). assert_that()
called_with().copy_inserted_into_nnet(); .onInferModel(MemoryAndConcatAfterOneNode())
.inNotCompactMode()
.gna()
.propagate_forward()
.called_with()
.copy_inserted_into_nnet();
} }
TEST_F(I16QuantisationTest, DISABLED_permutationOfWeightsBetweenConvAndAffine) { TEST_F(I16QuantisationTest, DISABLED_permutationOfWeightsBetweenConvAndAffine) {
auto & affineWeights = storage<std::vector<uint16_t>>(); auto& affineWeights = storage<std::vector<uint16_t>>();
// least likely that width and height both are multiple of 7 // least likely that width and height both are multiple of 7
auto weigthsPattern = {1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f}; auto weigthsPattern = {1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f};
// here weights are transpozed // here weights are transpozed
save().onInferModel(affineAfterConvNoPermute()).withWeigthsPattern(weigthsPattern) save()
.inNotCompactMode().from().propagate_forward().affine_weights_transpozed({128, 61}).to(affineWeights); .onInferModel(affineAfterConvNoPermute())
.withWeigthsPattern(weigthsPattern)
.inNotCompactMode()
.from()
.propagate_forward()
.affine_weights_transpozed({128, 61})
.to(affineWeights);
// here weights shouldn't be transposed // here weights shouldn't be transposed
assert_that().onInferModel(affineAfterConvWithPermute()).withWeigthsPattern(weigthsPattern) assert_that()
.inNotCompactMode().gna().propagate_forward().called_with().affine_weights_eq(affineWeights); .onInferModel(affineAfterConvWithPermute())
.withWeigthsPattern(weigthsPattern)
.inNotCompactMode()
.gna()
.propagate_forward()
.called_with()
.affine_weights_eq(affineWeights);
} }
TEST_F(I16QuantisationTest, DISABLED_noPermutationOfWeightsBetweenConvAndAffineIfPermuteLayerWithCorrectArgs) { TEST_F(I16QuantisationTest, DISABLED_noPermutationOfWeightsBetweenConvAndAffineIfPermuteLayerWithCorrectArgs) {
auto & affineWeights = storage<std::vector<uint16_t>>(); auto& affineWeights = storage<std::vector<uint16_t>>();
// least likely that width and height both are multiple of 7 // least likely that width and height both are multiple of 7
auto weigthsPattern = {1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f}; auto weigthsPattern = {1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f};
save().onInferModel(affineAfterConvWithPermute()).withWeigthsPattern(weigthsPattern) save()
.inNotCompactMode().from().propagate_forward().affine_weights().to(affineWeights); .onInferModel(affineAfterConvWithPermute())
.withWeigthsPattern(weigthsPattern)
.inNotCompactMode()
.from()
.propagate_forward()
.affine_weights()
.to(affineWeights);
assert_that().onInferModel(affineAfterConvNoPermute()).withWeigthsPattern(weigthsPattern) assert_that()
.inNotCompactMode().gna().propagate_forward().called_with().affine_weights_transposed(affineWeights, {128, 61}); .onInferModel(affineAfterConvNoPermute())
.withWeigthsPattern(weigthsPattern)
.inNotCompactMode()
.gna()
.propagate_forward()
.called_with()
.affine_weights_transposed(affineWeights, {128, 61});
} }
TEST_F(I16QuantisationTest, fp16tofp32_on_fullyConnected_model) { TEST_F(I16QuantisationTest, fp16tofp32_on_fullyConnected_model) {
auto weights = make_shared_blob<uint8_t>({ Precision::U8, {220}, Layout::C }); auto weights = make_shared_blob<uint8_t>({Precision::U8, {220}, Layout::C});
weights->allocate(); weights->allocate();
fillWeights(weights); fillWeights(weights);
@ -375,7 +510,8 @@ TEST_F(I16QuantisationTest, fp16tofp32_on_fullyConnected_model) {
quantize_single_input_model(network, 1000); quantize_single_input_model(network, 1000);
} }
TEST_F(I16QuantisationTest, MultipleActivationsAfterAffineWithIdentityActivation_MultipleDiagonalLayersWithActivaitons) { TEST_F(I16QuantisationTest,
MultipleActivationsAfterAffineWithIdentityActivation_MultipleDiagonalLayersWithActivaitons) {
auto input_params = std::make_shared<ngraph::opset8::Parameter>(ngraph::element::f32, ngraph::Shape{1, 10}); auto input_params = std::make_shared<ngraph::opset8::Parameter>(ngraph::element::f32, ngraph::Shape{1, 10});
const auto constant = ngraph::opset8::Constant::create(ngraph::element::f32, ngraph::Shape{10, 10}, {1}); const auto constant = ngraph::opset8::Constant::create(ngraph::element::f32, ngraph::Shape{10, 10}, {1});
auto matmul1 = std::make_shared<ngraph::opset8::MatMul>(input_params, constant); auto matmul1 = std::make_shared<ngraph::opset8::MatMul>(input_params, constant);
@ -386,11 +522,17 @@ TEST_F(I16QuantisationTest, MultipleActivationsAfterAffineWithIdentityActivation
auto mul = std::make_shared<ngraph::opset8::Multiply>(sigmoid, relu); auto mul = std::make_shared<ngraph::opset8::Multiply>(sigmoid, relu);
auto add2 = std::make_shared<ngraph::opset8::Add>(add, mul); auto add2 = std::make_shared<ngraph::opset8::Add>(add, mul);
auto result = std::make_shared<ngraph::opset8::Result>(add); auto result = std::make_shared<ngraph::opset8::Result>(add);
auto function = std::make_shared<ngraph::Function>(ngraph::ResultVector{result}, ngraph::ParameterVector{input_params}); auto function =
std::make_shared<ngraph::Function>(ngraph::ResultVector{result}, ngraph::ParameterVector{input_params});
// identiy came from automatic insertion due to // identiy came from automatic insertion due to
assert_that().onInferNgraphModel(function) assert_that()
.inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f) .onInferNgraphModel(function)
.gna().propagate_forward().called_with().pwls_inserted_into_nnet({kActSigmoid, kActRelu, kActIdentity, kActIdentity}); .inNotCompactMode()
.withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
.gna()
.propagate_forward()
.called_with()
.pwls_inserted_into_nnet({kActSigmoid, kActRelu, kActIdentity, kActIdentity});
} }
TEST_F(I16QuantisationTest, MultipleActivationsAfterAffine_ResultInMultipleDiagonalLayersWithActivaitons) { TEST_F(I16QuantisationTest, MultipleActivationsAfterAffine_ResultInMultipleDiagonalLayersWithActivaitons) {
@ -401,24 +543,36 @@ TEST_F(I16QuantisationTest, MultipleActivationsAfterAffine_ResultInMultipleDiago
auto relu = std::make_shared<ngraph::opset8::Relu>(matmul); auto relu = std::make_shared<ngraph::opset8::Relu>(matmul);
auto mul = std::make_shared<ngraph::opset8::Multiply>(sigmoid, relu); auto mul = std::make_shared<ngraph::opset8::Multiply>(sigmoid, relu);
auto result = std::make_shared<ngraph::opset8::Result>(mul); auto result = std::make_shared<ngraph::opset8::Result>(mul);
auto function = std::make_shared<ngraph::Function>(ngraph::ResultVector{result}, ngraph::ParameterVector{input_params}); auto function =
std::make_shared<ngraph::Function>(ngraph::ResultVector{result}, ngraph::ParameterVector{input_params});
// extra identity inserted for affine // extra identity inserted for affine
assert_that().onInferNgraphModel(function) assert_that()
.inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f) .onInferNgraphModel(function)
.gna().propagate_forward().called_with() .inNotCompactMode()
.withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
.gna()
.propagate_forward()
.called_with()
// 1 diag for second activation, 1 for eltwise // 1 diag for second activation, 1 for eltwise
.pwls_inserted_into_nnet({kActRelu, kActSigmoid}).diagonal_inserted_into_nnet().times(3); .pwls_inserted_into_nnet({kActRelu, kActSigmoid})
.diagonal_inserted_into_nnet()
.times(3);
} }
// TODO: build a regression test on top of it using real quantisation accuracy checking // TODO: build a regression test on top of it using real quantisation accuracy checking
TEST_F(I16QuantisationTest, ConcatWithConstInputPropagatedForward) { TEST_F(I16QuantisationTest, ConcatWithConstInputPropagatedForward) {
assert_that().onInferModel(concatModelWithConstLayer()) assert_that()
.inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f) .onInferModel(concatModelWithConstLayer())
.gna().propagate_forward().called_with().pwls_inserted_into_nnet({kActIdentity}); .inNotCompactMode()
.withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
.gna()
.propagate_forward()
.called_with()
.pwls_inserted_into_nnet({kActIdentity});
} }
TEST_F(I16QuantisationTest, LSTMCell_quantize) { TEST_F(I16QuantisationTest, LSTMCell_quantize) {
auto weights = make_shared_blob<uint8_t>({ Precision::U8, {33664}, C }); auto weights = make_shared_blob<uint8_t>({Precision::U8, {33664}, C});
weights->allocate(); weights->allocate();
fillWeights(weights); fillWeights(weights);
@ -429,7 +583,7 @@ TEST_F(I16QuantisationTest, LSTMCell_quantize) {
} }
TEST_F(I16QuantisationTest, LSTMCell_unaligned_quantize) { TEST_F(I16QuantisationTest, LSTMCell_unaligned_quantize) {
auto weights = make_shared_blob<uint8_t>({ Precision::U8, {3480}, C }); auto weights = make_shared_blob<uint8_t>({Precision::U8, {3480}, C});
weights->allocate(); weights->allocate();
fillWeights(weights); fillWeights(weights);
@ -440,15 +594,27 @@ TEST_F(I16QuantisationTest, LSTMCell_unaligned_quantize) {
} }
TEST_F(I16QuantisationTest, EltwisetWithConstInputPropagatedForward) { TEST_F(I16QuantisationTest, EltwisetWithConstInputPropagatedForward) {
assert_that().onInferModel(eltwiseSumModelWithConstLayer()) assert_that()
.inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f) .onInferModel(eltwiseSumModelWithConstLayer())
.gna().propagate_forward().called_with().diagonal_inserted_into_nnet(); .inNotCompactMode()
.withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
.gna()
.propagate_forward()
.called_with()
.diagonal_inserted_into_nnet();
} }
TEST_F(I16QuantisationTest, PowerWithScaleFactorPropagateForward) { TEST_F(I16QuantisationTest, PowerWithScaleFactorPropagateForward) {
assert_that().onInferModel(PowerWithScaleFactor1()) assert_that()
.inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f) .onInferModel(PowerWithScaleFactor1())
.gna().propagate_forward().called_with().pwls_inserted_into_nnet({kActIdentity}).And().diagonal_inserted_into_nnet(); .inNotCompactMode()
.withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
.gna()
.propagate_forward()
.called_with()
.pwls_inserted_into_nnet({kActIdentity})
.And()
.diagonal_inserted_into_nnet();
} }
TEST_F(I16QuantisationTest, ConcatWithDifferentInputScaleFactorsPropagateForward) { TEST_F(I16QuantisationTest, ConcatWithDifferentInputScaleFactorsPropagateForward) {
@ -459,14 +625,20 @@ TEST_F(I16QuantisationTest, ConcatWithDifferentInputScaleFactorsPropagateForward
auto tanh = std::make_shared<ngraph::opset8::Tanh>(split->outputs()[1]); auto tanh = std::make_shared<ngraph::opset8::Tanh>(split->outputs()[1]);
auto concat = std::make_shared<ngraph::opset8::Concat>(ngraph::OutputVector{sigmoid, tanh}, 1); auto concat = std::make_shared<ngraph::opset8::Concat>(ngraph::OutputVector{sigmoid, tanh}, 1);
auto result = std::make_shared<ngraph::opset8::Result>(concat); auto result = std::make_shared<ngraph::opset8::Result>(concat);
auto function = std::make_shared<ngraph::Function>(ngraph::ResultVector{result}, ngraph::ParameterVector{input_params}); auto function =
assert_that().onInferNgraphModel(function) std::make_shared<ngraph::Function>(ngraph::ResultVector{result}, ngraph::ParameterVector{input_params});
.inNotCompactMode().withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f) assert_that()
.gna().propagate_forward().called_with().pwls_inserted_into_nnet({kActIdentity}); .onInferNgraphModel(function)
.inNotCompactMode()
.withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
.gna()
.propagate_forward()
.called_with()
.pwls_inserted_into_nnet({kActIdentity});
} }
TEST_F(I16QuantisationTest, TI_quantize) { TEST_F(I16QuantisationTest, TI_quantize) {
auto weights = make_shared_blob<uint8_t>({ Precision::U8, {249748}, C }); auto weights = make_shared_blob<uint8_t>({Precision::U8, {249748}, C});
weights->allocate(); weights->allocate();
fillWeights(weights); fillWeights(weights);
@ -477,40 +649,52 @@ TEST_F(I16QuantisationTest, TI_quantize) {
} }
TEST_F(I16QuantisationTest, TI_PropagateForward) { TEST_F(I16QuantisationTest, TI_PropagateForward) {
auto input_params = std::make_shared<ngraph::opset8::Parameter>(ngraph::element::f32, ngraph::Shape{ 1, 10 }); auto input_params = std::make_shared<ngraph::opset8::Parameter>(ngraph::element::f32, ngraph::Shape{1, 10});
auto mul = std::make_shared<ngraph::opset8::Multiply>(input_params, auto mul = std::make_shared<ngraph::opset8::Multiply>(
std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape{ 1, 10 })); input_params,
auto add = std::make_shared<ngraph::opset8::Add>(mul, std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape{1, 10}));
std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape{ 1, 10 })); auto add = std::make_shared<ngraph::opset8::Add>(
auto reshape = std::make_shared<ngraph::opset8::Reshape>(add, mul,
std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{ 3 }, std::vector<size_t>{ 1, 1, 10 }), false); std::make_shared<ngraph::op::Constant>(ngraph::element::f32, ngraph::Shape{1, 10}));
auto reshape = std::make_shared<ngraph::opset8::Reshape>(
add,
std::make_shared<ngraph::op::Constant>(ngraph::element::i64, ngraph::Shape{3}, std::vector<size_t>{1, 1, 10}),
false);
auto reshape_shape = reshape->output(0).get_shape(); auto reshape_shape = reshape->output(0).get_shape();
const size_t batch_size = 1; const size_t batch_size = 1;
const size_t hiddenSize = 10; const size_t hiddenSize = 10;
auto H_init = ngraph::builder::makeConstant<float>(ngraph::element::f32, { batch_size, hiddenSize }, {}, true); auto H_init = ngraph::builder::makeConstant<float>(ngraph::element::f32, {batch_size, hiddenSize}, {}, true);
auto C_init = ngraph::builder::makeConstant<float>(ngraph::element::f32, { batch_size, hiddenSize }, {}, true); auto C_init = ngraph::builder::makeConstant<float>(ngraph::element::f32, {batch_size, hiddenSize}, {}, true);
auto H_t = std::make_shared<ngraph::opset8::Parameter>(ngraph::element::f32, ngraph::Shape{ batch_size, hiddenSize }); auto H_t = std::make_shared<ngraph::opset8::Parameter>(ngraph::element::f32, ngraph::Shape{batch_size, hiddenSize});
auto C_t = std::make_shared<ngraph::opset8::Parameter>(ngraph::element::f32, ngraph::Shape{ batch_size, hiddenSize }); auto C_t = std::make_shared<ngraph::opset8::Parameter>(ngraph::element::f32, ngraph::Shape{batch_size, hiddenSize});
//Body // Body
auto X = std::make_shared<ngraph::opset8::Parameter>(ngraph::element::f32, ngraph::Shape{ batch_size, 1, reshape_shape[2] }); auto X = std::make_shared<ngraph::opset8::Parameter>(ngraph::element::f32,
auto weightsNode = ngraph::builder::makeConstant<float>(ngraph::element::f32, { 4 * hiddenSize, reshape_shape[2] }, {}, true); ngraph::Shape{batch_size, 1, reshape_shape[2]});
auto reccurrenceWeightsNode = ngraph::builder::makeConstant<float>(ngraph::element::f32, { 4 * hiddenSize, hiddenSize }, {}, true); auto weightsNode =
ngraph::builder::makeConstant<float>(ngraph::element::f32, {4 * hiddenSize, reshape_shape[2]}, {}, true);
auto reccurrenceWeightsNode =
ngraph::builder::makeConstant<float>(ngraph::element::f32, {4 * hiddenSize, hiddenSize}, {}, true);
// lstm // lstm
auto constantX = ngraph::opset8::Constant::create(ngraph::element::i64, ngraph::Shape{ 2 }, { batch_size, reshape_shape[2] }); auto constantX =
auto lstm1 = std::make_shared<ngraph::opset8::LSTMCell>(std::make_shared<ngraph::opset8::Reshape>(X, constantX, false), ngraph::opset8::Constant::create(ngraph::element::i64, ngraph::Shape{2}, {batch_size, reshape_shape[2]});
H_t, C_t, auto lstm1 =
weightsNode, reccurrenceWeightsNode, hiddenSize); std::make_shared<ngraph::opset8::LSTMCell>(std::make_shared<ngraph::opset8::Reshape>(X, constantX, false),
H_t,
C_t,
weightsNode,
reccurrenceWeightsNode,
hiddenSize);
auto H_o = lstm1->output(0); auto H_o = lstm1->output(0);
auto C_o = lstm1->output(1); auto C_o = lstm1->output(1);
auto body = std::make_shared<ngraph::Function>( auto body =
ngraph::OutputVector{ H_o, C_o }, ngraph::ParameterVector{ X, H_t, C_t }); std::make_shared<ngraph::Function>(ngraph::OutputVector{H_o, C_o}, ngraph::ParameterVector{X, H_t, C_t});
auto tensor_iterator = std::make_shared<ngraph::opset8::TensorIterator>(); auto tensor_iterator = std::make_shared<ngraph::opset8::TensorIterator>();
tensor_iterator->set_body(body); tensor_iterator->set_body(body);
@ -522,16 +706,29 @@ TEST_F(I16QuantisationTest, TI_PropagateForward) {
auto out0 = tensor_iterator->get_iter_value(H_o, -1); auto out0 = tensor_iterator->get_iter_value(H_o, -1);
const size_t output_size = 12; const size_t output_size = 12;
auto fc = ngraph::builder::makeFullyConnected(out0, ngraph::element::f32, output_size, true, { hiddenSize, output_size }, { 1 }, { 1 }); auto fc = ngraph::builder::makeFullyConnected(out0,
ngraph::element::f32,
output_size,
true,
{hiddenSize, output_size},
{1},
{1});
auto result = std::make_shared<ngraph::opset8::Result>(fc); auto result = std::make_shared<ngraph::opset8::Result>(fc);
auto function = std::make_shared<ngraph::Function>(ngraph::ResultVector{result}, ngraph::ParameterVector{input_params}); auto function =
assert_that().onInferNgraphModel(function).withWeigthsPattern({0.1f}) std::make_shared<ngraph::Function>(ngraph::ResultVector{result}, ngraph::ParameterVector{input_params});
.inNotCompactMode().gna().propagate_forward() assert_that()
.called_with().pwls_inserted_into_nnet({kActIdentity}); .onInferNgraphModel(function)
.withWeigthsPattern({0.1f})
.inNotCompactMode()
.gna()
.propagate_forward()
.called_with()
.pwls_inserted_into_nnet({kActIdentity});
} }
TEST_F(I16QuantisationTest, SplitToConcatWith2Inputs1360NotAlignedNoFC) { TEST_F(I16QuantisationTest, SplitToConcatWith2Inputs1360NotAlignedNoFC) {
assert_that().onInferModel(SplitToConcatWith2Inputs1360NotAlignedNoFC()) assert_that()
.onInferModel(SplitToConcatWith2Inputs1360NotAlignedNoFC())
.inNotCompactMode() .inNotCompactMode()
.withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f) .withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), 1.0f)
.gna() .gna()

View File

@ -282,10 +282,12 @@ struct Validatecnn2dParams {
class GNAcnn2dValidatorTest : public ::testing::TestWithParam<GNACnn2DValidatorTestParam> { class GNAcnn2dValidatorTest : public ::testing::TestWithParam<GNACnn2DValidatorTestParam> {
protected: protected:
void SetUp() override { void SetUp() override {
validator = cnn2d::AbstractValidator::Create(GetParam().target); Limitations::init(GetParam().target);
ASSERT_TRUE(validator != nullptr); validator = Limitations::get_instance()->get_cnn_validator();
ASSERT_TRUE(validator);
} }
std::unique_ptr<cnn2d::AbstractValidator> validator;
std::shared_ptr<cnn2d::AbstractValidator> validator;
}; };
class GNAcnn2dValidatorTestPadding : public GNAcnn2dValidatorTest { class GNAcnn2dValidatorTestPadding : public GNAcnn2dValidatorTest {
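Note on the fixture above: after the refactoring, tests no longer create a cnn2d::AbstractValidator directly; they initialize the Limitations singleton for the requested hardware generation and borrow its validator. A minimal sketch of that pattern, assuming backend/gna_limitations.hpp is included and GNA3_5 is just an example target:

    using namespace ov::intel_gna::limitations;
    using ov::intel_gna::target::DeviceVersion;

    // Initialize (or re-initialize) the singleton for the desired device generation.
    Limitations::init(DeviceVersion::GNA3_5);
    // The validator is owned by Limitations, hence the shared_ptr member in the fixture.
    std::shared_ptr<cnn2d::AbstractValidator> validator = Limitations::get_instance()->get_cnn_validator();
    ASSERT_TRUE(validator);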

View File

@ -18,7 +18,7 @@ class GNAPluginForNetworkMetricsTest : public GNAPlugin {
public: public:
GNAPluginForNetworkMetricsTest(const std::map<std::string, std::string>& configMap) : GNAPlugin(configMap) { GNAPluginForNetworkMetricsTest(const std::map<std::string, std::string>& configMap) : GNAPlugin(configMap) {
gnamem.reset(new gna_memory_float(memory::GNAFloatAllocator{})); gnamem.reset(new gna_memory_float(memory::GNAFloatAllocator{}));
graphCompiler.setGNAMemoryPtr(gnamem); m_graph_compiler->setGNAMemoryPtr(gnamem);
gnadevice.reset(); gnadevice.reset();
} }
}; };
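The same rename shows up in every test-only plugin subclass: the graph compiler is now reached through the m_graph_compiler member. A sketch of the shared constructor pattern used by these fixtures, with the class name here being hypothetical:

    class GNAPluginForTests : public GNAPlugin {
    public:
        explicit GNAPluginForTests(const std::map<std::string, std::string>& config) : GNAPlugin(config) {
            // Use a host-side float allocator so the graph can be compiled without GNA hardware,
            // hand it to the renamed graph compiler member, and drop the device.
            gnamem.reset(new gna_memory_float(memory::GNAFloatAllocator{}));
            m_graph_compiler->setGNAMemoryPtr(gnamem);
            gnadevice.reset();
        }
    };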

View File

@ -81,11 +81,11 @@ class GNAPluginForPWLExtraSegmentsTest : public GNAPlugin {
public: public:
GNAPluginForPWLExtraSegmentsTest(const std::map<std::string, std::string>& config) : GNAPlugin(config) { GNAPluginForPWLExtraSegmentsTest(const std::map<std::string, std::string>& config) : GNAPlugin(config) {
gnamem.reset(new gna_memory_float(memory::GNAFloatAllocator{})); gnamem.reset(new gna_memory_float(memory::GNAFloatAllocator{}));
graphCompiler.setGNAMemoryPtr(gnamem); m_graph_compiler->setGNAMemoryPtr(gnamem);
gnadevice.reset(); gnadevice.reset();
} }
void Test(const size_t expected_segments) { void Test(const size_t expected_segments) {
for (const auto& component : graphCompiler.dnnComponents.components) { for (const auto& component : m_graph_compiler->dnnComponents.components) {
if (component.dnnComponent.operation == kDnnPiecewiselinearOp) { if (component.dnnComponent.operation == kDnnPiecewiselinearOp) {
EXPECT_EQ(expected_segments, component.dnnComponent.op.pwl.num_segments); EXPECT_EQ(expected_segments, component.dnnComponent.op.pwl.num_segments);
} }

View File

@ -58,7 +58,7 @@ TEST(CheckSplitSupported, CheckVariadicSplitSupported) {
ngraph::opset9::Constant::create(ngraph::element::i64, ngraph::opset9::Constant::create(ngraph::element::i64,
ngraph::Shape({split_lengths.size()}), ngraph::Shape({split_lengths.size()}),
split_lengths)); split_lengths));
ASSERT_TRUE(ov::intel_gna::limitations::is_split_supported(split, false) == result); ASSERT_TRUE(ov::intel_gna::limitations::Limitations::is_split_supported(split, false) == result);
} }
} }
@ -86,7 +86,7 @@ TEST(CheckSplitSupported, CheckSplitSupported) {
std::make_shared<ngraph::opset9::Parameter>(ngraph::element::f32, input_shape), std::make_shared<ngraph::opset9::Parameter>(ngraph::element::f32, input_shape),
ngraph::opset9::Constant::create(ngraph::element::i64, ngraph::Shape({}), {axis}), ngraph::opset9::Constant::create(ngraph::element::i64, ngraph::Shape({}), {axis}),
num_splits); num_splits);
ASSERT_TRUE(ov::intel_gna::limitations::is_split_supported(split, false) == result); ASSERT_TRUE(ov::intel_gna::limitations::Limitations::is_split_supported(split, false) == result);
} }
} }
} // namespace } // namespace
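A minimal sketch of the entry point these tests now call: is_split_supported moved from a free function to a static member of Limitations. The shapes below are illustrative only; whether a split is accepted still depends on the GNA buffer limits checked inside:

    auto input = std::make_shared<ngraph::opset9::Parameter>(ngraph::element::f32, ngraph::Shape{1, 1024});
    auto axis = ngraph::opset9::Constant::create(ngraph::element::i64, ngraph::Shape({}), {1});
    auto split = std::make_shared<ngraph::opset9::Split>(input, axis, 4);
    // The second argument mirrors the tests above; presumably it controls whether
    // unsupported cases throw instead of simply returning false.
    bool supported = ov::intel_gna::limitations::Limitations::is_split_supported(split, false);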

View File

@ -17,7 +17,7 @@ class GNAPluginForPrecisionTest : public GNAPlugin {
public: public:
GNAPluginForPrecisionTest(const std::map<std::string, std::string>& configMap) : GNAPlugin(configMap) { GNAPluginForPrecisionTest(const std::map<std::string, std::string>& configMap) : GNAPlugin(configMap) {
gnamem.reset(new gna_memory_float(memory::GNAFloatAllocator{})); gnamem.reset(new gna_memory_float(memory::GNAFloatAllocator{}));
graphCompiler.setGNAMemoryPtr(gnamem); m_graph_compiler->setGNAMemoryPtr(gnamem);
gnadevice.reset(); gnadevice.reset();
} }
std::vector<intel_dnn_component_t> get_components() { std::vector<intel_dnn_component_t> get_components() {

View File

@ -45,9 +45,9 @@ public:
GNAPluginForMemoryAlignmentTest(const std::map<std::string, std::string>& configMap) : GNAPlugin(configMap) { GNAPluginForMemoryAlignmentTest(const std::map<std::string, std::string>& configMap) : GNAPlugin(configMap) {
if (gnadevice) { if (gnadevice) {
gnamem.reset(new gna_memory_float(memory::GNAFloatAllocator{}, gnamem.reset(new gna_memory_float(memory::GNAFloatAllocator{},
gnadevice->getMemAlignment(), Limitations::get_instance()->get_memory_alignment(),
limitations::kMemoryPageSize)); Limitations::kMemoryPageSize));
graphCompiler.setGNAMemoryPtr(gnamem); m_graph_compiler->setGNAMemoryPtr(gnamem);
gnadevice.reset(); gnadevice.reset();
} }
} }
@ -149,16 +149,14 @@ INSTANTIATE_TEST_SUITE_P(MemoryAlignment_GNA_4_0,
class MemoryAlignmentTest : public ::testing::Test {}; class MemoryAlignmentTest : public ::testing::Test {};
TEST(MemoryAlignmentTest, getMemoryAlignmentBytes_ExpectExceptionWhenTargetIsUnset) { TEST(MemoryAlignmentTest, getMemoryAlignmentBytes_Expect64ByteAlignmentWhenTargetIsGNA3_5) {
EXPECT_ANY_THROW(getMemoryAlignmentBytes(DeviceVersion::NotSet)); Limitations::init(DeviceVersion::GNA3_5);
} EXPECT_EQ(Limitations::get_instance()->get_memory_alignment(), 64);
TEST(MemoryAlignmentTest, getMemoryAlignmentBytes_Expect64ByteAlignmentWhenTargetIsGNA3_0) {
EXPECT_EQ(getMemoryAlignmentBytes(DeviceVersion::GNA3_0), 64);
} }
TEST(MemoryAlignmentTest, getMemoryAlignmentBytes_Expect16ByteAlignmentWhenTargetIsGNA3_6) { TEST(MemoryAlignmentTest, getMemoryAlignmentBytes_Expect16ByteAlignmentWhenTargetIsGNA3_6) {
EXPECT_EQ(getMemoryAlignmentBytes(DeviceVersion::GNA3_6), 16); Limitations::init(DeviceVersion::GNA3_6);
EXPECT_EQ(Limitations::get_instance()->get_memory_alignment(), 16);
} }
} // namespace testing } // namespace testing
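The two tests above replace the old free function getMemoryAlignmentBytes(): alignment is now a property of the initialized singleton, so re-initializing for another target changes the reported value (64 bytes for GNA3_5, 16 bytes for GNA3_6, per the expectations above). A compact sketch:

    using namespace ov::intel_gna::limitations;
    using ov::intel_gna::target::DeviceVersion;

    Limitations::init(DeviceVersion::GNA3_5);
    size_t alignment35 = Limitations::get_instance()->get_memory_alignment();  // 64

    Limitations::init(DeviceVersion::GNA3_6);
    size_t alignment36 = Limitations::get_instance()->get_memory_alignment();  // 16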

View File

@ -297,7 +297,7 @@ public:
GNAPluginTested() : GNAPlugin() { GNAPluginTested() : GNAPlugin() {
gnamem_t = std::make_shared<GNAMemoryTested>(); gnamem_t = std::make_shared<GNAMemoryTested>();
gnamem = gnamem_t; gnamem = gnamem_t;
graphCompiler.setGNAMemoryPtr(gnamem); m_graph_compiler->setGNAMemoryPtr(gnamem);
gnadevice.reset(); gnadevice.reset();
} }
void Test() { void Test() {

View File

@ -15,6 +15,7 @@
#include "common_test_utils/ngraph_test_utils.hpp" #include "common_test_utils/ngraph_test_utils.hpp"
#include "transformations/decompose_2d_convolution.hpp" #include "transformations/decompose_2d_convolution.hpp"
using namespace ov::intel_gna::limitations;
namespace testing { namespace testing {
namespace { namespace {
@ -312,6 +313,8 @@ void Decompose2DConvTestInvalidFixture::SetUp() {
std::tie(model, input_shape, filters_shape, conv_stride, conv_dilation, bias_shape, maxpool_stride, maxpool_shape) = std::tie(model, input_shape, filters_shape, conv_stride, conv_dilation, bias_shape, maxpool_stride, maxpool_shape) =
params; params;
Limitations::init(ov::intel_gna::target::DeviceVersion::Default);
function = get_initial_function(fq, function = get_initial_function(fq,
model, model,
input_shape, input_shape,
@ -342,6 +345,7 @@ class Decompose2DConvTestFixture : public CommonTestUtils::TestsCommon,
public ::testing::WithParamInterface<fqDecompose2DConvParams> { public ::testing::WithParamInterface<fqDecompose2DConvParams> {
public: public:
void SetUp() override; void SetUp() override;
std::shared_ptr<ngraph::Function> get_reference(const bool& fq, std::shared_ptr<ngraph::Function> get_reference(const bool& fq,
const modelType& model, const modelType& model,
const ngraph::PartialShape& input_shape, const ngraph::PartialShape& input_shape,
@ -365,6 +369,8 @@ void Decompose2DConvTestFixture::SetUp() {
std::tie(model, input_shape, filters_shape, conv_stride, conv_dilation, bias_shape, maxpool_stride, maxpool_shape) = std::tie(model, input_shape, filters_shape, conv_stride, conv_dilation, bias_shape, maxpool_stride, maxpool_shape) =
params; params;
Limitations::init(ov::intel_gna::target::DeviceVersion::Default);
function = get_initial_function(fq, function = get_initial_function(fq,
model, model,
input_shape, input_shape,
@ -779,7 +785,7 @@ static size_t CalculateConvCount(const ConvParams& conv_params) {
size_t conv_count = 1; size_t conv_count = 1;
size_t total_factorized_conv_channel_count = size_t total_factorized_conv_channel_count =
(conv_params.input_channel_count * conv_params.filter_height * conv_params.filter_width); (conv_params.input_channel_count * conv_params.filter_height * conv_params.filter_width);
while (total_factorized_conv_channel_count / conv_count > ov::intel_gna::limitations::convFilterMaxSize || while (total_factorized_conv_channel_count / conv_count > Limitations::kConvFilterMaxSize ||
total_factorized_conv_channel_count % conv_count != 0 || conv_params.filter_channel_count % conv_count != 0) total_factorized_conv_channel_count % conv_count != 0 || conv_params.filter_channel_count % conv_count != 0)
conv_count++; conv_count++;
@ -792,7 +798,7 @@ static bool ShouldDecompose(GraphData& graph_data, const ConvParams& conv_params
// Concat (copy) layer limitation allows to split up to a certain limit // Concat (copy) layer limitation allows to split up to a certain limit
// Currently we are able to split only convolutions without pooling in horizontal dimension // Currently we are able to split only convolutions without pooling in horizontal dimension
if (graph_data.conv_count > ov::intel_gna::limitations::copyMaxGrouping || if (graph_data.conv_count > Limitations::kCopyMaxGrouping ||
((graph_data.pool_size_width > 1 || graph_data.pool_stride_width > 1) && graph_data.conv_count > 1)) ((graph_data.pool_size_width > 1 || graph_data.pool_stride_width > 1) && graph_data.conv_count > 1))
return false; return false;
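Both thresholds referenced above are now class constants. A compact sketch of the factorization logic these hunks exercise, assuming conv_params carries the channel and filter dimensions as in ConvParams and ignoring the pooling restriction handled separately above:

    size_t conv_count = 1;
    const size_t total_channels =
        conv_params.input_channel_count * conv_params.filter_height * conv_params.filter_width;
    // Grow conv_count until every split fits a single GNA convolution filter
    // and both channel counts divide evenly.
    while (total_channels / conv_count > Limitations::kConvFilterMaxSize ||
           total_channels % conv_count != 0 ||
           conv_params.filter_channel_count % conv_count != 0) {
        ++conv_count;
    }
    // Decomposition is rejected once the copy-layer grouping limit is exceeded.
    const bool decompose = conv_count <= Limitations::kCopyMaxGrouping;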
@ -884,18 +890,13 @@ void execute_test(modelType model,
case modelType::TranspConvBcastAddMaxPoolTransp: case modelType::TranspConvBcastAddMaxPoolTransp:
case modelType::TranspConvBcastAddActTransp: case modelType::TranspConvBcastAddActTransp:
case modelType::TranspConvBcastAddMaxPoolActTransp: case modelType::TranspConvBcastAddMaxPoolActTransp:
manager.register_pass<ov::intel_gna::pass::Decompose2DConv>(ov::intel_gna::target::DeviceVersion::Default, manager.register_pass<ov::intel_gna::pass::Decompose2DConv>(gnaPrecision);
gnaPrecision);
break; break;
case modelType::TranspConvTranspBcastAdd: case modelType::TranspConvTranspBcastAdd:
manager.register_pass<ov::intel_gna::pass::Decompose2DConvTransposedWithBias>( manager.register_pass<ov::intel_gna::pass::Decompose2DConvTransposedWithBias>(gnaPrecision);
ov::intel_gna::target::DeviceVersion::Default,
gnaPrecision);
break; break;
case modelType::TranspConvTranspBcastAddAct: case modelType::TranspConvTranspBcastAddAct:
manager.register_pass<ov::intel_gna::pass::Decompose2DConvTransposedWithBiasAF>( manager.register_pass<ov::intel_gna::pass::Decompose2DConvTransposedWithBiasAF>(gnaPrecision);
ov::intel_gna::target::DeviceVersion::Default,
gnaPrecision);
break; break;
} }
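After the refactoring the decomposition passes no longer take a DeviceVersion argument; they presumably pick the target up from the already-initialized Limitations singleton, which is why the fixtures above call Limitations::init in SetUp. A sketch of registering one of them now, where gnaPrecision is the element type used throughout these tests:

    Limitations::init(ov::intel_gna::target::DeviceVersion::Default);  // before constructing the passes
    ngraph::pass::Manager manager;
    manager.register_pass<ov::pass::InitNodeInfo>();
    manager.register_pass<ov::intel_gna::pass::Decompose2DConv>(gnaPrecision);
    manager.run_passes(function);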

View File

@ -13,6 +13,8 @@
#include "transformations/decompose_mvn.hpp" #include "transformations/decompose_mvn.hpp"
#include "transformations/op_conversions/convert_mvn1_to_mvn6.hpp" #include "transformations/op_conversions/convert_mvn1_to_mvn6.hpp"
using namespace ov::intel_gna::limitations;
namespace decomposeMVN { namespace decomposeMVN {
typedef std::tuple<ngraph::Shape, // Input shape typedef std::tuple<ngraph::Shape, // Input shape
@ -264,7 +266,7 @@ std::shared_ptr<ngraph::Function> getReferenceFunction(const ngraph::Shape& inpu
mvn_data.normalize_variance = normalize_variance; mvn_data.normalize_variance = normalize_variance;
mvn_data.num_parts = 1; mvn_data.num_parts = 1;
while (mvn_data.W / mvn_data.num_parts > ov::intel_gna::limitations::convFilterMaxSize) { while (mvn_data.W / mvn_data.num_parts > Limitations::kConvFilterMaxSize) {
mvn_data.num_parts *= 2; mvn_data.num_parts *= 2;
} }
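The reference model mirrors the plugin's own splitting rule: num_parts doubles until each horizontal slice fits a single convolution filter. For illustration, if kConvFilterMaxSize were 768, a width of 2048 would end up with num_parts == 4 (2048 / 4 == 512):

    size_t num_parts = 1;
    while (mvn_data.W / num_parts > Limitations::kConvFilterMaxSize) {
        num_parts *= 2;  // keep halving the per-part width until it fits
    }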

View File

@ -11,6 +11,7 @@
#include <transformations/init_node_info.hpp> #include <transformations/init_node_info.hpp>
#include <transformations/utils/utils.hpp> #include <transformations/utils/utils.hpp>
#include "backend/gna_limitations.hpp"
#include "common_test_utils/ngraph_test_utils.hpp" #include "common_test_utils/ngraph_test_utils.hpp"
#include "ngraph_functions/builders.hpp" #include "ngraph_functions/builders.hpp"
#include "ops/copy.hpp" #include "ops/copy.hpp"
@ -54,10 +55,10 @@ void InsertCopyLayerTest::Validate() {
void InsertCopyLayerTest::SetUp() { void InsertCopyLayerTest::SetUp() {
std::tie(m_axis, m_inputs_num) = this->GetParam(); std::tie(m_axis, m_inputs_num) = this->GetParam();
ov::intel_gna::limitations::Limitations::init(ov::intel_gna::target::DeviceVersion::Default);
} }
void InsertCopyLayerTest::Run() { void InsertCopyLayerTest::Run() {
SetUp();
Validate(); Validate();
} }
@ -176,6 +177,11 @@ public:
} }
}; };
void RunPasses(ngraph::pass::Manager& m, std::shared_ptr<ov::Model> func) {
ov::intel_gna::limitations::Limitations::init(ov::intel_gna::target::DeviceVersion::Default);
m.run_passes(func);
}
// [Parameter] [Parameter] // [Parameter] [Parameter]
// \ / => | // \ / => |
// [Concat] [Copy] // [Concat] [Copy]
@ -211,7 +217,7 @@ TEST(TransformationTests, InsertCopyLayerMultiParamConcatTest) {
ngraph::pass::Manager m; ngraph::pass::Manager m;
m.register_pass<ov::pass::InitNodeInfo>(); m.register_pass<ov::pass::InitNodeInfo>();
m.register_pass<ov::intel_gna::pass::HandleMultiConnectedLayerToConcatAndMemory>(); m.register_pass<ov::intel_gna::pass::HandleMultiConnectedLayerToConcatAndMemory>();
m.run_passes(func); RunPasses(m, func);
ASSERT_NO_THROW(check_rt_info(func)); ASSERT_NO_THROW(check_rt_info(func));
@ -263,7 +269,7 @@ TEST(TransformationTests, InsertCopyLayerMultiParamNFLConcatTest) {
ngraph::pass::Manager m; ngraph::pass::Manager m;
m.register_pass<ov::pass::InitNodeInfo>(); m.register_pass<ov::pass::InitNodeInfo>();
m.register_pass<ov::intel_gna::pass::HandleMultiConnectedLayerToConcatAndMemory>(); m.register_pass<ov::intel_gna::pass::HandleMultiConnectedLayerToConcatAndMemory>();
m.run_passes(func); RunPasses(m, func);
ASSERT_NO_THROW(check_rt_info(func)); ASSERT_NO_THROW(check_rt_info(func));
@ -324,7 +330,7 @@ TEST(TransformationTests, InsertCopyLayerMultiParamMultiNFLConcatTest) {
ngraph::pass::Manager m; ngraph::pass::Manager m;
m.register_pass<ov::pass::InitNodeInfo>(); m.register_pass<ov::pass::InitNodeInfo>();
m.register_pass<ov::intel_gna::pass::HandleMultiConnectedLayerToConcatAndMemory>(); m.register_pass<ov::intel_gna::pass::HandleMultiConnectedLayerToConcatAndMemory>();
m.run_passes(func); RunPasses(m, func);
ASSERT_NO_THROW(check_rt_info(func)); ASSERT_NO_THROW(check_rt_info(func));
@ -382,7 +388,7 @@ TEST(TransformationTests, InsertCopyLayerMultiConstConcatTest) {
ngraph::pass::Manager m; ngraph::pass::Manager m;
m.register_pass<ov::pass::InitNodeInfo>(); m.register_pass<ov::pass::InitNodeInfo>();
m.register_pass<ov::intel_gna::pass::InsertCopyBeforeConcatLayer>(); m.register_pass<ov::intel_gna::pass::InsertCopyBeforeConcatLayer>();
m.run_passes(func); RunPasses(m, func);
ASSERT_NO_THROW(check_rt_info(func)); ASSERT_NO_THROW(check_rt_info(func));
@ -442,7 +448,7 @@ TEST(TransformationTests, InsertCopyLayerMultiLayerConcatTest) {
ngraph::pass::Manager m; ngraph::pass::Manager m;
m.register_pass<ov::pass::InitNodeInfo>(); m.register_pass<ov::pass::InitNodeInfo>();
m.register_pass<ov::intel_gna::pass::HandleMultiConnectedLayerToConcatAndMemory>(); m.register_pass<ov::intel_gna::pass::HandleMultiConnectedLayerToConcatAndMemory>();
m.run_passes(func); RunPasses(m, func);
ASSERT_NO_THROW(check_rt_info(func)); ASSERT_NO_THROW(check_rt_info(func));
@ -510,7 +516,7 @@ TEST(TransformationTests, InsertCopyLayerMultiLayerNFLConcatTest) {
ngraph::pass::Manager m; ngraph::pass::Manager m;
m.register_pass<ov::pass::InitNodeInfo>(); m.register_pass<ov::pass::InitNodeInfo>();
m.register_pass<ov::intel_gna::pass::HandleMultiConnectedLayerToConcatAndMemory>(); m.register_pass<ov::intel_gna::pass::HandleMultiConnectedLayerToConcatAndMemory>();
m.run_passes(func); RunPasses(m, func);
ASSERT_NO_THROW(check_rt_info(func)); ASSERT_NO_THROW(check_rt_info(func));
@ -573,7 +579,7 @@ TEST(TransformationTests, InsertCopyLayerMultiParamMemoryTest) {
ngraph::pass::Manager m; ngraph::pass::Manager m;
m.register_pass<ov::pass::InitNodeInfo>(); m.register_pass<ov::pass::InitNodeInfo>();
m.register_pass<ov::intel_gna::pass::HandleMultiConnectedLayerToConcatAndMemory>(); m.register_pass<ov::intel_gna::pass::HandleMultiConnectedLayerToConcatAndMemory>();
m.run_passes(func); RunPasses(m, func);
ASSERT_NO_THROW(check_rt_info(func)); ASSERT_NO_THROW(check_rt_info(func));
@ -633,7 +639,7 @@ TEST(TransformationTests, InsertCopyLayerMultiParamConcatMemoryTest) {
ngraph::pass::Manager m; ngraph::pass::Manager m;
m.register_pass<ov::pass::InitNodeInfo>(); m.register_pass<ov::pass::InitNodeInfo>();
m.register_pass<ov::intel_gna::pass::HandleMultiConnectedLayerToConcatAndMemory>(); m.register_pass<ov::intel_gna::pass::HandleMultiConnectedLayerToConcatAndMemory>();
m.run_passes(func); RunPasses(m, func);
ASSERT_NO_THROW(check_rt_info(func)); ASSERT_NO_THROW(check_rt_info(func));
@ -705,7 +711,7 @@ TEST(TransformationTests, InsertCopyLayerMultiParamNFLConcatMemoryTest) {
ngraph::pass::Manager m; ngraph::pass::Manager m;
m.register_pass<ov::pass::InitNodeInfo>(); m.register_pass<ov::pass::InitNodeInfo>();
m.register_pass<ov::intel_gna::pass::HandleMultiConnectedLayerToConcatAndMemory>(); m.register_pass<ov::intel_gna::pass::HandleMultiConnectedLayerToConcatAndMemory>();
m.run_passes(func); RunPasses(m, func);
ASSERT_NO_THROW(check_rt_info(func)); ASSERT_NO_THROW(check_rt_info(func));
@ -776,7 +782,7 @@ TEST(TransformationTests, InsertCopyLayerMultiLayerConcatMemoryTest) {
ngraph::pass::Manager m; ngraph::pass::Manager m;
m.register_pass<ov::pass::InitNodeInfo>(); m.register_pass<ov::pass::InitNodeInfo>();
m.register_pass<ov::intel_gna::pass::InsertCopyBeforeAssignLayer>(); m.register_pass<ov::intel_gna::pass::InsertCopyBeforeAssignLayer>();
m.run_passes(func); RunPasses(m, func);
ASSERT_NO_THROW(check_rt_info(func)); ASSERT_NO_THROW(check_rt_info(func));
@ -851,7 +857,7 @@ TEST(TransformationTests, InsertCopyLayerCropMemoryTest) {
ngraph::pass::Manager m; ngraph::pass::Manager m;
m.register_pass<ov::pass::InitNodeInfo>(); m.register_pass<ov::pass::InitNodeInfo>();
m.register_pass<ov::intel_gna::pass::InsertCopyBeforeAssignLayer>(); m.register_pass<ov::intel_gna::pass::InsertCopyBeforeAssignLayer>();
m.run_passes(func); RunPasses(m, func);
ASSERT_NO_THROW(check_rt_info(func)); ASSERT_NO_THROW(check_rt_info(func));
@ -918,7 +924,7 @@ TEST(TransformationTests, InsertCopyLayerCropNFLMemoryTest) {
ngraph::pass::Manager m; ngraph::pass::Manager m;
m.register_pass<ov::pass::InitNodeInfo>(); m.register_pass<ov::pass::InitNodeInfo>();
m.register_pass<ov::intel_gna::pass::InsertCopyBeforeAssignLayer>(); m.register_pass<ov::intel_gna::pass::InsertCopyBeforeAssignLayer>();
m.run_passes(func); RunPasses(m, func);
ASSERT_NO_THROW(check_rt_info(func)); ASSERT_NO_THROW(check_rt_info(func));
@ -987,7 +993,7 @@ TEST(TransformationTests, InsertCopyLayerConcatMemoryTest) {
ngraph::pass::Manager m; ngraph::pass::Manager m;
m.register_pass<ov::pass::InitNodeInfo>(); m.register_pass<ov::pass::InitNodeInfo>();
m.register_pass<ov::intel_gna::pass::InsertCopyBeforeAssignLayer>(); m.register_pass<ov::intel_gna::pass::InsertCopyBeforeAssignLayer>();
m.run_passes(func); RunPasses(m, func);
ASSERT_NO_THROW(check_rt_info(func)); ASSERT_NO_THROW(check_rt_info(func));
@ -1060,7 +1066,7 @@ TEST(TransformationTests, InsertCopyLayerConcatNFLMemoryTest) {
ngraph::pass::Manager m; ngraph::pass::Manager m;
m.register_pass<ov::pass::InitNodeInfo>(); m.register_pass<ov::pass::InitNodeInfo>();
m.register_pass<ov::intel_gna::pass::InsertCopyBeforeAssignLayer>(); m.register_pass<ov::intel_gna::pass::InsertCopyBeforeAssignLayer>();
m.run_passes(func); RunPasses(m, func);
ASSERT_NO_THROW(check_rt_info(func)); ASSERT_NO_THROW(check_rt_info(func));
@ -1122,7 +1128,7 @@ TEST(TransformationTests, InsertCopyLayerSplitMemoryTest) {
ngraph::pass::Manager m; ngraph::pass::Manager m;
m.register_pass<ov::pass::InitNodeInfo>(); m.register_pass<ov::pass::InitNodeInfo>();
m.register_pass<ov::intel_gna::pass::InsertCopyBeforeAssignLayer>(); m.register_pass<ov::intel_gna::pass::InsertCopyBeforeAssignLayer>();
m.run_passes(func); RunPasses(m, func);
ASSERT_NO_THROW(check_rt_info(func)); ASSERT_NO_THROW(check_rt_info(func));
@ -1189,7 +1195,7 @@ TEST(TransformationTests, InsertCopyLayerSplitNFLMemoryTest) {
ngraph::pass::Manager m; ngraph::pass::Manager m;
m.register_pass<ov::pass::InitNodeInfo>(); m.register_pass<ov::pass::InitNodeInfo>();
m.register_pass<ov::intel_gna::pass::InsertCopyBeforeAssignLayer>(); m.register_pass<ov::intel_gna::pass::InsertCopyBeforeAssignLayer>();
m.run_passes(func); RunPasses(m, func);
ASSERT_NO_THROW(check_rt_info(func)); ASSERT_NO_THROW(check_rt_info(func));
@ -1244,7 +1250,7 @@ TEST(TransformationTests, InsertCopyLayerCropConcatTest) {
ngraph::pass::Manager m; ngraph::pass::Manager m;
m.register_pass<ov::pass::InitNodeInfo>(); m.register_pass<ov::pass::InitNodeInfo>();
m.register_pass<ov::intel_gna::pass::InsertCopyBeforeConcatLayer>(); m.register_pass<ov::intel_gna::pass::InsertCopyBeforeConcatLayer>();
m.run_passes(func); RunPasses(m, func);
ASSERT_NO_THROW(check_rt_info(func)); ASSERT_NO_THROW(check_rt_info(func));
@ -1289,7 +1295,7 @@ TEST(TransformationTests, InsertCopyLayerNonfuncTest) {
ngraph::pass::Manager m; ngraph::pass::Manager m;
m.register_pass<ov::pass::InitNodeInfo>(); m.register_pass<ov::pass::InitNodeInfo>();
m.register_pass<ov::intel_gna::pass::HandleNonFunctionalSubgraphs>(); m.register_pass<ov::intel_gna::pass::HandleNonFunctionalSubgraphs>();
m.run_passes(func); RunPasses(m, func);
ASSERT_NO_THROW(check_rt_info(func)); ASSERT_NO_THROW(check_rt_info(func));
@ -1338,7 +1344,7 @@ TEST(TransformationTests, InsertCopyLayerNonfuncTwoSubgraphsTest) {
ngraph::pass::Manager m; ngraph::pass::Manager m;
m.register_pass<ov::pass::InitNodeInfo>(); m.register_pass<ov::pass::InitNodeInfo>();
m.register_pass<ov::intel_gna::pass::HandleNonFunctionalSubgraphs>(); m.register_pass<ov::intel_gna::pass::HandleNonFunctionalSubgraphs>();
m.run_passes(func); RunPasses(m, func);
ASSERT_NO_THROW(check_rt_info(func)); ASSERT_NO_THROW(check_rt_info(func));
@ -1385,7 +1391,7 @@ TEST(TransformationTests, InsertCopyLayerNonfuncTwoResultsTest) {
ngraph::pass::Manager m; ngraph::pass::Manager m;
m.register_pass<ov::pass::InitNodeInfo>(); m.register_pass<ov::pass::InitNodeInfo>();
m.register_pass<ov::intel_gna::pass::HandleNonFunctionalSubgraphs>(); m.register_pass<ov::intel_gna::pass::HandleNonFunctionalSubgraphs>();
m.run_passes(func); RunPasses(m, func);
ASSERT_NO_THROW(check_rt_info(func)); ASSERT_NO_THROW(check_rt_info(func));
@ -1442,7 +1448,7 @@ TEST(TransformationTests, InsertCopyLayerNFLBranchTest) {
ngraph::pass::Manager m; ngraph::pass::Manager m;
m.register_pass<ov::pass::InitNodeInfo>(); m.register_pass<ov::pass::InitNodeInfo>();
m.register_pass<ov::intel_gna::pass::HandleNonFunctionalSubgraphs>(); m.register_pass<ov::intel_gna::pass::HandleNonFunctionalSubgraphs>();
m.run_passes(func); RunPasses(m, func);
ASSERT_NO_THROW(check_rt_info(func)); ASSERT_NO_THROW(check_rt_info(func));
@ -1499,7 +1505,7 @@ TEST(TransformationTests, InsertCopyLayerNFLvsFLSubgraphTestt) {
ngraph::pass::Manager m; ngraph::pass::Manager m;
m.register_pass<ov::pass::InitNodeInfo>(); m.register_pass<ov::pass::InitNodeInfo>();
m.register_pass<ov::intel_gna::pass::HandleNonFunctionalSubgraphs>(); m.register_pass<ov::intel_gna::pass::HandleNonFunctionalSubgraphs>();
m.run_passes(func); RunPasses(m, func);
ASSERT_NO_THROW(check_rt_info(func)); ASSERT_NO_THROW(check_rt_info(func));
@ -1550,7 +1556,7 @@ TEST(TransformationTests, InsertCopyLayerSplitNFLConcatTest) {
ngraph::pass::Manager m; ngraph::pass::Manager m;
m.register_pass<ov::pass::InitNodeInfo>(); m.register_pass<ov::pass::InitNodeInfo>();
m.register_pass<ov::intel_gna::pass::InsertCopyBeforeConcatLayer>(); m.register_pass<ov::intel_gna::pass::InsertCopyBeforeConcatLayer>();
m.run_passes(func); RunPasses(m, func);
ASSERT_NO_THROW(check_rt_info(func)); ASSERT_NO_THROW(check_rt_info(func));