From 8ccabc546f7ac9d91dff4478bf53aecf693d15bd Mon Sep 17 00:00:00 2001
From: Szymon Irzabek <szymon.jakub.irzabek@intel.com>
Date: Tue, 13 Dec 2022 06:02:03 +0100
Subject: [PATCH] [GNA] Remove internal overload correction algorithm (#14428)

---
 .../src/frontend/scale_factor_calc.cpp        | 71 +++++--------------
 .../convert_matmul_to_pointwise_conv.cpp      | 34 ++++++---
 .../fq_fusion_with_multiple_weights.cpp       |  4 +-
 .../pass_tests/fq_maxpool_reordering.cpp      | 10 +--
 .../matmul_overload_correction.cpp            | 26 +++++--
 .../subgraph_tests/conv_fq_eltwise.cpp        |  8 ++-
 .../subgraph_tests/conv_fq_relu.cpp           |  8 ++-
 .../subgraph/conv_fq_eltwise.hpp              |  3 +-
 .../subgraph/conv_fq_relu.hpp                 |  3 +-
 .../src/subgraph/conv_fq_eltwise.cpp          | 13 ++--
 .../src/subgraph/conv_fq_relu.cpp             | 13 ++--
 11 files changed, 105 insertions(+), 88 deletions(-)
diff --git a/src/plugins/intel_gna/src/frontend/scale_factor_calc.cpp b/src/plugins/intel_gna/src/frontend/scale_factor_calc.cpp
index 7033a52c8d0..e7d461c5d68 100644
--- a/src/plugins/intel_gna/src/frontend/scale_factor_calc.cpp
+++ b/src/plugins/intel_gna/src/frontend/scale_factor_calc.cpp
@@ -24,6 +24,7 @@ constexpr float k_identity = 6;
 constexpr double pow_domain = 16;
 constexpr float min_search_weights_val = 1.0f;
 constexpr float max_search_weights_val = 1024.0f;
+constexpr double initial_weights_reducer_val = 1.0;  // neutral reducer: dividing a scale by 1.0 is a no-op
 
 float GetScaleFactor(InferenceEngine::CNNLayerPtr layer, QuantizedDataType data_type) {
     IE_ASSERT(layer != nullptr);
@@ -207,10 +208,9 @@ std::vector<float> ScaleFactorCalculator::generateScaleFactors(float startRange,
double ScaleFactorCalculator::calculateWeightsReducerFromDstStats(QuantizationParams dst_quant) {
     auto maxAbsVal = std::max(std::abs(dst_quant.GetMinValues().front()),
         std::abs(dst_quant.GetMaxValues().front()));
-
     auto maxIntVal = static_cast<int64_t>(maxAbsVal * dst_quant.GetScale() + 0.5f);
     double weightsReducer = static_cast<double>(maxIntVal) / std::numeric_limits<int32_t>::max();
-    weightsReducer = std::max(1.0, weightsReducer);
+    weightsReducer = std::max(initial_weights_reducer_val, weightsReducer);  // clamp: never below the neutral value
     return weightsReducer;
 }
 
@@ -894,16 +894,15 @@ bool ScaleFactorCalculator::ScaleFactorPerLayerEltwise(InferenceEngine::EltwiseL
                 if (requantizeInput(in1, newOutputScale, result, infiniteLoopCount)) {
                     return true;
                 }
-                // we unable to rescale the input - results might be bad
+                // Unable to rescale the input - results might be bad
                 log::warning() << "[INFO] weights saturated for " << eltwiseLayer->name << "\n";
             }
 
             if (!quantData->_dst_quant.IsStatsSet()) {
                 return true;
             }
-
             auto weightsReducer = calculateWeightsReducerFromDstStats(quantData->_dst_quant);
-            if (!common::fp32eq(weightsReducer, 1.0f)) {
+            if (weightsReducer > initial_weights_reducer_val) {
                 float newOutputScale = quantParams1->_dst_quant.GetScale() / weightsReducer;
                 if (requantizeInput(in1, newOutputScale, result, infiniteLoopCount)) {
                     return true;
@@ -912,8 +911,10 @@ bool ScaleFactorCalculator::ScaleFactorPerLayerEltwise(InferenceEngine::EltwiseL
             }
         }
         break;
+
         default : THROW_GNA_EXCEPTION << "Unsupported Eltwise layer for quantisation: " << eltwiseLayer->_operation;
     }
+
     return true;
 }
 
@@ -1277,68 +1278,34 @@ bool ScaleFactorCalculator::ScaleFactorPerLayerWeightable(InferenceEngine::Weigh
             }
         }
 
-        auto weightsReducer = calculateWeightsReducerFromDstStats(quant->_dst_quant);
-        if (!common::fp32eq(weightsReducer, 1.0f)) {
-            quant->_weights_quant.SetScale(quant->_weights_quant.GetScale() / weightsReducer);
+        if (calculateWeightsReducerFromDstStats(quant->_dst_quant) > initial_weights_reducer_val) {
+            log::warning() << "Potential overload correction issue at layer " << wl->name;
         }
-
-        if (common::fp32eq(quant->_weights_quant.GetScale(), 0.0f) || std::isinf(quant->_weights_quant.GetScale())) {
-            quant->_weights_quant.SetScale(1.0f);
-        }
-
-        quant->_dst_quant.SetScale(quant->_weights_quant.GetScale() * quant->_src_quant.GetScale());
     }
 
     return true;
 }
 
-bool ScaleFactorCalculator::ScaleFactorPerLayerGemm(InferenceEngine::GemmLayer* gemmLayer,
+bool ScaleFactorCalculator::ScaleFactorPerLayerGemm(InferenceEngine::GemmLayer* gl,
                          ScaleFactorUpdateResult& result,
                          int infiniteLoopCount,
                          const Config& gna_config) const {
-    if ( !gemmLayer ) {
+    if (!gl) {
         THROW_GNA_EXCEPTION << "Incorrect Gemm Layer pointer \n";
     }
-    auto in0 = InferenceEngine::CNNNetPrevLayer(gemmLayer, 0);
-    auto in1 = InferenceEngine::CNNNetPrevLayer(gemmLayer, 1);
+    auto in0 = InferenceEngine::CNNNetPrevLayer(gl, 0);
+    auto in1 = InferenceEngine::CNNNetPrevLayer(gl, 1);
 
-    auto quantData = InferenceEngine::getInjectedData<QuantizedLayerParams>(*gemmLayer);
+    auto quant = InferenceEngine::getInjectedData<QuantizedLayerParams>(*gl);
     auto quantParams1 = InferenceEngine::getInjectedData<QuantizedLayerParams>(in1);
     auto quantParams0 = InferenceEngine::getInjectedData<QuantizedLayerParams>(in0);
-    quantData->_src_quant.SetScale(quantParams0->_dst_quant.GetScale());
-    quantData->_weights_quant.SetScale(quantParams1->_dst_quant.GetScale());
-    quantData->_dst_quant.SetScale(
-            quantData->_src_quant.GetScale() * quantData->_weights_quant.GetScale());
+    quant->_src_quant.SetScale(quantParams0->_dst_quant.GetScale());
+    quant->_weights_quant.SetScale(quantParams1->_dst_quant.GetScale());
+    quant->_dst_quant.SetScale(quant->_src_quant.GetScale() * quant->_weights_quant.GetScale());
 
-    if (!quantData->_dst_quant.IsStatsSet()) {
-        return true;
-    }
-
-    // Adjust weights scale factor if output values exceed int32 maximum value
-    auto weightsReducer = calculateWeightsReducerFromDstStats(quantData->_dst_quant);
-    if (LayerInfo(in0).isConst()) {
-        if (!common::fp32eq(weightsReducer, 1.0f)) {
-            quantParams0->_dst_quant.SetScale(quantData->_src_quant.GetScale() / weightsReducer);
-            quantData->_src_quant.SetScale(quantData->_src_quant.GetScale() / weightsReducer);
-        }
-        if (common::fp32eq(quantData->_src_quant.GetScale(), 0.0f) || std::isinf(quantData->_src_quant.GetScale())) {
-            quantParams0->_dst_quant.SetScale(1.0f);
-            quantData->_src_quant.SetScale(1.0f);
-        }
-
-        quantData->_dst_quant.SetScale(quantData->_weights_quant.GetScale() * quantData->_src_quant.GetScale());
-    } else {
-        if (!common::fp32eq(weightsReducer, 1.0f)) {
-            for (int i = 0; i < 2; ++i) {
-                auto input = InferenceEngine::CNNNetPrevLayer(gemmLayer, i);
-                auto quantParams = InferenceEngine::getInjectedData<QuantizedLayerParams>(input);
-                float newOutputScale = quantParams->_dst_quant.GetScale() / weightsReducer;
-                if (requantizeInput(input, newOutputScale, result, infiniteLoopCount)) {
-                    return true;
-                }
-            }
-            THROW_GNA_EXCEPTION << "Unable to quantize " << gemmLayer->name;
-        }
+    if (quant->_dst_quant.IsStatsSet() &&
+        calculateWeightsReducerFromDstStats(quant->_dst_quant) > initial_weights_reducer_val) {
+        log::warning() << "Potential overload correction issue at layer " << gl->name;
     }
 
     return true;
diff --git a/src/plugins/intel_gna/tests/functional/pass_tests/convert_matmul_to_pointwise_conv.cpp b/src/plugins/intel_gna/tests/functional/pass_tests/convert_matmul_to_pointwise_conv.cpp
index 1a3d117004a..104bcf221ca 100644
--- a/src/plugins/intel_gna/tests/functional/pass_tests/convert_matmul_to_pointwise_conv.cpp
+++ b/src/plugins/intel_gna/tests/functional/pass_tests/convert_matmul_to_pointwise_conv.cpp
@@ -8,6 +8,8 @@
 #include <vector>
 #include <string>
 
+#include <gmock/gmock.h>
+
 #include <ie_core.hpp>
 
 #include "common_test_utils/common_utils.hpp"
@@ -32,7 +34,7 @@ typedef std::tuple<
     std::map<std::string, std::string>, // Configuration
     std::vector<size_t>,                // Input Shape
     std::pair<float, float>             // Input Min and Max
-> convertMatmulToPointwiseConvWithFqParams;
+> ConvertMatmulToPointwiseConvWithFqNegParams;
 
 namespace LayerTestsDefinitions {
 
@@ -96,14 +98,14 @@ protected:
     }
 };
 
-class ConvertMatmulToPointwiseConvWithFq : public testing::WithParamInterface<convertMatmulToPointwiseConvWithFqParams>,
+class ConvertMatmulToPointwiseConvWithFqNeg : public testing::WithParamInterface<ConvertMatmulToPointwiseConvWithFqNegParams>,
     public LayerTestsUtils::LayerTestsCommon {
     float inputDataMin = -10.0f;
     float inputDataMax = 10.0f;
     float inputDataResolution = 1.0f;
 
 public:
-    static std::string getTestCaseName(testing::TestParamInfo<convertMatmulToPointwiseConvWithFqParams> obj) {
+    static std::string getTestCaseName(testing::TestParamInfo<ConvertMatmulToPointwiseConvWithFqNegParams> obj) {
         InferenceEngine::Precision netPrecision;
         std::string targetDevice;
         std::map<std::string, std::string> configuration;
@@ -184,8 +186,17 @@ TEST_P(ConvertMatmulToPointwiseConv, CompareWithRefImpl) {
     Run();
 };
 
-TEST_P(ConvertMatmulToPointwiseConvWithFq, CompareWithRefImpl) {
-    Run();
+TEST_P(ConvertMatmulToPointwiseConvWithFqNeg, CompareWithRefImpl) {
+    // Negative test: loading must emit the overload warning. stdout/stderr are captured into
+    // `what` and restored on every path, so a throwing LoadNetwork() cannot leave them dangling.
+    std::stringstream what;
+    std::streambuf* sbuf = std::cout.rdbuf(what.rdbuf());
+    std::streambuf* ebuf = std::cerr.rdbuf(what.rdbuf());
+    const auto restore = [&] { std::cout.rdbuf(sbuf); std::cerr.rdbuf(ebuf); };
+    try { LoadNetwork(); } catch (...) { restore(); throw; }
+    restore();
+    const auto expected = "Potential overload correction issue at layer ";
+    EXPECT_THAT(what.str(), ::testing::HasSubstr(expected));
 };
 
 const std::vector<InferenceEngine::Precision> netPrecisions = {
@@ -194,8 +205,15 @@ const std::vector<InferenceEngine::Precision> netPrecisions = {
 };
 
 const std::vector<std::map<std::string, std::string>> configs = {
+    {
+        {"GNA_DEVICE_MODE", "GNA_SW_EXACT"}
+    }
+};
+
+const std::vector<std::map<std::string, std::string>> configs_neg = {
     {
         {"GNA_DEVICE_MODE", "GNA_SW_EXACT"},
+        {"LOG_LEVEL", "LOG_WARNING"}  // presumably needed so the warning the Neg test looks for is logged - TODO confirm
     }
 };
 
@@ -217,13 +235,13 @@ INSTANTIATE_TEST_SUITE_P(smoke_ConvertMatmulToPointwiseConvTest, ConvertMatmulTo
         ::testing::ValuesIn(inputShape)),
     ConvertMatmulToPointwiseConv::getTestCaseName);
 
-INSTANTIATE_TEST_SUITE_P(smoke_ConvertMatmulToPointwiseConvTest, ConvertMatmulToPointwiseConvWithFq,
+INSTANTIATE_TEST_SUITE_P(smoke_ConvertMatmulToPointwiseConvTest, ConvertMatmulToPointwiseConvWithFqNeg,
     ::testing::Combine(
         ::testing::ValuesIn(netPrecisions),
         ::testing::Values(CommonTestUtils::DEVICE_GNA),
-        ::testing::ValuesIn(configs),
+        ::testing::ValuesIn(configs_neg),
         ::testing::ValuesIn(inputShape),
         ::testing::ValuesIn(fqStats)),
-    ConvertMatmulToPointwiseConvWithFq::getTestCaseName);
+    ConvertMatmulToPointwiseConvWithFqNeg::getTestCaseName);
 
 } // namespace LayerTestsDefinitions
diff --git a/src/plugins/intel_gna/tests/functional/pass_tests/fq_fusion_with_multiple_weights.cpp b/src/plugins/intel_gna/tests/functional/pass_tests/fq_fusion_with_multiple_weights.cpp
index 34645bc060f..37ada7aec0d 100644
--- a/src/plugins/intel_gna/tests/functional/pass_tests/fq_fusion_with_multiple_weights.cpp
+++ b/src/plugins/intel_gna/tests/functional/pass_tests/fq_fusion_with_multiple_weights.cpp
@@ -72,8 +72,8 @@ protected:
         auto weights = ngraph::builder::makeConstant<float>(ngPrc, {outChannels, inputShape[1], 1, kernelSize},
             CommonTestUtils::generate_float_numbers(outChannels * inputShape[1] * kernelSize,
                                                     weightsMinMax.first, weightsMinMax.second));
-        auto weightsLowNode = ngraph::builder::makeConstant<float>(ngPrc, {1}, { weightsMinMax.first });
-        auto weightsHighNode = ngraph::builder::makeConstant<float>(ngPrc, {1}, { weightsMinMax.second });
+        auto weightsLowNode = ngraph::builder::makeConstant<float>(ngPrc, {1}, { weightsMinMax.first * 2 });
+        auto weightsHighNode = ngraph::builder::makeConstant<float>(ngPrc, {1}, { weightsMinMax.second * 2 });
         auto weightsFQ = std::make_shared<ngraph::opset7::FakeQuantize>(weights,
             weightsLowNode, weightsHighNode, weightsLowNode, weightsHighNode, levels);
 
diff --git a/src/plugins/intel_gna/tests/functional/pass_tests/fq_maxpool_reordering.cpp b/src/plugins/intel_gna/tests/functional/pass_tests/fq_maxpool_reordering.cpp
index 9d97cf9f41f..4ee58c12cc4 100644
--- a/src/plugins/intel_gna/tests/functional/pass_tests/fq_maxpool_reordering.cpp
+++ b/src/plugins/intel_gna/tests/functional/pass_tests/fq_maxpool_reordering.cpp
@@ -96,8 +96,8 @@ protected:
             inputLowNode1, inputHighNode1, inputLowNode1, inputHighNode1, levels);
 
         auto filterWeightsNode = ngraph::builder::makeConstant<float>(ngPrc, {8, inputShape[1], 1, 8}, { 1.0f });
-        auto convLowNode = ngraph::builder::makeConstant(ngraph::element::f32, std::vector<size_t>{ 1 }, std::vector<float>{inputDataMin1});
-        auto convHighNode = ngraph::builder::makeConstant(ngraph::element::f32, std::vector<size_t>{ 1 }, std::vector<float>{inputDataMax1});
+        auto convLowNode = ngraph::builder::makeConstant(ngraph::element::f32, std::vector<size_t>{ 1 }, std::vector<float>{inputDataMin1 * 35});
+        auto convHighNode = ngraph::builder::makeConstant(ngraph::element::f32, std::vector<size_t>{ 1 }, std::vector<float>{inputDataMax1 * 35});
         auto convWeightsFQNode = std::make_shared<ngraph::opset1::FakeQuantize>(filterWeightsNode,
             convLowNode, convHighNode, convLowNode, convHighNode, levels);
         auto convWeightsFQ = std::dynamic_pointer_cast<ngraph::opset1::FakeQuantize>(convWeightsFQNode);
@@ -148,7 +148,7 @@ const std::vector<std::map<std::string, std::string>> configs = {
 
 const std::vector<std::vector<size_t>> inputShape = {
     {1, 1, 1, 1024},
-    {1, 8, 1, 168},
+    {1, 8, 1, 168}
 };
 
 const std::vector<std::pair<float, float>> inputMinMax = {
@@ -156,11 +156,11 @@ const std::vector<std::pair<float, float>> inputMinMax = {
     {-2, 2},
     {-8, 8},
     {-5, 5},
-    {-17.5, 17.5},
+    {-17.5, 17.5}
 };
 
 const std::vector<size_t> levels = {
-    65535,
+    65535
 };
 
 INSTANTIATE_TEST_SUITE_P(smoke_fq_maxpool_reordering, FQMaxpoolReordering,
diff --git a/src/plugins/intel_gna/tests/functional/scale_factors_tests/matmul_overload_correction.cpp b/src/plugins/intel_gna/tests/functional/scale_factors_tests/matmul_overload_correction.cpp
index 32ed52d44f1..44503926419 100644
--- a/src/plugins/intel_gna/tests/functional/scale_factors_tests/matmul_overload_correction.cpp
+++ b/src/plugins/intel_gna/tests/functional/scale_factors_tests/matmul_overload_correction.cpp
@@ -10,6 +10,8 @@
 
 #include <ie_core.hpp>
 
+#include <gmock/gmock.h>
+
 #include "common_test_utils/common_utils.hpp"
 #include "functional_test_utils/plugin_cache.hpp"
 #include "shared_test_classes/base/layer_test_utils.hpp"
@@ -30,7 +32,7 @@ typedef std::tuple<
 
 namespace LayerTestsDefinitions {
 
-class MatMulOverloadCorrectionTest : public testing::WithParamInterface<matmulOverloadCorrectionParams>,
+class MatMulOverloadCorrectionNegTest : public testing::WithParamInterface<matmulOverloadCorrectionParams>,
     public LayerTestsUtils::LayerTestsCommon {
 public:
     static std::string getTestCaseName(testing::TestParamInfo<matmulOverloadCorrectionParams> obj) {
@@ -111,8 +113,17 @@ protected:
     const size_t levels32 = std::numeric_limits<uint32_t>::max();
 };
 
-TEST_P(MatMulOverloadCorrectionTest, CompareWithRefImpl) {
-    Run();
+TEST_P(MatMulOverloadCorrectionNegTest, CompareWithRefImpl) {
+    // Negative test: loading must emit the overload warning. stdout/stderr are captured into
+    // `what` and restored on every path, so a throwing LoadNetwork() cannot leave them dangling.
+    std::stringstream what;
+    std::streambuf* sbuf = std::cout.rdbuf(what.rdbuf());
+    std::streambuf* ebuf = std::cerr.rdbuf(what.rdbuf());
+    const auto restore = [&] { std::cout.rdbuf(sbuf); std::cerr.rdbuf(ebuf); };
+    try { LoadNetwork(); } catch (...) { restore(); throw; }
+    restore();
+    const auto expected = "Potential overload correction issue at layer ";
+    EXPECT_THAT(what.str(), ::testing::HasSubstr(expected));
 };
 
 const std::vector<InferenceEngine::Precision> netPrecisions = {
@@ -122,7 +133,8 @@ const std::vector<InferenceEngine::Precision> netPrecisions = {
 
 const std::vector<std::map<std::string, std::string>> configs = {
     {
-        {"GNA_DEVICE_MODE", "GNA_SW_EXACT"}
+        {"GNA_DEVICE_MODE", "GNA_SW_EXACT"},
+        {"LOG_LEVEL", "LOG_WARNING"}  // presumably needed so the warning the test looks for is logged - TODO confirm
     }
 };
 
@@ -131,13 +143,13 @@ const std::vector<std::vector<size_t>> inputShapes = {
     {1, 256}
 };
 
-INSTANTIATE_TEST_SUITE_P(smoke_base, MatMulOverloadCorrectionTest,
+INSTANTIATE_TEST_SUITE_P(smoke_base, MatMulOverloadCorrectionNegTest,
     ::testing::Combine(
         ::testing::ValuesIn(netPrecisions),
         ::testing::Values(CommonTestUtils::DEVICE_GNA),
         ::testing::ValuesIn(configs),
         ::testing::ValuesIn(inputShapes),
-        ::testing::ValuesIn({true, false}),
+        ::testing::ValuesIn({true}),
         ::testing::ValuesIn({true, false})),
-    MatMulOverloadCorrectionTest::getTestCaseName);
+    MatMulOverloadCorrectionNegTest::getTestCaseName);
 } // namespace LayerTestsDefinitions
diff --git a/src/plugins/intel_gna/tests/functional/shared_tests_instances/subgraph_tests/conv_fq_eltwise.cpp b/src/plugins/intel_gna/tests/functional/shared_tests_instances/subgraph_tests/conv_fq_eltwise.cpp
index b1111bbe26f..e8d30020d0d 100644
--- a/src/plugins/intel_gna/tests/functional/shared_tests_instances/subgraph_tests/conv_fq_eltwise.cpp
+++ b/src/plugins/intel_gna/tests/functional/shared_tests_instances/subgraph_tests/conv_fq_eltwise.cpp
@@ -13,7 +13,8 @@ using namespace SubgraphTestsDefinitions;
 namespace {
 
 const std::vector<InferenceEngine::Precision> netPrecisions = {
-        InferenceEngine::Precision::FP32, InferenceEngine::Precision::FP16,
+        InferenceEngine::Precision::FP32,
+        InferenceEngine::Precision::FP16
 };
 
 const std::vector<std::map<std::string, std::string>> configs = {
@@ -29,9 +30,12 @@ const size_t levels = 65535;
 
 const std::vector<std::vector<float>> inputParams = {{-10, 10, 1}};
 
+const float convFQValue = 2.0f;
+
 const auto fqParams = ::testing::Combine(
         ::testing::Values(levels),
-        ::testing::ValuesIn(inputParams)
+        ::testing::ValuesIn(inputParams),
+        ::testing::Values(convFQValue)
 );
 
 const std::vector<std::vector<size_t>> kernels = {{1, 3}};
diff --git a/src/plugins/intel_gna/tests/functional/shared_tests_instances/subgraph_tests/conv_fq_relu.cpp b/src/plugins/intel_gna/tests/functional/shared_tests_instances/subgraph_tests/conv_fq_relu.cpp
index ad58fa790df..415b3ac1368 100644
--- a/src/plugins/intel_gna/tests/functional/shared_tests_instances/subgraph_tests/conv_fq_relu.cpp
+++ b/src/plugins/intel_gna/tests/functional/shared_tests_instances/subgraph_tests/conv_fq_relu.cpp
@@ -13,7 +13,8 @@ using namespace SubgraphTestsDefinitions;
 namespace {
 
 const std::vector<InferenceEngine::Precision> netPrecisions = {
-        InferenceEngine::Precision::FP32, InferenceEngine::Precision::FP16,
+        InferenceEngine::Precision::FP32,
+        InferenceEngine::Precision::FP16,
 };
 
 const std::vector<std::map<std::string, std::string>> configs = {
@@ -29,9 +30,12 @@ const size_t levels = 65535;
 
 const std::vector<std::vector<float>> inputParams = {{-100, 100, 1}};
 
+const float convFQValue = 2.0f;
+
 const auto fqParams = ::testing::Combine(
         ::testing::Values(levels),
-        ::testing::ValuesIn(inputParams)
+        ::testing::ValuesIn(inputParams),
+        ::testing::Values(convFQValue)
 );
 
 const std::vector<std::vector<size_t>> kernels = {{1, 3}};
diff --git a/src/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/conv_fq_eltwise.hpp b/src/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/conv_fq_eltwise.hpp
index d1f01752c8e..426fab10ddb 100644
--- a/src/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/conv_fq_eltwise.hpp
+++ b/src/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/conv_fq_eltwise.hpp
@@ -17,7 +17,8 @@ namespace SubgraphTestsDefinitions {
 
 typedef std::tuple<
         size_t,                           // levels
-        std::vector<float>                // input generator data: low, high, resolution
+        std::vector<float>,               // input generator data: low, high, resolution
+        float                             // conv weights' FQ bound v: FQ range is [-v, +v]
 > FqSpecificParams;
 
 typedef std::tuple<
diff --git a/src/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/conv_fq_relu.hpp b/src/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/conv_fq_relu.hpp
index 6f0f1283e36..483133da3b0 100644
--- a/src/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/conv_fq_relu.hpp
+++ b/src/tests/functional/shared_test_classes/include/shared_test_classes/subgraph/conv_fq_relu.hpp
@@ -17,7 +17,8 @@ namespace SubgraphTestsDefinitions {
 
 typedef std::tuple<
         size_t,                           // levels
-        std::vector<float>                // input generator data: low, high, resolution
+        std::vector<float>,               // input generator data: low, high, resolution
+        float                             // conv weights' FQ bound v: FQ range is [-v, +v]
 > FqSpecificParams;
 
 typedef std::tuple<
diff --git a/src/tests/functional/shared_test_classes/src/subgraph/conv_fq_eltwise.cpp b/src/tests/functional/shared_test_classes/src/subgraph/conv_fq_eltwise.cpp
index 9944d6c20b9..f4db6b6862f 100644
--- a/src/tests/functional/shared_test_classes/src/subgraph/conv_fq_eltwise.cpp
+++ b/src/tests/functional/shared_test_classes/src/subgraph/conv_fq_eltwise.cpp
@@ -17,7 +17,8 @@ std::string ConvFqEltwiseTest::getTestCaseName(const testing::TestParamInfo<Conv
 
     size_t levels;
     std::vector<float> inputArg;
-    std::tie(levels, inputArg) = fqParams;
+    float convFQValue;
+    std::tie(levels, inputArg, convFQValue) = fqParams;
 
     std::vector<size_t> kernelShape;
     std::vector<size_t> strides;
@@ -36,6 +37,7 @@ std::string ConvFqEltwiseTest::getTestCaseName(const testing::TestParamInfo<Conv
      if (inputArg.size() == 3) {
         result << "_inputArg=" << inputArg[0] << "_" << inputArg[1] << "_" << inputArg[2];
     }
+    result << "_convFQ=" << convFQValue;
     result << "_KERNEL=" << CommonTestUtils::vec2str(kernelShape) << "_";
     result << "STRIDES=" << CommonTestUtils::vec2str(strides) << "_";
     result << "IC=" << inputChannels << "_";
@@ -54,7 +56,8 @@ void ConvFqEltwiseTest::SetUp() {
 
     size_t levels;
     std::vector<float> inputArg;
-    std::tie(levels, inputArg) = fqParams;
+    float convFQValue;
+    std::tie(levels, inputArg, convFQValue) = fqParams;
     if (inputArg.size() == 3) {
         inputDataMin = inputArg[0];
         inputDataMax = inputArg[1];
@@ -80,8 +83,10 @@ void ConvFqEltwiseTest::SetUp() {
     float weightVal = 0.2;
     auto filterWeightsNode = ngraph::builder::makeConstant<float>(ngPrc, {outputChannels, inputChannels, kernelShape[0], kernelShape[1]},
                                                                   { weightVal });
-    auto convLowNode = ngraph::builder::makeConstant(ngraph::element::f32, std::vector<size_t>{ 1 }, std::vector<float>{-weightVal});
-    auto convHighNode = ngraph::builder::makeConstant(ngraph::element::f32, std::vector<size_t>{ 1 }, std::vector<float>{weightVal});
+    auto convLowNode =
+        ngraph::builder::makeConstant(ngraph::element::f32, std::vector<size_t>{1}, std::vector<float>{-convFQValue});
+    auto convHighNode =
+        ngraph::builder::makeConstant(ngraph::element::f32, std::vector<size_t>{1}, std::vector<float>{convFQValue});
     auto convWeightsFQNode = std::make_shared<ngraph::opset1::FakeQuantize>(filterWeightsNode,
         convLowNode, convHighNode, convLowNode, convHighNode, levels);
     auto convWeightsFQ = std::dynamic_pointer_cast<ngraph::opset1::FakeQuantize>(convWeightsFQNode);
diff --git a/src/tests/functional/shared_test_classes/src/subgraph/conv_fq_relu.cpp b/src/tests/functional/shared_test_classes/src/subgraph/conv_fq_relu.cpp
index e0a2cdd7e80..c2d7d923209 100644
--- a/src/tests/functional/shared_test_classes/src/subgraph/conv_fq_relu.cpp
+++ b/src/tests/functional/shared_test_classes/src/subgraph/conv_fq_relu.cpp
@@ -17,7 +17,8 @@ std::string ConvFqReluTest::getTestCaseName(const testing::TestParamInfo<ConvFqR
 
     size_t levels;
     std::vector<float> inputArg;
-    std::tie(levels, inputArg) = fqParams;
+    float convFQValue;
+    std::tie(levels, inputArg, convFQValue) = fqParams;
 
     std::vector<size_t> kernelShape;
     std::vector<size_t> strides;
@@ -36,6 +37,7 @@ std::string ConvFqReluTest::getTestCaseName(const testing::TestParamInfo<ConvFqR
      if (inputArg.size() == 3) {
         result << "_inputArg=" << inputArg[0] << "_" << inputArg[1] << "_" << inputArg[2];
     }
+    result << "_convFQ=" << convFQValue;
     result << "_KERNEL=" << CommonTestUtils::vec2str(kernelShape) << "_";
     result << "STRIDES=" << CommonTestUtils::vec2str(strides) << "_";
     result << "IC=" << inputChannels << "_";
@@ -54,7 +56,8 @@ void ConvFqReluTest::SetUp() {
 
     size_t levels;
     std::vector<float> inputArg;
-    std::tie(levels, inputArg) = fqParams;
+    float convFQValue;
+    std::tie(levels, inputArg, convFQValue) = fqParams;
     if (inputArg.size() == 3) {
         inputDataMin = inputArg[0];
         inputDataMax = inputArg[1];
@@ -80,8 +83,10 @@ void ConvFqReluTest::SetUp() {
     float weightVal = 0.2;
     auto filterWeightsNode = ngraph::builder::makeConstant<float>(ngPrc, {outputChannels, inputChannels, kernelShape[0], kernelShape[1]},
                                                                   { weightVal });
-    auto convLowNode = ngraph::builder::makeConstant(ngraph::element::f32, std::vector<size_t>{ 1 }, std::vector<float>{-weightVal});
-    auto convHighNode = ngraph::builder::makeConstant(ngraph::element::f32, std::vector<size_t>{ 1 }, std::vector<float>{weightVal});
+    auto convLowNode =
+        ngraph::builder::makeConstant(ngraph::element::f32, std::vector<size_t>{1}, std::vector<float>{-convFQValue});
+    auto convHighNode =
+        ngraph::builder::makeConstant(ngraph::element::f32, std::vector<size_t>{1}, std::vector<float>{convFQValue});
     auto convWeightsFQNode = std::make_shared<ngraph::opset1::FakeQuantize>(filterWeightsNode,
         convLowNode, convHighNode, convLowNode, convHighNode, levels);
     auto convWeightsFQ = std::dynamic_pointer_cast<ngraph::opset1::FakeQuantize>(convWeightsFQNode);