[GNA] Remove internal overload correction algorithm (#14428)

Szymon Irzabek 2022-12-13 06:02:03 +01:00 committed by GitHub
parent 9cdea2aa73
commit 8ccabc546f
11 changed files with 105 additions and 88 deletions
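In short: the scale-factor calculator no longer corrects weight scale factors when output statistics predict int32 accumulator saturation. The weightable and GEMM paths drop the correction (dividing the weights scale by a reducer) in favor of a "Potential overload correction issue at layer" warning, while the eltwise path keeps input requantization but gates it on the reducer exceeding initial_weights_reducer_val instead of a float-equality check. The affected functional tests become negative tests that capture the plugin log and assert the warning, and the convolution FQ fixtures gain a parameter for the weights' FQ range so the tests can trigger saturation deliberately.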

View File

@@ -24,6 +24,7 @@ constexpr float k_identity = 6;
constexpr double pow_domain = 16;
constexpr float min_search_weights_val = 1.0f;
constexpr float max_search_weights_val = 1024.0f;
constexpr double initial_weights_reducer_val = 1.0;
float GetScaleFactor(InferenceEngine::CNNLayerPtr layer, QuantizedDataType data_type) {
IE_ASSERT(layer != nullptr);
@@ -207,10 +208,9 @@ std::vector<float> ScaleFactorCalculator::generateScaleFactors(float startRange,
double ScaleFactorCalculator::calculateWeightsReducerFromDstStats(QuantizationParams dst_quant) {
auto maxAbsVal = std::max(std::abs(dst_quant.GetMinValues().front()),
std::abs(dst_quant.GetMaxValues().front()));
auto maxIntVal = static_cast<int64_t>(maxAbsVal * dst_quant.GetScale() + 0.5f);
double weightsReducer = static_cast<double>(maxIntVal) / std::numeric_limits<int32_t>::max();
weightsReducer = std::max(1.0, weightsReducer);
weightsReducer = std::max(initial_weights_reducer_val, weightsReducer);
return weightsReducer;
}
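For reference, a minimal standalone sketch of the reducer computation above. DstStats is a hypothetical stand-in for the fields of QuantizationParams used here, and the numbers in main() are chosen only to illustrate the overflow case; this is an illustration, not the plugin's API.

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <iostream>
#include <limits>

// Hypothetical stand-in for the QuantizationParams fields used above.
struct DstStats {
    float min_value;  // GetMinValues().front()
    float max_value;  // GetMaxValues().front()
    float scale;      // GetScale()
};

// Mirrors calculateWeightsReducerFromDstStats: the reducer exceeds 1.0 exactly
// when the estimated integer output would overflow the int32 accumulator.
double CalculateWeightsReducer(const DstStats& dst) {
    const double max_abs = std::max(std::abs(dst.min_value), std::abs(dst.max_value));
    const auto max_int = static_cast<int64_t>(max_abs * dst.scale + 0.5f);
    const double reducer = static_cast<double>(max_int) / std::numeric_limits<int32_t>::max();
    return std::max(1.0, reducer);  // clamp at initial_weights_reducer_val
}

int main() {
    // Quantized maximum of ~2x int32 max gives reducer ~2.0; after this commit
    // that is reported as a potential overload rather than corrected.
    const DstStats dst{-64.0f, 64.0f, 6.7108864e7f};
    std::cout << CalculateWeightsReducer(dst) << "\n";
}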
@@ -894,16 +894,15 @@ bool ScaleFactorCalculator::ScaleFactorPerLayerEltwise(InferenceEngine::EltwiseL
if (requantizeInput(in1, newOutputScale, result, infiniteLoopCount)) {
return true;
}
// we unable to rescale the input - results might be bad
// Unable to rescale the input - results might be bad
log::warning() << "[INFO] weights saturated for " << eltwiseLayer->name << "\n";
}
if (!quantData->_dst_quant.IsStatsSet()) {
return true;
}
auto weightsReducer = calculateWeightsReducerFromDstStats(quantData->_dst_quant);
if (!common::fp32eq(weightsReducer, 1.0f)) {
if (weightsReducer > initial_weights_reducer_val) {
float newOutputScale = quantParams1->_dst_quant.GetScale() / weightsReducer;
if (requantizeInput(in1, newOutputScale, result, infiniteLoopCount)) {
return true;
@@ -912,8 +911,10 @@ bool ScaleFactorCalculator::ScaleFactorPerLayerEltwise(InferenceEngine::EltwiseL
}
}
break;
default : THROW_GNA_EXCEPTION << "Unsupported Eltwise layer for quantisation: " << eltwiseLayer->_operation;
}
return true;
}
@@ -1277,68 +1278,34 @@ bool ScaleFactorCalculator::ScaleFactorPerLayerWeightable(InferenceEngine::Weigh
}
}
auto weightsReducer = calculateWeightsReducerFromDstStats(quant->_dst_quant);
if (!common::fp32eq(weightsReducer, 1.0f)) {
quant->_weights_quant.SetScale(quant->_weights_quant.GetScale() / weightsReducer);
if (calculateWeightsReducerFromDstStats(quant->_dst_quant) > initial_weights_reducer_val) {
log::warning() << "Potential overload correction issue at layer " << wl->name;
}
if (common::fp32eq(quant->_weights_quant.GetScale(), 0.0f) || std::isinf(quant->_weights_quant.GetScale())) {
quant->_weights_quant.SetScale(1.0f);
}
quant->_dst_quant.SetScale(quant->_weights_quant.GetScale() * quant->_src_quant.GetScale());
}
return true;
}
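To make the behavioral change explicit, a hedged side-by-side sketch (the names are illustrative, not the plugin's API): the removed path divided the weights scale by the reducer and recomputed the destination scale, while the new path leaves the scales untouched and only logs the warning.

#include <cmath>
#include <iostream>
#include <string>

// Removed path: shrink the weights scale so the int32 accumulator cannot
// saturate, guarding against a zero or infinite result.
double CorrectWeightsScale(double weights_scale, double reducer) {
    double corrected = weights_scale / reducer;
    if (corrected == 0.0 || std::isinf(corrected)) {
        corrected = 1.0;
    }
    return corrected;  // dst scale then becomes corrected * src scale
}

// New path: report the condition and keep the scales as they are.
void ReportPotentialOverload(const std::string& layer_name, double reducer) {
    if (reducer > 1.0) {  // initial_weights_reducer_val
        std::cerr << "Potential overload correction issue at layer " << layer_name << "\n";
    }
}

int main() {
    std::cout << CorrectWeightsScale(512.0, 2.0) << "\n";  // 256: old behavior
    ReportPotentialOverload("fc_layer", 2.0);              // warning only: new behavior
}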
bool ScaleFactorCalculator::ScaleFactorPerLayerGemm(InferenceEngine::GemmLayer* gemmLayer,
bool ScaleFactorCalculator::ScaleFactorPerLayerGemm(InferenceEngine::GemmLayer* gl,
ScaleFactorUpdateResult& result,
int infiniteLoopCount,
const Config& gna_config) const {
if ( !gemmLayer ) {
if (!gl) {
THROW_GNA_EXCEPTION << "Incorrect Gemm Layer pointer \n";
}
auto in0 = InferenceEngine::CNNNetPrevLayer(gemmLayer, 0);
auto in1 = InferenceEngine::CNNNetPrevLayer(gemmLayer, 1);
auto in0 = InferenceEngine::CNNNetPrevLayer(gl, 0);
auto in1 = InferenceEngine::CNNNetPrevLayer(gl, 1);
auto quantData = InferenceEngine::getInjectedData<QuantizedLayerParams>(*gemmLayer);
auto quant = InferenceEngine::getInjectedData<QuantizedLayerParams>(*gl);
auto quantParams1 = InferenceEngine::getInjectedData<QuantizedLayerParams>(in1);
auto quantParams0 = InferenceEngine::getInjectedData<QuantizedLayerParams>(in0);
quantData->_src_quant.SetScale(quantParams0->_dst_quant.GetScale());
quantData->_weights_quant.SetScale(quantParams1->_dst_quant.GetScale());
quantData->_dst_quant.SetScale(
quantData->_src_quant.GetScale() * quantData->_weights_quant.GetScale());
quant->_src_quant.SetScale(quantParams0->_dst_quant.GetScale());
quant->_weights_quant.SetScale(quantParams1->_dst_quant.GetScale());
quant->_dst_quant.SetScale(quant->_src_quant.GetScale() * quant->_weights_quant.GetScale());
if (!quantData->_dst_quant.IsStatsSet()) {
return true;
}
// Adjust weights scale factor if output values exceed int32 maximum value
auto weightsReducer = calculateWeightsReducerFromDstStats(quantData->_dst_quant);
if (LayerInfo(in0).isConst()) {
if (!common::fp32eq(weightsReducer, 1.0f)) {
quantParams0->_dst_quant.SetScale(quantData->_src_quant.GetScale() / weightsReducer);
quantData->_src_quant.SetScale(quantData->_src_quant.GetScale() / weightsReducer);
}
if (common::fp32eq(quantData->_src_quant.GetScale(), 0.0f) || std::isinf(quantData->_src_quant.GetScale())) {
quantParams0->_dst_quant.SetScale(1.0f);
quantData->_src_quant.SetScale(1.0f);
}
quantData->_dst_quant.SetScale(quantData->_weights_quant.GetScale() * quantData->_src_quant.GetScale());
} else {
if (!common::fp32eq(weightsReducer, 1.0f)) {
for (int i = 0; i < 2; ++i) {
auto input = InferenceEngine::CNNNetPrevLayer(gemmLayer, i);
auto quantParams = InferenceEngine::getInjectedData<QuantizedLayerParams>(input);
float newOutputScale = quantParams->_dst_quant.GetScale() / weightsReducer;
if (requantizeInput(input, newOutputScale, result, infiniteLoopCount)) {
return true;
}
}
THROW_GNA_EXCEPTION << "Unable to quantize " << gemmLayer->name;
}
if (quant->_dst_quant.IsStatsSet() &&
calculateWeightsReducerFromDstStats(quant->_dst_quant) > initial_weights_reducer_val) {
log::warning() << "Potential overload correction issue at layer " << gl->name;
}
return true;

View File

@@ -8,6 +8,8 @@
#include <vector>
#include <string>
#include <gmock/gmock.h>
#include <ie_core.hpp>
#include "common_test_utils/common_utils.hpp"
@@ -32,7 +34,7 @@ typedef std::tuple<
std::map<std::string, std::string>, // Configuration
std::vector<size_t>, // Input Shape
std::pair<float, float> // Input Min and Max
> convertMatmulToPointwiseConvWithFqParams;
> ConvertMatmulToPointwiseConvWithFqNegParams;
namespace LayerTestsDefinitions {
@@ -96,14 +98,14 @@ protected:
}
};
class ConvertMatmulToPointwiseConvWithFq : public testing::WithParamInterface<convertMatmulToPointwiseConvWithFqParams>,
class ConvertMatmulToPointwiseConvWithFqNeg : public testing::WithParamInterface<ConvertMatmulToPointwiseConvWithFqNegParams>,
public LayerTestsUtils::LayerTestsCommon {
float inputDataMin = -10.0f;
float inputDataMax = 10.0f;
float inputDataResolution = 1.0f;
public:
static std::string getTestCaseName(testing::TestParamInfo<convertMatmulToPointwiseConvWithFqParams> obj) {
static std::string getTestCaseName(testing::TestParamInfo<ConvertMatmulToPointwiseConvWithFqNegParams> obj) {
InferenceEngine::Precision netPrecision;
std::string targetDevice;
std::map<std::string, std::string> configuration;
@@ -184,8 +186,17 @@ TEST_P(ConvertMatmulToPointwiseConv, CompareWithRefImpl) {
Run();
};
TEST_P(ConvertMatmulToPointwiseConvWithFq, CompareWithRefImpl) {
Run();
TEST_P(ConvertMatmulToPointwiseConvWithFqNeg, CompareWithRefImpl) {
std::stringstream what;
std::streambuf* sbuf = std::cout.rdbuf();
std::streambuf* ebuf = std::cerr.rdbuf();
std::cout.rdbuf(what.rdbuf());
std::cerr.rdbuf(what.rdbuf());
LoadNetwork();
const auto expected = "Potential overload correction issue at layer ";
EXPECT_THAT(what.str(), ::testing::HasSubstr(expected));
std::cout.rdbuf(sbuf);
std::cerr.rdbuf(ebuf);
};
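The test above swaps the std::cout and std::cerr buffers manually and restores them after the assertion. An equivalent RAII sketch of the same capture pattern (StreamCapture is a hypothetical helper, not part of the shared test utils) restores the buffers even if the guarded code exits the scope early:

#include <iostream>
#include <sstream>
#include <string>

// Captures everything written to std::cout and std::cerr for the lifetime of
// the object, restoring the original stream buffers in the destructor.
class StreamCapture {
public:
    StreamCapture()
        : cout_buf_(std::cout.rdbuf(captured_.rdbuf())),
          cerr_buf_(std::cerr.rdbuf(captured_.rdbuf())) {}
    ~StreamCapture() {
        std::cout.rdbuf(cout_buf_);
        std::cerr.rdbuf(cerr_buf_);
    }
    std::string str() const { return captured_.str(); }

private:
    std::stringstream captured_;
    std::streambuf* cout_buf_;
    std::streambuf* cerr_buf_;
};

int main() {
    std::string log;
    {
        StreamCapture capture;
        std::cerr << "Potential overload correction issue at layer matmul\n";
        log = capture.str();
    }
    std::cout << (log.find("Potential overload") != std::string::npos ? "warning captured\n"
                                                                      : "warning missing\n");
}

Inside the TEST_P this would replace the four rdbuf lines and guarantee the global streams are restored even when LoadNetwork throws.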
const std::vector<InferenceEngine::Precision> netPrecisions = {
@@ -194,8 +205,15 @@ const std::vector<InferenceEngine::Precision> netPrecisions = {
};
const std::vector<std::map<std::string, std::string>> configs = {
{
{"GNA_DEVICE_MODE", "GNA_SW_EXACT"}
}
};
const std::vector<std::map<std::string, std::string>> configs_neg = {
{
{"GNA_DEVICE_MODE", "GNA_SW_EXACT"},
{"LOG_LEVEL", "LOG_WARNING"}
}
};
@@ -217,13 +235,13 @@ INSTANTIATE_TEST_SUITE_P(smoke_ConvertMatmulToPointwiseConvTest, ConvertMatmulTo
::testing::ValuesIn(inputShape)),
ConvertMatmulToPointwiseConv::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_ConvertMatmulToPointwiseConvTest, ConvertMatmulToPointwiseConvWithFq,
INSTANTIATE_TEST_SUITE_P(smoke_ConvertMatmulToPointwiseConvTest, ConvertMatmulToPointwiseConvWithFqNeg,
::testing::Combine(
::testing::ValuesIn(netPrecisions),
::testing::Values(CommonTestUtils::DEVICE_GNA),
::testing::ValuesIn(configs),
::testing::ValuesIn(configs_neg),
::testing::ValuesIn(inputShape),
::testing::ValuesIn(fqStats)),
ConvertMatmulToPointwiseConvWithFq::getTestCaseName);
ConvertMatmulToPointwiseConvWithFqNeg::getTestCaseName);
} // namespace LayerTestsDefinitions

View File

@@ -72,8 +72,8 @@ protected:
auto weights = ngraph::builder::makeConstant<float>(ngPrc, {outChannels, inputShape[1], 1, kernelSize},
CommonTestUtils::generate_float_numbers(outChannels * inputShape[1] * kernelSize,
weightsMinMax.first, weightsMinMax.second));
auto weightsLowNode = ngraph::builder::makeConstant<float>(ngPrc, {1}, { weightsMinMax.first });
auto weightsHighNode = ngraph::builder::makeConstant<float>(ngPrc, {1}, { weightsMinMax.second });
auto weightsLowNode = ngraph::builder::makeConstant<float>(ngPrc, {1}, { weightsMinMax.first * 2 });
auto weightsHighNode = ngraph::builder::makeConstant<float>(ngPrc, {1}, { weightsMinMax.second * 2 });
auto weightsFQ = std::make_shared<ngraph::opset7::FakeQuantize>(weights,
weightsLowNode, weightsHighNode, weightsLowNode, weightsHighNode, levels);

View File

@@ -96,8 +96,8 @@ protected:
inputLowNode1, inputHighNode1, inputLowNode1, inputHighNode1, levels);
auto filterWeightsNode = ngraph::builder::makeConstant<float>(ngPrc, {8, inputShape[1], 1, 8}, { 1.0f });
auto convLowNode = ngraph::builder::makeConstant(ngraph::element::f32, std::vector<size_t>{ 1 }, std::vector<float>{inputDataMin1});
auto convHighNode = ngraph::builder::makeConstant(ngraph::element::f32, std::vector<size_t>{ 1 }, std::vector<float>{inputDataMax1});
auto convLowNode = ngraph::builder::makeConstant(ngraph::element::f32, std::vector<size_t>{ 1 }, std::vector<float>{inputDataMin1 * 35});
auto convHighNode = ngraph::builder::makeConstant(ngraph::element::f32, std::vector<size_t>{ 1 }, std::vector<float>{inputDataMax1 * 35});
auto convWeightsFQNode = std::make_shared<ngraph::opset1::FakeQuantize>(filterWeightsNode,
convLowNode, convHighNode, convLowNode, convHighNode, levels);
auto convWeightsFQ = std::dynamic_pointer_cast<ngraph::opset1::FakeQuantize>(convWeightsFQNode);
@@ -148,7 +148,7 @@ const std::vector<std::map<std::string, std::string>> configs = {
const std::vector<std::vector<size_t>> inputShape = {
{1, 1, 1, 1024},
{1, 8, 1, 168},
{1, 8, 1, 168}
};
const std::vector<std::pair<float, float>> inputMinMax = {
@@ -156,11 +156,11 @@ const std::vector<std::pair<float, float>> inputMinMax = {
{-2, 2},
{-8, 8},
{-5, 5},
{-17.5, 17.5},
{-17.5, 17.5}
};
const std::vector<size_t> levels = {
65535,
65535
};
INSTANTIATE_TEST_SUITE_P(smoke_fq_maxpool_reordering, FQMaxpoolReordering,

View File

@@ -10,6 +10,8 @@
#include <ie_core.hpp>
#include <gmock/gmock.h>
#include "common_test_utils/common_utils.hpp"
#include "functional_test_utils/plugin_cache.hpp"
#include "shared_test_classes/base/layer_test_utils.hpp"
@@ -30,7 +32,7 @@ typedef std::tuple<
namespace LayerTestsDefinitions {
class MatMulOverloadCorrectionTest : public testing::WithParamInterface<matmulOverloadCorrectionParams>,
class MatMulOverloadCorrectionNegTest : public testing::WithParamInterface<matmulOverloadCorrectionParams>,
public LayerTestsUtils::LayerTestsCommon {
public:
static std::string getTestCaseName(testing::TestParamInfo<matmulOverloadCorrectionParams> obj) {
@@ -111,8 +113,17 @@ protected:
const size_t levels32 = std::numeric_limits<uint32_t>::max();
};
TEST_P(MatMulOverloadCorrectionTest, CompareWithRefImpl) {
Run();
TEST_P(MatMulOverloadCorrectionNegTest, CompareWithRefImpl) {
std::stringstream what;
std::streambuf* sbuf = std::cout.rdbuf();
std::streambuf* ebuf = std::cerr.rdbuf();
std::cout.rdbuf(what.rdbuf());
std::cerr.rdbuf(what.rdbuf());
LoadNetwork();
const auto expected = "Potential overload correction issue at layer ";
EXPECT_THAT(what.str(), ::testing::HasSubstr(expected));
std::cout.rdbuf(sbuf);
std::cerr.rdbuf(ebuf);
};
const std::vector<InferenceEngine::Precision> netPrecisions = {
@@ -122,7 +133,8 @@ const std::vector<InferenceEngine::Precision> netPrecisions = {
const std::vector<std::map<std::string, std::string>> configs = {
{
{"GNA_DEVICE_MODE", "GNA_SW_EXACT"}
{"GNA_DEVICE_MODE", "GNA_SW_EXACT"},
{"LOG_LEVEL", "LOG_WARNING"}
}
};
@@ -131,13 +143,13 @@ const std::vector<std::vector<size_t>> inputShapes = {
{1, 256}
};
INSTANTIATE_TEST_SUITE_P(smoke_base, MatMulOverloadCorrectionTest,
INSTANTIATE_TEST_SUITE_P(smoke_base, MatMulOverloadCorrectionNegTest,
::testing::Combine(
::testing::ValuesIn(netPrecisions),
::testing::Values(CommonTestUtils::DEVICE_GNA),
::testing::ValuesIn(configs),
::testing::ValuesIn(inputShapes),
::testing::ValuesIn({true, false}),
::testing::ValuesIn({true}),
::testing::ValuesIn({true, false})),
MatMulOverloadCorrectionTest::getTestCaseName);
MatMulOverloadCorrectionNegTest::getTestCaseName);
} // namespace LayerTestsDefinitions

View File

@@ -13,7 +13,8 @@ using namespace SubgraphTestsDefinitions;
namespace {
const std::vector<InferenceEngine::Precision> netPrecisions = {
InferenceEngine::Precision::FP32, InferenceEngine::Precision::FP16,
InferenceEngine::Precision::FP32,
InferenceEngine::Precision::FP16
};
const std::vector<std::map<std::string, std::string>> configs = {
@@ -29,9 +30,12 @@ const size_t levels = 65535;
const std::vector<std::vector<float>> inputParams = {{-10, 10, 1}};
const float convFQValue = 2.0f;
const auto fqParams = ::testing::Combine(
::testing::Values(levels),
::testing::ValuesIn(inputParams)
::testing::ValuesIn(inputParams),
::testing::Values(convFQValue)
);
const std::vector<std::vector<size_t>> kernels = {{1, 3}};

View File

@@ -13,7 +13,8 @@ using namespace SubgraphTestsDefinitions;
namespace {
const std::vector<InferenceEngine::Precision> netPrecisions = {
InferenceEngine::Precision::FP32, InferenceEngine::Precision::FP16,
InferenceEngine::Precision::FP32,
InferenceEngine::Precision::FP16,
};
const std::vector<std::map<std::string, std::string>> configs = {
@@ -29,9 +30,12 @@ const size_t levels = 65535;
const std::vector<std::vector<float>> inputParams = {{-100, 100, 1}};
const float convFQValue = 2.0f;
const auto fqParams = ::testing::Combine(
::testing::Values(levels),
::testing::ValuesIn(inputParams)
::testing::ValuesIn(inputParams),
::testing::Values(convFQValue)
);
const std::vector<std::vector<size_t>> kernels = {{1, 3}};

View File

@@ -17,7 +17,8 @@ namespace SubgraphTestsDefinitions {
typedef std::tuple<
size_t, // levels
std::vector<float> // input generator data: low, high, resolution
std::vector<float>, // input generator data: low, high, resolution
float // convolution weights' FQ min and max value
> FqSpecificParams;
typedef std::tuple<

View File

@@ -17,7 +17,8 @@ namespace SubgraphTestsDefinitions {
typedef std::tuple<
size_t, // levels
std::vector<float> // input generator data: low, high, resolution
std::vector<float>, // input generator data: low, high, resolution
float // convolution weights' FQ min and max value
> FqSpecificParams;
typedef std::tuple<

View File

@@ -17,7 +17,8 @@ std::string ConvFqEltwiseTest::getTestCaseName(const testing::TestParamInfo<Conv
size_t levels;
std::vector<float> inputArg;
std::tie(levels, inputArg) = fqParams;
float convFQValue;
std::tie(levels, inputArg, convFQValue) = fqParams;
std::vector<size_t> kernelShape;
std::vector<size_t> strides;
@@ -36,6 +37,7 @@ std::string ConvFqEltwiseTest::getTestCaseName(const testing::TestParamInfo<Conv
if (inputArg.size() == 3) {
result << "_inputArg=" << inputArg[0] << "_" << inputArg[1] << "_" << inputArg[2];
}
result << "_convFQ=" << convFQValue;
result << "_KERNEL=" << CommonTestUtils::vec2str(kernelShape) << "_";
result << "STRIDES=" << CommonTestUtils::vec2str(strides) << "_";
result << "IC=" << inputChannels << "_";
@@ -54,7 +56,8 @@ void ConvFqEltwiseTest::SetUp() {
size_t levels;
std::vector<float> inputArg;
std::tie(levels, inputArg) = fqParams;
float convFQValue;
std::tie(levels, inputArg, convFQValue) = fqParams;
if (inputArg.size() == 3) {
inputDataMin = inputArg[0];
inputDataMax = inputArg[1];
@@ -80,8 +83,10 @@ void ConvFqEltwiseTest::SetUp() {
float weightVal = 0.2;
auto filterWeightsNode = ngraph::builder::makeConstant<float>(ngPrc, {outputChannels, inputChannels, kernelShape[0], kernelShape[1]},
{ weightVal });
auto convLowNode = ngraph::builder::makeConstant(ngraph::element::f32, std::vector<size_t>{ 1 }, std::vector<float>{-weightVal});
auto convHighNode = ngraph::builder::makeConstant(ngraph::element::f32, std::vector<size_t>{ 1 }, std::vector<float>{weightVal});
auto convLowNode =
ngraph::builder::makeConstant(ngraph::element::f32, std::vector<size_t>{1}, std::vector<float>{-convFQValue});
auto convHighNode =
ngraph::builder::makeConstant(ngraph::element::f32, std::vector<size_t>{1}, std::vector<float>{convFQValue});
auto convWeightsFQNode = std::make_shared<ngraph::opset1::FakeQuantize>(filterWeightsNode,
convLowNode, convHighNode, convLowNode, convHighNode, levels);
auto convWeightsFQ = std::dynamic_pointer_cast<ngraph::opset1::FakeQuantize>(convWeightsFQNode);

View File

@@ -17,7 +17,8 @@ std::string ConvFqReluTest::getTestCaseName(const testing::TestParamInfo<ConvFqR
size_t levels;
std::vector<float> inputArg;
std::tie(levels, inputArg) = fqParams;
float convFQValue;
std::tie(levels, inputArg, convFQValue) = fqParams;
std::vector<size_t> kernelShape;
std::vector<size_t> strides;
@@ -36,6 +37,7 @@ std::string ConvFqReluTest::getTestCaseName(const testing::TestParamInfo<ConvFqR
if (inputArg.size() == 3) {
result << "_inputArg=" << inputArg[0] << "_" << inputArg[1] << "_" << inputArg[2];
}
result << "_convFQ=" << convFQValue;
result << "_KERNEL=" << CommonTestUtils::vec2str(kernelShape) << "_";
result << "STRIDES=" << CommonTestUtils::vec2str(strides) << "_";
result << "IC=" << inputChannels << "_";
@@ -54,7 +56,8 @@ void ConvFqReluTest::SetUp() {
size_t levels;
std::vector<float> inputArg;
std::tie(levels, inputArg) = fqParams;
float convFQValue;
std::tie(levels, inputArg, convFQValue) = fqParams;
if (inputArg.size() == 3) {
inputDataMin = inputArg[0];
inputDataMax = inputArg[1];
@@ -80,8 +83,10 @@ void ConvFqReluTest::SetUp() {
float weightVal = 0.2;
auto filterWeightsNode = ngraph::builder::makeConstant<float>(ngPrc, {outputChannels, inputChannels, kernelShape[0], kernelShape[1]},
{ weightVal });
auto convLowNode = ngraph::builder::makeConstant(ngraph::element::f32, std::vector<size_t>{ 1 }, std::vector<float>{-weightVal});
auto convHighNode = ngraph::builder::makeConstant(ngraph::element::f32, std::vector<size_t>{ 1 }, std::vector<float>{weightVal});
auto convLowNode =
ngraph::builder::makeConstant(ngraph::element::f32, std::vector<size_t>{1}, std::vector<float>{-convFQValue});
auto convHighNode =
ngraph::builder::makeConstant(ngraph::element::f32, std::vector<size_t>{1}, std::vector<float>{convFQValue});
auto convWeightsFQNode = std::make_shared<ngraph::opset1::FakeQuantize>(filterWeightsNode,
convLowNode, convHighNode, convLowNode, convHighNode, levels);
auto convWeightsFQ = std::dynamic_pointer_cast<ngraph::opset1::FakeQuantize>(convWeightsFQNode);