[GNA] More precized calculation of Log pwl and pwl with sf < 1 (#7884)

* [GNA] More precized calculation of Log pwl and pwl with sf < 1 * [GNA] Added tests * [GNA] Types correction * [GNA] Added comment for absolute threshold calculation in the test
2021-10-13 14:14:52 +03:00 · 2021-10-13 14:14:52 +03:00 · b746c734f8
commit b746c734f8
parent 02f3a175d0
2 changed files with 195 additions and 18 deletions
--- a/inference-engine/src/gna_plugin/frontend/scale_factor_calc.hpp
+++ b/inference-engine/src/gna_plugin/frontend/scale_factor_calc.hpp
@ -238,6 +238,57 @@ class ScaleFactorPerLayer<InferenceEngine::CNNLayer*, QUANT_DESC> {
    const double pow_domain = 16;
 protected :
    /**
     * @brief Adjust output scale factor to get the most precise PWL slope.
     * NOTE: Currently it is only implemented for identity, clamp, relu and FQ layers.
     *       For all other layers, it does not improve accuracy.
     * @param sf Scale factor to be adjusted
     * @param layer Layer information
     * @param quantizedParams Quantization parameters
     * @return the adjusted scale factor
     */
    float adjustScaleFactor(float sf, InferenceEngine::CNNLayer const* cnnLayer,
                            GNAPluginNS::LayerInfo const& layer,
                            QuantizedLayerParams* quantizedParams) {
        auto get_rank = [](uint32_t value) {
            uint8_t rank = 0;
            while (value >= 1) {
                ++rank;
                value /= 10;
            }
            return rank;
        };
        auto pow_10 = [](uint8_t degree) {
            uint32_t value = 1;
            for (uint8_t i = 0; i < degree; ++i) {
                value *= 10;
            }
            return value;
        };
        auto slopes = getPWLSlopes(layer);
        if (!slopes.empty()) {
            auto div = 10;
            auto startRange = sf > 1.0f ? static_cast<uint32_t>(sf) : sf;
            auto endRange = startRange - startRange / div;
            endRange = endRange > 1.0f ? static_cast<uint32_t>(endRange) : endRange;
            uint32_t steps = 10000;
            uint32_t rangeSize = static_cast<uint32_t>(startRange - endRange);
            if (rangeSize >= 1) {
                steps *= rangeSize / pow_10(get_rank(rangeSize) - 1);
            }
            auto scaleFactors = generateScaleFactors(startRange, endRange, steps);
            auto newScaleFactor = selectBestOutputScaleFactors(quantizedParams->_src_quant.GetScale(), scaleFactors, slopes);
            if (!fp32eq(sf, newScaleFactor) && !fp32eq(newScaleFactor, 0.0f) && !std::isinf(newScaleFactor)) {
                gnalog() << "[INFO] Adjusting scale factor for " << cnnLayer->name
                    << " from: " << sf << " to: " << newScaleFactor << "\n";
                sf = newScaleFactor;
            }
        }
        return sf;
    }
    float getActivationScale(InferenceEngine::CNNLayer const* cnnLayer,
                             GNAPluginNS::LayerInfo const& layer,
                             int inputsSize,
@ -418,24 +469,8 @@ class ScaleFactorPerLayer<InferenceEngine::CNNLayer*, QUANT_DESC> {
                }
            }
-            // Adjust output scale factor to get the most precise PWL slope.
+            if (!usePrevScaleFactor) {
-            // NOTE: Currently it is only implemented for identity, clamp, relu and FQ layers.
+                result = adjustScaleFactor(result, cnnLayer, layer, quantizedParams);
            //       For all other layers, it does not improve accuracy.
            auto slopes = getPWLSlopes(layer);
            if (!slopes.empty() && !usePrevScaleFactor) {
                auto div = 10;
                auto mul = 10;
                auto startRange = result > 1.0f ? static_cast<int32_t>(result) : result;
                auto endRange = startRange - startRange / div;
                endRange = endRange > 1.0f ? static_cast<int32_t>(endRange) : endRange;
                auto scaleFactors = generateScaleFactors(startRange, endRange, static_cast<int32_t>(startRange - endRange) * mul);
                auto newScaleFactor = selectBestOutputScaleFactors(quantizedParams->_src_quant.GetScale(), scaleFactors, slopes);
                if (!fp32eq(result, newScaleFactor) &&
                    !fp32eq(newScaleFactor, 1.0f) && !fp32eq(newScaleFactor, 0.0f) && !std::isinf(newScaleFactor)) {
                    gnalog() << "[INFO] Adjusting scale factor for " << cnnLayer->name
                        << " from: " << result << " to: " << newScaleFactor << "\n";
                    result = newScaleFactor;
                }
            }
        }
--- a/inference-engine/tests/functional/plugin/gna/scale_factors_tests/test_fq_scale_factors.cpp
+++ b/inference-engine/tests/functional/plugin/gna/scale_factors_tests/test_fq_scale_factors.cpp
@ -0,0 +1,142 @@
 // Copyright (C) 2021 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 #include <vector>
 #include <memory>
 #include <tuple>
 #include <vector>
 #include <string>
 #include <ie_core.hpp>
 #include "common_test_utils/common_utils.hpp"
 #include "functional_test_utils/plugin_cache.hpp"
 #include "shared_test_classes/base/layer_test_utils.hpp"
 #include "functional_test_utils/blob_utils.hpp"
 #include "ngraph_functions/utils/ngraph_helpers.hpp"
 #include "ngraph_functions/builders.hpp"
 #include "ngraph_functions/pass/convert_prc.hpp"
 typedef std::tuple<
    InferenceEngine::Precision,         // Network Precision
    std::string,                        // Target Device
    std::map<std::string, std::string>, // Configuration
    std::pair<float, float>             // Input values
 > fqScaleFactorParams;
 namespace LayerTestsDefinitions {
 class TestFQScaleFactorsTest : public testing::WithParamInterface<fqScaleFactorParams>,
    public LayerTestsUtils::LayerTestsCommon {
 public:
    static std::string getTestCaseName(testing::TestParamInfo<fqScaleFactorParams> obj) {
        InferenceEngine::Precision netPrecision;
        std::string targetDevice;
        std::map<std::string, std::string> configuration;
        std::pair<float, float> inputValues;
        std::tie(netPrecision, targetDevice, configuration, inputValues) = obj.param;
        std::ostringstream result;
        result << "netPRC=" << netPrecision.name() << "_";
        result << "targetDevice=" << targetDevice << "_";
        for (auto const& configItem : configuration) {
            result << "_configItem=" << configItem.first << "_" << configItem.second;
        }
        result << "_range=(" << inputValues.first << ", " << inputValues.second << ")";
        return result.str();
    }
 protected:
    void SetUp() override {
        InferenceEngine::Precision netPrecision;
        std::pair<float, float> inputValues;
        std::tie(netPrecision, targetDevice, configuration, inputValues) = this->GetParam();
        std::tie(inputDataMin, inputDataMax) = inputValues;
        auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
        const ngraph::Shape shape = {1, 128};
        auto params = ngraph::builder::makeParams(ngPrc, {shape});
        auto lowNodeIn = ngraph::builder::makeConstant<float>(ngPrc, {1}, { inputDataMin });
        auto highNodeIn = ngraph::builder::makeConstant<float>(ngPrc, {1}, { inputDataMax });
        auto fqIn = std::make_shared<ngraph::opset8::FakeQuantize>(params[0], lowNodeIn, highNodeIn,
            lowNodeIn, highNodeIn, levels);
        auto mul = std::make_shared<ngraph::opset8::Multiply>(fqIn, params[0]);
        auto lowNodeOut = ngraph::builder::makeConstant<float>(ngPrc, {1}, { -inputDataMin * inputDataMin });
        auto highNodeOut = ngraph::builder::makeConstant<float>(ngPrc, {1}, { inputDataMax * inputDataMax });
        auto fqOut = std::make_shared<ngraph::opset8::FakeQuantize>(mul, lowNodeOut, highNodeOut,
            lowNodeOut, highNodeOut, levels);
        ngraph::ResultVector results{std::make_shared<ngraph::opset8::Result>(fqOut)};
        function = std::make_shared<ngraph::Function>(results, params, "FQWithSmallScaleFactor");
        functionRefs = ngraph::clone_function(*function);
    }
    float inputDataMax = 1.0;
    float inputDataMin = -1.0;
    size_t levels = std::numeric_limits<uint16_t>::max();
 };
 TEST_P(TestFQScaleFactorsTest, CompareWithRefImpl) {
    LoadNetwork();
    GenerateInputs();
    Infer();
    auto refs = CalculateRefs();
    auto results = GetOutputs();
    const auto expected = reinterpret_cast<const float*>(refs.front().second.data());
    size_t size = results.front()->size();
    auto memory = InferenceEngine::as<InferenceEngine::MemoryBlob>(results.front());
    IE_ASSERT(memory);
    const auto lockedMemory = memory->wmap();
    const auto actualBuffer = lockedMemory.as<const float*>();
    /* the absolute threshold is calculated as 1.25 * (1 / last_fq_out_scale_factor) = 1.25 * (2 * maxValue) / (levels - 1),
    the most of accuracy degradation in this model is introduced by the output scale factor of FakeQuantize,
    1 / sf is a part of the value which can be represented by one level, so we can't get more accurate resolution than this part,
    maxValue = inputDataMax * inputDataMax since this model multiplies input values with itself,
    1.25 is a reserve factor to cover other errors in this model */
    abs_threshold = 2.5 * inputDataMax * inputDataMax / (levels - 1);
    for (size_t i = 0; i < size; ++i) {
        const auto &ref = expected[i];
        const auto &res = actualBuffer[i];
        if (CommonTestUtils::ie_abs(res - ref) > abs_threshold) {
            IE_THROW() << "Absolute comparison of values expected: " << ref << " and actual: " << res
                        << " at index " << i << " with absolute threshold " << abs_threshold
                        << " failed";
        }
    }
 };
 const std::vector<InferenceEngine::Precision> netPrecisions = {
    InferenceEngine::Precision::FP32,
    InferenceEngine::Precision::FP16
 };
 const std::vector<std::map<std::string, std::string>> configs = {
    {
        {"GNA_DEVICE_MODE", "GNA_SW_EXACT"},
    }
 };
 const std::vector<std::pair<float, float>> inputValues = {
    {-188.0, 188.0},
    {-90.0, 90.0},
    {-20.0, 20.0},
    {-10.0, 10.0}
 };
 INSTANTIATE_TEST_SUITE_P(smoke_base, TestFQScaleFactorsTest,
    ::testing::Combine(
        ::testing::ValuesIn(netPrecisions),
        ::testing::Values(CommonTestUtils::DEVICE_GNA),
        ::testing::ValuesIn(configs),
        ::testing::ValuesIn(inputValues)),
    TestFQScaleFactorsTest::getTestCaseName);
 } // namespace LayerTestsDefinitions