diff --git a/inference-engine/src/gna_plugin/frontend/scale_factor_calc.hpp b/inference-engine/src/gna_plugin/frontend/scale_factor_calc.hpp
index 419a14252fc..64df0442f29 100644
--- a/inference-engine/src/gna_plugin/frontend/scale_factor_calc.hpp
+++ b/inference-engine/src/gna_plugin/frontend/scale_factor_calc.hpp
@@ -238,6 +238,57 @@ class ScaleFactorPerLayer {
     const double pow_domain = 16;
 
  protected :
+    /**
+     * @brief Adjust output scale factor to get the most precise PWL slope.
+     * NOTE: Currently it is only implemented for identity, clamp, relu and FQ layers.
+     * For all other layers, it does not improve accuracy.
+     * @param sf Scale factor to be adjusted
+     * @param layer Layer information
+     * @param quantizedParams Quantization parameters
+     * @return the adjusted scale factor
+     */
+    float adjustScaleFactor(float sf, InferenceEngine::CNNLayer const* cnnLayer,
+                            GNAPluginNS::LayerInfo const& layer,
+                            QuantizedLayerParams* quantizedParams) {
+        auto get_rank = [](uint32_t value) {
+            uint8_t rank = 0;
+            while (value >= 1) {
+                ++rank;
+                value /= 10;
+            }
+            return rank;
+        };
+        auto pow_10 = [](uint8_t degree) {
+            uint32_t value = 1;
+            for (uint8_t i = 0; i < degree; ++i) {
+                value *= 10;
+            }
+            return value;
+        };
+
+        auto slopes = getPWLSlopes(layer);
+        if (!slopes.empty()) {
+            auto div = 10;
+            auto startRange = sf > 1.0f ? static_cast<int32_t>(sf) : sf;
+            auto endRange = startRange - startRange / div;
+            endRange = endRange > 1.0f ?
static_cast<int32_t>(endRange) : endRange;
+            uint32_t steps = 10000;
+            uint32_t rangeSize = static_cast<uint32_t>(startRange - endRange);
+            if (rangeSize >= 1) {
+                steps *= rangeSize / pow_10(get_rank(rangeSize) - 1);
+            }
+
+            auto scaleFactors = generateScaleFactors(startRange, endRange, steps);
+            auto newScaleFactor = selectBestOutputScaleFactors(quantizedParams->_src_quant.GetScale(), scaleFactors, slopes);
+            if (!fp32eq(sf, newScaleFactor) && !fp32eq(newScaleFactor, 0.0f) && !std::isinf(newScaleFactor)) {
+                gnalog() << "[INFO] Adjusting scale factor for " << cnnLayer->name
+                    << " from: " << sf << " to: " << newScaleFactor << "\n";
+                sf = newScaleFactor;
+            }
+        }
+        return sf;
+    }
+
     float getActivationScale(InferenceEngine::CNNLayer const* cnnLayer,
                              GNAPluginNS::LayerInfo const& layer,
                              int inputsSize,
@@ -418,24 +469,8 @@ class ScaleFactorPerLayer {
             }
         }
 
-        // Adjust output scale factor to get the most precise PWL slope.
-        // NOTE: Currently it is only implemented for identity, clamp, relu and FQ layers.
-        // For all other layers, it does not improve accuracy.
-        auto slopes = getPWLSlopes(layer);
-        if (!slopes.empty() && !usePrevScaleFactor) {
-            auto div = 10;
-            auto mul = 10;
-            auto startRange = result > 1.0f ? static_cast<int32_t>(result) : result;
-            auto endRange = startRange - startRange / div;
-            endRange = endRange > 1.0f ?
static_cast<int32_t>(endRange) : endRange;
-            auto scaleFactors = generateScaleFactors(startRange, endRange, static_cast<int32_t>(startRange - endRange) * mul);
-            auto newScaleFactor = selectBestOutputScaleFactors(quantizedParams->_src_quant.GetScale(), scaleFactors, slopes);
-            if (!fp32eq(result, newScaleFactor) &&
-                !fp32eq(newScaleFactor, 1.0f) && !fp32eq(newScaleFactor, 0.0f) && !std::isinf(newScaleFactor)) {
-                gnalog() << "[INFO] Adjusting scale factor for " << cnnLayer->name
-                    << " from: " << result << " to: " << newScaleFactor << "\n";
-                result = newScaleFactor;
-            }
+        if (!usePrevScaleFactor) {
+            result = adjustScaleFactor(result, cnnLayer, layer, quantizedParams);
+        }
     }
 
diff --git a/inference-engine/tests/functional/plugin/gna/scale_factors_tests/test_fq_scale_factors.cpp b/inference-engine/tests/functional/plugin/gna/scale_factors_tests/test_fq_scale_factors.cpp
new file mode 100644
index 00000000000..599e5692324
--- /dev/null
+++ b/inference-engine/tests/functional/plugin/gna/scale_factors_tests/test_fq_scale_factors.cpp
@@ -0,0 +1,142 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <vector>
+#include <memory>
+#include <tuple>
+#include <vector>
+#include <string>
+
+#include <ie_core.hpp>
+
+#include "common_test_utils/common_utils.hpp"
+#include "functional_test_utils/plugin_cache.hpp"
+#include "shared_test_classes/base/layer_test_utils.hpp"
+#include "functional_test_utils/blob_utils.hpp"
+#include "ngraph_functions/utils/ngraph_helpers.hpp"
+#include "ngraph_functions/builders.hpp"
+
+#include "ngraph_functions/pass/convert_prc.hpp"
+
+typedef std::tuple<
+    InferenceEngine::Precision,          // Network Precision
+    std::string,                         // Target Device
+    std::map<std::string, std::string>,  // Configuration
+    std::pair<float, float>              // Input values
+> fqScaleFactorParams;
+
+namespace LayerTestsDefinitions {
+
+class TestFQScaleFactorsTest : public testing::WithParamInterface<fqScaleFactorParams>,
+    public LayerTestsUtils::LayerTestsCommon {
+public:
+    static std::string getTestCaseName(testing::TestParamInfo<fqScaleFactorParams> obj) {
+        InferenceEngine::Precision
netPrecision;
+        std::string targetDevice;
+        std::map<std::string, std::string> configuration;
+        std::pair<float, float> inputValues;
+        std::tie(netPrecision, targetDevice, configuration, inputValues) = obj.param;
+
+        std::ostringstream result;
+        result << "netPRC=" << netPrecision.name() << "_";
+        result << "targetDevice=" << targetDevice << "_";
+        for (auto const& configItem : configuration) {
+            result << "_configItem=" << configItem.first << "_" << configItem.second;
+        }
+        result << "_range=(" << inputValues.first << ", " << inputValues.second << ")";
+
+        return result.str();
+    }
+
+protected:
+    void SetUp() override {
+        InferenceEngine::Precision netPrecision;
+        std::pair<float, float> inputValues;
+
+        std::tie(netPrecision, targetDevice, configuration, inputValues) = this->GetParam();
+        std::tie(inputDataMin, inputDataMax) = inputValues;
+        auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
+
+        const ngraph::Shape shape = {1, 128};
+        auto params = ngraph::builder::makeParams(ngPrc, {shape});
+
+        auto lowNodeIn = ngraph::builder::makeConstant<float>(ngPrc, {1}, { inputDataMin });
+        auto highNodeIn = ngraph::builder::makeConstant<float>(ngPrc, {1}, { inputDataMax });
+        auto fqIn = std::make_shared<ngraph::opset8::FakeQuantize>(params[0], lowNodeIn, highNodeIn,
+            lowNodeIn, highNodeIn, levels);
+
+        auto mul = std::make_shared<ngraph::opset8::Multiply>(fqIn, params[0]);
+
+        auto lowNodeOut = ngraph::builder::makeConstant<float>(ngPrc, {1}, { -inputDataMin * inputDataMin });
+        auto highNodeOut = ngraph::builder::makeConstant<float>(ngPrc, {1}, { inputDataMax * inputDataMax });
+        auto fqOut = std::make_shared<ngraph::opset8::FakeQuantize>(mul, lowNodeOut, highNodeOut,
+            lowNodeOut, highNodeOut, levels);
+
+        ngraph::ResultVector results{std::make_shared<ngraph::opset8::Result>(fqOut)};
+        function = std::make_shared<ngraph::Function>(results, params, "FQWithSmallScaleFactor");
+        functionRefs = ngraph::clone_function(*function);
+    }
+
+    float inputDataMax = 1.0;
+    float inputDataMin = -1.0;
+    size_t levels = std::numeric_limits<uint16_t>::max();
+};
+
+TEST_P(TestFQScaleFactorsTest, CompareWithRefImpl) {
+    LoadNetwork();
+    GenerateInputs();
+    Infer();
+    auto
refs = CalculateRefs();
+    auto results = GetOutputs();
+    const auto expected = reinterpret_cast<const float*>(refs.front().second.data());
+    size_t size = results.front()->size();
+    auto memory = InferenceEngine::as<InferenceEngine::MemoryBlob>(results.front());
+    IE_ASSERT(memory);
+    const auto lockedMemory = memory->wmap();
+    const auto actualBuffer = lockedMemory.as<const float*>();
+
+    /* the absolute threshold is calculated as 1.25 * (1 / last_fq_out_scale_factor) = 1.25 * (2 * maxValue) / (levels - 1),
+    the most of accuracy degradation in this model is introduced by the output scale factor of FakeQuantize,
+    1 / sf is a part of the value which can be represented by one level, so we can't get more accurate resolution than this part,
+    maxValue = inputDataMax * inputDataMax since this model multiplies input values with itself,
+    1.25 is a reserve factor to cover other errors in this model */
+    abs_threshold = 2.5 * inputDataMax * inputDataMax / (levels - 1);
+
+    for (size_t i = 0; i < size; ++i) {
+        const auto &ref = expected[i];
+        const auto &res = actualBuffer[i];
+        if (CommonTestUtils::ie_abs(res - ref) > abs_threshold) {
+            IE_THROW() << "Absolute comparison of values expected: " << ref << " and actual: " << res
+                << " at index " << i << " with absolute threshold " << abs_threshold
+                << " failed";
+        }
+    }
+};
+
+const std::vector<InferenceEngine::Precision> netPrecisions = {
+    InferenceEngine::Precision::FP32,
+    InferenceEngine::Precision::FP16
+};
+
+const std::vector<std::map<std::string, std::string>> configs = {
+    {
+        {"GNA_DEVICE_MODE", "GNA_SW_EXACT"},
+    }
+};
+
+const std::vector<std::pair<float, float>> inputValues = {
+    {-188.0, 188.0},
+    {-90.0, 90.0},
+    {-20.0, 20.0},
+    {-10.0, 10.0}
+};
+
+INSTANTIATE_TEST_SUITE_P(smoke_base, TestFQScaleFactorsTest,
+    ::testing::Combine(
+        ::testing::ValuesIn(netPrecisions),
+        ::testing::Values(CommonTestUtils::DEVICE_GNA),
+        ::testing::ValuesIn(configs),
+        ::testing::ValuesIn(inputValues)),
+    TestFQScaleFactorsTest::getTestCaseName);
+} // namespace LayerTestsDefinitions
\ No newline at end of file