[GNA] More precise calculation of Log PWL and PWL with sf < 1 (#7884)

* [GNA] More precise calculation of Log PWL and PWL with sf < 1

* [GNA] Added tests

* [GNA] Types correction

* [GNA] Added comment for absolute threshold calculation in the test
Elizaveta Lobanova 2021-10-13 14:14:52 +03:00 committed by GitHub
parent 02f3a175d0
commit b746c734f8
2 changed files with 195 additions and 18 deletions


@@ -238,6 +238,57 @@ class ScaleFactorPerLayer<InferenceEngine::CNNLayer*, QUANT_DESC> {
const double pow_domain = 16;
protected:
/**
* @brief Adjust output scale factor to get the most precise PWL slope.
* NOTE: Currently it is only implemented for identity, clamp, relu and FQ layers.
* For all other layers, it does not improve accuracy.
* @param sf Scale factor to be adjusted
* @param cnnLayer CNN layer descriptor
* @param layer Layer information
* @param quantizedParams Quantization parameters
* @return The adjusted scale factor
*/
float adjustScaleFactor(float sf, InferenceEngine::CNNLayer const* cnnLayer,
GNAPluginNS::LayerInfo const& layer,
QuantizedLayerParams* quantizedParams) {
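// Number of decimal digits in value, e.g. get_rank(250) == 3.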
auto get_rank = [](uint32_t value) {
uint8_t rank = 0;
while (value >= 1) {
++rank;
value /= 10;
}
return rank;
};
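// Integer power of ten, e.g. pow_10(2) == 100.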
auto pow_10 = [](uint8_t degree) {
uint32_t value = 1;
for (uint8_t i = 0; i < degree; ++i) {
value *= 10;
}
return value;
};
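// Search for a more precise scale factor within [sf - sf/10, sf],
// sampling the range with a step count proportional to its size.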
auto slopes = getPWLSlopes(layer);
if (!slopes.empty()) {
auto div = 10;
auto startRange = sf > 1.0f ? static_cast<uint32_t>(sf) : sf;
auto endRange = startRange - startRange / div;
endRange = endRange > 1.0f ? static_cast<uint32_t>(endRange) : endRange;
uint32_t steps = 10000;
uint32_t rangeSize = static_cast<uint32_t>(startRange - endRange);
if (rangeSize >= 1) {
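// Scale the base step count by the leading digit of the range size,
// e.g. rangeSize = 250: steps = 10000 * (250 / 100) = 20000.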
steps *= rangeSize / pow_10(get_rank(rangeSize) - 1);
}
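// For sf < 1 the range stays fractional (no integer truncation), e.g. sf = 0.5
// gives candidates in [0.45, 0.5] sampled with 10000 steps, enabling a finer search.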
auto scaleFactors = generateScaleFactors(startRange, endRange, steps);
auto newScaleFactor = selectBestOutputScaleFactors(quantizedParams->_src_quant.GetScale(), scaleFactors, slopes);
if (!fp32eq(sf, newScaleFactor) && !fp32eq(newScaleFactor, 0.0f) && !std::isinf(newScaleFactor)) {
gnalog() << "[INFO] Adjusting scale factor for " << cnnLayer->name
<< " from: " << sf << " to: " << newScaleFactor << "\n";
sf = newScaleFactor;
}
}
return sf;
}
float getActivationScale(InferenceEngine::CNNLayer const* cnnLayer,
GNAPluginNS::LayerInfo const& layer,
int inputsSize,
@@ -418,24 +469,8 @@ class ScaleFactorPerLayer<InferenceEngine::CNNLayer*, QUANT_DESC> {
}
}
-// Adjust output scale factor to get the most precise PWL slope.
-// NOTE: Currently it is only implemented for identity, clamp, relu and FQ layers.
-// For all other layers, it does not improve accuracy.
-auto slopes = getPWLSlopes(layer);
-if (!slopes.empty() && !usePrevScaleFactor) {
-    auto div = 10;
-    auto mul = 10;
-    auto startRange = result > 1.0f ? static_cast<int32_t>(result) : result;
-    auto endRange = startRange - startRange / div;
-    endRange = endRange > 1.0f ? static_cast<int32_t>(endRange) : endRange;
-    auto scaleFactors = generateScaleFactors(startRange, endRange, static_cast<int32_t>(startRange - endRange) * mul);
-    auto newScaleFactor = selectBestOutputScaleFactors(quantizedParams->_src_quant.GetScale(), scaleFactors, slopes);
-    if (!fp32eq(result, newScaleFactor) &&
-        !fp32eq(newScaleFactor, 1.0f) && !fp32eq(newScaleFactor, 0.0f) && !std::isinf(newScaleFactor)) {
-        gnalog() << "[INFO] Adjusting scale factor for " << cnnLayer->name
-                 << " from: " << result << " to: " << newScaleFactor << "\n";
-        result = newScaleFactor;
-    }
-}
+if (!usePrevScaleFactor) {
+    result = adjustScaleFactor(result, cnnLayer, layer, quantizedParams);
+}
}
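Compared with the removed inline version, the new helper drops the fixed mul = 10 sampling and the rejection of candidates equal to 1.0f, and instead scales the step count with the size of the search range. As a rough sketch of the uniform candidate sampling that generateScaleFactors is assumed to perform (the function name sampleScaleFactors below is hypothetical, not the plugin's actual API):

#include <cstdint>
#include <vector>

// Hypothetical sketch: evenly sample steps + 1 candidate scale factors in
// [endRange, startRange]; the plugin's generateScaleFactors is assumed to
// behave along these lines.
std::vector<float> sampleScaleFactors(float startRange, float endRange, uint32_t steps) {
    std::vector<float> candidates;
    candidates.reserve(steps + 1);
    const float step = steps ? (startRange - endRange) / steps : 0.0f;
    for (uint32_t i = 0; i <= steps; ++i) {
        candidates.push_back(endRange + step * i);
    }
    return candidates;
}

A caller would then score each candidate against the PWL slopes (as selectBestOutputScaleFactors does) and keep the one with the smallest quantization error.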


@@ -0,0 +1,142 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <vector>
#include <memory>
#include <tuple>
#include <string>
#include <ie_core.hpp>
#include "common_test_utils/common_utils.hpp"
#include "functional_test_utils/plugin_cache.hpp"
#include "shared_test_classes/base/layer_test_utils.hpp"
#include "functional_test_utils/blob_utils.hpp"
#include "ngraph_functions/utils/ngraph_helpers.hpp"
#include "ngraph_functions/builders.hpp"
#include "ngraph_functions/pass/convert_prc.hpp"
typedef std::tuple<
InferenceEngine::Precision, // Network Precision
std::string, // Target Device
std::map<std::string, std::string>, // Configuration
std::pair<float, float> // Input values
> fqScaleFactorParams;
namespace LayerTestsDefinitions {
class TestFQScaleFactorsTest : public testing::WithParamInterface<fqScaleFactorParams>,
public LayerTestsUtils::LayerTestsCommon {
public:
static std::string getTestCaseName(testing::TestParamInfo<fqScaleFactorParams> obj) {
InferenceEngine::Precision netPrecision;
std::string targetDevice;
std::map<std::string, std::string> configuration;
std::pair<float, float> inputValues;
std::tie(netPrecision, targetDevice, configuration, inputValues) = obj.param;
std::ostringstream result;
result << "netPRC=" << netPrecision.name() << "_";
result << "targetDevice=" << targetDevice << "_";
for (auto const& configItem : configuration) {
result << "_configItem=" << configItem.first << "_" << configItem.second;
}
result << "_range=(" << inputValues.first << ", " << inputValues.second << ")";
return result.str();
}
protected:
void SetUp() override {
InferenceEngine::Precision netPrecision;
std::pair<float, float> inputValues;
std::tie(netPrecision, targetDevice, configuration, inputValues) = this->GetParam();
std::tie(inputDataMin, inputDataMax) = inputValues;
auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
const ngraph::Shape shape = {1, 128};
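// Model under test: Parameter -> FakeQuantize -> Multiply(FQ output, Parameter) -> FakeQuantize -> Result.
// Squaring a wide input range (e.g. [-188, 188]) yields a small output scale factor, exercising the sf < 1 path.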
auto params = ngraph::builder::makeParams(ngPrc, {shape});
auto lowNodeIn = ngraph::builder::makeConstant<float>(ngPrc, {1}, { inputDataMin });
auto highNodeIn = ngraph::builder::makeConstant<float>(ngPrc, {1}, { inputDataMax });
auto fqIn = std::make_shared<ngraph::opset8::FakeQuantize>(params[0], lowNodeIn, highNodeIn,
lowNodeIn, highNodeIn, levels);
auto mul = std::make_shared<ngraph::opset8::Multiply>(fqIn, params[0]);
auto lowNodeOut = ngraph::builder::makeConstant<float>(ngPrc, {1}, { -inputDataMin * inputDataMin });
auto highNodeOut = ngraph::builder::makeConstant<float>(ngPrc, {1}, { inputDataMax * inputDataMax });
auto fqOut = std::make_shared<ngraph::opset8::FakeQuantize>(mul, lowNodeOut, highNodeOut,
lowNodeOut, highNodeOut, levels);
ngraph::ResultVector results{std::make_shared<ngraph::opset8::Result>(fqOut)};
function = std::make_shared<ngraph::Function>(results, params, "FQWithSmallScaleFactor");
functionRefs = ngraph::clone_function(*function);
}
float inputDataMax = 1.0;
float inputDataMin = -1.0;
size_t levels = std::numeric_limits<uint16_t>::max();
};
TEST_P(TestFQScaleFactorsTest, CompareWithRefImpl) {
LoadNetwork();
GenerateInputs();
Infer();
auto refs = CalculateRefs();
auto results = GetOutputs();
const auto expected = reinterpret_cast<const float*>(refs.front().second.data());
size_t size = results.front()->size();
auto memory = InferenceEngine::as<InferenceEngine::MemoryBlob>(results.front());
IE_ASSERT(memory);
const auto lockedMemory = memory->wmap();
const auto actualBuffer = lockedMemory.as<const float*>();
/* The absolute threshold is calculated as 1.25 * (1 / last_fq_out_scale_factor) = 1.25 * (2 * maxValue) / (levels - 1).
Most of the accuracy degradation in this model is introduced by the output scale factor of FakeQuantize;
1 / sf is the part of the value which can be represented by one level, so we cannot get a finer resolution than that.
maxValue = inputDataMax * inputDataMax, since this model multiplies the input values by themselves;
1.25 is a reserve factor to cover other errors in this model. */
abs_threshold = 2.5 * inputDataMax * inputDataMax / (levels - 1);
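// e.g. with inputDataMax = 188 and levels = 65535: 2.5 * 188 * 188 / 65534 ~= 1.35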
for (size_t i = 0; i < size; ++i) {
const auto &ref = expected[i];
const auto &res = actualBuffer[i];
if (CommonTestUtils::ie_abs(res - ref) > abs_threshold) {
IE_THROW() << "Absolute comparison of values expected: " << ref << " and actual: " << res
<< " at index " << i << " with absolute threshold " << abs_threshold
<< " failed";
}
}
};
const std::vector<InferenceEngine::Precision> netPrecisions = {
InferenceEngine::Precision::FP32,
InferenceEngine::Precision::FP16
};
const std::vector<std::map<std::string, std::string>> configs = {
{
{"GNA_DEVICE_MODE", "GNA_SW_EXACT"},
}
};
const std::vector<std::pair<float, float>> inputValues = {
{-188.0, 188.0},
{-90.0, 90.0},
{-20.0, 20.0},
{-10.0, 10.0}
};
INSTANTIATE_TEST_SUITE_P(smoke_base, TestFQScaleFactorsTest,
::testing::Combine(
::testing::ValuesIn(netPrecisions),
::testing::Values(CommonTestUtils::DEVICE_GNA),
::testing::ValuesIn(configs),
::testing::ValuesIn(inputValues)),
TestFQScaleFactorsTest::getTestCaseName);
} // namespace LayerTestsDefinitions