[GNA] More precise calculation of Log pwl and pwl with sf < 1 (#7884)
* [GNA] More precise calculation of Log pwl and pwl with sf < 1 * [GNA] Added tests * [GNA] Types correction * [GNA] Added comment for absolute threshold calculation in the test
This commit is contained in:
parent
02f3a175d0
commit
b746c734f8
@ -238,6 +238,57 @@ class ScaleFactorPerLayer<InferenceEngine::CNNLayer*, QUANT_DESC> {
|
|||||||
const double pow_domain = 16;
|
const double pow_domain = 16;
|
||||||
|
|
||||||
protected :
|
protected :
|
||||||
|
/**
|
||||||
|
* @brief Adjust output scale factor to get the most precise PWL slope.
|
||||||
|
* NOTE: Currently it is only implemented for identity, clamp, relu and FQ layers.
|
||||||
|
* For all other layers, it does not improve accuracy.
|
||||||
|
* @param sf Scale factor to be adjusted
|
||||||
|
* @param layer Layer information
|
||||||
|
* @param quantizedParams Quantization parameters
|
||||||
|
* @return the adjusted scale factor
|
||||||
|
*/
|
||||||
|
float adjustScaleFactor(float sf, InferenceEngine::CNNLayer const* cnnLayer,
|
||||||
|
GNAPluginNS::LayerInfo const& layer,
|
||||||
|
QuantizedLayerParams* quantizedParams) {
|
||||||
|
auto get_rank = [](uint32_t value) {
|
||||||
|
uint8_t rank = 0;
|
||||||
|
while (value >= 1) {
|
||||||
|
++rank;
|
||||||
|
value /= 10;
|
||||||
|
}
|
||||||
|
return rank;
|
||||||
|
};
|
||||||
|
auto pow_10 = [](uint8_t degree) {
|
||||||
|
uint32_t value = 1;
|
||||||
|
for (uint8_t i = 0; i < degree; ++i) {
|
||||||
|
value *= 10;
|
||||||
|
}
|
||||||
|
return value;
|
||||||
|
};
|
||||||
|
|
||||||
|
auto slopes = getPWLSlopes(layer);
|
||||||
|
if (!slopes.empty()) {
|
||||||
|
auto div = 10;
|
||||||
|
auto startRange = sf > 1.0f ? static_cast<uint32_t>(sf) : sf;
|
||||||
|
auto endRange = startRange - startRange / div;
|
||||||
|
endRange = endRange > 1.0f ? static_cast<uint32_t>(endRange) : endRange;
|
||||||
|
uint32_t steps = 10000;
|
||||||
|
uint32_t rangeSize = static_cast<uint32_t>(startRange - endRange);
|
||||||
|
if (rangeSize >= 1) {
|
||||||
|
steps *= rangeSize / pow_10(get_rank(rangeSize) - 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
auto scaleFactors = generateScaleFactors(startRange, endRange, steps);
|
||||||
|
auto newScaleFactor = selectBestOutputScaleFactors(quantizedParams->_src_quant.GetScale(), scaleFactors, slopes);
|
||||||
|
if (!fp32eq(sf, newScaleFactor) && !fp32eq(newScaleFactor, 0.0f) && !std::isinf(newScaleFactor)) {
|
||||||
|
gnalog() << "[INFO] Adjusting scale factor for " << cnnLayer->name
|
||||||
|
<< " from: " << sf << " to: " << newScaleFactor << "\n";
|
||||||
|
sf = newScaleFactor;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return sf;
|
||||||
|
}
|
||||||
|
|
||||||
float getActivationScale(InferenceEngine::CNNLayer const* cnnLayer,
|
float getActivationScale(InferenceEngine::CNNLayer const* cnnLayer,
|
||||||
GNAPluginNS::LayerInfo const& layer,
|
GNAPluginNS::LayerInfo const& layer,
|
||||||
int inputsSize,
|
int inputsSize,
|
||||||
@ -418,24 +469,8 @@ class ScaleFactorPerLayer<InferenceEngine::CNNLayer*, QUANT_DESC> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Adjust output scale factor to get the most precise PWL slope.
|
if (!usePrevScaleFactor) {
|
||||||
// NOTE: Currently it is only implemented for identity, clamp, relu and FQ layers.
|
result = adjustScaleFactor(result, cnnLayer, layer, quantizedParams);
|
||||||
// For all other layers, it does not improve accuracy.
|
|
||||||
auto slopes = getPWLSlopes(layer);
|
|
||||||
if (!slopes.empty() && !usePrevScaleFactor) {
|
|
||||||
auto div = 10;
|
|
||||||
auto mul = 10;
|
|
||||||
auto startRange = result > 1.0f ? static_cast<int32_t>(result) : result;
|
|
||||||
auto endRange = startRange - startRange / div;
|
|
||||||
endRange = endRange > 1.0f ? static_cast<int32_t>(endRange) : endRange;
|
|
||||||
auto scaleFactors = generateScaleFactors(startRange, endRange, static_cast<int32_t>(startRange - endRange) * mul);
|
|
||||||
auto newScaleFactor = selectBestOutputScaleFactors(quantizedParams->_src_quant.GetScale(), scaleFactors, slopes);
|
|
||||||
if (!fp32eq(result, newScaleFactor) &&
|
|
||||||
!fp32eq(newScaleFactor, 1.0f) && !fp32eq(newScaleFactor, 0.0f) && !std::isinf(newScaleFactor)) {
|
|
||||||
gnalog() << "[INFO] Adjusting scale factor for " << cnnLayer->name
|
|
||||||
<< " from: " << result << " to: " << newScaleFactor << "\n";
|
|
||||||
result = newScaleFactor;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -0,0 +1,142 @@
|
|||||||
|
// Copyright (C) 2021 Intel Corporation
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
//
|
||||||
|
|
||||||
|
#include <vector>
|
||||||
|
#include <memory>
|
||||||
|
#include <tuple>
|
||||||
|
#include <vector>
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
#include <ie_core.hpp>
|
||||||
|
|
||||||
|
#include "common_test_utils/common_utils.hpp"
|
||||||
|
#include "functional_test_utils/plugin_cache.hpp"
|
||||||
|
#include "shared_test_classes/base/layer_test_utils.hpp"
|
||||||
|
#include "functional_test_utils/blob_utils.hpp"
|
||||||
|
#include "ngraph_functions/utils/ngraph_helpers.hpp"
|
||||||
|
#include "ngraph_functions/builders.hpp"
|
||||||
|
|
||||||
|
#include "ngraph_functions/pass/convert_prc.hpp"
|
||||||
|
|
||||||
|
typedef std::tuple<
|
||||||
|
InferenceEngine::Precision, // Network Precision
|
||||||
|
std::string, // Target Device
|
||||||
|
std::map<std::string, std::string>, // Configuration
|
||||||
|
std::pair<float, float> // Input values
|
||||||
|
> fqScaleFactorParams;
|
||||||
|
|
||||||
|
namespace LayerTestsDefinitions {
|
||||||
|
|
||||||
|
/**
 * @brief Fixture building the graph Parameter -> FakeQuantize -> Multiply -> FakeQuantize -> Result,
 *        where Multiply multiplies the quantized input by the original input.
 */
class TestFQScaleFactorsTest : public testing::WithParamInterface<fqScaleFactorParams>,
                               public LayerTestsUtils::LayerTestsCommon {
public:
    /// Builds the test case name from the network precision, device, configuration and input range.
    static std::string getTestCaseName(testing::TestParamInfo<fqScaleFactorParams> obj) {
        InferenceEngine::Precision precision = std::get<0>(obj.param);
        const std::string& device = std::get<1>(obj.param);
        const std::map<std::string, std::string>& config = std::get<2>(obj.param);
        const std::pair<float, float>& range = std::get<3>(obj.param);

        std::ostringstream name;
        name << "netPRC=" << precision.name() << "_"
             << "targetDevice=" << device << "_";
        for (const auto& entry : config) {
            name << "_configItem=" << entry.first << "_" << entry.second;
        }
        name << "_range=(" << range.first << ", " << range.second << ")";
        return name.str();
    }

protected:
    /// Reads the test parameters and creates the tested function together with its reference copy.
    void SetUp() override {
        InferenceEngine::Precision precision;
        std::pair<float, float> range;
        std::tie(precision, targetDevice, configuration, range) = this->GetParam();
        inputDataMin = range.first;
        inputDataMax = range.second;
        auto elementType = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(precision);

        const ngraph::Shape inputShape = {1, 128};
        auto inputs = ngraph::builder::makeParams(elementType, {inputShape});

        // Input FakeQuantize: input and output ranges are both [inputDataMin, inputDataMax].
        auto inLow = ngraph::builder::makeConstant<float>(elementType, {1}, { inputDataMin });
        auto inHigh = ngraph::builder::makeConstant<float>(elementType, {1}, { inputDataMax });
        auto inputFq = std::make_shared<ngraph::opset8::FakeQuantize>(inputs[0], inLow, inHigh,
                                                                      inLow, inHigh, levels);

        auto product = std::make_shared<ngraph::opset8::Multiply>(inputFq, inputs[0]);

        // Output FakeQuantize: the range is built from the squared input bounds.
        auto outLow = ngraph::builder::makeConstant<float>(elementType, {1}, { -inputDataMin * inputDataMin });
        auto outHigh = ngraph::builder::makeConstant<float>(elementType, {1}, { inputDataMax * inputDataMax });
        auto outputFq = std::make_shared<ngraph::opset8::FakeQuantize>(product, outLow, outHigh,
                                                                       outLow, outHigh, levels);

        ngraph::ResultVector outputs{std::make_shared<ngraph::opset8::Result>(outputFq)};
        function = std::make_shared<ngraph::Function>(outputs, inputs, "FQWithSmallScaleFactor");
        functionRefs = ngraph::clone_function(*function);
    }

    // Input range bounds; overwritten in SetUp() with the values of the test parameter.
    float inputDataMax = 1.0;
    float inputDataMin = -1.0;
    // Number of quantization levels passed to both FakeQuantize layers.
    size_t levels = std::numeric_limits<uint16_t>::max();
};
|
||||||
|
|
||||||
|
// Runs inference and compares every value of the first output with the reference results,
// using an absolute threshold computed from the test's input range and FakeQuantize levels.
TEST_P(TestFQScaleFactorsTest, CompareWithRefImpl) {
    LoadNetwork();
    GenerateInputs();
    Infer();
    auto refs = CalculateRefs();
    auto results = GetOutputs();
    const auto expected = reinterpret_cast<const float*>(refs.front().second.data());
    size_t size = results.front()->size();
    auto memory = InferenceEngine::as<InferenceEngine::MemoryBlob>(results.front());
    IE_ASSERT(memory);
    // The output blob is only read below, so it is mapped for reading rather than for writing.
    const auto lockedMemory = memory->rmap();
    const auto actualBuffer = lockedMemory.as<const float*>();

    /* The absolute threshold is calculated as
       1.25 * (1 / last_fq_out_scale_factor) = 1.25 * (2 * maxValue) / (levels - 1).
       Most of the accuracy degradation in this model is introduced by the output scale factor of
       FakeQuantize: 1 / sf is the part of the value that is represented by one level, so the
       resolution cannot be finer than that.
       maxValue = inputDataMax * inputDataMax, since this model multiplies the input values by themselves.
       1.25 is a reserve factor that covers the other errors in this model. */
    abs_threshold = 2.5 * inputDataMax * inputDataMax / (levels - 1);

    for (size_t i = 0; i < size; ++i) {
        const auto &ref = expected[i];
        const auto &res = actualBuffer[i];
        if (CommonTestUtils::ie_abs(res - ref) > abs_threshold) {
            IE_THROW() << "Absolute comparison of values expected: " << ref << " and actual: " << res
                       << " at index " << i << " with absolute threshold " << abs_threshold
                       << " failed";
        }
    }
}
|
||||||
|
|
||||||
|
// Network precisions the test is instantiated with.
const std::vector<InferenceEngine::Precision> netPrecisions{
    InferenceEngine::Precision::FP32,
    InferenceEngine::Precision::FP16,
};
|
||||||
|
|
||||||
|
// Plugin configurations the test is instantiated with (a single one).
const std::vector<std::map<std::string, std::string>> configs{
    {
        {"GNA_DEVICE_MODE", "GNA_SW_EXACT"},
    },
};
|
||||||
|
|
||||||
|
// Input ranges given as (min, max) pairs.
const std::vector<std::pair<float, float>> inputValues{
    {-188.0f, 188.0f},
    {-90.0f, 90.0f},
    {-20.0f, 20.0f},
    {-10.0f, 10.0f},
};
|
||||||
|
|
||||||
|
// Instantiates the test for every combination of network precision, configuration and
// input range on the GNA device.
INSTANTIATE_TEST_SUITE_P(
    smoke_base,
    TestFQScaleFactorsTest,
    ::testing::Combine(::testing::ValuesIn(netPrecisions),
                       ::testing::Values(CommonTestUtils::DEVICE_GNA),
                       ::testing::ValuesIn(configs),
                       ::testing::ValuesIn(inputValues)),
    TestFQScaleFactorsTest::getTestCaseName);
|
||||||
|
} // namespace LayerTestsDefinitions
|
Loading…
Reference in New Issue
Block a user