[GNA] Fixed calculation of input scale factor and search of the next layer for FQ (#7246)

* [GNA] Fixed search of the next layer for FQ

* [GNA] Fixed calculation of the input scale factor for POT-quantized models when the first layer after the input is an activation
This commit is contained in:
Elizaveta Lobanova
2021-08-26 15:02:23 +03:00
committed by GitHub
parent 1a656f4e44
commit 2dcd09055f
4 changed files with 122 additions and 11 deletions

View File

@@ -483,7 +483,9 @@ void GNAPlugin::UpdateInputScaleFromNetwork(InferenceEngine::CNNNetwork & networ
auto fp32eq = [](float p1, float p2) -> bool {
return (std::abs(p1 - p2) <= 0.00001f * std::min(std::abs(p1), std::abs(p2)));
};
float scaleInput = (fqLayer.getLevels() - 1) / (inputRange.second[0] - inputRange.first[0]);
// GNA input is always quantized to int16, so number of levels can't be greater than max uint16
size_t levels = std::min(fqLayer.getLevels(), static_cast<size_t>(std::numeric_limits<uint16_t>::max()));
float scaleInput = (levels - 1) / (inputRange.second[0] - inputRange.first[0]);
auto minAbsVal = std::min(std::abs(inputRange.second[0]), std::abs(inputRange.first[0]));
auto maxAbsVal = std::max(std::abs(inputRange.second[0]), std::abs(inputRange.first[0]));
if (fp32eq(minAbsVal, 0.0f) && !fp32eq(maxAbsVal, 0.0f)) {

View File

@@ -1967,8 +1967,8 @@ void FuseFQIntoWeightsPass::run() {
layers_connected_to_fq_count = inputTo.size();
}
for (int index = 0; index < layers_connected_to_fq_count; index++) {
auto weightableLayer = CNNNetGetNextLayerSkipCertain(layerBeforeWeightable, 0, index, isNonFunctional).first;
if (!LayerInfo(weightableLayer).isWeightable()) {
auto weightableLayer = CNNNetCheckNextLayerSkipCertain(layerBeforeWeightable, 0, index, true, isNonFunctional).first;
if (!weightableLayer || !LayerInfo(weightableLayer).isWeightable()) {
continue;
}
if (weightableLayer->insData.size() < 2) {

View File

@@ -25,7 +25,7 @@ typedef std::tuple<
std::map<std::string, std::string>, // Configuration
std::vector<size_t>, // Input Shape
std::pair<float, float>, // Input Min and Max
size_t // Levels
std::pair<size_t, size_t> // Levels for input and output FQs
> fqActivationParams;
namespace LayerTestsDefinitions {
@@ -43,7 +43,7 @@ public:
std::map<std::string, std::string> configuration;
std::vector<size_t> inputShape;
std::pair<float, float> inputMinMax;
size_t levels = 0;
std::pair<size_t, size_t> levels;
std::tie(netPrecision, targetDevice, configuration, inputShape, inputMinMax, levels) = obj.param;
std::ostringstream result;
@@ -54,7 +54,7 @@ public:
}
result << "_inputShape=" << CommonTestUtils::vec2str(inputShape);
result << "_inputMinMax=(" << inputMinMax.first << ".." << inputMinMax.second << ")";
result << "_levels=" << levels;
result << "_levels=" << levels.first << "," << levels.second;
return result.str();
}
@@ -69,20 +69,21 @@ protected:
std::vector<size_t> inputShape;
std::pair<float, float> inputMinMax;
size_t levels = 0;
std::pair<size_t, size_t> levels;
std::tie(netPrecision, targetDevice, configuration, inputShape, inputMinMax, levels) = this->GetParam();
auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
std::tie(inputDataMin, inputDataMax) = inputMinMax;
auto inputLowNode = ngraph::builder::makeConstant<float>(ngPrc, { 1 }, { inputMinMax.first });
auto inputHighNode = ngraph::builder::makeConstant<float>(ngPrc, { 1 }, { inputMinMax.second });
auto inputVector = ngraph::builder::makeParams(ngPrc, { inputShape });
auto inputFQNode = std::make_shared<ngraph::opset1::FakeQuantize>(inputVector[0],
inputLowNode, inputHighNode, inputLowNode, inputHighNode, levels);
inputLowNode, inputHighNode, inputLowNode, inputHighNode, levels.first);
auto relu = ngraph::builder::makeActivation(inputFQNode, ngraph::element::f32, ngraph::helpers::ActivationTypes::Relu);
auto reluFQNode = std::make_shared<ngraph::opset1::FakeQuantize>(relu,
inputLowNode, inputHighNode, inputLowNode, inputHighNode, levels);
inputLowNode, inputHighNode, inputLowNode, inputHighNode, levels.second);
ngraph::ResultVector results{ std::make_shared<ngraph::opset1::Result>(reluFQNode) };
function = std::make_shared<ngraph::Function>(results, inputVector, "FQActivation");
@@ -118,8 +119,9 @@ const std::vector<std::pair<float, float>> inputMinMax = {
{-100, 100},
};
const std::vector<size_t> levels = {
65535,
const std::vector<std::pair<size_t, size_t>> levels = {
{std::numeric_limits<uint16_t>::max(), std::numeric_limits<uint16_t>::max()},
{std::numeric_limits<uint32_t>::max(), std::numeric_limits<uint16_t>::max()}
};
INSTANTIATE_TEST_CASE_P(smoke_fq_activation, FQActivation,

View File

@@ -0,0 +1,107 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <vector>
#include <memory>
#include <tuple>
#include <vector>
#include <string>
#include <ie_core.hpp>
#include "common_test_utils/common_utils.hpp"
#include "functional_test_utils/plugin_cache.hpp"
#include "shared_test_classes/base/layer_test_utils.hpp"
#include "functional_test_utils/blob_utils.hpp"
#include "ngraph_functions/utils/ngraph_helpers.hpp"
#include "ngraph_functions/builders.hpp"
#include "ngraph_functions/pass/convert_prc.hpp"
// Parameter bundle for the FQWithMultipleOutConnections test:
// network precision, target device name and plugin configuration map.
using fqWithMultipleOutConnectionsParams = std::tuple<
    InferenceEngine::Precision,         // Network Precision
    std::string,                        // Target Device
    std::map<std::string, std::string>  // Configuration
>;
namespace LayerTestsDefinitions {
// Parameterized test whose graph contains a FakeQuantize node with two consumers:
//
//   Parameter -> Reshape{1,2,64} -> Relu -> FakeQuantize -+-> Reshape{1,128} ---------> Result
//                                                         +-> Relu -> Reshape{1,128} -> Result
class FQWithMultipleOutConnections : public testing::WithParamInterface<fqWithMultipleOutConnectionsParams>,
                                     public LayerTestsUtils::LayerTestsCommon {
public:
    // Composes the test-case name from the network precision, the target device
    // and every key/value pair of the configuration map.
    static std::string getTestCaseName(testing::TestParamInfo<fqWithMultipleOutConnectionsParams> obj) {
        InferenceEngine::Precision precision = std::get<0>(obj.param);
        const std::string& device = std::get<1>(obj.param);
        const std::map<std::string, std::string>& pluginConfig = std::get<2>(obj.param);

        std::ostringstream name;
        name << "netPRC=" << precision.name() << "_"
             << "targetDevice=" << device << "_";
        for (const auto& entry : pluginConfig) {
            name << "_configItem=" << entry.first << "_" << entry.second;
        }
        return name.str();
    }

protected:
    // Unpacks the test parameters and assembles the ngraph function under test.
    void SetUp() override {
        const auto& testParams = this->GetParam();
        InferenceEngine::Precision netPrecision = std::get<0>(testParams);
        targetDevice = std::get<1>(testParams);
        configuration = std::get<2>(testParams);
        auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);

        const ngraph::Shape flatShape = {1, 128};
        auto netInputs = ngraph::builder::makeParams(ngPrc, {flatShape});

        // Reshape the {1, 128} input to {1, 2, 64} and pass it through ReLU.
        auto rank3Pattern = std::make_shared<ngraph::opset7::Constant>(ngraph::element::Type_t::i64,
                                                                       ngraph::Shape{3},
                                                                       ngraph::Shape{1, 2, 64});
        auto toRank3 = std::make_shared<ngraph::opset7::Reshape>(netInputs[0], rank3Pattern, false);
        auto firstRelu = std::make_shared<ngraph::opset7::Relu>(toRank3);

        // FakeQuantize over [-10, 10]; the same bounds are used for the input and output ranges.
        auto fqLow = ngraph::builder::makeConstant<float>(ngPrc, {1}, { -10.0f });
        auto fqHigh = ngraph::builder::makeConstant<float>(ngPrc, {1}, { 10.0f });
        const size_t fqLevels = std::numeric_limits<uint16_t>::max();
        auto fakeQuantize = std::make_shared<ngraph::opset7::FakeQuantize>(firstRelu, fqLow, fqHigh, fqLow, fqHigh,
                                                                           fqLevels);

        // First FQ consumer: reshape straight back to the original input shape.
        auto flatPattern = std::make_shared<ngraph::opset7::Constant>(ngraph::element::Type_t::i64,
                                                                      ngraph::Shape{flatShape.size()},
                                                                      flatShape);
        auto directBranch = std::make_shared<ngraph::opset7::Reshape>(fakeQuantize, flatPattern, false);

        // Second FQ consumer: ReLU followed by the same reshape.
        auto secondRelu = std::make_shared<ngraph::opset7::Relu>(fakeQuantize);
        auto reluBranch = std::make_shared<ngraph::opset7::Reshape>(secondRelu, flatPattern, false);

        auto directResult = std::make_shared<ngraph::opset7::Result>(directBranch);
        auto reluResult = std::make_shared<ngraph::opset7::Result>(reluBranch);
        ngraph::ResultVector netOutputs{directResult, reluResult};
        function = std::make_shared<ngraph::Function>(netOutputs, netInputs, "FQFusionWithMultipleWeights");
    }
};
// Executes Run() for every instantiated parameter combination of the fixture.
TEST_P(FQWithMultipleOutConnections, CompareWithRefImpl) {
    Run();
}
// Network precisions the test is instantiated with.
const std::vector<InferenceEngine::Precision> netPrecisions{
    InferenceEngine::Precision::FP32,
    InferenceEngine::Precision::FP16,
};
// Plugin configurations the test is instantiated with; each map holds a single
// GNA_DEVICE_MODE entry.
const std::vector<std::map<std::string, std::string>> configs{
    {{"GNA_DEVICE_MODE", "GNA_SW_EXACT"}},
    {{"GNA_DEVICE_MODE", "GNA_SW_FP32"}},
};
// Instantiates the test for every combination of netPrecisions x configs on the
// GNA device; test-case names are produced by getTestCaseName.
INSTANTIATE_TEST_CASE_P(smoke_fq_fusion, FQWithMultipleOutConnections,
::testing::Combine(
::testing::ValuesIn(netPrecisions),
::testing::Values(CommonTestUtils::DEVICE_GNA),
::testing::ValuesIn(configs)),
FQWithMultipleOutConnections::getTestCaseName);
} // namespace LayerTestsDefinitions