[GNA] Fixed calculation of input scale factor and search of the next layer for FQ (#7246)
* [GNA] Fixed search of the next layer for FQ * [GNA] Fixed calculation of the input scale factor for a POT-quantized model when the first layer after the input is an activation
This commit is contained in:
committed by
GitHub
parent
1a656f4e44
commit
2dcd09055f
@@ -483,7 +483,9 @@ void GNAPlugin::UpdateInputScaleFromNetwork(InferenceEngine::CNNNetwork & networ
|
||||
auto fp32eq = [](float p1, float p2) -> bool {
|
||||
return (std::abs(p1 - p2) <= 0.00001f * std::min(std::abs(p1), std::abs(p2)));
|
||||
};
|
||||
float scaleInput = (fqLayer.getLevels() - 1) / (inputRange.second[0] - inputRange.first[0]);
|
||||
// GNA input is always quantized to int16, so number of levels can't be greater than max uint16
|
||||
size_t levels = std::min(fqLayer.getLevels(), static_cast<size_t>(std::numeric_limits<uint16_t>::max()));
|
||||
float scaleInput = (levels - 1) / (inputRange.second[0] - inputRange.first[0]);
|
||||
auto minAbsVal = std::min(std::abs(inputRange.second[0]), std::abs(inputRange.first[0]));
|
||||
auto maxAbsVal = std::max(std::abs(inputRange.second[0]), std::abs(inputRange.first[0]));
|
||||
if (fp32eq(minAbsVal, 0.0f) && !fp32eq(maxAbsVal, 0.0f)) {
|
||||
|
||||
@@ -1967,8 +1967,8 @@ void FuseFQIntoWeightsPass::run() {
|
||||
layers_connected_to_fq_count = inputTo.size();
|
||||
}
|
||||
for (int index = 0; index < layers_connected_to_fq_count; index++) {
|
||||
auto weightableLayer = CNNNetGetNextLayerSkipCertain(layerBeforeWeightable, 0, index, isNonFunctional).first;
|
||||
if (!LayerInfo(weightableLayer).isWeightable()) {
|
||||
auto weightableLayer = CNNNetCheckNextLayerSkipCertain(layerBeforeWeightable, 0, index, true, isNonFunctional).first;
|
||||
if (!weightableLayer || !LayerInfo(weightableLayer).isWeightable()) {
|
||||
continue;
|
||||
}
|
||||
if (weightableLayer->insData.size() < 2) {
|
||||
|
||||
@@ -25,7 +25,7 @@ typedef std::tuple<
|
||||
std::map<std::string, std::string>, // Configuration
|
||||
std::vector<size_t>, // Input Shape
|
||||
std::pair<float, float>, // Input Min and Max
|
||||
size_t // Levels
|
||||
std::pair<size_t, size_t> // Levels for input and output FQs
|
||||
> fqActivationParams;
|
||||
|
||||
namespace LayerTestsDefinitions {
|
||||
@@ -43,7 +43,7 @@ public:
|
||||
std::map<std::string, std::string> configuration;
|
||||
std::vector<size_t> inputShape;
|
||||
std::pair<float, float> inputMinMax;
|
||||
size_t levels = 0;
|
||||
std::pair<size_t, size_t> levels;
|
||||
std::tie(netPrecision, targetDevice, configuration, inputShape, inputMinMax, levels) = obj.param;
|
||||
|
||||
std::ostringstream result;
|
||||
@@ -54,7 +54,7 @@ public:
|
||||
}
|
||||
result << "_inputShape=" << CommonTestUtils::vec2str(inputShape);
|
||||
result << "_inputMinMax=(" << inputMinMax.first << ".." << inputMinMax.second << ")";
|
||||
result << "_levels=" << levels;
|
||||
result << "_levels=" << levels.first << "," << levels.second;
|
||||
|
||||
return result.str();
|
||||
}
|
||||
@@ -69,20 +69,21 @@ protected:
|
||||
|
||||
std::vector<size_t> inputShape;
|
||||
std::pair<float, float> inputMinMax;
|
||||
size_t levels = 0;
|
||||
std::pair<size_t, size_t> levels;
|
||||
std::tie(netPrecision, targetDevice, configuration, inputShape, inputMinMax, levels) = this->GetParam();
|
||||
auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
|
||||
|
||||
std::tie(inputDataMin, inputDataMax) = inputMinMax;
|
||||
auto inputLowNode = ngraph::builder::makeConstant<float>(ngPrc, { 1 }, { inputMinMax.first });
|
||||
auto inputHighNode = ngraph::builder::makeConstant<float>(ngPrc, { 1 }, { inputMinMax.second });
|
||||
|
||||
auto inputVector = ngraph::builder::makeParams(ngPrc, { inputShape });
|
||||
auto inputFQNode = std::make_shared<ngraph::opset1::FakeQuantize>(inputVector[0],
|
||||
inputLowNode, inputHighNode, inputLowNode, inputHighNode, levels);
|
||||
inputLowNode, inputHighNode, inputLowNode, inputHighNode, levels.first);
|
||||
|
||||
auto relu = ngraph::builder::makeActivation(inputFQNode, ngraph::element::f32, ngraph::helpers::ActivationTypes::Relu);
|
||||
auto reluFQNode = std::make_shared<ngraph::opset1::FakeQuantize>(relu,
|
||||
inputLowNode, inputHighNode, inputLowNode, inputHighNode, levels);
|
||||
inputLowNode, inputHighNode, inputLowNode, inputHighNode, levels.second);
|
||||
|
||||
ngraph::ResultVector results{ std::make_shared<ngraph::opset1::Result>(reluFQNode) };
|
||||
function = std::make_shared<ngraph::Function>(results, inputVector, "FQActivation");
|
||||
@@ -118,8 +119,9 @@ const std::vector<std::pair<float, float>> inputMinMax = {
|
||||
{-100, 100},
|
||||
};
|
||||
|
||||
const std::vector<size_t> levels = {
|
||||
65535,
|
||||
const std::vector<std::pair<size_t, size_t>> levels = {
|
||||
{std::numeric_limits<uint16_t>::max(), std::numeric_limits<uint16_t>::max()},
|
||||
{std::numeric_limits<uint32_t>::max(), std::numeric_limits<uint16_t>::max()}
|
||||
};
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(smoke_fq_activation, FQActivation,
|
||||
|
||||
@@ -0,0 +1,107 @@
|
||||
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
#include <tuple>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
|
||||
#include <ie_core.hpp>
|
||||
|
||||
#include "common_test_utils/common_utils.hpp"
|
||||
#include "functional_test_utils/plugin_cache.hpp"
|
||||
#include "shared_test_classes/base/layer_test_utils.hpp"
|
||||
#include "functional_test_utils/blob_utils.hpp"
|
||||
#include "ngraph_functions/utils/ngraph_helpers.hpp"
|
||||
#include "ngraph_functions/builders.hpp"
|
||||
|
||||
#include "ngraph_functions/pass/convert_prc.hpp"
|
||||
|
||||
typedef std::tuple<
|
||||
InferenceEngine::Precision, // Network Precision
|
||||
std::string, // Target Device
|
||||
std::map<std::string, std::string> // Configuration
|
||||
> fqWithMultipleOutConnectionsParams;
|
||||
|
||||
namespace LayerTestsDefinitions {
|
||||
|
||||
// Parameterized test fixture that builds a small network in which a single
// FakeQuantize node has two consumers (a Reshape and a Relu).
class FQWithMultipleOutConnections : public testing::WithParamInterface<fqWithMultipleOutConnectionsParams>,
                                     public LayerTestsUtils::LayerTestsCommon {
public:
    // Builds the test-case name from the parameter tuple:
    // "netPRC=<precision>_targetDevice=<device>_" followed by
    // "_configItem=<key>_<value>" for every configuration entry.
    static std::string getTestCaseName(testing::TestParamInfo<fqWithMultipleOutConnectionsParams> obj) {
        InferenceEngine::Precision precision;
        std::string device;
        std::map<std::string, std::string> config;
        std::tie(precision, device, config) = obj.param;

        std::ostringstream name;
        name << "netPRC=" << precision.name() << "_"
             << "targetDevice=" << device << "_";
        for (const auto& entry : config) {
            name << "_configItem=" << entry.first << "_" << entry.second;
        }
        return name.str();
    }

protected:
    // Unpacks the test parameters into the fixture members (targetDevice,
    // configuration) and assembles the function under test:
    //
    //   Parameter{1,128} -> Reshape{1,2,64} -> Relu -> FakeQuantize[-10, 10]
    //                                                   |-> Reshape{1,128} -> Result
    //                                                   |-> Relu -> Reshape{1,128} -> Result
    void SetUp() override {
        InferenceEngine::Precision precision;
        std::tie(precision, targetDevice, configuration) = this->GetParam();
        auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(precision);

        const ngraph::Shape inputShape = {1, 128};
        auto inputs = ngraph::builder::makeParams(ngPrc, {inputShape});

        // Reshape the flat input to {1, 2, 64} before the first activation.
        auto splitPattern = std::make_shared<ngraph::opset7::Constant>(
            ngraph::element::Type_t::i64, ngraph::Shape{3}, ngraph::Shape{1, 2, 64});
        auto splitReshape = std::make_shared<ngraph::opset7::Reshape>(inputs[0], splitPattern, false);

        auto firstRelu = std::make_shared<ngraph::opset7::Relu>(splitReshape);

        // FakeQuantize with identical input and output ranges [-10, 10];
        // the number of levels is the uint16_t maximum.
        auto rangeLow = ngraph::builder::makeConstant<float>(ngPrc, {1}, { -10.0f });
        auto rangeHigh = ngraph::builder::makeConstant<float>(ngPrc, {1}, { 10.0f });
        auto fakeQuantize = std::make_shared<ngraph::opset7::FakeQuantize>(
            firstRelu, rangeLow, rangeHigh, rangeLow, rangeHigh, std::numeric_limits<uint16_t>::max());

        // First consumer of the FakeQuantize: reshape straight back to the input shape.
        auto flattenPattern = std::make_shared<ngraph::opset7::Constant>(
            ngraph::element::Type_t::i64, ngraph::Shape{inputShape.size()}, inputShape);
        auto directReshape = std::make_shared<ngraph::opset7::Reshape>(fakeQuantize, flattenPattern, false);

        // Second consumer of the FakeQuantize: an activation followed by the same reshape.
        auto secondRelu = std::make_shared<ngraph::opset7::Relu>(fakeQuantize);
        auto activatedReshape = std::make_shared<ngraph::opset7::Reshape>(secondRelu, flattenPattern, false);

        ngraph::ResultVector outputs{std::make_shared<ngraph::opset7::Result>(directReshape),
                                     std::make_shared<ngraph::opset7::Result>(activatedReshape)};
        function = std::make_shared<ngraph::Function>(outputs, inputs, "FQFusionWithMultipleWeights");
    }
};
|
||||
|
||||
// Executes the network assembled in SetUp() through Run(), which is not
// defined in this file (presumably inherited from LayerTestsCommon).
TEST_P(FQWithMultipleOutConnections, CompareWithRefImpl) {
    Run();
}
|
||||
|
||||
const std::vector<InferenceEngine::Precision> netPrecisions = {
|
||||
InferenceEngine::Precision::FP32,
|
||||
InferenceEngine::Precision::FP16
|
||||
};
|
||||
|
||||
const std::vector<std::map<std::string, std::string>> configs = {
|
||||
{
|
||||
{"GNA_DEVICE_MODE", "GNA_SW_EXACT"},
|
||||
},
|
||||
{
|
||||
{"GNA_DEVICE_MODE", "GNA_SW_FP32"},
|
||||
}
|
||||
};
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(smoke_fq_fusion, FQWithMultipleOutConnections,
|
||||
::testing::Combine(
|
||||
::testing::ValuesIn(netPrecisions),
|
||||
::testing::Values(CommonTestUtils::DEVICE_GNA),
|
||||
::testing::ValuesIn(configs)),
|
||||
FQWithMultipleOutConnections::getTestCaseName);
|
||||
} // namespace LayerTestsDefinitions
|
||||
Reference in New Issue
Block a user