[GNA] FQ accuracy fixes (#6924) (#7061)

This commit is contained in:
Mikhail Ryzhov
2021-08-15 22:36:15 +03:00
committed by GitHub
parent f03763defe
commit 543ea75813
3 changed files with 46 additions and 6 deletions

View File

@@ -19,6 +19,7 @@
#include "gna_slope_scale.h"
#include "runtime/pwl.h"
#include "gna_data_types.hpp"
#include "round_float_define.hpp"
namespace GNAPluginNS {
namespace frontend {
@@ -41,8 +42,8 @@ struct ScaleFactorUpdateResult {
* @param p2 Second float value
* @return Returns true if two float values are equal
*/
static bool fp32eq(float p1, float p2) {
return (std::abs(p1 - p2) <= 0.00001f * std::min(std::abs(p1), std::abs(p2)));
/**
 * @brief Approximate floating-point equality with a configurable relative tolerance.
 * @param p1 First float value
 * @param p2 Second float value
 * @param accuracy Relative tolerance factor (defaults to 1e-5)
 * @return true when |p1 - p2| is within accuracy * min(|p1|, |p2|)
 * NOTE(review): scaling by the *smaller* magnitude makes the check stricter than
 * the usual max-based form; any nonzero value compared against 0.0f is "unequal"
 * unless the difference is exactly 0 — presumably intentional, confirm with callers.
 */
static bool fp32eq(float p1, float p2, float accuracy = 0.00001f) {
    const float diff = std::abs(p1 - p2);
    const float scale = std::min(std::abs(p1), std::abs(p2));
    return diff <= accuracy * scale;
}
/**
@@ -73,14 +74,14 @@ static float selectBestOutputScaleFactors(float inScale, std::vector<float> outS
auto sd = 0.0;
for (size_t j = 0; j < slopes.size(); ++j) {
auto s = gna_slope(slopes[j], inScale, outScale);
auto slope = static_cast<uint32_t>(s.slope * s.slope_scale);
if (slope < static_cast<uint32_t>(std::numeric_limits<int16_t>::min()) && slope > static_cast<uint32_t>(std::numeric_limits<int16_t>::max())) {
auto slope = FLOAT_TO_INT16(s.slope * s.slope_scale);
if (slope < std::numeric_limits<int16_t>::min() || slope > std::numeric_limits<int16_t>::max()) {
sd += std::numeric_limits<int8_t>::max();
continue;
}
auto testSlope = static_cast<double>(slope) / s.slope_scale * inScale / outScale;
if (fp32eq(testSlope, slopes[j])) {
if (fp32eq(testSlope, slopes[j], 1.0E-6)) {
return outScale;
}

View File

@@ -43,7 +43,9 @@ ngraph::pass::FullyConnectedBiasFusion::FullyConnectedBiasFusion() {
Shape bias_shape(bias->get_shape());
Shape output_shape(fc->get_shape());
size_t bias_size = std::accumulate(bias_shape.begin(), bias_shape.end(), size_t{1}, std::multiplies<int64_t>());
if (bias_shape.empty() || bias_shape.back() != output_shape.back() || bias_shape.back() != bias_size) {
if (bias_shape.empty() ||
(bias_shape.back() != output_shape.back() && bias_shape.back() != 1) ||
bias_shape.back() != bias_size) {
return false;
}

View File

@@ -95,6 +95,43 @@ TEST(TransformationTests, FullyConnectedBiasFusionTest2D) {
ASSERT_TRUE(res.first) << res.second;
}
// Verifies that FullyConnectedBiasFusion folds an Add with a {1, 1} constant
// into the FullyConnected bias: the scalar value 1 is broadcast over the
// {786}-element bias of the reference function below.
TEST(TransformationTests, FullyConnectedBiasFusionTestBias1x1) {
std::shared_ptr<ngraph::Function> function(nullptr), function_ref(nullptr);
{
// Graph under test: FC (zero bias) followed by Add with a 1x1 constant of 1.
auto input1 = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::f32, ngraph::Shape{1, 128});
auto weights = ngraph::opset1::Constant::create(ngraph::element::f32, ngraph::Shape{786, 128}, {1});
auto empty_bias = ngraph::opset1::Constant::create(ngraph::element::f32, ngraph::Shape{786}, {0});
auto fc = std::make_shared<ngraph::op::FullyConnected>(input1, weights, empty_bias, ngraph::Shape{1, 786});
auto const_bias = ngraph::opset1::Constant::create(ngraph::element::f32, ngraph::Shape{1, 1}, {1});
auto add = std::make_shared<ngraph::opset1::Add>(fc, const_bias);
function = std::make_shared<ngraph::Function>(ngraph::NodeVector{add}, ngraph::ParameterVector{input1});
// Run the fusion pass; InjectionPass checks runtime info is preserved, and
// ConstantFolding collapses the broadcast bias so it matches the reference.
ngraph::pass::Manager manager;
manager.register_pass<ngraph::pass::InitNodeInfo>();
manager.register_pass<ngraph::pass::FullyConnectedBiasFusion>();
manager.register_pass<ngraph::pass::InjectionPass>([](std::shared_ptr<ngraph::Function> function) {
check_rt_info(function);
});
manager.register_pass<ngraph::pass::ConstantFolding>();
ASSERT_NO_THROW(manager.run_passes(function));
}
{
// Reference graph: a single FC whose bias is the scalar 1 broadcast to {786}.
auto input1 = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::f32, ngraph::Shape{1, 128});
auto weights = ngraph::opset1::Constant::create(ngraph::element::f32, ngraph::Shape{786, 128}, {1});
auto bias = ngraph::opset1::Constant::create(ngraph::element::f32, ngraph::Shape{786}, {1});
auto fc = std::make_shared<ngraph::op::FullyConnected>(input1, weights, bias, ngraph::Shape{1, 786});
function_ref = std::make_shared<ngraph::Function>(ngraph::NodeVector{fc}, ngraph::ParameterVector{input1});
}
// compare_functions returns {matched, diagnostic-message}.
auto res = compare_functions(function, function_ref);
ASSERT_TRUE(res.first) << res.second;
}
TEST(TransformationTests, FullyConnectedBiasFusionDynamic) {
auto input1 = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::f32, ngraph::PartialShape::dynamic());
auto weights = ngraph::opset1::Constant::create(ngraph::element::f32, ngraph::Shape{786, 128}, {1});