[GNA] FQ accuracy fixes (#6924) (#7061)

This commit is contained in:
Mikhail Ryzhov
2021-08-15 22:36:15 +03:00
committed by GitHub
parent f03763defe
commit 543ea75813
3 changed files with 46 additions and 6 deletions

View File

@@ -19,6 +19,7 @@
#include "gna_slope_scale.h"
#include "runtime/pwl.h"
#include "gna_data_types.hpp"
#include "round_float_define.hpp"
namespace GNAPluginNS {
namespace frontend {
@@ -41,8 +42,8 @@ struct ScaleFactorUpdateResult {
* @param p2 Second float value
* @return Returns true if two float values are equal
*/
static bool fp32eq(float p1, float p2) {
return (std::abs(p1 - p2) <= 0.00001f * std::min(std::abs(p1), std::abs(p2)));
/**
 * @brief Approximate floating-point equality with a configurable relative tolerance.
 * @param p1 First float value
 * @param p2 Second float value
 * @param accuracy Relative tolerance factor (defaults to 1e-5)
 * @return true when |p1 - p2| is within accuracy * min(|p1|, |p2|)
 * NOTE(review): scaling by the *smaller* magnitude makes the check stricter than
 * the usual max-based form; any nonzero value compared against 0.0f is "unequal"
 * unless the difference is exactly 0 — presumably intentional, confirm with callers.
 */
static bool fp32eq(float p1, float p2, float accuracy = 0.00001f) {
    const float diff = std::abs(p1 - p2);
    const float scale = std::min(std::abs(p1), std::abs(p2));
    return diff <= accuracy * scale;
}
/**
@@ -73,14 +74,14 @@ static float selectBestOutputScaleFactors(float inScale, std::vector<float> outS
auto sd = 0.0;
for (size_t j = 0; j < slopes.size(); ++j) {
auto s = gna_slope(slopes[j], inScale, outScale);
auto slope = static_cast<uint32_t>(s.slope * s.slope_scale);
if (slope < static_cast<uint32_t>(std::numeric_limits<int16_t>::min()) && slope > static_cast<uint32_t>(std::numeric_limits<int16_t>::max())) {
auto slope = FLOAT_TO_INT16(s.slope * s.slope_scale);
if (slope < std::numeric_limits<int16_t>::min() || slope > std::numeric_limits<int16_t>::max()) {
sd += std::numeric_limits<int8_t>::max();
continue;
}
auto testSlope = static_cast<double>(slope) / s.slope_scale * inScale / outScale;
if (fp32eq(testSlope, slopes[j])) {
if (fp32eq(testSlope, slopes[j], 1.0E-6)) {
return outScale;
}

View File

@@ -43,7 +43,9 @@ ngraph::pass::FullyConnectedBiasFusion::FullyConnectedBiasFusion() {
Shape bias_shape(bias->get_shape());
Shape output_shape(fc->get_shape());
size_t bias_size = std::accumulate(bias_shape.begin(), bias_shape.end(), size_t{1}, std::multiplies<int64_t>());
if (bias_shape.empty() || bias_shape.back() != output_shape.back() || bias_shape.back() != bias_size) {
if (bias_shape.empty() ||
(bias_shape.back() != output_shape.back() && bias_shape.back() != 1) ||
bias_shape.back() != bias_size) {
return false;
}

View File

@@ -95,6 +95,43 @@ TEST(TransformationTests, FullyConnectedBiasFusionTest2D) {
ASSERT_TRUE(res.first) << res.second;
}
// Verifies that FullyConnectedBiasFusion folds an Add with a {1, 1} constant
// into the FullyConnected bias: the scalar value 1 is broadcast over the
// {786}-element bias of the reference function below.
TEST(TransformationTests, FullyConnectedBiasFusionTestBias1x1) {
std::shared_ptr<ngraph::Function> function(nullptr), function_ref(nullptr);
{
// Graph under test: FC (zero bias) followed by Add with a 1x1 constant of 1.
auto input1 = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::f32, ngraph::Shape{1, 128});
auto weights = ngraph::opset1::Constant::create(ngraph::element::f32, ngraph::Shape{786, 128}, {1});
auto empty_bias = ngraph::opset1::Constant::create(ngraph::element::f32, ngraph::Shape{786}, {0});
auto fc = std::make_shared<ngraph::op::FullyConnected>(input1, weights, empty_bias, ngraph::Shape{1, 786});
auto const_bias = ngraph::opset1::Constant::create(ngraph::element::f32, ngraph::Shape{1, 1}, {1});
auto add = std::make_shared<ngraph::opset1::Add>(fc, const_bias);
function = std::make_shared<ngraph::Function>(ngraph::NodeVector{add}, ngraph::ParameterVector{input1});
// Run the fusion pass; InjectionPass checks runtime info is preserved, and
// ConstantFolding collapses the broadcast bias so it matches the reference.
ngraph::pass::Manager manager;
manager.register_pass<ngraph::pass::InitNodeInfo>();
manager.register_pass<ngraph::pass::FullyConnectedBiasFusion>();
manager.register_pass<ngraph::pass::InjectionPass>([](std::shared_ptr<ngraph::Function> function) {
check_rt_info(function);
});
manager.register_pass<ngraph::pass::ConstantFolding>();
ASSERT_NO_THROW(manager.run_passes(function));
}
{
// Reference graph: a single FC whose bias is the scalar 1 broadcast to {786}.
auto input1 = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::f32, ngraph::Shape{1, 128});
auto weights = ngraph::opset1::Constant::create(ngraph::element::f32, ngraph::Shape{786, 128}, {1});
auto bias = ngraph::opset1::Constant::create(ngraph::element::f32, ngraph::Shape{786}, {1});
auto fc = std::make_shared<ngraph::op::FullyConnected>(input1, weights, bias, ngraph::Shape{1, 786});
function_ref = std::make_shared<ngraph::Function>(ngraph::NodeVector{fc}, ngraph::ParameterVector{input1});
}
// compare_functions returns {matched, diagnostic-message}.
auto res = compare_functions(function, function_ref);
ASSERT_TRUE(res.first) << res.second;
}
TEST(TransformationTests, FullyConnectedBiasFusionDynamic) {
auto input1 = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::f32, ngraph::PartialShape::dynamic());
auto weights = ngraph::opset1::Constant::create(ngraph::element::f32, ngraph::Shape{786, 128}, {1});