[IE][nGraph] resolve division by zero for FP16 (#8676)

* initial solution

* changed namespaces from ngraph -> ov; completed unit-tests

* added Power with negative exponent into the pattern

* division to -> division by; aligned with OV new folder structure; some minor corrections

* corrected pattern

* changed get_pattern_value_map -> get_pattern_map
This commit is contained in:
Pavel Esir 2021-12-07 22:53:12 +03:00 committed by GitHub
parent d55e67736b
commit b6a5532c27
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 328 additions and 0 deletions

View File

@ -13,6 +13,7 @@
#include <pot_transformations.hpp>
#include <pruning.hpp>
#include <transformations/common_optimizations/compress_float_constants.hpp>
#include <transformations/common_optimizations/division_by_zero_fp16_resolver.hpp>
#include <transformations/common_optimizations/mark_precision_sensitive_subgraphs.hpp>
#include <transformations/common_optimizations/moc_transformations.hpp>
#include <transformations/control_flow/unroll_tensor_iterator.hpp>
@ -60,6 +61,7 @@ void InferenceEnginePython::GenerateMappingFile(InferenceEnginePython::IENetwork
void InferenceEnginePython::CompressModelTransformation(InferenceEnginePython::IENetwork network) {
ngraph::pass::Manager manager;
manager.register_pass<ov::pass::DivisionByZeroFP16Resolver>();
manager.register_pass<ov::pass::MarkPrecisionSensitiveSubgraphs>();
manager.register_pass<ov::pass::CompressFloatConstants>();
manager.run_passes(network.actual->getFunction());

View File

@ -0,0 +1,210 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <gtest/gtest.h>
#include <string>
#include <memory>
#include <ngraph/function.hpp>
#include <openvino/opsets/opset4.hpp>
#include <openvino/pass/manager.hpp>
#include <transformations/common_optimizations/division_by_zero_fp16_resolver.hpp>
#include <transformations/init_node_info.hpp>
#include "common_test_utils/ngraph_test_utils.hpp"
using namespace testing;
using namespace ov;
// Value the tests below expect a too-small eps constant to be replaced with
// (equals 2^-14). Declared locally in this test file; the transformation's
// source file declares a constant with the same name and value.
constexpr float normalized_fp16_min = 6.103515625e-05f; // fp16 minimal normalized value
// input_1 / Add(input_2, eps): an eps below the fp16 minimal normalized value
// is expected to be replaced by normalized_fp16_min.
TEST_F(TransformationTestsF, DivisionByZeroMinimalPattern) {
    const float tiny_eps = 1.e-12;

    // Builds the Divide-over-Add graph around the given eps constant.
    const auto build_graph = [](float eps) {
        const auto dividend = std::make_shared<opset4::Parameter>(element::f32, PartialShape::dynamic(3));
        const auto addend = std::make_shared<opset4::Parameter>(element::f32, PartialShape::dynamic(3));
        const auto eps_node = opset4::Constant::create(element::f32, Shape{1}, {eps});
        const auto denominator = std::make_shared<opset4::Add>(addend, eps_node);
        const auto quotient = std::make_shared<opset4::Divide>(dividend, denominator);
        return std::make_shared<Function>(NodeVector{quotient}, ParameterVector{dividend, addend});
    };

    // Graph under test.
    function = build_graph(tiny_eps);
    manager.register_pass<pass::DivisionByZeroFP16Resolver>();

    // Expected graph: identical topology, eps raised to the fp16 minimum.
    function_ref = build_graph(normalized_fp16_min);

    comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES);
    comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES);
}
// input_1 * Pow(Add(input_2, eps), -1.77): a negative exponent is a division in
// disguise, so a too-small eps is expected to be replaced by normalized_fp16_min.
TEST_F(TransformationTestsF, PowWithNegativeExponent) {
    const float tiny_eps = 1.e-12;

    // Builds the Multiply-by-Power graph around the given eps constant.
    const auto build_graph = [](float eps) {
        const auto multiplicand = std::make_shared<opset4::Parameter>(element::f32, PartialShape::dynamic(3));
        const auto base_input = std::make_shared<opset4::Parameter>(element::f32, PartialShape::dynamic(3));
        const auto eps_node = opset4::Constant::create(element::f32, Shape{1}, {eps});
        const auto shifted_base = std::make_shared<opset4::Add>(base_input, eps_node);
        const auto exponent = opset4::Constant::create(element::f32, Shape{1}, {-1.77});
        const auto power = std::make_shared<opset4::Power>(shifted_base, exponent);
        const auto product = std::make_shared<opset4::Multiply>(multiplicand, power);
        return std::make_shared<Function>(NodeVector{product}, ParameterVector{multiplicand, base_input});
    };

    // Graph under test.
    function = build_graph(tiny_eps);
    manager.register_pass<pass::DivisionByZeroFP16Resolver>();

    // Expected graph: identical topology, eps raised to the fp16 minimum.
    function_ref = build_graph(normalized_fp16_min);

    comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES);
    comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES);
}
// input_1 * Pow(Add(input_2, eps), 1.77): with a positive exponent there is no
// division, so the graph is expected to be left unchanged.
TEST_F(TransformationTestsF, PowWithPositiveExponent) {
    const float tiny_eps = 1.e-12;

    // Builds the Multiply-by-Power graph around the given eps constant.
    const auto build_graph = [](float eps) {
        const auto multiplicand = std::make_shared<opset4::Parameter>(element::f32, PartialShape::dynamic(3));
        const auto base_input = std::make_shared<opset4::Parameter>(element::f32, PartialShape::dynamic(3));
        const auto eps_node = opset4::Constant::create(element::f32, Shape{1}, {eps});
        const auto shifted_base = std::make_shared<opset4::Add>(base_input, eps_node);
        const auto exponent = opset4::Constant::create(element::f32, Shape{1}, {1.77});
        const auto power = std::make_shared<opset4::Power>(shifted_base, exponent);
        const auto product = std::make_shared<opset4::Multiply>(multiplicand, power);
        return std::make_shared<Function>(NodeVector{product}, ParameterVector{multiplicand, base_input});
    };

    // Graph under test.
    function = build_graph(tiny_eps);
    manager.register_pass<pass::DivisionByZeroFP16Resolver>();

    // Expected graph: the very same eps, i.e. nothing was rewritten.
    function_ref = build_graph(tiny_eps);

    comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES);
    comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES);
}
// input_1 / Add(input_2, eps) with eps already above normalized_fp16_min:
// the graph is expected to be left unchanged.
TEST_F(TransformationTestsF, DivisionByZeroMinimalPatternUnchanged) {
    const float safe_eps = 0.0001f;

    // Builds the Divide-over-Add graph around the given eps constant.
    const auto build_graph = [](float eps) {
        const auto dividend = std::make_shared<opset4::Parameter>(element::f32, PartialShape::dynamic(3));
        const auto addend = std::make_shared<opset4::Parameter>(element::f32, PartialShape::dynamic(3));
        const auto eps_node = opset4::Constant::create(element::f32, Shape{1}, {eps});
        const auto denominator = std::make_shared<opset4::Add>(addend, eps_node);
        const auto quotient = std::make_shared<opset4::Divide>(dividend, denominator);
        return std::make_shared<Function>(NodeVector{quotient}, ParameterVector{dividend, addend});
    };

    // Graph under test.
    function = build_graph(safe_eps);
    manager.register_pass<pass::DivisionByZeroFP16Resolver>();

    // Expected graph: the very same eps, i.e. nothing was rewritten.
    function_ref = build_graph(safe_eps);

    comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES);
    comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES);
}
// L2-norm style graph: input / Sqrt(Maximum(ReduceSum(input^2), eps)).
// A too-small eps is expected to be replaced by normalized_fp16_min.
TEST_F(TransformationTestsF, DivisionByZeroInL2NormWithSqrtAndWithMax) {
    const float tiny_eps = 1.e-12;

    // Builds the L2-norm graph (Maximum variant) around the given eps constant.
    const auto build_graph = [](float eps) {
        const auto data = std::make_shared<opset4::Parameter>(element::f32, PartialShape::dynamic(3));
        const auto two = opset4::Constant::create(element::f32, Shape{}, {2.f});
        const auto squared = std::make_shared<opset4::Power>(data, two);
        const auto reduction_axes = opset4::Constant::create(element::i64, Shape{2}, {0, 1});
        const auto sum_of_squares = std::make_shared<opset4::ReduceSum>(squared, reduction_axes);
        const auto eps_node = opset4::Constant::create(element::f32, Shape{}, {eps});
        const auto clamped = std::make_shared<opset4::Maximum>(sum_of_squares, eps_node);
        const auto norm = std::make_shared<opset4::Sqrt>(clamped);
        const auto normalized = std::make_shared<opset4::Divide>(data, norm);
        return std::make_shared<Function>(NodeVector{normalized}, ParameterVector{data});
    };

    // Graph under test.
    function = build_graph(tiny_eps);
    manager.register_pass<pass::DivisionByZeroFP16Resolver>();

    // Expected graph: identical topology, eps raised to the fp16 minimum.
    function_ref = build_graph(normalized_fp16_min);

    comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES);
    comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES);
}
// L2-norm style graph: input / Sqrt(Add(ReduceSum(input^2), eps)).
// A too-small eps is expected to be replaced by normalized_fp16_min.
TEST_F(TransformationTestsF, DivisionByZeroInL2NormWithSqrtAndWithAdd) {
    const float tiny_eps = 1.e-12;

    // Builds the L2-norm graph (Add variant) around the given eps constant.
    const auto build_graph = [](float eps) {
        const auto data = std::make_shared<opset4::Parameter>(element::f32, PartialShape::dynamic(3));
        const auto two = opset4::Constant::create(element::f32, Shape{}, {2.f});
        const auto squared = std::make_shared<opset4::Power>(data, two);
        const auto reduction_axes = opset4::Constant::create(element::i64, Shape{2}, {0, 1});
        const auto sum_of_squares = std::make_shared<opset4::ReduceSum>(squared, reduction_axes);
        const auto eps_node = opset4::Constant::create(element::f32, Shape{1}, {eps});
        const auto shifted = std::make_shared<opset4::Add>(sum_of_squares, eps_node);
        const auto norm = std::make_shared<opset4::Sqrt>(shifted);
        const auto normalized = std::make_shared<opset4::Divide>(data, norm);
        return std::make_shared<Function>(NodeVector{normalized}, ParameterVector{data});
    };

    // Graph under test.
    function = build_graph(tiny_eps);
    manager.register_pass<pass::DivisionByZeroFP16Resolver>();

    // Expected graph: identical topology, eps raised to the fp16 minimum.
    function_ref = build_graph(normalized_fp16_min);

    comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES);
    comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES);
}

View File

@ -0,0 +1,35 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <utility>
#include <memory>
#include <transformations_visibility.hpp>
#include <ngraph/pass/graph_rewrite.hpp>
#include "ngraph/pattern/matcher.hpp"
// Forward declaration so that the class can be defined below using its
// fully qualified name (ov::pass::DivisionByZeroFP16Resolver).
namespace ov {
namespace pass {
class TRANSFORMATIONS_API DivisionByZeroFP16Resolver;
} // namespace pass
} // namespace ov
/**
* @ingroup ie_transformation_common_api
* @brief Raises eps to the fp16 minimal normalized value in the patterns
* input_1/Maximum(input_2, eps), input_1/Add(input_2, eps) and
* input_1*Pow(Maximum[Add](input_2, eps), -z) in order to prevent division by zero.
*
* eps must always be nonzero to avoid NaNs in such expressions when input_1 and input_2 both happen to be zero.
* In these patterns eps should be kept >= the fp16 minimal normalized value so that
* CompressFloatConstants does not turn it into zero when compressing constants to f16.
*/
class ov::pass::DivisionByZeroFP16Resolver: public ngraph::pass::MatcherPass {
public:
NGRAPH_RTTI_DECLARATION;
// Builds the pattern and registers the matcher callback.
DivisionByZeroFP16Resolver();
};

View File

@ -0,0 +1,81 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "itt.hpp"
#include "transformations/common_optimizations/division_by_zero_fp16_resolver.hpp"
#include "transformations/utils/utils.hpp"
#include <memory>
#include <vector>
#include <openvino/opsets/opset8.hpp>
#include "ngraph/rt_info.hpp"
#include <openvino/pass/pattern/op/wrap_type.hpp>
#include <openvino/pass/pattern/op/or.hpp>
// RTTI registration for the pass: type name "DivisionByZeroFP16Resolver", version 0.
NGRAPH_RTTI_DEFINITION(ov::pass::DivisionByZeroFP16Resolver, "DivisionByZeroFP16Resolver", 0);
// Threshold and replacement value for eps constants (equals 2^-14).
constexpr float normalized_fp16_min = 6.103515625e-05f; // fp16 minimal normalized value
using namespace ov;
/**
 * Builds the matcher for "division guarded by eps" patterns and registers a callback
 * that replaces a too-small f32 eps constant with normalized_fp16_min.
 *
 * The callback returns true only when the eps constant was replaced. It returns false
 * (graph untouched) when:
 *  - the root is a Multiply and the Power exponent is not an f32 constant or has any
 *    non-negative element (no division is involved);
 *  - the eps constant is not f32;
 *  - any eps element lies outside [0, normalized_fp16_min), i.e. it is already large
 *    enough, or it is negative/NaN and therefore not an epsilon guard at all.
 */
ov::pass::DivisionByZeroFP16Resolver::DivisionByZeroFP16Resolver() {
    MATCHER_SCOPE(DivisionByZeroFP16Resolver);

    // to detect the following patterns where eps is used to prevent division by zero:
    // input_1/Maximum(input_2, eps)
    // input_1/Add(input_2, eps)
    // input_1/Sqrt(Maximum(input_2, eps))
    // input_1/Sqrt(Add(input_2, eps))
    // input_1*Pow(Maximum(input_2, eps), -z)
    // input_1*Pow(Add(input_2, eps), -z)
    auto input_1 = pattern::any_input();
    auto input_2 = pattern::any_input();
    auto eps_const_pattern = pattern::wrap_type<opset8::Constant>();
    auto max = std::make_shared<opset8::Maximum>(input_2, eps_const_pattern);
    auto add = std::make_shared<opset8::Add>(input_2, eps_const_pattern);
    auto max_or_add = std::make_shared<pattern::op::Or>(OutputVector{max, add});

    auto sqrt = std::make_shared<opset8::Sqrt>(max_or_add);
    auto sqrt_or_max_add = std::make_shared<pattern::op::Or>(OutputVector{max_or_add, sqrt});
    // whether is divided directly or after sqrt (e.g. in L2Norm after sqrt, in MVN is divided directly)
    auto divide = std::make_shared<opset8::Divide>(input_1, sqrt_or_max_add);

    auto pow_exp = pattern::wrap_type<opset8::Constant>();
    auto pow_pattern = std::make_shared<opset8::Power>(max_or_add, pow_exp);
    auto mul_pattern = std::make_shared<opset8::Multiply>(input_1, pow_pattern);
    auto div_or_mul_to_negative_pow = std::make_shared<pattern::op::Or>(OutputVector{divide, mul_pattern});

    matcher_pass_callback callback = [=](pattern::Matcher& m) {
        const auto& pattern_to_output = m.get_pattern_map();

        const auto mul = std::dynamic_pointer_cast<opset8::Multiply>(m.get_match_root());
        if (mul) {
            // pattern input_1*Pow(Maximum(input_2, eps), z) or input_1*Pow(Add(input_2, eps), z) is matched
            const auto pow_const = std::dynamic_pointer_cast<opset8::Constant>(pattern_to_output.at(pow_exp));
            // Apply the same guard as for eps below: the exponent is read via get_vector<float>(),
            // so bail out unless it really is an f32 constant.
            if (!pow_const || pow_const->get_element_type() != ov::element::f32)
                return false;
            for (float val : pow_const->get_vector<float>())
                if (val >= 0)  // continue only if exponent is negative (z < 0)
                    return false;
        }

        const auto eps_const = std::dynamic_pointer_cast<opset8::Constant>(pattern_to_output.at(eps_const_pattern));
        if (!eps_const || eps_const->get_element_type() != ov::element::f32)
            return false;

        // Replace only constants whose every element is in [0, normalized_fp16_min).
        // Values >= normalized_fp16_min are already large enough; negative or NaN values are
        // ordinary operands (e.g. x / (y - 5)), not epsilon guards, and must not be overwritten.
        for (float val : eps_const->get_vector<float>())
            if (!(val >= 0 && val < normalized_fp16_min))
                return false;

        // The new constant keeps the element type and shape; every element becomes normalized_fp16_min.
        auto new_constant = std::make_shared<opset8::Constant>(eps_const->get_element_type(),
                                                               eps_const->get_shape(),
                                                               normalized_fp16_min);
        copy_runtime_info(eps_const, new_constant);
        replace_node(eps_const, new_constant);
        return true;
    };

    auto m = std::make_shared<pattern::Matcher>(div_or_mul_to_negative_pow, matcher_name);
    register_matcher(m, callback);
}