[IE][nGraph] resolve division by zero for FP16 (#8676)
* initial solution
* changed namespaces from ngraph -> ov; completed unit-tests
* added Power with negative exponent into the pattern
* division to -> division by; aligned with OV new folder structure; some minor corrections
* corrected pattern
* changed get_pattern_value_map -> get_pattern_map
This commit is contained in:
parent
d55e67736b
commit
b6a5532c27
@ -13,6 +13,7 @@
|
||||
#include <pot_transformations.hpp>
|
||||
#include <pruning.hpp>
|
||||
#include <transformations/common_optimizations/compress_float_constants.hpp>
|
||||
#include <transformations/common_optimizations/division_by_zero_fp16_resolver.hpp>
|
||||
#include <transformations/common_optimizations/mark_precision_sensitive_subgraphs.hpp>
|
||||
#include <transformations/common_optimizations/moc_transformations.hpp>
|
||||
#include <transformations/control_flow/unroll_tensor_iterator.hpp>
|
||||
@ -60,6 +61,7 @@ void InferenceEnginePython::GenerateMappingFile(InferenceEnginePython::IENetwork
|
||||
|
||||
void InferenceEnginePython::CompressModelTransformation(InferenceEnginePython::IENetwork network) {
|
||||
ngraph::pass::Manager manager;
|
||||
manager.register_pass<ov::pass::DivisionByZeroFP16Resolver>();
|
||||
manager.register_pass<ov::pass::MarkPrecisionSensitiveSubgraphs>();
|
||||
manager.register_pass<ov::pass::CompressFloatConstants>();
|
||||
manager.run_passes(network.actual->getFunction());
|
||||
|
@ -0,0 +1,210 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <string>
|
||||
#include <memory>
|
||||
|
||||
#include <ngraph/function.hpp>
|
||||
#include <openvino/opsets/opset4.hpp>
|
||||
#include <openvino/pass/manager.hpp>
|
||||
#include <transformations/common_optimizations/division_by_zero_fp16_resolver.hpp>
|
||||
#include <transformations/init_node_info.hpp>
|
||||
|
||||
#include "common_test_utils/ngraph_test_utils.hpp"
|
||||
|
||||
using namespace testing;
|
||||
using namespace ov;
|
||||
// Smallest positive normalized fp16 value (2^-14); the reference graphs below use it as the expected eps.
constexpr float normalized_fp16_min = 6.103515625e-05f; // fp16 minimal normalized value
|
||||
|
||||
|
||||
// input_1 / Add(input_2, eps) with eps below normalized_fp16_min:
// the reference graph carries normalized_fp16_min in place of eps.
TEST_F(TransformationTestsF, DivisionByZeroMinimalPattern) {
    const float eps_value = 1.e-12;

    // Builds lhs / (rhs + eps) for the given eps constant value.
    const auto make_model = [](float eps) {
        auto lhs = std::make_shared<opset4::Parameter>(element::f32, PartialShape::dynamic(3));
        auto rhs = std::make_shared<opset4::Parameter>(element::f32, PartialShape::dynamic(3));
        auto eps_node = opset4::Constant::create(element::f32, Shape{1}, {eps});
        auto denominator = std::make_shared<opset4::Add>(rhs, eps_node);
        auto quotient = std::make_shared<opset4::Divide>(lhs, denominator);
        return std::make_shared<Function>(NodeVector{quotient}, ParameterVector{lhs, rhs});
    };

    function = make_model(eps_value);
    manager.register_pass<pass::DivisionByZeroFP16Resolver>();

    function_ref = make_model(normalized_fp16_min);

    comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES);
    comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES);
}
|
||||
|
||||
// input_1 * Pow(Add(input_2, eps), -1.77) with eps below normalized_fp16_min:
// the reference graph carries normalized_fp16_min in place of eps.
TEST_F(TransformationTestsF, PowWithNegativeExponent) {
    const float eps_value = 1.e-12;

    // Builds lhs * (rhs + eps)^(-1.77) for the given eps constant value.
    const auto make_model = [](float eps) {
        auto lhs = std::make_shared<opset4::Parameter>(element::f32, PartialShape::dynamic(3));
        auto rhs = std::make_shared<opset4::Parameter>(element::f32, PartialShape::dynamic(3));
        auto eps_node = opset4::Constant::create(element::f32, Shape{1}, {eps});
        auto base = std::make_shared<opset4::Add>(rhs, eps_node);
        auto exponent = opset4::Constant::create(element::f32, Shape{1}, {-1.77});
        auto power = std::make_shared<opset4::Power>(base, exponent);
        auto product = std::make_shared<opset4::Multiply>(lhs, power);
        return std::make_shared<Function>(NodeVector{product}, ParameterVector{lhs, rhs});
    };

    function = make_model(eps_value);
    manager.register_pass<pass::DivisionByZeroFP16Resolver>();

    function_ref = make_model(normalized_fp16_min);

    comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES);
    comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES);
}
|
||||
|
||||
// input_1 * Pow(Add(input_2, eps), 1.77): the exponent is positive,
// so the graph should be left unchanged.
TEST_F(TransformationTestsF, PowWithPositiveExponent) {
    const float eps_value = 1.e-12;

    // Builds lhs * (rhs + eps)^(1.77) for the given eps constant value.
    const auto make_model = [](float eps) {
        auto lhs = std::make_shared<opset4::Parameter>(element::f32, PartialShape::dynamic(3));
        auto rhs = std::make_shared<opset4::Parameter>(element::f32, PartialShape::dynamic(3));
        auto eps_node = opset4::Constant::create(element::f32, Shape{1}, {eps});
        auto base = std::make_shared<opset4::Add>(rhs, eps_node);
        auto exponent = opset4::Constant::create(element::f32, Shape{1}, {1.77});
        auto power = std::make_shared<opset4::Power>(base, exponent);
        auto product = std::make_shared<opset4::Multiply>(lhs, power);
        return std::make_shared<Function>(NodeVector{product}, ParameterVector{lhs, rhs});
    };

    function = make_model(eps_value);
    manager.register_pass<pass::DivisionByZeroFP16Resolver>();

    // The reference is built with the very same eps.
    function_ref = make_model(eps_value);

    comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES);
    comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES);
}
|
||||
|
||||
// input_1 / Add(input_2, eps) where eps_value is greater than normalized_fp16_min:
// the graph should be left unchanged.
TEST_F(TransformationTestsF, DivisionByZeroMinimalPatternUnchanged) {
    const float eps_value = 0.0001f;

    // Builds lhs / (rhs + eps) for the given eps constant value.
    const auto make_model = [](float eps) {
        auto lhs = std::make_shared<opset4::Parameter>(element::f32, PartialShape::dynamic(3));
        auto rhs = std::make_shared<opset4::Parameter>(element::f32, PartialShape::dynamic(3));
        auto eps_node = opset4::Constant::create(element::f32, Shape{1}, {eps});
        auto denominator = std::make_shared<opset4::Add>(rhs, eps_node);
        auto quotient = std::make_shared<opset4::Divide>(lhs, denominator);
        return std::make_shared<Function>(NodeVector{quotient}, ParameterVector{lhs, rhs});
    };

    function = make_model(eps_value);
    manager.register_pass<pass::DivisionByZeroFP16Resolver>();

    // The reference is built with the very same eps.
    function_ref = make_model(eps_value);

    comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES);
    comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES);
}
|
||||
|
||||
// input / Sqrt(Maximum(ReduceSum(input^2), eps)) with eps below normalized_fp16_min:
// the reference graph carries normalized_fp16_min in place of eps.
TEST_F(TransformationTestsF, DivisionByZeroInL2NormWithSqrtAndWithMax) {
    const float eps_value = 1.e-12;

    // Builds data / sqrt(max(reduce_sum(data^2, axes {0, 1}), eps)) for the given eps constant value.
    const auto make_model = [](float eps) {
        auto data = std::make_shared<opset4::Parameter>(element::f32, PartialShape::dynamic(3));
        auto two = opset4::Constant::create(element::f32, Shape{}, {2.f});
        auto squared = std::make_shared<opset4::Power>(data, two);
        auto axes = opset4::Constant::create(element::i64, Shape{2}, {0, 1});
        auto sum = std::make_shared<opset4::ReduceSum>(squared, axes);
        auto eps_node = opset4::Constant::create(element::f32, Shape{}, {eps});
        auto floored = std::make_shared<opset4::Maximum>(sum, eps_node);
        auto norm = std::make_shared<opset4::Sqrt>(floored);
        auto normalized = std::make_shared<opset4::Divide>(data, norm);
        return std::make_shared<Function>(NodeVector{normalized}, ParameterVector{data});
    };

    function = make_model(eps_value);
    manager.register_pass<pass::DivisionByZeroFP16Resolver>();

    function_ref = make_model(normalized_fp16_min);

    comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES);
    comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES);
}
|
||||
|
||||
|
||||
// input / Sqrt(Add(ReduceSum(input^2), eps)) with eps below normalized_fp16_min:
// the reference graph carries normalized_fp16_min in place of eps.
TEST_F(TransformationTestsF, DivisionByZeroInL2NormWithSqrtAndWithAdd) {
    const float eps_value = 1.e-12;

    // Builds data / sqrt(reduce_sum(data^2, axes {0, 1}) + eps) for the given eps constant value.
    const auto make_model = [](float eps) {
        auto data = std::make_shared<opset4::Parameter>(element::f32, PartialShape::dynamic(3));
        auto two = opset4::Constant::create(element::f32, Shape{}, {2.f});
        auto squared = std::make_shared<opset4::Power>(data, two);
        auto axes = opset4::Constant::create(element::i64, Shape{2}, {0, 1});
        auto sum = std::make_shared<opset4::ReduceSum>(squared, axes);
        auto eps_node = opset4::Constant::create(element::f32, Shape{1}, {eps});
        auto shifted = std::make_shared<opset4::Add>(sum, eps_node);
        auto norm = std::make_shared<opset4::Sqrt>(shifted);
        auto normalized = std::make_shared<opset4::Divide>(data, norm);
        return std::make_shared<Function>(NodeVector{normalized}, ParameterVector{data});
    };

    function = make_model(eps_value);
    manager.register_pass<pass::DivisionByZeroFP16Resolver>();

    function_ref = make_model(normalized_fp16_min);

    comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES);
    comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES);
}
|
@ -0,0 +1,35 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <utility>
|
||||
#include <memory>
|
||||
|
||||
#include <transformations_visibility.hpp>
|
||||
#include <ngraph/pass/graph_rewrite.hpp>
|
||||
#include "ngraph/pattern/matcher.hpp"
|
||||
|
||||
namespace ov {
|
||||
namespace pass {
|
||||
|
||||
class TRANSFORMATIONS_API DivisionByZeroFP16Resolver;
|
||||
|
||||
} // namespace pass
|
||||
} // namespace ov
|
||||
|
||||
/**
|
||||
* @ingroup ie_transformation_common_api
|
||||
* @brief: clamps eps into fp16 minimal normalized value in input_1/Maximum(input_2, eps); input_1/Add(input_2, eps);
|
||||
* and input_1*Pow(Maximum[Add](input_2, eps), -z) patterns to prevent division by zero.
|
||||
*
|
||||
* eps must be always nonzero to prevent from NaNs in such expressions if input_1 and input_2 simultaneously happened to be zero.
|
||||
* We should keep in such patterns eps >= fp16 minimal normalized value so that
|
||||
* CompressFloatConstants should not cast them into zero during compression into f16.
|
||||
*/
|
||||
class ov::pass::DivisionByZeroFP16Resolver: public ngraph::pass::MatcherPass {
|
||||
public:
|
||||
NGRAPH_RTTI_DECLARATION;
|
||||
DivisionByZeroFP16Resolver();
|
||||
};
|
@ -0,0 +1,81 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "itt.hpp"
|
||||
#include "transformations/common_optimizations/division_by_zero_fp16_resolver.hpp"
|
||||
#include "transformations/utils/utils.hpp"
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include <openvino/opsets/opset8.hpp>
|
||||
#include "ngraph/rt_info.hpp"
|
||||
#include <openvino/pass/pattern/op/wrap_type.hpp>
|
||||
#include <openvino/pass/pattern/op/or.hpp>
|
||||
|
||||
NGRAPH_RTTI_DEFINITION(ov::pass::DivisionByZeroFP16Resolver, "DivisionByZeroFP16Resolver", 0);
|
||||
|
||||
// Smallest positive normalized fp16 value (2^-14); eps constants below it are replaced with this value.
constexpr float normalized_fp16_min = 6.103515625e-05f; // fp16 minimal normalized value
|
||||
|
||||
using namespace ov;
|
||||
|
||||
/**
 * Builds the matcher for expressions in which a small eps constant protects a denominator
 * from being zero, and registers a callback that replaces such eps with normalized_fp16_min.
 *
 * The callback rewrites the eps constant only when all of the following hold:
 *   - eps is an f32 Constant;
 *   - every eps element lies in [0, normalized_fp16_min);
 *   - for the Multiply/Power form, the exponent is an f32 Constant whose elements are all negative.
 * In every other case the graph is left untouched and the callback returns false.
 */
ov::pass::DivisionByZeroFP16Resolver::DivisionByZeroFP16Resolver() {
    MATCHER_SCOPE(DivisionByZeroFP16Resolver);

    // to detect the following patterns where eps is used to prevent division by zero:
    // input_1/Maximum(input_2, eps)
    // input_1/Add(input_2, eps)
    // input_1/Sqrt(Maximum(input_2, eps))
    // input_1/Sqrt(Add(input_2, eps))
    // input_1*Pow(Maximum(input_2, eps), -z)
    // input_1*Pow(Add(input_2, eps), -z)
    auto input_1 = pattern::any_input();
    auto input_2 = pattern::any_input();

    auto eps_const_pattern = pattern::wrap_type<opset8::Constant>();
    auto max = std::make_shared<opset8::Maximum>(input_2, eps_const_pattern);
    auto add = std::make_shared<opset8::Add>(input_2, eps_const_pattern);
    auto max_or_add = std::make_shared<pattern::op::Or>(OutputVector{max, add});

    auto sqrt = std::make_shared<opset8::Sqrt>(max_or_add);
    auto sqrt_or_max_add = std::make_shared<pattern::op::Or>(OutputVector{max_or_add, sqrt});
    // whether is divided directly or after sqrt (e.g. in L2Norm after sqrt, in MVN is divided directly)
    auto divide = std::make_shared<opset8::Divide>(input_1, sqrt_or_max_add);

    auto pow_exp = pattern::wrap_type<opset8::Constant>();
    auto pow_pattern = std::make_shared<opset8::Power>(max_or_add, pow_exp);
    auto mul_pattern = std::make_shared<opset8::Multiply>(input_1, pow_pattern);
    auto div_or_mul_to_negative_pow = std::make_shared<pattern::op::Or>(OutputVector{divide, mul_pattern});

    matcher_pass_callback callback = [=](pattern::Matcher& m) {
        const auto& pattern_to_output = m.get_pattern_map();

        // Validate eps first: the values are read as float below, so only f32 constants are handled.
        const auto eps_const = std::dynamic_pointer_cast<opset8::Constant>(pattern_to_output.at(eps_const_pattern));
        if (!eps_const || eps_const->get_element_type() != ov::element::f32)
            return false;

        const auto mul = std::dynamic_pointer_cast<opset8::Multiply>(m.get_match_root());
        if (mul) {
            // pattern input_1*Pow(Maximum(input_2, eps), z) or input_1*Pow(Add(input_2, eps), z) is matched
            const auto pow_const = std::dynamic_pointer_cast<opset8::Constant>(pattern_to_output.at(pow_exp));
            // Guard against a failed cast or non-f32 data before reading the exponent as float.
            if (!pow_const || pow_const->get_element_type() != ov::element::f32)
                return false;

            for (const float val : pow_const->get_vector<float>())
                if (!(val < 0))  // continue only if exponent is negative (z < 0); NaN is rejected as well
                    return false;
        }

        // Only a non-negative eps that is smaller than the fp16 minimal normalized value is clamped.
        // Negative (or NaN) constants are not an "eps": overwriting them with a positive value
        // would change the result of the original expression, so such graphs are left as is.
        for (const float val : eps_const->get_vector<float>())
            if (!(val >= 0 && val < normalized_fp16_min))
                return false;

        // Same type and shape as the original constant, every element set to normalized_fp16_min.
        auto new_constant = std::make_shared<opset8::Constant>(eps_const->get_element_type(),
                                                               eps_const->get_shape(),
                                                               normalized_fp16_min);
        copy_runtime_info(eps_const, new_constant);
        // NOTE(review): replace_node presumably redirects every consumer of eps_const, including
        // ones outside the matched pattern - confirm this is intended for shared constants.
        replace_node(eps_const, new_constant);
        return true;
    };

    auto m = std::make_shared<pattern::Matcher>(div_or_mul_to_negative_pow, matcher_name);
    register_matcher(m, callback);
}
|
Loading…
Reference in New Issue
Block a user