From b6a5532c270b3c61b2f9fb32e7b06642954b0820 Mon Sep 17 00:00:00 2001 From: Pavel Esir Date: Tue, 7 Dec 2021 22:53:12 +0300 Subject: [PATCH] [IE][nGraph] resolve division by zero for FP16 (#8676) * initial solution * changed namespaces from ngraph -> ov; completed unit-tests * added Power with negative exponent into the pattern * division to -> division by; aligned with OV new folder structure; some minor corrections * corrected pattern * changed get_pattern_value_map -> get_pattern_map --- .../offline_transformations_api_impl.cpp | 2 + .../division_by_zero_fp16_resolver_test.cpp | 210 ++++++++++++++++++ .../division_by_zero_fp16_resolver.hpp | 35 +++ .../division_by_zero_fp16_resolver.cpp | 81 +++++++ 4 files changed, 328 insertions(+) create mode 100644 inference-engine/tests/functional/inference_engine/transformations/division_by_zero_fp16_resolver_test.cpp create mode 100644 src/common/transformations/include/transformations/common_optimizations/division_by_zero_fp16_resolver.hpp create mode 100644 src/common/transformations/src/transformations/common_optimizations/division_by_zero_fp16_resolver.cpp diff --git a/inference-engine/ie_bridges/python/src/openvino/offline_transformations/offline_transformations_api_impl.cpp b/inference-engine/ie_bridges/python/src/openvino/offline_transformations/offline_transformations_api_impl.cpp index 9a7d6528666..8ffb098e296 100644 --- a/inference-engine/ie_bridges/python/src/openvino/offline_transformations/offline_transformations_api_impl.cpp +++ b/inference-engine/ie_bridges/python/src/openvino/offline_transformations/offline_transformations_api_impl.cpp @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -60,6 +61,7 @@ void InferenceEnginePython::GenerateMappingFile(InferenceEnginePython::IENetwork void InferenceEnginePython::CompressModelTransformation(InferenceEnginePython::IENetwork network) { ngraph::pass::Manager manager; + manager.register_pass(); manager.register_pass(); 
manager.register_pass(); manager.run_passes(network.actual->getFunction()); diff --git a/inference-engine/tests/functional/inference_engine/transformations/division_by_zero_fp16_resolver_test.cpp b/inference-engine/tests/functional/inference_engine/transformations/division_by_zero_fp16_resolver_test.cpp new file mode 100644 index 00000000000..81fb6872434 --- /dev/null +++ b/inference-engine/tests/functional/inference_engine/transformations/division_by_zero_fp16_resolver_test.cpp @@ -0,0 +1,210 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include +#include + +#include +#include +#include +#include +#include + +#include "common_test_utils/ngraph_test_utils.hpp" + +using namespace testing; +using namespace ov; +constexpr float normalized_fp16_min = 6.103515625e-05f; // fp16 minimal normalized value (2^-14), the expected eps after the pass + + +TEST_F(TransformationTestsF, DivisionByZeroMinimalPattern) { + const float eps_value = 1.e-12; + { + auto input_1 = std::make_shared(element::f32, PartialShape::dynamic(3)); + auto input_2 = std::make_shared(element::f32, PartialShape::dynamic(3)); + auto eps_const = opset4::Constant::create(element::f32, Shape{1}, {eps_value}); + auto add = std::make_shared(input_2, eps_const); + auto divide = std::make_shared(input_1, add); + + function = std::make_shared(NodeVector{divide}, ParameterVector{input_1, input_2}); + + manager.register_pass(); + } + + { + auto input_1 = std::make_shared(element::f32, PartialShape::dynamic(3)); + auto input_2 = std::make_shared(element::f32, PartialShape::dynamic(3)); + auto eps_const = opset4::Constant::create(element::f32, Shape{1}, {normalized_fp16_min}); + auto add = std::make_shared(input_2, eps_const); + auto divide = std::make_shared(input_1, add); + + function_ref = std::make_shared(NodeVector{divide}, ParameterVector{input_1, input_2}); + } + comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES); 
comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES); +} + +TEST_F(TransformationTestsF, PowWithNegativeExponent) { + const float eps_value = 1.e-12; + { + auto input_1 = std::make_shared(element::f32, PartialShape::dynamic(3)); + auto input_2 = std::make_shared(element::f32, PartialShape::dynamic(3)); + auto eps_const = opset4::Constant::create(element::f32, Shape{1}, {eps_value}); + auto add = std::make_shared(input_2, eps_const); + auto pow_exp_const = opset4::Constant::create(element::f32, Shape{1}, {-1.77}); + auto pow = std::make_shared(add, pow_exp_const); + auto mul = std::make_shared(input_1, pow); + + function = std::make_shared(NodeVector{mul}, ParameterVector{input_1, input_2}); + + manager.register_pass(); + } + + { + auto input_1 = std::make_shared(element::f32, PartialShape::dynamic(3)); + auto input_2 = std::make_shared(element::f32, PartialShape::dynamic(3)); + auto eps_const = opset4::Constant::create(element::f32, Shape{1}, {normalized_fp16_min}); + auto add = std::make_shared(input_2, eps_const); + auto pow_exp_const = opset4::Constant::create(element::f32, Shape{1}, {-1.77}); + auto pow = std::make_shared(add, pow_exp_const); + auto mul = std::make_shared(input_1, pow); + + function_ref = std::make_shared(NodeVector{mul}, ParameterVector{input_1, input_2}); + } + comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES); + comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES); +} + +TEST_F(TransformationTestsF, PowWithPositiveExponent) { + // the exponent is positive, so Pow is not a division here: graph should be left unchanged (eps keeps eps_value) + const float eps_value = 1.e-12; + { + auto input_1 = std::make_shared(element::f32, PartialShape::dynamic(3)); + auto input_2 = std::make_shared(element::f32, PartialShape::dynamic(3)); + auto eps_const = opset4::Constant::create(element::f32, Shape{1}, {eps_value}); + auto add = std::make_shared(input_2, eps_const); + auto pow_exp_const = opset4::Constant::create(element::f32, 
Shape{1}, {1.77}); + auto pow = std::make_shared(add, pow_exp_const); + 
auto mul = std::make_shared(input_1, pow); + + function = std::make_shared(NodeVector{mul}, ParameterVector{input_1, input_2}); + + manager.register_pass(); + } + + { + auto input_1 = std::make_shared(element::f32, PartialShape::dynamic(3)); + auto input_2 = std::make_shared(element::f32, PartialShape::dynamic(3)); + auto eps_const = opset4::Constant::create(element::f32, Shape{1}, {eps_value}); + auto add = std::make_shared(input_2, eps_const); + auto pow_exp_const = opset4::Constant::create(element::f32, Shape{1}, {1.77}); + auto pow = std::make_shared(add, pow_exp_const); + auto mul = std::make_shared(input_1, pow); + + function_ref = std::make_shared(NodeVector{mul}, ParameterVector{input_1, input_2}); + } + comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES); + comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES); +} + +TEST_F(TransformationTestsF, DivisionByZeroMinimalPatternUnchanged) { + // eps_value is already greater than normalized_fp16_min, so the graph must be left unchanged + const float eps_value = 0.0001f; + { + auto input_1 = std::make_shared(element::f32, PartialShape::dynamic(3)); + auto input_2 = std::make_shared(element::f32, PartialShape::dynamic(3)); + auto eps_const = opset4::Constant::create(element::f32, Shape{1}, {eps_value}); + auto add = std::make_shared(input_2, eps_const); + auto divide = std::make_shared(input_1, add); + + function = std::make_shared(NodeVector{divide}, ParameterVector{input_1, input_2}); + + manager.register_pass(); + } + + { + auto input_1 = std::make_shared(element::f32, PartialShape::dynamic(3)); + auto input_2 = std::make_shared(element::f32, PartialShape::dynamic(3)); + auto eps_const = opset4::Constant::create(element::f32, Shape{1}, {eps_value}); + auto add = std::make_shared(input_2, eps_const); + auto divide = std::make_shared(input_1, add); + + function_ref = std::make_shared(NodeVector{divide}, ParameterVector{input_1, input_2}); + } + 
comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES); 
+ comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES); +} + +TEST_F(TransformationTestsF, DivisionByZeroInL2NormWithSqrtAndWithMax) { + const float eps_value = 1.e-12; + { + auto input = std::make_shared(element::f32, PartialShape::dynamic(3)); + auto exp = opset4::Constant::create(element::f32, Shape{}, {2.f}); + auto pow = std::make_shared(input, exp); + auto axes_const = opset4::Constant::create(element::i64, Shape{2}, {0, 1}); + auto reduce_sum = std::make_shared(pow, axes_const); + auto eps_const = opset4::Constant::create(element::f32, Shape{}, {eps_value}); + auto max = std::make_shared(reduce_sum, eps_const); + auto sqrt = std::make_shared(max); + auto divide = std::make_shared(input, sqrt); + + function = std::make_shared(NodeVector{divide}, ParameterVector{input}); + + manager.register_pass(); + } + + { + auto input = std::make_shared(element::f32, PartialShape::dynamic(3)); + auto exp = opset4::Constant::create(element::f32, Shape{}, {2.f}); + auto pow = std::make_shared(input, exp); + auto axes_const = opset4::Constant::create(element::i64, Shape{2}, {0, 1}); + auto reduce_sum = std::make_shared(pow, axes_const); + auto eps_const = opset4::Constant::create(element::f32, Shape{}, {normalized_fp16_min}); + auto max = std::make_shared(reduce_sum, eps_const); + auto sqrt = std::make_shared(max); + auto divide = std::make_shared(input, sqrt); + + function_ref = std::make_shared(NodeVector{divide}, ParameterVector{input}); + } + comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES); + comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES); +} + + +TEST_F(TransformationTestsF, DivisionByZeroInL2NormWithSqrtAndWithAdd) { + const float eps_value = 1.e-12; + { + auto input = std::make_shared(element::f32, PartialShape::dynamic(3)); + auto exp = opset4::Constant::create(element::f32, Shape{}, {2.f}); + auto pow = std::make_shared(input, exp); + auto axes_const = opset4::Constant::create(element::i64, Shape{2}, {0, 1}); + auto reduce_sum 
= std::make_shared(pow, axes_const); + auto eps_const = opset4::Constant::create(element::f32, Shape{1}, {eps_value}); + auto add = std::make_shared(reduce_sum, eps_const); + auto sqrt = std::make_shared(add); + auto divide = std::make_shared(input, sqrt); + + function = std::make_shared(NodeVector{divide}, ParameterVector{input}); + + manager.register_pass(); + } + + { + auto input = std::make_shared(element::f32, PartialShape::dynamic(3)); + auto exp = opset4::Constant::create(element::f32, Shape{}, {2.f}); + auto pow = std::make_shared(input, exp); + auto axes_const = opset4::Constant::create(element::i64, Shape{2}, {0, 1}); + auto reduce_sum = std::make_shared(pow, axes_const); + auto eps_const = opset4::Constant::create(element::f32, Shape{1}, {normalized_fp16_min}); + auto add = std::make_shared(reduce_sum, eps_const); + auto sqrt = std::make_shared(add); + auto divide = std::make_shared(input, sqrt); + + function_ref = std::make_shared(NodeVector{divide}, ParameterVector{input}); + } + comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES); + comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES); +} diff --git a/src/common/transformations/include/transformations/common_optimizations/division_by_zero_fp16_resolver.hpp b/src/common/transformations/include/transformations/common_optimizations/division_by_zero_fp16_resolver.hpp new file mode 100644 index 00000000000..1109f3af377 --- /dev/null +++ b/src/common/transformations/include/transformations/common_optimizations/division_by_zero_fp16_resolver.hpp @@ -0,0 +1,35 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +#include +#include +#include "ngraph/pattern/matcher.hpp" + +namespace ov { +namespace pass { + +class TRANSFORMATIONS_API DivisionByZeroFP16Resolver; + +} // namespace pass +} // namespace ov + +/** + * @ingroup ie_transformation_common_api + * @brief: clamps eps into fp16 minimal normalized value in 
input_1/Maximum(input_2, eps); input_1/Add(input_2, eps); + * and input_1*Pow(Maximum[Add](input_2, eps), -z) patterns to prevent division by zero. + * + * eps must be always nonzero to prevent from NaNs in such expressions if input_1 and input_2 simultaneously happened to be zero. + * We should keep in such patterns eps >= fp16 minimal normalized value so that + * CompressFloatConstants should not cast them into zero during compression into f16. + */ +class ov::pass::DivisionByZeroFP16Resolver: public ngraph::pass::MatcherPass { +public: + NGRAPH_RTTI_DECLARATION; + DivisionByZeroFP16Resolver(); +}; diff --git a/src/common/transformations/src/transformations/common_optimizations/division_by_zero_fp16_resolver.cpp b/src/common/transformations/src/transformations/common_optimizations/division_by_zero_fp16_resolver.cpp new file mode 100644 index 00000000000..60fc69d5a94 --- /dev/null +++ b/src/common/transformations/src/transformations/common_optimizations/division_by_zero_fp16_resolver.cpp @@ -0,0 +1,81 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "itt.hpp" +#include "transformations/common_optimizations/division_by_zero_fp16_resolver.hpp" +#include "transformations/utils/utils.hpp" + +#include +#include + +#include +#include "ngraph/rt_info.hpp" +#include +#include + +NGRAPH_RTTI_DEFINITION(ov::pass::DivisionByZeroFP16Resolver, "DivisionByZeroFP16Resolver", 0); + +constexpr float normalized_fp16_min = 6.103515625e-05f; // fp16 minimal normalized value + +using namespace ov; + +ov::pass::DivisionByZeroFP16Resolver::DivisionByZeroFP16Resolver() { + MATCHER_SCOPE(DivisionByZeroFP16Resolver); + + // to detect the following patterns where eps is used to prevent division by zero: + // input_1/Maximum(input_2, eps) + // input_1/Add(input_2, eps) + // input_1/Sqrt(Maximum(input_2, eps)) + // input_1/Sqrt(Add(input_2, eps)) + // input_1*Pow(Maximum(input_2, eps), -z) + // input_1*Pow(Add(input_2, eps), -z) + auto 
input_1 = pattern::any_input(); + auto input_2 = pattern::any_input(); + + auto eps_const_pattern = pattern::wrap_type(); + auto max = std::make_shared(input_2, eps_const_pattern); + auto add = std::make_shared(input_2, eps_const_pattern); + auto max_or_add = std::make_shared(OutputVector{max, add}); + + auto sqrt = std::make_shared(max_or_add); + auto sqrt_or_max_add = std::make_shared(OutputVector{max_or_add, sqrt}); + // whether is divided directly or after sqrt (e.g. in L2Norm after sqrt, in MVN is divided directly) + auto divide = std::make_shared(input_1, sqrt_or_max_add); + + auto pow_exp = pattern::wrap_type(); + auto pow_pattern = std::make_shared(max_or_add, pow_exp); + auto mul_pattern = std::make_shared(input_1, pow_pattern); + auto div_or_mul_to_negative_pow = std::make_shared(OutputVector{divide, mul_pattern}); + + matcher_pass_callback callback = [=](pattern::Matcher& m) { + const auto& pattern_to_output = m.get_pattern_map(); + + const auto mul = std::dynamic_pointer_cast(m.get_match_root()); + if (mul) { + // pattern input_1*Pow(Maximum(input_2, eps), z) or input_1*Pow(Add(input_2, eps), z) is matched + const auto pow_const = std::dynamic_pointer_cast(pattern_to_output.at(pow_exp)); + for (float val : pow_const->get_vector()) + if (val >= 0) // continue only if exponent is negative (z < 0) + return false; + } + + const auto eps_const = std::dynamic_pointer_cast(pattern_to_output.at(eps_const_pattern)); + if (!eps_const || eps_const->get_element_type() != ov::element::f32) + return false; + + for (float val : eps_const->get_vector()) + if (val >= normalized_fp16_min) + return false; + + auto new_constant = std::make_shared(eps_const->get_element_type(), + eps_const->get_shape(), + normalized_fp16_min); + copy_runtime_info(eps_const, new_constant); + replace_node(eps_const, new_constant); + return true; + }; + + auto m = std::make_shared(div_or_mul_to_negative_pow, matcher_name); + register_matcher(m, callback); +}