diff --git a/src/bindings/python/src/pyopenvino/core/offline_transformations.cpp b/src/bindings/python/src/pyopenvino/core/offline_transformations.cpp
index 8b40b808dcb..80475f49811 100644
--- a/src/bindings/python/src/pyopenvino/core/offline_transformations.cpp
+++ b/src/bindings/python/src/pyopenvino/core/offline_transformations.cpp
@@ -17,6 +17,7 @@
 #include
 #include
 #include
+#include <transformations/flush_fp32_subnormals_to_zero.hpp>
 #include
 #include
@@ -38,6 +39,7 @@ void regmodule_offline_transformations(py::module m) {
             if (smart_reshape)
                 manager.register_pass();
             manager.register_pass(cf);
+            manager.register_pass<ov::pass::FlushFP32SubnormalsToZero>();
             manager.run_passes(model);
         },
         py::arg("model"),
diff --git a/src/bindings/python/tests/test_transformations/test_offline_api.py b/src/bindings/python/tests/test_transformations/test_offline_api.py
index cf3089e30fc..fdc7eec0048 100644
--- a/src/bindings/python/tests/test_transformations/test_offline_api.py
+++ b/src/bindings/python/tests/test_transformations/test_offline_api.py
@@ -339,3 +339,21 @@ def test_convert_gru_to_tensor_iterator():
     # assert that GRU sequence got transformed into TensorIterator
     assert "GRUSequence" not in ops_types
     assert "TensorIterator" in ops_types
+
+
+def test_flush_fp32_subnormals_to_zero():
+    parameter = ov.opset10.parameter([1, 8], name="X")
+    subnorm_val = -2.0e-45  # negative f32 subnormal
+
+    weights = ov.opset10.constant(np.array([0.0, 1.0, 2.0, 3.0, subnorm_val, subnorm_val, subnorm_val, subnorm_val]),
+                                  dtype=np.float32)
+    add_node = ov.opset10.add(parameter, weights)
+
+    result = ov.opset10.result(add_node)
+    model = Model([result], [parameter])
+
+    # FlushFP32SubnormalsToZero is registered inside apply_moc_transformations (see binding above)
+    apply_moc_transformations(model, cf=False, smart_reshape=True)
+
+    # the pass patches the constant buffer in place, so the original node is observable
+    assert np.all(weights.data[4:8] != subnorm_val)
+    assert np.all(weights.data[4:8] == 0.0)
diff --git a/src/common/transformations/include/transformations/flush_fp32_subnormals_to_zero.hpp b/src/common/transformations/include/transformations/flush_fp32_subnormals_to_zero.hpp
new file mode 100644
index
00000000000..e155cf1c6c6
--- /dev/null
+++ b/src/common/transformations/include/transformations/flush_fp32_subnormals_to_zero.hpp
@@ -0,0 +1,31 @@
+// Copyright (C) 2018-2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <memory>
+#include <vector>
+
+#include "openvino/core/model.hpp"
+#include "openvino/pass/graph_rewrite.hpp"
+#include "transformations_visibility.hpp"
+
+namespace ov {
+namespace pass {
+
+class TRANSFORMATIONS_API FlushFP32SubnormalsToZero;
+
+}  // namespace pass
+}  // namespace ov
+
+/**
+ * @ingroup ie_transformation_common_api
+ * @brief FlushFP32SubnormalsToZero flushes f32 subnormal constant values to zero.
+ * This is a read/write-expensive transformation, therefore it should be run at the offline phase.
+ */
+class ov::pass::FlushFP32SubnormalsToZero : public MatcherPass {
+public:
+    OPENVINO_RTTI("FlushFP32SubnormalsToZero", "0");
+    FlushFP32SubnormalsToZero();
+};
diff --git a/src/common/transformations/src/transformations/flush_fp32_subnormals_to_zero.cpp b/src/common/transformations/src/transformations/flush_fp32_subnormals_to_zero.cpp
new file mode 100644
index 00000000000..f01c60b1c03
--- /dev/null
+++ b/src/common/transformations/src/transformations/flush_fp32_subnormals_to_zero.cpp
@@ -0,0 +1,53 @@
+// Copyright (C) 2018-2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "transformations/flush_fp32_subnormals_to_zero.hpp"
+
+#include <cmath>
+#include <memory>
+#include <vector>
+
+#include "itt.hpp"
+#include "openvino/opsets/opset10.hpp"
+#include "openvino/pass/pattern/op/wrap_type.hpp"
+#include "transformations/utils/utils.hpp"
+
+using namespace std;
+using namespace ov;
+using namespace pass;
+
+ov::pass::FlushFP32SubnormalsToZero::FlushFP32SubnormalsToZero() {
+    MATCHER_SCOPE(FlushFP32SubnormalsToZero);
+
+    auto node_pattern = pattern::wrap_type<opset10::Constant>();
+
+    matcher_pass_callback callback = [=](pattern::Matcher& m) {
+        auto node = dynamic_pointer_cast<opset10::Constant>(m.get_match_root());
+
+        if (!node)
+            return false;
+        if (node->get_output_element_type(0) != element::f32)
+            return false;
+
+        // NOTE(review): the constant buffer is patched in place via const_cast. This keeps
+        // the original Constant node (and external references to it) alive, but assumes the
+        // buffer is writable and not shared/memory-mapped read-only — confirm for all callers.
+        auto* data = const_cast<float*>(node->get_data_ptr<float>());
+        const auto size = ov::shape_size(node->get_shape());
+
+        bool has_subnormals = false;
+        for (size_t i = 0; i < size; ++i) {
+            // fpclassify is sign-agnostic, so no abs() is needed
+            if (std::fpclassify(data[i]) == FP_SUBNORMAL) {
+                data[i] = 0.0f;
+                has_subnormals = true;
+            }
+        }
+        // Report a graph change only when at least one value was actually flushed.
+        return has_subnormals;
+    };
+
+    auto m = make_shared<pattern::Matcher>(node_pattern, matcher_name);
+    register_matcher(m, callback);
+}
diff --git a/src/common/transformations/tests/common_optimizations/flush_fp32_subnormals_to_zero_test.cpp b/src/common/transformations/tests/common_optimizations/flush_fp32_subnormals_to_zero_test.cpp
new file mode 100644
index 00000000000..cf2d090a2a3
--- /dev/null
+++ b/src/common/transformations/tests/common_optimizations/flush_fp32_subnormals_to_zero_test.cpp
@@ -0,0 +1,100 @@
+// Copyright (C) 2018-2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <gtest/gtest.h>
+
+#include <cstdint>
+#include <cstring>
+#include <memory>
+#include <openvino/core/model.hpp>
+#include <openvino/opsets/opset10.hpp>
+#include <transformations/flush_fp32_subnormals_to_zero.hpp>
+
+#include "common_test_utils/ngraph_test_utils.hpp"
+
+using namespace testing;
+using namespace ov;
+using namespace ov::opset10;
+using namespace std;
+
+namespace {
+
+// Bit-exact f32 constants; memcpy avoids the UB of union-based type punning in C++.
+float f32_from_bits(uint32_t bits) {
+    float f;
+    std::memcpy(&f, &bits, sizeof(f));
+    return f;
+}
+
+const float maximum_subnorm_val = f32_from_bits(0x007fffffu);  // = 2^-126 * (1 - 2^-23) ~= 1.1754942107e-38f
+const float minimum_subnorm_val = f32_from_bits(0x00000001u);  // = 2^-149 ~= 1.4012984643e-45f
+const float minimum_norm_val = f32_from_bits(0x00800000u);     // = 2^-126 ~= 1.1754943508e-38f
+
+// Parameter -> Convolution -> Result model; the last six weight elements are tail_val.
+shared_ptr<Model> make_conv_model(float tail_val) {
+    auto input = make_shared<Parameter>(element::f32, Shape{1, 3, 12, 12});
+    auto weights = Constant::create(
+        element::f32,
+        Shape{1, 3, 4, 1},
+        {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, tail_val, tail_val, tail_val, tail_val, tail_val, tail_val});
+    auto conv = make_shared<Convolution>(input,
+                                         weights,
+                                         Strides{1, 1},
+                                         CoordinateDiff{0, 0},
+                                         CoordinateDiff{0, 0},
+                                         Strides{1, 1});
+    return make_shared<Model>(NodeVector{conv}, ParameterVector{input});
+}
+
+}  // namespace
+
+TEST_F(TransformationTestsF, test_flush_fp32_subnorm_to_zero_max_subnorm) {
+    function = make_conv_model(maximum_subnorm_val);
+    manager.register_pass<ov::pass::FlushFP32SubnormalsToZero>();
+    function_ref = make_conv_model(0.0f);
+    comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES);
+}
+
+TEST_F(TransformationTestsF, test_flush_fp32_subnorm_to_zero_min_subnorm) {
+    function = make_conv_model(minimum_subnorm_val);
+    manager.register_pass<ov::pass::FlushFP32SubnormalsToZero>();
+    function_ref = make_conv_model(0.0f);
+    comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES);
+}
+
+TEST_F(TransformationTestsF, test_flush_fp32_subnorm_to_zero_arbitrary_subnorm) {
+    function = make_conv_model(2.0e-44f);
+    manager.register_pass<ov::pass::FlushFP32SubnormalsToZero>();
+    function_ref = make_conv_model(0.0f);
+    comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES);
+}
+
+TEST_F(TransformationTestsF, test_flush_fp32_subnorm_to_zero_max_neg_subnorm) {
+    function = make_conv_model(-maximum_subnorm_val);
+    manager.register_pass<ov::pass::FlushFP32SubnormalsToZero>();
+    function_ref = make_conv_model(0.0f);
+    comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES);
+}
+
+TEST_F(TransformationTestsF, test_flush_fp32_subnorm_to_zero_min_neg_subnorm) {
+    function = make_conv_model(-minimum_subnorm_val);
+    manager.register_pass<ov::pass::FlushFP32SubnormalsToZero>();
+    function_ref = make_conv_model(0.0f);
+    comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES);
+}
+
+TEST_F(TransformationTestsF, test_flush_fp32_subnorm_to_zero_arbitrary_neg_subnorm) {
+    function = make_conv_model(-2.0e-45f);
+    manager.register_pass<ov::pass::FlushFP32SubnormalsToZero>();
+    function_ref = make_conv_model(0.0f);
+    comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES);
+}
+
+TEST_F(TransformationTestsF, test_flush_fp32_subnorm_to_zero_arbitrary_norm) {
+    // the minimum normalized value must NOT be flushed to zero
+    function = make_conv_model(minimum_norm_val);
+    manager.register_pass<ov::pass::FlushFP32SubnormalsToZero>();
+    function_ref = make_conv_model(minimum_norm_val);
+    comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES);
+}
diff --git a/src/plugins/intel_cpu/src/nodes/input.cpp b/src/plugins/intel_cpu/src/nodes/input.cpp
index 02ee4f67b7b..3213fc50896 100644
--- a/src/plugins/intel_cpu/src/nodes/input.cpp
+++ b/src/plugins/intel_cpu/src/nodes/input.cpp
@@ -355,6 +355,8 @@ void Input::cloneBlobIfRequired() {
     if (weightCache) {
         MemoryPtr ptr = *weightCache->findOrCreate(blobKey(), cloneBlob);
         memoryPtr = std::const_pointer_cast(ptr);
+    // IRs already have all subnormals flushed to zero, but in the read_model
+    // scenario a directly loaded original model can still contain subnormals
     } else if (isBlobAligned() && !hasSubnormals() && !isWA()) {
         auto ptr = new Memory(getEngine());
         ptr->Create(memDesc, constOp->get_data_ptr());