[LoadTime][MO] flush fp32 subnormals to zero at offline phase (#15929)
* flush fp32 subnormals to zero in IR * style fix in test_offline_api.py * simplified call of FlushFP32SubnormalsToZero: is called form offline_transformations.cpp * reverted offline_transformations.py * use fpclassify * style-fix * Update src/common/transformations/tests/common_optimizations/flush_fp32_subnormals_to_zero_test.cpp Co-authored-by: Roman Kazantsev <roman.kazantsev@intel.com> --------- Co-authored-by: Roman Kazantsev <roman.kazantsev@intel.com>
This commit is contained in:
@@ -17,6 +17,7 @@
|
||||
#include <transformations/common_optimizations/mark_precision_sensitive_shapeof_subgraphs.hpp>
|
||||
#include <transformations/common_optimizations/moc_legacy_transformations.hpp>
|
||||
#include <transformations/common_optimizations/moc_transformations.hpp>
|
||||
#include <transformations/flush_fp32_subnormals_to_zero.hpp>
|
||||
#include <transformations/op_conversions/convert_sequences_to_tensor_iterator.hpp>
|
||||
#include <transformations/smart_reshape/smart_reshape.hpp>
|
||||
|
||||
@@ -38,6 +39,7 @@ void regmodule_offline_transformations(py::module m) {
|
||||
if (smart_reshape)
|
||||
manager.register_pass<ov::pass::SmartReshape>();
|
||||
manager.register_pass<ov::pass::MOCTransformations>(cf);
|
||||
manager.register_pass<ov::pass::FlushFP32SubnormalsToZero>();
|
||||
manager.run_passes(model);
|
||||
},
|
||||
py::arg("model"),
|
||||
|
||||
@@ -339,3 +339,20 @@ def test_convert_gru_to_tensor_iterator():
|
||||
# assert that GRU sequence got transformed into TensorIterator
|
||||
assert "GRUSequence" not in ops_types
|
||||
assert "TensorIterator" in ops_types
|
||||
|
||||
|
||||
def test_flush_fp32_subnormals_to_zero():
    # FlushFP32SubnormalsToZero runs inside apply_moc_transformations, so
    # applying MOC must zero out the subnormal weight values in place.
    param = ov.opset10.parameter([1, 8], name="X")
    subnorm_val = -2.0e-45

    values = [0.0, 1.0, 2.0, 3.0] + [subnorm_val] * 4
    weights = ov.opset10.constant(np.array(values), dtype=np.float32)
    add_node = ov.opset10.add(param, weights)

    result = ov.opset10.result(add_node)
    model = Model([result], [param])

    apply_moc_transformations(model, cf=False, smart_reshape=True)  # apply_flush_fp32_subnormals_to_zero is called inside

    assert np.all(weights.data[4:8] != subnorm_val)
    assert np.all(weights.data[4:8] == 0.0)
|
||||
|
||||
@@ -0,0 +1,30 @@
|
||||
// Copyright (C) 2018-2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <openvino/pass/graph_rewrite.hpp>
|
||||
#include <string>
|
||||
|
||||
#include "openvino/core/model.hpp"
|
||||
#include "openvino/pass/serialize.hpp"
|
||||
#include "transformations_visibility.hpp"
|
||||
|
||||
namespace ov {
|
||||
namespace pass {
|
||||
|
||||
class TRANSFORMATIONS_API FlushFP32SubnormalsToZero;
|
||||
|
||||
} // namespace pass
|
||||
} // namespace ov
|
||||
|
||||
/* @ingroup ie_transformation_common_api
|
||||
* @brief FlushFP32SubnormalsToZero flushes f32 subnormals to zero.
|
||||
* This is read/write expensive transformation, therefore should be run at offline phase.
|
||||
*/
|
||||
class ov::pass::FlushFP32SubnormalsToZero : public MatcherPass {
|
||||
public:
|
||||
OPENVINO_RTTI("FlushFP32SubnormalsToZero", "0");
|
||||
FlushFP32SubnormalsToZero();
|
||||
};
|
||||
@@ -0,0 +1,51 @@
|
||||
// Copyright (C) 2018-2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "transformations/flush_fp32_subnormals_to_zero.hpp"
|
||||
|
||||
#include <cmath>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include "itt.hpp"
|
||||
#include "openvino/opsets/opset10.hpp"
|
||||
#include "openvino/pass/pattern/op/wrap_type.hpp"
|
||||
#include "transformations/utils/utils.hpp"
|
||||
|
||||
using namespace std;
|
||||
using namespace ov;
|
||||
using namespace pass;
|
||||
|
||||
// Matches every f32 Constant in the graph and flushes subnormal values in its
// data buffer to zero. Returns true from the callback (i.e. reports the graph
// as changed) only when at least one value was actually rewritten.
ov::pass::FlushFP32SubnormalsToZero::FlushFP32SubnormalsToZero() {
    MATCHER_SCOPE(FlushFP32SubnormalsToZero);

    auto node_pattern = pattern::wrap_type<opset10::Constant>();

    matcher_pass_callback callback = [=](pattern::Matcher& m) {
        auto node = dynamic_pointer_cast<ov::opset10::Constant>(m.get_match_root());

        if (!node)
            return false;
        // Only f32 constants can contain f32 subnormals; skip everything else.
        if (node->get_output_element_type(0) != element::f32)
            return false;

        // NOTE: the constant's blob is mutated in place through const_cast.
        // This is only acceptable at the offline phase, when the buffer is not
        // shared with any other consumer.
        auto* data = const_cast<float*>(node->get_data_ptr<float>());
        const auto size = ov::shape_size(node->get_shape());

        bool has_subnormals = false;
        for (size_t i = 0; i < size; ++i) {
            // std::fpclassify reports FP_SUBNORMAL for both positive and
            // negative subnormals, so no std::abs is required here.
            if (std::fpclassify(data[i]) == FP_SUBNORMAL) {
                data[i] = 0.0f;
                has_subnormals = true;
            }
        }
        return has_subnormals;
    };

    auto m = make_shared<pattern::Matcher>(node_pattern, matcher_name);
    register_matcher(m, callback);
}
|
||||
@@ -0,0 +1,376 @@
|
||||
// Copyright (C) 2018-2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <openvino/core/model.hpp>
|
||||
#include <openvino/opsets/opset10.hpp>
|
||||
#include <openvino/pass/manager.hpp>
|
||||
#include <transformations/flush_fp32_subnormals_to_zero.hpp>
|
||||
#include <transformations/init_node_info.hpp>
|
||||
|
||||
#include "common_test_utils/ngraph_test_utils.hpp"
|
||||
|
||||
using namespace testing;
|
||||
using namespace ov;
|
||||
using namespace ov::opset10;
|
||||
using namespace std;
|
||||
|
||||
namespace {
|
||||
union FloatIntUnion {
|
||||
uint32_t u;
|
||||
float f;
|
||||
};
|
||||
FloatIntUnion maximum_subnorm_val = {0x007fffff}; // = 2^−126 * (1 - 2^−23) ~= 1.1754942107e-38f
|
||||
FloatIntUnion minimum_subnorm_val = {0x00000001}; // = 2^−149 ~= 1.4012984643e-45f
|
||||
FloatIntUnion minimum_norm_val = {0x00800000}; // = 2^−126 ~= 1.1754943508-38f
|
||||
} // namespace
|
||||
|
||||
TEST_F(TransformationTestsF, test_flush_fp32_subnorm_to_zero_max_subnorm) {
|
||||
float subnormal_val = maximum_subnorm_val.f;
|
||||
{
|
||||
auto input = std::make_shared<Parameter>(element::f32, Shape{1, 3, 12, 12});
|
||||
|
||||
auto const_weights = Constant::create(element::f32,
|
||||
Shape{1, 3, 4, 1},
|
||||
{0.0f,
|
||||
1.0f,
|
||||
2.0f,
|
||||
3.0f,
|
||||
4.0f,
|
||||
5.0f,
|
||||
subnormal_val,
|
||||
subnormal_val,
|
||||
subnormal_val,
|
||||
subnormal_val,
|
||||
subnormal_val,
|
||||
subnormal_val});
|
||||
auto conv = std::make_shared<Convolution>(input,
|
||||
const_weights,
|
||||
Strides{1, 1},
|
||||
CoordinateDiff{0, 0},
|
||||
CoordinateDiff{0, 0},
|
||||
Strides{1, 1});
|
||||
function = std::make_shared<Model>(NodeVector{conv}, ParameterVector{input});
|
||||
|
||||
manager.register_pass<pass::FlushFP32SubnormalsToZero>();
|
||||
}
|
||||
|
||||
{
|
||||
auto input = std::make_shared<Parameter>(element::f32, Shape{1, 3, 12, 12});
|
||||
|
||||
auto const_weights = Constant::create(element::f32,
|
||||
Shape{1, 3, 4, 1},
|
||||
{0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f});
|
||||
auto conv = std::make_shared<Convolution>(input,
|
||||
const_weights,
|
||||
Strides{1, 1},
|
||||
CoordinateDiff{0, 0},
|
||||
CoordinateDiff{0, 0},
|
||||
Strides{1, 1});
|
||||
|
||||
function_ref = std::make_shared<Model>(NodeVector{conv}, ParameterVector{input});
|
||||
}
|
||||
comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES);
|
||||
}
|
||||
|
||||
TEST_F(TransformationTestsF, test_flush_fp32_subnorm_to_zero_min_subnorm) {
|
||||
float subnormal_val = minimum_subnorm_val.f;
|
||||
{
|
||||
auto input = std::make_shared<Parameter>(element::f32, Shape{1, 3, 12, 12});
|
||||
|
||||
auto const_weights = Constant::create(element::f32,
|
||||
Shape{1, 3, 4, 1},
|
||||
{0.0f,
|
||||
1.0f,
|
||||
2.0f,
|
||||
3.0f,
|
||||
4.0f,
|
||||
5.0f,
|
||||
subnormal_val,
|
||||
subnormal_val,
|
||||
subnormal_val,
|
||||
subnormal_val,
|
||||
subnormal_val,
|
||||
subnormal_val});
|
||||
auto conv = std::make_shared<Convolution>(input,
|
||||
const_weights,
|
||||
Strides{1, 1},
|
||||
CoordinateDiff{0, 0},
|
||||
CoordinateDiff{0, 0},
|
||||
Strides{1, 1});
|
||||
function = std::make_shared<Model>(NodeVector{conv}, ParameterVector{input});
|
||||
|
||||
manager.register_pass<pass::FlushFP32SubnormalsToZero>();
|
||||
}
|
||||
|
||||
{
|
||||
auto input = std::make_shared<Parameter>(element::f32, Shape{1, 3, 12, 12});
|
||||
|
||||
auto const_weights = Constant::create(element::f32,
|
||||
Shape{1, 3, 4, 1},
|
||||
{0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f});
|
||||
auto conv = std::make_shared<Convolution>(input,
|
||||
const_weights,
|
||||
Strides{1, 1},
|
||||
CoordinateDiff{0, 0},
|
||||
CoordinateDiff{0, 0},
|
||||
Strides{1, 1});
|
||||
|
||||
function_ref = std::make_shared<Model>(NodeVector{conv}, ParameterVector{input});
|
||||
}
|
||||
comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES);
|
||||
}
|
||||
|
||||
TEST_F(TransformationTestsF, test_flush_fp32_subnorm_to_zero_arbitrary_subnorm) {
|
||||
float subnormal_val = 2.0e-44f;
|
||||
{
|
||||
auto input = std::make_shared<Parameter>(element::f32, Shape{1, 3, 12, 12});
|
||||
|
||||
auto const_weights = Constant::create(element::f32,
|
||||
Shape{1, 3, 4, 1},
|
||||
{0.0f,
|
||||
1.0f,
|
||||
2.0f,
|
||||
3.0f,
|
||||
4.0f,
|
||||
5.0f,
|
||||
subnormal_val,
|
||||
subnormal_val,
|
||||
subnormal_val,
|
||||
subnormal_val,
|
||||
subnormal_val,
|
||||
subnormal_val});
|
||||
auto conv = std::make_shared<Convolution>(input,
|
||||
const_weights,
|
||||
Strides{1, 1},
|
||||
CoordinateDiff{0, 0},
|
||||
CoordinateDiff{0, 0},
|
||||
Strides{1, 1});
|
||||
function = std::make_shared<Model>(NodeVector{conv}, ParameterVector{input});
|
||||
|
||||
manager.register_pass<pass::FlushFP32SubnormalsToZero>();
|
||||
}
|
||||
|
||||
{
|
||||
auto input = std::make_shared<Parameter>(element::f32, Shape{1, 3, 12, 12});
|
||||
|
||||
auto const_weights = Constant::create(element::f32,
|
||||
Shape{1, 3, 4, 1},
|
||||
{0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f});
|
||||
auto conv = std::make_shared<Convolution>(input,
|
||||
const_weights,
|
||||
Strides{1, 1},
|
||||
CoordinateDiff{0, 0},
|
||||
CoordinateDiff{0, 0},
|
||||
Strides{1, 1});
|
||||
|
||||
function_ref = std::make_shared<Model>(NodeVector{conv}, ParameterVector{input});
|
||||
}
|
||||
comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES);
|
||||
}
|
||||
|
||||
TEST_F(TransformationTestsF, test_flush_fp32_subnorm_to_zero_max_neg_subnorm) {
|
||||
float subnormal_val = -maximum_subnorm_val.f;
|
||||
{
|
||||
auto input = std::make_shared<Parameter>(element::f32, Shape{1, 3, 12, 12});
|
||||
|
||||
auto const_weights = Constant::create(element::f32,
|
||||
Shape{1, 3, 4, 1},
|
||||
{0.0f,
|
||||
1.0f,
|
||||
2.0f,
|
||||
3.0f,
|
||||
4.0f,
|
||||
5.0f,
|
||||
subnormal_val,
|
||||
subnormal_val,
|
||||
subnormal_val,
|
||||
subnormal_val,
|
||||
subnormal_val,
|
||||
subnormal_val});
|
||||
auto conv = std::make_shared<Convolution>(input,
|
||||
const_weights,
|
||||
Strides{1, 1},
|
||||
CoordinateDiff{0, 0},
|
||||
CoordinateDiff{0, 0},
|
||||
Strides{1, 1});
|
||||
function = std::make_shared<Model>(NodeVector{conv}, ParameterVector{input});
|
||||
|
||||
manager.register_pass<pass::FlushFP32SubnormalsToZero>();
|
||||
}
|
||||
|
||||
{
|
||||
auto input = std::make_shared<Parameter>(element::f32, Shape{1, 3, 12, 12});
|
||||
|
||||
auto const_weights = Constant::create(element::f32,
|
||||
Shape{1, 3, 4, 1},
|
||||
{0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f});
|
||||
auto conv = std::make_shared<Convolution>(input,
|
||||
const_weights,
|
||||
Strides{1, 1},
|
||||
CoordinateDiff{0, 0},
|
||||
CoordinateDiff{0, 0},
|
||||
Strides{1, 1});
|
||||
|
||||
function_ref = std::make_shared<Model>(NodeVector{conv}, ParameterVector{input});
|
||||
}
|
||||
comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES);
|
||||
}
|
||||
|
||||
TEST_F(TransformationTestsF, test_flush_fp32_subnorm_to_zero_min_neg_subnorm) {
|
||||
float subnormal_val = -minimum_subnorm_val.f;
|
||||
{
|
||||
auto input = std::make_shared<Parameter>(element::f32, Shape{1, 3, 12, 12});
|
||||
|
||||
auto const_weights = Constant::create(element::f32,
|
||||
Shape{1, 3, 4, 1},
|
||||
{0.0f,
|
||||
1.0f,
|
||||
2.0f,
|
||||
3.0f,
|
||||
4.0f,
|
||||
5.0f,
|
||||
subnormal_val,
|
||||
subnormal_val,
|
||||
subnormal_val,
|
||||
subnormal_val,
|
||||
subnormal_val,
|
||||
subnormal_val});
|
||||
auto conv = std::make_shared<Convolution>(input,
|
||||
const_weights,
|
||||
Strides{1, 1},
|
||||
CoordinateDiff{0, 0},
|
||||
CoordinateDiff{0, 0},
|
||||
Strides{1, 1});
|
||||
function = std::make_shared<Model>(NodeVector{conv}, ParameterVector{input});
|
||||
|
||||
manager.register_pass<pass::FlushFP32SubnormalsToZero>();
|
||||
}
|
||||
|
||||
{
|
||||
auto input = std::make_shared<Parameter>(element::f32, Shape{1, 3, 12, 12});
|
||||
|
||||
auto const_weights = Constant::create(element::f32,
|
||||
Shape{1, 3, 4, 1},
|
||||
{0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f});
|
||||
auto conv = std::make_shared<Convolution>(input,
|
||||
const_weights,
|
||||
Strides{1, 1},
|
||||
CoordinateDiff{0, 0},
|
||||
CoordinateDiff{0, 0},
|
||||
Strides{1, 1});
|
||||
|
||||
function_ref = std::make_shared<Model>(NodeVector{conv}, ParameterVector{input});
|
||||
}
|
||||
comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES);
|
||||
}
|
||||
|
||||
TEST_F(TransformationTestsF, test_flush_fp32_subnorm_to_zero_arbitrary_neg_subnorm) {
|
||||
float subnormal_val = -2.0e-45f;
|
||||
{
|
||||
auto input = std::make_shared<Parameter>(element::f32, Shape{1, 3, 12, 12});
|
||||
|
||||
auto const_weights = Constant::create(element::f32,
|
||||
Shape{1, 3, 4, 1},
|
||||
{0.0f,
|
||||
1.0f,
|
||||
2.0f,
|
||||
3.0f,
|
||||
4.0f,
|
||||
5.0f,
|
||||
subnormal_val,
|
||||
subnormal_val,
|
||||
subnormal_val,
|
||||
subnormal_val,
|
||||
subnormal_val,
|
||||
subnormal_val});
|
||||
auto conv = std::make_shared<Convolution>(input,
|
||||
const_weights,
|
||||
Strides{1, 1},
|
||||
CoordinateDiff{0, 0},
|
||||
CoordinateDiff{0, 0},
|
||||
Strides{1, 1});
|
||||
function = std::make_shared<Model>(NodeVector{conv}, ParameterVector{input});
|
||||
|
||||
manager.register_pass<pass::FlushFP32SubnormalsToZero>();
|
||||
}
|
||||
|
||||
{
|
||||
auto input = std::make_shared<Parameter>(element::f32, Shape{1, 3, 12, 12});
|
||||
|
||||
auto const_weights = Constant::create(element::f32,
|
||||
Shape{1, 3, 4, 1},
|
||||
{0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f});
|
||||
auto conv = std::make_shared<Convolution>(input,
|
||||
const_weights,
|
||||
Strides{1, 1},
|
||||
CoordinateDiff{0, 0},
|
||||
CoordinateDiff{0, 0},
|
||||
Strides{1, 1});
|
||||
|
||||
function_ref = std::make_shared<Model>(NodeVector{conv}, ParameterVector{input});
|
||||
}
|
||||
comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES);
|
||||
}
|
||||
|
||||
TEST_F(TransformationTestsF, test_flush_fp32_subnorm_to_zero_arbitrary_norm) {
|
||||
// minimum normalized val should not be flushed to zero
|
||||
float normal_val = minimum_norm_val.f;
|
||||
{
|
||||
auto input = std::make_shared<Parameter>(element::f32, Shape{1, 3, 12, 12});
|
||||
|
||||
auto const_weights = Constant::create(element::f32,
|
||||
Shape{1, 3, 4, 1},
|
||||
{0.0f,
|
||||
1.0f,
|
||||
2.0f,
|
||||
3.0f,
|
||||
4.0f,
|
||||
5.0f,
|
||||
normal_val,
|
||||
normal_val,
|
||||
normal_val,
|
||||
normal_val,
|
||||
normal_val,
|
||||
normal_val});
|
||||
auto conv = std::make_shared<Convolution>(input,
|
||||
const_weights,
|
||||
Strides{1, 1},
|
||||
CoordinateDiff{0, 0},
|
||||
CoordinateDiff{0, 0},
|
||||
Strides{1, 1});
|
||||
function = std::make_shared<Model>(NodeVector{conv}, ParameterVector{input});
|
||||
|
||||
manager.register_pass<pass::FlushFP32SubnormalsToZero>();
|
||||
}
|
||||
|
||||
{
|
||||
auto input = std::make_shared<Parameter>(element::f32, Shape{1, 3, 12, 12});
|
||||
|
||||
auto const_weights = Constant::create(element::f32,
|
||||
Shape{1, 3, 4, 1},
|
||||
{0.0f,
|
||||
1.0f,
|
||||
2.0f,
|
||||
3.0f,
|
||||
4.0f,
|
||||
5.0f,
|
||||
normal_val,
|
||||
normal_val,
|
||||
normal_val,
|
||||
normal_val,
|
||||
normal_val,
|
||||
normal_val});
|
||||
auto conv = std::make_shared<Convolution>(input,
|
||||
const_weights,
|
||||
Strides{1, 1},
|
||||
CoordinateDiff{0, 0},
|
||||
CoordinateDiff{0, 0},
|
||||
Strides{1, 1});
|
||||
|
||||
function_ref = std::make_shared<Model>(NodeVector{conv}, ParameterVector{input});
|
||||
}
|
||||
comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES);
|
||||
}
|
||||
@@ -355,6 +355,8 @@ void Input::cloneBlobIfRequired() {
|
||||
if (weightCache) {
|
||||
MemoryPtr ptr = *weightCache->findOrCreate(blobKey(), cloneBlob);
|
||||
memoryPtr = std::const_pointer_cast<const Memory>(ptr);
|
||||
// IRs already have all subnormals flushed to zero, but in
|
||||
// read_model scenario with directly loaded original model still can have subnormals
|
||||
} else if (isBlobAligned() && !hasSubnormals() && !isWA()) {
|
||||
auto ptr = new Memory(getEngine());
|
||||
ptr->Create(memDesc, constOp->get_data_ptr());
|
||||
|
||||
Reference in New Issue
Block a user