[LoadTime][MO] flush fp32 subnormals to zero at offline phase (#15929)

* flush fp32 subnormals to zero in IR

* style fix in test_offline_api.py

* simplified call of FlushFP32SubnormalsToZero: it is called from offline_transformations.cpp

* reverted offline_transformations.py

* use fpclassify

* style-fix

* Update src/common/transformations/tests/common_optimizations/flush_fp32_subnormals_to_zero_test.cpp

Co-authored-by: Roman Kazantsev <roman.kazantsev@intel.com>

---------

Co-authored-by: Roman Kazantsev <roman.kazantsev@intel.com>
This commit is contained in:
Pavel Esir
2023-03-09 13:21:28 +01:00
committed by GitHub
parent f04507f56c
commit e43f606750
6 changed files with 478 additions and 0 deletions

View File

@@ -17,6 +17,7 @@
#include <transformations/common_optimizations/mark_precision_sensitive_shapeof_subgraphs.hpp>
#include <transformations/common_optimizations/moc_legacy_transformations.hpp>
#include <transformations/common_optimizations/moc_transformations.hpp>
#include <transformations/flush_fp32_subnormals_to_zero.hpp>
#include <transformations/op_conversions/convert_sequences_to_tensor_iterator.hpp>
#include <transformations/smart_reshape/smart_reshape.hpp>
@@ -38,6 +39,7 @@ void regmodule_offline_transformations(py::module m) {
if (smart_reshape)
manager.register_pass<ov::pass::SmartReshape>();
manager.register_pass<ov::pass::MOCTransformations>(cf);
manager.register_pass<ov::pass::FlushFP32SubnormalsToZero>();
manager.run_passes(model);
},
py::arg("model"),

View File

@@ -339,3 +339,20 @@ def test_convert_gru_to_tensor_iterator():
# assert that GRU sequence got transformed into TensorIterator
assert "GRUSequence" not in ops_types
assert "TensorIterator" in ops_types
# Verifies that applying MOC transformations rewrites fp32 subnormal values in a
# Constant's buffer to exact zeros (FlushFP32SubnormalsToZero runs inside
# apply_moc_transformations at the offline phase).
def test_flush_fp32_subnormals_to_zero():
parameter = ov.opset10.parameter([1, 8], name="X")
subnorm_val = -2.0e-45  # subnormal for fp32: |x| < 2^-126 (~1.18e-38)
weights = ov.opset10.constant(np.array([0.0, 1.0, 2.0, 3.0, subnorm_val, subnorm_val, subnorm_val, subnorm_val]),
dtype=np.float32)
add_node = ov.opset10.add(parameter, weights)
result = ov.opset10.result(add_node)
model = Model([result], [parameter])
apply_moc_transformations(model, cf=False, smart_reshape=True)  # apply_flush_fp32_subnormals_to_zero is called inside
# The constant is patched in place: the four subnormal entries become exact zeros.
assert np.all(weights.data[4:8] != subnorm_val)
assert np.all(weights.data[4:8] == 0.0)

View File

@@ -0,0 +1,30 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <openvino/pass/graph_rewrite.hpp>
#include <string>
#include "openvino/core/model.hpp"
#include "openvino/pass/serialize.hpp"
#include "transformations_visibility.hpp"
namespace ov {
namespace pass {
class TRANSFORMATIONS_API FlushFP32SubnormalsToZero;
} // namespace pass
} // namespace ov
/**
 * @ingroup ie_transformation_common_api
 * @brief FlushFP32SubnormalsToZero flushes f32 subnormals to zero.
 *
 * This is a read/write expensive transformation, therefore it should be run at the offline phase.
 */
class ov::pass::FlushFP32SubnormalsToZero : public MatcherPass {
public:
    OPENVINO_RTTI("FlushFP32SubnormalsToZero", "0");
    FlushFP32SubnormalsToZero();
};

View File

@@ -0,0 +1,51 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "transformations/flush_fp32_subnormals_to_zero.hpp"
#include <cmath>
#include <memory>
#include <vector>
#include "itt.hpp"
#include "openvino/opsets/opset10.hpp"
#include "openvino/pass/pattern/op/wrap_type.hpp"
#include "transformations/utils/utils.hpp"
using namespace std;
using namespace ov;
using namespace pass;
// Matches every f32 Constant and zeroes out subnormal values directly in the
// constant's data buffer. Returns true from the callback only when at least
// one value was actually rewritten, so the pass manager can track changes.
ov::pass::FlushFP32SubnormalsToZero::FlushFP32SubnormalsToZero() {
    MATCHER_SCOPE(FlushFP32SubnormalsToZero);
    auto node_pattern = pattern::wrap_type<opset10::Constant>();

    matcher_pass_callback callback = [=](pattern::Matcher& m) {
        auto node = dynamic_pointer_cast<ov::opset10::Constant>(m.get_match_root());
        if (!node)
            return false;
        // Only f32 constants can hold fp32 subnormals; skip everything else.
        if (node->get_output_element_type(0) != element::f32)
            return false;

        // NOTE(review): the buffer is patched in place through const_cast; this
        // assumes the offline phase where the model exclusively owns its weights.
        auto* data = const_cast<float*>(node->get_data_ptr<float>());
        const auto size = ov::shape_size(node->get_shape());

        bool has_subnormals = false;
        for (size_t i = 0; i < size; ++i) {
            // fpclassify is sign-independent, so no abs() is needed here.
            if (std::fpclassify(data[i]) == FP_SUBNORMAL) {
                data[i] = 0.0f;
                has_subnormals = true;
            }
        }
        return has_subnormals;
    };

    auto m = make_shared<pattern::Matcher>(node_pattern, matcher_name);
    register_matcher(m, callback);
}

View File

@@ -0,0 +1,376 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <gtest/gtest.h>
#include <openvino/core/model.hpp>
#include <openvino/opsets/opset10.hpp>
#include <openvino/pass/manager.hpp>
#include <transformations/flush_fp32_subnormals_to_zero.hpp>
#include <transformations/init_node_info.hpp>
#include "common_test_utils/ngraph_test_utils.hpp"
using namespace testing;
using namespace ov;
using namespace ov::opset10;
using namespace std;
namespace {
// Bit-pattern helper to construct exact fp32 boundary values.
// NOTE(review): reading .f after writing .u is type punning via a union —
// well-defined in C but technically UB in C++ (works on GCC/Clang/MSVC);
// std::bit_cast would be the clean C++20 alternative.
union FloatIntUnion {
uint32_t u;
float f;
};
FloatIntUnion maximum_subnorm_val = {0x007fffff}; // = 2^-126 * (1 - 2^-23) ~= 1.1754942107e-38f (largest subnormal)
FloatIntUnion minimum_subnorm_val = {0x00000001}; // = 2^-149 ~= 1.4012984643e-45f (smallest positive subnormal)
FloatIntUnion minimum_norm_val = {0x00800000}; // = 2^-126 ~= 1.1754943508e-38f (smallest normal; must NOT be flushed)
} // namespace
TEST_F(TransformationTestsF, test_flush_fp32_subnorm_to_zero_max_subnorm) {
float subnormal_val = maximum_subnorm_val.f;
{
auto input = std::make_shared<Parameter>(element::f32, Shape{1, 3, 12, 12});
auto const_weights = Constant::create(element::f32,
Shape{1, 3, 4, 1},
{0.0f,
1.0f,
2.0f,
3.0f,
4.0f,
5.0f,
subnormal_val,
subnormal_val,
subnormal_val,
subnormal_val,
subnormal_val,
subnormal_val});
auto conv = std::make_shared<Convolution>(input,
const_weights,
Strides{1, 1},
CoordinateDiff{0, 0},
CoordinateDiff{0, 0},
Strides{1, 1});
function = std::make_shared<Model>(NodeVector{conv}, ParameterVector{input});
manager.register_pass<pass::FlushFP32SubnormalsToZero>();
}
{
auto input = std::make_shared<Parameter>(element::f32, Shape{1, 3, 12, 12});
auto const_weights = Constant::create(element::f32,
Shape{1, 3, 4, 1},
{0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f});
auto conv = std::make_shared<Convolution>(input,
const_weights,
Strides{1, 1},
CoordinateDiff{0, 0},
CoordinateDiff{0, 0},
Strides{1, 1});
function_ref = std::make_shared<Model>(NodeVector{conv}, ParameterVector{input});
}
comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES);
}
TEST_F(TransformationTestsF, test_flush_fp32_subnorm_to_zero_min_subnorm) {
float subnormal_val = minimum_subnorm_val.f;
{
auto input = std::make_shared<Parameter>(element::f32, Shape{1, 3, 12, 12});
auto const_weights = Constant::create(element::f32,
Shape{1, 3, 4, 1},
{0.0f,
1.0f,
2.0f,
3.0f,
4.0f,
5.0f,
subnormal_val,
subnormal_val,
subnormal_val,
subnormal_val,
subnormal_val,
subnormal_val});
auto conv = std::make_shared<Convolution>(input,
const_weights,
Strides{1, 1},
CoordinateDiff{0, 0},
CoordinateDiff{0, 0},
Strides{1, 1});
function = std::make_shared<Model>(NodeVector{conv}, ParameterVector{input});
manager.register_pass<pass::FlushFP32SubnormalsToZero>();
}
{
auto input = std::make_shared<Parameter>(element::f32, Shape{1, 3, 12, 12});
auto const_weights = Constant::create(element::f32,
Shape{1, 3, 4, 1},
{0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f});
auto conv = std::make_shared<Convolution>(input,
const_weights,
Strides{1, 1},
CoordinateDiff{0, 0},
CoordinateDiff{0, 0},
Strides{1, 1});
function_ref = std::make_shared<Model>(NodeVector{conv}, ParameterVector{input});
}
comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES);
}
TEST_F(TransformationTestsF, test_flush_fp32_subnorm_to_zero_arbitrary_subnorm) {
float subnormal_val = 2.0e-44f;
{
auto input = std::make_shared<Parameter>(element::f32, Shape{1, 3, 12, 12});
auto const_weights = Constant::create(element::f32,
Shape{1, 3, 4, 1},
{0.0f,
1.0f,
2.0f,
3.0f,
4.0f,
5.0f,
subnormal_val,
subnormal_val,
subnormal_val,
subnormal_val,
subnormal_val,
subnormal_val});
auto conv = std::make_shared<Convolution>(input,
const_weights,
Strides{1, 1},
CoordinateDiff{0, 0},
CoordinateDiff{0, 0},
Strides{1, 1});
function = std::make_shared<Model>(NodeVector{conv}, ParameterVector{input});
manager.register_pass<pass::FlushFP32SubnormalsToZero>();
}
{
auto input = std::make_shared<Parameter>(element::f32, Shape{1, 3, 12, 12});
auto const_weights = Constant::create(element::f32,
Shape{1, 3, 4, 1},
{0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f});
auto conv = std::make_shared<Convolution>(input,
const_weights,
Strides{1, 1},
CoordinateDiff{0, 0},
CoordinateDiff{0, 0},
Strides{1, 1});
function_ref = std::make_shared<Model>(NodeVector{conv}, ParameterVector{input});
}
comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES);
}
TEST_F(TransformationTestsF, test_flush_fp32_subnorm_to_zero_max_neg_subnorm) {
float subnormal_val = -maximum_subnorm_val.f;
{
auto input = std::make_shared<Parameter>(element::f32, Shape{1, 3, 12, 12});
auto const_weights = Constant::create(element::f32,
Shape{1, 3, 4, 1},
{0.0f,
1.0f,
2.0f,
3.0f,
4.0f,
5.0f,
subnormal_val,
subnormal_val,
subnormal_val,
subnormal_val,
subnormal_val,
subnormal_val});
auto conv = std::make_shared<Convolution>(input,
const_weights,
Strides{1, 1},
CoordinateDiff{0, 0},
CoordinateDiff{0, 0},
Strides{1, 1});
function = std::make_shared<Model>(NodeVector{conv}, ParameterVector{input});
manager.register_pass<pass::FlushFP32SubnormalsToZero>();
}
{
auto input = std::make_shared<Parameter>(element::f32, Shape{1, 3, 12, 12});
auto const_weights = Constant::create(element::f32,
Shape{1, 3, 4, 1},
{0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f});
auto conv = std::make_shared<Convolution>(input,
const_weights,
Strides{1, 1},
CoordinateDiff{0, 0},
CoordinateDiff{0, 0},
Strides{1, 1});
function_ref = std::make_shared<Model>(NodeVector{conv}, ParameterVector{input});
}
comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES);
}
TEST_F(TransformationTestsF, test_flush_fp32_subnorm_to_zero_min_neg_subnorm) {
float subnormal_val = -minimum_subnorm_val.f;
{
auto input = std::make_shared<Parameter>(element::f32, Shape{1, 3, 12, 12});
auto const_weights = Constant::create(element::f32,
Shape{1, 3, 4, 1},
{0.0f,
1.0f,
2.0f,
3.0f,
4.0f,
5.0f,
subnormal_val,
subnormal_val,
subnormal_val,
subnormal_val,
subnormal_val,
subnormal_val});
auto conv = std::make_shared<Convolution>(input,
const_weights,
Strides{1, 1},
CoordinateDiff{0, 0},
CoordinateDiff{0, 0},
Strides{1, 1});
function = std::make_shared<Model>(NodeVector{conv}, ParameterVector{input});
manager.register_pass<pass::FlushFP32SubnormalsToZero>();
}
{
auto input = std::make_shared<Parameter>(element::f32, Shape{1, 3, 12, 12});
auto const_weights = Constant::create(element::f32,
Shape{1, 3, 4, 1},
{0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f});
auto conv = std::make_shared<Convolution>(input,
const_weights,
Strides{1, 1},
CoordinateDiff{0, 0},
CoordinateDiff{0, 0},
Strides{1, 1});
function_ref = std::make_shared<Model>(NodeVector{conv}, ParameterVector{input});
}
comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES);
}
TEST_F(TransformationTestsF, test_flush_fp32_subnorm_to_zero_arbitrary_neg_subnorm) {
float subnormal_val = -2.0e-45f;
{
auto input = std::make_shared<Parameter>(element::f32, Shape{1, 3, 12, 12});
auto const_weights = Constant::create(element::f32,
Shape{1, 3, 4, 1},
{0.0f,
1.0f,
2.0f,
3.0f,
4.0f,
5.0f,
subnormal_val,
subnormal_val,
subnormal_val,
subnormal_val,
subnormal_val,
subnormal_val});
auto conv = std::make_shared<Convolution>(input,
const_weights,
Strides{1, 1},
CoordinateDiff{0, 0},
CoordinateDiff{0, 0},
Strides{1, 1});
function = std::make_shared<Model>(NodeVector{conv}, ParameterVector{input});
manager.register_pass<pass::FlushFP32SubnormalsToZero>();
}
{
auto input = std::make_shared<Parameter>(element::f32, Shape{1, 3, 12, 12});
auto const_weights = Constant::create(element::f32,
Shape{1, 3, 4, 1},
{0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f});
auto conv = std::make_shared<Convolution>(input,
const_weights,
Strides{1, 1},
CoordinateDiff{0, 0},
CoordinateDiff{0, 0},
Strides{1, 1});
function_ref = std::make_shared<Model>(NodeVector{conv}, ParameterVector{input});
}
comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES);
}
TEST_F(TransformationTestsF, test_flush_fp32_subnorm_to_zero_arbitrary_norm) {
// minimum normalized val should not be flushed to zero
float normal_val = minimum_norm_val.f;
{
auto input = std::make_shared<Parameter>(element::f32, Shape{1, 3, 12, 12});
auto const_weights = Constant::create(element::f32,
Shape{1, 3, 4, 1},
{0.0f,
1.0f,
2.0f,
3.0f,
4.0f,
5.0f,
normal_val,
normal_val,
normal_val,
normal_val,
normal_val,
normal_val});
auto conv = std::make_shared<Convolution>(input,
const_weights,
Strides{1, 1},
CoordinateDiff{0, 0},
CoordinateDiff{0, 0},
Strides{1, 1});
function = std::make_shared<Model>(NodeVector{conv}, ParameterVector{input});
manager.register_pass<pass::FlushFP32SubnormalsToZero>();
}
{
auto input = std::make_shared<Parameter>(element::f32, Shape{1, 3, 12, 12});
auto const_weights = Constant::create(element::f32,
Shape{1, 3, 4, 1},
{0.0f,
1.0f,
2.0f,
3.0f,
4.0f,
5.0f,
normal_val,
normal_val,
normal_val,
normal_val,
normal_val,
normal_val});
auto conv = std::make_shared<Convolution>(input,
const_weights,
Strides{1, 1},
CoordinateDiff{0, 0},
CoordinateDiff{0, 0},
Strides{1, 1});
function_ref = std::make_shared<Model>(NodeVector{conv}, ParameterVector{input});
}
comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES);
}

View File

@@ -355,6 +355,8 @@ void Input::cloneBlobIfRequired() {
if (weightCache) {
MemoryPtr ptr = *weightCache->findOrCreate(blobKey(), cloneBlob);
memoryPtr = std::const_pointer_cast<const Memory>(ptr);
// IRs already have all subnormals flushed to zero, but in the
// read_model scenario a directly loaded original model can still contain subnormals
} else if (isBlobAligned() && !hasSubnormals() && !isWA()) {
auto ptr = new Memory(getEngine());
ptr->Create(memDesc, constOp->get_data_ptr());