[LoadTime][MO] flush fp32 subnormals to zero at offline phase (#15929)
* flush fp32 subnormals to zero in IR * style fix in test_offline_api.py * simplified call of FlushFP32SubnormalsToZero: is called form offline_transformations.cpp * reverted offline_transformations.py * use fpclassify * style-fix * Update src/common/transformations/tests/common_optimizations/flush_fp32_subnormals_to_zero_test.cpp Co-authored-by: Roman Kazantsev <roman.kazantsev@intel.com> --------- Co-authored-by: Roman Kazantsev <roman.kazantsev@intel.com>
This commit is contained in:
@@ -17,6 +17,7 @@
|
||||
#include <transformations/common_optimizations/mark_precision_sensitive_shapeof_subgraphs.hpp>
|
||||
#include <transformations/common_optimizations/moc_legacy_transformations.hpp>
|
||||
#include <transformations/common_optimizations/moc_transformations.hpp>
|
||||
#include <transformations/flush_fp32_subnormals_to_zero.hpp>
|
||||
#include <transformations/op_conversions/convert_sequences_to_tensor_iterator.hpp>
|
||||
#include <transformations/smart_reshape/smart_reshape.hpp>
|
||||
|
||||
@@ -38,6 +39,7 @@ void regmodule_offline_transformations(py::module m) {
|
||||
if (smart_reshape)
|
||||
manager.register_pass<ov::pass::SmartReshape>();
|
||||
manager.register_pass<ov::pass::MOCTransformations>(cf);
|
||||
manager.register_pass<ov::pass::FlushFP32SubnormalsToZero>();
|
||||
manager.run_passes(model);
|
||||
},
|
||||
py::arg("model"),
|
||||
|
||||
@@ -339,3 +339,20 @@ def test_convert_gru_to_tensor_iterator():
|
||||
# assert that GRU sequence got transformed into TensorIterator
|
||||
assert "GRUSequence" not in ops_types
|
||||
assert "TensorIterator" in ops_types
|
||||
|
||||
|
||||
def test_flush_fp32_subnormals_to_zero():
    # FlushFP32SubnormalsToZero runs inside apply_moc_transformations, so
    # applying MOC must zero out the subnormal weight values in place.
    param = ov.opset10.parameter([1, 8], name="X")
    subnorm_val = -2.0e-45

    values = [0.0, 1.0, 2.0, 3.0] + [subnorm_val] * 4
    weights = ov.opset10.constant(np.array(values), dtype=np.float32)
    add_node = ov.opset10.add(param, weights)

    result = ov.opset10.result(add_node)
    model = Model([result], [param])

    apply_moc_transformations(model, cf=False, smart_reshape=True)  # apply_flush_fp32_subnormals_to_zero is called inside

    assert np.all(weights.data[4:8] != subnorm_val)
    assert np.all(weights.data[4:8] == 0.0)
|
||||
|
||||
@@ -0,0 +1,30 @@
|
||||
// Copyright (C) 2018-2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <openvino/pass/graph_rewrite.hpp>
|
||||
#include <string>
|
||||
|
||||
#include "openvino/core/model.hpp"
|
||||
#include "openvino/pass/serialize.hpp"
|
||||
#include "transformations_visibility.hpp"
|
||||
|
||||
namespace ov {
|
||||
namespace pass {
|
||||
|
||||
class TRANSFORMATIONS_API FlushFP32SubnormalsToZero;
|
||||
|
||||
} // namespace pass
|
||||
} // namespace ov
|
||||
|
||||
/* @ingroup ie_transformation_common_api
|
||||
* @brief FlushFP32SubnormalsToZero flushes f32 subnormals to zero.
|
||||
* This is read/write expensive transformation, therefore should be run at offline phase.
|
||||
*/
|
||||
class ov::pass::FlushFP32SubnormalsToZero : public MatcherPass {
|
||||
public:
|
||||
OPENVINO_RTTI("FlushFP32SubnormalsToZero", "0");
|
||||
FlushFP32SubnormalsToZero();
|
||||
};
|
||||
@@ -0,0 +1,51 @@
|
||||
// Copyright (C) 2018-2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "transformations/flush_fp32_subnormals_to_zero.hpp"
|
||||
|
||||
#include <cmath>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include "itt.hpp"
|
||||
#include "openvino/opsets/opset10.hpp"
|
||||
#include "openvino/pass/pattern/op/wrap_type.hpp"
|
||||
#include "transformations/utils/utils.hpp"
|
||||
|
||||
using namespace std;
|
||||
using namespace ov;
|
||||
using namespace pass;
|
||||
|
||||
// Matches every f32 Constant in the graph and flushes subnormal values in its
// data buffer to zero. Returns true from the callback (i.e. reports the graph
// as changed) only when at least one value was actually rewritten.
ov::pass::FlushFP32SubnormalsToZero::FlushFP32SubnormalsToZero() {
    MATCHER_SCOPE(FlushFP32SubnormalsToZero);

    auto node_pattern = pattern::wrap_type<opset10::Constant>();

    matcher_pass_callback callback = [=](pattern::Matcher& m) {
        auto node = dynamic_pointer_cast<ov::opset10::Constant>(m.get_match_root());

        if (!node)
            return false;
        // Only f32 constants can contain f32 subnormals; skip everything else.
        if (node->get_output_element_type(0) != element::f32)
            return false;

        // NOTE: the constant's blob is mutated in place through const_cast.
        // This is only acceptable at the offline phase, when the buffer is not
        // shared with any other consumer.
        auto* data = const_cast<float*>(node->get_data_ptr<float>());
        const auto size = ov::shape_size(node->get_shape());

        bool has_subnormals = false;
        for (size_t i = 0; i < size; ++i) {
            // std::fpclassify reports FP_SUBNORMAL for both positive and
            // negative subnormals, so no std::abs is required here.
            if (std::fpclassify(data[i]) == FP_SUBNORMAL) {
                data[i] = 0.0f;
                has_subnormals = true;
            }
        }
        return has_subnormals;
    };

    auto m = make_shared<pattern::Matcher>(node_pattern, matcher_name);
    register_matcher(m, callback);
}
|
||||
@@ -0,0 +1,376 @@
|
||||
// Copyright (C) 2018-2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <openvino/core/model.hpp>
|
||||
#include <openvino/opsets/opset10.hpp>
|
||||
#include <openvino/pass/manager.hpp>
|
||||
#include <transformations/flush_fp32_subnormals_to_zero.hpp>
|
||||
#include <transformations/init_node_info.hpp>
|
||||
|
||||
#include "common_test_utils/ngraph_test_utils.hpp"
|
||||
|
||||
using namespace testing;
|
||||
using namespace ov;
|
||||
using namespace ov::opset10;
|
||||
using namespace std;
|
||||
|
||||
namespace {
|
||||
union FloatIntUnion {
|
||||
uint32_t u;
|
||||
float f;
|
||||
};
|
||||
FloatIntUnion maximum_subnorm_val = {0x007fffff}; // = 2^−126 * (1 - 2^−23) ~= 1.1754942107e-38f
|
||||
FloatIntUnion minimum_subnorm_val = {0x00000001}; // = 2^−149 ~= 1.4012984643e-45f
|
||||
FloatIntUnion minimum_norm_val = {0x00800000}; // = 2^−126 ~= 1.1754943508-38f
|
||||
} // namespace
|
||||
|
||||
TEST_F(TransformationTestsF, test_flush_fp32_subnorm_to_zero_max_subnorm) {
|
||||
float subnormal_val = maximum_subnorm_val.f;
|
||||
{
|
||||
auto input = std::make_shared<Parameter>(element::f32, Shape{1, 3, 12, 12});
|
||||
|
||||
auto const_weights = Constant::create(element::f32,
|
||||
Shape{1, 3, 4, 1},
|
||||
{0.0f,
|
||||
1.0f,
|
||||
2.0f,
|
||||
3.0f,
|
||||
4.0f,
|
||||
5.0f,
|
||||
subnormal_val,
|
||||
subnormal_val,
|
||||
subnormal_val,
|
||||
subnormal_val,
|
||||
subnormal_val,
|
||||
subnormal_val});
|
||||
auto conv = std::make_shared<Convolution>(input,
|
||||
const_weights,
|
||||
Strides{1, 1},
|
||||
CoordinateDiff{0, 0},
|
||||
CoordinateDiff{0, 0},
|
||||
Strides{1, 1});
|
||||
function = std::make_shared<Model>(NodeVector{conv}, ParameterVector{input});
|
||||
|
||||
manager.register_pass<pass::FlushFP32SubnormalsToZero>();
|
||||
}
|
||||
|
||||
{
|
||||
auto input = std::make_shared<Parameter>(element::f32, Shape{1, 3, 12, 12});
|
||||
|
||||
auto const_weights = Constant::create(element::f32,
|
||||
Shape{1, 3, 4, 1},
|
||||
{0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f});
|
||||
auto conv = std::make_shared<Convolution>(input,
|
||||
const_weights,
|
||||
Strides{1, 1},
|
||||
CoordinateDiff{0, 0},
|
||||
CoordinateDiff{0, 0},
|
||||
Strides{1, 1});
|
||||
|
||||
function_ref = std::make_shared<Model>(NodeVector{conv}, ParameterVector{input});
|
||||
}
|
||||
comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES);
|
||||
}
|
||||
|
||||
TEST_F(TransformationTestsF, test_flush_fp32_subnorm_to_zero_min_subnorm) {
|
||||
float subnormal_val = minimum_subnorm_val.f;
|
||||
{
|
||||
auto input = std::make_shared<Parameter>(element::f32, Shape{1, 3, 12, 12});
|
||||
|
||||
auto const_weights = Constant::create(element::f32,
|
||||
Shape{1, 3, 4, 1},
|
||||
{0.0f,
|
||||
1.0f,
|
||||
2.0f,
|
||||
3.0f,
|
||||
4.0f,
|
||||
5.0f,
|
||||
subnormal_val,
|
||||
subnormal_val,
|
||||
subnormal_val,
|
||||
subnormal_val,
|
||||
subnormal_val,
|
||||
subnormal_val});
|
||||
auto conv = std::make_shared<Convolution>(input,
|
||||
const_weights,
|
||||
Strides{1, 1},
|
||||
CoordinateDiff{0, 0},
|
||||
CoordinateDiff{0, 0},
|
||||
Strides{1, 1});
|
||||
function = std::make_shared<Model>(NodeVector{conv}, ParameterVector{input});
|
||||
|
||||
manager.register_pass<pass::FlushFP32SubnormalsToZero>();
|
||||
}
|
||||
|
||||
{
|
||||
auto input = std::make_shared<Parameter>(element::f32, Shape{1, 3, 12, 12});
|
||||
|
||||
auto const_weights = Constant::create(element::f32,
|
||||
Shape{1, 3, 4, 1},
|
||||
{0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f});
|
||||
auto conv = std::make_shared<Convolution>(input,
|
||||
const_weights,
|
||||
Strides{1, 1},
|
||||
CoordinateDiff{0, 0},
|
||||
CoordinateDiff{0, 0},
|
||||
Strides{1, 1});
|
||||
|
||||
function_ref = std::make_shared<Model>(NodeVector{conv}, ParameterVector{input});
|
||||
}
|
||||
comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES);
|
||||
}
|
||||
|
||||
TEST_F(TransformationTestsF, test_flush_fp32_subnorm_to_zero_arbitrary_subnorm) {
|
||||
float subnormal_val = 2.0e-44f;
|
||||
{
|
||||
auto input = std::make_shared<Parameter>(element::f32, Shape{1, 3, 12, 12});
|
||||
|
||||
auto const_weights = Constant::create(element::f32,
|
||||
Shape{1, 3, 4, 1},
|
||||
{0.0f,
|
||||
1.0f,
|
||||
2.0f,
|
||||
3.0f,
|
||||
4.0f,
|
||||
5.0f,
|
||||
subnormal_val,
|
||||
subnormal_val,
|
||||
subnormal_val,
|
||||
subnormal_val,
|
||||
subnormal_val,
|
||||
subnormal_val});
|
||||
auto conv = std::make_shared<Convolution>(input,
|
||||
const_weights,
|
||||
Strides{1, 1},
|
||||
CoordinateDiff{0, 0},
|
||||
CoordinateDiff{0, 0},
|
||||
Strides{1, 1});
|
||||
function = std::make_shared<Model>(NodeVector{conv}, ParameterVector{input});
|
||||
|
||||
manager.register_pass<pass::FlushFP32SubnormalsToZero>();
|
||||
}
|
||||
|
||||
{
|
||||
auto input = std::make_shared<Parameter>(element::f32, Shape{1, 3, 12, 12});
|
||||
|
||||
auto const_weights = Constant::create(element::f32,
|
||||
Shape{1, 3, 4, 1},
|
||||
{0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f});
|
||||
auto conv = std::make_shared<Convolution>(input,
|
||||
const_weights,
|
||||
Strides{1, 1},
|
||||
CoordinateDiff{0, 0},
|
||||
CoordinateDiff{0, 0},
|
||||
Strides{1, 1});
|
||||
|
||||
function_ref = std::make_shared<Model>(NodeVector{conv}, ParameterVector{input});
|
||||
}
|
||||
comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES);
|
||||
}
|
||||
|
||||
TEST_F(TransformationTestsF, test_flush_fp32_subnorm_to_zero_max_neg_subnorm) {
|
||||
float subnormal_val = -maximum_subnorm_val.f;
|
||||
{
|
||||
auto input = std::make_shared<Parameter>(element::f32, Shape{1, 3, 12, 12});
|
||||
|
||||
auto const_weights = Constant::create(element::f32,
|
||||
Shape{1, 3, 4, 1},
|
||||
{0.0f,
|
||||
1.0f,
|
||||
2.0f,
|
||||
3.0f,
|
||||
4.0f,
|
||||
5.0f,
|
||||
subnormal_val,
|
||||
subnormal_val,
|
||||
subnormal_val,
|
||||
subnormal_val,
|
||||
subnormal_val,
|
||||
subnormal_val});
|
||||
auto conv = std::make_shared<Convolution>(input,
|
||||
const_weights,
|
||||
Strides{1, 1},
|
||||
CoordinateDiff{0, 0},
|
||||
CoordinateDiff{0, 0},
|
||||
Strides{1, 1});
|
||||
function = std::make_shared<Model>(NodeVector{conv}, ParameterVector{input});
|
||||
|
||||
manager.register_pass<pass::FlushFP32SubnormalsToZero>();
|
||||
}
|
||||
|
||||
{
|
||||
auto input = std::make_shared<Parameter>(element::f32, Shape{1, 3, 12, 12});
|
||||
|
||||
auto const_weights = Constant::create(element::f32,
|
||||
Shape{1, 3, 4, 1},
|
||||
{0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f});
|
||||
auto conv = std::make_shared<Convolution>(input,
|
||||
const_weights,
|
||||
Strides{1, 1},
|
||||
CoordinateDiff{0, 0},
|
||||
CoordinateDiff{0, 0},
|
||||
Strides{1, 1});
|
||||
|
||||
function_ref = std::make_shared<Model>(NodeVector{conv}, ParameterVector{input});
|
||||
}
|
||||
comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES);
|
||||
}
|
||||
|
||||
TEST_F(TransformationTestsF, test_flush_fp32_subnorm_to_zero_min_neg_subnorm) {
|
||||
float subnormal_val = -minimum_subnorm_val.f;
|
||||
{
|
||||
auto input = std::make_shared<Parameter>(element::f32, Shape{1, 3, 12, 12});
|
||||
|
||||
auto const_weights = Constant::create(element::f32,
|
||||
Shape{1, 3, 4, 1},
|
||||
{0.0f,
|
||||
1.0f,
|
||||
2.0f,
|
||||
3.0f,
|
||||
4.0f,
|
||||
5.0f,
|
||||
subnormal_val,
|
||||
subnormal_val,
|
||||
subnormal_val,
|
||||
subnormal_val,
|
||||
subnormal_val,
|
||||
subnormal_val});
|
||||
auto conv = std::make_shared<Convolution>(input,
|
||||
const_weights,
|
||||
Strides{1, 1},
|
||||
CoordinateDiff{0, 0},
|
||||
CoordinateDiff{0, 0},
|
||||
Strides{1, 1});
|
||||
function = std::make_shared<Model>(NodeVector{conv}, ParameterVector{input});
|
||||
|
||||
manager.register_pass<pass::FlushFP32SubnormalsToZero>();
|
||||
}
|
||||
|
||||
{
|
||||
auto input = std::make_shared<Parameter>(element::f32, Shape{1, 3, 12, 12});
|
||||
|
||||
auto const_weights = Constant::create(element::f32,
|
||||
Shape{1, 3, 4, 1},
|
||||
{0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f});
|
||||
auto conv = std::make_shared<Convolution>(input,
|
||||
const_weights,
|
||||
Strides{1, 1},
|
||||
CoordinateDiff{0, 0},
|
||||
CoordinateDiff{0, 0},
|
||||
Strides{1, 1});
|
||||
|
||||
function_ref = std::make_shared<Model>(NodeVector{conv}, ParameterVector{input});
|
||||
}
|
||||
comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES);
|
||||
}
|
||||
|
||||
TEST_F(TransformationTestsF, test_flush_fp32_subnorm_to_zero_arbitrary_neg_subnorm) {
|
||||
float subnormal_val = -2.0e-45f;
|
||||
{
|
||||
auto input = std::make_shared<Parameter>(element::f32, Shape{1, 3, 12, 12});
|
||||
|
||||
auto const_weights = Constant::create(element::f32,
|
||||
Shape{1, 3, 4, 1},
|
||||
{0.0f,
|
||||
1.0f,
|
||||
2.0f,
|
||||
3.0f,
|
||||
4.0f,
|
||||
5.0f,
|
||||
subnormal_val,
|
||||
subnormal_val,
|
||||
subnormal_val,
|
||||
subnormal_val,
|
||||
subnormal_val,
|
||||
subnormal_val});
|
||||
auto conv = std::make_shared<Convolution>(input,
|
||||
const_weights,
|
||||
Strides{1, 1},
|
||||
CoordinateDiff{0, 0},
|
||||
CoordinateDiff{0, 0},
|
||||
Strides{1, 1});
|
||||
function = std::make_shared<Model>(NodeVector{conv}, ParameterVector{input});
|
||||
|
||||
manager.register_pass<pass::FlushFP32SubnormalsToZero>();
|
||||
}
|
||||
|
||||
{
|
||||
auto input = std::make_shared<Parameter>(element::f32, Shape{1, 3, 12, 12});
|
||||
|
||||
auto const_weights = Constant::create(element::f32,
|
||||
Shape{1, 3, 4, 1},
|
||||
{0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f});
|
||||
auto conv = std::make_shared<Convolution>(input,
|
||||
const_weights,
|
||||
Strides{1, 1},
|
||||
CoordinateDiff{0, 0},
|
||||
CoordinateDiff{0, 0},
|
||||
Strides{1, 1});
|
||||
|
||||
function_ref = std::make_shared<Model>(NodeVector{conv}, ParameterVector{input});
|
||||
}
|
||||
comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES);
|
||||
}
|
||||
|
||||
TEST_F(TransformationTestsF, test_flush_fp32_subnorm_to_zero_arbitrary_norm) {
|
||||
// minimum normalized val should not be flushed to zero
|
||||
float normal_val = minimum_norm_val.f;
|
||||
{
|
||||
auto input = std::make_shared<Parameter>(element::f32, Shape{1, 3, 12, 12});
|
||||
|
||||
auto const_weights = Constant::create(element::f32,
|
||||
Shape{1, 3, 4, 1},
|
||||
{0.0f,
|
||||
1.0f,
|
||||
2.0f,
|
||||
3.0f,
|
||||
4.0f,
|
||||
5.0f,
|
||||
normal_val,
|
||||
normal_val,
|
||||
normal_val,
|
||||
normal_val,
|
||||
normal_val,
|
||||
normal_val});
|
||||
auto conv = std::make_shared<Convolution>(input,
|
||||
const_weights,
|
||||
Strides{1, 1},
|
||||
CoordinateDiff{0, 0},
|
||||
CoordinateDiff{0, 0},
|
||||
Strides{1, 1});
|
||||
function = std::make_shared<Model>(NodeVector{conv}, ParameterVector{input});
|
||||
|
||||
manager.register_pass<pass::FlushFP32SubnormalsToZero>();
|
||||
}
|
||||
|
||||
{
|
||||
auto input = std::make_shared<Parameter>(element::f32, Shape{1, 3, 12, 12});
|
||||
|
||||
auto const_weights = Constant::create(element::f32,
|
||||
Shape{1, 3, 4, 1},
|
||||
{0.0f,
|
||||
1.0f,
|
||||
2.0f,
|
||||
3.0f,
|
||||
4.0f,
|
||||
5.0f,
|
||||
normal_val,
|
||||
normal_val,
|
||||
normal_val,
|
||||
normal_val,
|
||||
normal_val,
|
||||
normal_val});
|
||||
auto conv = std::make_shared<Convolution>(input,
|
||||
const_weights,
|
||||
Strides{1, 1},
|
||||
CoordinateDiff{0, 0},
|
||||
CoordinateDiff{0, 0},
|
||||
Strides{1, 1});
|
||||
|
||||
function_ref = std::make_shared<Model>(NodeVector{conv}, ParameterVector{input});
|
||||
}
|
||||
comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES);
|
||||
}
|
||||
@@ -355,6 +355,8 @@ void Input::cloneBlobIfRequired() {
|
||||
if (weightCache) {
|
||||
MemoryPtr ptr = *weightCache->findOrCreate(blobKey(), cloneBlob);
|
||||
memoryPtr = std::const_pointer_cast<const Memory>(ptr);
|
||||
// IRs already have all subnormals flushed to zero, but in
|
||||
// read_model scenario with directly loaded original model still can have subnormals
|
||||
} else if (isBlobAligned() && !hasSubnormals() && !isWA()) {
|
||||
auto ptr = new Memory(getEngine());
|
||||
ptr->Create(memDesc, constOp->get_data_ptr());
|
||||
|
||||
Reference in New Issue
Block a user