Efficient FP32 -> FP16 conversion for convert_precision, save_model, ovc and mo (#18988)

* WIP Postpone fp16 in CompressFloatConstantsImpl

* Apply suggestions from code review

Co-authored-by: Ilya Lavrenov <ilya.lavrenov@intel.com>

* WIP: Compression to FP16 in Serialize

* Prepared for efficient fp32 to fp16 conversion

* Update src/core/reference/src/runtime/reference/convert.cpp

* Called real slow reference implementations in the place where the optimized versions are supposed to be implemented

* Code style

* Fixed 0 values in the fast f64 to f16 compression

* Optimized convert_from_f32_to_f16_with_clamp

* Added optimized f32->f16 instance of change_constant_precision

* compression transformation Python test

* use tmp dir, minor corrections

* Update src/bindings/python/tests/test_transformations/test_compression.py

* Update src/bindings/python/tests/test_transformations/test_compression.py

* style fix

* define rt_info for postponed_fp16_compression

* remove redundant class

* fix temp dir for Win in test_compression.py

* update definitions in convert.hpp

* Update implementation in convert.cpp

* Update serialize.cpp

* Update compress_float_constants.cpp

* added macros for ARM/non_x86 in convert.cpp

* fix macros in convert.cpp

* change fixme placement in serialize.cpp

* style_fix

* Update src/core/reference/src/runtime/reference/convert.cpp

* style_fix

* Optimized count_out_of_f16_range

* Code style

* Revert unused

* Update src/core/src/pass/serialize.cpp

Co-authored-by: Ilya Lavrenov <ilya.lavrenov@intel.com>

* Update src/core/reference/src/runtime/reference/convert.cpp

Co-authored-by: Ilya Lavrenov <ilya.lavrenov@intel.com>

* use optimized convert_from_f32_to_f16_with_clamp for non postponed

* minor corrections

* Update src/common/transformations/src/transformations/common_optimizations/compress_float_constants.cpp

* Update compress_float_constants.cpp

* Switched mo and ovc to save_model instead of serialize to leverage performance improvements in fp32->fp16

* Applied minor code improvements to address review feedback

* Minor changes in code

* Update tools/ovc/openvino/tools/ovc/main.py

* Apply suggestions from code review

* Fixed failed test in case when both usual xml compression and fp16 compression are applied simultaneously (disabled for now)

* Added description for CompressFloatConstantImpl postponed parameter

* Description of postponed parameter for CompressFloatConstants

* Reverted switching to save_model in mo as the compression can be applied not only via CLI and old code should be kept for Python path (not applicable for ovc)

* Removed remaining committed test artefacts and reverted remaining changes in mo

---------

Co-authored-by: Ilya Lavrenov <ilya.lavrenov@intel.com>
Co-authored-by: dmitrygo <dmitry.gorokhov@intel.com>
Co-authored-by: Vladimir Paramuzov <vladimir.paramuzov@intel.com>
Co-authored-by: Pavel Esir <pavel.esir@intel.com>
Co-authored-by: Pavel Esir <pavel.esir@gmail.com>
This commit is contained in:
Sergey Lyalin
2023-08-17 15:08:33 +04:00
committed by GitHub
parent 2394732055
commit f0300a36eb
11 changed files with 666 additions and 44 deletions

View File

@@ -0,0 +1,116 @@
# -*- coding: utf-8 -*-
# Copyright (C) 2018-2023 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import tempfile
from typing import List
import numpy as np
from openvino.runtime.op import Parameter, Constant
from openvino.runtime.opset12 import add, multiply
import openvino as ov
def make_constant(values, transposed):
    """Wrap ``values`` into a two-dimensional f32 ``Constant``.

    The shape is ``[1, len(values)]`` when ``transposed`` is truthy and
    ``[len(values), 1]`` otherwise.
    """
    count = len(values)
    if transposed:
        dims = [1, count]
    else:
        dims = [count, 1]
    return Constant(ov.Type.f32, ov.Shape(dims), values)
# Expected conversion policy encoded by the tables below:
# fp16 denormals are kept, fp32 denormals are flushed to zero.
# NOTE(review): the third entry (1.00097656) is positive while its neighbours
# are negative -- possibly meant to be -1.00097656; confirm before changing.
in_range = [
    -65504.0,
    -2.0,
    1.00097656,
    -1.0,
    -0.99951172,
    -0.00006103515625,
    -0.000000059604645,
    0.0,
    0.000000059604645,
    0.99951172,
    0.00006103515625,
    1.0,
    1.00097656,
    2.0,
    65504,
]

# Values treated as out of range by the tests, paired element-wise with the
# reference values they are expected to turn into after conversion to fp16.
out_of_range = [
    float("-inf"),
    -65505.0,
    -1e-10,
    -1e-39,
    1e-39,
    1e-10,
    65505.0,
    float("inf"),
]
converted_out_of_range = [
    -65504.0,
    -65504.0,
    0,
    0,
    0,
    0,
    65504.0,
    65504.0,
]

# Test inputs: one copy of a list followed by ten copies of the other one.
more_in_range = out_of_range + in_range * 10
more_out_of_range = in_range + out_of_range * 10

# Reference for more_in_range after conversion to fp16.
converted_more_in_range = converted_out_of_range + in_range * 10
def make_model(add_consts, mul_consts):
    """Build a model that adds one constant to its input and multiplies by another.

    ``add_consts`` is passed to ``make_constant`` with ``transposed=False`` and
    feeds the ``add`` op; ``mul_consts`` is passed with ``transposed=True`` and
    feeds the ``multiply`` op. The single parameter is f32 with partial shape
    ``[-1]``.
    """
    data = Parameter(ov.Type.f32, ov.PartialShape([-1]))

    add_rhs = make_constant(add_consts, False)
    summed = add(data, add_rhs)

    mul_rhs = make_constant(mul_consts, True)
    scaled = multiply(summed, mul_rhs)

    return ov.Model([scaled], [data])
def get_constants(model) -> List[Constant]:
    """Round-trip ``model`` through ``ov.save_model`` and collect its constants.

    The model is saved as ``f32_partially_compressed.xml`` in the system
    temporary directory and read back with ``ov.Core().read_model``. For the
    ``Add`` and ``Multiply`` operations of the restored model, the node feeding
    input 1 is inspected: a ``Constant`` is taken as is, and a ``Convert`` is
    looked through to the ``Constant`` on its input 0.

    Returns:
        A two-element list ``[add_constant, multiply_constant]``. An entry is
        ``None`` when input 1 of that operation is not a ``Constant``
        (directly or behind a ``Convert``).

    Raises:
        AssertionError: If the restored model has no ``Add`` or no
            ``Multiply`` operation.
    """
    from pathlib import Path

    model_name = Path(tempfile.gettempdir()) / "f32_partially_compressed.xml"
    ov.save_model(model, model_name)
    core = ov.Core()
    restored_model = core.read_model(model_name)

    op_ind_map = {"Add": 0, "Multiply": 1}
    # Unique placeholder marking "operation of this type was not seen at all";
    # distinct from None, which means "seen, but no constant on input 1".
    not_seen = object()
    constants_list = [not_seen] * len(op_ind_map)

    for op in restored_model.get_ordered_ops():
        op_type = op.get_type_info().name
        if op_type not in op_ind_map:
            continue

        # Reset for every operation: without this, an input that is neither a
        # Convert nor a Constant would raise UnboundLocalError or silently
        # reuse the constant found for a previous operation.
        const_node = None
        in_node = op.input_value(1).get_node()
        in_type = in_node.get_type_info().name
        if in_type == "Convert":
            candidate = in_node.input_value(0).get_node()
            if candidate.get_type_info().name == "Constant":
                const_node = candidate
        elif in_type == "Constant":
            const_node = in_node
        constants_list[op_ind_map[op_type]] = const_node

    for node in constants_list:
        assert node is not not_seen, "Add or Multiply op is missing in the restored model"

    # sanity check that model is compilable
    ov.compile_model(restored_model)
    return constants_list
def test_compression_1():
    """Check a model whose Add constant becomes f16 while Multiply stays f32.

    The Add branch is built from ``more_in_range`` and is compared against
    ``converted_more_in_range``; the Multiply branch is built from
    ``more_out_of_range`` and must come back unchanged as f32.
    """
    add_const, mul_const = get_constants(make_model(more_in_range, more_out_of_range))

    assert mul_const is not None, "There is no Constant op on FP32 branch"
    assert add_const is not None, "There is no compressed Constant + Convert op on FP16 branch"

    # FP32 branch: element type and values are untouched.
    assert mul_const.get_output_element_type(0) == ov.Type.f32
    expected_fp32 = np.array(more_out_of_range, dtype=np.float32)
    assert np.all(expected_fp32 == mul_const.get_vector())

    # FP16 branch: values match the precomputed reference.
    assert add_const.get_output_element_type(0) == ov.Type.f16
    expected_fp16 = np.array(converted_more_in_range, dtype=np.float32)
    msg = f"Difference: {expected_fp16 - add_const.get_vector()}"
    assert np.all(expected_fp16 == add_const.get_vector()), msg
def test_compression_2():
    """Check that both constants are f16 when both are built from ``more_in_range``.

    The stored values are compared with ``more_in_range`` clipped to the
    finite float16 range and cast to float16.
    """
    restored = get_constants(make_model(more_in_range, more_in_range))
    add_const, mul_const = restored
    both = (add_const, mul_const)

    for const in both:
        assert const is not None, "There is no Constant op on FP16 branch"
    for const in both:
        assert const.get_output_element_type(0) == ov.Type.f16, "Const element type is not f16"

    f16_info = np.finfo(np.float16)
    expected = np.clip(more_in_range, f16_info.min, f16_info.max).astype(np.float16)
    for const in both:
        assert np.all(expected == const.get_vector())
def test_no_compression():
    """Check that both constants stay f32 when both are built from ``more_out_of_range``.

    Element types must remain f32 and the stored values must equal the
    original inputs.
    """
    restored = get_constants(make_model(more_out_of_range, more_out_of_range))
    add_const, mul_const = restored
    both = (add_const, mul_const)

    for const in both:
        assert const is not None, "There is no Constant op on FP32 branch"
    for const in both:
        assert const.get_output_element_type(0) == ov.Type.f32, "Const element type is not f32"

    for const in both:
        expected = np.array(more_out_of_range, dtype=np.float32)
        assert np.all(expected == const.get_vector())