Efficient FP32 -> FP16 conversion for convert_precision, save_model, ovc and mo (#18988)

* WIP Postpone fp16 in CompressFloatConstantsImpl * Apply suggestions from code review Co-authored-by: Ilya Lavrenov <ilya.lavrenov@intel.com> * WIP: Compression to FP16 in Serialize * Prepared for efficient fp32 to fp16 conversion * Update src/core/reference/src/runtime/reference/convert.cpp * Called real slow reference implementations in the place where the optimized versions are supposed to be implemented * Code style * Fixed 0 values in the fast f64 to f16 compression * Optimized convert_from_f32_to_f16_with_clamp * Added optimized f32->f16 instance of change_constant_precision * compression transformation Python test * use tmp dir, minor corrections * Update src/bindings/python/tests/test_transformations/test_compression.py * Update src/bindings/python/tests/test_transformations/test_compression.py * style fix * define rt_info for postponed_fp16_compression * remove redundant class * fix temp dir for Win in test_compression.py * update definitions in convert.hpp * Update implementation in convert.cpp * Update serialize.cpp * Update compress_float_constants.cpp * added macros for ARM/non_x86 in convert.cpp * fix macros in convert.cpp * change fixme placement in serialize.cpp * style_fix * Update src/core/reference/src/runtime/reference/convert.cpp * style_fix * Optimized count_out_of_f16_range * Code style * Revert unused * Update src/core/src/pass/serialize.cpp Co-authored-by: Ilya Lavrenov <ilya.lavrenov@intel.com> * Update src/core/reference/src/runtime/reference/convert.cpp Co-authored-by: Ilya Lavrenov <ilya.lavrenov@intel.com> * use optimized convert_from_f32_to_f16_with_clamp for non postponed * minor corrections * Update src/common/transformations/src/transformations/common_optimizations/compress_float_constants.cpp * Update compress_float_constants.cpp * Switched mo and ovc to save_model instead of serialize to leverage performance improvements in fp32->fp16 * Applied minor code improvements to address review feedback * Minor changes in code * Update 
tools/ovc/openvino/tools/ovc/main.py * Apply suggestions from code review * Fixed failed test in case when both usual xml compression and fp16 compression are applied simultaneously (disabled for now) * Added description for CompressFloatConstantImpl postponed parameter * Description of postponed parameter for CompressFloatConstants * Reverted switching to save_model in mo as the compression can be applied not only via CLI and old code should be kept for Python path (not applicable for ovc) * Removed remaining committed test artefacts and reverted remaining changes in mo --------- Co-authored-by: Ilya Lavrenov <ilya.lavrenov@intel.com> Co-authored-by: dmitrygo <dmitry.gorokhov@intel.com> Co-authored-by: Vladimir Paramuzov <vladimir.paramuzov@intel.com> Co-authored-by: Pavel Esir <pavel.esir@intel.com> Co-authored-by: Pavel Esir <pavel.esir@gmail.com>
2023-08-17 15:08:33 +04:00
parent 2394732055
commit f0300a36eb
11 changed files with 666 additions and 44 deletions
--- a/src/bindings/python/tests/test_transformations/test_compression.py
+++ b/src/bindings/python/tests/test_transformations/test_compression.py
@@ -0,0 +1,116 @@
+# -*- coding: utf-8 -*-
+# Copyright (C) 2018-2023 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+import tempfile
+from typing import List
+
+import numpy as np
+from openvino.runtime.op import Parameter, Constant
+from openvino.runtime.opset12 import add, multiply
+
+import openvino as ov
+
+
+def make_constant(values, transposed):
+    """Wrap *values* in a 2-D f32 ``Constant``.
+
+    :param values: sequence of numbers stored in the constant.
+    :param transposed: when truthy the shape is ``[1, len(values)]`` (row vector),
+                       otherwise ``[len(values), 1]`` (column vector).
+    :return: the created ``Constant`` node.
+    """
+    count = len(values)
+    dims = [1, count] if transposed else [count, 1]
+    return Constant(ov.Type.f32, ov.Shape(dims), values)
+
+
+# Test data. Expected behaviour of the compression: keep fp16 denormals,
+# flush fp32 denormals to zero.
+#
+# Values that fit into fp16: +/-65504 is the largest finite fp16 magnitude,
+# 0.00006103515625 is 2**-14 and 0.000000059604645 is approximately 2**-24.
+# NOTE(review): the third entry (1.00097656) is positive although its neighbours
+# are negative - possibly -1.00097656 was intended; confirm before changing,
+# the value is in range either way.
+in_range = [
+    -65504.0, -2.0, 1.00097656, -1.0, -0.99951172,
+    -0.00006103515625, -0.000000059604645, 0.0,
+    0.000000059604645, 0.99951172, 0.00006103515625,
+    1.0, 1.00097656, 2.0, 65504,
+]
+
+# Values that do not fit into fp16, paired element-wise with the values the
+# tests expect to find after conversion to fp16.
+out_of_range = [
+    float("-inf"), -65505.0, -1e-10, -1e-39,
+    1e-39, 1e-10, 65505.0, float("inf"),
+]
+converted_out_of_range = [
+    -65504.0, -65504.0, 0, 0,
+    0, 0, 65504.0, 65504.0,
+]
+
+# test inputs: one dominated by in-range values, one dominated by out-of-range values
+more_in_range = out_of_range + in_range * 10
+more_out_of_range = in_range + out_of_range * 10
+
+# reference after conversion more_in_range to fp16
+converted_more_in_range = converted_out_of_range + in_range * 10
+
+
+def make_model(add_consts, mul_consts):
+    """Build a model computing ``(parameter + add_consts) * mul_consts``.
+
+    The single input is a 1-D f32 parameter of dynamic length. The Add constant
+    is created with shape ``[len, 1]`` and the Multiply constant with shape ``[1, len]``.
+
+    :param add_consts: values of the constant attached to the Add node.
+    :param mul_consts: values of the constant attached to the Multiply node.
+    :return: ``ov.Model`` whose only result is the Multiply output.
+    """
+    data = Parameter(ov.Type.f32, ov.PartialShape([-1]))
+    add_weights = make_constant(add_consts, False)
+    summed = add(data, add_weights)
+    mul_weights = make_constant(mul_consts, True)
+    scaled = multiply(summed, mul_weights)
+    return ov.Model([scaled], [data])
+
+
+def get_constants(model) -> List[Constant]:
+    """Save *model*, read it back and return the constants feeding Add and Multiply.
+
+    The model is written with ``ov.save_model`` to a fixed file name inside the
+    system temporary directory and restored with ``ov.Core().read_model``.
+    For every Add / Multiply node of the restored model the producer of its
+    second input (index 1) is inspected:
+
+    * a ``Constant`` is returned as is;
+    * a ``Convert`` fed by a ``Constant`` yields that underlying ``Constant``;
+    * anything else yields ``None``.
+
+    :param model: model to round-trip through serialization.
+    :return: two-element list ``[add_constant, multiply_constant]``; an entry is
+             ``None`` when no constant could be located for that operation.
+    :raises AssertionError: if the restored model has no Add or no Multiply node.
+    """
+    from pathlib import Path
+
+    model_name = Path(tempfile.gettempdir()) / "f32_partially_compressed.xml"
+    ov.save_model(model, model_name)
+    core = ov.Core()
+    restored_model = core.read_model(model_name)
+
+    op_ind_map = {"Add": 0, "Multiply": 1}
+    # A dedicated sentinel marks slots whose operation was never visited, which
+    # keeps "operation missing" distinguishable from "operation found, no constant" (None).
+    not_found = object()
+    constants_list = [not_found] * len(op_ind_map)
+
+    for op in restored_model.get_ordered_ops():
+        op_type = op.get_type_info().name
+        if op_type not in op_ind_map:
+            continue
+
+        in_node = op.input_value(1).get_node()
+        in_type = in_node.get_type_info().name
+
+        # Reset on every iteration: otherwise an unexpected producer type would
+        # either raise UnboundLocalError or silently reuse the previous op's constant.
+        const_node = None
+        if in_type == "Convert":
+            candidate = in_node.input_value(0).get_node()
+            if candidate.get_type_info().name == "Constant":
+                const_node = candidate
+        elif in_type == "Constant":
+            const_node = in_node
+
+        constants_list[op_ind_map[op_type]] = const_node
+
+    for node in constants_list:
+        assert node is not not_found, "Add or Multiply op is missing in the restored model"
+
+    # sanity check that model is compilable
+    ov.compile_model(restored_model)
+    return constants_list
+
+
+def test_compression_1():
+    """Mixed case: the Add constant must come back as f16, the Multiply constant must stay f32.
+
+    The Add constant is built from ``more_in_range`` and is expected to hold
+    ``converted_more_in_range``; the Multiply constant is built from
+    ``more_out_of_range`` and is expected to keep its values unchanged.
+    """
+    model = make_model(more_in_range, more_out_of_range)
+    const_fp16, const_fp32 = get_constants(model)
+    assert const_fp32 is not None, "There is no Constant op on FP32 branch"
+    assert const_fp16 is not None, "There is no compressed Constant + Convert op on FP16 branch"
+
+    # FP32 branch: type and values are untouched.
+    assert const_fp32.get_output_element_type(0) == ov.Type.f32
+    expected_fp32 = np.array(more_out_of_range, dtype=np.float32)
+    assert np.all(expected_fp32 == const_fp32.get_vector())
+
+    # FP16 branch: type is f16 and values match the reference conversion.
+    assert const_fp16.get_output_element_type(0) == ov.Type.f16
+
+    expected_fp16 = np.array(converted_more_in_range, dtype=np.float32)
+    actual_fp16 = const_fp16.get_vector()
+    msg = f"Difference: {expected_fp16 - actual_fp16}"
+    assert np.all(expected_fp16 == actual_fp16), msg
+
+
+def test_compression_2():
+    """Both constants are built from ``more_in_range`` and both must come back as f16.
+
+    The stored values are compared against ``more_in_range`` clipped to the
+    finite fp16 range and cast to ``np.float16``.
+    """
+    model = make_model(more_in_range, more_in_range)
+    add_const, mul_const = get_constants(model)
+
+    missing_msg = "There is no Constant op on FP16 branch"
+    assert add_const is not None, missing_msg
+    assert mul_const is not None, missing_msg
+
+    wrong_type_msg = "Const element type is not f16"
+    assert add_const.get_output_element_type(0) == ov.Type.f16, wrong_type_msg
+    assert mul_const.get_output_element_type(0) == ov.Type.f16, wrong_type_msg
+
+    f16_info = np.finfo(np.float16)
+    expected = np.clip(more_in_range, f16_info.min, f16_info.max).astype(np.float16)
+
+    assert np.all(expected == add_const.get_vector())
+    assert np.all(expected == mul_const.get_vector())
+
+
+def test_no_compression():
+    """Both constants are built from ``more_out_of_range`` and both must stay f32 with unchanged values."""
+    model = make_model(more_out_of_range, more_out_of_range)
+    add_const, mul_const = get_constants(model)
+
+    missing_msg = "There is no Constant op on FP32 branch"
+    assert add_const is not None, missing_msg
+    assert mul_const is not None, missing_msg
+
+    wrong_type_msg = "Const element type is not f32"
+    assert add_const.get_output_element_type(0) == ov.Type.f32, wrong_type_msg
+    assert mul_const.get_output_element_type(0) == ov.Type.f32, wrong_type_msg
+
+    assert np.all(np.array(more_out_of_range, dtype=np.float32) == add_const.get_vector())
+    assert np.all(np.array(more_out_of_range, dtype=np.float32) == mul_const.get_vector())