Efficient FP32 -> FP16 conversion for convert_precision, save_model, ovc and mo (#18988)
* WIP Postpone fp16 in CompressFloatConstantsImpl * Apply suggestions from code review Co-authored-by: Ilya Lavrenov <ilya.lavrenov@intel.com> * WIP: Compression to FP16 in Serialize * Prepared for efficient fp32 to fp16 conversion * Update src/core/reference/src/runtime/reference/convert.cpp * Called real slow reference implementations in the place where the optimized versions are supposed to be implemented * Code style * Fixed 0 values in the fast f64 to f16 compression * Optimized convert_from_f32_to_f16_with_clamp * Added optimized f32->f16 instance of change_constant_precision * compression transformation Python test * use tmp dir, minor corrections * Update src/bindings/python/tests/test_transformations/test_compression.py * Update src/bindings/python/tests/test_transformations/test_compression.py * style fix * define rt_info for postponed_fp16_compression * remove redundant class * fix temp dir for Win in test_compression.py * update definitions in convert.hpp * Update implementation in convert.cpp * Update serialize.cpp * Update compress_float_constants.cpp * added macros for ARM/non_x86 in convert.cpp * fix macros in convert.cpp * change fixme placement in serialize.cpp * style_fix * Update src/core/reference/src/runtime/reference/convert.cpp * style_fix * Optimized count_out_of_f16_range * Code style * Revert unused * Update src/core/src/pass/serialize.cpp Co-authored-by: Ilya Lavrenov <ilya.lavrenov@intel.com> * Update src/core/reference/src/runtime/reference/convert.cpp Co-authored-by: Ilya Lavrenov <ilya.lavrenov@intel.com> * use optimized convert_from_f32_to_f16_with_clamp for non postponed * minor corrections * Update src/common/transformations/src/transformations/common_optimizations/compress_float_constants.cpp * Update compress_float_constants.cpp * Switched mo and ovc to save_model instead of serialize to leverage performance improvements in fp32->fp16 * Applied minor code imporvements to address review feedback * Minor changes in code * Update 
tools/ovc/openvino/tools/ovc/main.py * Apply suggestions from code review * Fixed failed test in case when both usual xml compression and fp16 compression are applied simultaneously (disabled for now) * Added description for CompressFloatConstantImpl postponed parameter * Description of postponed parameter for CompressFloatConstants * Reverted switching to save_model in mo as the compression can be applied not only via CLI and old code should be kept for Python path (not applicable for ovc) * Removed remaining committed test artifacts and reverted remaining changes in mo --------- Co-authored-by: Ilya Lavrenov <ilya.lavrenov@intel.com> Co-authored-by: dmitrygo <dmitry.gorokhov@intel.com> Co-authored-by: Vladimir Paramuzov <vladimir.paramuzov@intel.com> Co-authored-by: Pavel Esir <pavel.esir@intel.com> Co-authored-by: Pavel Esir <pavel.esir@gmail.com>
This commit is contained in:
@@ -0,0 +1,116 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# Copyright (C) 2018-2023 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
import tempfile
from pathlib import Path
from typing import List

import numpy as np

import openvino as ov
from openvino.runtime.op import Parameter, Constant
from openvino.runtime.opset12 import add, multiply
|
||||
|
||||
|
||||
def make_constant(values, transposed):
    """Build an f32 Constant from *values* shaped as a row (transposed) or column vector."""
    shape = [1, len(values)] if transposed else [len(values), 1]
    return Constant(ov.Type.f32, ov.Shape(shape), values)
|
||||
|
||||
|
||||
# Values representable in fp16 (fp16 denormals are kept; fp32 denormals flush to zero).
in_range = [
    -65504.0, -2.0, 1.00097656, -1.0, -0.99951172, -0.00006103515625,
    -0.000000059604645, 0.0, 0.000000059604645, 0.99951172,
    0.00006103515625, 1.0, 1.00097656, 2.0, 65504,
]
# Values outside the fp16 range (magnitude overflow or underflow).
out_of_range = [float("-inf"), -65505.0, -1e-10, -1e-39, 1e-39, 1e-10, 65505.0, float("inf")]
# Expected fp16 results for out_of_range: clamped to +/-65504 or flushed to 0.
converted_out_of_range = [-65504.0, -65504.0, 0, 0, 0, 0, 65504.0, 65504.0]

# Test inputs: mixtures dominated by in-range or out-of-range values.
more_in_range = out_of_range + 10 * in_range
more_out_of_range = in_range + 10 * out_of_range

# Reference values after converting more_in_range to fp16.
converted_more_in_range = converted_out_of_range + 10 * in_range
|
||||
|
||||
|
||||
def make_model(add_consts, mul_consts):
    """Build a model: Parameter -> Add(column const) -> Multiply(row const)."""
    param = Parameter(ov.Type.f32, ov.PartialShape([-1]))
    add_node = add(param, make_constant(add_consts, False))
    mul_node = multiply(add_node, make_constant(mul_consts, True))
    return ov.Model([mul_node], [param])
|
||||
|
||||
|
||||
def get_constants(model) -> List[Constant]:
    """Round-trip *model* through save_model/read_model and collect its constants.

    save_model applies FP16 compression by default, so each eligible f32
    constant becomes Constant(f16) -> Convert.  For input 1 of the Add and
    Multiply ops this returns:
      - the f16 Constant behind a Convert on a compressed branch,
      - the f32 Constant itself on an uncompressed branch,
      - None when the producer is neither (caller asserts on this).

    Returns a list indexed [Add, Multiply].  Raises AssertionError if either
    op is missing from the restored model.
    """
    model_name = Path(tempfile.gettempdir()) / "f32_partially_compressed.xml"
    ov.save_model(model, model_name)
    restored_model = ov.Core().read_model(model_name)

    op_ind_map = {"Add": 0, "Multiply": 1}
    # Unique sentinel marks "op not visited yet".  Note: the original
    # [[]] * n created n references to ONE shared list -- a latent aliasing
    # bug; a sentinel object avoids that entirely.
    _missing = object()
    constants_list = [_missing] * len(op_ind_map)

    for op in restored_model.get_ordered_ops():
        op_type = op.get_type_info().name
        if op_type not in op_ind_map:
            continue

        # Initialize to None so it is always bound, even when the producer is
        # neither Convert nor Constant (original code could raise NameError).
        const_node = None
        in_node = op.input_value(1).get_node()
        if in_node.get_type_info().name == "Convert":
            producer = in_node.input_value(0).get_node()
            if producer.get_type_info().name == "Constant":
                const_node = producer
        elif in_node.get_type_info().name == "Constant":
            const_node = in_node

        constants_list[op_ind_map[op_type]] = const_node

    for node in constants_list:
        assert node is not _missing, "Add/Multiply op not found in restored model"

    # Sanity check that the restored model is compilable.
    ov.compile_model(restored_model)
    return constants_list
|
||||
|
||||
|
||||
def test_compression_1():
    """Mixed model: the mostly-in-range constant is compressed to f16, the other kept f32."""
    const_fp16, const_fp32 = get_constants(make_model(more_in_range, more_out_of_range))

    assert const_fp32 is not None, "There is no Constant op on FP32 branch"
    assert const_fp16 is not None, "There is no compressed Constant + Convert op on FP16 branch"

    # Uncompressed branch: type and values survive unchanged.
    expected_fp32 = np.array(more_out_of_range, dtype=np.float32)
    assert const_fp32.get_output_element_type(0) == ov.Type.f32
    assert np.all(expected_fp32 == const_fp32.get_vector())

    # Compressed branch: stored as f16, values clamped/flushed per reference.
    assert const_fp16.get_output_element_type(0) == ov.Type.f16

    msg = f"Difference: {np.array(converted_more_in_range, dtype=np.float32) - const_fp16.get_vector()}"
    assert np.all(np.array(converted_more_in_range, dtype=np.float32) == const_fp16.get_vector()), msg
|
||||
|
||||
|
||||
def test_compression_2():
    """Both constants are mostly in fp16 range, so both branches get compressed."""
    model = make_model(more_in_range, more_in_range)
    constants = get_constants(model)

    for const in constants:
        assert const is not None, "There is no Constant op on FP16 branch"
    for const in constants:
        assert const.get_output_element_type(0) == ov.Type.f16, "Const element type is not f16"

    # Reference: clamp to the fp16 representable range, then cast.
    f16_info = np.finfo(np.float16)
    expected = np.clip(more_in_range, f16_info.min, f16_info.max).astype(np.float16)

    for const in constants:
        assert np.all(expected == const.get_vector())
|
||||
|
||||
|
||||
def test_no_compression():
    """Both constants are mostly out of fp16 range, so neither branch is compressed."""
    model = make_model(more_out_of_range, more_out_of_range)
    constants = get_constants(model)
    const_fp32_1, const_fp32_2 = constants

    assert const_fp32_1 is not None, "There is no Constant op on FP32 branch"
    assert const_fp32_2 is not None, "There is no Constant op on FP32 branch"

    assert const_fp32_1.get_output_element_type(0) == ov.Type.f32, "Const element type is not f32"
    assert const_fp32_2.get_output_element_type(0) == ov.Type.f32, "Const element type is not f32"

    # Values must round-trip exactly since no f16 conversion was applied.
    expected = np.array(more_out_of_range, dtype=np.float32)
    for const in constants:
        assert np.all(expected == const.get_vector())
|
||||
Reference in New Issue
Block a user