[MO] compress_to_fp16=False by default (#16854)
* compress_to_fp16=False by default
* Apply suggestions from code review
  Co-authored-by: Karol Blaszczak <karol.blaszczak@intel.com>
* note about RAM consumption for FP16 compressed models
* detailed note about RAM usage
* update 'get_compression_message()'
* corrected get_compression_message: remove info about RAM
* fix pytorch convert layer tests
---------
Co-authored-by: Karol Blaszczak <karol.blaszczak@intel.com>
parent de8f34c8f0
commit 68f46ff9a1
@@ -2,18 +2,16 @@

 @sphinxdirective

-Model Optimizer by default converts all floating-point weights to ``FP16`` data type.
-The resulting IR is called compressed ``FP16`` model. The resulting model will occupy
-about twice as less space in the file system, but it may have some accuracy drop.
-For most models, the accuracy drop is negligible. But in case if accuracy drop is
-significant user can disable compression explicitly.
+Model Optimizer can convert all floating-point weights to the ``FP16`` data type.
+It results in creating a "compressed ``FP16`` model", which occupies about half of
+the original space in the file system. The compression may introduce a drop in accuracy,
+but it is negligible for most models.

-By default, models are compressed to ``FP16``, but you can disable compression by
-specifying ``--compress_to_fp16=False``:
+To compress the model, use the ``--compress_to_fp16`` or ``--compress_to_fp16=True`` option:

 .. code-block:: sh

-   mo --input_model INPUT_MODEL --compress_to_fp16=False
+   mo --input_model INPUT_MODEL --compress_to_fp16


 For details on how plugins handle compressed ``FP16`` models, see
@@ -26,4 +24,11 @@ For details on how plugins handle compressed ``FP16`` models, see
 information about that.


+.. note::
+
+   Some large models (larger than a few GB), when compressed to ``FP16``, may consume an enormous amount of RAM
+   during the loading phase of inference. If you face such problems, try converting the model without compression:
+   ``mo --input_model INPUT_MODEL --compress_to_fp16=False``
+
+
 @endsphinxdirective
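The CLI option shown in this documentation hunk maps onto the compress_to_fp16 argument of convert_model in the Model Optimizer Python API, whose default is flipped later in this commit. A minimal sketch of the equivalent Python calls, assuming the OpenVINO 2023.x API; the model path and output file names are illustrative:

    from openvino.runtime import serialize
    from openvino.tools.mo import convert_model

    # Convert without weight compression (the new default behavior).
    fp32_model = convert_model("model.onnx")
    serialize(fp32_model, "model_fp32.xml", "model_fp32.bin")

    # Opt in to FP16 weight compression explicitly.
    fp16_model = convert_model("model.onnx", compress_to_fp16=True)
    serialize(fp16_model, "model_fp16.xml", "model_fp16.bin")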
@@ -458,23 +458,18 @@ def create_pytorch_nn_module_mean_list(tmp_dir):
         "input": [InputCutInfo("x", None, "f32", None), InputCutInfo("y", None, "f32", None)]}


-def create_pytorch_nn_module_mean_list_default_compression(tmp_dir):
-    # by default compression should be enabled (same as setting 'compress_to_fp16': True)
-    # therefore decompression Converts will be present
+def create_pytorch_nn_module_mean_list_default_no_compression(tmp_dir):
+    # by default compression is disabled (same as setting 'compress_to_fp16': False)
     pt_model = make_pt_model_two_inputs()
     shape = [1, 10, 10, 3]

     shape = PartialShape(shape)
     param1 = ov.opset8.parameter(shape)
     param2 = ov.opset8.parameter(shape)
-    const1 = ov.opset8.constant([[[[0, 0, 0]]]], dtype=np.float16)
-    const1_decompressed = ov.opset8.convert(
-        const1, destination_type=np.float32)
-    const2 = ov.opset8.constant([[[[0, 0, 0]]]], dtype=np.float16)
-    const2_decompressed = ov.opset8.convert(
-        const2, destination_type=np.float32)
-    sub1 = ov.opset8.subtract(param1, const1_decompressed)
-    sub2 = ov.opset8.subtract(param2, const2_decompressed)
+    const1 = ov.opset8.constant([[[[0, 0, 0]]]], dtype=np.float32)
+    const2 = ov.opset8.constant([[[[0, 0, 0]]]], dtype=np.float32)
+    sub1 = ov.opset8.subtract(param1, const1)
+    sub2 = ov.opset8.subtract(param2, const2)
     add = ov.opset8.add(sub1, sub2)
     relu = ov.opset8.relu(add)
     sigm = ov.opset8.sigmoid(relu)
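The two reference graphs in this test differ only in whether the constants are stored as FP16 followed by decompression Convert nodes or directly as FP32. A quick way to see which variant a converted model contains is to count its Convert operations; a minimal sketch, where TwoInputs is a hypothetical stand-in for make_pt_model_two_inputs, and the conversion is assumed to work from input_shape alone, as in the tests above:

    import torch
    from openvino.tools.mo import convert_model


    class TwoInputs(torch.nn.Module):
        # Rough stand-in for make_pt_model_two_inputs from these tests.
        def __init__(self):
            super().__init__()
            self.linear = torch.nn.Linear(3, 3)

        def forward(self, x, y):
            return torch.sigmoid(torch.relu(self.linear(x) + self.linear(y)))


    def count_convert_ops(ov_model):
        # FP16-compressed weights show up as Constant(f16) -> Convert(f32) pairs in the graph.
        return sum(1 for op in ov_model.get_ops() if op.get_type_name() == "Convert")


    shapes = [[1, 10, 10, 3], [1, 10, 10, 3]]
    plain = convert_model(TwoInputs(), input_shape=shapes)  # new default: no compression
    compressed = convert_model(TwoInputs(), input_shape=shapes, compress_to_fp16=True)
    print(count_convert_ops(plain), count_convert_ops(compressed))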
@@ -529,23 +524,18 @@ def create_pytorch_nn_module_scale_list(tmp_dir):
     return pt_model, ref_model, {'input_shape': [shape, shape], 'scale_values': [[1, 1, 1], [1, 1, 1]], 'compress_to_fp16': False, "input": [InputCutInfo("x", None, "f32", None), InputCutInfo("y", None, "f32", None)]}


-def create_pytorch_nn_module_scale_list_default_compression(tmp_dir):
-    # by default compression should be enabled (same as setting 'compress_to_fp16': True)
-    # therefore decompression Converts will be present
+def create_pytorch_nn_module_scale_list_default_no_compression(tmp_dir):
+    # by default compression is disabled (same as setting 'compress_to_fp16': False)
     pt_model = make_pt_model_two_inputs()
     shape = [1, 10, 10, 3]

     shape = PartialShape(shape)
     param1 = ov.opset8.parameter(shape)
     param2 = ov.opset8.parameter(shape)
-    const1 = ov.opset8.constant([[[[1, 1, 1]]]], dtype=np.float16)
-    const1_decompressed = ov.opset8.convert(
-        const1, destination_type=np.float32)
-    const2 = ov.opset8.constant([[[[1, 1, 1]]]], dtype=np.float16)
-    const2_decompressed = ov.opset8.convert(
-        const2, destination_type=np.float32)
-    sub1 = ov.opset8.multiply(param1, const1_decompressed)
-    sub2 = ov.opset8.multiply(param2, const2_decompressed)
+    const1 = ov.opset8.constant([[[[1, 1, 1]]]], dtype=np.float32)
+    const2 = ov.opset8.constant([[[[1, 1, 1]]]], dtype=np.float32)
+    sub1 = ov.opset8.multiply(param1, const1)
+    sub2 = ov.opset8.multiply(param2, const2)
     add = ov.opset8.add(sub1, sub2)
     relu = ov.opset8.relu(add)
     sigm = ov.opset8.sigmoid(relu)
@@ -754,10 +744,10 @@ class TestMoConvertPyTorch(CommonMOConvertTest):
         create_pytorch_nn_module_layout_list,
         create_pytorch_nn_module_layout_list_case2,
         create_pytorch_nn_module_mean_list,
-        create_pytorch_nn_module_mean_list_default_compression,
+        create_pytorch_nn_module_mean_list_default_no_compression,
         create_pytorch_nn_module_mean_list_compressin_enabled,
         create_pytorch_nn_module_scale_list,
-        create_pytorch_nn_module_scale_list_default_compression,
+        create_pytorch_nn_module_scale_list_default_no_compression,
         create_pytorch_nn_module_scale_list_compression_enabled,
         create_pytorch_nn_module_shapes_list_static,
         create_pytorch_nn_module_shapes_list_dynamic,
@@ -34,7 +34,7 @@ def convert_model(
     source_layout: [str, Layout, dict] = (),
     target_layout: [str, Layout, dict] = (),
     layout: [str, Layout, LayoutMap, list, dict] = (),
-    compress_to_fp16: bool = True,
+    compress_to_fp16: bool = False,
     extensions: [str, pathlib.Path, list, Any] = None,
     transform: [str, list, tuple] = "",
     transformations_config: [str, pathlib.Path] = None,
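Because the default flips from True to False, scripts that relied on implicit FP16 compression now have to request it explicitly; a minimal sketch (the model file name is illustrative):

    from openvino.tools.mo import convert_model

    # Before this change, weights were compressed to FP16 unless compress_to_fp16=False was passed.
    # After this change, weights stay FP32 unless compression is requested explicitly.
    model = convert_model("model.onnx", compress_to_fp16=True)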
@@ -37,7 +37,7 @@ def get_tf_fe_message():
 def get_compression_message():
     link = "https://docs.openvino.ai/latest/openvino_docs_MO_DG_FP16_Compression.html"
     message = '[ INFO ] Generated IR will be compressed to FP16. ' \
-              'If you get lower accuracy, please consider disabling compression explicitly ' \
-              'by adding argument --compress_to_fp16=False.\n' \
+              'If you get lower accuracy, please consider disabling compression ' \
+              'by removing the --compress_to_fp16 argument or setting it to false: --compress_to_fp16=False.\n' \
               'Find more information about compression to FP16 at {}'.format(link)
     return message