[MO] compress_to_fp16=False by default (#16854)

* compress_to_fp16=False by default

* Apply suggestions from code review

Co-authored-by: Karol Blaszczak <karol.blaszczak@intel.com>

* note about RAM consumption for FP16 compressed models

* detailed note about RAM usage

* update 'get_compression_message()'

* corrected get_compression_message: remove info about RAM

* fix pytorch convert layer tests

---------

Co-authored-by: Karol Blaszczak <karol.blaszczak@intel.com>
Pavel Esir 2023-04-14 03:16:41 +02:00 committed by GitHub
parent de8f34c8f0
commit 68f46ff9a1
4 changed files with 30 additions and 35 deletions

@@ -2,18 +2,16 @@
@sphinxdirective
Model Optimizer by default converts all floating-point weights to ``FP16`` data type.
The resulting IR is called compressed ``FP16`` model. The resulting model will occupy
about twice as less space in the file system, but it may have some accuracy drop.
For most models, the accuracy drop is negligible. But in case if accuracy drop is
significant user can disable compression explicitly.
Model Optimizer can convert all floating-point weights to the ``FP16`` data type.
It results in creating a "compressed ``FP16`` model", which occupies about half of
the original space in the file system. The compression may introduce a drop in accuracy,
but it is negligible for most models.
By default, models are compressed to ``FP16``, but you can disable compression by
specifying ``--compress_to_fp16=False``:
To compress the model, use the ``--compress_to_fp16`` or ``--compress_to_fp16=True`` option:
.. code-block:: sh
mo --input_model INPUT_MODEL --compress_to_fp16=False
mo --input_model INPUT_MODEL --compress_to_fp16
For details on how plugins handle compressed ``FP16`` models, see
@@ -26,4 +24,11 @@ For details on how plugins handle compressed ``FP16`` models, see
information about that.
.. note::
Some large models (larger than a few GB), when compressed to ``FP16``, may consume an enormous amount of RAM during the
loading phase of inference. If you encounter such problems, try converting them without compression:
``mo --input_model INPUT_MODEL --compress_to_fp16=False``
@endsphinxdirective
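For reference, the Python conversion API exposes the same switch. Below is a minimal sketch of the equivalent calls, assuming a local "model.onnx" file (hypothetical path) and that openvino.tools.mo.convert_model and openvino.runtime.serialize are available in your environment:

    from openvino.runtime import serialize
    from openvino.tools.mo import convert_model

    # Compression is now off by default; request it explicitly to get an FP16 IR.
    ov_model = convert_model("model.onnx", compress_to_fp16=True)
    serialize(ov_model, "model.xml")

    # If the compressed model consumes too much RAM at load time,
    # convert without compression (same as the new default).
    ov_model_fp32 = convert_model("model.onnx")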

@@ -458,23 +458,18 @@ def create_pytorch_nn_module_mean_list(tmp_dir):
"input": [InputCutInfo("x", None, "f32", None), InputCutInfo("y", None, "f32", None)]}
def create_pytorch_nn_module_mean_list_default_compression(tmp_dir):
# by default compression should be enabled (same as setting 'compress_to_fp16': True)
# therefore decompression Converts will be present
def create_pytorch_nn_module_mean_list_default_no_compression(tmp_dir):
# by default compression is disabled (same as setting 'compress_to_fp16': False)
pt_model = make_pt_model_two_inputs()
shape = [1, 10, 10, 3]
shape = PartialShape(shape)
param1 = ov.opset8.parameter(shape)
param2 = ov.opset8.parameter(shape)
const1 = ov.opset8.constant([[[[0, 0, 0]]]], dtype=np.float16)
const1_decompressed = ov.opset8.convert(
const1, destination_type=np.float32)
const2 = ov.opset8.constant([[[[0, 0, 0]]]], dtype=np.float16)
const2_decompressed = ov.opset8.convert(
const2, destination_type=np.float32)
sub1 = ov.opset8.subtract(param1, const1_decompressed)
sub2 = ov.opset8.subtract(param2, const2_decompressed)
const1 = ov.opset8.constant([[[[0, 0, 0]]]], dtype=np.float32)
const2 = ov.opset8.constant([[[[0, 0, 0]]]], dtype=np.float32)
sub1 = ov.opset8.subtract(param1, const1)
sub2 = ov.opset8.subtract(param2, const2)
add = ov.opset8.add(sub1, sub2)
relu = ov.opset8.relu(add)
sigm = ov.opset8.sigmoid(relu)
@@ -529,23 +524,18 @@ def create_pytorch_nn_module_scale_list(tmp_dir):
return pt_model, ref_model, {'input_shape': [shape, shape], 'scale_values': [[1, 1, 1], [1, 1, 1]], 'compress_to_fp16': False, "input": [InputCutInfo("x", None, "f32", None), InputCutInfo("y", None, "f32", None)]}
def create_pytorch_nn_module_scale_list_default_compression(tmp_dir):
# by default compression should be enabled (same as setting 'compress_to_fp16': True)
# therefore decompression Converts will be present
def create_pytorch_nn_module_scale_list_default_no_compression(tmp_dir):
# by default compression is disabled (same as setting 'compress_to_fp16': False)
pt_model = make_pt_model_two_inputs()
shape = [1, 10, 10, 3]
shape = PartialShape(shape)
param1 = ov.opset8.parameter(shape)
param2 = ov.opset8.parameter(shape)
const1 = ov.opset8.constant([[[[1, 1, 1]]]], dtype=np.float16)
const1_decompressed = ov.opset8.convert(
const1, destination_type=np.float32)
const2 = ov.opset8.constant([[[[1, 1, 1]]]], dtype=np.float16)
const2_decompressed = ov.opset8.convert(
const2, destination_type=np.float32)
sub1 = ov.opset8.multiply(param1, const1_decompressed)
sub2 = ov.opset8.multiply(param2, const2_decompressed)
const1 = ov.opset8.constant([[[[1, 1, 1]]]], dtype=np.float32)
const2 = ov.opset8.constant([[[[1, 1, 1]]]], dtype=np.float32)
sub1 = ov.opset8.multiply(param1, const1)
sub2 = ov.opset8.multiply(param2, const2)
add = ov.opset8.add(sub1, sub2)
relu = ov.opset8.relu(add)
sigm = ov.opset8.sigmoid(relu)
@@ -754,10 +744,10 @@ class TestMoConvertPyTorch(CommonMOConvertTest):
create_pytorch_nn_module_layout_list,
create_pytorch_nn_module_layout_list_case2,
create_pytorch_nn_module_mean_list,
create_pytorch_nn_module_mean_list_default_compression,
create_pytorch_nn_module_mean_list_default_no_compression,
create_pytorch_nn_module_mean_list_compressin_enabled,
create_pytorch_nn_module_scale_list,
create_pytorch_nn_module_scale_list_default_compression,
create_pytorch_nn_module_scale_list_default_no_compression,
create_pytorch_nn_module_scale_list_compression_enabled,
create_pytorch_nn_module_shapes_list_static,
create_pytorch_nn_module_shapes_list_dynamic,
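
For contrast with the *_default_no_compression references above, the still-listed *_compression_enabled test cases keep the decompression pattern removed here. A minimal sketch of both reference constants, assuming numpy as np and openvino.runtime aliased as ov, as in these tests:

    import numpy as np
    import openvino.runtime as ov

    # New default (compress_to_fp16=False): weights stay in FP32, no extra nodes.
    const_fp32 = ov.opset8.constant([[[[0, 0, 0]]]], dtype=np.float32)

    # With compress_to_fp16=True: weights are stored in FP16 and a decompression
    # Convert brings them back to FP32 at runtime.
    const_fp16 = ov.opset8.constant([[[[0, 0, 0]]]], dtype=np.float16)
    const_decompressed = ov.opset8.convert(const_fp16, destination_type=np.float32)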

@@ -34,7 +34,7 @@ def convert_model(
source_layout: [str, Layout, dict] = (),
target_layout: [str, Layout, dict] = (),
layout: [str, Layout, LayoutMap, list, dict] = (),
compress_to_fp16: bool = True,
compress_to_fp16: bool = False,
extensions: [str, pathlib.Path, list, Any] = None,
transform: [str, list, tuple] = "",
transformations_config: [str, pathlib.Path] = None,
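
With the default flipped to False, callers that relied on implicit compression now have to pass compress_to_fp16=True themselves. A small sketch of what the new default produces, assuming a local "model.onnx" file; Model.get_ops(), Node.get_type_name() and Node.get_element_type() are standard openvino.runtime APIs:

    from openvino.runtime import Type
    from openvino.tools.mo import convert_model

    ov_model = convert_model("model.onnx")  # compress_to_fp16 now defaults to False

    # Without compression, no Constant node should hold FP16 weights.
    fp16_consts = [op for op in ov_model.get_ops()
                   if op.get_type_name() == "Constant" and op.get_element_type() == Type.f16]
    assert not fp16_consts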

@@ -37,7 +37,7 @@ def get_tf_fe_message():
def get_compression_message():
link = "https://docs.openvino.ai/latest/openvino_docs_MO_DG_FP16_Compression.html"
message = '[ INFO ] Generated IR will be compressed to FP16. ' \
'If you get lower accuracy, please consider disabling compression explicitly ' \
'by adding argument --compress_to_fp16=False.\n' \
'If you get lower accuracy, please consider disabling compression ' \
'by removing the argument --compress_to_fp16 or setting it to False: --compress_to_fp16=False.\n' \
'Find more information about compression to FP16 at {}'.format(link)
return message
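
Since compression is no longer the default, this info message only applies when the user actually asked for FP16 compression. A hypothetical call-site sketch (the real caller and its argument object are not shown in this diff):

    # Hypothetical: print the hint only when compression was explicitly requested.
    if getattr(argv, "compress_to_fp16", False):
        print(get_compression_message())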