[MO] compress_to_fp16=False by default (#16854)

* compress_to_fp16=False by default

* Apply suggestions from code review

Co-authored-by: Karol Blaszczak <karol.blaszczak@intel.com>

* note about RAM consumption for FP16 compressed models

* detailed note about RAM usage

* update 'get_compression_message()'

* corrected get_compression_message: remove info about RAM

* fix pytorch convert layer tests

---------

Co-authored-by: Karol Blaszczak <karol.blaszczak@intel.com>
Pavel Esir 2023-04-14 03:16:41 +02:00 committed by GitHub
parent de8f34c8f0
commit 68f46ff9a1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 30 additions and 35 deletions

View File

@@ -2,18 +2,16 @@
 @sphinxdirective
-Model Optimizer by default converts all floating-point weights to ``FP16`` data type.
-The resulting IR is called compressed ``FP16`` model. The resulting model will occupy
-about twice as less space in the file system, but it may have some accuracy drop.
-For most models, the accuracy drop is negligible. But in case if accuracy drop is
-significant user can disable compression explicitly.
+Model Optimizer can convert all floating-point weights to the ``FP16`` data type.
+It results in creating a "compressed ``FP16`` model", which occupies about half of
+the original space in the file system. The compression may introduce a drop in accuracy,
+but it is negligible for most models.
 
-By default, models are compressed to ``FP16``, but you can disable compression by
-specifying ``--compress_to_fp16=False``:
+To compress the model, use the `--compress_to_fp16` or `--compress_to_fp16=True` option:
 
 .. code-block:: sh
 
-   mo --input_model INPUT_MODEL --compress_to_fp16=False
+   mo --input_model INPUT_MODEL --compress_to_fp16
 
 For details on how plugins handle compressed ``FP16`` models, see
@@ -26,4 +24,11 @@ For details on how plugins handle compressed ``FP16`` models, see
 information about that.
 
+.. note::
+
+   Some large models (larger than a few Gb), when compressed to ``FP16``, may consume an enormous
+   amount of RAM at the loading phase of inference. If you are facing such problems,
+   try converting them without compression: `mo --input_model INPUT_MODEL --compress_to_fp16=False`
+
 @endsphinxdirective
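
The same switch is exposed through the Python conversion API changed later in this commit. Below is a minimal sketch of both modes; "model.onnx" and the output file names are placeholders, not part of the commit:

from openvino.runtime import serialize
from openvino.tools.mo import convert_model

# New default: weights are kept in FP32, no compression is applied.
ov_model = convert_model("model.onnx")
serialize(ov_model, "model_fp32.xml")

# Opting in to the compressed FP16 IR, equivalent to `mo --compress_to_fp16`.
ov_model_fp16 = convert_model("model.onnx", compress_to_fp16=True)
serialize(ov_model_fp16, "model_fp16.xml")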

View File

@@ -458,23 +458,18 @@ def create_pytorch_nn_module_mean_list(tmp_dir):
             "input": [InputCutInfo("x", None, "f32", None), InputCutInfo("y", None, "f32", None)]}
 
-def create_pytorch_nn_module_mean_list_default_compression(tmp_dir):
-    # by default compression should be enabled (same as setting 'compress_to_fp16': True)
-    # therefore decompression Converts will be present
+def create_pytorch_nn_module_mean_list_default_no_compression(tmp_dir):
+    # by default compression is disabled (same as setting 'compress_to_fp16': False)
     pt_model = make_pt_model_two_inputs()
     shape = [1, 10, 10, 3]
     shape = PartialShape(shape)
     param1 = ov.opset8.parameter(shape)
     param2 = ov.opset8.parameter(shape)
-    const1 = ov.opset8.constant([[[[0, 0, 0]]]], dtype=np.float16)
-    const1_decompressed = ov.opset8.convert(
-        const1, destination_type=np.float32)
-    const2 = ov.opset8.constant([[[[0, 0, 0]]]], dtype=np.float16)
-    const2_decompressed = ov.opset8.convert(
-        const2, destination_type=np.float32)
-    sub1 = ov.opset8.subtract(param1, const1_decompressed)
-    sub2 = ov.opset8.subtract(param2, const2_decompressed)
+    const1 = ov.opset8.constant([[[[0, 0, 0]]]], dtype=np.float32)
+    const2 = ov.opset8.constant([[[[0, 0, 0]]]], dtype=np.float32)
+    sub1 = ov.opset8.subtract(param1, const1)
+    sub2 = ov.opset8.subtract(param2, const2)
     add = ov.opset8.add(sub1, sub2)
     relu = ov.opset8.relu(add)
     sigm = ov.opset8.sigmoid(relu)
@@ -529,23 +524,18 @@ def create_pytorch_nn_module_scale_list(tmp_dir):
     return pt_model, ref_model, {'input_shape': [shape, shape], 'scale_values': [[1, 1, 1], [1, 1, 1]], 'compress_to_fp16': False, "input": [InputCutInfo("x", None, "f32", None), InputCutInfo("y", None, "f32", None)]}
 
-def create_pytorch_nn_module_scale_list_default_compression(tmp_dir):
-    # by default compression should be enabled (same as setting 'compress_to_fp16': True)
-    # therefore decompression Converts will be present
+def create_pytorch_nn_module_scale_list_default_no_compression(tmp_dir):
+    # by default compression is disabled (same as setting 'compress_to_fp16': False)
     pt_model = make_pt_model_two_inputs()
     shape = [1, 10, 10, 3]
     shape = PartialShape(shape)
     param1 = ov.opset8.parameter(shape)
     param2 = ov.opset8.parameter(shape)
-    const1 = ov.opset8.constant([[[[1, 1, 1]]]], dtype=np.float16)
-    const1_decompressed = ov.opset8.convert(
-        const1, destination_type=np.float32)
-    const2 = ov.opset8.constant([[[[1, 1, 1]]]], dtype=np.float16)
-    const2_decompressed = ov.opset8.convert(
-        const2, destination_type=np.float32)
-    sub1 = ov.opset8.multiply(param1, const1_decompressed)
-    sub2 = ov.opset8.multiply(param2, const2_decompressed)
+    const1 = ov.opset8.constant([[[[1, 1, 1]]]], dtype=np.float32)
+    const2 = ov.opset8.constant([[[[1, 1, 1]]]], dtype=np.float32)
+    sub1 = ov.opset8.multiply(param1, const1)
+    sub2 = ov.opset8.multiply(param2, const2)
     add = ov.opset8.add(sub1, sub2)
     relu = ov.opset8.relu(add)
     sigm = ov.opset8.sigmoid(relu)
@@ -754,10 +744,10 @@ class TestMoConvertPyTorch(CommonMOConvertTest):
         create_pytorch_nn_module_layout_list,
         create_pytorch_nn_module_layout_list_case2,
         create_pytorch_nn_module_mean_list,
-        create_pytorch_nn_module_mean_list_default_compression,
+        create_pytorch_nn_module_mean_list_default_no_compression,
         create_pytorch_nn_module_mean_list_compressin_enabled,
         create_pytorch_nn_module_scale_list,
-        create_pytorch_nn_module_scale_list_default_compression,
+        create_pytorch_nn_module_scale_list_default_no_compression,
         create_pytorch_nn_module_scale_list_compression_enabled,
         create_pytorch_nn_module_shapes_list_static,
         create_pytorch_nn_module_shapes_list_dynamic,
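
For context on the removed "decompression Converts" comment: with compression enabled, the reference models in these tests carry an f16 weight constant followed by a Convert back to f32. A minimal sketch of that pattern, reusing the ov.opset8 and PartialShape builders from the test file; wrapping the nodes into an ov.Model here is illustrative and not taken from the test:

import numpy as np
import openvino.runtime as ov  # the test file uses the same ov.opset8 builders

# FP16-compressed weight plus the decompression Convert back to f32 -
# the subgraph expected when 'compress_to_fp16' is set to True.
shape = ov.PartialShape([1, 10, 10, 3])
param = ov.opset8.parameter(shape)
const_f16 = ov.opset8.constant([[[[0, 0, 0]]]], dtype=np.float16)
const_f32 = ov.opset8.convert(const_f16, destination_type=np.float32)
sub = ov.opset8.subtract(param, const_f32)
ref_model = ov.Model([ov.opset8.sigmoid(ov.opset8.relu(sub))], [param])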

View File

@@ -34,7 +34,7 @@ def convert_model(
     source_layout: [str, Layout, dict] = (),
     target_layout: [str, Layout, dict] = (),
     layout: [str, Layout, LayoutMap, list, dict] = (),
-    compress_to_fp16: bool = True,
+    compress_to_fp16: bool = False,
     extensions: [str, pathlib.Path, list, Any] = None,
     transform: [str, list, tuple] = "",
     transformations_config: [str, pathlib.Path] = None,
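
A quick way to check which default an installed openvino.tools.mo build carries; this minimal sketch only inspects the public signature shown above and is not part of the commit:

import inspect
from openvino.tools.mo import convert_model

# Reads the default straight from the convert_model signature:
# expected to be False after this commit and True before it.
default = inspect.signature(convert_model).parameters["compress_to_fp16"].default
print(f"compress_to_fp16 defaults to {default}")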

View File

@@ -37,7 +37,7 @@ def get_tf_fe_message():
 def get_compression_message():
     link = "https://docs.openvino.ai/latest/openvino_docs_MO_DG_FP16_Compression.html"
     message = '[ INFO ] Generated IR will be compressed to FP16. ' \
-              'If you get lower accuracy, please consider disabling compression explicitly ' \
-              'by adding argument --compress_to_fp16=False.\n' \
+              'If you get lower accuracy, please consider disabling compression ' \
+              'by removing argument --compress_to_fp16 or set it to false --compress_to_fp16=False.\n' \
               'Find more information about compression to FP16 at {}'.format(link)
     return message