From 68f46ff9a116ded2e94da9f091f91b04121ea6ac Mon Sep 17 00:00:00 2001
From: Pavel Esir
Date: Fri, 14 Apr 2023 03:16:41 +0200
Subject: [PATCH] [MO] compress_to_fp16=False by default (#16854)

* compress_to_fp16=False by default

* Apply suggestions from code review

Co-authored-by: Karol Blaszczak

* note about RAM consumption for FP16 compressed models

* detailed note about RAM usage

* update 'get_compression_message()'

* corrected get_compression_message: remove info about RAM

* fix pytorch convert layer tests

---------

Co-authored-by: Karol Blaszczak
---
 docs/MO_DG/prepare_model/FP16_Compression.md  | 21 ++++++----
 .../test_mo_convert_pytorch.py                | 38 +++++++------------
 tools/mo/openvino/tools/mo/convert.py         |  2 +-
 .../tools/mo/utils/get_ov_update_message.py   |  4 +-
 4 files changed, 30 insertions(+), 35 deletions(-)

diff --git a/docs/MO_DG/prepare_model/FP16_Compression.md b/docs/MO_DG/prepare_model/FP16_Compression.md
index 96086e596b7..78279a298cd 100644
--- a/docs/MO_DG/prepare_model/FP16_Compression.md
+++ b/docs/MO_DG/prepare_model/FP16_Compression.md
@@ -2,18 +2,16 @@
 
 @sphinxdirective
 
-Model Optimizer by default converts all floating-point weights to ``FP16`` data type.
-The resulting IR is called compressed ``FP16`` model. The resulting model will occupy
-about twice as less space in the file system, but it may have some accuracy drop.
-For most models, the accuracy drop is negligible. But in case if accuracy drop is
-significant user can disable compression explicitly.
+Model Optimizer can convert all floating-point weights to the ``FP16`` data type.
+It results in creating a "compressed ``FP16`` model", which occupies about half of
+the original space in the file system. The compression may introduce a drop in accuracy,
+but it is negligible for most models.
 
-By default, models are compressed to ``FP16``, but you can disable compression by
-specifying ``--compress_to_fp16=False``:
+To compress the model, use the ``--compress_to_fp16`` or ``--compress_to_fp16=True`` option:
 
 .. code-block:: sh
 
-   mo --input_model INPUT_MODEL --compress_to_fp16=False
+   mo --input_model INPUT_MODEL --compress_to_fp16
 
 
 For details on how plugins handle compressed ``FP16`` models, see
@@ -26,4 +24,11 @@ For details on how plugins handle compressed ``FP16`` models, see
    information about that.
 
+.. note::
+
+   Some large models (larger than a few GB), when compressed to ``FP16``, may consume an enormous amount of RAM during the loading phase of inference.
+   If you encounter such problems, try converting the model without compression:
+   ``mo --input_model INPUT_MODEL --compress_to_fp16=False``
+
+
 @endsphinxdirective
diff --git a/tests/layer_tests/mo_python_api_tests/test_mo_convert_pytorch.py b/tests/layer_tests/mo_python_api_tests/test_mo_convert_pytorch.py
index a4649f42390..3c66ce8ec30 100644
--- a/tests/layer_tests/mo_python_api_tests/test_mo_convert_pytorch.py
+++ b/tests/layer_tests/mo_python_api_tests/test_mo_convert_pytorch.py
@@ -458,23 +458,18 @@ def create_pytorch_nn_module_mean_list(tmp_dir):
                                  "input": [InputCutInfo("x", None, "f32", None), InputCutInfo("y", None, "f32", None)]}
 
 
-def create_pytorch_nn_module_mean_list_default_compression(tmp_dir):
-    # by default compression should be enabled (same as setting 'compress_to_fp16': True)
-    # therefore decompression Converts will be present
+def create_pytorch_nn_module_mean_list_default_no_compression(tmp_dir):
+    # by default compression is disabled (same as setting 'compress_to_fp16': False)
     pt_model = make_pt_model_two_inputs()
     shape = [1, 10, 10, 3]
 
     shape = PartialShape(shape)
     param1 = ov.opset8.parameter(shape)
     param2 = ov.opset8.parameter(shape)
-    const1 = ov.opset8.constant([[[[0, 0, 0]]]], dtype=np.float16)
-    const1_decompressed = ov.opset8.convert(
-        const1, destination_type=np.float32)
-    const2 = ov.opset8.constant([[[[0, 0, 0]]]], dtype=np.float16)
-    const2_decompressed = ov.opset8.convert(
-        const2, destination_type=np.float32)
-    sub1 = ov.opset8.subtract(param1, const1_decompressed)
-    sub2 = ov.opset8.subtract(param2, const2_decompressed)
+    const1 = ov.opset8.constant([[[[0, 0, 0]]]], dtype=np.float32)
+    const2 = ov.opset8.constant([[[[0, 0, 0]]]], dtype=np.float32)
+    sub1 = ov.opset8.subtract(param1, const1)
+    sub2 = ov.opset8.subtract(param2, const2)
     add = ov.opset8.add(sub1, sub2)
     relu = ov.opset8.relu(add)
     sigm = ov.opset8.sigmoid(relu)
@@ -529,23 +524,18 @@ def create_pytorch_nn_module_scale_list(tmp_dir):
     return pt_model, ref_model, {'input_shape': [shape, shape], 'scale_values': [[1, 1, 1], [1, 1, 1]], 'compress_to_fp16': False, "input": [InputCutInfo("x", None, "f32", None), InputCutInfo("y", None, "f32", None)]}
 
 
-def create_pytorch_nn_module_scale_list_default_compression(tmp_dir):
-    # by default compression should be enabled (same as setting 'compress_to_fp16': True)
-    # therefore decompression Converts will be present
+def create_pytorch_nn_module_scale_list_default_no_compression(tmp_dir):
+    # by default compression is disabled (same as setting 'compress_to_fp16': False)
    pt_model = make_pt_model_two_inputs()
     shape = [1, 10, 10, 3]
 
     shape = PartialShape(shape)
     param1 = ov.opset8.parameter(shape)
     param2 = ov.opset8.parameter(shape)
-    const1 = ov.opset8.constant([[[[1, 1, 1]]]], dtype=np.float16)
-    const1_decompressed = ov.opset8.convert(
-        const1, destination_type=np.float32)
-    const2 = ov.opset8.constant([[[[1, 1, 1]]]], dtype=np.float16)
-    const2_decompressed = ov.opset8.convert(
-        const2, destination_type=np.float32)
-    sub1 = ov.opset8.multiply(param1, const1_decompressed)
-    sub2 = ov.opset8.multiply(param2, const2_decompressed)
+    const1 = ov.opset8.constant([[[[1, 1, 1]]]], dtype=np.float32)
+    const2 = ov.opset8.constant([[[[1, 1, 1]]]], dtype=np.float32)
+    sub1 = ov.opset8.multiply(param1, const1)
+    sub2 = ov.opset8.multiply(param2, const2)
     add = ov.opset8.add(sub1, sub2)
     relu = ov.opset8.relu(add)
     sigm = ov.opset8.sigmoid(relu)
@@ -754,10 +744,10 @@ class TestMoConvertPyTorch(CommonMOConvertTest):
         create_pytorch_nn_module_layout_list,
         create_pytorch_nn_module_layout_list_case2,
         create_pytorch_nn_module_mean_list,
-        create_pytorch_nn_module_mean_list_default_compression,
+        create_pytorch_nn_module_mean_list_default_no_compression,
         create_pytorch_nn_module_mean_list_compressin_enabled,
         create_pytorch_nn_module_scale_list,
-        create_pytorch_nn_module_scale_list_default_compression,
+        create_pytorch_nn_module_scale_list_default_no_compression,
         create_pytorch_nn_module_scale_list_compression_enabled,
         create_pytorch_nn_module_shapes_list_static,
         create_pytorch_nn_module_shapes_list_dynamic,
diff --git a/tools/mo/openvino/tools/mo/convert.py b/tools/mo/openvino/tools/mo/convert.py
index 228df63c2e7..339b3ad19f2 100644
--- a/tools/mo/openvino/tools/mo/convert.py
+++ b/tools/mo/openvino/tools/mo/convert.py
@@ -34,7 +34,7 @@ def convert_model(
         source_layout: [str, Layout, dict] = (),
         target_layout: [str, Layout, dict] = (),
         layout: [str, Layout, LayoutMap, list, dict] = (),
-        compress_to_fp16: bool = True,
+        compress_to_fp16: bool = False,
         extensions: [str, pathlib.Path, list, Any] = None,
         transform: [str, list, tuple] = "",
         transformations_config: [str, pathlib.Path] = None,
diff --git a/tools/mo/openvino/tools/mo/utils/get_ov_update_message.py b/tools/mo/openvino/tools/mo/utils/get_ov_update_message.py
index 2ac095b27ca..545bc204bab 100644
--- a/tools/mo/openvino/tools/mo/utils/get_ov_update_message.py
+++ b/tools/mo/openvino/tools/mo/utils/get_ov_update_message.py
@@ -37,7 +37,7 @@ def get_tf_fe_message():
 def get_compression_message():
     link = "https://docs.openvino.ai/latest/openvino_docs_MO_DG_FP16_Compression.html"
     message = '[ INFO ] Generated IR will be compressed to FP16. ' \
-              'If you get lower accuracy, please consider disabling compression explicitly ' \
-              'by adding argument --compress_to_fp16=False.\n' \
+              'If you get lower accuracy, please consider disabling compression ' \
+              'by removing the --compress_to_fp16 argument or setting it to false: --compress_to_fp16=False.\n' \
               'Find more information about compression to FP16 at {}'.format(link)
     return message
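
A minimal usage sketch of the behavior this patch establishes, written against the convert_model() API touched in tools/mo/convert.py; the "model.onnx" input path and the output file names below are placeholders, not part of the patch:

    # Sketch only: input/output file names are placeholders.
    from openvino.runtime import serialize
    from openvino.tools.mo import convert_model

    # With the new default compress_to_fp16=False, weights are kept in FP32.
    fp32_model = convert_model("model.onnx")
    serialize(fp32_model, "model_fp32.xml")

    # FP16 compression now has to be requested explicitly, mirroring
    # the CLI call `mo --input_model model.onnx --compress_to_fp16`.
    fp16_model = convert_model("model.onnx", compress_to_fp16=True)
    serialize(fp16_model, "model_fp16.xml")

In both cases serialize() writes the IR .xml/.bin pair; as the updated documentation notes, the FP16 variant takes roughly half the disk space at the cost of a possible, usually negligible, accuracy drop.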