From 68f46ff9a116ded2e94da9f091f91b04121ea6ac Mon Sep 17 00:00:00 2001
From: Pavel Esir
Date: Fri, 14 Apr 2023 03:16:41 +0200
Subject: [PATCH] [MO] compress_to_fp16=False by default (#16854)

* compress_to_fp16=False by default

* Apply suggestions from code review

Co-authored-by: Karol Blaszczak

* note about RAM consumption for FP16 compressed models

* detailed note about RAM usage

* update 'get_compression_message()'

* corrected get_compression_message: remove info about RAM

* fix pytorch convert layer tests

---------

Co-authored-by: Karol Blaszczak
---
 docs/MO_DG/prepare_model/FP16_Compression.md  | 21 ++++++----
 .../test_mo_convert_pytorch.py                | 38 +++++++------------
 tools/mo/openvino/tools/mo/convert.py         |  2 +-
 .../tools/mo/utils/get_ov_update_message.py   |  4 +-
 4 files changed, 30 insertions(+), 35 deletions(-)

diff --git a/docs/MO_DG/prepare_model/FP16_Compression.md b/docs/MO_DG/prepare_model/FP16_Compression.md
index 96086e596b7..78279a298cd 100644
--- a/docs/MO_DG/prepare_model/FP16_Compression.md
+++ b/docs/MO_DG/prepare_model/FP16_Compression.md
@@ -2,18 +2,16 @@
 
 @sphinxdirective
 
-Model Optimizer by default converts all floating-point weights to ``FP16`` data type.
-The resulting IR is called compressed ``FP16`` model. The resulting model will occupy
-about twice as less space in the file system, but it may have some accuracy drop.
-For most models, the accuracy drop is negligible. But in case if accuracy drop is
-significant user can disable compression explicitly.
+Model Optimizer can convert all floating-point weights to the ``FP16`` data type.
+It results in creating a "compressed ``FP16`` model", which occupies about half of
+the original space in the file system. The compression may introduce a drop in accuracy,
+but it is negligible for most models.
 
-By default, models are compressed to ``FP16``, but you can disable compression by
-specifying ``--compress_to_fp16=False``:
+To compress the model, use the ``--compress_to_fp16`` or ``--compress_to_fp16=True`` option:
 
 .. code-block:: sh
 
-   mo --input_model INPUT_MODEL --compress_to_fp16=False
+   mo --input_model INPUT_MODEL --compress_to_fp16
 
 
 For details on how plugins handle compressed ``FP16`` models, see
@@ -26,4 +24,11 @@ For details on how plugins handle compressed ``FP16`` models, see
    information about that.
 
+.. note::
+
+   Some large models (larger than a few GB), when compressed to ``FP16``, may consume an enormous amount of RAM during the loading phase of inference.
+   If you encounter such problems, try converting the model without compression:
+   ``mo --input_model INPUT_MODEL --compress_to_fp16=False``
+
+
 @endsphinxdirective
diff --git a/tests/layer_tests/mo_python_api_tests/test_mo_convert_pytorch.py b/tests/layer_tests/mo_python_api_tests/test_mo_convert_pytorch.py
index a4649f42390..3c66ce8ec30 100644
--- a/tests/layer_tests/mo_python_api_tests/test_mo_convert_pytorch.py
+++ b/tests/layer_tests/mo_python_api_tests/test_mo_convert_pytorch.py
@@ -458,23 +458,18 @@ def create_pytorch_nn_module_mean_list(tmp_dir):
                                  "input": [InputCutInfo("x", None, "f32", None), InputCutInfo("y", None, "f32", None)]}
 
 
-def create_pytorch_nn_module_mean_list_default_compression(tmp_dir):
-    # by default compression should be enabled (same as setting 'compress_to_fp16': True)
-    # therefore decompression Converts will be present
+def create_pytorch_nn_module_mean_list_default_no_compression(tmp_dir):
+    # by default compression is disabled (same as setting 'compress_to_fp16': False)
     pt_model = make_pt_model_two_inputs()
     shape = [1, 10, 10, 3]
 
     shape = PartialShape(shape)
     param1 = ov.opset8.parameter(shape)
     param2 = ov.opset8.parameter(shape)
-    const1 = ov.opset8.constant([[[[0, 0, 0]]]], dtype=np.float16)
-    const1_decompressed = ov.opset8.convert(
-        const1, destination_type=np.float32)
-    const2 = ov.opset8.constant([[[[0, 0, 0]]]], dtype=np.float16)
-    const2_decompressed = ov.opset8.convert(
-        const2, destination_type=np.float32)
-    sub1 = ov.opset8.subtract(param1, const1_decompressed)
-    sub2 = ov.opset8.subtract(param2, const2_decompressed)
+    const1 = ov.opset8.constant([[[[0, 0, 0]]]], dtype=np.float32)
+    const2 = ov.opset8.constant([[[[0, 0, 0]]]], dtype=np.float32)
+    sub1 = ov.opset8.subtract(param1, const1)
+    sub2 = ov.opset8.subtract(param2, const2)
     add = ov.opset8.add(sub1, sub2)
     relu = ov.opset8.relu(add)
     sigm = ov.opset8.sigmoid(relu)
@@ -529,23 +524,18 @@ def create_pytorch_nn_module_scale_list(tmp_dir):
     return pt_model, ref_model, {'input_shape': [shape, shape], 'scale_values': [[1, 1, 1], [1, 1, 1]], 'compress_to_fp16': False, "input": [InputCutInfo("x", None, "f32", None), InputCutInfo("y", None, "f32", None)]}
 
 
-def create_pytorch_nn_module_scale_list_default_compression(tmp_dir):
-    # by default compression should be enabled (same as setting 'compress_to_fp16': True)
-    # therefore decompression Converts will be present
+def create_pytorch_nn_module_scale_list_default_no_compression(tmp_dir):
+    # by default compression is disabled (same as setting 'compress_to_fp16': False)
    pt_model = make_pt_model_two_inputs()
     shape = [1, 10, 10, 3]
 
     shape = PartialShape(shape)
     param1 = ov.opset8.parameter(shape)
     param2 = ov.opset8.parameter(shape)
-    const1 = ov.opset8.constant([[[[1, 1, 1]]]], dtype=np.float16)
-    const1_decompressed = ov.opset8.convert(
-        const1, destination_type=np.float32)
-    const2 = ov.opset8.constant([[[[1, 1, 1]]]], dtype=np.float16)
-    const2_decompressed = ov.opset8.convert(
-        const2, destination_type=np.float32)
-    sub1 = ov.opset8.multiply(param1, const1_decompressed)
-    sub2 = ov.opset8.multiply(param2, const2_decompressed)
+    const1 = ov.opset8.constant([[[[1, 1, 1]]]], dtype=np.float32)
+    const2 = ov.opset8.constant([[[[1, 1, 1]]]], dtype=np.float32)
+    sub1 = ov.opset8.multiply(param1, const1)
+    sub2 = ov.opset8.multiply(param2, const2)
     add = ov.opset8.add(sub1, sub2)
     relu = ov.opset8.relu(add)
     sigm = ov.opset8.sigmoid(relu)
@@ -754,10 +744,10 @@ class TestMoConvertPyTorch(CommonMOConvertTest):
         create_pytorch_nn_module_layout_list,
         create_pytorch_nn_module_layout_list_case2,
         create_pytorch_nn_module_mean_list,
-        create_pytorch_nn_module_mean_list_default_compression,
+        create_pytorch_nn_module_mean_list_default_no_compression,
         create_pytorch_nn_module_mean_list_compressin_enabled,
         create_pytorch_nn_module_scale_list,
-        create_pytorch_nn_module_scale_list_default_compression,
+        create_pytorch_nn_module_scale_list_default_no_compression,
         create_pytorch_nn_module_scale_list_compression_enabled,
         create_pytorch_nn_module_shapes_list_static,
         create_pytorch_nn_module_shapes_list_dynamic,
diff --git a/tools/mo/openvino/tools/mo/convert.py b/tools/mo/openvino/tools/mo/convert.py
index 228df63c2e7..339b3ad19f2 100644
--- a/tools/mo/openvino/tools/mo/convert.py
+++ b/tools/mo/openvino/tools/mo/convert.py
@@ -34,7 +34,7 @@ def convert_model(
         source_layout: [str, Layout, dict] = (),
         target_layout: [str, Layout, dict] = (),
         layout: [str, Layout, LayoutMap, list, dict] = (),
-        compress_to_fp16: bool = True,
+        compress_to_fp16: bool = False,
         extensions: [str, pathlib.Path, list, Any] = None,
         transform: [str, list, tuple] = "",
         transformations_config: [str, pathlib.Path] = None,
diff --git a/tools/mo/openvino/tools/mo/utils/get_ov_update_message.py b/tools/mo/openvino/tools/mo/utils/get_ov_update_message.py
index 2ac095b27ca..545bc204bab 100644
--- a/tools/mo/openvino/tools/mo/utils/get_ov_update_message.py
+++ b/tools/mo/openvino/tools/mo/utils/get_ov_update_message.py
@@ -37,7 +37,7 @@ def get_tf_fe_message():
 def get_compression_message():
     link = "https://docs.openvino.ai/latest/openvino_docs_MO_DG_FP16_Compression.html"
     message = '[ INFO ] Generated IR will be compressed to FP16. ' \
-              'If you get lower accuracy, please consider disabling compression explicitly ' \
-              'by adding argument --compress_to_fp16=False.\n' \
+              'If you get lower accuracy, please consider disabling compression ' \
+              'by removing the --compress_to_fp16 argument or setting it to false: --compress_to_fp16=False.\n' \
               'Find more information about compression to FP16 at {}'.format(link)
     return message
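
A minimal usage sketch of the behavior this patch establishes, written against the convert_model() API touched in tools/mo/convert.py; the "model.onnx" input path and the output file names below are placeholders, not part of the patch:

    # Sketch only: input/output file names are placeholders.
    from openvino.runtime import serialize
    from openvino.tools.mo import convert_model

    # With the new default compress_to_fp16=False, weights are kept in FP32.
    fp32_model = convert_model("model.onnx")
    serialize(fp32_model, "model_fp32.xml")

    # FP16 compression now has to be requested explicitly, mirroring
    # the CLI call `mo --input_model model.onnx --compress_to_fp16`.
    fp16_model = convert_model("model.onnx", compress_to_fp16=True)
    serialize(fp16_model, "model_fp16.xml")

In both cases serialize() writes the IR .xml/.bin pair; as the updated documentation notes, the FP16 variant takes roughly half the disk space at the cost of a possible, usually negligible, accuracy drop.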