[MO] compress_to_fp16=False by default (#16854)
* compress_to_fp16=False by default
* Apply suggestions from code review
* Note about RAM consumption for FP16-compressed models
* More detailed note about RAM usage
* Update 'get_compression_message()'
* Correct get_compression_message: remove info about RAM
* Fix pytorch convert layer tests

---------

Co-authored-by: Karol Blaszczak <karol.blaszczak@intel.com>
This commit is contained in:
parent de8f34c8f0
commit 68f46ff9a1
@@ -2,18 +2,16 @@
 @sphinxdirective

-Model Optimizer by default converts all floating-point weights to ``FP16`` data type.
-The resulting IR is called compressed ``FP16`` model. The resulting model will occupy
-about twice as less space in the file system, but it may have some accuracy drop.
-For most models, the accuracy drop is negligible. But in case if accuracy drop is
-significant user can disable compression explicitly.
+Model Optimizer can convert all floating-point weights to the ``FP16`` data type.
+It results in creating a "compressed ``FP16`` model", which occupies about half of
+the original space in the file system. The compression may introduce a drop in accuracy,
+but it is negligible for most models.

-By default, models are compressed to ``FP16``, but you can disable compression by
-specifying ``--compress_to_fp16=False``:
+To compress the model, use the ``--compress_to_fp16`` or ``--compress_to_fp16=True`` option:

 .. code-block:: sh

-   mo --input_model INPUT_MODEL --compress_to_fp16=False
+   mo --input_model INPUT_MODEL --compress_to_fp16


 For details on how plugins handle compressed ``FP16`` models, see
@@ -26,4 +24,11 @@ For details on how plugins handle compressed ``FP16`` models, see
 information about that.

+.. note::
+
+   Some large models (larger than a few GB), when compressed to ``FP16``, may consume an enormous
+   amount of RAM during the loading phase of inference. If you face such problems, try converting
+   them without compression: ``mo --input_model INPUT_MODEL --compress_to_fp16=False``
+

 @endsphinxdirective
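The documentation above covers only the ``mo`` command line. For the Python conversion API, whose ``compress_to_fp16`` default is flipped later in this commit, a minimal sketch of requesting compression explicitly could look like the following; the ``model.onnx`` input and the output paths are placeholders, not taken from the diff:

.. code-block:: py

   # Sketch only: explicit FP16 compression through the Python API, mirroring
   # `mo --input_model INPUT_MODEL --compress_to_fp16`. File names are placeholders.
   from openvino.tools.mo import convert_model
   from openvino.runtime import serialize

   ov_model = convert_model("model.onnx", compress_to_fp16=True)
   serialize(ov_model, "model.xml", "model.bin")  # write the IR with compressed weights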
@@ -458,23 +458,18 @@ def create_pytorch_nn_module_mean_list(tmp_dir):
             "input": [InputCutInfo("x", None, "f32", None), InputCutInfo("y", None, "f32", None)]}


-def create_pytorch_nn_module_mean_list_default_compression(tmp_dir):
-    # by default compression should be enabled (same as setting 'compress_to_fp16': True)
-    # therefore decompression Converts will be present
+def create_pytorch_nn_module_mean_list_default_no_compression(tmp_dir):
+    # by default compression is disabled (same as setting 'compress_to_fp16': False)
     pt_model = make_pt_model_two_inputs()
     shape = [1, 10, 10, 3]

     shape = PartialShape(shape)
     param1 = ov.opset8.parameter(shape)
     param2 = ov.opset8.parameter(shape)
-    const1 = ov.opset8.constant([[[[0, 0, 0]]]], dtype=np.float16)
-    const1_decompressed = ov.opset8.convert(
-        const1, destination_type=np.float32)
-    const2 = ov.opset8.constant([[[[0, 0, 0]]]], dtype=np.float16)
-    const2_decompressed = ov.opset8.convert(
-        const2, destination_type=np.float32)
-    sub1 = ov.opset8.subtract(param1, const1_decompressed)
-    sub2 = ov.opset8.subtract(param2, const2_decompressed)
+    const1 = ov.opset8.constant([[[[0, 0, 0]]]], dtype=np.float32)
+    const2 = ov.opset8.constant([[[[0, 0, 0]]]], dtype=np.float32)
+    sub1 = ov.opset8.subtract(param1, const1)
+    sub2 = ov.opset8.subtract(param2, const2)
     add = ov.opset8.add(sub1, sub2)
     relu = ov.opset8.relu(add)
     sigm = ov.opset8.sigmoid(relu)
@@ -529,23 +524,18 @@ def create_pytorch_nn_module_scale_list(tmp_dir):
     return pt_model, ref_model, {'input_shape': [shape, shape], 'scale_values': [[1, 1, 1], [1, 1, 1]], 'compress_to_fp16': False, "input": [InputCutInfo("x", None, "f32", None), InputCutInfo("y", None, "f32", None)]}


-def create_pytorch_nn_module_scale_list_default_compression(tmp_dir):
-    # by default compression should be enabled (same as setting 'compress_to_fp16': True)
-    # therefore decompression Converts will be present
+def create_pytorch_nn_module_scale_list_default_no_compression(tmp_dir):
+    # by default compression is disabled (same as setting 'compress_to_fp16': False)
     pt_model = make_pt_model_two_inputs()
     shape = [1, 10, 10, 3]

     shape = PartialShape(shape)
     param1 = ov.opset8.parameter(shape)
     param2 = ov.opset8.parameter(shape)
-    const1 = ov.opset8.constant([[[[1, 1, 1]]]], dtype=np.float16)
-    const1_decompressed = ov.opset8.convert(
-        const1, destination_type=np.float32)
-    const2 = ov.opset8.constant([[[[1, 1, 1]]]], dtype=np.float16)
-    const2_decompressed = ov.opset8.convert(
-        const2, destination_type=np.float32)
-    sub1 = ov.opset8.multiply(param1, const1_decompressed)
-    sub2 = ov.opset8.multiply(param2, const2_decompressed)
+    const1 = ov.opset8.constant([[[[1, 1, 1]]]], dtype=np.float32)
+    const2 = ov.opset8.constant([[[[1, 1, 1]]]], dtype=np.float32)
+    sub1 = ov.opset8.multiply(param1, const1)
+    sub2 = ov.opset8.multiply(param2, const2)
     add = ov.opset8.add(sub1, sub2)
     relu = ov.opset8.relu(add)
     sigm = ov.opset8.sigmoid(relu)
@@ -754,10 +744,10 @@ class TestMoConvertPyTorch(CommonMOConvertTest):
         create_pytorch_nn_module_layout_list,
         create_pytorch_nn_module_layout_list_case2,
         create_pytorch_nn_module_mean_list,
-        create_pytorch_nn_module_mean_list_default_compression,
+        create_pytorch_nn_module_mean_list_default_no_compression,
         create_pytorch_nn_module_mean_list_compressin_enabled,
         create_pytorch_nn_module_scale_list,
-        create_pytorch_nn_module_scale_list_default_compression,
+        create_pytorch_nn_module_scale_list_default_no_compression,
         create_pytorch_nn_module_scale_list_compression_enabled,
         create_pytorch_nn_module_shapes_list_static,
         create_pytorch_nn_module_shapes_list_dynamic,
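The renamed ``*_default_no_compression`` references above drop the FP16 decompression pattern that the old default produced. For orientation, a minimal standalone sketch of that pattern, using the same ``openvino.runtime`` opset8 API as these tests, could look like this; the single-input graph and the names are illustrative only, not taken from the test file:

.. code-block:: py

   # Sketch of the "compression enabled" reference pattern the *_compression_enabled
   # tests still expect: weights kept as FP16 Constants plus a decompression Convert
   # back to FP32.
   import numpy as np
   from openvino.runtime import Model, PartialShape, opset8

   param = opset8.parameter(PartialShape([1, 10, 10, 3]))

   # compress_to_fp16=True: FP16 Constant + Convert (decompression) to FP32
   mean_fp16 = opset8.constant([[[[0, 0, 0]]]], dtype=np.float16)
   mean = opset8.convert(mean_fp16, destination_type=np.float32)

   # compress_to_fp16=False (the new default): a plain FP32 Constant, no Convert
   # mean = opset8.constant([[[[0, 0, 0]]]], dtype=np.float32)

   sub = opset8.subtract(param, mean)
   ref_model = Model([sub], [param], "decompression_pattern_sketch")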
@@ -34,7 +34,7 @@ def convert_model(
     source_layout: [str, Layout, dict] = (),
     target_layout: [str, Layout, dict] = (),
     layout: [str, Layout, LayoutMap, list, dict] = (),
-    compress_to_fp16: bool = True,
+    compress_to_fp16: bool = False,
     extensions: [str, pathlib.Path, list, Any] = None,
     transform: [str, list, tuple] = "",
     transformations_config: [str, pathlib.Path] = None,
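With the default flipped above, callers that relied on implicit compression now need to opt in explicitly. A minimal before/after sketch of such a call, with ``model.onnx`` as a placeholder input, is:

.. code-block:: py

   # Sketch only: the new default returns uncompressed FP32 weights; pass
   # compress_to_fp16=True to keep the previous behaviour.
   from openvino.tools.mo import convert_model

   ov_model_fp32 = convert_model("model.onnx")                         # new default: no compression
   ov_model_fp16 = convert_model("model.onnx", compress_to_fp16=True)  # old behaviour, now opt-in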
@@ -37,7 +37,7 @@ def get_tf_fe_message():
 def get_compression_message():
     link = "https://docs.openvino.ai/latest/openvino_docs_MO_DG_FP16_Compression.html"
     message = '[ INFO ] Generated IR will be compressed to FP16. ' \
-              'If you get lower accuracy, please consider disabling compression explicitly ' \
-              'by adding argument --compress_to_fp16=False.\n' \
+              'If you get lower accuracy, please consider disabling compression ' \
+              'by removing the argument --compress_to_fp16 or setting it to false: --compress_to_fp16=False.\n' \
               'Find more information about compression to FP16 at {}'.format(link)
     return message