From 329200cc62e1ace05a378e8a3497cefab8f2bf45 Mon Sep 17 00:00:00 2001
From: Sergey Lyalin <sergey.lyalin@intel.com>
Date: Fri, 21 Jul 2023 16:03:53 +0400
Subject: [PATCH] New save_model function (C++, Python) (#18656)

* Draft version of save_model function (C++, Python)

* Fixed code style.

* Fixed incorrect test model construction for Python save_model tests

* Minor improvements in code readability

* Minor adjustment based on PR review
---
 .../python/src/openvino/runtime/__init__.py   |  1 +
 .../python/src/pyopenvino/pyopenvino.cpp      | 35 ++++++++++
 .../test_transformations/test_offline_api.py  | 70 +++++++++++++++----
 src/core/include/openvino/core/graph_util.hpp | 14 ++++
 src/core/src/graph_util.cpp                   | 15 ++++
 src/core/src/pass/serialize.cpp               |  2 +-
 .../tests/pass/serialization/serialize.cpp    |  6 ++
 7 files changed, 127 insertions(+), 16 deletions(-)

diff --git a/src/bindings/python/src/openvino/runtime/__init__.py b/src/bindings/python/src/openvino/runtime/__init__.py
index 2755e0d59c4..b53976a6006 100644
--- a/src/bindings/python/src/openvino/runtime/__init__.py
+++ b/src/bindings/python/src/openvino/runtime/__init__.py
@@ -44,6 +44,7 @@ from openvino._pyopenvino import ProfilingInfo
 from openvino._pyopenvino import get_batch
 from openvino._pyopenvino import set_batch
 from openvino._pyopenvino import serialize
+from openvino._pyopenvino import save_model
 from openvino._pyopenvino import shutdown
 
 # Import opsets
diff --git a/src/bindings/python/src/pyopenvino/pyopenvino.cpp b/src/bindings/python/src/pyopenvino/pyopenvino.cpp
index 5030d2c6a55..1c8f7cf80ce 100644
--- a/src/bindings/python/src/pyopenvino/pyopenvino.cpp
+++ b/src/bindings/python/src/pyopenvino/pyopenvino.cpp
@@ -117,6 +117,9 @@ PYBIND11_MODULE(_pyopenvino, m) {
         R"(
             Serialize given model into IR. The generated .xml and .bin files will be saved
             into provided paths.
+            This method serializes the model "as-is", which means no weights compression is applied.
+            It is recommended to use the save_model function instead of serialize in all cases
+            that are not related to debugging.
             :param model: model which will be converted to IR representation
             :type model: openvino.runtime.Model
             :param xml_path: path where .xml file will be saved
@@ -157,6 +160,38 @@ PYBIND11_MODULE(_pyopenvino, m) {
                 serialize(model, xml_path="./serialized.xml", bin_path="./serialized.bin", version="IR_V11")
         )");
 
+    m.def(
+        "save_model",
+        [](std::shared_ptr<ov::Model>& model,
+           const py::object& xml_path,
+           bool compress_to_fp16) {
+            ov::save_model(model,
+                          Common::utils::convert_path_to_string(xml_path),
+                          compress_to_fp16);
+        },
+        py::arg("model"),
+        py::arg("output_model"),
+        py::arg("compress_to_fp16") = true,
+        R"(
+            Save model into IR files (xml and bin). Floating point weights are compressed to FP16 by default.
+            This method saves a model to IR applying all necessary transformations that are usually applied
+            in model conversion flow provided by mo tool. Particularly, floating point weights are
+            compressed to FP16, debug information in model nodes is cleaned up, etc.
+            :param model: model which will be converted to IR representation
+            :type model: openvino.runtime.Model
+            :param output_model: path to output model file
+            :type output_model: Union[str, bytes, pathlib.Path]
+            :param compress_to_fp16: whether to compress floating point weights to FP16 (default: True)
+            :type compress_to_fp16: bool
+
+            :Examples:
+
+            .. code-block:: python
+
+                model = convert_model('your_model.onnx')
+                save_model(model, './model.xml')
+        )");
+
     m.def("shutdown",
           &ov::shutdown,
           R"(
diff --git a/src/bindings/python/tests/test_transformations/test_offline_api.py b/src/bindings/python/tests/test_transformations/test_offline_api.py
index 07cd7cfd307..b200006de5f 100644
--- a/src/bindings/python/tests/test_transformations/test_offline_api.py
+++ b/src/bindings/python/tests/test_transformations/test_offline_api.py
@@ -16,10 +16,10 @@ from openvino._offline_transformations import (
     apply_fused_names_cleanup,
 )
 
-from openvino.runtime import Model, PartialShape, Core, serialize
+from openvino.runtime import Model, PartialShape, Core, serialize, save_model
 import openvino.runtime as ov
 
-from tests.test_utils.test_utils import create_filename_for_test, compare_models
+from tests.test_utils.test_utils import create_filename_for_test, compare_models, _compare_models
 
 
 def get_relu_model():
@@ -165,6 +165,32 @@ def test_fused_names_cleanup():
         assert len(node.get_rt_info()) == 0
 
 
+def prepare_test_model_for_serialize(request, tmp_path, is_path_xml, is_path_bin):
+    xml_path, bin_path = create_filename_for_test(request.node.name,
+                                                  tmp_path,
+                                                  is_path_xml,
+                                                  is_path_bin)
+    shape = [100, 100, 2]
+    parameter_a = ov.opset8.parameter(shape, dtype=np.float32, name="A")
+    parameter_b = ov.opset8.parameter(shape, dtype=np.float32, name="B")
+    node_floor = ov.opset8.floor(ov.opset8.minimum(ov.opset8.abs(parameter_a), parameter_b))
+    node_constant = ov.opset8.constant(np.array(0.1, dtype=np.float32))
+    node_ceil = ov.opset8.ceiling(node_constant)
+    node_add = ov.opset8.add(node_ceil, node_floor)
+    return Model([node_add], [parameter_a, parameter_b], "Model"), xml_path, bin_path
+
+
+def compare_models_and_finalize_after_test(model, xml_path, bin_path):
+    assert model is not None
+    core = Core()
+    res_model = core.read_model(model=xml_path, weights=bin_path)
+    assert compare_models(model, res_model)
+    del res_model
+    del model
+    os.remove(xml_path)
+    os.remove(bin_path)
+
+
 # request - https://docs.pytest.org/en/7.1.x/reference/reference.html#request
 @pytest.mark.parametrize("is_path_xml, is_path_bin", [  # noqa: PT006
     (True, True),
@@ -174,27 +200,41 @@ def test_fused_names_cleanup():
 ],
 )
 def test_serialize_pass_v2(request, tmp_path, is_path_xml, is_path_bin):
-    core = Core()
-    xml_path, bin_path = create_filename_for_test(request.node.name,
-                                                  tmp_path,
-                                                  is_path_xml,
-                                                  is_path_bin)
-    shape = [100, 100, 2]
-    parameter_a = ov.opset8.parameter(shape, dtype=np.float32, name="A")
-    parameter_b = ov.opset8.parameter(shape, dtype=np.float32, name="B")
-    _model = ov.opset8.floor(ov.opset8.minimum(ov.opset8.abs(parameter_a), parameter_b))
-    model = Model(_model, [parameter_a, parameter_b], "Model")
-
+    model, xml_path, bin_path = prepare_test_model_for_serialize(request, tmp_path, is_path_xml, is_path_bin)
     serialize(model, xml_path, bin_path)
+    compare_models_and_finalize_after_test(model, xml_path, bin_path)
 
+
+# request - https://docs.pytest.org/en/7.1.x/reference/reference.html#request
+@pytest.mark.parametrize("is_path_xml", [  # noqa: PT006
+    (True),
+    (False),
+],
+)
+def test_save_model(request, tmp_path, is_path_xml):
+    model, xml_path, bin_path = prepare_test_model_for_serialize(request, tmp_path, is_path_xml, False)
+    save_model(model, xml_path, compress_to_fp16=False)
+    compare_models_and_finalize_after_test(model, xml_path, bin_path)
+
+
+def test_save_model_fp16(request, tmp_path):
+    model, xml_path, bin_path = prepare_test_model_for_serialize(request, tmp_path, False, False)
+    save_model(model, xml_path)
     assert model is not None
-
+    core = Core()
     res_model = core.read_model(model=xml_path, weights=bin_path)
 
+    # the number of operations is different due to an extra Convert op
+    # test model has only single constant that can be compressed, so
+    # only a single extra op is expected
+    assert len(model.get_ops()) + 1 == len(res_model.get_ops())
+
+    # after compressing the original model to fp16, the two models should match
+    compress_model_transformation(model)
     assert compare_models(model, res_model)
 
     del res_model
-
+    del model
     os.remove(xml_path)
     os.remove(bin_path)
 
diff --git a/src/core/include/openvino/core/graph_util.hpp b/src/core/include/openvino/core/graph_util.hpp
index 3aee53be409..d0b86252416 100644
--- a/src/core/include/openvino/core/graph_util.hpp
+++ b/src/core/include/openvino/core/graph_util.hpp
@@ -296,6 +296,9 @@ OPENVINO_API
 bool replace_node_update_name(const std::shared_ptr<Node>& target, const std::shared_ptr<Node>& replacement);
 
 /// \brief Serialize given model into IR. The generated .xml and .bin files will be saved into provided paths.
+/// This method serializes the model "as-is", which means no weights compression or other possible transformations
+/// are applied. It is recommended to use ov::save_model instead of ov::serialize, because ov::save_model is
+/// aligned with the default model conversion flow.
 /// \param m Model which will be converted to IR representation.
 /// \param xml_path Path where .xml file will be saved.
 /// \param bin_path Path where .bin file will be saved (optional).
@@ -306,4 +309,15 @@ void serialize(const std::shared_ptr<const ov::Model>& m,
                const std::string& xml_path,
                const std::string& bin_path = "",
                ov::pass::Serialize::Version version = ov::pass::Serialize::Version::UNSPECIFIED);
+
+/// \brief Save given model into IR. Floating point weights are compressed to FP16 by default.
+/// This method saves a model to IR applying all necessary transformations that are usually applied
+/// in model conversion flow provided by mo tool. Particularly, floating point weights are compressed to FP16.
+/// \param model Model which will be converted to IR representation.
+/// \param output_model Path to the output model file, must have extension .xml
+/// \param compress_to_fp16 Whether to compress floating point weights to FP16 (true by default)
+OPENVINO_API
+void save_model(const std::shared_ptr<const ov::Model>& model,
+                const std::string& output_model,
+                bool compress_to_fp16 = true);
 }  // namespace ov
diff --git a/src/core/src/graph_util.cpp b/src/core/src/graph_util.cpp
index ea4168b4910..7998314f1bd 100644
--- a/src/core/src/graph_util.cpp
+++ b/src/core/src/graph_util.cpp
@@ -28,6 +28,9 @@
 #include "ngraph/rt_info.hpp"
 #include "ngraph/util.hpp"
 #include "openvino/core/descriptor/tensor.hpp"
+#include "transformations/common_optimizations/compress_float_constants.hpp"
+#include "transformations/common_optimizations/fused_names_cleanup.hpp"
+#include "transformations/common_optimizations/mark_precision_sensitive_shapeof_subgraphs.hpp"
 
 using namespace std;
 
@@ -847,3 +850,15 @@ void ov::serialize(const std::shared_ptr<const ov::Model>& m,
     manager.register_pass<ov::pass::Serialize>(xml_path, bin_path, version);
     manager.run_passes(std::const_pointer_cast<ov::Model>(m));
 }
+
+void ov::save_model(const std::shared_ptr<const ov::Model>& m, const std::string& output_model, bool compress_to_fp16) {
+    ov::pass::Manager manager;
+    if (compress_to_fp16) {
+        manager.register_pass<ov::pass::MarkPrecisionSensitiveConstants>();
+        manager.register_pass<ov::pass::CompressFloatConstants>();
+    }
+    manager.register_pass<ov::pass::FusedNamesCleanup>();
+    manager.register_pass<ov::pass::Serialize>(output_model, "");
+    auto cloned = m->clone();  // TODO: Implement on-the-fly compression in pass::Serialize
+    manager.run_passes(cloned);
+}
diff --git a/src/core/src/pass/serialize.cpp b/src/core/src/pass/serialize.cpp
index a140b3f7d99..e7bad776f35 100644
--- a/src/core/src/pass/serialize.cpp
+++ b/src/core/src/pass/serialize.cpp
@@ -1025,7 +1025,7 @@ void ngfunction_2_ir(pugi::xml_node& netXml,
 }
 
 std::string valid_xml_path(const std::string& path) {
-    OPENVINO_ASSERT(path.length() > 4, "Path for xml file is to short: \"" + path + "\"");
+    OPENVINO_ASSERT(path.length() > 4, "Path for xml file is too short: \"" + path + "\"");
 
     const char* const extension = ".xml";
     const bool has_xml_extension = path.rfind(extension) == path.size() - std::strlen(extension);
diff --git a/src/core/tests/pass/serialization/serialize.cpp b/src/core/tests/pass/serialization/serialize.cpp
index e0e66a21064..7bfe6934891 100644
--- a/src/core/tests/pass/serialization/serialize.cpp
+++ b/src/core/tests/pass/serialization/serialize.cpp
@@ -70,6 +70,12 @@ TEST_P(SerializationTest, SerializeHelper) {
     });
 }
 
+TEST_P(SerializationTest, SaveModel) {
+    CompareSerialized([this](const std::shared_ptr<ov::Model>& m) {
+        ov::save_model(m, m_out_xml_path, false);
+    });
+}
+
 INSTANTIATE_TEST_SUITE_P(
     IRSerialization,
     SerializationTest,