New save_model function (C++, Python) (#18656)

* Draft version of save_model function (C++, Python)

* Fixed code style.

* Fixed incorrect test model construction for Python save_model tests

* Minor improvements in code readability

* Minor adjustment based on PR review
This commit is contained in:
Sergey Lyalin 2023-07-21 16:03:53 +04:00 committed by GitHub
parent 44cae128cc
commit 329200cc62
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 127 additions and 16 deletions

View File

@ -44,6 +44,7 @@ from openvino._pyopenvino import ProfilingInfo
from openvino._pyopenvino import get_batch
from openvino._pyopenvino import set_batch
from openvino._pyopenvino import serialize
from openvino._pyopenvino import save_model
from openvino._pyopenvino import shutdown
# Import opsets

View File

@ -117,6 +117,9 @@ PYBIND11_MODULE(_pyopenvino, m) {
R"(
Serialize given model into IR. The generated .xml and .bin files will be saved
into provided paths.
This method serializes model "as-is" that means no weights compression is applied.
It is recommended to use ov::save_model function instead of ov::serialize in all cases
when it is not related to debugging.
:param model: model which will be converted to IR representation
:type model: openvino.runtime.Model
:param xml_path: path where .xml file will be saved
@ -157,6 +160,38 @@ PYBIND11_MODULE(_pyopenvino, m) {
serialize(model, xml_path="./serialized.xml", bin_path="./serialized.bin", version="IR_V11")
)");
m.def(
    "save_model",
    // Thin wrapper over ov::save_model: converts the Python path object
    // (str/bytes/pathlib.Path) to std::string before dispatching.
    [](std::shared_ptr<ov::Model>& model,
       const py::object& output_model,
       bool compress_to_fp16) {
        ov::save_model(model,
                       Common::utils::convert_path_to_string(output_model),
                       compress_to_fp16);
    },
    py::arg("model"),
    py::arg("output_model"),
    py::arg("compress_to_fp16") = true,
    R"(
        Save model into IR files (xml and bin). Floating point weights are compressed to FP16 by default.
        This method saves a model to IR applying all necessary transformations that are usually applied
        in model conversion flow provided by mo tool. Particularly, floating point weights are
        compressed to FP16, debug information in model nodes is cleaned up, etc.
        :param model: model which will be converted to IR representation
        :type model: openvino.runtime.Model
        :param output_model: path to output model file
        :type output_model: Union[str, bytes, pathlib.Path]
        :param compress_to_fp16: whether to compress floating point weights to FP16 (default: True)
        :type compress_to_fp16: bool
        :Examples:
        .. code-block:: python
            model = convert_model('your_model.onnx')
            save_model(model, './model.xml')
    )");
m.def("shutdown",
&ov::shutdown,
R"(

View File

@ -16,10 +16,10 @@ from openvino._offline_transformations import (
apply_fused_names_cleanup,
)
from openvino.runtime import Model, PartialShape, Core, serialize
from openvino.runtime import Model, PartialShape, Core, serialize, save_model
import openvino.runtime as ov
from tests.test_utils.test_utils import create_filename_for_test, compare_models
from tests.test_utils.test_utils import create_filename_for_test, compare_models, _compare_models
def get_relu_model():
@ -165,6 +165,32 @@ def test_fused_names_cleanup():
assert len(node.get_rt_info()) == 0
def prepare_test_model_for_serialize(request, tmp_path, is_path_xml, is_path_bin):
    """Build a small two-input test model plus xml/bin file paths for the current test.

    Returns a tuple of (model, xml_path, bin_path).
    """
    xml_path, bin_path = create_filename_for_test(request.node.name, tmp_path, is_path_xml, is_path_bin)
    shape = [100, 100, 2]
    param_a = ov.opset8.parameter(shape, dtype=np.float32, name="A")
    param_b = ov.opset8.parameter(shape, dtype=np.float32, name="B")
    # floor(min(|A|, B)) + ceil(0.1): includes one FP constant that is a
    # candidate for FP16 compression in the save_model tests.
    floor_node = ov.opset8.floor(ov.opset8.minimum(ov.opset8.abs(param_a), param_b))
    scalar_const = ov.opset8.constant(np.array(0.1, dtype=np.float32))
    sum_node = ov.opset8.add(ov.opset8.ceiling(scalar_const), floor_node)
    return Model([sum_node], [param_a, param_b], "Model"), xml_path, bin_path
def compare_models_and_finalize_after_test(model, xml_path, bin_path):
    """Read the serialized model back, assert it matches the original, then remove the IR files."""
    assert model is not None
    deserialized = Core().read_model(model=xml_path, weights=bin_path)
    assert compare_models(model, deserialized)
    del deserialized
    del model
    os.remove(xml_path)
    os.remove(bin_path)
# request - https://docs.pytest.org/en/7.1.x/reference/reference.html#request
@pytest.mark.parametrize("is_path_xml, is_path_bin", [ # noqa: PT006
(True, True),
@ -174,27 +200,41 @@ def test_fused_names_cleanup():
],
)
def test_serialize_pass_v2(request, tmp_path, is_path_xml, is_path_bin):
    """serialize() must produce an IR that reads back equal to the original model.

    NOTE(review): the pre-refactor inline model construction (duplicate
    parameter/floor building and an unused `core = Core()`) was dead code after
    the switch to prepare_test_model_for_serialize and has been removed.
    """
    model, xml_path, bin_path = prepare_test_model_for_serialize(request, tmp_path, is_path_xml, is_path_bin)
    serialize(model, xml_path, bin_path)
    compare_models_and_finalize_after_test(model, xml_path, bin_path)
# request - https://docs.pytest.org/en/7.1.x/reference/reference.html#request
@pytest.mark.parametrize("is_path_xml", [ # noqa: PT006
(True),
(False),
],
)
def test_save_model(request, tmp_path, is_path_xml):
    """save_model without FP16 compression must round-trip the model exactly."""
    prepared = prepare_test_model_for_serialize(request, tmp_path, is_path_xml, False)
    model, xml_path, bin_path = prepared
    save_model(model, xml_path, compress_to_fp16=False)
    compare_models_and_finalize_after_test(model, xml_path, bin_path)
def test_save_model_fp16(request, tmp_path):
    """save_model with default FP16 compression: the reloaded IR gains one Convert op,
    and matches the original once the original is compressed to fp16 too."""
    model, xml_path, bin_path = prepare_test_model_for_serialize(request, tmp_path, False, False)
    save_model(model, xml_path)
    assert model is not None
    restored = Core().read_model(model=xml_path, weights=bin_path)
    # Operation counts differ because compression inserts Convert ops; the test
    # model has exactly one compressible constant, so exactly one extra op.
    assert len(model.get_ops()) + 1 == len(restored.get_ops())
    # After compressing the original model to fp16, the two should match.
    compress_model_transformation(model)
    assert compare_models(model, restored)
    del restored
    del model
    os.remove(xml_path)
    os.remove(bin_path)

View File

@ -296,6 +296,9 @@ OPENVINO_API
bool replace_node_update_name(const std::shared_ptr<Node>& target, const std::shared_ptr<Node>& replacement);
/// \brief Serialize given model into IR. The generated .xml and .bin files will be saved into provided paths.
/// This method serializes model "as-is" that means no weights compression and other possible transformations
/// are applied. It is recommended to use ov::save_model function instead of ov::serialize, because it is aligned
/// with default model conversion flow.
/// \param m Model which will be converted to IR representation.
/// \param xml_path Path where .xml file will be saved.
/// \param bin_path Path where .bin file will be saved (optional).
@ -306,4 +309,15 @@ void serialize(const std::shared_ptr<const ov::Model>& m,
const std::string& xml_path,
const std::string& bin_path = "",
ov::pass::Serialize::Version version = ov::pass::Serialize::Version::UNSPECIFIED);
/// \brief Save given model into IR. Floating point weights are compressed to FP16 by default.
/// This method saves a model to IR applying all necessary transformations that are usually applied
/// in model conversion flow provided by mo tool. Particularly, floating point weights are compressed to FP16.
/// \param model Model which will be converted to IR representation.
/// \param output_model Path to the output model file, must have extension .xml
/// \param compress_to_fp16 Whether to compress floating point weights to FP16 (true by default)
OPENVINO_API
void save_model(const std::shared_ptr<const ov::Model>& model,
const std::string& output_model,
bool compress_to_fp16 = true);
} // namespace ov

View File

@ -28,6 +28,9 @@
#include "ngraph/rt_info.hpp"
#include "ngraph/util.hpp"
#include "openvino/core/descriptor/tensor.hpp"
#include "transformations/common_optimizations/compress_float_constants.hpp"
#include "transformations/common_optimizations/fused_names_cleanup.hpp"
#include "transformations/common_optimizations/mark_precision_sensitive_shapeof_subgraphs.hpp"
using namespace std;
@ -847,3 +850,15 @@ void ov::serialize(const std::shared_ptr<const ov::Model>& m,
manager.register_pass<ov::pass::Serialize>(xml_path, bin_path, version);
manager.run_passes(std::const_pointer_cast<ov::Model>(m));
}
// Save a model to IR, applying the default model-conversion transformations
// (optional FP16 weight compression and fused-names cleanup) before serializing.
// NOTE: parameter renamed m -> model to match the declaration in the header.
void ov::save_model(const std::shared_ptr<const ov::Model>& model,
                    const std::string& output_model,
                    bool compress_to_fp16) {
    ov::pass::Manager manager;
    if (compress_to_fp16) {
        // Mark constants that must stay in full precision first, then compress the rest.
        manager.register_pass<ov::pass::MarkPrecisionSensitiveConstants>();
        manager.register_pass<ov::pass::CompressFloatConstants>();
    }
    manager.register_pass<ov::pass::FusedNamesCleanup>();
    // Empty bin path: presumably derived from output_model by pass::Serialize — confirm.
    manager.register_pass<ov::pass::Serialize>(output_model, "");
    // Run destructive passes on a clone so the caller's model is left untouched.
    auto cloned = model->clone();  // TODO: Implement on-the-fly compression in pass::Serialize
    manager.run_passes(cloned);
}

View File

@ -1025,7 +1025,7 @@ void ngfunction_2_ir(pugi::xml_node& netXml,
}
std::string valid_xml_path(const std::string& path) {
OPENVINO_ASSERT(path.length() > 4, "Path for xml file is to short: \"" + path + "\"");
OPENVINO_ASSERT(path.length() > 4, "Path for xml file is too short: \"" + path + "\"");
const char* const extension = ".xml";
const bool has_xml_extension = path.rfind(extension) == path.size() - std::strlen(extension);

View File

@ -70,6 +70,12 @@ TEST_P(SerializationTest, SerializeHelper) {
});
}
// Verify that ov::save_model (with FP16 compression disabled) produces an IR
// equivalent to the source model, using the shared CompareSerialized helper.
TEST_P(SerializationTest, SaveModel) {
CompareSerialized([this](const std::shared_ptr<ov::Model>& m) {
ov::save_model(m, m_out_xml_path, false);
});
}
INSTANTIATE_TEST_SUITE_P(
IRSerialization,
SerializationTest,