From 329200cc62e1ace05a378e8a3497cefab8f2bf45 Mon Sep 17 00:00:00 2001 From: Sergey Lyalin Date: Fri, 21 Jul 2023 16:03:53 +0400 Subject: [PATCH] New save_model function (C++, Python) (#18656) * Draft version of save_model function (C++, Python) * Fixed code style. * Fixed incorrect test model construction for Python save_model tests * Minor improvements in code readability * Minor adjustment based on PR review --- .../python/src/openvino/runtime/__init__.py | 1 + .../python/src/pyopenvino/pyopenvino.cpp | 35 ++++++++++ .../test_transformations/test_offline_api.py | 70 +++++++++++++++---- src/core/include/openvino/core/graph_util.hpp | 14 ++++ src/core/src/graph_util.cpp | 15 ++++ src/core/src/pass/serialize.cpp | 2 +- .../tests/pass/serialization/serialize.cpp | 6 ++ 7 files changed, 127 insertions(+), 16 deletions(-) diff --git a/src/bindings/python/src/openvino/runtime/__init__.py b/src/bindings/python/src/openvino/runtime/__init__.py index 2755e0d59c4..b53976a6006 100644 --- a/src/bindings/python/src/openvino/runtime/__init__.py +++ b/src/bindings/python/src/openvino/runtime/__init__.py @@ -44,6 +44,7 @@ from openvino._pyopenvino import ProfilingInfo from openvino._pyopenvino import get_batch from openvino._pyopenvino import set_batch from openvino._pyopenvino import serialize +from openvino._pyopenvino import save_model from openvino._pyopenvino import shutdown # Import opsets diff --git a/src/bindings/python/src/pyopenvino/pyopenvino.cpp b/src/bindings/python/src/pyopenvino/pyopenvino.cpp index 5030d2c6a55..1c8f7cf80ce 100644 --- a/src/bindings/python/src/pyopenvino/pyopenvino.cpp +++ b/src/bindings/python/src/pyopenvino/pyopenvino.cpp @@ -117,6 +117,9 @@ PYBIND11_MODULE(_pyopenvino, m) { R"( Serialize given model into IR. The generated .xml and .bin files will be saved into provided paths. + This method serializes model "as-is" that means no weights compression is applied. 
+ It is recommended to use ov::save_model function instead of ov::serialize in all cases + when it is not related to debugging. :param model: model which will be converted to IR representation :type model: openvino.runtime.Model :param xml_path: path where .xml file will be saved @@ -157,6 +160,38 @@ PYBIND11_MODULE(_pyopenvino, m) { serialize(model, xml_path="./serialized.xml", bin_path="./serialized.bin", version="IR_V11") )"); + m.def( + "save_model", + [](std::shared_ptr& model, + const py::object& xml_path, + bool compress_to_fp16) { + ov::save_model(model, + Common::utils::convert_path_to_string(xml_path), + compress_to_fp16); + }, + py::arg("model"), + py::arg("output_model"), + py::arg("compress_to_fp16") = true, + R"( + Save model into IR files (xml and bin). Floating point weights are compressed to FP16 by default. + This method saves a model to IR applying all necessary transformations that are usually applied + in model conversion flow provided by mo tool. Particularly, floating point weights are + compressed to FP16, debug information in model nodes is cleaned up, etc. + :param model: model which will be converted to IR representation + :type model: openvino.runtime.Model + :param output_model: path to output model file + :type output_model: Union[str, bytes, pathlib.Path] + :param compress_to_fp16: whether to compress floating point weights to FP16 (default: True) + :type compress_to_fp16: bool + + :Examples: + + .. 
code-block:: python + + model = convert_model('your_model.onnx') + save_model(model, './model.xml') + )"); + m.def("shutdown", &ov::shutdown, R"( diff --git a/src/bindings/python/tests/test_transformations/test_offline_api.py b/src/bindings/python/tests/test_transformations/test_offline_api.py index 07cd7cfd307..b200006de5f 100644 --- a/src/bindings/python/tests/test_transformations/test_offline_api.py +++ b/src/bindings/python/tests/test_transformations/test_offline_api.py @@ -16,10 +16,10 @@ from openvino._offline_transformations import ( apply_fused_names_cleanup, ) -from openvino.runtime import Model, PartialShape, Core, serialize +from openvino.runtime import Model, PartialShape, Core, serialize, save_model import openvino.runtime as ov -from tests.test_utils.test_utils import create_filename_for_test, compare_models +from tests.test_utils.test_utils import create_filename_for_test, compare_models, _compare_models def get_relu_model(): @@ -165,6 +165,32 @@ def test_fused_names_cleanup(): assert len(node.get_rt_info()) == 0 +def prepare_test_model_for_serialize(request, tmp_path, is_path_xml, is_path_bin): + xml_path, bin_path = create_filename_for_test(request.node.name, + tmp_path, + is_path_xml, + is_path_bin) + shape = [100, 100, 2] + parameter_a = ov.opset8.parameter(shape, dtype=np.float32, name="A") + parameter_b = ov.opset8.parameter(shape, dtype=np.float32, name="B") + node_floor = ov.opset8.floor(ov.opset8.minimum(ov.opset8.abs(parameter_a), parameter_b)) + node_constant = ov.opset8.constant(np.array(0.1, dtype=np.float32)) + node_ceil = ov.opset8.ceiling(node_constant) + node_add = ov.opset8.add(node_ceil, node_floor) + return Model([node_add], [parameter_a, parameter_b], "Model"), xml_path, bin_path + + +def compare_models_and_finalize_after_test(model, xml_path, bin_path): + assert model is not None + core = Core() + res_model = core.read_model(model=xml_path, weights=bin_path) + assert compare_models(model, res_model) + del res_model + del model + 
os.remove(xml_path) + os.remove(bin_path) + + # request - https://docs.pytest.org/en/7.1.x/reference/reference.html#request @pytest.mark.parametrize("is_path_xml, is_path_bin", [ # noqa: PT006 (True, True), @@ -174,27 +200,41 @@ def test_fused_names_cleanup(): ], ) def test_serialize_pass_v2(request, tmp_path, is_path_xml, is_path_bin): - core = Core() - xml_path, bin_path = create_filename_for_test(request.node.name, - tmp_path, - is_path_xml, - is_path_bin) - shape = [100, 100, 2] - parameter_a = ov.opset8.parameter(shape, dtype=np.float32, name="A") - parameter_b = ov.opset8.parameter(shape, dtype=np.float32, name="B") - _model = ov.opset8.floor(ov.opset8.minimum(ov.opset8.abs(parameter_a), parameter_b)) - model = Model(_model, [parameter_a, parameter_b], "Model") - + model, xml_path, bin_path = prepare_test_model_for_serialize(request, tmp_path, is_path_xml, is_path_bin) serialize(model, xml_path, bin_path) + compare_models_and_finalize_after_test(model, xml_path, bin_path) + +# request - https://docs.pytest.org/en/7.1.x/reference/reference.html#request +@pytest.mark.parametrize("is_path_xml", [ # noqa: PT006 + (True), + (False), +], +) +def test_save_model(request, tmp_path, is_path_xml): + model, xml_path, bin_path = prepare_test_model_for_serialize(request, tmp_path, is_path_xml, False) + save_model(model, xml_path, compress_to_fp16=False) + compare_models_and_finalize_after_test(model, xml_path, bin_path) + + +def test_save_model_fp16(request, tmp_path): + model, xml_path, bin_path = prepare_test_model_for_serialize(request, tmp_path, False, False) + save_model(model, xml_path) assert model is not None - + core = Core() res_model = core.read_model(model=xml_path, weights=bin_path) + # number of operations is different due to an extra Convert op + # test model has only single constant that can be compressed, so + # only a single extra op is expected + assert len(model.get_ops()) + 1 == len(res_model.get_ops()) + + # after compression of original model to 
fp16, they should match + compress_model_transformation(model) assert compare_models(model, res_model) del res_model - + del model os.remove(xml_path) os.remove(bin_path) diff --git a/src/core/include/openvino/core/graph_util.hpp b/src/core/include/openvino/core/graph_util.hpp index 3aee53be409..d0b86252416 100644 --- a/src/core/include/openvino/core/graph_util.hpp +++ b/src/core/include/openvino/core/graph_util.hpp @@ -296,6 +296,9 @@ OPENVINO_API bool replace_node_update_name(const std::shared_ptr& target, const std::shared_ptr& replacement); /// \brief Serialize given model into IR. The generated .xml and .bin files will be saved into provided paths. +/// This method serializes model "as-is", which means no weights compression or other possible transformations +/// are applied. It is recommended to use ov::save_model function instead of ov::serialize, because it is aligned +/// with default model conversion flow. /// \param m Model which will be converted to IR representation. /// \param xml_path Path where .xml file will be saved. /// \param bin_path Path where .bin file will be saved (optional). @@ -306,4 +309,15 @@ void serialize(const std::shared_ptr& m, const std::string& xml_path, const std::string& bin_path = "", ov::pass::Serialize::Version version = ov::pass::Serialize::Version::UNSPECIFIED); + +/// \brief Save given model into IR. Floating point weights are compressed to FP16 by default. +/// This method saves a model to IR applying all necessary transformations that are usually applied +/// in model conversion flow provided by mo tool. Particularly, floating point weights are compressed to FP16. +/// \param model Model which will be converted to IR representation. 
+/// \param output_model Path to the output model file, must have extension .xml +/// \param compress_to_fp16 Whether to compress floating point weights to FP16 (true by default) +OPENVINO_API +void save_model(const std::shared_ptr& model, + const std::string& output_model, + bool compress_to_fp16 = true); } // namespace ov diff --git a/src/core/src/graph_util.cpp b/src/core/src/graph_util.cpp index ea4168b4910..7998314f1bd 100644 --- a/src/core/src/graph_util.cpp +++ b/src/core/src/graph_util.cpp @@ -28,6 +28,9 @@ #include "ngraph/rt_info.hpp" #include "ngraph/util.hpp" #include "openvino/core/descriptor/tensor.hpp" +#include "transformations/common_optimizations/compress_float_constants.hpp" +#include "transformations/common_optimizations/fused_names_cleanup.hpp" +#include "transformations/common_optimizations/mark_precision_sensitive_shapeof_subgraphs.hpp" using namespace std; @@ -847,3 +850,15 @@ void ov::serialize(const std::shared_ptr& m, manager.register_pass(xml_path, bin_path, version); manager.run_passes(std::const_pointer_cast(m)); } + +void ov::save_model(const std::shared_ptr& m, const std::string& output_model, bool compress_to_fp16) { + ov::pass::Manager manager; + if (compress_to_fp16) { + manager.register_pass(); + manager.register_pass(); + } + manager.register_pass(); + manager.register_pass(output_model, ""); + auto cloned = m->clone(); // TODO: Implement on-the-fly compression in pass::Serialize + manager.run_passes(cloned); +} diff --git a/src/core/src/pass/serialize.cpp b/src/core/src/pass/serialize.cpp index a140b3f7d99..e7bad776f35 100644 --- a/src/core/src/pass/serialize.cpp +++ b/src/core/src/pass/serialize.cpp @@ -1025,7 +1025,7 @@ void ngfunction_2_ir(pugi::xml_node& netXml, } std::string valid_xml_path(const std::string& path) { - OPENVINO_ASSERT(path.length() > 4, "Path for xml file is to short: \"" + path + "\""); + OPENVINO_ASSERT(path.length() > 4, "Path for xml file is too short: \"" + path + "\""); const char* const extension = 
".xml"; const bool has_xml_extension = path.rfind(extension) == path.size() - std::strlen(extension); diff --git a/src/core/tests/pass/serialization/serialize.cpp b/src/core/tests/pass/serialization/serialize.cpp index e0e66a21064..7bfe6934891 100644 --- a/src/core/tests/pass/serialization/serialize.cpp +++ b/src/core/tests/pass/serialization/serialize.cpp @@ -70,6 +70,12 @@ TEST_P(SerializationTest, SerializeHelper) { }); } +TEST_P(SerializationTest, SaveModel) { + CompareSerialized([this](const std::shared_ptr& m) { + ov::save_model(m, m_out_xml_path, false); + }); +} + INSTANTIATE_TEST_SUITE_P( IRSerialization, SerializationTest,