New save_model function (C++, Python) (#18656)

* Draft version of save_model function (C++, Python)

* Fixed code style.

* Fixed incorrect test model construction for Python save_model tests

* Minor improvements in code readability

* Minor adjustment based on PR review
This commit is contained in:
Sergey Lyalin 2023-07-21 16:03:53 +04:00 committed by GitHub
parent 44cae128cc
commit 329200cc62
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 127 additions and 16 deletions

View File

@ -44,6 +44,7 @@ from openvino._pyopenvino import ProfilingInfo
from openvino._pyopenvino import get_batch
from openvino._pyopenvino import set_batch
from openvino._pyopenvino import serialize
from openvino._pyopenvino import save_model
from openvino._pyopenvino import shutdown
# Import opsets

View File

@ -117,6 +117,9 @@ PYBIND11_MODULE(_pyopenvino, m) {
R"(
Serialize given model into IR. The generated .xml and .bin files will be saved
into provided paths.
This method serializes model "as-is" that means no weights compression is applied.
It is recommended to use ov::save_model function instead of ov::serialize in all cases
when it is not related to debugging.
:param model: model which will be converted to IR representation
:type model: openvino.runtime.Model
:param xml_path: path where .xml file will be saved
@ -157,6 +160,38 @@ PYBIND11_MODULE(_pyopenvino, m) {
serialize(model, xml_path="./serialized.xml", bin_path="./serialized.bin", version="IR_V11")
)");
m.def(
    "save_model",
    // Thin wrapper over ov::save_model: converts the Python path object
    // (str/bytes/pathlib.Path) to std::string before dispatching.
    [](std::shared_ptr<ov::Model>& model,
       const py::object& output_model,
       bool compress_to_fp16) {
        ov::save_model(model,
                       Common::utils::convert_path_to_string(output_model),
                       compress_to_fp16);
    },
    py::arg("model"),
    py::arg("output_model"),
    py::arg("compress_to_fp16") = true,
    R"(
        Save model into IR files (xml and bin). Floating point weights are compressed to FP16 by default.
        This method saves a model to IR applying all necessary transformations that are usually applied
        in model conversion flow provided by mo tool. Particularly, floating point weights are
        compressed to FP16, debug information in model nodes is cleaned up, etc.
        :param model: model which will be converted to IR representation
        :type model: openvino.runtime.Model
        :param output_model: path to output model file
        :type output_model: Union[str, bytes, pathlib.Path]
        :param compress_to_fp16: whether to compress floating point weights to FP16 (default: True)
        :type compress_to_fp16: bool
        :Examples:
        .. code-block:: python
            model = convert_model('your_model.onnx')
            save_model(model, './model.xml')
    )");
m.def("shutdown",
&ov::shutdown,
R"(

View File

@ -16,10 +16,10 @@ from openvino._offline_transformations import (
apply_fused_names_cleanup,
)
from openvino.runtime import Model, PartialShape, Core, serialize
from openvino.runtime import Model, PartialShape, Core, serialize, save_model
import openvino.runtime as ov
from tests.test_utils.test_utils import create_filename_for_test, compare_models
from tests.test_utils.test_utils import create_filename_for_test, compare_models, _compare_models
def get_relu_model():
@ -165,6 +165,32 @@ def test_fused_names_cleanup():
assert len(node.get_rt_info()) == 0
def prepare_test_model_for_serialize(request, tmp_path, is_path_xml, is_path_bin):
    """Build a small two-input test model plus xml/bin file paths for the current test.

    Returns a tuple of (model, xml_path, bin_path).
    """
    xml_path, bin_path = create_filename_for_test(request.node.name, tmp_path, is_path_xml, is_path_bin)
    shape = [100, 100, 2]
    param_a = ov.opset8.parameter(shape, dtype=np.float32, name="A")
    param_b = ov.opset8.parameter(shape, dtype=np.float32, name="B")
    # floor(min(|A|, B)) + ceil(0.1): includes one FP constant that is a
    # candidate for FP16 compression in the save_model tests.
    floor_node = ov.opset8.floor(ov.opset8.minimum(ov.opset8.abs(param_a), param_b))
    scalar_const = ov.opset8.constant(np.array(0.1, dtype=np.float32))
    sum_node = ov.opset8.add(ov.opset8.ceiling(scalar_const), floor_node)
    return Model([sum_node], [param_a, param_b], "Model"), xml_path, bin_path
def compare_models_and_finalize_after_test(model, xml_path, bin_path):
    """Read the serialized model back, assert it matches the original, then remove the IR files."""
    assert model is not None
    deserialized = Core().read_model(model=xml_path, weights=bin_path)
    assert compare_models(model, deserialized)
    del deserialized
    del model
    os.remove(xml_path)
    os.remove(bin_path)
# request - https://docs.pytest.org/en/7.1.x/reference/reference.html#request
@pytest.mark.parametrize("is_path_xml, is_path_bin", [ # noqa: PT006
(True, True),
@ -174,27 +200,41 @@ def test_fused_names_cleanup():
],
)
def test_serialize_pass_v2(request, tmp_path, is_path_xml, is_path_bin):
    """serialize() must produce an IR that reads back equal to the original model.

    NOTE(review): the pre-refactor inline model construction (duplicate
    parameter/floor building and an unused `core = Core()`) was dead code after
    the switch to prepare_test_model_for_serialize and has been removed.
    """
    model, xml_path, bin_path = prepare_test_model_for_serialize(request, tmp_path, is_path_xml, is_path_bin)
    serialize(model, xml_path, bin_path)
    compare_models_and_finalize_after_test(model, xml_path, bin_path)
# request - https://docs.pytest.org/en/7.1.x/reference/reference.html#request
@pytest.mark.parametrize("is_path_xml", [ # noqa: PT006
(True),
(False),
],
)
def test_save_model(request, tmp_path, is_path_xml):
    """save_model without FP16 compression must round-trip the model exactly."""
    prepared = prepare_test_model_for_serialize(request, tmp_path, is_path_xml, False)
    model, xml_path, bin_path = prepared
    save_model(model, xml_path, compress_to_fp16=False)
    compare_models_and_finalize_after_test(model, xml_path, bin_path)
def test_save_model_fp16(request, tmp_path):
    """save_model with default FP16 compression: the reloaded IR gains one Convert op,
    and matches the original once the original is compressed to fp16 too."""
    model, xml_path, bin_path = prepare_test_model_for_serialize(request, tmp_path, False, False)
    save_model(model, xml_path)
    assert model is not None
    restored = Core().read_model(model=xml_path, weights=bin_path)
    # Operation counts differ because compression inserts Convert ops; the test
    # model has exactly one compressible constant, so exactly one extra op.
    assert len(model.get_ops()) + 1 == len(restored.get_ops())
    # After compressing the original model to fp16, the two should match.
    compress_model_transformation(model)
    assert compare_models(model, restored)
    del restored
    del model
    os.remove(xml_path)
    os.remove(bin_path)

View File

@ -296,6 +296,9 @@ OPENVINO_API
bool replace_node_update_name(const std::shared_ptr<Node>& target, const std::shared_ptr<Node>& replacement);
/// \brief Serialize given model into IR. The generated .xml and .bin files will be saved into provided paths.
/// This method serializes model "as-is" that means no weights compression and other possible transformations
/// are applied. It is recommended to use ov::save_model function instead of ov::serialize, because it is aligned
/// with default model conversion flow.
/// \param m Model which will be converted to IR representation.
/// \param xml_path Path where .xml file will be saved.
/// \param bin_path Path where .bin file will be saved (optional).
@ -306,4 +309,15 @@ void serialize(const std::shared_ptr<const ov::Model>& m,
const std::string& xml_path,
const std::string& bin_path = "",
ov::pass::Serialize::Version version = ov::pass::Serialize::Version::UNSPECIFIED);
/// \brief Save given model into IR. Floating point weights are compressed to FP16 by default.
/// This method saves a model to IR applying all necessary transformations that are usually applied
/// in model conversion flow provided by mo tool. Particularly, floating point weights are compressed to FP16.
/// \param model Model which will be converted to IR representation.
/// \param output_model Path to the output model file, must have extension .xml
/// \param compress_to_fp16 Whether to compress floating point weights to FP16 (true by default)
OPENVINO_API
void save_model(const std::shared_ptr<const ov::Model>& model,
const std::string& output_model,
bool compress_to_fp16 = true);
} // namespace ov

View File

@ -28,6 +28,9 @@
#include "ngraph/rt_info.hpp"
#include "ngraph/util.hpp"
#include "openvino/core/descriptor/tensor.hpp"
#include "transformations/common_optimizations/compress_float_constants.hpp"
#include "transformations/common_optimizations/fused_names_cleanup.hpp"
#include "transformations/common_optimizations/mark_precision_sensitive_shapeof_subgraphs.hpp"
using namespace std;
@ -847,3 +850,15 @@ void ov::serialize(const std::shared_ptr<const ov::Model>& m,
manager.register_pass<ov::pass::Serialize>(xml_path, bin_path, version);
manager.run_passes(std::const_pointer_cast<ov::Model>(m));
}
// Save a model to IR, applying the default model-conversion transformations
// (optional FP16 weight compression and fused-names cleanup) before serializing.
// NOTE: parameter renamed m -> model to match the declaration in the header.
void ov::save_model(const std::shared_ptr<const ov::Model>& model,
                    const std::string& output_model,
                    bool compress_to_fp16) {
    ov::pass::Manager manager;
    if (compress_to_fp16) {
        // Mark constants that must stay in full precision first, then compress the rest.
        manager.register_pass<ov::pass::MarkPrecisionSensitiveConstants>();
        manager.register_pass<ov::pass::CompressFloatConstants>();
    }
    manager.register_pass<ov::pass::FusedNamesCleanup>();
    // Empty bin path: presumably derived from output_model by pass::Serialize — confirm.
    manager.register_pass<ov::pass::Serialize>(output_model, "");
    // Run destructive passes on a clone so the caller's model is left untouched.
    auto cloned = model->clone();  // TODO: Implement on-the-fly compression in pass::Serialize
    manager.run_passes(cloned);
}

View File

@ -1025,7 +1025,7 @@ void ngfunction_2_ir(pugi::xml_node& netXml,
}
std::string valid_xml_path(const std::string& path) {
OPENVINO_ASSERT(path.length() > 4, "Path for xml file is to short: \"" + path + "\"");
OPENVINO_ASSERT(path.length() > 4, "Path for xml file is too short: \"" + path + "\"");
const char* const extension = ".xml";
const bool has_xml_extension = path.rfind(extension) == path.size() - std::strlen(extension);

View File

@ -70,6 +70,12 @@ TEST_P(SerializationTest, SerializeHelper) {
});
}
// Verify that ov::save_model (with FP16 compression disabled) produces an IR
// equivalent to the source model, using the shared CompareSerialized helper.
TEST_P(SerializationTest, SaveModel) {
CompareSerialized([this](const std::shared_ptr<ov::Model>& m) {
ov::save_model(m, m_out_xml_path, false);
});
}
INSTANTIATE_TEST_SUITE_P(
IRSerialization,
SerializationTest,