[MO][PDPD] Convert Paddle models from memory (#17005)

* support convert_model in paddle runtime

* add convert runtime paddle test

* fix a pylint error

* fix ci error

* skip test_mo_convert_paddle.py # Ticket: 95904

* auto remove tmp file

* add docs for PDFE

* enable paddle mo test in ci

* fix docs

* fix docs

* fix the docs
This commit is contained in:
Xiuchuan Zhai
2023-05-20 22:13:21 +08:00
committed by GitHub
parent 1b24e15e1e
commit 0b72998631
9 changed files with 315 additions and 6 deletions

View File

@@ -2,18 +2,103 @@
@sphinxdirective
This page provides general instructions on how to convert a model from a PaddlePaddle format to the OpenVINO IR format using Model Optimizer. The instructions differ depending on the PaddlePaddle model format.
Converting PaddlePaddle Model Inference Format
##############################################
PaddlePaddle inference model includes ``.pdmodel`` (storing model structure) and ``.pdiparams`` (storing model weights). For instructions on how to export a PaddlePaddle inference model, refer to the `Exporting PaddlePaddle Inference Model <https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/guides/beginner/model_save_load_cn.html>`__ Chinese guide.
To convert a PaddlePaddle model, use the ``mo`` script and specify the path to the input ``.pdmodel`` model file:
.. code-block:: sh
mo --input_model <INPUT_MODEL>.pdmodel
**For example,** this command converts a yolo v3 PaddlePaddle network to OpenVINO IR network:
**For example**, this command converts a yolo v3 PaddlePaddle network to OpenVINO IR network:
.. code-block:: sh
mo --input_model=yolov3.pdmodel --input=image,im_shape,scale_factor --input_shape=[1,3,608,608],[1,2],[1,2] --reverse_input_channels --output=save_infer_model/scale_0.tmp_1,save_infer_model/scale_1.tmp_1
Converting PaddlePaddle Model From Memory Using Python API
##########################################################
MO Python API supports passing PaddlePaddle models directly from memory.
The following PaddlePaddle model formats are supported:
* ``paddle.hapi.model.Model``
* ``paddle.fluid.dygraph.layers.Layer``
* ``paddle.fluid.executor.Executor``
Converting certain PaddlePaddle models may require setting ``example_input`` or ``example_output``. The examples below show how to perform such conversions.
* Example of converting ``paddle.hapi.model.Model`` format model:
.. code-block:: python
import paddle
from openvino.tools.mo import convert_model
# create a paddle.hapi.model.Model format model
resnet50 = paddle.vision.models.resnet50()
x = paddle.static.InputSpec([1,3,224,224], 'float32', 'x')
y = paddle.static.InputSpec([1,1000], 'float32', 'y')
model = paddle.Model(resnet50, x, y)
# convert to OpenVINO IR format
ov_model = convert_model(model)
# optional: serialize OpenVINO IR to *.xml & *.bin
from openvino.runtime import serialize
serialize(ov_model, "ov_model.xml", "ov_model.bin")
* Example of converting ``paddle.fluid.dygraph.layers.Layer`` format model:
``example_input`` is required while ``example_output`` is optional. They accept the following formats:
``list`` with tensor(``paddle.Tensor``) or InputSpec(``paddle.static.input.InputSpec``)
.. code-block:: python
import paddle
from openvino.tools.mo import convert_model
# create a paddle.fluid.dygraph.layers.Layer format model
model = paddle.vision.models.resnet50()
x = paddle.rand([1,3,224,224])
# convert to OpenVINO IR format
ov_model = convert_model(model, example_input=[x])
* Example of converting ``paddle.fluid.executor.Executor`` format model:
``example_input`` and ``example_output`` are required. They accept the following formats:
``list`` or ``tuple`` with variable(``paddle.static.data``)
.. code-block:: python
import paddle
from openvino.tools.mo import convert_model
paddle.enable_static()
# create a paddle.fluid.executor.Executor format model
x = paddle.static.data(name="x", shape=[1,3,224])
y = paddle.static.data(name="y", shape=[1,3,224])
relu = paddle.nn.ReLU()
sigmoid = paddle.nn.Sigmoid()
y = sigmoid(relu(x))
exe = paddle.static.Executor(paddle.CPUPlace())
exe.run(paddle.static.default_startup_program())
# convert to OpenVINO IR format
ov_model = convert_model(exe, example_input=[x], example_output=[y])
Supported PaddlePaddle Layers
#############################

View File

@@ -0,0 +1,103 @@
# Copyright (C) 2018-2023 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import numpy as np
import pytest
from common.mo_convert_test_class import CommonMOConvertTest
import openvino.runtime as ov
from openvino.runtime import PartialShape, Model
def make_pd_dynamic_graph_model():
    """Build a dynamic-graph ``paddle.nn.Layer`` applying ReLU then Sigmoid."""
    import paddle
    paddle.disable_static()

    class NeuralNetwork(paddle.nn.Layer):
        """Thin Layer wrapper around a ReLU -> Sigmoid sequential stack."""

        def __init__(self):
            super().__init__()
            self.relu_sigmoid_stack = paddle.nn.Sequential(
                paddle.nn.ReLU(),
                paddle.nn.Sigmoid(),
            )

        def forward(self, input):
            return self.relu_sigmoid_stack(input)

    return NeuralNetwork()
def make_pd_static_graph_model(shape):
    """Build a static-graph ReLU -> Sigmoid program.

    Returns a tuple ``(executor, x, y)`` where ``x`` is the input variable
    and ``y`` the output variable of the program.
    """
    import paddle
    import paddle.nn
    paddle.enable_static()

    x = paddle.static.data(name="x", shape=shape)
    # "y" is registered in the program first, then rebound to the net output.
    y = paddle.static.data(name="y", shape=shape)
    y = paddle.nn.Sigmoid()(paddle.nn.ReLU()(x))

    exe = paddle.static.Executor(paddle.CPUPlace())
    exe.run(paddle.static.default_startup_program())
    return exe, x, y
def make_pd_hapi_graph_model(shape):
    """Wrap a ReLU -> Sigmoid net in a prepared high-level-API ``paddle.Model``."""
    import paddle
    paddle.disable_static()
    from paddle.static import InputSpec

    net = paddle.nn.Sequential(paddle.nn.ReLU(), paddle.nn.Sigmoid())
    model = paddle.Model(net,
                         InputSpec(shape, 'float32', 'x'),
                         InputSpec(shape, 'float32', 'label'))
    sgd = paddle.optimizer.SGD(learning_rate=1e-3,
                               parameters=model.parameters())
    model.prepare(sgd, paddle.nn.CrossEntropyLoss(), paddle.metric.Accuracy())
    return model
def make_ref_graph_model(shape, dtype=np.float32):
    """Reference OpenVINO model: parameter "x" -> ReLU -> Sigmoid."""
    param = ov.opset8.parameter(PartialShape(shape), name="x", dtype=dtype)
    sigm = ov.opset8.sigmoid(ov.opset8.relu(param))
    return Model([sigm], [param], "test")
def create_paddle_dynamic_module(tmp_dir):
    """Test-case factory: dynamic-graph paddle model, reference IR, MO kwargs."""
    import paddle
    shape = [2, 3, 4]
    spec = paddle.static.InputSpec(shape=shape, dtype='float32', name='x')
    return (make_pd_dynamic_graph_model(),
            make_ref_graph_model(shape),
            {"example_input": [spec]})
def create_paddle_static_module(tmp_dir):
    """Test-case factory: static-graph paddle executor, reference IR, MO kwargs."""
    shape = [2, 3, 4]
    exe, x, y = make_pd_static_graph_model(shape)
    mo_params = {"example_input": [x], "example_output": [y]}
    return exe, make_ref_graph_model(shape), mo_params
def create_paddle_hapi_module(tmp_dir):
    """Test-case factory: high-level-API paddle model, reference IR, no kwargs."""
    shape = [2, 3, 4]
    return make_pd_hapi_graph_model(shape), make_ref_graph_model(shape), {}
class TestMoConvertPaddle(CommonMOConvertTest):
    """Checks convert_model() on PaddlePaddle models passed from memory."""

    test_data = [
        create_paddle_dynamic_module,
        create_paddle_static_module,
        create_paddle_hapi_module,
    ]

    @pytest.mark.skip(reason="Paddlepaddle has incompatible protobuf. Ticket: 95904")
    @pytest.mark.parametrize("create_model", test_data)
    def test_mo_import_from_memory_paddle_fe(self, create_model, ie_device, precision, ir_version,
                                             temp_dir):
        paddle_model, graph_ref, mo_params = create_model(temp_dir)

        test_params = {'input_model': paddle_model, 'use_new_frontend': True}
        if mo_params is not None:
            test_params.update(mo_params)
        self._test_by_ref_graph(temp_dir, test_params, graph_ref,
                                compare_tensor_names=False)

View File

@@ -24,7 +24,7 @@ class TestSubprocessMoConvert(unittest.TestCase):
# We don't expect PyTorch specific parameters to be in help message of the MO tool.
for group in mo_convert_params:
if group == 'Pytorch-specific parameters:':
if group == 'Pytorch-specific parameters:' or group == 'PaddlePaddle-specific parameters:':
continue
for param_name in group:
assert param_name in mo_output
@@ -36,4 +36,4 @@ class TestSubprocessMoConvert(unittest.TestCase):
for group in mo_convert_params:
for param_name in group:
assert param_name in mo_output
assert param_name in mo_output

View File

@@ -202,7 +202,7 @@ ignore-mixin-members=yes
# (useful for modules/projects where namespaces are manipulated during runtime
# and thus existing member attributes cannot be deduced by static analysis. It
# supports qualified module names, as well as Unix pattern matching.
ignored-modules=flask_sqlalchemy,app.extensions.flask_sqlalchemy,distutils,openvino,torch
ignored-modules=flask_sqlalchemy,app.extensions.flask_sqlalchemy,distutils,openvino,torch,paddle
# List of class names for which member attributes should not be checked (useful
# for classes with dynamically set attributes). This supports the use of

View File

@@ -836,6 +836,7 @@ openvino/tools/mo/moc_frontend/analysis.py
openvino/tools/mo/moc_frontend/check_config.py
openvino/tools/mo/moc_frontend/extractor.py
openvino/tools/mo/moc_frontend/layout_utils.py
openvino/tools/mo/moc_frontend/paddle_frontend_utils.py
openvino/tools/mo/moc_frontend/pipeline.py
openvino/tools/mo/moc_frontend/pytorch_frontend_utils.py
openvino/tools/mo/moc_frontend/serialize.py

View File

@@ -43,6 +43,10 @@ def convert_model(
progress: bool = False,
stream_output: bool = False,
# PaddlePaddle-specific parameters:
# example_input: Any = None, which can be shared with PyTorch-specific parameters
example_output: Any = None,
# PyTorch-specific parameters:
example_input: Any = None,
@@ -96,6 +100,11 @@ def convert_model(
Supported formats of input model:
PaddlePaddle
paddle.hapi.model.Model
paddle.fluid.dygraph.layers.Layer
paddle.fluid.executor.Executor
PyTorch
torch.nn.Module
torch.jit.ScriptModule
@@ -261,6 +270,12 @@ def convert_model(
:param stream_output:
Switch model conversion progress display to a multiline mode.
PaddlePaddle-specific parameters:
:param example_input:
Sample of model input in original framework. For PaddlePaddle it can be Paddle Variable.
:param example_output:
Sample of model output in original framework. For PaddlePaddle it can be Paddle Variable.
PyTorch-specific parameters:
:param example_input:
Sample of model input in original framework. For PyTorch it can be torch.Tensor.

View File

@@ -50,6 +50,7 @@ from openvino.tools.mo.utils.telemetry_utils import send_params_info, send_frame
from openvino.tools.mo.utils.versions_checker import get_environment_setup # pylint: disable=no-name-in-module
from openvino.tools.mo.moc_frontend.check_config import legacy_extensions_used
from openvino.tools.mo.moc_frontend.pytorch_frontend_utils import get_pytorch_decoder
from openvino.tools.mo.moc_frontend.paddle_frontend_utils import paddle_frontend_converter
from openvino.tools.mo.moc_frontend.shape_utils import parse_input_shapes, get_static_shape
# pylint: disable=no-name-in-module,import-error
@@ -577,6 +578,11 @@ def check_model_object(argv):
if isinstance(model, io.BytesIO):
return 'onnx'
if 'paddle' in sys.modules:
import paddle
if isinstance(model, paddle.hapi.model.Model) or isinstance(model, paddle.fluid.dygraph.layers.Layer) or isinstance(model, paddle.fluid.executor.Executor):
return "paddle"
raise Error('Unknown model type: {}'.format(type(model)))
@@ -875,7 +881,19 @@ def _convert(cli_parser: argparse.ArgumentParser, framework, args, python_api_us
decoder = get_pytorch_decoder(args['input_model'], parse_input_shapes(args), example_inputs, args.get("input"))
args['input_model'] = decoder
args["framework"] = "pytorch"
args['framework'] = model_framework
if model_framework == "paddle":
example_inputs = None
if 'example_input' in args and args['example_input'] is not None:
example_inputs = args['example_input']
example_outputs = None
if 'example_output' in args and args['example_output'] is not None:
example_outputs = args['example_output']
paddle_runtime_converter = paddle_frontend_converter(args['input_model'], example_inputs, example_outputs)
pdmodel = paddle_runtime_converter.convert_paddle_to_pdmodel()
args['input_model'] = pdmodel
args['framework'] = model_framework
update_args_for_saved_model_dir(args)
@@ -914,6 +932,10 @@ def _convert(cli_parser: argparse.ArgumentParser, framework, args, python_api_us
ov_model, legacy_path = driver(argv, {"conversion_parameters": non_default_params})
if inp_model_is_object and model_framework == "paddle":
if paddle_runtime_converter:
paddle_runtime_converter.destroy()
# add MO meta data to model
ov_model.set_rt_info(VersionChecker().get_mo_version(), "MO_version")
ov_model.set_rt_info(get_rt_version(), "Runtime_version")

View File

@@ -0,0 +1,82 @@
# Copyright (C) 2018-2023 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import os
import sys
import tempfile
class paddle_frontend_converter:
    """Serializes an in-memory PaddlePaddle model into a temporary
    ``*.pdmodel`` / ``*.pdiparams`` pair so the Paddle frontend can load it.

    Usage: call :meth:`convert_paddle_to_pdmodel` to produce the files and
    :meth:`destroy` afterwards to remove them.
    """

    def __init__(self, model, inputs=None, outputs=None):
        # model:   paddle.hapi.model.Model, paddle.fluid.dygraph.layers.Layer
        #          or paddle.fluid.executor.Executor instance to serialize.
        # inputs:  example input variables/specs; required for dygraph Layer
        #          and Executor models (see convert_paddle_to_pdmodel).
        # outputs: example output variables; required for Executor models.
        self.model = model
        self.inputs = inputs
        self.outputs = outputs
        self.tmp = None             # NamedTemporaryFile reserving a unique path prefix
        self.model_name = None      # path prefix shared by all serialized files
        self.pdmodel = None         # <prefix>.pdmodel (model structure)
        self.pdiparams = None       # <prefix>.pdiparams (model weights)
        self.pdiparams_info = None  # <prefix>.pdiparams.info
        self.is_generated = False

    def destroy(self):
        """Remove all temporary files produced by convert_paddle_to_pdmodel.

        Safe to call even when conversion never ran or failed before the
        paths were assigned: unset (None) paths are skipped. Previously
        os.path.exists(None) raised TypeError in that case, since exists()
        only swallows OSError/ValueError.
        """
        # close tmp file
        if isinstance(self.tmp, tempfile._TemporaryFileWrapper):
            self.tmp.close()
        # remove the *.pdmodel, *.pdiparams and *.pdiparams.info files
        for path in (self.pdmodel, self.pdiparams, self.pdiparams_info):
            if path is not None and os.path.exists(path):
                os.remove(path)

    def convert_paddle_to_pdmodel(self):
        '''
        Serialize ``self.model`` and return the path of the generated
        ``*.pdmodel`` file.

        There are three paddle model categories:
            - High Level API: is a wrapper for dynamic or static model, use `self.save` to serialize
            - Dynamic Model: use `paddle.jit.save` to serialize
            - Static Model: use `paddle.static.save_inference_model` to serialize

        :raises RuntimeError: when required example inputs/outputs are missing
            or the model type is unsupported.
        '''
        try:
            # NamedTemporaryFile only reserves a unique name; the real files
            # are written next to it with paddle-specific suffixes.
            self.tmp = tempfile.NamedTemporaryFile(delete=True)
            self.model_name = self.tmp.name
            self.pdmodel = "{}.pdmodel".format(self.model_name)
            self.pdiparams = "{}.pdiparams".format(self.model_name)
            self.pdiparams_info = "{}.pdiparams.info".format(self.model_name)

            import paddle
            if isinstance(self.model, paddle.hapi.model.Model):
                self.model.save(self.model_name, False)
            else:
                if self.inputs is None:
                    raise RuntimeError(
                        "Saving inference model needs 'inputs' before saving. Please specify 'example_input'"
                    )
                if isinstance(self.model, paddle.fluid.dygraph.layers.Layer):
                    with paddle.fluid.framework._dygraph_guard(None):
                        paddle.jit.save(self.model, self.model_name, input_spec=self.inputs, output_spec=self.outputs)
                elif isinstance(self.model, paddle.fluid.executor.Executor):
                    if self.outputs is None:
                        raise RuntimeError(
                            "Model is static. Saving inference model needs 'outputs' before saving. Please specify 'example_output' for this model"
                        )
                    paddle.static.save_inference_model(self.model_name, self.inputs, self.outputs, self.model)
                else:
                    raise RuntimeError(
                        "Conversion just support paddle.hapi.model.Model, paddle.fluid.dygraph.layers.Layer and paddle.fluid.executor.Executor"
                    )

            if not os.path.exists(self.pdmodel):
                print("Failed generating paddle inference format model")
                sys.exit(1)
            self.is_generated = True
            return self.pdmodel
        finally:
            # close tmp file so its placeholder is deleted even on failure
            if isinstance(self.tmp, tempfile._TemporaryFileWrapper):
                self.tmp.close()

View File

@@ -2052,6 +2052,7 @@ class TestConvertModelParamsParsing(unittest.TestCase):
'MXNet-specific parameters:': {'input_symbol', 'nd_prefix_name', 'pretrained_model_name', 'save_params_from_nd',
'legacy_mxnet_model', 'enable_ssd_gluoncv'},
'Kaldi-specific parameters:': {'counts', 'remove_output_softmax', 'remove_memory'},
'PaddlePaddle-specific parameters:': {'example_input', 'example_output'},
'PyTorch-specific parameters:': {'example_input'}
}
@@ -2064,7 +2065,7 @@ class TestConvertModelParamsParsing(unittest.TestCase):
for group_name, params in ref_params.items():
for param_name in params:
param_name = '--' + param_name
if group_name == 'PyTorch-specific parameters:':
if group_name == 'PyTorch-specific parameters:' or group_name == 'PaddlePaddle-specific parameters:':
assert param_name not in cli_parser._option_string_actions
else:
assert param_name in cli_parser._option_string_actions