From 9dbc71063cb5ce3426156f73b69db1fffc102a64 Mon Sep 17 00:00:00 2001
From: Maxim Vafin
Date: Fri, 30 Jul 2021 19:42:55 +0300
Subject: [PATCH] Improve model cutting (#6835)

* Improve model cutting

* If part of model is cut, conversion rules must not run on it

* Add incorrect cut test

* Fix code style

* Fix test

* Fix codestyle

* Do not change op places graph on convert

* Assert output has producer

* Fix code style
---
 ngraph/frontend/paddlepaddle/src/frontend.cpp | 37 ++++-----
 ngraph/frontend/paddlepaddle/src/model.cpp    | 76 ++++++++++++++++++-
 .../paddlepaddle/incorrect_cut_model.cpp      | 35 +++++++++
 3 files changed, 129 insertions(+), 19 deletions(-)
 create mode 100644 ngraph/test/frontend/paddlepaddle/incorrect_cut_model.cpp

diff --git a/ngraph/frontend/paddlepaddle/src/frontend.cpp b/ngraph/frontend/paddlepaddle/src/frontend.cpp
index ebd4b0e329b..69904ea7206 100644
--- a/ngraph/frontend/paddlepaddle/src/frontend.cpp
+++ b/ngraph/frontend/paddlepaddle/src/frontend.cpp
@@ -51,12 +51,15 @@ namespace ngraph
                     for (const auto& in_tensor_name : input_port.arguments())
                     {
                         auto node_it = nodes.find(in_tensor_name);
-                        if (node_it != nodes.end())
-                            named_inputs[input_port.parameter()].push_back(node_it->second);
-                        else
-                            // return empty map when not all inputs exist. It usually means that
-                            // these nodes are not used because model inputs were overwritten
-                            return NamedOutputs();
+                        // general check, because in case of error partial conversion should fail
+                        FRONT_END_GENERAL_CHECK(
+                            node_it != nodes.end(),
+                            "Input ",
+                            in_tensor_name,
+                            " for node with type ",
+                            op_desc.type(),
+                            " wasn't found. It may happen if model was cut incorrectly.");
+                        named_inputs[input_port.parameter()].push_back(node_it->second);
                     }
                 }

@@ -76,17 +79,16 @@ namespace ngraph
                     for (const auto& in_tensor_name : input_port.arguments())
                     {
                         auto it = nodes.find(in_tensor_name);
-                        if (it != nodes.end())
-                        {
-                            inputs_vector.push_back(it->second);
-                            inputs_names.push_back(in_tensor_name);
-                        }
-                        else
-                        {
-                            // return empty map when not all inputs exist. It usually means that
-                            // these nodes are not used because model inputs were overwritten
-                            return named_outputs;
-                        }
+                        // general check, because in case of error partial conversion should fail
+                        FRONT_END_GENERAL_CHECK(
+                            it != nodes.end(),
+                            "Input ",
+                            in_tensor_name,
+                            " for node with type ",
+                            op_desc.type(),
+                            " wasn't found. It may happen if model was cut incorrectly.");
It may happen if model was cut incorrectly."); + inputs_vector.push_back(it->second); + inputs_names.push_back(in_tensor_name); } } @@ -156,7 +158,6 @@ namespace ngraph "Cannot open model file."); return &ext_stream; } - } // namespace pdpd std::shared_ptr FrontEndPDPD::convert_each_node( diff --git a/ngraph/frontend/paddlepaddle/src/model.cpp b/ngraph/frontend/paddlepaddle/src/model.cpp index 0c212e29acf..582fbf64cb8 100644 --- a/ngraph/frontend/paddlepaddle/src/model.cpp +++ b/ngraph/frontend/paddlepaddle/src/model.cpp @@ -7,7 +7,10 @@ #include #include +#include + #include + #include "decoder.hpp" #include "framework.pb.h" #include "node_context.hpp" @@ -44,7 +47,7 @@ namespace ngraph void setElementType(Place::Ptr place, const ngraph::element::Type&); void setTensorValue(Place::Ptr place, const void* value); - std::vector> getOpPlaces() const { return m_op_places; } + std::vector> getOpPlaces() const; std::map> getVarPlaces() const { return m_var_places; @@ -59,6 +62,7 @@ namespace ngraph template void loadConsts(const std::basic_string& folder_with_weights, std::istream* weight_stream); + std::vector> determine_cut_nodes() const; std::vector> m_op_places; std::map> m_var_places; @@ -67,6 +71,9 @@ namespace ngraph std::vector m_inputs; std::vector m_outputs; std::map> m_tensor_values; + + // shows if some nodes might be deleted from graph + bool m_graph_changed = false; }; void InputModelPDPD::InputModelPDPDImpl::loadPlaces() @@ -228,6 +235,69 @@ namespace ngraph #endif } // namespace pdpd + std::vector> + InputModelPDPD::InputModelPDPDImpl::getOpPlaces() const + { + if (m_graph_changed) + { + return determine_cut_nodes(); + } + return m_op_places; + } + + std::vector> + InputModelPDPD::InputModelPDPDImpl::determine_cut_nodes() const + { + std::queue q; + std::unordered_set visited; + std::vector> new_op_places; + new_op_places.reserve(m_op_places.size()); + // Marking nodes from outputs to inputs/constants + for (const auto& output : getOutputs()) + { + if (!output->is_input()) + { + auto pdpd_output_op = + std::dynamic_pointer_cast(output->get_producing_operation()); + PDPD_ASSERT(pdpd_output_op != nullptr, + "Output doesn't have producing operation"); + if (!visited.count(pdpd_output_op.get())) + { + visited.insert(pdpd_output_op.get()); + q.push(pdpd_output_op.get()); + new_op_places.push_back(pdpd_output_op); + } + } + } + while (!q.empty()) + { + auto p_op = q.front(); + q.pop(); + for (const auto& map_pair : p_op->get_input_ports()) + { + for (const auto& port : map_pair.second) + { + auto tensor = port->get_source_tensor(); + if (tensor && !tensor->is_input() && + !m_tensor_values.count(tensor->get_names()[0])) + { + std::shared_ptr pdpd_op = + std::dynamic_pointer_cast( + tensor->get_producing_operation()); + if (pdpd_op && !visited.count(pdpd_op.get())) + { + visited.insert(pdpd_op.get()); + q.push(pdpd_op.get()); + new_op_places.push_back(pdpd_op); + } + } + } + } + } + std::reverse(new_op_places.begin(), new_op_places.end()); + return new_op_places; + } + template void InputModelPDPD::InputModelPDPDImpl::loadConsts( const std::basic_string& folder_with_weights, std::istream* weight_stream) @@ -368,6 +438,7 @@ namespace ngraph void InputModelPDPD::InputModelPDPDImpl::overrideAllInputs( const std::vector& inputs) { + m_graph_changed = true; m_inputs.clear(); for (const auto& inp : inputs) { @@ -378,6 +449,7 @@ namespace ngraph void InputModelPDPD::InputModelPDPDImpl::overrideAllOutputs( const std::vector& outputs) { + m_graph_changed = true; m_outputs.clear(); for (const 
            {
@@ -388,6 +460,7 @@ namespace ngraph
        void InputModelPDPD::InputModelPDPDImpl::extractSubgraph(
            const std::vector<Place::Ptr>& inputs, const std::vector<Place::Ptr>& outputs)
        {
+           m_graph_changed = true;
            overrideAllInputs(inputs);
            overrideAllOutputs(outputs);
        }
@@ -419,6 +492,7 @@ namespace ngraph
        void InputModelPDPD::InputModelPDPDImpl::setTensorValue(Place::Ptr place,
                                                                const void* value)
        {
+           m_graph_changed = true;
            auto tensor_place = pdpd::castToTensorPlace(place);
            auto p_shape = tensor_place->get_partial_shape();
            auto type = tensor_place->get_element_type();
diff --git a/ngraph/test/frontend/paddlepaddle/incorrect_cut_model.cpp b/ngraph/test/frontend/paddlepaddle/incorrect_cut_model.cpp
new file mode 100644
index 00000000000..0065dc2231f
--- /dev/null
+++ b/ngraph/test/frontend/paddlepaddle/incorrect_cut_model.cpp
@@ -0,0 +1,35 @@
+// Copyright (C) 2018-2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include 
+
+#include 
+#include 
+
+#include "paddle_utils.hpp"
+#include "utils.hpp"
+
+using namespace ngraph;
+using namespace ngraph::frontend;
+
+TEST(FrontEndIncorrectCutModelTest, test_incorrect_cut)
+{
+    FrontEndManager fem;
+    FrontEnd::Ptr frontEnd;
+    InputModel::Ptr inputModel;
+    ASSERT_NO_THROW(frontEnd = fem.load_by_framework(PADDLE_FE));
+    ASSERT_NE(frontEnd, nullptr);
+    auto model_filename = FrontEndTestUtils::make_model_path(
+        std::string(TEST_PADDLE_MODELS_DIRNAME) +
+        std::string("2in_2out/2in_2out.pdmodel"));
+    ASSERT_NO_THROW(inputModel = frontEnd->load(model_filename));
+    ASSERT_NE(inputModel, nullptr);
+
+    // remove second input
+    inputModel->override_all_inputs({inputModel->get_inputs()[0]});
+
+    std::shared_ptr<ngraph::Function> function;
+    ASSERT_THROW(function = frontEnd->convert(inputModel), GeneralFailure);
+    ASSERT_EQ(function, nullptr);
+}
\ No newline at end of file
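
The core of the change is that the op list handed to the conversion rules is recomputed once the model has been cut: after any override_all_inputs / override_all_outputs / extract_subgraph / set_tensor_value call sets m_graph_changed, getOpPlaces() delegates to determine_cut_nodes(), which walks backwards from the requested outputs and keeps only the operators that are still reachable, stopping at model inputs and at tensors frozen through setTensorValue. Below is a minimal standalone sketch of that traversal; the Op and Tensor structs are simplified stand-ins for illustration, not the real OpPlacePDPD/TensorPlacePDPD places API.

    // Minimal sketch of the "mark reachable ops from the outputs" pruning that
    // determine_cut_nodes() performs. Op/Tensor are simplified stand-ins.
    #include <algorithm>
    #include <iostream>
    #include <memory>
    #include <queue>
    #include <string>
    #include <unordered_set>
    #include <vector>

    struct Op;

    struct Tensor
    {
        std::string name;
        bool is_model_input = false;   // original or overridden model input
        bool has_frozen_value = false; // analogous to an entry in m_tensor_values
        std::shared_ptr<Op> producer;  // op producing this tensor (may be empty)
    };

    struct Op
    {
        std::string type;
        std::vector<std::shared_ptr<Tensor>> inputs;
    };

    // Walk backwards from the requested outputs and keep only the ops that are
    // still needed; everything above an input or a frozen tensor is dropped.
    std::vector<std::shared_ptr<Op>>
        determine_cut_ops(const std::vector<std::shared_ptr<Tensor>>& outputs)
    {
        std::queue<Op*> q;
        std::unordered_set<Op*> visited;
        std::vector<std::shared_ptr<Op>> kept;

        for (const auto& out : outputs)
        {
            auto producer = out->producer;
            if (producer && visited.insert(producer.get()).second)
            {
                q.push(producer.get());
                kept.push_back(producer);
            }
        }
        while (!q.empty())
        {
            Op* op = q.front();
            q.pop();
            for (const auto& tensor : op->inputs)
            {
                // Stop descending at (new) model inputs and frozen tensors.
                if (tensor->is_model_input || tensor->has_frozen_value)
                    continue;
                auto producer = tensor->producer;
                if (producer && visited.insert(producer.get()).second)
                {
                    q.push(producer.get());
                    kept.push_back(producer);
                }
            }
        }
        // Like the real code, reverse so ops come out in producer-first order.
        std::reverse(kept.begin(), kept.end());
        return kept;
    }

    int main()
    {
        // a -> relu -> b, then add(b, c) -> d, where c was promoted to an input by the cut.
        auto a = std::make_shared<Tensor>(Tensor{"a", true});
        auto relu = std::make_shared<Op>(Op{"relu", {a}});
        auto b = std::make_shared<Tensor>(Tensor{"b", false, false, relu});
        auto c = std::make_shared<Tensor>(Tensor{"c", true});
        auto add = std::make_shared<Op>(Op{"add", {b, c}});
        auto d = std::make_shared<Tensor>(Tensor{"d", false, false, add});

        for (const auto& op : determine_cut_ops({d}))
            std::cout << op->type << '\n'; // prints: relu, add
        return 0;
    }

The real implementation operates on Place objects (get_producing_operation(), get_source_tensor(), m_tensor_values) rather than these structs, but the stopping conditions and the final reverse into producer-first order are the same.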
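
From the API side, the behaviour after a cut is now all-or-nothing: a consistent cut converts just the reachable subgraph, while an inconsistent one (as in the new test, which keeps only one of the two inputs the graph needs) makes convert() raise GeneralFailure instead of silently skipping unconvertible operators. A rough usage sketch follows; the header paths, the "pdpd" framework name (the test itself goes through the PADDLE_FE macro), and the model path are assumptions for illustration only.

    // Hypothetical standalone usage; header paths and framework name may differ per build.
    #include <iostream>
    #include <memory>

    #include <frontend_manager/frontend_manager.hpp> // assumed location of the frontend manager header
    #include <ngraph/function.hpp>

    int main()
    {
        using namespace ngraph::frontend;

        FrontEndManager fem;
        // "pdpd" is assumed to be the registered name of the PaddlePaddle frontend.
        FrontEnd::Ptr frontend = fem.load_by_framework("pdpd");
        InputModel::Ptr model = frontend->load("path/to/2in_2out.pdmodel"); // placeholder path

        // Cutting the model: keeping only one of the two required inputs makes the
        // cut inconsistent, so convert() now fails loudly.
        model->override_all_inputs({model->get_inputs()[0]});

        try
        {
            std::shared_ptr<ngraph::Function> f = frontend->convert(model);
            std::cout << "converted " << f->get_ops().size() << " ops\n";
        }
        catch (const GeneralFailure& e)
        {
            std::cerr << "model was cut incorrectly: " << e.what() << '\n';
        }
        return 0;
    }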