Fix performance on resnet50 quantized models (#7670)

* Fix performance on resnet50 quantized models

Low precision (LP) transformations cannot handle the model unless the last four inputs
of FakeQuantize (input_low, input_high, output_low, output_high) are constants. To meet
that requirement, we perform constant folding for those inputs when importing the
QuantizeLinear ONNX operator (see the sketch after these notes).

Ticket: 65375

* fix "Cannot find blob with name: y" exception during onnx_model_quant_conv_linear

* remove linking with onnx_ngraph_frontend

* fix exclude path
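
As a sketch of the constraint the first note describes, LPT can only consume a
FakeQuantize whose four range inputs are Constant nodes. The helper below is
hypothetical, for illustration only (it is not part of this patch):

    #include <ngraph/opsets/opset1.hpp>

    // True when inputs 1..4 (input_low, input_high, output_low, output_high)
    // of a FakeQuantize are Constant nodes, as the LP transformations require.
    bool has_constant_ranges(const std::shared_ptr<ngraph::opset1::FakeQuantize>& fq) {
        for (size_t i = 1; i <= 4; ++i) {
            if (!ngraph::is_type<ngraph::opset1::Constant>(fq->get_input_node_ptr(i)))
                return false;
        }
        return true;
    }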
Mateusz Tabaka 2021-10-13 09:18:37 +02:00 committed by GitHub
parent 0d020974f9
commit db527fff41
14 changed files with 401 additions and 16 deletions


@@ -66,6 +66,11 @@ ov_model_convert("${CMAKE_CURRENT_SOURCE_DIR}/ngraph/test"
"${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/test_model_zoo/ngraph"
onnx_out_files)
set(rel_path "inference-engine/tests/functional/plugin/shared/models")
ov_model_convert("${OpenVINO_SOURCE_DIR}/${rel_path}"
"${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/test_model_zoo/func_tests/models"
ft_out_files)
set(rel_path "inference-engine/tests/functional/inference_engine/onnx_reader")
ov_model_convert("${OpenVINO_SOURCE_DIR}/${rel_path}"
"${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/test_model_zoo/onnx_reader"
@@ -116,6 +121,7 @@ if(ENABLE_TESTS)
endif()
add_custom_target(test_model_zoo DEPENDS ${onnx_out_files}
+    ${ft_out_files}
${ie_onnx_out_files}
${ie_serialize_out_files}
${ie_onnx_import_out_files})


@@ -80,11 +80,11 @@ public:
static std::shared_ptr<Node> swapMultiplyAndAdd(std::shared_ptr<opset1::Add> addAfterMultiply, const int multiplyBranch);
-    static void copyInfo(const std::vector<std::shared_ptr<Node>>& sources, const std::vector<std::shared_ptr<Node>>& targets);
+    static void copyInfo(const std::vector<std::shared_ptr<Node>>& sources, const std::vector<std::shared_ptr<Node>>& targets, bool overrideName = true);
-    static void copyInfo(const std::vector<std::shared_ptr<Node>>& sources, const std::shared_ptr<Node>& target);
+    static void copyInfo(const std::vector<std::shared_ptr<Node>>& sources, const std::shared_ptr<Node>& target, bool overrideName = true);
-    static void copyInfo(const std::shared_ptr<Node>& source, const std::shared_ptr<Node>& target);
+    static void copyInfo(const std::shared_ptr<Node>& source, const std::shared_ptr<Node>& target, bool overrideName = true);
static bool isScalarLike(std::shared_ptr<opset1::Constant> constant);


@@ -108,7 +108,7 @@ bool MatMulTransformation::transform(TransformationContext &context, ngraph::pat
// multiply by weights: [1, ..., 1, Y] x [Y, Z] => [1, ..., 1, Z]
const auto newSubConst = NetworkHelper::toScalarIfPossible(fold<opset1::MatMul>(
broadcastedConst,
-        foldConvert(broadcastedConst, newMatMul->get_element_type()),
+        foldConvert(newMatMul->input_value(1), newMatMul->get_element_type()),
newMatMul->get_transpose_a(),
newMatMul->get_transpose_b()));


@@ -302,12 +302,13 @@ std::shared_ptr<Node> NetworkHelper::swapMultiplyAndAdd(std::shared_ptr<opset1::
void NetworkHelper::copyInfo(
const std::vector<std::shared_ptr<Node>>& sources,
-    const std::vector<std::shared_ptr<Node>>& targets) {
+    const std::vector<std::shared_ptr<Node>>& targets,
+    bool overrideName) {
ngraph::copy_runtime_info(sources, targets);
for (const auto& target : targets) {
const std::string friendlyName = sources[0]->get_friendly_name();
-        if (!friendlyName.empty()) {
+        if (!friendlyName.empty() && overrideName) {
target->set_friendly_name(friendlyName);
}
@@ -345,12 +346,12 @@ void NetworkHelper::copyInfo(
}
}
-void NetworkHelper::copyInfo(const std::vector<std::shared_ptr<Node>>& sources, const std::shared_ptr<Node>& target) {
-    copyInfo(sources, std::vector<std::shared_ptr<Node>>{ target });
+void NetworkHelper::copyInfo(const std::vector<std::shared_ptr<Node>>& sources, const std::shared_ptr<Node>& target, bool overrideName) {
+    copyInfo(sources, std::vector<std::shared_ptr<Node>>{ target }, overrideName);
}
-void NetworkHelper::copyInfo(const std::shared_ptr<Node>& source, const std::shared_ptr<Node>& target) {
-    copyInfo(std::vector<std::shared_ptr<Node>>{ source }, std::vector<std::shared_ptr<Node>>{ target });
+void NetworkHelper::copyInfo(const std::shared_ptr<Node>& source, const std::shared_ptr<Node>& target, bool overrideName) {
+    copyInfo(std::vector<std::shared_ptr<Node>>{ source }, std::vector<std::shared_ptr<Node>>{ target }, overrideName);
}
bool NetworkHelper::isScalarLike(std::shared_ptr<opset1::Constant> constant) {
@@ -657,8 +658,10 @@ std::shared_ptr<opset1::FakeQuantize> NetworkHelper::fuseConvert(const std::shar
ngraph::op::TemporaryReplaceOutputType(fakeQuantize->input_value(4), element::f32).get(),
fakeQuantize->get_levels());
NetworkHelper::setOutDataPrecisionForTypeRelaxed(newFakeQuantize, node->get_output_element_type(0));
+    newFakeQuantize->set_friendly_name(node->get_friendly_name());
replace_node(node->shared_from_this(), newFakeQuantize);
-    NetworkHelper::copyInfo(fakeQuantize, newFakeQuantize);
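+    // the friendly name was already set from 'node' above; copy the remaining info without renaming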
+    bool overrideName = false;
+    NetworkHelper::copyInfo(fakeQuantize, newFakeQuantize, overrideName);
return newFakeQuantize;
}


@@ -16,7 +16,7 @@ if (NGRAPH_ONNX_FRONTEND_ENABLE)
list(APPEND LINK_LIBRARIES onnx_custom_op)
list(APPEND DEPENDENCIES template_extension onnx_custom_op)
else()
set(EXCLUDED_SOURCE_PATHS "${CMAKE_CURRENT_SOURCE_DIR}/extension")
set(EXCLUDED_SOURCE_PATHS ${CMAKE_CURRENT_SOURCE_DIR}/extension ${CMAKE_CURRENT_SOURCE_DIR}/onnx)
endif()
addIeTargetTest(


@@ -0,0 +1,12 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <gtest/gtest.h>
#include "onnx/quantized_models_tests.hpp"
using namespace ONNXTestsDefinitions;
INSTANTIATE_TEST_SUITE_P(ONNXQuantizedModels, QuantizedModelsTests,
::testing::Values(CommonTestUtils::DEVICE_CPU),
QuantizedModelsTests::getTestCaseName);


@@ -6,6 +6,14 @@ set(TARGET_NAME funcSharedTests)
set(PUBLIC_HEADERS_DIR "${CMAKE_CURRENT_SOURCE_DIR}/include")
set(DEPENDENCIES inference_engine mock_engine HeteroPlugin MultiDevicePlugin)
+if (NGRAPH_ONNX_FRONTEND_ENABLE)
+    list(APPEND DEPENDENCIES test_model_zoo)
+    list(APPEND DEFINES TEST_MODELS="${TEST_MODEL_ZOO}/func_tests/models/")
+else()
+    set(EXCLUDED_SOURCE_PATHS ${CMAKE_CURRENT_SOURCE_DIR}/src/onnx)
+endif()
addIeTarget(
NAME ${TARGET_NAME}
TYPE STATIC
@@ -15,6 +23,8 @@ addIeTarget(
ADD_CPPLINT
DEVELOPER_PACKAGE
inference_engine_tests
+        EXCLUDED_SOURCE_PATHS ${EXCLUDED_SOURCE_PATHS}
+        DEFINES ${DEFINES}
INCLUDES
PUBLIC
${PUBLIC_HEADERS_DIR}
@@ -29,10 +39,7 @@ addIeTarget(
openvino::util
inference_engine_transformations
DEPENDENCIES
-        inference_engine
-        mock_engine
-        HeteroPlugin
-        MultiDevicePlugin
+        ${DEPENDENCIES}
)
# CVS-55376


@@ -0,0 +1,22 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <string>
#include <unordered_map>
#include "shared_test_classes/base/layer_test_utils.hpp"
namespace ONNXTestsDefinitions {
class QuantizedModelsTests : public testing::WithParamInterface<std::string>,
virtual public LayerTestsUtils::LayerTestsCommon {
public:
static std::string getTestCaseName(const testing::TestParamInfo<std::string>& obj);
protected:
void SetUp() override;
void runModel(const char* model, const std::unordered_map<std::string, ngraph::element::Type_t>& expected_layer_types);
};
} // namespace ONNXTestsDefinitions


@@ -0,0 +1,132 @@
ir_version: 6
producer_name: "pytorch"
producer_version: "1.8"
graph {
node {
output: "884"
name: "Constant_10"
op_type: "Constant"
attribute {
name: "value"
t {
dims: 1
data_type: 1
raw_data: "\000\000\000\000"
}
type: TENSOR
}
}
node {
output: "885"
name: "Constant_11"
op_type: "Constant"
attribute {
name: "value"
t {
dims: 1
data_type: 1
raw_data: "6\241\311@"
}
type: TENSOR
}
}
node {
input: "883"
input: "884"
input: "885"
input: "884"
input: "885"
output: "886"
name: "FakeQuantize_12"
op_type: "FakeQuantize"
attribute {
name: "levels"
i: 256
type: INT
}
domain: "org.openvinotoolkit"
}
node {
input: "886"
output: "887"
name: "MaxPool_13"
op_type: "MaxPool"
attribute {
name: "ceil_mode"
i: 0
type: INT
}
attribute {
name: "kernel_shape"
ints: 3
ints: 3
type: INTS
}
attribute {
name: "pads"
ints: 1
ints: 1
ints: 1
ints: 1
type: INTS
}
attribute {
name: "strides"
ints: 2
ints: 2
type: INTS
}
}
name: "torch-jit-export"
input {
name: "883"
type {
tensor_type {
elem_type: 1
shape {
dim {
dim_value: 1
}
dim {
dim_value: 64
}
dim {
dim_value: 112
}
dim {
dim_value: 112
}
}
}
}
}
output {
name: "887"
type {
tensor_type {
elem_type: 1
shape {
dim {
dim_value: 1
}
dim {
dim_value: 64
}
dim {
dim_value: 56
}
dim {
dim_value: 56
}
}
}
}
}
}
opset_import {
version: 10
}
opset_import {
domain: "org.openvinotoolkit"
version: 1
}


@@ -0,0 +1,126 @@
ir_version: 6
producer_name: "pytorch"
producer_version: "1.8"
graph {
node {
output: "886"
name: "Constant_12"
op_type: "Constant"
attribute {
name: "value"
t {
data_type: 1
raw_data: "\242k\312<"
}
type: TENSOR
}
}
node {
output: "887"
name: "Constant_13"
op_type: "Constant"
attribute {
name: "value"
t {
data_type: 2
raw_data: "\000"
}
type: TENSOR
}
}
node {
input: "885"
input: "886"
input: "887"
output: "888"
name: "QuantizeLinear_14"
op_type: "QuantizeLinear"
}
node {
input: "888"
input: "886"
input: "887"
output: "889"
name: "DequantizeLinear_15"
op_type: "DequantizeLinear"
}
node {
input: "889"
output: "890"
name: "MaxPool_16"
op_type: "MaxPool"
attribute {
name: "ceil_mode"
i: 0
type: INT
}
attribute {
name: "kernel_shape"
ints: 3
ints: 3
type: INTS
}
attribute {
name: "pads"
ints: 1
ints: 1
ints: 1
ints: 1
type: INTS
}
attribute {
name: "strides"
ints: 2
ints: 2
type: INTS
}
}
name: "torch-jit-export"
input {
name: "885"
type {
tensor_type {
elem_type: 1
shape {
dim {
dim_value: 1
}
dim {
dim_value: 64
}
dim {
dim_value: 112
}
dim {
dim_value: 112
}
}
}
}
}
output {
name: "890"
type {
tensor_type {
elem_type: 1
shape {
dim {
dim_value: 1
}
dim {
dim_value: 64
}
dim {
dim_value: 56
}
dim {
dim_value: 56
}
}
}
}
}
}
opset_import {
version: 10
}


@@ -0,0 +1,49 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <file_utils.h>
#include "onnx/quantized_models_tests.hpp"
namespace ONNXTestsDefinitions {
std::string QuantizedModelsTests::getTestCaseName(const testing::TestParamInfo<std::string>& obj) {
std::string targetDevice = obj.param;
std::ostringstream result;
result << "device=" << targetDevice;
return result.str();
}
void QuantizedModelsTests::SetUp() {
targetDevice = this->GetParam();
}
static std::string getModelFullPath(const char* path) {
return FileUtils::makePath<char>(TEST_MODELS, path);
}
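// Reads an ONNX model from the test model zoo, runs inference on the target
// device, and checks that each listed runtime layer has the expected
// (quantized) element type.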
void QuantizedModelsTests::runModel(const char* model, const std::unordered_map<std::string, ngraph::element::Type_t>& expected_layer_types) {
auto ie = getCore();
auto network = ie->ReadNetwork(getModelFullPath(model));
function = network.getFunction();
Run();
auto runtime_function = executableNetwork.GetExecGraphInfo().getFunction();
int ops_found = 0;
for (const auto& node : runtime_function->get_ordered_ops()) {
const auto& name = node->get_friendly_name();
if (expected_layer_types.count(name)) {
ops_found++;
ASSERT_EQ(expected_layer_types.at(name), node->get_element_type());
}
}
ASSERT_GT(ops_found, 0);
}
TEST_P(QuantizedModelsTests, MaxPoolQDQ) {
runModel("max_pool_qdq.onnx", {{"890_original", ngraph::element::u8}});
}
TEST_P(QuantizedModelsTests, MaxPoolFQ) {
runModel("max_pool_fq.onnx", {{"887_original", ngraph::element::u8}});
}
} // namespace ONNXTestsDefinitions


@@ -34,6 +34,8 @@ bool evaluate_equal(const HostTensorPtr& arg0,
out->set_broadcast(broadcast_spec, arg0, arg1, element::boolean);
switch (arg0->get_element_type()) {
NGRAPH_TYPE_CASE(evaluate_equal, boolean, arg0, arg1, out, broadcast_spec);
+        NGRAPH_TYPE_CASE(evaluate_equal, i8, arg0, arg1, out, broadcast_spec);
+        NGRAPH_TYPE_CASE(evaluate_equal, u8, arg0, arg1, out, broadcast_spec);
NGRAPH_TYPE_CASE(evaluate_equal, i32, arg0, arg1, out, broadcast_spec);
NGRAPH_TYPE_CASE(evaluate_equal, i64, arg0, arg1, out, broadcast_spec);
NGRAPH_TYPE_CASE(evaluate_equal, u32, arg0, arg1, out, broadcast_spec);
@@ -72,6 +74,8 @@ bool op::v1::Equal::has_evaluate() const {
NGRAPH_OP_SCOPE(v1_Equal_has_evaluate);
switch (get_input_element_type(0)) {
case ngraph::element::boolean:
+    case ngraph::element::i8:
+    case ngraph::element::u8:
case ngraph::element::i32:
case ngraph::element::i64:
case ngraph::element::u32:


@@ -88,9 +88,13 @@ std::tuple<std::shared_ptr<ngraph::Node>, std::shared_ptr<ngraph::Node>> get_inp
input_low =
std::make_shared<default_opset::Multiply>(y_scale,
std::make_shared<default_opset::Subtract>(output_low, zero_point));
+    if (auto constant = get_constant_from_source(input_low))
+        input_low = constant;
input_high =
std::make_shared<default_opset::Multiply>(y_scale,
std::make_shared<default_opset::Subtract>(output_high, zero_point));
+    if (auto constant = get_constant_from_source(input_high))
+        input_high = constant;
return std::make_tuple(input_low, input_high);
}
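
For reference (not part of the patch): get_constant_from_source, declared in
ngraph/validation_util.hpp, evaluates the subgraph feeding an output and returns
a Constant when all of its inputs are statically known, or nullptr otherwise, so
the guards above leave input_low/input_high unchanged when folding is impossible.
A minimal usage sketch with made-up scale/zero-point values:

    #include <ngraph/opsets/opset1.hpp>
    #include <ngraph/validation_util.hpp>

    // scale * (low - zero_point), built from constants only
    auto scale = ngraph::opset1::Constant::create(ngraph::element::f32, {}, {0.0247f});
    auto zero = ngraph::opset1::Constant::create(ngraph::element::f32, {}, {0.0f});
    auto low = ngraph::opset1::Constant::create(ngraph::element::f32, {}, {0.0f});
    std::shared_ptr<ngraph::Node> input_low = std::make_shared<ngraph::opset1::Multiply>(
        scale, std::make_shared<ngraph::opset1::Subtract>(low, zero));
    // The whole expression collapses into a single Constant node.
    if (auto constant = ngraph::get_constant_from_source(input_low))
        input_low = constant;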


@@ -444,6 +444,26 @@ TEST(constant_folding, const_convert) {
vector<int64_t> expected{1, 2, 3, 4, 5};
test_const_convert(in, expected);
}
+    {
+        vector<int8_t> in{-128, -2, 0, 1, 3, 127};
+        vector<float> expected{-128, -2, 0, 1, 3, 127};
+        test_const_convert(in, expected);
+    }
+    {
+        vector<uint8_t> in{0, 1, 3, 127, 255};
+        vector<float> expected{0, 1, 3, 127, 255};
+        test_const_convert(in, expected);
+    }
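+    // out-of-range inputs are expected to wrap around (e.g. -300 -> -44, 256 -> 0), not saturate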
+    {
+        vector<float> in{-300, -128, -1, 0, 33, 127, 128};
+        vector<int8_t> expected{-44, -128, -1, 0, 33, 127, -128};
+        test_const_convert(in, expected);
+    }
+    {
+        vector<float> in{0, 33, 127, 255, 256};
+        vector<uint8_t> expected{0, 33, 127, 255, 0};
+        test_const_convert(in, expected);
+    }
}
TEST(constant_folding, shape_of_v0) {