DynamicQuantizeLinear op support (#10565)

This commit is contained in:
Dawid Kożykowski
2022-03-16 18:30:15 +01:00
committed by GitHub
parent 33d90c5c77
commit 6f64de4c27
11 changed files with 242 additions and 325 deletions

View File

@@ -0,0 +1,67 @@
ir_version: 5
producer_name: "backend-test"
graph {
node {
input: "x"
output: "y"
output: "y_scale"
output: "y_zero_point"
op_type: "DynamicQuantizeLinear"
}
name: "test_dynamicquantizelinear"
input {
name: "x"
type {
tensor_type {
elem_type: 1
shape {
dim {
dim_value: 3
}
dim {
dim_value: 4
}
}
}
}
}
output {
name: "y"
type {
tensor_type {
elem_type: 1
shape {
dim {
dim_value: 3
}
dim {
dim_value: 4
}
}
}
}
}
output {
name: "y_scale"
type {
tensor_type {
elem_type: 1
shape {
}
}
}
}
output {
name: "y_zero_point"
type {
tensor_type {
elem_type: 2
shape {
}
}
}
}
}
opset_import {
version: 11
}

View File

@@ -1,85 +0,0 @@
ir_version: 6
producer_name: "nGraph ONNX Importer"
graph {
node {
input: "in1"
input: "in2"
output: "greater_or_equal_out"
op_type: "GreaterOrEqual"
}
node {
input: "greater_or_equal_out"
output: "cast_out"
op_type: "Cast"
attribute {
name: "to"
i: 6
type: INT
}
}
node {
input: "cast_out"
output: "y"
output: "y_scale"
output: "y_zero_point"
op_type: "DynamicQuantizeLinear"
}
node {
input: "y"
output: "abs_y"
op_type: "Abs"
}
input {
name: "in1"
type {
tensor_type {
elem_type: 1
shape {
dim {
dim_value: 5
}
}
}
}
}
input {
name: "in2"
type {
tensor_type {
elem_type: 1
shape {
dim {
dim_value: 5
}
}
}
}
}
output {
name: "abs_y"
type {
tensor_type {
elem_type: 6
}
}
}
output {
name: "y_scale"
type {
tensor_type {
elem_type: 1
}
}
}
output {
name: "y_zero_point"
type {
tensor_type {
elem_type: 6
}
}
}
}
opset_import {
version: 12
}

View File

@@ -1,209 +0,0 @@
ir_version: 6
producer_name: "nGraph ONNX Importer"
graph {
node {
input: "in1"
input: "in2"
output: "Func_GreaterOrEqual0x5601898ec4f0O1"
op_type: "Greater"
}
node {
input: "in1"
input: "in2"
output: "Func_GreaterOrEqual0x5601898ec4f0O2"
op_type: "Equal"
}
node {
input: "Func_GreaterOrEqual0x5601898ec4f0O1"
input: "Func_GreaterOrEqual0x5601898ec4f0O2"
output: "greater_or_equal_out"
op_type: "Or"
}
node {
input: "greater_or_equal_out"
output: "cast_out"
op_type: "Cast"
attribute {
name: "to"
i: 6
type: INT
}
}
node {
output: "Func_DynamicQuantizeLinear0x560189b38280Q_Min"
op_type: "Constant"
attribute {
name: "value"
t {
data_type: 1
float_data: 0
}
type: TENSOR
}
}
node {
output: "Func_DynamicQuantizeLinear0x560189b38280Q_Max"
op_type: "Constant"
attribute {
name: "value"
t {
data_type: 1
float_data: 255
}
type: TENSOR
}
}
node {
input: "cast_out"
output: "Func_DynamicQuantizeLinear0x560189b38280X_Min"
op_type: "ReduceMin"
attribute {
name: "keepdims"
i: 0
type: INT
}
}
node {
input: "Func_DynamicQuantizeLinear0x560189b38280X_Min"
input: "Func_DynamicQuantizeLinear0x560189b38280Q_Min"
output: "Func_DynamicQuantizeLinear0x560189b38280X_Min_Adjusted"
op_type: "Min"
}
node {
input: "cast_out"
output: "Func_DynamicQuantizeLinear0x560189b38280X_Max"
op_type: "ReduceMax"
attribute {
name: "keepdims"
i: 0
type: INT
}
}
node {
input: "Func_DynamicQuantizeLinear0x560189b38280X_Max"
input: "Func_DynamicQuantizeLinear0x560189b38280Q_Min"
output: "Func_DynamicQuantizeLinear0x560189b38280X_Max_Adjusted"
op_type: "Max"
}
node {
input: "Func_DynamicQuantizeLinear0x560189b38280X_Max_Adjusted"
input: "Func_DynamicQuantizeLinear0x560189b38280X_Min_Adjusted"
output: "Func_DynamicQuantizeLinear0x560189b38280X_Range"
op_type: "Sub"
}
node {
input: "Func_DynamicQuantizeLinear0x560189b38280X_Range"
input: "Func_DynamicQuantizeLinear0x560189b38280Q_Max"
output: "Func_DynamicQuantizeLinear0x560189b38280Scale"
op_type: "Div"
}
node {
input: "Func_DynamicQuantizeLinear0x560189b38280X_Min_Adjusted"
input: "Func_DynamicQuantizeLinear0x560189b38280Scale"
output: "Func_DynamicQuantizeLinear0x560189b38280Min_Scaled"
op_type: "Div"
}
node {
input: "Func_DynamicQuantizeLinear0x560189b38280Q_Min"
input: "Func_DynamicQuantizeLinear0x560189b38280Min_Scaled"
output: "Func_DynamicQuantizeLinear0x560189b38280Initial_ZeroPoint_FP"
op_type: "Sub"
}
node {
input: "Func_DynamicQuantizeLinear0x560189b38280Initial_ZeroPoint_FP"
input: "Func_DynamicQuantizeLinear0x560189b38280Q_Min"
input: "Func_DynamicQuantizeLinear0x560189b38280Q_Max"
output: "Func_DynamicQuantizeLinear0x560189b38280Clipped_ZeroPoint_FP"
op_type: "Clip"
}
node {
input: "Func_DynamicQuantizeLinear0x560189b38280Clipped_ZeroPoint_FP"
output: "Func_DynamicQuantizeLinear0x560189b38280Rounded_ZeroPoint_FP"
op_type: "Round"
}
node {
input: "Func_DynamicQuantizeLinear0x560189b38280Rounded_ZeroPoint_FP"
output: "Func_DynamicQuantizeLinear0x560189b38280Zeropoint"
op_type: "Cast"
attribute {
name: "to"
i: 2
type: INT
}
}
node {
input: "Func_DynamicQuantizeLinear0x560189b38280Scale"
output: "y_scale"
op_type: "Identity"
}
node {
input: "Func_DynamicQuantizeLinear0x560189b38280Zeropoint"
output: "y_zero_point"
op_type: "Identity"
}
node {
input: "cast_out"
input: "Func_DynamicQuantizeLinear0x560189b38280Scale"
input: "Func_DynamicQuantizeLinear0x560189b38280Zeropoint"
output: "y"
op_type: "QuantizeLinear"
}
node {
input: "y"
output: "abs_y"
op_type: "Abs"
}
input {
name: "in1"
type {
tensor_type {
elem_type: 1
shape {
dim {
dim_value: 5
}
}
}
}
}
input {
name: "in2"
type {
tensor_type {
elem_type: 1
shape {
dim {
dim_value: 5
}
}
}
}
}
output {
name: "abs_y"
type {
tensor_type {
elem_type: 6
}
}
}
output {
name: "y_scale"
type {
tensor_type {
elem_type: 1
}
}
}
output {
name: "y_zero_point"
type {
tensor_type {
elem_type: 6
}
}
}
}
opset_import {
version: 12
}

View File

@@ -406,18 +406,6 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_initializer_wo_input) {
test_case.run();
}
NGRAPH_TEST(${BACKEND_NAME}, onnx_expand_function) {
const auto function = onnx_import::import_onnx_model(
file_util::path_join(SERIALIZED_ZOO, "onnx/quantization/dynamicquantizelinear.onnx"));
auto test_case = test::TestCase(function, s_device);
test_case.add_input<float>({-1.f, -2.1f, -1.3f, -2.5f, -3.34f, -4.f});
test_case.add_expected_output<uint8_t>(Shape{6}, {191, 121, 172, 96, 42, 0});
test_case.add_expected_output<float>(Shape{}, {0.0156862754f});
test_case.add_expected_output<uint8_t>(Shape{}, {255});
test_case.run();
}
NGRAPH_TEST(${BACKEND_NAME}, onnx_expand_function_dependency_to_created_subgraph) {
const auto function = onnx_import::import_onnx_model(
file_util::path_join(SERIALIZED_ZOO, "onnx/transformations/greater_or_equal.onnx"));

View File

@@ -261,6 +261,51 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_dequantize_linear_1d_zero_scale_uint8_ne
test_case.run();
}
// Checks DynamicQuantizeLinear against precomputed reference values.
// For this input: min(x) = -3, max(x) = 2 (both already straddle 0), so
// y_scale = (2 - (-3)) / 255 = 0.0196078... and
// y_zero_point = round((0 - (-3)) / y_scale) = 153.
NGRAPH_TEST(${BACKEND_NAME}, onnx_model_dynamic_quantize_linear) {
const auto function = onnx_import::import_onnx_model(
file_util::path_join(SERIALIZED_ZOO, "onnx/quantization/dynamic_quantize_linear.onnx"));
auto test_case = test::TestCase(function, s_device);
test_case.add_input<float>({0.f, 2.f, -3.f, -2.5f, 1.34f, 0.5f});
// Outputs in ONNX order: y (u8 data), y_scale (f32 scalar), y_zero_point (u8 scalar).
test_case.add_expected_output<uint8_t>(Shape{6}, {153, 255, 0, 25, 221, 179});
test_case.add_expected_output<float>(Shape{}, {0.0196078438f});
test_case.add_expected_output<uint8_t>(Shape{}, {153});
test_case.run();
}
// All-negative input: the adjusted max is clamped up to 0, so
// y_scale = (0 - (-4)) / 255 = 0.0156862... and the zero point saturates
// at the top of the u8 range: round((0 - (-4)) / y_scale) = 255.
NGRAPH_TEST(${BACKEND_NAME}, onnx_model_dynamic_quantize_linear_255) {
const auto function = onnx_import::import_onnx_model(
file_util::path_join(SERIALIZED_ZOO, "onnx/quantization/dynamic_quantize_linear.onnx"));
auto test_case = test::TestCase(function, s_device);
test_case.add_input<float>({-1.f, -2.1f, -1.3f, -2.5f, -3.34f, -4.f});
// Outputs in ONNX order: y (u8 data), y_scale (f32 scalar), y_zero_point (u8 scalar).
test_case.add_expected_output<uint8_t>(Shape{6}, {191, 121, 172, 96, 42, 0});
test_case.add_expected_output<float>(Shape{}, {0.0156862754f});
test_case.add_expected_output<uint8_t>(Shape{}, {255});
test_case.run();
}
// 2-D (3x4) input: verifies the dynamic, rank-agnostic axis reduction in the
// importer. All-positive input, so the adjusted min is clamped down to 0:
// y_scale = (4 - 0) / 255 = 0.0156862... and y_zero_point = 0.
NGRAPH_TEST(${BACKEND_NAME}, onnx_model_dynamic_quantize_linear_3x4) {
const auto function = onnx_import::import_onnx_model(
file_util::path_join(SERIALIZED_ZOO, "onnx/quantization/dynamic_quantize_linear_3x4.onnx"));
auto test_case = test::TestCase(function, s_device);
// don't change style for better readability
// clang-format off
test_case.add_input<float>({1.0f, 2.1f, 1.3f, 2.5f,
3.34f, 4.0f, 1.5f, 2.6f,
3.9f, 4.0f, 3.0f, 2.345f});
test_case.add_expected_output<uint8_t>(Shape{3, 4}, { 64, 134, 83, 159,
213, 255, 96, 166,
249, 255, 191, 149});
test_case.add_expected_output<float>(Shape{}, {0.0156862754f});
test_case.add_expected_output<uint8_t>(Shape{}, {0});
// clang-format on
test_case.run();
}
NGRAPH_TEST(${BACKEND_NAME}, onnx_model_quant_conv_linear) {
auto function = onnx_import::import_onnx_model(file_util::path_join(SERIALIZED_ZOO, "onnx/quant_conv_lin.onnx"));

View File

@@ -85,15 +85,3 @@ NGRAPH_TEST(onnx_transformations, expand_function_softmax_crossentropy) {
const auto result = compare_onnx_models(editor.model_string(), ref_model, after_func_expand_name_comp);
EXPECT_TRUE(result.is_ok) << result.error_message;
}
NGRAPH_TEST(onnx_transformations, expand_function_dynamic_quantize_linear) {
ONNXModelEditor editor{file_util::path_join(SERIALIZED_ZOO, "onnx/transformations/dynamic_quantize_linear.onnx")};
editor.decode(); // onnx transformations are applied
const auto ref_model = file_util::path_join(SERIALIZED_ZOO,
"onnx/transformations/reference/"
"dynamic_quantize_linear_expanded.onnx");
const auto result = compare_onnx_models(editor.model_string(), ref_model, after_func_expand_name_comp);
EXPECT_TRUE(result.is_ok) << result.error_message;
}

View File

@@ -20,13 +20,8 @@ namespace transform {
/// \param model_path Filesystem path to the ONNX model file.
void update_external_data_paths(ONNX_NAMESPACE::ModelProto& model_proto, const std::string& model_path);
static const std::vector<std::string> onnx_functions_to_expand = {"Bernoulli",
"Celu",
"DynamicQuantizeLinear",
"GreaterOrEqual",
"LessOrEqual",
"NegativeLogLikelihoodLoss",
"SoftmaxCrossEntropyLoss"};
static const std::vector<std::string> onnx_functions_to_expand =
{"Bernoulli", "Celu", "GreaterOrEqual", "LessOrEqual", "NegativeLogLikelihoodLoss", "SoftmaxCrossEntropyLoss"};
/// \brief Replace nodes with expanded body of ONNX functions
///

View File

@@ -0,0 +1,107 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "op/dynamic_quantize_linear.hpp"
#include <cstdint>
#include <memory>
#include "default_opset.hpp"
#include "ngraph/axis_set.hpp"
#include "ngraph/builder/make_constant.hpp"
#include "ngraph/op/convert.hpp"
#include "ngraph/shape.hpp"
#include "ngraph/validation_util.hpp"
#include "onnx_import/core/null_node.hpp"
#include "utils/common.hpp"
namespace ngraph {
namespace onnx_import {
namespace {
/// \brief Computes min(0, ReduceMin(input)) over all axes.
///
/// The ONNX DynamicQuantizeLinear definition requires the quantization range
/// to include 0 so that zero is exactly representable after quantization.
///
/// \param input  f32 tensor of arbitrary (possibly dynamic) rank.
/// \return Scalar f32 node holding the zero-adjusted minimum.
std::shared_ptr<ngraph::Node> find_min_value(const ov::Output<ov::Node>& input) {
    const auto& zero_node = default_opset::Constant::create(element::i64, Shape{}, {0});
    const auto& one_node = default_opset::Constant::create(element::i64, Shape{}, {1});

    // Build the axis list [0, 1, ..., rank-1] at runtime so that inputs of any
    // rank are reduced down to a scalar.
    const auto& input_shape = std::make_shared<default_opset::ShapeOf>(input);
    const auto& input_rank = std::make_shared<default_opset::ShapeOf>(input_shape);
    const auto& input_rank_as_scalar = std::make_shared<default_opset::Squeeze>(input_rank);

    const auto& reduce_axes =
        std::make_shared<default_opset::Range>(zero_node, input_rank_as_scalar, one_node, element::i64);

    const auto& input_min = std::make_shared<default_opset::ReduceMin>(input, reduce_axes);

    // f32 zero (fixed misleading former name `zero_node_u8`): the adjustment
    // happens in the float domain, before scale/zero-point are derived.
    const auto& zero_f32 = default_opset::Constant::create(element::f32, Shape{}, {0});
    return std::make_shared<default_opset::Minimum>(zero_f32, input_min);
}
/// \brief Computes max(0, ReduceMax(input)) over all axes.
///
/// Mirror of find_min_value: the ONNX DynamicQuantizeLinear definition
/// requires the quantization range to include 0 so that zero is exactly
/// representable after quantization.
///
/// \param input  f32 tensor of arbitrary (possibly dynamic) rank.
/// \return Scalar f32 node holding the zero-adjusted maximum.
std::shared_ptr<ngraph::Node> find_max_value(const ov::Output<ov::Node>& input) {
    const auto& zero_node = default_opset::Constant::create(element::i64, Shape{}, {0});
    const auto& one_node = default_opset::Constant::create(element::i64, Shape{}, {1});

    // Build the axis list [0, 1, ..., rank-1] at runtime so that inputs of any
    // rank are reduced down to a scalar.
    const auto& input_shape = std::make_shared<default_opset::ShapeOf>(input);
    const auto& input_rank = std::make_shared<default_opset::ShapeOf>(input_shape);
    const auto& input_rank_as_scalar = std::make_shared<default_opset::Squeeze>(input_rank);

    const auto& reduce_axes =
        std::make_shared<default_opset::Range>(zero_node, input_rank_as_scalar, one_node, element::i64);

    const auto& input_max = std::make_shared<default_opset::ReduceMax>(input, reduce_axes);

    // f32 zero (fixed misleading former name `zero_node_u8`): the adjustment
    // happens in the float domain, before scale/zero-point are derived.
    const auto& zero_f32 = default_opset::Constant::create(element::f32, Shape{}, {0});
    return std::make_shared<default_opset::Maximum>(zero_f32, input_max);
}
/// \brief Quantizes `x` to u8: y = saturate(round(x / y_scale) + y_zero_point).
///
/// Since y_scale = x_span / quant_range_span, the division by the scale is
/// expressed here as (x * quant_range_span) / x_span, reusing the two span
/// nodes the caller has already built.
std::shared_ptr<ngraph::Node> quantize_linear(Output<ngraph::Node> x,
                                              Output<ngraph::Node> x_span,
                                              Output<ngraph::Node> quant_range_span,
                                              Output<ngraph::Node> y_zero_point) {
    // x / y_scale == (x * quant_range_span) / x_span
    const auto& scaled_x =
        std::make_shared<default_opset::Divide>(std::make_shared<default_opset::Multiply>(x, quant_range_span), x_span);

    // ONNX mandates round-half-to-even before the zero-point shift.
    const auto& rounded_x = std::make_shared<default_opset::Round>(scaled_x, ov::op::v5::Round::RoundMode::HALF_TO_EVEN);

    // Shift by the zero point in the float domain, then saturate to the
    // u8 range [0, 255] and cast to the final element type.
    const auto& zero_point_as_f32 = std::make_shared<default_opset::Convert>(y_zero_point, ov::element::f32);
    const auto& shifted = std::make_shared<default_opset::Add>(rounded_x, zero_point_as_f32);
    const auto& saturated = std::make_shared<default_opset::Clamp>(shifted, 0, 255);

    return std::make_shared<default_opset::Convert>(saturated, ov::element::u8);
}
} // namespace
namespace op {
namespace set_1 {
/// \brief Implements the ONNX DynamicQuantizeLinear operator (opset 11+).
///
/// Dynamically computes the scale and zero point from the input data and
/// quantizes it to u8:
///   y_scale      = (max(x) - min(x)) / (qmax - qmin)
///   y_zero_point = saturate(round((qmin - min(x)) / y_scale))
///   y            = saturate(round(x / y_scale) + y_zero_point)
/// where min/max are adjusted to always include 0.
///
/// \param node  ONNX node with a single f32 input `x`.
/// \return {y (u8), y_scale (f32 scalar), y_zero_point (u8 scalar)}.
OutputVector dynamic_quantize_linear(const Node& node) {
    const OutputVector& inputs = node.get_ng_inputs();
    const auto& x = inputs.at(0);

    // The quantization range for uint8 is [0, 255].
    const auto& quant_range_min = default_opset::Constant::create(element::f32, Shape{}, {0});
    const auto& quant_range_max = default_opset::Constant::create(element::f32, Shape{}, {255});
    const auto& quant_range_span = std::make_shared<default_opset::Subtract>(quant_range_max, quant_range_min);

    // min/max are adjusted to include 0 (see find_min_value/find_max_value).
    const auto& x_max = find_max_value(x);
    const auto& x_min = find_min_value(x);
    const auto& x_span = std::make_shared<default_opset::Subtract>(x_max, x_min);

    // y_scale = (max(x) - min(x)) / (qmax - qmin). Dividing by the range SPAN
    // (instead of by quant_range_max, which is only equal because qmin == 0)
    // matches the ONNX formula and stays correct for any quantization range.
    const auto& y_scale = std::make_shared<default_opset::Divide>(x_span, quant_range_span);

    // y_zero_point = saturate(round((qmin - min(x)) / y_scale)) as u8.
    const auto& x_min_shifted = std::make_shared<default_opset::Subtract>(quant_range_min, x_min);
    const auto& intermediate_zero_point =
        std::make_shared<default_opset::Round>(std::make_shared<default_opset::Divide>(x_min_shifted, y_scale),
                                               ov::op::v5::Round::RoundMode::HALF_TO_EVEN);
    const auto& y_zero_point = std::make_shared<default_opset::Convert>(
        std::make_shared<default_opset::Clamp>(intermediate_zero_point, 0, 255),
        ov::element::u8);

    const auto& y = quantize_linear(x, x_span, quant_range_span, y_zero_point);

    return {y, y_scale, y_zero_point};
}
} // namespace set_1
} // namespace op
} // namespace onnx_import
} // namespace ngraph

View File

@@ -0,0 +1,19 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include "ngraph/node.hpp"
#include "onnx_import/core/node.hpp"
namespace ngraph {
namespace onnx_import {
namespace op {
namespace set_1 {
/// \brief Converts an ONNX DynamicQuantizeLinear node into an nGraph subgraph.
///
/// \param node  The ONNX node; its first input is the f32 tensor to quantize.
/// \return Three outputs in ONNX order: y (u8 quantized data),
///         y_scale (f32 scalar), y_zero_point (u8 scalar).
OutputVector dynamic_quantize_linear(const Node& node);
} // namespace set_1
} // namespace op
} // namespace onnx_import
} // namespace ngraph

View File

@@ -52,6 +52,7 @@
#include "op/dequantize_linear.hpp"
#include "op/div.hpp"
#include "op/dropout.hpp"
#include "op/dynamic_quantize_linear.hpp"
#include "op/einsum.hpp"
#include "op/elu.hpp"
#include "op/equal.hpp"
@@ -329,6 +330,7 @@ void OperatorsBridge::_load_initial_state() {
REGISTER_OPERATOR("Dropout", 1, dropout);
REGISTER_OPERATOR("Dropout", 7, dropout);
REGISTER_OPERATOR("Dropout", 12, dropout);
REGISTER_OPERATOR("DynamicQuantizeLinear", 1, dynamic_quantize_linear);
REGISTER_OPERATOR("Einsum", 1, einsum);
REGISTER_OPERATOR("Elu", 1, elu);
REGISTER_OPERATOR("Equal", 1, equal);