DynamicQuantizeLinear op support (#10565)

This commit is contained in:
Dawid Kożykowski
2022-03-16 18:30:15 +01:00
committed by GitHub
parent 33d90c5c77
commit 6f64de4c27
11 changed files with 242 additions and 325 deletions

View File

@@ -0,0 +1,67 @@
ir_version: 5
producer_name: "backend-test"
graph {
node {
input: "x"
output: "y"
output: "y_scale"
output: "y_zero_point"
op_type: "DynamicQuantizeLinear"
}
name: "test_dynamicquantizelinear"
input {
name: "x"
type {
tensor_type {
elem_type: 1
shape {
dim {
dim_value: 3
}
dim {
dim_value: 4
}
}
}
}
}
output {
name: "y"
type {
tensor_type {
elem_type: 1
shape {
dim {
dim_value: 3
}
dim {
dim_value: 4
}
}
}
}
}
output {
name: "y_scale"
type {
tensor_type {
elem_type: 1
shape {
}
}
}
}
output {
name: "y_zero_point"
type {
tensor_type {
elem_type: 2
shape {
}
}
}
}
}
opset_import {
version: 11
}

View File

@@ -1,85 +0,0 @@
ir_version: 6
producer_name: "nGraph ONNX Importer"
graph {
node {
input: "in1"
input: "in2"
output: "greater_or_equal_out"
op_type: "GreaterOrEqual"
}
node {
input: "greater_or_equal_out"
output: "cast_out"
op_type: "Cast"
attribute {
name: "to"
i: 6
type: INT
}
}
node {
input: "cast_out"
output: "y"
output: "y_scale"
output: "y_zero_point"
op_type: "DynamicQuantizeLinear"
}
node {
input: "y"
output: "abs_y"
op_type: "Abs"
}
input {
name: "in1"
type {
tensor_type {
elem_type: 1
shape {
dim {
dim_value: 5
}
}
}
}
}
input {
name: "in2"
type {
tensor_type {
elem_type: 1
shape {
dim {
dim_value: 5
}
}
}
}
}
output {
name: "abs_y"
type {
tensor_type {
elem_type: 6
}
}
}
output {
name: "y_scale"
type {
tensor_type {
elem_type: 1
}
}
}
output {
name: "y_zero_point"
type {
tensor_type {
elem_type: 6
}
}
}
}
opset_import {
version: 12
}

View File

@@ -1,209 +0,0 @@
ir_version: 6
producer_name: "nGraph ONNX Importer"
graph {
node {
input: "in1"
input: "in2"
output: "Func_GreaterOrEqual0x5601898ec4f0O1"
op_type: "Greater"
}
node {
input: "in1"
input: "in2"
output: "Func_GreaterOrEqual0x5601898ec4f0O2"
op_type: "Equal"
}
node {
input: "Func_GreaterOrEqual0x5601898ec4f0O1"
input: "Func_GreaterOrEqual0x5601898ec4f0O2"
output: "greater_or_equal_out"
op_type: "Or"
}
node {
input: "greater_or_equal_out"
output: "cast_out"
op_type: "Cast"
attribute {
name: "to"
i: 6
type: INT
}
}
node {
output: "Func_DynamicQuantizeLinear0x560189b38280Q_Min"
op_type: "Constant"
attribute {
name: "value"
t {
data_type: 1
float_data: 0
}
type: TENSOR
}
}
node {
output: "Func_DynamicQuantizeLinear0x560189b38280Q_Max"
op_type: "Constant"
attribute {
name: "value"
t {
data_type: 1
float_data: 255
}
type: TENSOR
}
}
node {
input: "cast_out"
output: "Func_DynamicQuantizeLinear0x560189b38280X_Min"
op_type: "ReduceMin"
attribute {
name: "keepdims"
i: 0
type: INT
}
}
node {
input: "Func_DynamicQuantizeLinear0x560189b38280X_Min"
input: "Func_DynamicQuantizeLinear0x560189b38280Q_Min"
output: "Func_DynamicQuantizeLinear0x560189b38280X_Min_Adjusted"
op_type: "Min"
}
node {
input: "cast_out"
output: "Func_DynamicQuantizeLinear0x560189b38280X_Max"
op_type: "ReduceMax"
attribute {
name: "keepdims"
i: 0
type: INT
}
}
node {
input: "Func_DynamicQuantizeLinear0x560189b38280X_Max"
input: "Func_DynamicQuantizeLinear0x560189b38280Q_Min"
output: "Func_DynamicQuantizeLinear0x560189b38280X_Max_Adjusted"
op_type: "Max"
}
node {
input: "Func_DynamicQuantizeLinear0x560189b38280X_Max_Adjusted"
input: "Func_DynamicQuantizeLinear0x560189b38280X_Min_Adjusted"
output: "Func_DynamicQuantizeLinear0x560189b38280X_Range"
op_type: "Sub"
}
node {
input: "Func_DynamicQuantizeLinear0x560189b38280X_Range"
input: "Func_DynamicQuantizeLinear0x560189b38280Q_Max"
output: "Func_DynamicQuantizeLinear0x560189b38280Scale"
op_type: "Div"
}
node {
input: "Func_DynamicQuantizeLinear0x560189b38280X_Min_Adjusted"
input: "Func_DynamicQuantizeLinear0x560189b38280Scale"
output: "Func_DynamicQuantizeLinear0x560189b38280Min_Scaled"
op_type: "Div"
}
node {
input: "Func_DynamicQuantizeLinear0x560189b38280Q_Min"
input: "Func_DynamicQuantizeLinear0x560189b38280Min_Scaled"
output: "Func_DynamicQuantizeLinear0x560189b38280Initial_ZeroPoint_FP"
op_type: "Sub"
}
node {
input: "Func_DynamicQuantizeLinear0x560189b38280Initial_ZeroPoint_FP"
input: "Func_DynamicQuantizeLinear0x560189b38280Q_Min"
input: "Func_DynamicQuantizeLinear0x560189b38280Q_Max"
output: "Func_DynamicQuantizeLinear0x560189b38280Clipped_ZeroPoint_FP"
op_type: "Clip"
}
node {
input: "Func_DynamicQuantizeLinear0x560189b38280Clipped_ZeroPoint_FP"
output: "Func_DynamicQuantizeLinear0x560189b38280Rounded_ZeroPoint_FP"
op_type: "Round"
}
node {
input: "Func_DynamicQuantizeLinear0x560189b38280Rounded_ZeroPoint_FP"
output: "Func_DynamicQuantizeLinear0x560189b38280Zeropoint"
op_type: "Cast"
attribute {
name: "to"
i: 2
type: INT
}
}
node {
input: "Func_DynamicQuantizeLinear0x560189b38280Scale"
output: "y_scale"
op_type: "Identity"
}
node {
input: "Func_DynamicQuantizeLinear0x560189b38280Zeropoint"
output: "y_zero_point"
op_type: "Identity"
}
node {
input: "cast_out"
input: "Func_DynamicQuantizeLinear0x560189b38280Scale"
input: "Func_DynamicQuantizeLinear0x560189b38280Zeropoint"
output: "y"
op_type: "QuantizeLinear"
}
node {
input: "y"
output: "abs_y"
op_type: "Abs"
}
input {
name: "in1"
type {
tensor_type {
elem_type: 1
shape {
dim {
dim_value: 5
}
}
}
}
}
input {
name: "in2"
type {
tensor_type {
elem_type: 1
shape {
dim {
dim_value: 5
}
}
}
}
}
output {
name: "abs_y"
type {
tensor_type {
elem_type: 6
}
}
}
output {
name: "y_scale"
type {
tensor_type {
elem_type: 1
}
}
}
output {
name: "y_zero_point"
type {
tensor_type {
elem_type: 6
}
}
}
}
opset_import {
version: 12
}

View File

@@ -406,18 +406,6 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_initializer_wo_input) {
test_case.run();
}
NGRAPH_TEST(${BACKEND_NAME}, onnx_expand_function) {
const auto function = onnx_import::import_onnx_model(
file_util::path_join(SERIALIZED_ZOO, "onnx/quantization/dynamicquantizelinear.onnx"));
auto test_case = test::TestCase(function, s_device);
test_case.add_input<float>({-1.f, -2.1f, -1.3f, -2.5f, -3.34f, -4.f});
test_case.add_expected_output<uint8_t>(Shape{6}, {191, 121, 172, 96, 42, 0});
test_case.add_expected_output<float>(Shape{}, {0.0156862754f});
test_case.add_expected_output<uint8_t>(Shape{}, {255});
test_case.run();
}
NGRAPH_TEST(${BACKEND_NAME}, onnx_expand_function_dependency_to_created_subgraph) {
const auto function = onnx_import::import_onnx_model(
file_util::path_join(SERIALIZED_ZOO, "onnx/transformations/greater_or_equal.onnx"));

View File

@@ -261,6 +261,51 @@ NGRAPH_TEST(${BACKEND_NAME}, onnx_model_dequantize_linear_1d_zero_scale_uint8_ne
test_case.run();
}
// Checks DynamicQuantizeLinear against precomputed reference values.
// For this input: min(x) = -3, max(x) = 2 (both already straddle 0), so
// y_scale = (2 - (-3)) / 255 = 0.0196078... and
// y_zero_point = round((0 - (-3)) / y_scale) = 153.
NGRAPH_TEST(${BACKEND_NAME}, onnx_model_dynamic_quantize_linear) {
const auto function = onnx_import::import_onnx_model(
file_util::path_join(SERIALIZED_ZOO, "onnx/quantization/dynamic_quantize_linear.onnx"));
auto test_case = test::TestCase(function, s_device);
test_case.add_input<float>({0.f, 2.f, -3.f, -2.5f, 1.34f, 0.5f});
// Outputs in ONNX order: y (u8 data), y_scale (f32 scalar), y_zero_point (u8 scalar).
test_case.add_expected_output<uint8_t>(Shape{6}, {153, 255, 0, 25, 221, 179});
test_case.add_expected_output<float>(Shape{}, {0.0196078438f});
test_case.add_expected_output<uint8_t>(Shape{}, {153});
test_case.run();
}
// All-negative input: the adjusted max is clamped up to 0, so
// y_scale = (0 - (-4)) / 255 = 0.0156862... and the zero point saturates
// at the top of the u8 range: round((0 - (-4)) / y_scale) = 255.
NGRAPH_TEST(${BACKEND_NAME}, onnx_model_dynamic_quantize_linear_255) {
const auto function = onnx_import::import_onnx_model(
file_util::path_join(SERIALIZED_ZOO, "onnx/quantization/dynamic_quantize_linear.onnx"));
auto test_case = test::TestCase(function, s_device);
test_case.add_input<float>({-1.f, -2.1f, -1.3f, -2.5f, -3.34f, -4.f});
// Outputs in ONNX order: y (u8 data), y_scale (f32 scalar), y_zero_point (u8 scalar).
test_case.add_expected_output<uint8_t>(Shape{6}, {191, 121, 172, 96, 42, 0});
test_case.add_expected_output<float>(Shape{}, {0.0156862754f});
test_case.add_expected_output<uint8_t>(Shape{}, {255});
test_case.run();
}
// 2-D (3x4) input: verifies the dynamic, rank-agnostic axis reduction in the
// importer. All-positive input, so the adjusted min is clamped down to 0:
// y_scale = (4 - 0) / 255 = 0.0156862... and y_zero_point = 0.
NGRAPH_TEST(${BACKEND_NAME}, onnx_model_dynamic_quantize_linear_3x4) {
const auto function = onnx_import::import_onnx_model(
file_util::path_join(SERIALIZED_ZOO, "onnx/quantization/dynamic_quantize_linear_3x4.onnx"));
auto test_case = test::TestCase(function, s_device);
// don't change style for better readability
// clang-format off
test_case.add_input<float>({1.0f, 2.1f, 1.3f, 2.5f,
3.34f, 4.0f, 1.5f, 2.6f,
3.9f, 4.0f, 3.0f, 2.345f});
test_case.add_expected_output<uint8_t>(Shape{3, 4}, { 64, 134, 83, 159,
213, 255, 96, 166,
249, 255, 191, 149});
test_case.add_expected_output<float>(Shape{}, {0.0156862754f});
test_case.add_expected_output<uint8_t>(Shape{}, {0});
// clang-format on
test_case.run();
}
NGRAPH_TEST(${BACKEND_NAME}, onnx_model_quant_conv_linear) {
auto function = onnx_import::import_onnx_model(file_util::path_join(SERIALIZED_ZOO, "onnx/quant_conv_lin.onnx"));

View File

@@ -85,15 +85,3 @@ NGRAPH_TEST(onnx_transformations, expand_function_softmax_crossentropy) {
const auto result = compare_onnx_models(editor.model_string(), ref_model, after_func_expand_name_comp);
EXPECT_TRUE(result.is_ok) << result.error_message;
}
NGRAPH_TEST(onnx_transformations, expand_function_dynamic_quantize_linear) {
ONNXModelEditor editor{file_util::path_join(SERIALIZED_ZOO, "onnx/transformations/dynamic_quantize_linear.onnx")};
editor.decode(); // onnx transformations are applied
const auto ref_model = file_util::path_join(SERIALIZED_ZOO,
"onnx/transformations/reference/"
"dynamic_quantize_linear_expanded.onnx");
const auto result = compare_onnx_models(editor.model_string(), ref_model, after_func_expand_name_comp);
EXPECT_TRUE(result.is_ok) << result.error_message;
}

View File

@@ -20,13 +20,8 @@ namespace transform {
/// \param model_path Filesystem path to the ONNX model file.
void update_external_data_paths(ONNX_NAMESPACE::ModelProto& model_proto, const std::string& model_path);
static const std::vector<std::string> onnx_functions_to_expand = {"Bernoulli",
"Celu",
"DynamicQuantizeLinear",
"GreaterOrEqual",
"LessOrEqual",
"NegativeLogLikelihoodLoss",
"SoftmaxCrossEntropyLoss"};
static const std::vector<std::string> onnx_functions_to_expand =
{"Bernoulli", "Celu", "GreaterOrEqual", "LessOrEqual", "NegativeLogLikelihoodLoss", "SoftmaxCrossEntropyLoss"};
/// \brief Replace nodes with expanded body of ONNX functions
///

View File

@@ -0,0 +1,107 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "op/dynamic_quantize_linear.hpp"
#include <cstdint>
#include <memory>
#include "default_opset.hpp"
#include "ngraph/axis_set.hpp"
#include "ngraph/builder/make_constant.hpp"
#include "ngraph/op/convert.hpp"
#include "ngraph/shape.hpp"
#include "ngraph/validation_util.hpp"
#include "onnx_import/core/null_node.hpp"
#include "utils/common.hpp"
namespace ngraph {
namespace onnx_import {
namespace {
/// \brief Computes min(0, ReduceMin(input)) over all axes.
///
/// The ONNX DynamicQuantizeLinear definition requires the quantization range
/// to include 0 so that zero is exactly representable after quantization.
///
/// \param input  f32 tensor of arbitrary (possibly dynamic) rank.
/// \return Scalar f32 node holding the zero-adjusted minimum.
std::shared_ptr<ngraph::Node> find_min_value(const ov::Output<ov::Node>& input) {
    const auto& zero_node = default_opset::Constant::create(element::i64, Shape{}, {0});
    const auto& one_node = default_opset::Constant::create(element::i64, Shape{}, {1});

    // Build the axis list [0, 1, ..., rank-1] at runtime so that inputs of any
    // rank are reduced down to a scalar.
    const auto& input_shape = std::make_shared<default_opset::ShapeOf>(input);
    const auto& input_rank = std::make_shared<default_opset::ShapeOf>(input_shape);
    const auto& input_rank_as_scalar = std::make_shared<default_opset::Squeeze>(input_rank);

    const auto& reduce_axes =
        std::make_shared<default_opset::Range>(zero_node, input_rank_as_scalar, one_node, element::i64);

    const auto& input_min = std::make_shared<default_opset::ReduceMin>(input, reduce_axes);

    // f32 zero (fixed misleading former name `zero_node_u8`): the adjustment
    // happens in the float domain, before scale/zero-point are derived.
    const auto& zero_f32 = default_opset::Constant::create(element::f32, Shape{}, {0});
    return std::make_shared<default_opset::Minimum>(zero_f32, input_min);
}
/// \brief Computes max(0, ReduceMax(input)) over all axes.
///
/// Mirror of find_min_value: the ONNX DynamicQuantizeLinear definition
/// requires the quantization range to include 0 so that zero is exactly
/// representable after quantization.
///
/// \param input  f32 tensor of arbitrary (possibly dynamic) rank.
/// \return Scalar f32 node holding the zero-adjusted maximum.
std::shared_ptr<ngraph::Node> find_max_value(const ov::Output<ov::Node>& input) {
    const auto& zero_node = default_opset::Constant::create(element::i64, Shape{}, {0});
    const auto& one_node = default_opset::Constant::create(element::i64, Shape{}, {1});

    // Build the axis list [0, 1, ..., rank-1] at runtime so that inputs of any
    // rank are reduced down to a scalar.
    const auto& input_shape = std::make_shared<default_opset::ShapeOf>(input);
    const auto& input_rank = std::make_shared<default_opset::ShapeOf>(input_shape);
    const auto& input_rank_as_scalar = std::make_shared<default_opset::Squeeze>(input_rank);

    const auto& reduce_axes =
        std::make_shared<default_opset::Range>(zero_node, input_rank_as_scalar, one_node, element::i64);

    const auto& input_max = std::make_shared<default_opset::ReduceMax>(input, reduce_axes);

    // f32 zero (fixed misleading former name `zero_node_u8`): the adjustment
    // happens in the float domain, before scale/zero-point are derived.
    const auto& zero_f32 = default_opset::Constant::create(element::f32, Shape{}, {0});
    return std::make_shared<default_opset::Maximum>(zero_f32, input_max);
}
/// \brief Quantizes `x` to u8: y = saturate(round(x / y_scale) + y_zero_point).
///
/// Since y_scale = x_span / quant_range_span, the division by the scale is
/// expressed here as (x * quant_range_span) / x_span, reusing the two span
/// nodes the caller has already built.
std::shared_ptr<ngraph::Node> quantize_linear(Output<ngraph::Node> x,
                                              Output<ngraph::Node> x_span,
                                              Output<ngraph::Node> quant_range_span,
                                              Output<ngraph::Node> y_zero_point) {
    // x / y_scale == (x * quant_range_span) / x_span
    const auto& scaled_x =
        std::make_shared<default_opset::Divide>(std::make_shared<default_opset::Multiply>(x, quant_range_span), x_span);

    // ONNX mandates round-half-to-even before the zero-point shift.
    const auto& rounded_x = std::make_shared<default_opset::Round>(scaled_x, ov::op::v5::Round::RoundMode::HALF_TO_EVEN);

    // Shift by the zero point in the float domain, then saturate to the
    // u8 range [0, 255] and cast to the final element type.
    const auto& zero_point_as_f32 = std::make_shared<default_opset::Convert>(y_zero_point, ov::element::f32);
    const auto& shifted = std::make_shared<default_opset::Add>(rounded_x, zero_point_as_f32);
    const auto& saturated = std::make_shared<default_opset::Clamp>(shifted, 0, 255);

    return std::make_shared<default_opset::Convert>(saturated, ov::element::u8);
}
} // namespace
namespace op {
namespace set_1 {
/// \brief Implements the ONNX DynamicQuantizeLinear operator (opset 11+).
///
/// Dynamically computes the scale and zero point from the input data and
/// quantizes it to u8:
///   y_scale      = (max(x) - min(x)) / (qmax - qmin)
///   y_zero_point = saturate(round((qmin - min(x)) / y_scale))
///   y            = saturate(round(x / y_scale) + y_zero_point)
/// where min/max are adjusted to always include 0.
///
/// \param node  ONNX node with a single f32 input `x`.
/// \return {y (u8), y_scale (f32 scalar), y_zero_point (u8 scalar)}.
OutputVector dynamic_quantize_linear(const Node& node) {
    const OutputVector& inputs = node.get_ng_inputs();
    const auto& x = inputs.at(0);

    // The quantization range for uint8 is [0, 255].
    const auto& quant_range_min = default_opset::Constant::create(element::f32, Shape{}, {0});
    const auto& quant_range_max = default_opset::Constant::create(element::f32, Shape{}, {255});
    const auto& quant_range_span = std::make_shared<default_opset::Subtract>(quant_range_max, quant_range_min);

    // min/max are adjusted to include 0 (see find_min_value/find_max_value).
    const auto& x_max = find_max_value(x);
    const auto& x_min = find_min_value(x);
    const auto& x_span = std::make_shared<default_opset::Subtract>(x_max, x_min);

    // y_scale = (max(x) - min(x)) / (qmax - qmin). Dividing by the range SPAN
    // (instead of by quant_range_max, which is only equal because qmin == 0)
    // matches the ONNX formula and stays correct for any quantization range.
    const auto& y_scale = std::make_shared<default_opset::Divide>(x_span, quant_range_span);

    // y_zero_point = saturate(round((qmin - min(x)) / y_scale)) as u8.
    const auto& x_min_shifted = std::make_shared<default_opset::Subtract>(quant_range_min, x_min);
    const auto& intermediate_zero_point =
        std::make_shared<default_opset::Round>(std::make_shared<default_opset::Divide>(x_min_shifted, y_scale),
                                               ov::op::v5::Round::RoundMode::HALF_TO_EVEN);
    const auto& y_zero_point = std::make_shared<default_opset::Convert>(
        std::make_shared<default_opset::Clamp>(intermediate_zero_point, 0, 255),
        ov::element::u8);

    const auto& y = quantize_linear(x, x_span, quant_range_span, y_zero_point);

    return {y, y_scale, y_zero_point};
}
} // namespace set_1
} // namespace op
} // namespace onnx_import
} // namespace ngraph

View File

@@ -0,0 +1,19 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include "ngraph/node.hpp"
#include "onnx_import/core/node.hpp"
namespace ngraph {
namespace onnx_import {
namespace op {
namespace set_1 {
/// \brief Converts an ONNX DynamicQuantizeLinear node into an nGraph subgraph.
///
/// \param node  The ONNX node; its first input is the f32 tensor to quantize.
/// \return Three outputs in ONNX order: y (u8 quantized data),
///         y_scale (f32 scalar), y_zero_point (u8 scalar).
OutputVector dynamic_quantize_linear(const Node& node);
} // namespace set_1
} // namespace op
} // namespace onnx_import
} // namespace ngraph

View File

@@ -52,6 +52,7 @@
#include "op/dequantize_linear.hpp"
#include "op/div.hpp"
#include "op/dropout.hpp"
#include "op/dynamic_quantize_linear.hpp"
#include "op/einsum.hpp"
#include "op/elu.hpp"
#include "op/equal.hpp"
@@ -329,6 +330,7 @@ void OperatorsBridge::_load_initial_state() {
REGISTER_OPERATOR("Dropout", 1, dropout);
REGISTER_OPERATOR("Dropout", 7, dropout);
REGISTER_OPERATOR("Dropout", 12, dropout);
REGISTER_OPERATOR("DynamicQuantizeLinear", 1, dynamic_quantize_linear);
REGISTER_OPERATOR("Einsum", 1, einsum);
REGISTER_OPERATOR("Elu", 1, elu);
REGISTER_OPERATOR("Equal", 1, equal);