[PT FE] Add translation for aten::fake_quantize_per_tensor_affine and aten::fake_quantize_per_channel_affine (#18176)

2023-07-07 11:05:23 +02:00
parent da84027b72
commit 63071b21d4
3 changed files with 206 additions and 0 deletions
--- a/src/frontends/pytorch/src/op/fake_quantize.cpp
+++ b/src/frontends/pytorch/src/op/fake_quantize.cpp
@@ -0,0 +1,94 @@
+// Copyright (C) 2018-2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "openvino/op/fake_quantize.hpp"
+
+#include "openvino/frontend/pytorch/node_context.hpp"
+#include "openvino/op/add.hpp"
+#include "openvino/op/broadcast.hpp"
+#include "openvino/op/convert.hpp"
+#include "openvino/op/maximum.hpp"
+#include "openvino/op/minimum.hpp"
+#include "openvino/op/multiply.hpp"
+#include "openvino/op/reshape.hpp"
+#include "openvino/op/scatter_elements_update.hpp"
+#include "openvino/op/subtract.hpp"
+#include "utils.hpp"
+
+namespace ov {
+namespace frontend {
+namespace pytorch {
+namespace op {
+
+using namespace ov::op;
+
+OutputVector translate_fake_quantize_per_tensor_affine(const NodeContext& context) {
+    num_inputs_check(context, 5, 5);  // inputs: data, scale, zero point, low bound, high bound
+    auto input_node = context.get_input(0);
+    auto scale = std::make_shared<v0::Convert>(context.get_input(1), element::f32);
+    auto zero_point = std::make_shared<v0::Convert>(context.get_input(2), element::f32);
+    auto out_low_const = context.const_input<int64_t>(3);
+    auto out_high_const = context.const_input<int64_t>(4);
+    // Levels is the number of integer values in the closed range between the two bounds: |high - low| + 1.
+    auto levels = std::abs(out_high_const - out_low_const) + 1;
+    auto out_low = std::make_shared<v0::Convert>(context.get_input(3), element::f32);
+    auto out_high = std::make_shared<v0::Convert>(context.get_input(4), element::f32);
+
+    // Subtract the zero point from both bounds (all operands were converted to f32 above).
+    auto out_low_normalized = std::make_shared<v1::Subtract>(out_low, zero_point);
+    auto out_high_normalized = std::make_shared<v1::Subtract>(out_high, zero_point);
+    // Multiply by scale: scale * (bound - zero_point) yields the limits passed to FakeQuantize below.
+    auto bound_a = std::make_shared<v1::Multiply>(scale, out_low_normalized);
+    auto bound_b = std::make_shared<v1::Multiply>(scale, out_high_normalized);
+    // A negative scale swaps the order of the two products, so take the maximum as high and the minimum as low.
+    auto bound_high = std::make_shared<v1::Maximum>(bound_a, bound_b);
+    auto bound_low = std::make_shared<v1::Minimum>(bound_a, bound_b);
+    return {context.mark_node(
+        std::make_shared<v0::FakeQuantize>(input_node, bound_low, bound_high, bound_low, bound_high, levels))};
+}
+
+OutputVector translate_fake_quantize_per_channel_affine(const NodeContext& context) {
+    num_inputs_check(context, 6, 6);  // inputs: data, scale, zero point, axis, low bound, high bound
+    auto input_node = context.get_input(0);
+    auto scale = std::make_shared<v0::Convert>(context.get_input(1), element::f32);
+    auto zero_point = std::make_shared<v0::Convert>(context.get_input(2), element::f32);
+    auto axis = context.get_input(3);
+    auto out_low_const = context.const_input<int64_t>(4);
+    auto out_high_const = context.const_input<int64_t>(5);
+    // Levels is the number of integer values in the closed range between the two bounds: |high - low| + 1.
+    auto levels = std::abs(out_high_const - out_low_const) + 1;
+    auto out_low = std::make_shared<v0::Convert>(context.get_input(4), element::f32);
+    auto out_high = std::make_shared<v0::Convert>(context.get_input(5), element::f32);
+
+    auto const_neg_1 = v0::Constant::create(element::i32, Shape{1}, {-1});  // 1-element tensor holding -1
+    auto const_0 = v0::Constant::create(element::i32, Shape{}, {0});        // scalar 0, used as scatter axis
+    auto const_1 = v0::Constant::create(element::i32, Shape{}, {1});        // scalar 1, broadcast into `ones`
+
+    auto rank = std::get<1>(get_shape_rank(context, input_node));
+    auto ones = std::make_shared<v3::Broadcast>(const_1, rank);
+    auto normalized_axis = normalize_axis(context, axis, input_node);
+    // Build the reshape pattern: a rank-length vector of ones with -1 scattered in at the normalized axis index.
+    auto new_shape = std::make_shared<v3::ScatterElementsUpdate>(ones, normalized_axis, const_neg_1, const_0);
+    // Reshape scale and zero point with that pattern (special_zero = false): size 1 in every dimension except the
+    // one selected by axis, where -1 lets Reshape infer the size, so both broadcast per channel against the input.
+    auto scale_bc = std::make_shared<v1::Reshape>(scale, new_shape, false);
+    auto zero_point_bc = std::make_shared<v1::Reshape>(zero_point, new_shape, false);
+
+    // Subtract the per-channel zero points from both bounds (all operands were converted to f32 above).
+    auto out_low_normalized = std::make_shared<v1::Subtract>(out_low, zero_point_bc);
+    auto out_high_normalized = std::make_shared<v1::Subtract>(out_high, zero_point_bc);
+    // Multiply by scale: scale * (bound - zero_point) yields the per-channel limits passed to FakeQuantize below.
+    auto bound_a = std::make_shared<v1::Multiply>(scale_bc, out_low_normalized);
+    auto bound_b = std::make_shared<v1::Multiply>(scale_bc, out_high_normalized);
+    // A negative scale swaps the order of the two products, so take the maximum as high and the minimum as low.
+    auto bound_high = std::make_shared<v1::Maximum>(bound_a, bound_b);
+    auto bound_low = std::make_shared<v1::Minimum>(bound_a, bound_b);
+    return {context.mark_node(
+        std::make_shared<v0::FakeQuantize>(input_node, bound_low, bound_high, bound_low, bound_high, levels))};
+}
+
+}  // namespace op
+}  // namespace pytorch
+}  // namespace frontend
+}  // namespace ov
--- a/src/frontends/pytorch/src/op_table.cpp
+++ b/src/frontends/pytorch/src/op_table.cpp
@@ -50,6 +50,8 @@ OP_CONVERTER(translate_empty);
 OP_CONVERTER(translate_expand);
 OP_CONVERTER(translate_expand_as);
 OP_CONVERTER(translate_eye);
+OP_CONVERTER(translate_fake_quantize_per_channel_affine);
+OP_CONVERTER(translate_fake_quantize_per_tensor_affine);
 OP_CONVERTER(translate_fill_);
 OP_CONVERTER(translate_flatten);
 OP_CONVERTER(translate_flip);
@@ -240,6 +242,8 @@ const std::map<std::string, CreatorFunction> get_supported_ops() {
        {"aten::expand", op::translate_expand},
        {"aten::expand_as", op::translate_expand_as},
        {"aten::eye", op::translate_eye},
+        {"aten::fake_quantize_per_channel_affine", op::translate_fake_quantize_per_channel_affine},
+        {"aten::fake_quantize_per_tensor_affine", op::translate_fake_quantize_per_tensor_affine},
        {"aten::fill_", op::inplace_op<op::translate_fill_>},
        {"aten::flatten", op::translate_flatten},
        {"aten::flip", op::translate_flip},
--- a/tests/layer_tests/pytorch_tests/test_fake_quantize.py
+++ b/tests/layer_tests/pytorch_tests/test_fake_quantize.py
@@ -0,0 +1,108 @@
+# Copyright (C) 2018-2023 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+import numpy as np
+import pytest
+import torch
+from pytorch_layer_test_class import PytorchLayerTest
+
+
+class TestFakeQuantizePerTensorAffine(PytorchLayerTest):
+    def _prepare_input(self):  # one random float32 input of shape (3, 2, 2)
+        return (np.random.randn(3, 2, 2).astype(np.float32),)
+
+    def create_model(self, scale, zero_point, quant_min, quant_max):  # returns (module, ref_net, op name)
+        class fake_quantize_per_tensor_affine(torch.nn.Module):
+            def __init__(self, scale, zero_point, quant_min, quant_max):
+                super(fake_quantize_per_tensor_affine, self).__init__()
+                self.scale = scale
+                self.zero_point = zero_point
+                self.quant_min = quant_min
+                self.quant_max = quant_max
+
+            def forward(self, x):
+                return torch.fake_quantize_per_tensor_affine(
+                    x, self.scale, self.zero_point, self.quant_min, self.quant_max
+                )
+
+        ref_net = None  # no reference network is supplied
+
+        return (
+            fake_quantize_per_tensor_affine(scale, zero_point, quant_min, quant_max),
+            ref_net,
+            "aten::fake_quantize_per_tensor_affine",
+        )
+
+    @pytest.mark.nightly
+    @pytest.mark.precommit
+    @pytest.mark.parametrize(
+        "scale, zero_point, quant_min, quant_max",
+        [
+            (1.0, 1, 0, 255),  # non-zero zero point
+            (0.01, 0, 0, 255),  # small positive scale
+            (-0.01, 0, 0, 255),  # negative scale
+            (0.5, 0, -128, 127),  # signed quantization range
+            (0.5, -1, -128, 127),  # signed range with negative zero point
+            (1.0, 0, 0, 127),  # unsigned range ending at 127
+        ],
+    )
+    def test_fake_quantize_per_tensor_affine(
+        self, ie_device, precision, ir_version, scale, zero_point, quant_min, quant_max
+    ):
+        self._test(
+            *self.create_model(scale, zero_point, quant_min, quant_max),
+            ie_device,
+            precision,
+            ir_version,
+            freeze_model=False
+        )
+
+
+class TestFakeQuantizePerChannelAffine(PytorchLayerTest):
+    def _prepare_input(self):  # shape (3, 2, 2): axis 0 cases pass 3 scales, axis 1 cases pass 2
+        return (np.random.randn(3, 2, 2).astype(np.float32),)
+
+    def create_model(self, scale, zero_point, axis, quant_min, quant_max):  # returns (module, ref_net, op name)
+        class fake_quantize_per_channel_affine(torch.nn.Module):
+            def __init__(self, scale, zero_point, axis, quant_min, quant_max):
+                super(fake_quantize_per_channel_affine, self).__init__()
+                self.scale = scale
+                self.zero_point = zero_point
+                self.axis = axis
+                self.quant_min = quant_min
+                self.quant_max = quant_max
+
+            def forward(self, x):
+                return torch.fake_quantize_per_channel_affine(
+                    x, self.scale, self.zero_point, self.axis, self.quant_min, self.quant_max
+                )
+
+        ref_net = None  # no reference network is supplied
+
+        return (
+            fake_quantize_per_channel_affine(scale, zero_point, axis, quant_min, quant_max),
+            ref_net,
+            "aten::fake_quantize_per_channel_affine",
+        )
+
+    @pytest.mark.nightly
+    @pytest.mark.precommit
+    @pytest.mark.parametrize(
+        "scale, zero_point, axis, quant_min, quant_max",
+        [
+            (torch.tensor([0.005, 0.7]), torch.zeros(2), 1, 0, 255),  # zero points built with torch.zeros
+            (torch.tensor([1.5, -0.7, -0.1]), torch.tensor([1, 0, -1], dtype=torch.int32), 0, -128, 127),
+            (torch.tensor([-0.005, 0.7]), torch.tensor([0, 1], dtype=torch.int32), 1, 0, 127),  # mixed-sign scales
+            (torch.tensor([-0.005, -0.7, 0.1]), torch.tensor([1, 0, 1], dtype=torch.int32), 0, 0, 255),
+        ],
+    )
+    def test_fake_quantize_per_channel_affine(
+        self, ie_device, precision, ir_version, scale, zero_point, axis, quant_min, quant_max
+    ):
+        self._test(
+            *self.create_model(scale, zero_point, axis, quant_min, quant_max),
+            ie_device,
+            precision,
+            ir_version,
+            freeze_model=False
+        )