[PT FE] Add quantized::conv2d and quantized::conv2d_relu (#18651)

* Add quantized conv2d

* Fix schema

* Remove mark_output

* Remove tests from pre-commit
Mateusz Mikolajczyk 2023-07-20 17:35:11 +02:00 committed by GitHub
parent 2dfb537bcb
commit bc261424ef
5 changed files with 186 additions and 2 deletions
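
For context (not part of the diff): a minimal sketch of the PyTorch modules that trace to the two newly supported ops. The torch.ao module paths below are the same ones the new layer test uses; the printed graphs should contain the quantized::conv2d / quantized::conv2d_relu calls this commit translates.

import torch

# Quantized conv module; its traced graph should contain quantized::conv2d.
conv = torch.ao.nn.quantized.Conv2d(3, 2, kernel_size=3)
# Fused conv+ReLU variant; traces to quantized::conv2d_relu.
conv_relu = torch.ao.nn.intrinsic.quantized.ConvReLU2d(3, 2, kernel_size=3)

x = torch.quantize_per_tensor(torch.randn(1, 3, 8, 8), scale=1.0, zero_point=0, dtype=torch.quint8)
print(torch.jit.trace(conv, x).graph)       # should show ... = quantized::conv2d(...)
print(torch.jit.trace(conv_relu, x).graph)  # should show ... = quantized::conv2d_relu(...)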

View File

@@ -0,0 +1,95 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "openvino/frontend/pytorch/node_context.hpp"
#include "openvino/op/add.hpp"
#include "openvino/op/constant.hpp"
#include "openvino/op/convolution.hpp"
#include "openvino/op/group_conv.hpp"
#include "openvino/op/relu.hpp"
#include "utils.hpp"
#include "utils_quantize.hpp"

namespace ov {
namespace frontend {
namespace pytorch {
namespace op {

using namespace ov::op;

namespace {
Output<ov::Node> translate_quantized_convnd_base(const NodeContext& context) {
    auto input = context.get_input(0);
    // The packed parameters arrive as a prim::GetAttr FrameworkNode whose inputs
    // carry the unpacked weight, bias, and convolution attributes.
    auto packed_params_node =
        std::dynamic_pointer_cast<ov::op::util::FrameworkNode>(context.get_input(1).get_node_shared_ptr());
    FRONT_END_OP_CONVERSION_CHECK(packed_params_node, "Packed params input node type is required to be FrameworkNode.");
    const auto& attrs = packed_params_node->get_attrs();
    FRONT_END_OP_CONVERSION_CHECK((attrs.find(PtFrameworkNode::op_type_key) != attrs.end()),
                                  "Packed params input node does not contain information about op type.");
    FRONT_END_OP_CONVERSION_CHECK((attrs.at(PtFrameworkNode::op_type_key) == "prim::GetAttr"),
                                  "Incorrect packed params input node operator type, expected prim::GetAttr.");
    auto packed_params = packed_params_node->inputs();
    FRONT_END_OP_CONVERSION_CHECK(packed_params.size() == 6,
                                  "Packed parameters for quantized conv should contain 6 items.");
    // Packed params: weight, bias, stride, padding, dilation, groups
    auto weight = packed_params[0].get_source_output();
    auto bias = packed_params[1].get_source_output();
    auto strides = std::dynamic_pointer_cast<v0::Constant>(packed_params[2].get_source_output().get_node_shared_ptr())
                       ->cast_vector<Strides::value_type>();
    auto pads = std::dynamic_pointer_cast<v0::Constant>(packed_params[3].get_source_output().get_node_shared_ptr())
                    ->cast_vector<CoordinateDiff::value_type>();
    auto dilations = std::dynamic_pointer_cast<v0::Constant>(packed_params[4].get_source_output().get_node_shared_ptr())
                         ->cast_vector<Strides::value_type>();
    int64_t groups = std::dynamic_pointer_cast<v0::Constant>(packed_params[5].get_source_output().get_node_shared_ptr())
                         ->cast_vector<int64_t>()[0];

    auto pad_type = ov::op::PadType::EXPLICIT;
    std::shared_ptr<ov::Node> conv;
    if (groups == 1) {
        conv = std::make_shared<v1::Convolution>(input, weight, strides, pads, pads, dilations, pad_type);
    } else {
        conv = std::make_shared<v1::GroupConvolution>(input,
                                                      reshape_kernel_for_group(context, weight, groups),
                                                      strides,
                                                      pads,
                                                      pads,
                                                      dilations,
                                                      pad_type);
    }
    // A rank-1 bias has to be reshaped so it broadcasts over the channel dimension.
    auto bias_rank = bias.get_partial_shape().rank();
    if (bias_rank == 1) {
        bias = reshape_channelwise(context, bias, conv);
    }
    conv = context.mark_node(std::make_shared<v1::Add>(conv, bias));
    return conv->output(0);
}
} // namespace

OutputVector translate_quantized_convnd(const NodeContext& context) {
    // "quantized::conv2d.new(Tensor qx, __torch__.torch.classes.quantized.Conv2dPackedParamsBase packed_weight, float
    // output_scale, int output_zero_point) -> Tensor"
    num_inputs_check(context, 4, 4);
    auto scale = context.get_input(2);
    auto zero_point = context.get_input(3);
    // Requantize the convolution result to the requested output scale/zero point.
    return {quantize(context, translate_quantized_convnd_base(context), scale, zero_point, context.get_input(0))};
}

OutputVector translate_quantized_convnd_relu(const NodeContext& context) {
    // "quantized::conv2d_relu.new(Tensor qx, __torch__.torch.classes.quantized.Conv2dPackedParamsBase packed_weight,
    // float output_scale, int output_zero_point) -> Tensor"
    num_inputs_check(context, 4, 4);
    auto scale = context.get_input(2);
    auto zero_point = context.get_input(3);
    auto conv = translate_quantized_convnd_base(context);
    auto relu = context.mark_node(std::make_shared<v0::Relu>(conv));
    return {quantize(context, relu->output(0), scale, zero_point, context.get_input(0))};
}

} // namespace op
} // namespace pytorch
} // namespace frontend
} // namespace ov
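
A quick end-to-end sketch for exercising the new translator (not part of the commit; assumes an OpenVINO release where openvino.convert_model accepts a traced TorchScript module):

import torch
import openvino as ov

class QConvRelu(torch.nn.Module):
    def __init__(self):
        super().__init__()
        # Fused quantized conv+ReLU; traces to quantized::conv2d_relu.
        self.conv = torch.ao.nn.intrinsic.quantized.ConvReLU2d(3, 2, kernel_size=3)

    def forward(self, x):
        xq = torch.quantize_per_tensor(x, 1.0, 0, torch.quint8)
        return torch.dequantize(self.conv(xq))

example = torch.randn(1, 3, 8, 8)
traced = torch.jit.trace(QConvRelu(), example)
ov_model = ov.convert_model(traced, example_input=example)  # hits translate_quantized_convnd_relu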

View File

@@ -37,7 +37,7 @@ OutputVector translate_quantized_linear(const NodeContext& context) {
    linear = context.mark_node(std::make_shared<ov::op::v1::Add>(linear, bias));
    auto scale = context.get_input(2);
    auto zero_point = context.get_input(3);
-   return {context.mark_output(quantize(context, linear, scale, zero_point, x))};
+   return {quantize(context, linear, scale, zero_point, x)};
};
} // namespace op

View File

@@ -158,6 +158,8 @@ OP_CONVERTER(translate_var_mean);
OP_CONVERTER(translate_where);
OP_CONVERTER(translate_zeros);
OP_CONVERTER(translate_zeros_like);
+OP_CONVERTER(translate_quantized_convnd);
+OP_CONVERTER(translate_quantized_convnd_relu);
OP_CONVERTER(translate_quantized_linear);
} // namespace op
@@ -419,6 +421,8 @@ const std::map<std::string, CreatorFunction> get_supported_ops() {
        {"prim::requires_grad", op::return_false_scalar},
        {"prim::PythonOp", op::translate_pythonop},
        {"prim::type", op::skip_node},  // Used with prim::device, pass PtFrameworkNode.
+       {"quantized::conv2d", op::translate_quantized_convnd},
+       {"quantized::conv2d_relu", op::translate_quantized_convnd_relu},
        {"quantized::linear", op::translate_quantized_linear},
        {"torchvision::deform_conv2d", op::translate_deform_conv},
        {"torchvision::nms", op::translate_nms},

View File

@@ -0,0 +1,85 @@
# Copyright (C) 2018-2023 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

import pytest
import numpy as np
import torch

from openvino.frontend import FrontEndManager
from openvino.frontend.pytorch.decoder import TorchScriptPythonDecoder
from pytorch_layer_test_class import PytorchLayerTest


class TestQuantizedConv2D(PytorchLayerTest):
    def _prepare_input(self):
        return (np.random.randn(2, 3, 25, 25).astype(np.float32),)

    def create_model(self, weights_shape, strides, pads, dilations, groups, bias, relu, scale, zero_point):
        class quantized_conv2d(torch.nn.Module):
            def __init__(self):
                super(quantized_conv2d, self).__init__()
                # Pick the plain quantized conv or the fused conv+ReLU module.
                if not relu:
                    conv_func = torch.ao.nn.quantized.Conv2d
                else:
                    conv_func = torch.ao.nn.intrinsic.quantized.ConvReLU2d
                self.conv = conv_func(
                    weights_shape[1] * groups,
                    weights_shape[0],
                    weights_shape[2:],
                    strides,
                    pads,
                    dilations,
                    groups,
                    bias,
                )
                if bias:
                    torch.nn.init.normal_(self.conv.bias())
                # Output quantization params are plain attributes on quantized modules.
                self.conv.scale = float(scale)
                self.conv.zero_point = int(zero_point)

            def forward(self, x):
                x_quantized = torch.quantize_per_tensor(x, 1.0, 0, torch.quint8)
                conv = self.conv(x_quantized)
                return torch.dequantize(conv).contiguous()

        ref_net = None
        if not relu:
            op_name = "quantized::conv2d"
        else:
            op_name = "quantized::conv2d_relu"
        return quantized_conv2d(), ref_net, op_name

    @pytest.mark.parametrize(
        "params",
        [
            pytest.param(
                {"weights_shape": [1, 3, 3, 3], "strides": 1, "pads": 0, "dilations": 1, "groups": 1},
                marks=pytest.mark.xfail(
                    reason="Output channels equal to 1 creates output that fails to cast to contiguous."
                ),
            ),
            {"weights_shape": [2, 3, 3, 3], "strides": 1, "pads": 0, "dilations": 1, "groups": 1},
            {"weights_shape": [2, 3, 3, 3], "strides": 2, "pads": 0, "dilations": 1, "groups": 1},
            {"weights_shape": [2, 3, 3, 3], "strides": 1, "pads": 1, "dilations": 1, "groups": 1},
            {"weights_shape": [2, 3, 3, 3], "strides": 1, "pads": 0, "dilations": 2, "groups": 1},
            {"weights_shape": [2, 3, 3, 3], "strides": 1, "pads": [0, 1], "dilations": 1, "groups": 1},
            {"weights_shape": [2, 3, 3, 3], "strides": 1, "pads": [1, 0], "dilations": 1, "groups": 1},
            {"weights_shape": [3, 1, 3, 3], "strides": 1, "pads": 0, "dilations": 1, "groups": 3},
        ],
    )
    @pytest.mark.parametrize("bias", [True, False])
    @pytest.mark.parametrize("relu", [True, False])
    @pytest.mark.parametrize("scale", [1, 0.3, 1.3])
    @pytest.mark.parametrize("zero_point", [0, 1])
    @pytest.mark.nightly
    # @pytest.mark.precommit Test disabled due to sporadic issues
    def test_quantized_conv2d(self, params, bias, relu, scale, zero_point, ie_device, precision, ir_version):
        self._test(
            *self.create_model(**params, bias=bias, relu=relu, scale=scale, zero_point=zero_point),
            ie_device,
            precision,
            ir_version,
            trace_model=True,
            freeze_model=False,
        )

View File

@@ -44,7 +44,7 @@ class TestQuantizedLinear(PytorchLayerTest):
    @pytest.mark.parametrize("zero_point", [0, 1])
    @pytest.mark.parametrize("trace", [True, False])
    @pytest.mark.nightly
-   @pytest.mark.precommit
+   # @pytest.mark.precommit Test disabled due to sporadic issues
    def test_quantized_linear(self, params, scale, zero_point, trace, ie_device, precision, ir_version):
        input_shape = params.get("input_shape")
        weight_shape = params.get("weight_shape")