[PT FE] Add translation for aten::fake_quantize_per_tensor_affine and aten::fake_quantize_per_channel_affine (#18176)

This commit is contained in:
Mateusz Mikolajczyk
2023-07-07 11:05:23 +02:00
committed by GitHub
parent da84027b72
commit 63071b21d4
3 changed files with 206 additions and 0 deletions

View File

@@ -0,0 +1,94 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "openvino/op/fake_quantize.hpp"
#include "openvino/frontend/pytorch/node_context.hpp"
#include "openvino/op/add.hpp"
#include "openvino/op/broadcast.hpp"
#include "openvino/op/convert.hpp"
#include "openvino/op/maximum.hpp"
#include "openvino/op/minimum.hpp"
#include "openvino/op/multiply.hpp"
#include "openvino/op/reshape.hpp"
#include "openvino/op/scatter_elements_update.hpp"
#include "openvino/op/subtract.hpp"
#include "utils.hpp"
namespace ov {
namespace frontend {
namespace pytorch {
namespace op {
using namespace ov::op;
// Translates aten::fake_quantize_per_tensor_affine into a single v0::FakeQuantize node.
//
// Inputs read from the context (exactly five are accepted):
//   0 - tensor to be fake-quantized,
//   1 - scale, converted to f32,
//   2 - zero point, converted to f32,
//   3 - lower quantization bound, must be readable as a constant int64,
//   4 - upper quantization bound, must be readable as a constant int64.
//
// The limits scale * (bound - zero_point) are computed for both bounds and the same
// (low, high) pair is passed to FakeQuantize as the input range and as the output range.
OutputVector translate_fake_quantize_per_tensor_affine(const NodeContext& context) {
    num_inputs_check(context, 5, 5);
    auto data = context.get_input(0);
    // Shortcut: fetch the input with the given index and cast it to f32.
    auto input_as_f32 = [&context](int index) {
        return std::make_shared<v0::Convert>(context.get_input(index), element::f32);
    };
    auto scale_f32 = input_as_f32(1);
    auto zero_point_f32 = input_as_f32(2);
    auto quant_min = context.const_input<int64_t>(3);
    auto quant_max = context.const_input<int64_t>(4);
    // Number of levels is the distance between the integer bounds, both ends included.
    auto levels = std::abs(quant_max - quant_min) + 1;
    auto quant_min_f32 = input_as_f32(3);
    auto quant_max_f32 = input_as_f32(4);
    // Shift the integer bounds by the zero point ...
    auto shifted_min = std::make_shared<v1::Subtract>(quant_min_f32, zero_point_f32);
    auto shifted_max = std::make_shared<v1::Subtract>(quant_max_f32, zero_point_f32);
    // ... and scale them to obtain the real-valued limits of the quantization range.
    auto limit_from_min = std::make_shared<v1::Multiply>(scale_f32, shifted_min);
    auto limit_from_max = std::make_shared<v1::Multiply>(scale_f32, shifted_max);
    // A negative scale swaps the two limits, so pick the upper and lower one explicitly.
    auto upper_limit = std::make_shared<v1::Maximum>(limit_from_min, limit_from_max);
    auto lower_limit = std::make_shared<v1::Minimum>(limit_from_min, limit_from_max);
    return {context.mark_node(
        std::make_shared<v0::FakeQuantize>(data, lower_limit, upper_limit, lower_limit, upper_limit, levels))};
}
// Translates aten::fake_quantize_per_channel_affine into a single v0::FakeQuantize node.
//
// Inputs read from the context (exactly six are accepted):
//   0 - tensor to be fake-quantized,
//   1 - scale, converted to f32,
//   2 - zero point, converted to f32,
//   3 - axis along which scale and zero point are applied,
//   4 - lower quantization bound, must be readable as a constant int64,
//   5 - upper quantization bound, must be readable as a constant int64.
//
// Scale and zero point are reshaped so that they broadcast along the selected axis, then the
// limits scale * (bound - zero_point) are computed and the same (low, high) pair is passed to
// FakeQuantize as the input range and as the output range.
OutputVector translate_fake_quantize_per_channel_affine(const NodeContext& context) {
    num_inputs_check(context, 6, 6);
    auto data = context.get_input(0);
    // Shortcut: fetch the input with the given index and cast it to f32.
    auto input_as_f32 = [&context](int index) {
        return std::make_shared<v0::Convert>(context.get_input(index), element::f32);
    };
    auto scale_f32 = input_as_f32(1);
    auto zero_point_f32 = input_as_f32(2);
    auto channel_axis = context.get_input(3);
    auto quant_min = context.const_input<int64_t>(4);
    auto quant_max = context.const_input<int64_t>(5);
    // Number of levels is the distance between the integer bounds, both ends included.
    auto levels = std::abs(quant_max - quant_min) + 1;
    auto quant_min_f32 = input_as_f32(4);
    auto quant_max_f32 = input_as_f32(5);

    auto minus_one = v0::Constant::create(element::i32, Shape{1}, {-1});
    auto zero = v0::Constant::create(element::i32, Shape{}, {0});
    auto one = v0::Constant::create(element::i32, Shape{}, {1});
    auto data_rank = std::get<1>(get_shape_rank(context, data));
    auto all_ones = std::make_shared<v3::Broadcast>(one, data_rank);
    auto positive_axis = normalize_axis(context, channel_axis, data);
    // Reshape target: ones everywhere, with a single -1 written at the position given by the axis.
    auto per_channel_shape = std::make_shared<v3::ScatterElementsUpdate>(all_ones, positive_axis, minus_one, zero);
    // Give scale and zero point the rank of the data tensor, with size 1 in every dimension but
    // the one selected by axis, so that they broadcast per channel.
    auto scale_per_channel = std::make_shared<v1::Reshape>(scale_f32, per_channel_shape, false);
    auto zero_point_per_channel = std::make_shared<v1::Reshape>(zero_point_f32, per_channel_shape, false);

    // Shift the integer bounds by the per-channel zero point ...
    auto shifted_min = std::make_shared<v1::Subtract>(quant_min_f32, zero_point_per_channel);
    auto shifted_max = std::make_shared<v1::Subtract>(quant_max_f32, zero_point_per_channel);
    // ... and scale them to obtain the real-valued limits of the quantization range.
    auto limit_from_min = std::make_shared<v1::Multiply>(scale_per_channel, shifted_min);
    auto limit_from_max = std::make_shared<v1::Multiply>(scale_per_channel, shifted_max);
    // A negative scale swaps the two limits, so pick the upper and lower one explicitly.
    auto upper_limit = std::make_shared<v1::Maximum>(limit_from_min, limit_from_max);
    auto lower_limit = std::make_shared<v1::Minimum>(limit_from_min, limit_from_max);
    return {context.mark_node(
        std::make_shared<v0::FakeQuantize>(data, lower_limit, upper_limit, lower_limit, upper_limit, levels))};
}
} // namespace op
} // namespace pytorch
} // namespace frontend
} // namespace ov

View File

@@ -50,6 +50,8 @@ OP_CONVERTER(translate_empty);
OP_CONVERTER(translate_expand);
OP_CONVERTER(translate_expand_as);
OP_CONVERTER(translate_eye);
OP_CONVERTER(translate_fake_quantize_per_channel_affine);
OP_CONVERTER(translate_fake_quantize_per_tensor_affine);
OP_CONVERTER(translate_fill_);
OP_CONVERTER(translate_flatten);
OP_CONVERTER(translate_flip);
@@ -240,6 +242,8 @@ const std::map<std::string, CreatorFunction> get_supported_ops() {
{"aten::expand", op::translate_expand},
{"aten::expand_as", op::translate_expand_as},
{"aten::eye", op::translate_eye},
{"aten::fake_quantize_per_channel_affine", op::translate_fake_quantize_per_channel_affine},
{"aten::fake_quantize_per_tensor_affine", op::translate_fake_quantize_per_tensor_affine},
{"aten::fill_", op::inplace_op<op::translate_fill_>},
{"aten::flatten", op::translate_flatten},
{"aten::flip", op::translate_flip},

View File

@@ -0,0 +1,108 @@
# Copyright (C) 2018-2023 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import numpy as np
import pytest
import torch
from pytorch_layer_test_class import PytorchLayerTest
class TestFakeQuantizePerTensorAffine(PytorchLayerTest):
    """Layer test for ``aten::fake_quantize_per_tensor_affine``."""

    def _prepare_input(self):
        # One random float32 tensor of shape (3, 2, 2).
        data = np.random.randn(3, 2, 2)
        return (data.astype(np.float32),)

    def create_model(self, scale, zero_point, quant_min, quant_max):
        """Build the module under test for the given quantization parameters.

        Returns a 3-tuple: the module instance, ``None`` in the reference-net
        slot, and the op name string handed on to ``self._test``.
        """

        class fake_quantize_per_tensor_affine(torch.nn.Module):
            def __init__(self, scale, zero_point, quant_min, quant_max):
                super().__init__()
                self.scale = scale
                self.zero_point = zero_point
                self.quant_min = quant_min
                self.quant_max = quant_max

            def forward(self, x):
                # Forward all stored parameters positionally to the torch op.
                quant_args = (self.scale, self.zero_point, self.quant_min, self.quant_max)
                return torch.fake_quantize_per_tensor_affine(x, *quant_args)

        model = fake_quantize_per_tensor_affine(scale, zero_point, quant_min, quant_max)
        return model, None, "aten::fake_quantize_per_tensor_affine"

    @pytest.mark.nightly
    @pytest.mark.precommit
    @pytest.mark.parametrize(
        "scale, zero_point, quant_min, quant_max",
        [
            (1.0, 1, 0, 255),
            (0.01, 0, 0, 255),
            (-0.01, 0, 0, 255),
            (0.5, 0, -128, 127),
            (0.5, -1, -128, 127),
            (1.0, 0, 0, 127),
        ],
    )
    def test_fake_quantize_per_tensor_affine(
        self, ie_device, precision, ir_version, scale, zero_point, quant_min, quant_max
    ):
        model, ref_net, op_kind = self.create_model(scale, zero_point, quant_min, quant_max)
        self._test(model, ref_net, op_kind, ie_device, precision, ir_version, freeze_model=False)
class TestFakeQuantizePerChannelAffine(PytorchLayerTest):
    """Layer test for ``aten::fake_quantize_per_channel_affine``."""

    def _prepare_input(self):
        # One random float32 tensor of shape (3, 2, 2).
        data = np.random.randn(3, 2, 2)
        return (data.astype(np.float32),)

    def create_model(self, scale, zero_point, axis, quant_min, quant_max):
        """Build the module under test for the given quantization parameters.

        Returns a 3-tuple: the module instance, ``None`` in the reference-net
        slot, and the op name string handed on to ``self._test``.
        """

        class fake_quantize_per_channel_affine(torch.nn.Module):
            def __init__(self, scale, zero_point, axis, quant_min, quant_max):
                super().__init__()
                self.scale = scale
                self.zero_point = zero_point
                self.axis = axis
                self.quant_min = quant_min
                self.quant_max = quant_max

            def forward(self, x):
                # Forward all stored parameters positionally to the torch op.
                quant_args = (self.scale, self.zero_point, self.axis, self.quant_min, self.quant_max)
                return torch.fake_quantize_per_channel_affine(x, *quant_args)

        model = fake_quantize_per_channel_affine(scale, zero_point, axis, quant_min, quant_max)
        return model, None, "aten::fake_quantize_per_channel_affine"

    @pytest.mark.nightly
    @pytest.mark.precommit
    @pytest.mark.parametrize(
        "scale, zero_point, axis, quant_min, quant_max",
        [
            (torch.tensor([0.005, 0.7]), torch.zeros(2), 1, 0, 255),
            (torch.tensor([1.5, -0.7, -0.1]), torch.tensor([1, 0, -1], dtype=torch.int32), 0, -128, 127),
            (torch.tensor([-0.005, 0.7]), torch.tensor([0, 1], dtype=torch.int32), 1, 0, 127),
            (torch.tensor([-0.005, -0.7, 0.1]), torch.tensor([1, 0, 1], dtype=torch.int32), 0, 0, 255),
        ],
    )
    def test_fake_quantize_per_channel_affine(
        self, ie_device, precision, ir_version, scale, zero_point, axis, quant_min, quant_max
    ):
        model, ref_net, op_kind = self.create_model(scale, zero_point, axis, quant_min, quant_max)
        self._test(model, ref_net, op_kind, ie_device, precision, ir_version, freeze_model=False)