[PT FE] Add translation for aten::fake_quantize_per_tensor_affine and aten::fake_quantize_per_channel_affine (#18176)
This commit is contained in:
committed by
GitHub
parent
da84027b72
commit
63071b21d4
94
src/frontends/pytorch/src/op/fake_quantize.cpp
Normal file
94
src/frontends/pytorch/src/op/fake_quantize.cpp
Normal file
@@ -0,0 +1,94 @@
|
||||
// Copyright (C) 2018-2023 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "openvino/op/fake_quantize.hpp"
|
||||
|
||||
#include "openvino/frontend/pytorch/node_context.hpp"
|
||||
#include "openvino/op/add.hpp"
|
||||
#include "openvino/op/broadcast.hpp"
|
||||
#include "openvino/op/convert.hpp"
|
||||
#include "openvino/op/maximum.hpp"
|
||||
#include "openvino/op/minimum.hpp"
|
||||
#include "openvino/op/multiply.hpp"
|
||||
#include "openvino/op/reshape.hpp"
|
||||
#include "openvino/op/scatter_elements_update.hpp"
|
||||
#include "openvino/op/subtract.hpp"
|
||||
#include "utils.hpp"
|
||||
|
||||
namespace ov {
|
||||
namespace frontend {
|
||||
namespace pytorch {
|
||||
namespace op {
|
||||
|
||||
using namespace ov::op;
|
||||
|
||||
// Translates aten::fake_quantize_per_tensor_affine(Tensor self, float scale, int zero_point,
// int quant_min, int quant_max) into an OpenVINO FakeQuantize node.
OutputVector translate_fake_quantize_per_tensor_affine(const NodeContext& context) {
    num_inputs_check(context, 5, 5);
    auto input_node = context.get_input(0);
    // Compute all quantization parameters in f32 so the bound arithmetic is done in one type.
    // NOTE: every created node goes through context.mark_node so it is registered with the
    // frontend's node tracking, consistent with the final FakeQuantize below.
    auto scale = context.mark_node(std::make_shared<v0::Convert>(context.get_input(1), element::f32));
    auto zero_point = context.mark_node(std::make_shared<v0::Convert>(context.get_input(2), element::f32));
    // quant_min/quant_max must be compile-time constants: levels is a scalar attribute of FakeQuantize.
    auto out_low_const = context.const_input<int64_t>(3);
    auto out_high_const = context.const_input<int64_t>(4);
    // Calculate levels value - distance between bounds (e.g. 0..255 -> 256 levels).
    auto levels = std::abs(out_high_const - out_low_const) + 1;
    auto out_low = context.mark_node(std::make_shared<v0::Convert>(context.get_input(3), element::f32));
    auto out_high = context.mark_node(std::make_shared<v0::Convert>(context.get_input(4), element::f32));

    // Normalize bounds according to quantization zero point value.
    auto out_low_normalized = context.mark_node(std::make_shared<v1::Subtract>(out_low, zero_point));
    auto out_high_normalized = context.mark_node(std::make_shared<v1::Subtract>(out_high, zero_point));
    // Rescale bounds according to scale value to calculate limits for input/output maximum/minimum values.
    auto bound_a = context.mark_node(std::make_shared<v1::Multiply>(scale, out_low_normalized));
    auto bound_b = context.mark_node(std::make_shared<v1::Multiply>(scale, out_high_normalized));
    // In case of negative scale bounds may be inverted, select maximum bound as high and minimal bound as low.
    auto bound_high = context.mark_node(std::make_shared<v1::Maximum>(bound_a, bound_b));
    auto bound_low = context.mark_node(std::make_shared<v1::Minimum>(bound_a, bound_b));
    // Input and output limits coincide: FakeQuantize then emulates quantize+dequantize.
    return {context.mark_node(
        std::make_shared<v0::FakeQuantize>(input_node, bound_low, bound_high, bound_low, bound_high, levels))};
}
|
||||
|
||||
// Translates aten::fake_quantize_per_channel_affine(Tensor self, Tensor scale, Tensor zero_point,
// int axis, int quant_min, int quant_max) into an OpenVINO FakeQuantize node with per-channel bounds.
OutputVector translate_fake_quantize_per_channel_affine(const NodeContext& context) {
    num_inputs_check(context, 6, 6);
    auto input_node = context.get_input(0);
    // Per-channel scale/zero_point tensors converted to f32 for the bound arithmetic.
    // NOTE: every created node goes through context.mark_node so it is registered with the
    // frontend's node tracking, consistent with the final FakeQuantize below.
    auto scale = context.mark_node(std::make_shared<v0::Convert>(context.get_input(1), element::f32));
    auto zero_point = context.mark_node(std::make_shared<v0::Convert>(context.get_input(2), element::f32));
    auto axis = context.get_input(3);
    // quant_min/quant_max must be compile-time constants: levels is a scalar attribute of FakeQuantize.
    auto out_low_const = context.const_input<int64_t>(4);
    auto out_high_const = context.const_input<int64_t>(5);
    // Calculate levels value - distance between bounds.
    auto levels = std::abs(out_high_const - out_low_const) + 1;
    auto out_low = context.mark_node(std::make_shared<v0::Convert>(context.get_input(4), element::f32));
    auto out_high = context.mark_node(std::make_shared<v0::Convert>(context.get_input(5), element::f32));

    auto const_neg_1 = context.mark_node(v0::Constant::create(element::i32, Shape{1}, {-1}));
    auto const_0 = context.mark_node(v0::Constant::create(element::i32, Shape{}, {0}));
    auto const_1 = context.mark_node(v0::Constant::create(element::i32, Shape{}, {1}));

    auto rank = std::get<1>(get_shape_rank(context, input_node));
    auto ones = context.mark_node(std::make_shared<v3::Broadcast>(const_1, rank));
    // Axis may be negative; normalize it against the input rank before scattering.
    auto normalized_axis = normalize_axis(context, axis, input_node);
    // Create vector of length of rank filled with ones, except single -1 value at place selected by axis element.
    auto new_shape =
        context.mark_node(std::make_shared<v3::ScatterElementsUpdate>(ones, normalized_axis, const_neg_1, const_0));
    // Reshape scale and zero point to tensor of the same rank as input, having shape 1 everywhere except dimension
    // selected by axis parameter, allowing for per-channel broadcasting.
    auto scale_bc = context.mark_node(std::make_shared<v1::Reshape>(scale, new_shape, false));
    auto zero_point_bc = context.mark_node(std::make_shared<v1::Reshape>(zero_point, new_shape, false));

    // Normalize bounds according to per-channel quantization zero point values.
    auto out_low_normalized = context.mark_node(std::make_shared<v1::Subtract>(out_low, zero_point_bc));
    auto out_high_normalized = context.mark_node(std::make_shared<v1::Subtract>(out_high, zero_point_bc));
    // Rescale bounds according to scale value to calculate limits for input/output maximum/minimum values.
    auto bound_a = context.mark_node(std::make_shared<v1::Multiply>(scale_bc, out_low_normalized));
    auto bound_b = context.mark_node(std::make_shared<v1::Multiply>(scale_bc, out_high_normalized));
    // In case of negative scale bounds may be inverted, select maximum bound as high and minimal bound as low.
    auto bound_high = context.mark_node(std::make_shared<v1::Maximum>(bound_a, bound_b));
    auto bound_low = context.mark_node(std::make_shared<v1::Minimum>(bound_a, bound_b));
    // Input and output limits coincide: FakeQuantize then emulates quantize+dequantize per channel.
    return {context.mark_node(
        std::make_shared<v0::FakeQuantize>(input_node, bound_low, bound_high, bound_low, bound_high, levels))};
}
|
||||
|
||||
} // namespace op
|
||||
} // namespace pytorch
|
||||
} // namespace frontend
|
||||
} // namespace ov
|
||||
@@ -50,6 +50,8 @@ OP_CONVERTER(translate_empty);
|
||||
OP_CONVERTER(translate_expand);
|
||||
OP_CONVERTER(translate_expand_as);
|
||||
OP_CONVERTER(translate_eye);
|
||||
OP_CONVERTER(translate_fake_quantize_per_channel_affine);
|
||||
OP_CONVERTER(translate_fake_quantize_per_tensor_affine);
|
||||
OP_CONVERTER(translate_fill_);
|
||||
OP_CONVERTER(translate_flatten);
|
||||
OP_CONVERTER(translate_flip);
|
||||
@@ -240,6 +242,8 @@ const std::map<std::string, CreatorFunction> get_supported_ops() {
|
||||
{"aten::expand", op::translate_expand},
|
||||
{"aten::expand_as", op::translate_expand_as},
|
||||
{"aten::eye", op::translate_eye},
|
||||
{"aten::fake_quantize_per_channel_affine", op::translate_fake_quantize_per_channel_affine},
|
||||
{"aten::fake_quantize_per_tensor_affine", op::translate_fake_quantize_per_tensor_affine},
|
||||
{"aten::fill_", op::inplace_op<op::translate_fill_>},
|
||||
{"aten::flatten", op::translate_flatten},
|
||||
{"aten::flip", op::translate_flip},
|
||||
|
||||
108
tests/layer_tests/pytorch_tests/test_fake_quantize.py
Normal file
108
tests/layer_tests/pytorch_tests/test_fake_quantize.py
Normal file
@@ -0,0 +1,108 @@
|
||||
# Copyright (C) 2018-2023 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
import torch
|
||||
from pytorch_layer_test_class import PytorchLayerTest
|
||||
|
||||
|
||||
class TestFakeQuantizePerTensorAffine(PytorchLayerTest):
    def _prepare_input(self):
        # A single random float32 tensor of shape (3, 2, 2).
        return (np.random.randn(3, 2, 2).astype(np.float32),)

    def create_model(self, scale, zero_point, quant_min, quant_max):
        class fake_quantize_per_tensor_affine(torch.nn.Module):
            def __init__(self, scale, zero_point, quant_min, quant_max):
                super().__init__()
                # Keep the quantization parameters together as a single tuple.
                self._qparams = (scale, zero_point, quant_min, quant_max)

            def forward(self, x):
                scale, zero_point, quant_min, quant_max = self._qparams
                return torch.fake_quantize_per_tensor_affine(
                    x, scale, zero_point, quant_min, quant_max
                )

        model = fake_quantize_per_tensor_affine(scale, zero_point, quant_min, quant_max)
        # No reference network: comparison is done against the torch model itself.
        return model, None, "aten::fake_quantize_per_tensor_affine"

    @pytest.mark.nightly
    @pytest.mark.precommit
    @pytest.mark.parametrize(
        "scale, zero_point, quant_min, quant_max",
        [
            (1.0, 1, 0, 255),
            (0.01, 0, 0, 255),
            (-0.01, 0, 0, 255),
            (0.5, 0, -128, 127),
            (0.5, -1, -128, 127),
            (1.0, 0, 0, 127),
        ],
    )
    def test_fake_quantize_per_tensor_affine(
        self, ie_device, precision, ir_version, scale, zero_point, quant_min, quant_max
    ):
        self._test(
            *self.create_model(scale, zero_point, quant_min, quant_max),
            ie_device,
            precision,
            ir_version,
            freeze_model=False
        )
|
||||
|
||||
|
||||
class TestFakeQuantizePerChannelAffine(PytorchLayerTest):
    def _prepare_input(self):
        # A single random float32 tensor of shape (3, 2, 2).
        return (np.random.randn(3, 2, 2).astype(np.float32),)

    def create_model(self, scale, zero_point, axis, quant_min, quant_max):
        class fake_quantize_per_channel_affine(torch.nn.Module):
            def __init__(self, scale, zero_point, axis, quant_min, quant_max):
                super().__init__()
                # Keep the per-channel quantization parameters together as one tuple.
                self._qparams = (scale, zero_point, axis, quant_min, quant_max)

            def forward(self, x):
                scale, zero_point, axis, quant_min, quant_max = self._qparams
                return torch.fake_quantize_per_channel_affine(
                    x, scale, zero_point, axis, quant_min, quant_max
                )

        model = fake_quantize_per_channel_affine(scale, zero_point, axis, quant_min, quant_max)
        # No reference network: comparison is done against the torch model itself.
        return model, None, "aten::fake_quantize_per_channel_affine"

    @pytest.mark.nightly
    @pytest.mark.precommit
    @pytest.mark.parametrize(
        "scale, zero_point, axis, quant_min, quant_max",
        [
            (torch.tensor([0.005, 0.7]), torch.zeros(2), 1, 0, 255),
            (torch.tensor([1.5, -0.7, -0.1]), torch.tensor([1, 0, -1], dtype=torch.int32), 0, -128, 127),
            (torch.tensor([-0.005, 0.7]), torch.tensor([0, 1], dtype=torch.int32), 1, 0, 127),
            (torch.tensor([-0.005, -0.7, 0.1]), torch.tensor([1, 0, 1], dtype=torch.int32), 0, 0, 255),
        ],
    )
    def test_fake_quantize_per_channel_affine(
        self, ie_device, precision, ir_version, scale, zero_point, axis, quant_min, quant_max
    ):
        self._test(
            *self.create_model(scale, zero_point, axis, quant_min, quant_max),
            ie_device,
            precision,
            ir_version,
            freeze_model=False
        )
|
||||
Reference in New Issue
Block a user