[TF FE][TF Hub] Support FakeQuantWithMinMaxArgs operation (#19154)

Signed-off-by: Kazantsev, Roman <roman.kazantsev@intel.com>
Roman Kazantsev 2023-08-11 20:58:35 +04:00 committed by GitHub
parent 396a899b75
commit 545c5bbde1
4 changed files with 83 additions and 46 deletions

@@ -155,6 +155,7 @@ const std::map<std::string, CreatorFunction> get_supported_ops() {
         {"ExtractImagePatches", CreatorFunction(translate_extract_image_patches_op)},
         {"FakeQuantWithMinMaxVars", CreatorFunction(translate_fake_quant_op)},
         {"FakeQuantWithMinMaxVarsPerChannel", CreatorFunction(translate_fake_quant_op)},
+        {"FakeQuantWithMinMaxArgs", CreatorFunction(translate_fake_quant_with_min_max_args)},
         {"FIFOQueue", CreatorFunction(translate_fifo_queue_op)},
         {"FIFOQueueV2", CreatorFunction(translate_fifo_queue_op)},
         {"Fill", CreatorFunction(translate_fill_op)},

@@ -63,6 +63,7 @@ OP_CONVERTER(translate_elu_op);
 OP_CONVERTER(translate_expand_dims_op);
 OP_CONVERTER(translate_extract_image_patches_op);
 OP_CONVERTER(translate_fake_quant_op);
+OP_CONVERTER(translate_fake_quant_with_min_max_args);
 OP_CONVERTER(translate_fill_op);
 OP_CONVERTER(translate_floor_div_op);
 OP_CONVERTER_NAMED(translate_fused_batch_norm_op);
@@ -143,6 +144,7 @@ OP_CONVERTER(translate_unpack_op);
 OP_CONVERTER(translate_unravel_index_op);
 OP_CONVERTER(translate_where_op);
 OP_CONVERTER(translate_x_div_y_op);
+OP_CONVERTER(translate_xla_dot_op);
 OP_CONVERTER(translate_zeros_like_op);
 // Translators for internal operations

@@ -3,21 +3,27 @@
 //
 #include "common_op_table.hpp"
-#include "openvino/opsets/opset8.hpp"
+#include "openvino/op/add.hpp"
+#include "openvino/op/constant.hpp"
+#include "openvino/op/divide.hpp"
+#include "openvino/op/fake_quantize.hpp"
+#include "openvino/op/greater.hpp"
+#include "openvino/op/less.hpp"
+#include "openvino/op/maximum.hpp"
+#include "openvino/op/minimum.hpp"
+#include "openvino/op/multiply.hpp"
+#include "openvino/op/round.hpp"
+#include "openvino/op/select.hpp"
+#include "openvino/op/subtract.hpp"
 using namespace std;
-using namespace ov::opset8;
+using namespace ov::op;
 namespace ov {
 namespace frontend {
 namespace tensorflow {
 namespace op {
-OutputVector translate_fake_quant_op(const NodeContext& node) {
-    default_op_checks(node, 2, {"FakeQuantWithMinMaxVars", "FakeQuantWithMinMaxVarsPerChannel"});
-    auto inputs = node.get_input(0);
-    auto min = node.get_input(1);
-    auto max = node.get_input(2);
+OutputVector translate_fake_quant_aux_op(const NodeContext& node,
+                                         const Output<Node>& inputs,
+                                         const Output<Node>& min,
+                                         const Output<Node>& max) {
     // retrieve attributes
     auto narrow_range = node.get_attribute<bool>("narrow_range", false);
     auto num_bits = node.get_attribute<int64_t>("num_bits", 8);
@@ -26,37 +32,58 @@ OutputVector translate_fake_quant_op(const NodeContext& node) {
     levels = narrow_range ? levels - 1 : levels;
     // compute real min and max values
-    Output<Node> minimum = make_shared<Minimum>(min, max);
-    Output<Node> maximum = make_shared<Maximum>(min, max);
+    Output<Node> minimum = make_shared<v1::Minimum>(min, max);
+    Output<Node> maximum = make_shared<v1::Maximum>(min, max);
     // adjust min and max so that min <= 0
     auto zero = create_same_type_const_scalar<float>(min, 0);
-    auto min_greater_zero = make_shared<Greater>(minimum, zero);
-    Output<Node> max_minus_min = make_shared<Subtract>(maximum, minimum);
-    minimum = make_shared<Select>(min_greater_zero, zero, minimum);
-    maximum = make_shared<Select>(min_greater_zero, max_minus_min, maximum);
+    auto min_greater_zero = make_shared<v1::Greater>(minimum, zero);
+    Output<Node> max_minus_min = make_shared<v1::Subtract>(maximum, minimum);
+    minimum = make_shared<v1::Select>(min_greater_zero, zero, minimum);
+    maximum = make_shared<v1::Select>(min_greater_zero, max_minus_min, maximum);
     // adjust min and max so that 0 <= max
-    auto max_less_zero = make_shared<Less>(maximum, zero);
-    auto min_minus_max = make_shared<Subtract>(minimum, maximum);
-    minimum = make_shared<Select>(max_less_zero, min_minus_max, minimum);
-    maximum = make_shared<Select>(max_less_zero, zero, maximum);
+    auto max_less_zero = make_shared<v1::Less>(maximum, zero);
+    auto min_minus_max = make_shared<v1::Subtract>(minimum, maximum);
+    minimum = make_shared<v1::Select>(max_less_zero, min_minus_max, minimum);
+    maximum = make_shared<v1::Select>(max_less_zero, zero, maximum);
     // adjust min and max so that scale = (max - min) / (2^num_bits - 1),
     // min_adj = scale * round(min / scale) and max_adj = max + min_adj - min
-    max_minus_min = make_shared<Subtract>(maximum, minimum);
-    auto const_levels = make_shared<Constant>(element::f32, Shape{}, static_cast<float>(levels - 1));
-    auto scale = make_shared<Divide>(max_minus_min, const_levels);
-    auto descaled_min = make_shared<Divide>(minimum, scale);
-    auto rounded_descaled_min = make_shared<Round>(descaled_min, Round::RoundMode::HALF_TO_EVEN);
-    auto min_adj = make_shared<Multiply>(scale, rounded_descaled_min);
-    auto adjustment = make_shared<Subtract>(min_adj, minimum);
-    auto max_adj = make_shared<Add>(maximum, adjustment);
-    auto fake_quantize = make_shared<FakeQuantize>(inputs, min_adj, max_adj, min_adj, max_adj, levels);
+    max_minus_min = make_shared<v1::Subtract>(maximum, minimum);
+    auto const_levels = make_shared<v0::Constant>(element::f32, Shape{}, static_cast<float>(levels - 1));
+    auto scale = make_shared<v1::Divide>(max_minus_min, const_levels);
+    auto descaled_min = make_shared<v1::Divide>(minimum, scale);
+    auto rounded_descaled_min = make_shared<v5::Round>(descaled_min, v5::Round::RoundMode::HALF_TO_EVEN);
+    auto min_adj = make_shared<v1::Multiply>(scale, rounded_descaled_min);
+    auto adjustment = make_shared<v1::Subtract>(min_adj, minimum);
+    auto max_adj = make_shared<v1::Add>(maximum, adjustment);
+    auto fake_quantize = make_shared<v0::FakeQuantize>(inputs, min_adj, max_adj, min_adj, max_adj, levels);
     set_node_name(node.get_name(), fake_quantize);
     return {fake_quantize};
 }
+
+OutputVector translate_fake_quant_op(const NodeContext& node) {
+    default_op_checks(node, 3, {"FakeQuantWithMinMaxVars", "FakeQuantWithMinMaxVarsPerChannel"});
+    auto inputs = node.get_input(0);
+    auto min = node.get_input(1);
+    auto max = node.get_input(2);
+    return translate_fake_quant_aux_op(node, inputs, min, max);
+}
+
+OutputVector translate_fake_quant_with_min_max_args(const NodeContext& node) {
+    default_op_checks(node, 1, {"FakeQuantWithMinMaxArgs"});
+    auto inputs = node.get_input(0);
+    auto min_val = node.get_attribute<float>("min", -6.0f);
+    auto max_val = node.get_attribute<float>("max", 6.0f);
+    auto min = make_shared<v0::Constant>(element::f32, Shape{}, min_val);
+    auto max = make_shared<v0::Constant>(element::f32, Shape{}, max_val);
+    return translate_fake_quant_aux_op(node, inputs, min, max);
+}
 } // namespace op
 } // namespace tensorflow
 } // namespace frontend
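The adjustment sequence in translate_fake_quant_aux_op mirrors TF's min/max "nudging": first force the range to contain zero, then shift it so that zero lands exactly on one of the quantization levels (levels = 2^num_bits, minus one when narrow_range, e.g. 255 instead of 256 for num_bits = 8). A minimal Python sketch of the same arithmetic, handy for checking values by hand (the function name and sample numbers are illustrative; Python's round() is half-to-even, matching v5::Round above):

    def nudge_min_max(min_val, max_val, num_bits=8, narrow_range=False):
        # number of quantization levels, as in the translator above
        levels = 2 ** num_bits - (1 if narrow_range else 0)
        # compute real min and max values
        lo, hi = min(min_val, max_val), max(min_val, max_val)
        # adjust so that lo <= 0, keeping the range width
        if lo > 0.0:
            lo, hi = 0.0, hi - lo
        # adjust so that 0 <= hi, keeping the range width
        if hi < 0.0:
            lo, hi = lo - hi, 0.0
        # scale = (max - min) / (levels - 1); shift so that 0 is exactly representable
        scale = (hi - lo) / (levels - 1)
        min_adj = scale * round(lo / scale)
        max_adj = hi + min_adj - lo
        return min_adj, max_adj, levels

    print(nudge_min_max(-0.95, 1.0))

With min = -0.95 and max = 1.0 this yields min_adj ≈ -0.94824 and max_adj ≈ 1.00176, so min_adj is an exact multiple of scale and zero is representable.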

@@ -21,9 +21,7 @@ class TestFakeQuantWithMinMaxVars(CommonTFLayerTest):
         tf.compat.v1.reset_default_graph()
         with tf.compat.v1.Session() as sess:
             inputs = tf.compat.v1.placeholder(tf.float32, inputs_shape, 'inputs')
-            min = tf.constant(min_value, dtype=tf.float32)
-            max = tf.constant(max_value, dtype=tf.float32)
-            fake_quant_op(inputs=inputs, min=min, max=max, num_bits=num_bits,
+            fake_quant_op(inputs=inputs, min=min_value, max=max_value, num_bits=num_bits,
                           narrow_range=narrow_range)
             tf.compat.v1.global_variables_initializer()
             tf_net = sess.graph_def
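Note: the tf.constant wrappers are dropped because the new parametrization also feeds this builder tf.raw_ops.FakeQuantWithMinMaxArgs, whose min/max are attributes and accept only plain Python floats; FakeQuantWithMinMaxVars, by contrast, auto-converts floats into its min/max tensor inputs, so bare floats satisfy both ops. A small illustration (assumes TF 2.x eager mode):

    import tensorflow as tf

    x = tf.zeros([4], dtype=tf.float32)
    tf.raw_ops.FakeQuantWithMinMaxVars(inputs=x, min=-3.0, max=4.0)  # floats are converted to tensors
    tf.raw_ops.FakeQuantWithMinMaxArgs(inputs=x, min=-3.0, max=4.0)  # floats stay node attributes
    # passing tf.constant(-3.0) as min to the Args op would fail: the attr expects a float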
@@ -32,27 +30,36 @@ class TestFakeQuantWithMinMaxVars(CommonTFLayerTest):
     test_basic = [
         # test FakeQuantWithMinMaxVars
-        dict(inputs_shape=[2, 6, 4], min_value=-3, max_value=4, num_bits=None, narrow_range=None,
-             fake_quant_op=tf.raw_ops.FakeQuantWithMinMaxVars),
-        dict(inputs_shape=[3, 2, 1, 5], min_value=-4, max_value=5, num_bits=14, narrow_range=True,
-             fake_quant_op=tf.raw_ops.FakeQuantWithMinMaxVars),
-        dict(inputs_shape=[3, 2, 4], min_value=2, max_value=4, num_bits=10, narrow_range=False,
-             fake_quant_op=tf.raw_ops.FakeQuantWithMinMaxVars),
-        dict(inputs_shape=[1, 2, 3], min_value=-6, max_value=-3, num_bits=8, narrow_range=True,
-             fake_quant_op=tf.raw_ops.FakeQuantWithMinMaxVars),
-        # test FakeQuantWithMinMaxVarsPerChannel
-        pytest.param(dict(inputs_shape=[2, 6, 4], min_value=[-4, -3, -5, -8], max_value=[4, 7, 9, 5], num_bits=None,
-                          narrow_range=None,
-                          fake_quant_op=tf.raw_ops.FakeQuantWithMinMaxVarsPerChannel),
-                     marks=pytest.mark.xfail(reason="104822"))
+        dict(inputs_shape=[2, 6, 4], min_value=-3, max_value=4, num_bits=None, narrow_range=None),
+        dict(inputs_shape=[3, 2, 1, 5], min_value=-4, max_value=5, num_bits=14, narrow_range=True),
+        dict(inputs_shape=[3, 2, 4], min_value=2, max_value=4, num_bits=10, narrow_range=False),
+        dict(inputs_shape=[1, 2, 3], min_value=-6, max_value=-3, num_bits=8, narrow_range=True),
     ]

     @pytest.mark.parametrize("params", test_basic)
+    @pytest.mark.parametrize("fake_quant_op", [
+        tf.raw_ops.FakeQuantWithMinMaxVars, tf.raw_ops.FakeQuantWithMinMaxArgs
+    ])
     @pytest.mark.precommit_tf_fe
     @pytest.mark.nightly
-    def test_fake_quant_with_min_max_vars_basic(self, params, ie_device, precision, ir_version, temp_dir,
+    def test_fake_quant_with_min_max_vars_basic(self, params, fake_quant_op, ie_device, precision, ir_version, temp_dir,
                                                 use_new_frontend,
                                                 use_old_api):
-        self._test(*self.create_fake_quant_with_min_max_vars_net(**params),
+        self._test(*self.create_fake_quant_with_min_max_vars_net(**params, fake_quant_op=fake_quant_op),
                    ie_device, precision, ir_version, temp_dir=temp_dir,
                    use_new_frontend=use_new_frontend, use_old_api=use_old_api)
+
+    test_per_channel_basic = [
+        dict(inputs_shape=[2, 6, 4], min_value=[-4, -3, -5, -8], max_value=[4, 7, 9, 5], num_bits=None,
+             narrow_range=None,
+             fake_quant_op=tf.raw_ops.FakeQuantWithMinMaxVarsPerChannel),
+    ]
+
+    @pytest.mark.parametrize("params", test_per_channel_basic)
+    @pytest.mark.precommit_tf_fe
+    @pytest.mark.nightly
+    @pytest.mark.xfail(reason="104822")
+    def test_fake_quant_with_min_max_vars_per_channel_basic(self, params, ie_device, precision, ir_version, temp_dir,
+                                                            use_new_frontend,
+                                                            use_old_api):
+        self._test(*self.create_fake_quant_with_min_max_vars_net(**params),
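Both flavors quantize identically for the same range, which is what lets a single test body cover the two ops via the fake_quant_op parameter. A quick sanity check outside the test harness (assumes TF 2.x and NumPy; shapes and range are arbitrary):

    import numpy as np
    import tensorflow as tf

    x = tf.constant(np.linspace(-8.0, 8.0, 33), dtype=tf.float32)
    # same range and attributes for both ops
    y_args = tf.raw_ops.FakeQuantWithMinMaxArgs(inputs=x, min=-3.0, max=4.0, num_bits=8, narrow_range=False)
    y_vars = tf.raw_ops.FakeQuantWithMinMaxVars(inputs=x, min=-3.0, max=4.0, num_bits=8, narrow_range=False)
    np.testing.assert_allclose(y_args.numpy(), y_vars.numpy())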