[TF FE][TF Hub] Support FakeQuantWithMinMaxArgs operation (#19154)
Signed-off-by: Kazantsev, Roman <roman.kazantsev@intel.com>
This commit is contained in:
parent
396a899b75
commit
545c5bbde1
@ -155,6 +155,7 @@ const std::map<std::string, CreatorFunction> get_supported_ops() {
|
|||||||
{"ExtractImagePatches", CreatorFunction(translate_extract_image_patches_op)},
|
{"ExtractImagePatches", CreatorFunction(translate_extract_image_patches_op)},
|
||||||
{"FakeQuantWithMinMaxVars", CreatorFunction(translate_fake_quant_op)},
|
{"FakeQuantWithMinMaxVars", CreatorFunction(translate_fake_quant_op)},
|
||||||
{"FakeQuantWithMinMaxVarsPerChannel", CreatorFunction(translate_fake_quant_op)},
|
{"FakeQuantWithMinMaxVarsPerChannel", CreatorFunction(translate_fake_quant_op)},
|
||||||
|
{"FakeQuantWithMinMaxArgs", CreatorFunction(translate_fake_quant_with_min_max_args)},
|
||||||
{"FIFOQueue", CreatorFunction(translate_fifo_queue_op)},
|
{"FIFOQueue", CreatorFunction(translate_fifo_queue_op)},
|
||||||
{"FIFOQueueV2", CreatorFunction(translate_fifo_queue_op)},
|
{"FIFOQueueV2", CreatorFunction(translate_fifo_queue_op)},
|
||||||
{"Fill", CreatorFunction(translate_fill_op)},
|
{"Fill", CreatorFunction(translate_fill_op)},
|
||||||
|
@ -63,6 +63,7 @@ OP_CONVERTER(translate_elu_op);
|
|||||||
OP_CONVERTER(translate_expand_dims_op);
|
OP_CONVERTER(translate_expand_dims_op);
|
||||||
OP_CONVERTER(translate_extract_image_patches_op);
|
OP_CONVERTER(translate_extract_image_patches_op);
|
||||||
OP_CONVERTER(translate_fake_quant_op);
|
OP_CONVERTER(translate_fake_quant_op);
|
||||||
|
OP_CONVERTER(translate_fake_quant_with_min_max_args);
|
||||||
OP_CONVERTER(translate_fill_op);
|
OP_CONVERTER(translate_fill_op);
|
||||||
OP_CONVERTER(translate_floor_div_op);
|
OP_CONVERTER(translate_floor_div_op);
|
||||||
OP_CONVERTER_NAMED(translate_fused_batch_norm_op);
|
OP_CONVERTER_NAMED(translate_fused_batch_norm_op);
|
||||||
@ -143,6 +144,7 @@ OP_CONVERTER(translate_unpack_op);
|
|||||||
OP_CONVERTER(translate_unravel_index_op);
|
OP_CONVERTER(translate_unravel_index_op);
|
||||||
OP_CONVERTER(translate_where_op);
|
OP_CONVERTER(translate_where_op);
|
||||||
OP_CONVERTER(translate_x_div_y_op);
|
OP_CONVERTER(translate_x_div_y_op);
|
||||||
|
OP_CONVERTER(translate_xla_dot_op);
|
||||||
OP_CONVERTER(translate_zeros_like_op);
|
OP_CONVERTER(translate_zeros_like_op);
|
||||||
|
|
||||||
// Translators for internal operations
|
// Translators for internal operations
|
||||||
|
@ -3,21 +3,27 @@
|
|||||||
//
|
//
|
||||||
|
|
||||||
#include "common_op_table.hpp"
|
#include "common_op_table.hpp"
|
||||||
#include "openvino/opsets/opset8.hpp"
|
#include "openvino/op/constant.hpp"
|
||||||
|
#include "openvino/op/divide.hpp"
|
||||||
|
#include "openvino/op/fake_quantize.hpp"
|
||||||
|
#include "openvino/op/greater.hpp"
|
||||||
|
#include "openvino/op/less.hpp"
|
||||||
|
#include "openvino/op/maximum.hpp"
|
||||||
|
#include "openvino/op/minimum.hpp"
|
||||||
|
#include "openvino/op/select.hpp"
|
||||||
|
#include "openvino/op/subtract.hpp"
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
using namespace ov::opset8;
|
using namespace ov::op;
|
||||||
|
|
||||||
namespace ov {
|
namespace ov {
|
||||||
namespace frontend {
|
namespace frontend {
|
||||||
namespace tensorflow {
|
namespace tensorflow {
|
||||||
namespace op {
|
namespace op {
|
||||||
OutputVector translate_fake_quant_op(const NodeContext& node) {
|
OutputVector translate_fake_quant_aux_op(const NodeContext& node,
|
||||||
default_op_checks(node, 2, {"FakeQuantWithMinMaxVars", "FakeQuantWithMinMaxVarsPerChannel"});
|
const Output<Node>& inputs,
|
||||||
auto inputs = node.get_input(0);
|
const Output<Node>& min,
|
||||||
auto min = node.get_input(1);
|
const Output<Node>& max) {
|
||||||
auto max = node.get_input(2);
|
|
||||||
|
|
||||||
// retrieve attributes
|
// retrieve attributes
|
||||||
auto narrow_range = node.get_attribute<bool>("narrow_range", false);
|
auto narrow_range = node.get_attribute<bool>("narrow_range", false);
|
||||||
auto num_bits = node.get_attribute<int64_t>("num_bits", 8);
|
auto num_bits = node.get_attribute<int64_t>("num_bits", 8);
|
||||||
@ -26,37 +32,58 @@ OutputVector translate_fake_quant_op(const NodeContext& node) {
|
|||||||
levels = narrow_range ? levels - 1 : levels;
|
levels = narrow_range ? levels - 1 : levels;
|
||||||
|
|
||||||
// compute real min and max values
|
// compute real min and max values
|
||||||
Output<Node> minimum = make_shared<Minimum>(min, max);
|
Output<Node> minimum = make_shared<v1::Minimum>(min, max);
|
||||||
Output<Node> maximum = make_shared<Maximum>(min, max);
|
Output<Node> maximum = make_shared<v1::Maximum>(min, max);
|
||||||
|
|
||||||
// adjust min and max so that min <= 0
|
// adjust min and max so that min <= 0
|
||||||
auto zero = create_same_type_const_scalar<float>(min, 0);
|
auto zero = create_same_type_const_scalar<float>(min, 0);
|
||||||
auto min_greater_zero = make_shared<Greater>(minimum, zero);
|
auto min_greater_zero = make_shared<v1::Greater>(minimum, zero);
|
||||||
Output<Node> max_minus_min = make_shared<Subtract>(maximum, minimum);
|
Output<Node> max_minus_min = make_shared<v1::Subtract>(maximum, minimum);
|
||||||
minimum = make_shared<Select>(min_greater_zero, zero, minimum);
|
minimum = make_shared<v1::Select>(min_greater_zero, zero, minimum);
|
||||||
maximum = make_shared<Select>(min_greater_zero, max_minus_min, maximum);
|
maximum = make_shared<v1::Select>(min_greater_zero, max_minus_min, maximum);
|
||||||
|
|
||||||
// adjust min and max so that 0 <= max
|
// adjust min and max so that 0 <= max
|
||||||
auto max_less_zero = make_shared<Less>(maximum, zero);
|
auto max_less_zero = make_shared<v1::Less>(maximum, zero);
|
||||||
auto min_minus_max = make_shared<Subtract>(minimum, maximum);
|
auto min_minus_max = make_shared<v1::Subtract>(minimum, maximum);
|
||||||
minimum = make_shared<Select>(max_less_zero, min_minus_max, minimum);
|
minimum = make_shared<v1::Select>(max_less_zero, min_minus_max, minimum);
|
||||||
maximum = make_shared<Select>(max_less_zero, zero, maximum);
|
maximum = make_shared<v1::Select>(max_less_zero, zero, maximum);
|
||||||
|
|
||||||
// adjust min and max so that scale = (max - min) / (2^num_bits - 1),
|
// adjust min and max so that scale = (max - min) / (2^num_bits - 1),
|
||||||
// min_adj = scale * round(min / scale) and max_adj = max + min_adj - min
|
// min_adj = scale * round(min / scale) and max_adj = max + min_adj - min
|
||||||
max_minus_min = make_shared<Subtract>(maximum, minimum);
|
max_minus_min = make_shared<v1::Subtract>(maximum, minimum);
|
||||||
auto const_levels = make_shared<Constant>(element::f32, Shape{}, static_cast<float>(levels - 1));
|
auto const_levels = make_shared<v0::Constant>(element::f32, Shape{}, static_cast<float>(levels - 1));
|
||||||
auto scale = make_shared<Divide>(max_minus_min, const_levels);
|
auto scale = make_shared<v1::Divide>(max_minus_min, const_levels);
|
||||||
auto descaled_min = make_shared<Divide>(minimum, scale);
|
auto descaled_min = make_shared<v1::Divide>(minimum, scale);
|
||||||
auto rounded_descaled_min = make_shared<Round>(descaled_min, Round::RoundMode::HALF_TO_EVEN);
|
auto rounded_descaled_min = make_shared<v5::Round>(descaled_min, v5::Round::RoundMode::HALF_TO_EVEN);
|
||||||
auto min_adj = make_shared<Multiply>(scale, rounded_descaled_min);
|
auto min_adj = make_shared<v1::Multiply>(scale, rounded_descaled_min);
|
||||||
auto adjustment = make_shared<Subtract>(min_adj, minimum);
|
auto adjustment = make_shared<v1::Subtract>(min_adj, minimum);
|
||||||
auto max_adj = make_shared<Add>(maximum, adjustment);
|
auto max_adj = make_shared<v1::Add>(maximum, adjustment);
|
||||||
|
|
||||||
auto fake_quantize = make_shared<FakeQuantize>(inputs, min_adj, max_adj, min_adj, max_adj, levels);
|
auto fake_quantize = make_shared<v0::FakeQuantize>(inputs, min_adj, max_adj, min_adj, max_adj, levels);
|
||||||
set_node_name(node.get_name(), fake_quantize);
|
set_node_name(node.get_name(), fake_quantize);
|
||||||
return {fake_quantize};
|
return {fake_quantize};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
OutputVector translate_fake_quant_op(const NodeContext& node) {
|
||||||
|
// inputs, min and max are all read below via get_input(0..2), so require three inputs up front
default_op_checks(node, 3, {"FakeQuantWithMinMaxVars", "FakeQuantWithMinMaxVarsPerChannel"});
|
||||||
|
auto inputs = node.get_input(0);
|
||||||
|
auto min = node.get_input(1);
|
||||||
|
auto max = node.get_input(2);
|
||||||
|
|
||||||
|
return translate_fake_quant_aux_op(node, inputs, min, max);
|
||||||
|
}
|
||||||
|
|
||||||
|
OutputVector translate_fake_quant_with_min_max_args(const NodeContext& node) {
|
||||||
|
default_op_checks(node, 1, {"FakeQuantWithMinMaxArgs"});
|
||||||
|
auto inputs = node.get_input(0);
|
||||||
|
auto min_val = node.get_attribute<float>("min", -6.0f);
|
||||||
|
auto max_val = node.get_attribute<float>("max", 6.0f);
|
||||||
|
auto min = make_shared<v0::Constant>(element::f32, Shape{}, min_val);
|
||||||
|
auto max = make_shared<v0::Constant>(element::f32, Shape{}, max_val);
|
||||||
|
|
||||||
|
return translate_fake_quant_aux_op(node, inputs, min, max);
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace op
|
} // namespace op
|
||||||
} // namespace tensorflow
|
} // namespace tensorflow
|
||||||
} // namespace frontend
|
} // namespace frontend
|
||||||
|
@ -21,9 +21,7 @@ class TestFakeQuantWithMinMaxVars(CommonTFLayerTest):
|
|||||||
tf.compat.v1.reset_default_graph()
|
tf.compat.v1.reset_default_graph()
|
||||||
with tf.compat.v1.Session() as sess:
|
with tf.compat.v1.Session() as sess:
|
||||||
inputs = tf.compat.v1.placeholder(tf.float32, inputs_shape, 'inputs')
|
inputs = tf.compat.v1.placeholder(tf.float32, inputs_shape, 'inputs')
|
||||||
min = tf.constant(min_value, dtype=tf.float32)
|
fake_quant_op(inputs=inputs, min=min_value, max=max_value, num_bits=num_bits,
|
||||||
max = tf.constant(max_value, dtype=tf.float32)
|
|
||||||
fake_quant_op(inputs=inputs, min=min, max=max, num_bits=num_bits,
|
|
||||||
narrow_range=narrow_range)
|
narrow_range=narrow_range)
|
||||||
tf.compat.v1.global_variables_initializer()
|
tf.compat.v1.global_variables_initializer()
|
||||||
tf_net = sess.graph_def
|
tf_net = sess.graph_def
|
||||||
@ -32,29 +30,38 @@ class TestFakeQuantWithMinMaxVars(CommonTFLayerTest):
|
|||||||
|
|
||||||
test_basic = [
|
test_basic = [
|
||||||
# test FakeQuantWithMinMaxVars
|
# test FakeQuantWithMinMaxVars
|
||||||
dict(inputs_shape=[2, 6, 4], min_value=-3, max_value=4, num_bits=None, narrow_range=None,
|
dict(inputs_shape=[2, 6, 4], min_value=-3, max_value=4, num_bits=None, narrow_range=None),
|
||||||
fake_quant_op=tf.raw_ops.FakeQuantWithMinMaxVars),
|
dict(inputs_shape=[3, 2, 1, 5], min_value=-4, max_value=5, num_bits=14, narrow_range=True),
|
||||||
dict(inputs_shape=[3, 2, 1, 5], min_value=-4, max_value=5, num_bits=14, narrow_range=True,
|
dict(inputs_shape=[3, 2, 4], min_value=2, max_value=4, num_bits=10, narrow_range=False),
|
||||||
fake_quant_op=tf.raw_ops.FakeQuantWithMinMaxVars),
|
dict(inputs_shape=[1, 2, 3], min_value=-6, max_value=-3, num_bits=8, narrow_range=True),
|
||||||
dict(inputs_shape=[3, 2, 4], min_value=2, max_value=4, num_bits=10, narrow_range=False,
|
|
||||||
fake_quant_op=tf.raw_ops.FakeQuantWithMinMaxVars),
|
|
||||||
dict(inputs_shape=[1, 2, 3], min_value=-6, max_value=-3, num_bits=8, narrow_range=True,
|
|
||||||
fake_quant_op=tf.raw_ops.FakeQuantWithMinMaxVars),
|
|
||||||
|
|
||||||
# test FakeQuantWithMinMaxVarsPerChannel
|
|
||||||
pytest.param(dict(inputs_shape=[2, 6, 4], min_value=[-4, -3, -5, -8], max_value=[4, 7, 9, 5], num_bits=None,
|
|
||||||
narrow_range=None,
|
|
||||||
fake_quant_op=tf.raw_ops.FakeQuantWithMinMaxVarsPerChannel),
|
|
||||||
marks=pytest.mark.xfail(reason="104822"))
|
|
||||||
|
|
||||||
]
|
]
|
||||||
|
|
||||||
@pytest.mark.parametrize("params", test_basic)
|
@pytest.mark.parametrize("params", test_basic)
|
||||||
|
@pytest.mark.parametrize("fake_quant_op", [
|
||||||
|
tf.raw_ops.FakeQuantWithMinMaxVars, tf.raw_ops.FakeQuantWithMinMaxArgs
|
||||||
|
])
|
||||||
@pytest.mark.precommit_tf_fe
|
@pytest.mark.precommit_tf_fe
|
||||||
@pytest.mark.nightly
|
@pytest.mark.nightly
|
||||||
def test_fake_quant_with_min_max_vars_basic(self, params, ie_device, precision, ir_version, temp_dir,
|
def test_fake_quant_with_min_max_vars_basic(self, params, fake_quant_op, ie_device, precision, ir_version, temp_dir,
|
||||||
use_new_frontend,
|
use_new_frontend,
|
||||||
use_old_api):
|
use_old_api):
|
||||||
|
self._test(*self.create_fake_quant_with_min_max_vars_net(**params, fake_quant_op=fake_quant_op),
|
||||||
|
ie_device, precision, ir_version, temp_dir=temp_dir,
|
||||||
|
use_new_frontend=use_new_frontend, use_old_api=use_old_api)
|
||||||
|
|
||||||
|
test_per_channel_basic = [
|
||||||
|
dict(inputs_shape=[2, 6, 4], min_value=[-4, -3, -5, -8], max_value=[4, 7, 9, 5], num_bits=None,
|
||||||
|
narrow_range=None,
|
||||||
|
fake_quant_op=tf.raw_ops.FakeQuantWithMinMaxVarsPerChannel),
|
||||||
|
]
|
||||||
|
|
||||||
|
@pytest.mark.parametrize("params", test_per_channel_basic)
|
||||||
|
@pytest.mark.precommit_tf_fe
|
||||||
|
@pytest.mark.nightly
|
||||||
|
@pytest.mark.xfail("104822")
|
||||||
|
def test_fake_quant_with_min_max_vars_per_channel_basic(self, params, ie_device, precision, ir_version, temp_dir,
|
||||||
|
use_new_frontend,
|
||||||
|
use_old_api):
|
||||||
self._test(*self.create_fake_quant_with_min_max_vars_net(**params),
|
self._test(*self.create_fake_quant_with_min_max_vars_net(**params),
|
||||||
ie_device, precision, ir_version, temp_dir=temp_dir,
|
ie_device, precision, ir_version, temp_dir=temp_dir,
|
||||||
use_new_frontend=use_new_frontend, use_old_api=use_old_api)
|
use_new_frontend=use_new_frontend, use_old_api=use_old_api)
|
||||||
|
Loading…
Reference in New Issue
Block a user