Remove Fake Quantize OP decomposition (#3506)

* Remove Fake Quantize OP decomposition

* Fix FQ OP inheritance
This commit is contained in:
Mikhail Treskin 2021-01-26 14:53:31 +03:00 committed by GitHub
parent 9817b22295
commit 0284cd69a8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 4 additions and 1196 deletions

View File

@ -41,7 +41,7 @@ namespace ngraph
/// (levels-1) * (output_high - output_low) + output_low
///
///
class NGRAPH_API FakeQuantize : public ngraph::op::util::FusedOp
class NGRAPH_API FakeQuantize : public ngraph::op::Op
{
public:
NGRAPH_RTTI_DECLARATION;
@ -69,7 +69,6 @@ namespace ngraph
AutoBroadcastSpec(AutoBroadcastType::NUMPY));
bool visit_attributes(AttributeVisitor& visitor) override;
virtual OutputVector decompose_op() const override;
virtual void validate_and_infer_types() override;
virtual std::shared_ptr<Node>

View File

@ -1,120 +0,0 @@
//*****************************************************************************
// Copyright 2017-2021 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include "ngraph/axis_set.hpp"
#include "ngraph/op/op.hpp"
#include "ngraph/type/element_type.hpp"
namespace ngraph
{
    namespace op
    {
        namespace v0
        {
            /// \brief Quantize operation
            ///        Maps real input (r) to quantized output (q) using scale (s), zero point (z)
            ///        and round mode: q = ROUND(r / s) + z
            class NGRAPH_DEPRECATED(
                "This operation is deprecated and will be removed soon. Please do not use it.")
                NGRAPH_API Quantize : public ngraph::op::Op
            {
                NGRAPH_SUPPRESS_DEPRECATED_START
            public:
                static constexpr NodeTypeInfo type_info{"Quantize", 0};
                const NodeTypeInfo& get_type_info() const override { return type_info; }
                /// \brief Selects how the ROUND function in the quantization formula behaves.
                enum class RoundMode
                {
                    // round to nearest integer
                    // in case of two equidistant integers round away from zero e.g.
                    // 2.5 -> 3
                    // -3.5 -> -4
                    ROUND_NEAREST_TOWARD_INFINITY,

                    // round to nearest integer
                    // in case of two equidistant integers round toward zero e.g.
                    // 2.5 -> 2
                    // -3.5 -> -3
                    ROUND_NEAREST_TOWARD_ZERO,

                    // round to nearest integer
                    // in case of two equidistant integers round up e.g.
                    // 2.5 -> 3
                    // -3.5 -> -3
                    ROUND_NEAREST_UPWARD,

                    // round to nearest integer
                    // in case of two equidistant integers round down e.g.
                    // 2.5 -> 2
                    // -3.5 -> -4
                    ROUND_NEAREST_DOWNWARD,

                    // round to nearest integer
                    // in case of two equidistant integers round to even e.g.
                    // 2.5 -> 2
                    // -3.5 -> -4
                    ROUND_NEAREST_TOWARD_EVEN,

                    // round to nearest integer away from zero
                    ROUND_TOWARD_INFINITY,

                    // round to nearest integer toward zero
                    ROUND_TOWARD_ZERO,

                    // round to nearest integer toward infinity (ceiling)
                    ROUND_UP,

                    // round to nearest integer toward negative infinity (floor)
                    ROUND_DOWN,
                };

                /// \brief Constructs a Quantize operation
                /// \param input real input
                /// \param scale scale used for mapping
                /// \param zero_point zero point used for mapping
                /// \param type output element type
                /// \param axes axis positions on which `scale` and `zero_point` are specified
                /// \param round_mode describes how to perform ROUND function (see above)
                Quantize(const Output<Node>& input,
                         const Output<Node>& scale,
                         const Output<Node>& zero_point,
                         const ngraph::element::Type& type,
                         const ngraph::AxisSet& axes,
                         RoundMode round_mode);

                /// \brief Constructs an empty Quantize node (no inputs, default attributes).
                Quantize() = default;

                void validate_and_infer_types() override;

                virtual std::shared_ptr<Node>
                    clone_with_new_inputs(const OutputVector& new_args) const override;

                /// \return the axes on which `scale` and `zero_point` are specified
                const ngraph::AxisSet& get_axes() const { return m_axes; }
                /// \return the rounding mode used by the ROUND function
                RoundMode get_round_mode() const { return m_round_mode; }
            private:
                ngraph::element::Type m_type;
                ngraph::AxisSet m_axes;
                // Explicitly initialised so that a default-constructed node never exposes an
                // indeterminate enum value through get_round_mode() or when it is cloned.
                RoundMode m_round_mode{RoundMode::ROUND_NEAREST_TOWARD_INFINITY};
                NGRAPH_SUPPRESS_DEPRECATED_END
            };
        } // namespace v0
        NGRAPH_SUPPRESS_DEPRECATED_START
        using v0::Quantize;
        NGRAPH_SUPPRESS_DEPRECATED_END
    } // namespace op
} // namespace ngraph

View File

@ -117,7 +117,6 @@
#include "ngraph/op/prior_box_clustered.hpp"
#include "ngraph/op/proposal.hpp"
#include "ngraph/op/psroi_pooling.hpp"
#include "ngraph/op/quantize.hpp"
#include "ngraph/op/range.hpp"
#include "ngraph/op/read_value.hpp"
#include "ngraph/op/reduce_l1.hpp"

View File

@ -18,20 +18,10 @@
#include "itt.hpp"
#include "ngraph/attribute_visitor.hpp"
#include "ngraph/builder/autobroadcast.hpp"
#include "ngraph/op/add.hpp"
#include "ngraph/op/constant.hpp"
#include "ngraph/op/convert.hpp"
#include "ngraph/op/divide.hpp"
#include "ngraph/op/fake_quantize.hpp"
#include "ngraph/op/greater.hpp"
#include "ngraph/op/less_eq.hpp"
#include "ngraph/op/maximum.hpp"
#include "ngraph/op/minimum.hpp"
#include "ngraph/op/multiply.hpp"
#include "ngraph/op/quantize.hpp"
#include "ngraph/op/select.hpp"
#include "ngraph/op/subtract.hpp"
#include "ngraph/shape.hpp"
using namespace std;
@ -42,7 +32,7 @@ NGRAPH_SUPPRESS_DEPRECATED_START
NGRAPH_RTTI_DEFINITION(op::FakeQuantize, "FakeQuantize", 0);
op::FakeQuantize::FakeQuantize()
: FusedOp()
: Op()
, m_levels()
{
}
@ -54,7 +44,7 @@ op::FakeQuantize::FakeQuantize(const Output<Node>& data,
const Output<Node>& output_high,
size_t levels,
const AutoBroadcastSpec& auto_broadcast)
: FusedOp({data, input_low, input_high, output_low, output_high})
: Op({data, input_low, input_high, output_low, output_high})
, m_levels(levels)
, m_auto_broadcast(auto_broadcast)
{
@ -98,80 +88,6 @@ bool ngraph::op::v0::FakeQuantize::visit_attributes(AttributeVisitor& visitor)
return true;
}
// Expands this FakeQuantize node into a subgraph of simpler operations and returns the
// subgraph's single output. The steps, as built below:
//   1. optionally broadcast the five operands according to m_auto_broadcast
//   2. clip data to <input_low;input_high> and subtract input_low
//   3. Quantize to i32 (ROUND_NEAREST_TOWARD_EVEN) with scale
//      (input_high - input_low) / (m_levels - 1), then Convert back to the data element type
//   4. multiply by (output_high - output_low) / (m_levels - 1) and add output_low
OutputVector op::FakeQuantize::decompose_op() const
{
// Operands in input order: data, input_low, input_high, output_low, output_high.
Output<Node> data{input_value(0)};
Output<Node> input_low{input_value(1)};
Output<Node> input_high{input_value(2)};
Output<Node> output_low{input_value(3)};
Output<Node> output_high{input_value(4)};
// Broadcast all five operands together; for any other auto-broadcast type the operands
// are used unchanged.
if (m_auto_broadcast.m_type == AutoBroadcastType::NUMPY)
{
OutputVector broadcasted_nodes = builder::numpy_broadcast_outputs(
OutputVector{data, input_low, input_high, output_low, output_high});
data = broadcasted_nodes.at(0);
input_low = broadcasted_nodes.at(1);
input_high = broadcasted_nodes.at(2);
output_low = broadcasted_nodes.at(3);
output_high = broadcasted_nodes.at(4);
}
else if (m_auto_broadcast.m_type == AutoBroadcastType::PDPD)
{
OutputVector broadcasted_nodes = builder::pdpd_broadcast(
OutputVector{data, input_low, input_high, output_low, output_high},
m_auto_broadcast.m_axis);
data = broadcasted_nodes.at(0);
input_low = broadcasted_nodes.at(1);
input_high = broadcasted_nodes.at(2);
output_low = broadcasted_nodes.at(3);
output_high = broadcasted_nodes.at(4);
}
// get_shape() is called on the data output, so this path uses a concrete Shape.
const auto input_data_shape = data.get_shape();
const auto input_data_type = data.get_element_type();
// Constant with the data's shape and element type, every element set to (m_levels - 1).
// NOTE(review): if m_levels is unsigned and can be 0, `m_levels - 1` wraps around —
// confirm that levels is validated elsewhere.
const auto levels_minus_one =
Constant::create(input_data_type,
input_data_shape,
vector<size_t>(shape_size(input_data_shape), m_levels - 1));
// map the number of quantization levels to the nGraph's quantization and dequantization scales
const auto quant_scale = std::make_shared<op::v1::Divide>(
std::make_shared<op::v1::Subtract>(input_high, input_low), levels_minus_one);
const auto dequant_scale = std::make_shared<op::v1::Divide>(
std::make_shared<op::v1::Subtract>(output_high, output_low), levels_minus_one);
// zero_point type needs to match the quantization output type
// NOTE(review): the i32 constant is created from the floating-point literal list {0.0}.
const auto zero_point = Constant::create(element::i32, data.get_shape(), {0.0});
// Axes passed to Quantize, obtained from get_default_order() for the data shape.
const auto axes = get_default_order(input_data_shape);
// clip the input data to the range <input_low;input_high>
data = std::make_shared<op::v1::Minimum>(input_high,
std::make_shared<op::v1::Maximum>(input_low, data));
// shift the input data so that it contains only positive values (and zeros)
data = std::make_shared<op::v1::Subtract>(data, input_low);
shared_ptr<Node> quantized_data =
make_shared<op::Quantize>(data,
quant_scale,
zero_point,
element::i32,
axes,
op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_EVEN);
// Convert the i32 quantization result back to the original data element type.
quantized_data = make_shared<op::Convert>(quantized_data, input_data_type);
// dequantization without using the Dequantize op (just a multiplication by the dequant_scale)
const auto dequantized_data = make_shared<op::v1::Multiply>(quantized_data, dequant_scale);
// shift the results so that they fall into the <output_low;output_high> range
return {std::make_shared<op::v1::Add>(dequantized_data, output_low)};
}
shared_ptr<Node> op::FakeQuantize::clone_with_new_inputs(const OutputVector& new_args) const
{
NGRAPH_OP_SCOPE(v0_FakeQuantize_clone_with_new_inputs);

View File

@ -1,168 +0,0 @@
//*****************************************************************************
// Copyright 2017-2021 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include "ngraph/op/quantize.hpp"
#include "itt.hpp"
#include "ngraph/runtime/host_tensor.hpp"
#include "ngraph/runtime/reference/quantize.hpp"
#include "ngraph/shape_util.hpp"
NGRAPH_SUPPRESS_DEPRECATED_START
using namespace std;
using namespace ngraph;
// Out-of-class definition of the static constexpr data member `type_info`; before C++17 a
// static constexpr member that is odr-used needs such a namespace-scope definition.
constexpr NodeTypeInfo op::Quantize::type_info;
// Builds a Quantize node whose inputs are {input, scale, zero_point} (in that order) and
// stores the requested output element type, quantization axes and rounding mode.
op::Quantize::Quantize(const Output<Node>& input,
const Output<Node>& scale,
const Output<Node>& zero_point,
const element::Type& type,
const AxisSet& axes,
RoundMode round_mode)
: Op({input, scale, zero_point})
, m_type(type)
, m_axes(axes)
, m_round_mode(round_mode)
{
// Helper defined outside this file; presumably runs validate_and_infer_types() so that an
// invalid configuration is reported at construction time — confirm against the base class.
constructor_validate_and_infer_types();
}
void op::Quantize::validate_and_infer_types()
{
NGRAPH_OP_SCOPE(v0_Quantize_validate_and_infer_types);
enum
{
INPUT,
SCALE,
ZERO_POINT
};
NODE_VALIDATION_CHECK(this, m_type.is_static(), "Output element type must not be dynamic");
NODE_VALIDATION_CHECK(
this, m_type.is_quantized(), "Output element type (", m_type, ") must be a quantized type");
element::Type unquantized_type;
NODE_VALIDATION_CHECK(this,
element::Type::merge(unquantized_type,
get_input_element_type(INPUT),
get_input_element_type(SCALE)),
"Scale element type (",
get_input_element_type(SCALE),
") must match input element type (",
get_input_element_type(INPUT),
")");
NODE_VALIDATION_CHECK(this,
unquantized_type.is_dynamic() || unquantized_type.is_real(),
"Scale / input element type (",
unquantized_type,
") must be a floating point number");
element::Type quantized_type;
NODE_VALIDATION_CHECK(
this,
element::Type::merge(quantized_type, get_input_element_type(ZERO_POINT), m_type),
"Zero point element type (",
get_input_element_type(ZERO_POINT),
") must match output element type (",
m_type,
")");
PartialShape input_shape = get_input_partial_shape(0);
Dimension input_rank = input_shape.rank();
for (auto axis : m_axes)
{
NODE_VALIDATION_CHECK(this,
input_rank.is_dynamic() || axis < input_rank.get_length(),
"Quantization axis (",
axis,
") must be less than input shape rank (",
input_rank,
")");
}
PartialShape scale_zero_point_shape = get_input_partial_shape(SCALE);
NODE_VALIDATION_CHECK(
this,
PartialShape::merge_into(scale_zero_point_shape, get_input_partial_shape(ZERO_POINT)),
"Scale shape (",
get_input_partial_shape(SCALE),
") and zero point shape (",
get_input_partial_shape(ZERO_POINT),
") must match");
NODE_VALIDATION_CHECK(this,
scale_zero_point_shape.rank().compatible(m_axes.size()),
"Scale / zero point rank (",
scale_zero_point_shape.rank(),
") does not match the number of ",
"quantization axes (",
m_axes.size(),
")");
set_output_size(1);
if (input_shape.rank().is_static() && scale_zero_point_shape.rank().is_static())
{
size_t i = 0;
vector<Dimension> injected_scale_zero_point_dims;
for (size_t j = 0; j < input_shape.rank().get_length(); j++)
{
if (m_axes.count(j) != 0)
{
injected_scale_zero_point_dims.push_back(scale_zero_point_shape[i++]);
}
else
{
injected_scale_zero_point_dims.push_back(Dimension::dynamic());
}
}
PartialShape result_shape = input_shape;
NODE_VALIDATION_CHECK(
this,
PartialShape::merge_into(result_shape, PartialShape{injected_scale_zero_point_dims}),
"Scale / zero point shape (",
scale_zero_point_shape,
") must match input shape (",
input_shape,
") at the quantization axes (",
m_axes,
")");
set_output_type(0, quantized_type, result_shape);
}
else
{
set_output_type(0, quantized_type, PartialShape::dynamic());
}
}
// Creates a copy of this node wired to `new_args` (data, scale, zero point), carrying over
// the output element type, quantization axes and rounding mode.
shared_ptr<Node> op::Quantize::clone_with_new_inputs(const OutputVector& new_args) const
{
    NGRAPH_OP_SCOPE(v0_Quantize_clone_with_new_inputs);
    check_new_args_count(this, new_args);

    const Output<Node>& data = new_args.at(0);
    const Output<Node>& scale = new_args.at(1);
    const Output<Node>& zero_point = new_args.at(2);
    return make_shared<Quantize>(data, scale, zero_point, m_type, m_axes, m_round_mode);
}

View File

@ -167,7 +167,6 @@ set(SRC
type_prop/prelu.cpp
type_prop/proposal.cpp
type_prop/psroi_pooling.cpp
type_prop/quantize.cpp
type_prop/range.cpp
type_prop/read_value.cpp
type_prop/reduce_l1.cpp

View File

@ -488,15 +488,6 @@ namespace
EXPECT_FALSE(op::is_binary_elementwise_logical(&node));
}
void op_is_Quantize()
{
op::Quantize node;
EXPECT_FALSE(op::is_unary_elementwise_arithmetic(&node));
EXPECT_FALSE(op::is_binary_elementwise_arithmetic(&node));
EXPECT_FALSE(op::is_binary_elementwise_comparison(&node));
EXPECT_FALSE(op::is_binary_elementwise_logical(&node));
}
void op_is_Range()
{
op::Range node;

View File

@ -116,7 +116,6 @@ NGRAPH_OP(Power, ngraph::op::v1, 1)
NGRAPH_OP(PriorBox, ngraph::op::v0, 0)
NGRAPH_OP(PriorBoxClustered, ngraph::op::v0, 0)
NGRAPH_OP(Proposal, ngraph::op::v0, 0)
NGRAPH_OP(Quantize, ngraph::op::v0, 0)
NGRAPH_OP(RNNCell, ngraph::op::v0, 0)
NGRAPH_OP(ROIPooling, ngraph::op::v0, 0)
NGRAPH_OP(Range, ngraph::op::v0, 0)

View File

@ -196,7 +196,7 @@ bool runtime::interpreter::INTExecutable::call(const vector<shared_ptr<runtime::
// get op type
element::Type type;
if (is_type<op::Convert>(op) || is_type<op::Quantize>(op) || is_type<op::PriorBox>(op))
if (is_type<op::Convert>(op) || is_type<op::PriorBox>(op))
{
type = op->get_input_element_type(0);
}

View File

@ -91,7 +91,6 @@ NGRAPH_OP(NormalizeL2, ngraph::op)
NGRAPH_OP(Parameter, ngraph::op)
NGRAPH_OP(PRelu, ngraph::op)
NGRAPH_OP(PriorBox, ngraph::op)
NGRAPH_OP(Quantize, ngraph::op)
NGRAPH_OP(Range, ngraph::op)
NGRAPH_OP(Relu, ngraph::op)
NGRAPH_OP(Result, ngraph::op)

View File

@ -1,806 +0,0 @@
//*****************************************************************************
// Copyright 2017-2021 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include "gtest/gtest.h"
#include "ngraph/ngraph.hpp"
#include "util/type_prop.hpp"
NGRAPH_SUPPRESS_DEPRECATED_START
using namespace std;
using namespace ngraph;
// f32 -> i8 with one scale / zero point per entry of axis 1.
TEST(type_prop, quantize_f32_to_i8_nchw_per_channel_ok)
{
    const Shape data_shape{64, 3, 480, 640};
    const Shape per_axis_shape{3};
    const AxisSet quant_axes{1};
    const element::Type real_et = element::f32;
    const element::Type quant_et = element::i8;

    const auto data = make_shared<op::Parameter>(real_et, data_shape);
    const auto scale = make_shared<op::Parameter>(real_et, per_axis_shape);
    const auto zero_point = make_shared<op::Parameter>(quant_et, per_axis_shape);

    const auto quantize =
        make_shared<op::Quantize>(data,
                                  scale,
                                  zero_point,
                                  quant_et,
                                  quant_axes,
                                  op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_INFINITY);

    ASSERT_EQ(quantize->get_output_element_type(0), quant_et);
    ASSERT_EQ(quantize->get_output_shape(0), data_shape);
}
// f32 -> i8 with one scale / zero point per entry of axis 0.
TEST(type_prop, quantize_f32_to_i8_nchw_per_image_ok)
{
    const Shape data_shape{64, 3, 480, 640};
    const Shape per_axis_shape{64};
    const AxisSet quant_axes{0};
    const element::Type real_et = element::f32;
    const element::Type quant_et = element::i8;

    const auto data = make_shared<op::Parameter>(real_et, data_shape);
    const auto scale = make_shared<op::Parameter>(real_et, per_axis_shape);
    const auto zero_point = make_shared<op::Parameter>(quant_et, per_axis_shape);

    const auto quantize =
        make_shared<op::Quantize>(data,
                                  scale,
                                  zero_point,
                                  quant_et,
                                  quant_axes,
                                  op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_INFINITY);

    ASSERT_EQ(quantize->get_output_element_type(0), quant_et);
    ASSERT_EQ(quantize->get_output_shape(0), data_shape);
}
// f32 -> i8 with one scale / zero point per entry of axis 2.
TEST(type_prop, quantize_f32_to_i8_nchw_per_row_ok)
{
    const Shape data_shape{64, 3, 480, 640};
    const Shape per_axis_shape{480};
    const AxisSet quant_axes{2};
    const element::Type real_et = element::f32;
    const element::Type quant_et = element::i8;

    const auto data = make_shared<op::Parameter>(real_et, data_shape);
    const auto scale = make_shared<op::Parameter>(real_et, per_axis_shape);
    const auto zero_point = make_shared<op::Parameter>(quant_et, per_axis_shape);

    const auto quantize =
        make_shared<op::Quantize>(data,
                                  scale,
                                  zero_point,
                                  quant_et,
                                  quant_axes,
                                  op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_INFINITY);

    ASSERT_EQ(quantize->get_output_element_type(0), quant_et);
    ASSERT_EQ(quantize->get_output_shape(0), data_shape);
}
// f32 -> i8 with a 2-D scale / zero point covering axes 0 and 1.
TEST(type_prop, quantize_f32_to_i8_nchw_per_image_channel_ok)
{
    const Shape data_shape{64, 3, 480, 640};
    const Shape per_axis_shape{64, 3};
    const AxisSet quant_axes{0, 1};
    const element::Type real_et = element::f32;
    const element::Type quant_et = element::i8;

    const auto data = make_shared<op::Parameter>(real_et, data_shape);
    const auto scale = make_shared<op::Parameter>(real_et, per_axis_shape);
    const auto zero_point = make_shared<op::Parameter>(quant_et, per_axis_shape);

    const auto quantize =
        make_shared<op::Quantize>(data,
                                  scale,
                                  zero_point,
                                  quant_et,
                                  quant_axes,
                                  op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_INFINITY);

    ASSERT_EQ(quantize->get_output_element_type(0), quant_et);
    ASSERT_EQ(quantize->get_output_shape(0), data_shape);
}
// f32 -> i8 with a scalar scale / zero point and no quantization axes.
TEST(type_prop, quantize_f32_to_i8_nchw_whole_batch_ok)
{
    const Shape data_shape{64, 3, 480, 640};
    const Shape scalar_shape{};
    const AxisSet quant_axes{};
    const element::Type real_et = element::f32;
    const element::Type quant_et = element::i8;

    const auto data = make_shared<op::Parameter>(real_et, data_shape);
    const auto scale = make_shared<op::Parameter>(real_et, scalar_shape);
    const auto zero_point = make_shared<op::Parameter>(quant_et, scalar_shape);

    const auto quantize =
        make_shared<op::Quantize>(data,
                                  scale,
                                  zero_point,
                                  quant_et,
                                  quant_axes,
                                  op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_INFINITY);

    ASSERT_EQ(quantize->get_output_element_type(0), quant_et);
    ASSERT_EQ(quantize->get_output_shape(0), data_shape);
}
// f64 data and scale quantized to i8 with scalar parameters.
TEST(type_prop, quantize_f64_to_i8_ok)
{
    const Shape data_shape{64, 3, 480, 640};
    const Shape scalar_shape{};
    const AxisSet quant_axes{};
    const element::Type real_et = element::f64;
    const element::Type quant_et = element::i8;

    const auto data = make_shared<op::Parameter>(real_et, data_shape);
    const auto scale = make_shared<op::Parameter>(real_et, scalar_shape);
    const auto zero_point = make_shared<op::Parameter>(quant_et, scalar_shape);

    const auto quantize =
        make_shared<op::Quantize>(data,
                                  scale,
                                  zero_point,
                                  quant_et,
                                  quant_axes,
                                  op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_INFINITY);

    ASSERT_EQ(quantize->get_output_element_type(0), quant_et);
    ASSERT_EQ(quantize->get_output_shape(0), data_shape);
}
// f64 data and scale quantized to u8 with scalar parameters.
TEST(type_prop, quantize_f64_to_u8_ok)
{
    const Shape data_shape{64, 3, 480, 640};
    const Shape scalar_shape{};
    const AxisSet quant_axes{};
    const element::Type real_et = element::f64;
    const element::Type quant_et = element::u8;

    const auto data = make_shared<op::Parameter>(real_et, data_shape);
    const auto scale = make_shared<op::Parameter>(real_et, scalar_shape);
    const auto zero_point = make_shared<op::Parameter>(quant_et, scalar_shape);

    const auto quantize =
        make_shared<op::Quantize>(data,
                                  scale,
                                  zero_point,
                                  quant_et,
                                  quant_axes,
                                  op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_INFINITY);

    ASSERT_EQ(quantize->get_output_element_type(0), quant_et);
    ASSERT_EQ(quantize->get_output_shape(0), data_shape);
}
// Requesting a dynamic output element type must fail node validation.
TEST(type_prop, quantize_f64_to_dyn_fails)
{
    const Shape data_shape{64, 3, 480, 640};
    const Shape scalar_shape{};
    const AxisSet quant_axes{};
    const element::Type real_et = element::f64;
    const element::Type quant_et = element::dynamic;

    const auto data = make_shared<op::Parameter>(real_et, data_shape);
    const auto scale = make_shared<op::Parameter>(real_et, scalar_shape);
    const auto zero_point = make_shared<op::Parameter>(quant_et, scalar_shape);

    try
    {
        const auto quantize =
            make_shared<op::Quantize>(data,
                                      scale,
                                      zero_point,
                                      quant_et,
                                      quant_axes,
                                      op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_INFINITY);
        FAIL() << "Attempt to quantize to dynamic type not detected";
    }
    catch (const NodeValidationFailure& failure)
    {
        EXPECT_HAS_SUBSTRING(failure.what(), "Output element type must not be dynamic");
    }
    catch (...)
    {
        FAIL() << "Deduced type check failed for unexpected reason";
    }
}
// Integer (i8) data and scale must be rejected: the unquantized type has to be real.
TEST(type_prop, quantize_i8_to_u8_fails)
{
    const Shape data_shape{64, 3, 480, 640};
    const Shape scalar_shape{};
    const AxisSet quant_axes{};
    const element::Type source_et = element::i8;
    const element::Type quant_et = element::u8;

    const auto data = make_shared<op::Parameter>(source_et, data_shape);
    const auto scale = make_shared<op::Parameter>(source_et, scalar_shape);
    const auto zero_point = make_shared<op::Parameter>(quant_et, scalar_shape);

    try
    {
        const auto quantize =
            make_shared<op::Quantize>(data,
                                      scale,
                                      zero_point,
                                      quant_et,
                                      quant_axes,
                                      op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_INFINITY);
        FAIL() << "Attempt to quantize non-floating point type not detected";
    }
    catch (const NodeValidationFailure& failure)
    {
        EXPECT_HAS_SUBSTRING(failure.what(),
                             "Scale / input element type (i8) must be a floating point number");
    }
    catch (...)
    {
        FAIL() << "Deduced type check failed for unexpected reason";
    }
}
// A non-quantized output element type (f32) must be rejected.
TEST(type_prop, quantize_f32_to_f32_fails)
{
    const Shape data_shape{64, 3, 480, 640};
    const Shape scalar_shape{};
    const AxisSet quant_axes{};
    const element::Type real_et = element::f32;
    const element::Type quant_et = element::f32;

    const auto data = make_shared<op::Parameter>(real_et, data_shape);
    const auto scale = make_shared<op::Parameter>(real_et, scalar_shape);
    const auto zero_point = make_shared<op::Parameter>(quant_et, scalar_shape);

    try
    {
        const auto quantize =
            make_shared<op::Quantize>(data,
                                      scale,
                                      zero_point,
                                      quant_et,
                                      quant_axes,
                                      op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_INFINITY);
        FAIL() << "Attempt to quantize to non-quantized type not detected";
    }
    catch (const NodeValidationFailure& failure)
    {
        EXPECT_HAS_SUBSTRING(failure.what(), "Output element type (f32) must be a quantized type");
    }
    catch (...)
    {
        FAIL() << "Deduced type check failed for unexpected reason";
    }
}
// Data (f32) and scale (f64) element types differ, which must be rejected.
TEST(type_prop, quantize_batch_scale_type_mismatch_fails)
{
    const Shape data_shape{64, 3, 480, 640};
    const Shape scalar_shape{};
    const AxisSet quant_axes{};
    const element::Type data_et = element::f32;
    const element::Type scale_et = element::f64;
    const element::Type quant_et = element::i8;

    const auto data = make_shared<op::Parameter>(data_et, data_shape);
    const auto scale = make_shared<op::Parameter>(scale_et, scalar_shape);
    const auto zero_point = make_shared<op::Parameter>(quant_et, scalar_shape);

    try
    {
        const auto quantize =
            make_shared<op::Quantize>(data,
                                      scale,
                                      zero_point,
                                      quant_et,
                                      quant_axes,
                                      op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_INFINITY);
        FAIL() << "Mismatch of batch and scale element types not detected";
    }
    catch (const NodeValidationFailure& failure)
    {
        EXPECT_HAS_SUBSTRING(failure.what(),
                             "Scale element type (f64) must match input element type (f32)");
    }
    catch (...)
    {
        FAIL() << "Deduced type check failed for unexpected reason";
    }
}
// Zero point element type (u8) differs from the requested output type (i8).
TEST(type_prop, quantize_zero_point_type_mismatch_fails)
{
    const Shape data_shape{64, 3, 480, 640};
    const Shape scalar_shape{};
    const AxisSet quant_axes{};
    const element::Type real_et = element::f32;
    const element::Type quant_et = element::i8;
    const element::Type zero_point_et = element::u8;

    const auto data = make_shared<op::Parameter>(real_et, data_shape);
    const auto scale = make_shared<op::Parameter>(real_et, scalar_shape);
    const auto zero_point = make_shared<op::Parameter>(zero_point_et, scalar_shape);

    try
    {
        const auto quantize =
            make_shared<op::Quantize>(data,
                                      scale,
                                      zero_point,
                                      quant_et,
                                      quant_axes,
                                      op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_INFINITY);
        FAIL() << "Mismatch of zero point element type with zero point argument not detected";
    }
    catch (const NodeValidationFailure& failure)
    {
        EXPECT_HAS_SUBSTRING(failure.what(),
                             "Zero point element type (u8) must match output element type (i8)");
    }
    catch (...)
    {
        FAIL() << "Deduced type check failed for unexpected reason";
    }
}
// Axis 4 lies outside a rank-4 data shape and must be rejected.
TEST(type_prop, quantize_oob_axis_fails)
{
    const Shape data_shape{64, 3, 480, 640};
    const Shape per_axis_shape{320};
    const AxisSet quant_axes{3, 4};
    const element::Type real_et = element::f32;
    const element::Type quant_et = element::i8;

    const auto data = make_shared<op::Parameter>(real_et, data_shape);
    const auto scale = make_shared<op::Parameter>(real_et, per_axis_shape);
    const auto zero_point = make_shared<op::Parameter>(quant_et, per_axis_shape);

    try
    {
        const auto quantize =
            make_shared<op::Quantize>(data,
                                      scale,
                                      zero_point,
                                      quant_et,
                                      quant_axes,
                                      op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_INFINITY);
        FAIL() << "Out-of-bounds quantization axis not detected";
    }
    catch (const NodeValidationFailure& failure)
    {
        EXPECT_HAS_SUBSTRING(failure.what(),
                             "Quantization axis (4) must be less than input shape rank (4)");
    }
    catch (...)
    {
        FAIL() << "Deduced type check failed for unexpected reason";
    }
}
// Scale {64,4} and zero point {64,3} have equal rank but different dimensions.
TEST(type_prop, quantize_scale_shape_mismatch_same_rank_fails)
{
    const Shape data_shape{64, 3, 480, 640};
    const Shape scale_dims{64, 4};
    const Shape zero_point_dims{64, 3};
    const AxisSet quant_axes{0, 1};
    const element::Type real_et = element::f32;
    const element::Type quant_et = element::i8;

    const auto data = make_shared<op::Parameter>(real_et, data_shape);
    const auto scale = make_shared<op::Parameter>(real_et, scale_dims);
    const auto zero_point = make_shared<op::Parameter>(quant_et, zero_point_dims);

    try
    {
        const auto quantize =
            make_shared<op::Quantize>(data,
                                      scale,
                                      zero_point,
                                      quant_et,
                                      quant_axes,
                                      op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_INFINITY);
        FAIL() << "Mismatch of scale argument shape with required shape not detected";
    }
    catch (const NodeValidationFailure& failure)
    {
        EXPECT_HAS_SUBSTRING(failure.what(),
                             "Scale shape ({64,4}) and zero point shape ({64,3}) must match");
    }
    catch (...)
    {
        FAIL() << "Deduced type check failed for unexpected reason";
    }
}
// Scale {64,3,2} has a different rank than zero point {64,3}.
TEST(type_prop, quantize_scale_shape_mismatch_different_rank_fails)
{
    const Shape data_shape{64, 3, 480, 640};
    const Shape scale_dims{64, 3, 2};
    const Shape zero_point_dims{64, 3};
    const AxisSet quant_axes{0, 1};
    const element::Type real_et = element::f32;
    const element::Type quant_et = element::i8;

    const auto data = make_shared<op::Parameter>(real_et, data_shape);
    const auto scale = make_shared<op::Parameter>(real_et, scale_dims);
    const auto zero_point = make_shared<op::Parameter>(quant_et, zero_point_dims);

    try
    {
        const auto quantize =
            make_shared<op::Quantize>(data,
                                      scale,
                                      zero_point,
                                      quant_et,
                                      quant_axes,
                                      op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_INFINITY);
        FAIL() << "Mismatch of scale argument shape with required shape not detected";
    }
    catch (const NodeValidationFailure& failure)
    {
        EXPECT_HAS_SUBSTRING(failure.what(),
                             "Scale shape ({64,3,2}) and zero point shape ({64,3}) must match");
    }
    catch (...)
    {
        FAIL() << "Deduced type check failed for unexpected reason";
    }
}
// Zero point {64,4} and scale {64,3} have equal rank but different dimensions.
TEST(type_prop, quantize_zero_point_shape_mismatch_same_rank_fails)
{
    const Shape data_shape{64, 3, 480, 640};
    const Shape scale_dims{64, 3};
    const Shape zero_point_dims{64, 4};
    const AxisSet quant_axes{0, 1};
    const element::Type real_et = element::f32;
    const element::Type quant_et = element::i8;

    const auto data = make_shared<op::Parameter>(real_et, data_shape);
    const auto scale = make_shared<op::Parameter>(real_et, scale_dims);
    const auto zero_point = make_shared<op::Parameter>(quant_et, zero_point_dims);

    try
    {
        const auto quantize =
            make_shared<op::Quantize>(data,
                                      scale,
                                      zero_point,
                                      quant_et,
                                      quant_axes,
                                      op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_INFINITY);
        FAIL() << "Mismatch of zero point argument shape with required shape not detected";
    }
    catch (const NodeValidationFailure& failure)
    {
        EXPECT_HAS_SUBSTRING(failure.what(),
                             "Scale shape ({64,3}) and zero point shape ({64,4}) must match");
    }
    catch (...)
    {
        FAIL() << "Deduced type check failed for unexpected reason";
    }
}
// Zero point {64,3,2} has a different rank than scale {64,3}.
TEST(type_prop, quantize_zero_point_shape_mismatch_different_rank_fails)
{
    const Shape data_shape{64, 3, 480, 640};
    const Shape scale_dims{64, 3};
    const Shape zero_point_dims{64, 3, 2};
    const AxisSet quant_axes{0, 1};
    const element::Type real_et = element::f32;
    const element::Type quant_et = element::i8;

    const auto data = make_shared<op::Parameter>(real_et, data_shape);
    const auto scale = make_shared<op::Parameter>(real_et, scale_dims);
    const auto zero_point = make_shared<op::Parameter>(quant_et, zero_point_dims);

    try
    {
        const auto quantize =
            make_shared<op::Quantize>(data,
                                      scale,
                                      zero_point,
                                      quant_et,
                                      quant_axes,
                                      op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_INFINITY);
        FAIL() << "Mismatch of zero point argument shape with required shape not detected";
    }
    catch (const NodeValidationFailure& failure)
    {
        EXPECT_HAS_SUBSTRING(failure.what(),
                             "Scale shape ({64,3}) and zero point shape ({64,3,2}) must match");
    }
    catch (...)
    {
        FAIL() << "Deduced type check failed for unexpected reason";
    }
}
// Input, scale and zero point all have fully dynamic shapes. Construction is
// expected to succeed, yielding the quantized element type and an output of
// dynamic rank.
TEST(type_prop, quantize_partial_all_rank_dynamic_ok)
{
    // Input and scale are f32; zero point and output are i8.
    const element::Type in_type = element::f32;
    const element::Type out_type = element::i8;

    const PartialShape data_shape{PartialShape::dynamic()};
    const PartialShape scales_shape{PartialShape::dynamic()};
    const PartialShape zp_shape{PartialShape::dynamic()};

    const AxisSet quant_axes{0, 1, 2000};
    const auto rounding = op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_INFINITY;

    auto data = make_shared<op::Parameter>(in_type, data_shape);
    auto scales = make_shared<op::Parameter>(in_type, scales_shape);
    auto zero_points = make_shared<op::Parameter>(out_type, zp_shape);
    auto quantize =
        make_shared<op::Quantize>(data, scales, zero_points, out_type, quant_axes, rounding);

    ASSERT_EQ(quantize->get_output_element_type(0), out_type);
    ASSERT_TRUE(quantize->get_output_partial_shape(0).rank().is_dynamic());
}
// Input and zero point are rank-dynamic; scale has static rank 3 with one
// dynamic dimension and three quantization axes are given. Construction is
// expected to succeed with an output of dynamic rank.
TEST(type_prop,
     quantize_partial_input_rank_dynamic_scale_rank_static_dynamic_zero_point_rank_dynamic_ok)
{
    // Input and scale are f32; zero point and output are i8.
    const element::Type in_type = element::f32;
    const element::Type out_type = element::i8;

    const PartialShape data_shape{PartialShape::dynamic()};
    const PartialShape scales_shape{64, Dimension::dynamic(), 96};
    const PartialShape zp_shape{PartialShape::dynamic()};

    const AxisSet quant_axes{0, 1, 2000};
    const auto rounding = op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_INFINITY;

    auto data = make_shared<op::Parameter>(in_type, data_shape);
    auto scales = make_shared<op::Parameter>(in_type, scales_shape);
    auto zero_points = make_shared<op::Parameter>(out_type, zp_shape);
    auto quantize =
        make_shared<op::Quantize>(data, scales, zero_points, out_type, quant_axes, rounding);

    ASSERT_EQ(quantize->get_output_element_type(0), out_type);
    ASSERT_TRUE(quantize->get_output_partial_shape(0).rank().is_dynamic());
}
// Scale has static rank 3 but only two quantization axes are supplied.
// Constructing Quantize is expected to throw a NodeValidationFailure that
// reports the rank / axis-count disagreement.
TEST(
    type_prop,
    quantize_partial_input_rank_dynamic_scale_rank_static_dynamic_zero_point_rank_dynamic_axis_count_inconsistent)
{
    // Input and scale are f32; zero point and output are i8.
    const element::Type in_type = element::f32;
    const element::Type out_type = element::i8;

    const PartialShape data_shape{PartialShape::dynamic()};
    const PartialShape scales_shape{64, Dimension::dynamic(), 96};
    const PartialShape zp_shape{PartialShape::dynamic()};

    const AxisSet quant_axes{0, 1};
    const auto rounding = op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_INFINITY;

    auto data = make_shared<op::Parameter>(in_type, data_shape);
    auto scales = make_shared<op::Parameter>(in_type, scales_shape);
    auto zero_points = make_shared<op::Parameter>(out_type, zp_shape);

    try
    {
        auto quantize = make_shared<op::Quantize>(
            data, scales, zero_points, out_type, quant_axes, rounding);
        // Reaching this line means validation missed the axis-count mismatch.
        FAIL() << "Mismatch of scale / zero point rank with axis count not detected";
    }
    catch (const NodeValidationFailure& failure)
    {
        EXPECT_HAS_SUBSTRING(
            failure.what(),
            "Scale / zero point rank (3) does not match the number of quantization axes (2)");
    }
    catch (...)
    {
        FAIL() << "Deduced type check failed for unexpected reason";
    }
}
// Input is rank-dynamic; scale and zero point both have static rank 4 with
// some dynamic dimensions, and four quantization axes are given. Construction
// is expected to succeed with an output of dynamic rank.
TEST(
    type_prop,
    quantize_partial_input_rank_dynamic_scale_rank_static_dynamic_zero_point_rank_static_dynamic_ok)
{
    // Input and scale are f32; zero point and output are i8.
    const element::Type in_type = element::f32;
    const element::Type out_type = element::i8;

    const PartialShape data_shape{PartialShape::dynamic()};
    const PartialShape scales_shape{64, Dimension::dynamic(), 96, Dimension::dynamic()};
    const PartialShape zp_shape{64, 22, Dimension::dynamic(), Dimension::dynamic()};

    const AxisSet quant_axes{0, 1, 5, 88};
    const auto rounding = op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_INFINITY;

    auto data = make_shared<op::Parameter>(in_type, data_shape);
    auto scales = make_shared<op::Parameter>(in_type, scales_shape);
    auto zero_points = make_shared<op::Parameter>(out_type, zp_shape);
    auto quantize =
        make_shared<op::Quantize>(data, scales, zero_points, out_type, quant_axes, rounding);

    ASSERT_EQ(quantize->get_output_element_type(0), out_type);
    ASSERT_TRUE(quantize->get_output_partial_shape(0).rank().is_dynamic());
}
// Scale has rank 4 while zero point has rank 5. Constructing Quantize is
// expected to throw a NodeValidationFailure whose message reports both
// partial shapes.
TEST(
    type_prop,
    quantize_partial_input_rank_dynamic_scale_rank_static_dynamic_zero_point_rank_static_dynamic_ranks_inconsistent)
{
    // Input and scale are f32; zero point and output are i8.
    const element::Type in_type = element::f32;
    const element::Type out_type = element::i8;

    const PartialShape data_shape{PartialShape::dynamic()};
    const PartialShape scales_shape{64, Dimension::dynamic(), 96, Dimension::dynamic()};
    const PartialShape zp_shape{64, 22, Dimension::dynamic(), Dimension::dynamic(), 3};

    const AxisSet quant_axes{0, 1, 5, 88};
    const auto rounding = op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_INFINITY;

    auto data = make_shared<op::Parameter>(in_type, data_shape);
    auto scales = make_shared<op::Parameter>(in_type, scales_shape);
    auto zero_points = make_shared<op::Parameter>(out_type, zp_shape);

    try
    {
        auto quantize = make_shared<op::Quantize>(
            data, scales, zero_points, out_type, quant_axes, rounding);
        // Reaching this line means validation accepted the differing ranks.
        FAIL() << "Inconsistent scale / zero point ranks not detected";
    }
    catch (const NodeValidationFailure& failure)
    {
        EXPECT_HAS_SUBSTRING(
            failure.what(),
            "Scale shape ({64,?,96,?}) and zero point shape ({64,22,?,?,3}) must match");
    }
    catch (...)
    {
        FAIL() << "Deduced type check failed for unexpected reason";
    }
}
// Scale and zero point share rank 4 but their first dimensions differ
// (64 vs 65). Constructing Quantize is expected to throw a
// NodeValidationFailure whose message reports both partial shapes.
TEST(
    type_prop,
    quantize_partial_input_rank_dynamic_scale_rank_static_dynamic_zero_point_rank_static_dynamic_dims_inconsistent)
{
    // Input and scale are f32; zero point and output are i8.
    const element::Type in_type = element::f32;
    const element::Type out_type = element::i8;

    const PartialShape data_shape{PartialShape::dynamic()};
    const PartialShape scales_shape{64, Dimension::dynamic(), 96, Dimension::dynamic()};
    const PartialShape zp_shape{65, 22, Dimension::dynamic(), Dimension::dynamic()};

    const AxisSet quant_axes{0, 1, 5, 88};
    const auto rounding = op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_INFINITY;

    auto data = make_shared<op::Parameter>(in_type, data_shape);
    auto scales = make_shared<op::Parameter>(in_type, scales_shape);
    auto zero_points = make_shared<op::Parameter>(out_type, zp_shape);

    try
    {
        auto quantize = make_shared<op::Quantize>(
            data, scales, zero_points, out_type, quant_axes, rounding);
        // Reaching this line means validation accepted the conflicting dimensions.
        FAIL() << "Inconsistent scale / zero point dims not detected";
    }
    catch (const NodeValidationFailure& failure)
    {
        EXPECT_HAS_SUBSTRING(
            failure.what(),
            "Scale shape ({64,?,96,?}) and zero point shape ({65,22,?,?}) must match");
    }
    catch (...)
    {
        FAIL() << "Deduced type check failed for unexpected reason";
    }
}
// Input has static rank 6 with dynamic dimensions at axes 3 and 5; scale and
// zero point are rank 3 and quantization runs over axes {1, 3, 5}. The
// expected output shape has axis 3 resolved to 8 (the zero point's second
// dimension) while axis 5 stays dynamic.
TEST(
    type_prop,
    quantize_partial_input_static_rank_dynamic_scale_rank_static_dynamic_zero_point_rank_static_dynamic_ok)
{
    // Input and scale are f32; zero point and output are i8.
    const element::Type in_type = element::f32;
    const element::Type out_type = element::i8;

    const PartialShape data_shape{2, 4, 6, Dimension::dynamic(), 10, Dimension::dynamic()};
    const PartialShape scales_shape{4, Dimension::dynamic(), Dimension::dynamic()};
    const PartialShape zp_shape{Dimension::dynamic(), 8, Dimension::dynamic()};

    const AxisSet quant_axes{1, 3, 5};
    const auto rounding = op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_INFINITY;

    auto data = make_shared<op::Parameter>(in_type, data_shape);
    auto scales = make_shared<op::Parameter>(in_type, scales_shape);
    auto zero_points = make_shared<op::Parameter>(out_type, zp_shape);
    auto quantize =
        make_shared<op::Quantize>(data, scales, zero_points, out_type, quant_axes, rounding);

    ASSERT_EQ(quantize->get_output_element_type(0), out_type);
    ASSERT_TRUE(quantize->get_output_partial_shape(0).same_scheme(
        PartialShape{2, 4, 6, 8, 10, Dimension::dynamic()}));
}
// Quantization axes {1, 3, 6} include axis 6, which is out of range for the
// rank-6 input. Constructing Quantize is expected to throw a
// NodeValidationFailure naming the offending axis and the input rank.
TEST(
    type_prop,
    quantize_partial_input_static_rank_dynamic_scale_rank_static_dynamic_zero_point_rank_static_dynamic_axis_oob)
{
    // Input and scale are f32; zero point and output are i8.
    const element::Type in_type = element::f32;
    const element::Type out_type = element::i8;

    const PartialShape data_shape{2, 4, 6, Dimension::dynamic(), 10, Dimension::dynamic()};
    const PartialShape scales_shape{4, Dimension::dynamic(), Dimension::dynamic()};
    const PartialShape zp_shape{Dimension::dynamic(), 8, Dimension::dynamic()};

    const AxisSet quant_axes{1, 3, 6};
    const auto rounding = op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_INFINITY;

    auto data = make_shared<op::Parameter>(in_type, data_shape);
    auto scales = make_shared<op::Parameter>(in_type, scales_shape);
    auto zero_points = make_shared<op::Parameter>(out_type, zp_shape);

    try
    {
        auto quantize = make_shared<op::Quantize>(
            data, scales, zero_points, out_type, quant_axes, rounding);
        // Reaching this line means validation accepted the out-of-range axis.
        FAIL() << "Out-of-bound quantization axis not detected";
    }
    catch (const NodeValidationFailure& failure)
    {
        EXPECT_HAS_SUBSTRING(failure.what(),
                             "Quantization axis (6) must be less than input shape rank (6)");
    }
    catch (...)
    {
        FAIL() << "Deduced type check failed for unexpected reason";
    }
}
// The input's dimension at axis 1 is 5 while the scale's first dimension is 4.
// Constructing Quantize is expected to throw a NodeValidationFailure that
// reports the combined scale / zero point shape ({4,8,?}) against the input
// shape at the quantization axes.
TEST(
    type_prop,
    quantize_partial_input_static_rank_dynamic_scale_rank_static_dynamic_zero_point_rank_static_dynamic_dims_inconsistent)
{
    // Input and scale are f32; zero point and output are i8.
    const element::Type in_type = element::f32;
    const element::Type out_type = element::i8;

    const PartialShape data_shape{2, 5, 6, Dimension::dynamic(), 10, Dimension::dynamic()};
    const PartialShape scales_shape{4, Dimension::dynamic(), Dimension::dynamic()};
    const PartialShape zp_shape{Dimension::dynamic(), 8, Dimension::dynamic()};

    const AxisSet quant_axes{1, 3, 5};
    const auto rounding = op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_INFINITY;

    auto data = make_shared<op::Parameter>(in_type, data_shape);
    auto scales = make_shared<op::Parameter>(in_type, scales_shape);
    auto zero_points = make_shared<op::Parameter>(out_type, zp_shape);

    try
    {
        auto quantize = make_shared<op::Quantize>(
            data, scales, zero_points, out_type, quant_axes, rounding);
        // Reaching this line means validation accepted the conflicting dimensions.
        FAIL() << "Inconsistent dimensions not detected";
    }
    catch (const NodeValidationFailure& failure)
    {
        EXPECT_HAS_SUBSTRING(
            failure.what(),
            "Scale / zero point shape ({4,8,?}) must match input shape ({2,5,6,?,10,?}) "
            "at the quantization axes (AxisSet{1, 3, 5})");
    }
    catch (...)
    {
        FAIL() << "Deduced type check failed for unexpected reason";
    }
}