Remove Fake Quantize OP decomposition (#3506)
* Remove Fake Quantize OP decomposition * Fix FQ OP inheritance
This commit is contained in:
parent
9817b22295
commit
0284cd69a8
@ -41,7 +41,7 @@ namespace ngraph
|
||||
/// (levels-1) * (output_high - output_low) + output_low
|
||||
///
|
||||
///
|
||||
class NGRAPH_API FakeQuantize : public ngraph::op::util::FusedOp
|
||||
class NGRAPH_API FakeQuantize : public ngraph::op::Op
|
||||
{
|
||||
public:
|
||||
NGRAPH_RTTI_DECLARATION;
|
||||
@ -69,7 +69,6 @@ namespace ngraph
|
||||
AutoBroadcastSpec(AutoBroadcastType::NUMPY));
|
||||
|
||||
bool visit_attributes(AttributeVisitor& visitor) override;
|
||||
virtual OutputVector decompose_op() const override;
|
||||
virtual void validate_and_infer_types() override;
|
||||
|
||||
virtual std::shared_ptr<Node>
|
||||
|
@ -1,120 +0,0 @@
|
||||
//*****************************************************************************
|
||||
// Copyright 2017-2021 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//*****************************************************************************
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "ngraph/axis_set.hpp"
|
||||
#include "ngraph/op/op.hpp"
|
||||
#include "ngraph/type/element_type.hpp"
|
||||
|
||||
namespace ngraph
|
||||
{
|
||||
namespace op
|
||||
{
|
||||
namespace v0
|
||||
{
|
||||
/// \brief Quantize operation
|
||||
/// Maps real input (r) to quantized output (q) using scale (s), zero point (z)
|
||||
/// and
|
||||
/// round mode: q = ROUND(r / s) + o
|
||||
class NGRAPH_DEPRECATED(
|
||||
"This operation is deprecated and will be removed soon. Please do not use it.")
|
||||
NGRAPH_API Quantize : public ngraph::op::Op
|
||||
{
|
||||
NGRAPH_SUPPRESS_DEPRECATED_START
|
||||
public:
|
||||
static constexpr NodeTypeInfo type_info{"Quantize", 0};
|
||||
const NodeTypeInfo& get_type_info() const override { return type_info; }
|
||||
enum class RoundMode
|
||||
{
|
||||
// round to nearest integer
|
||||
// in case of two equidistant integers round away from zero e.g.
|
||||
// 2.5 -> 3
|
||||
// -3.5 -> -4
|
||||
ROUND_NEAREST_TOWARD_INFINITY,
|
||||
|
||||
// round to nearest integer
|
||||
// in case of two equidistant integers round toward zero e.g.
|
||||
// 2.5 -> 2
|
||||
// -3.5 -> -3
|
||||
ROUND_NEAREST_TOWARD_ZERO,
|
||||
|
||||
// round to nearest integer
|
||||
// in case of two equidistant integers round up e.g.
|
||||
// 2.5 -> 3
|
||||
// -3.5 -> -3
|
||||
ROUND_NEAREST_UPWARD,
|
||||
|
||||
// round to nearest integer
|
||||
// in case of two equidistant integers round down e.g.
|
||||
// 2.5 -> 2
|
||||
// -3.5 -> -4
|
||||
ROUND_NEAREST_DOWNWARD,
|
||||
|
||||
// round to nearest integer
|
||||
// in case of two equidistant integers round to even e.g.
|
||||
// 2.5 -> 2
|
||||
// -3.5 -> -4
|
||||
ROUND_NEAREST_TOWARD_EVEN,
|
||||
|
||||
// round to nearest integer away from zero
|
||||
ROUND_TOWARD_INFINITY,
|
||||
|
||||
// round to nearest integer toward zero
|
||||
ROUND_TOWARD_ZERO,
|
||||
|
||||
// round to nearest integer toward infinity (ceiling)
|
||||
ROUND_UP,
|
||||
|
||||
// round to nearest integer toward negative infinity (floor)
|
||||
ROUND_DOWN,
|
||||
};
|
||||
|
||||
/// \brief Constructs a Quantize operation
|
||||
/// \param input real input
|
||||
/// \param scale scale used for mapping
|
||||
/// \param zero_point zero point used for mapping
|
||||
/// \param type output element type
|
||||
/// \param axes axis positions on which `scale` and `zero_point` are specified
|
||||
/// \param round_mode describes how to perform ROUND function (see above)
|
||||
Quantize(const Output<Node>& input,
|
||||
const Output<Node>& scale,
|
||||
const Output<Node>& zero_point,
|
||||
const ngraph::element::Type& type,
|
||||
const ngraph::AxisSet& axes,
|
||||
RoundMode round_mode);
|
||||
|
||||
Quantize() = default;
|
||||
|
||||
void validate_and_infer_types() override;
|
||||
|
||||
virtual std::shared_ptr<Node>
|
||||
clone_with_new_inputs(const OutputVector& new_args) const override;
|
||||
|
||||
const ngraph::AxisSet& get_axes() const { return m_axes; }
|
||||
RoundMode get_round_mode() const { return m_round_mode; }
|
||||
private:
|
||||
ngraph::element::Type m_type;
|
||||
ngraph::AxisSet m_axes;
|
||||
RoundMode m_round_mode;
|
||||
NGRAPH_SUPPRESS_DEPRECATED_END
|
||||
};
|
||||
} // namespace v0
|
||||
NGRAPH_SUPPRESS_DEPRECATED_START
|
||||
using v0::Quantize;
|
||||
NGRAPH_SUPPRESS_DEPRECATED_END
|
||||
} // namespace op
|
||||
} // namespace ngraph
|
@ -117,7 +117,6 @@
|
||||
#include "ngraph/op/prior_box_clustered.hpp"
|
||||
#include "ngraph/op/proposal.hpp"
|
||||
#include "ngraph/op/psroi_pooling.hpp"
|
||||
#include "ngraph/op/quantize.hpp"
|
||||
#include "ngraph/op/range.hpp"
|
||||
#include "ngraph/op/read_value.hpp"
|
||||
#include "ngraph/op/reduce_l1.hpp"
|
||||
|
@ -18,20 +18,10 @@
|
||||
#include "itt.hpp"
|
||||
|
||||
#include "ngraph/attribute_visitor.hpp"
|
||||
#include "ngraph/builder/autobroadcast.hpp"
|
||||
#include "ngraph/op/add.hpp"
|
||||
#include "ngraph/op/constant.hpp"
|
||||
#include "ngraph/op/convert.hpp"
|
||||
#include "ngraph/op/divide.hpp"
|
||||
#include "ngraph/op/fake_quantize.hpp"
|
||||
#include "ngraph/op/greater.hpp"
|
||||
#include "ngraph/op/less_eq.hpp"
|
||||
#include "ngraph/op/maximum.hpp"
|
||||
#include "ngraph/op/minimum.hpp"
|
||||
#include "ngraph/op/multiply.hpp"
|
||||
#include "ngraph/op/quantize.hpp"
|
||||
#include "ngraph/op/select.hpp"
|
||||
#include "ngraph/op/subtract.hpp"
|
||||
#include "ngraph/shape.hpp"
|
||||
|
||||
using namespace std;
|
||||
@ -42,7 +32,7 @@ NGRAPH_SUPPRESS_DEPRECATED_START
|
||||
NGRAPH_RTTI_DEFINITION(op::FakeQuantize, "FakeQuantize", 0);
|
||||
|
||||
op::FakeQuantize::FakeQuantize()
|
||||
: FusedOp()
|
||||
: Op()
|
||||
, m_levels()
|
||||
{
|
||||
}
|
||||
@ -54,7 +44,7 @@ op::FakeQuantize::FakeQuantize(const Output<Node>& data,
|
||||
const Output<Node>& output_high,
|
||||
size_t levels,
|
||||
const AutoBroadcastSpec& auto_broadcast)
|
||||
: FusedOp({data, input_low, input_high, output_low, output_high})
|
||||
: Op({data, input_low, input_high, output_low, output_high})
|
||||
, m_levels(levels)
|
||||
, m_auto_broadcast(auto_broadcast)
|
||||
{
|
||||
@ -98,80 +88,6 @@ bool ngraph::op::v0::FakeQuantize::visit_attributes(AttributeVisitor& visitor)
|
||||
return true;
|
||||
}
|
||||
|
||||
// Expresses FakeQuantize as a sub-graph of simpler ops: the inputs are optionally
// broadcast, the data is clipped to <input_low;input_high>, quantized to i32 with the
// Quantize op, converted back to the input element type and rescaled into the
// <output_low;output_high> range.
OutputVector op::FakeQuantize::decompose_op() const
{
    // Operands in positional order: data, input_low, input_high, output_low, output_high.
    OutputVector operands{
        input_value(0), input_value(1), input_value(2), input_value(3), input_value(4)};

    // Apply the configured auto-broadcast; any other broadcast type leaves operands as-is.
    if (m_auto_broadcast.m_type == AutoBroadcastType::NUMPY)
    {
        operands = builder::numpy_broadcast_outputs(operands);
    }
    else if (m_auto_broadcast.m_type == AutoBroadcastType::PDPD)
    {
        operands = builder::pdpd_broadcast(operands, m_auto_broadcast.m_axis);
    }

    Output<Node> data = operands.at(0);
    const Output<Node> input_low = operands.at(1);
    const Output<Node> input_high = operands.at(2);
    const Output<Node> output_low = operands.at(3);
    const Output<Node> output_high = operands.at(4);

    const auto data_shape = data.get_shape();
    const auto data_type = data.get_element_type();

    // (levels - 1) replicated over the whole data shape
    const auto level_steps = Constant::create(
        data_type, data_shape, vector<size_t>(shape_size(data_shape), m_levels - 1));

    // map the number of quantization levels to the nGraph's quantization and dequantization scales
    const auto input_range = std::make_shared<op::v1::Subtract>(input_high, input_low);
    const auto quant_scale = std::make_shared<op::v1::Divide>(input_range, level_steps);
    const auto output_range = std::make_shared<op::v1::Subtract>(output_high, output_low);
    const auto dequant_scale = std::make_shared<op::v1::Divide>(output_range, level_steps);

    // zero_point type needs to match the quantization output type
    const auto zero_point = Constant::create(element::i32, data_shape, {0.0});
    const auto axes = get_default_order(data_shape);

    // clip the input data to the range <input_low;input_high>
    const auto lower_clipped = std::make_shared<op::v1::Maximum>(input_low, data);
    data = std::make_shared<op::v1::Minimum>(input_high, lower_clipped);

    // shift the input data so that it contains only positive values (and zeros)
    data = std::make_shared<op::v1::Subtract>(data, input_low);

    const shared_ptr<Node> quantized =
        make_shared<op::Quantize>(data,
                                  quant_scale,
                                  zero_point,
                                  element::i32,
                                  axes,
                                  op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_EVEN);
    const shared_ptr<Node> quantized_as_input_type =
        make_shared<op::Convert>(quantized, data_type);

    // dequantization without using the Dequantize op (just a multiplication by the dequant_scale)
    const auto dequantized =
        make_shared<op::v1::Multiply>(quantized_as_input_type, dequant_scale);

    // shift the results so that they fall into the <output_low;output_high> range
    return {std::make_shared<op::v1::Add>(dequantized, output_low)};
}
|
||||
|
||||
shared_ptr<Node> op::FakeQuantize::clone_with_new_inputs(const OutputVector& new_args) const
|
||||
{
|
||||
NGRAPH_OP_SCOPE(v0_FakeQuantize_clone_with_new_inputs);
|
||||
|
@ -1,168 +0,0 @@
|
||||
//*****************************************************************************
|
||||
// Copyright 2017-2021 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//*****************************************************************************
|
||||
|
||||
#include "ngraph/op/quantize.hpp"
|
||||
#include "itt.hpp"
|
||||
#include "ngraph/runtime/host_tensor.hpp"
|
||||
#include "ngraph/runtime/reference/quantize.hpp"
|
||||
#include "ngraph/shape_util.hpp"
|
||||
|
||||
NGRAPH_SUPPRESS_DEPRECATED_START
|
||||
|
||||
using namespace std;
|
||||
using namespace ngraph;
|
||||
|
||||
constexpr NodeTypeInfo op::Quantize::type_info;
|
||||
|
||||
// Builds a Quantize node over the three inputs (input, scale, zero point), remembers the
// requested output type, quantization axes and round mode, and then calls
// constructor_validate_and_infer_types().
op::Quantize::Quantize(const Output<Node>& input,
                       const Output<Node>& scale,
                       const Output<Node>& zero_point,
                       const element::Type& type,
                       const AxisSet& axes,
                       RoundMode round_mode)
    : Op({input, scale, zero_point}), m_type(type), m_axes(axes), m_round_mode(round_mode)
{
    constructor_validate_and_infer_types();
}
|
||||
|
||||
void op::Quantize::validate_and_infer_types()
|
||||
{
|
||||
NGRAPH_OP_SCOPE(v0_Quantize_validate_and_infer_types);
|
||||
enum
|
||||
{
|
||||
INPUT,
|
||||
SCALE,
|
||||
ZERO_POINT
|
||||
};
|
||||
|
||||
NODE_VALIDATION_CHECK(this, m_type.is_static(), "Output element type must not be dynamic");
|
||||
|
||||
NODE_VALIDATION_CHECK(
|
||||
this, m_type.is_quantized(), "Output element type (", m_type, ") must be a quantized type");
|
||||
|
||||
element::Type unquantized_type;
|
||||
|
||||
NODE_VALIDATION_CHECK(this,
|
||||
element::Type::merge(unquantized_type,
|
||||
get_input_element_type(INPUT),
|
||||
get_input_element_type(SCALE)),
|
||||
"Scale element type (",
|
||||
get_input_element_type(SCALE),
|
||||
") must match input element type (",
|
||||
get_input_element_type(INPUT),
|
||||
")");
|
||||
|
||||
NODE_VALIDATION_CHECK(this,
|
||||
unquantized_type.is_dynamic() || unquantized_type.is_real(),
|
||||
"Scale / input element type (",
|
||||
unquantized_type,
|
||||
") must be a floating point number");
|
||||
|
||||
element::Type quantized_type;
|
||||
|
||||
NODE_VALIDATION_CHECK(
|
||||
this,
|
||||
element::Type::merge(quantized_type, get_input_element_type(ZERO_POINT), m_type),
|
||||
"Zero point element type (",
|
||||
get_input_element_type(ZERO_POINT),
|
||||
") must match output element type (",
|
||||
m_type,
|
||||
")");
|
||||
|
||||
PartialShape input_shape = get_input_partial_shape(0);
|
||||
Dimension input_rank = input_shape.rank();
|
||||
|
||||
for (auto axis : m_axes)
|
||||
{
|
||||
NODE_VALIDATION_CHECK(this,
|
||||
input_rank.is_dynamic() || axis < input_rank.get_length(),
|
||||
"Quantization axis (",
|
||||
axis,
|
||||
") must be less than input shape rank (",
|
||||
input_rank,
|
||||
")");
|
||||
}
|
||||
|
||||
PartialShape scale_zero_point_shape = get_input_partial_shape(SCALE);
|
||||
|
||||
NODE_VALIDATION_CHECK(
|
||||
this,
|
||||
PartialShape::merge_into(scale_zero_point_shape, get_input_partial_shape(ZERO_POINT)),
|
||||
"Scale shape (",
|
||||
get_input_partial_shape(SCALE),
|
||||
") and zero point shape (",
|
||||
get_input_partial_shape(ZERO_POINT),
|
||||
") must match");
|
||||
|
||||
NODE_VALIDATION_CHECK(this,
|
||||
scale_zero_point_shape.rank().compatible(m_axes.size()),
|
||||
"Scale / zero point rank (",
|
||||
scale_zero_point_shape.rank(),
|
||||
") does not match the number of ",
|
||||
"quantization axes (",
|
||||
m_axes.size(),
|
||||
")");
|
||||
|
||||
set_output_size(1);
|
||||
|
||||
if (input_shape.rank().is_static() && scale_zero_point_shape.rank().is_static())
|
||||
{
|
||||
size_t i = 0;
|
||||
|
||||
vector<Dimension> injected_scale_zero_point_dims;
|
||||
|
||||
for (size_t j = 0; j < input_shape.rank().get_length(); j++)
|
||||
{
|
||||
if (m_axes.count(j) != 0)
|
||||
{
|
||||
injected_scale_zero_point_dims.push_back(scale_zero_point_shape[i++]);
|
||||
}
|
||||
else
|
||||
{
|
||||
injected_scale_zero_point_dims.push_back(Dimension::dynamic());
|
||||
}
|
||||
}
|
||||
|
||||
PartialShape result_shape = input_shape;
|
||||
NODE_VALIDATION_CHECK(
|
||||
this,
|
||||
PartialShape::merge_into(result_shape, PartialShape{injected_scale_zero_point_dims}),
|
||||
"Scale / zero point shape (",
|
||||
scale_zero_point_shape,
|
||||
") must match input shape (",
|
||||
input_shape,
|
||||
") at the quantization axes (",
|
||||
m_axes,
|
||||
")");
|
||||
set_output_type(0, quantized_type, result_shape);
|
||||
}
|
||||
else
|
||||
{
|
||||
set_output_type(0, quantized_type, PartialShape::dynamic());
|
||||
}
|
||||
}
|
||||
|
||||
// Creates a new Quantize node over the first three entries of new_args that carries
// this node's output type, axes and round mode.
shared_ptr<Node> op::Quantize::clone_with_new_inputs(const OutputVector& new_args) const
{
    NGRAPH_OP_SCOPE(v0_Quantize_clone_with_new_inputs);
    check_new_args_count(this, new_args);

    const auto& new_input = new_args.at(0);
    const auto& new_scale = new_args.at(1);
    const auto& new_zero_point = new_args.at(2);

    return make_shared<Quantize>(
        new_input, new_scale, new_zero_point, m_type, m_axes, m_round_mode);
}
|
@ -167,7 +167,6 @@ set(SRC
|
||||
type_prop/prelu.cpp
|
||||
type_prop/proposal.cpp
|
||||
type_prop/psroi_pooling.cpp
|
||||
type_prop/quantize.cpp
|
||||
type_prop/range.cpp
|
||||
type_prop/read_value.cpp
|
||||
type_prop/reduce_l1.cpp
|
||||
|
@ -488,15 +488,6 @@ namespace
|
||||
EXPECT_FALSE(op::is_binary_elementwise_logical(&node));
|
||||
}
|
||||
|
||||
void op_is_Quantize()
|
||||
{
|
||||
op::Quantize node;
|
||||
EXPECT_FALSE(op::is_unary_elementwise_arithmetic(&node));
|
||||
EXPECT_FALSE(op::is_binary_elementwise_arithmetic(&node));
|
||||
EXPECT_FALSE(op::is_binary_elementwise_comparison(&node));
|
||||
EXPECT_FALSE(op::is_binary_elementwise_logical(&node));
|
||||
}
|
||||
|
||||
void op_is_Range()
|
||||
{
|
||||
op::Range node;
|
||||
|
@ -116,7 +116,6 @@ NGRAPH_OP(Power, ngraph::op::v1, 1)
|
||||
NGRAPH_OP(PriorBox, ngraph::op::v0, 0)
|
||||
NGRAPH_OP(PriorBoxClustered, ngraph::op::v0, 0)
|
||||
NGRAPH_OP(Proposal, ngraph::op::v0, 0)
|
||||
NGRAPH_OP(Quantize, ngraph::op::v0, 0)
|
||||
NGRAPH_OP(RNNCell, ngraph::op::v0, 0)
|
||||
NGRAPH_OP(ROIPooling, ngraph::op::v0, 0)
|
||||
NGRAPH_OP(Range, ngraph::op::v0, 0)
|
||||
|
@ -196,7 +196,7 @@ bool runtime::interpreter::INTExecutable::call(const vector<shared_ptr<runtime::
|
||||
|
||||
// get op type
|
||||
element::Type type;
|
||||
if (is_type<op::Convert>(op) || is_type<op::Quantize>(op) || is_type<op::PriorBox>(op))
|
||||
if (is_type<op::Convert>(op) || is_type<op::PriorBox>(op))
|
||||
{
|
||||
type = op->get_input_element_type(0);
|
||||
}
|
||||
|
@ -91,7 +91,6 @@ NGRAPH_OP(NormalizeL2, ngraph::op)
|
||||
NGRAPH_OP(Parameter, ngraph::op)
|
||||
NGRAPH_OP(PRelu, ngraph::op)
|
||||
NGRAPH_OP(PriorBox, ngraph::op)
|
||||
NGRAPH_OP(Quantize, ngraph::op)
|
||||
NGRAPH_OP(Range, ngraph::op)
|
||||
NGRAPH_OP(Relu, ngraph::op)
|
||||
NGRAPH_OP(Result, ngraph::op)
|
||||
|
@ -1,806 +0,0 @@
|
||||
//*****************************************************************************
|
||||
// Copyright 2017-2021 Intel Corporation
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//*****************************************************************************
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
#include "ngraph/ngraph.hpp"
|
||||
#include "util/type_prop.hpp"
|
||||
|
||||
NGRAPH_SUPPRESS_DEPRECATED_START
|
||||
|
||||
using namespace std;
|
||||
using namespace ngraph;
|
||||
|
||||
// f32 -> i8 with one scale / zero point per channel (axis 1): output is i8 with the data shape.
TEST(type_prop, quantize_f32_to_i8_nchw_per_channel_ok)
{
    const Shape data_shape{64, 3, 480, 640};
    const Shape params_shape{3};
    const element::Type real_et = element::f32;
    const element::Type quant_et = element::i8;

    const auto data = make_shared<op::Parameter>(real_et, data_shape);
    const auto scale = make_shared<op::Parameter>(real_et, params_shape);
    const auto zero_point = make_shared<op::Parameter>(quant_et, params_shape);
    const auto quantize =
        make_shared<op::Quantize>(data,
                                  scale,
                                  zero_point,
                                  quant_et,
                                  AxisSet{1},
                                  op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_INFINITY);

    ASSERT_EQ(quantize->get_output_element_type(0), quant_et);
    ASSERT_EQ(quantize->get_output_shape(0), data_shape);
}
|
||||
|
||||
// f32 -> i8 with one scale / zero point per image (axis 0): output is i8 with the data shape.
TEST(type_prop, quantize_f32_to_i8_nchw_per_image_ok)
{
    const Shape data_shape{64, 3, 480, 640};
    const Shape params_shape{64};
    const element::Type real_et = element::f32;
    const element::Type quant_et = element::i8;

    const auto data = make_shared<op::Parameter>(real_et, data_shape);
    const auto scale = make_shared<op::Parameter>(real_et, params_shape);
    const auto zero_point = make_shared<op::Parameter>(quant_et, params_shape);
    const auto quantize =
        make_shared<op::Quantize>(data,
                                  scale,
                                  zero_point,
                                  quant_et,
                                  AxisSet{0},
                                  op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_INFINITY);

    ASSERT_EQ(quantize->get_output_element_type(0), quant_et);
    ASSERT_EQ(quantize->get_output_shape(0), data_shape);
}
|
||||
|
||||
// f32 -> i8 with one scale / zero point per row (axis 2): output is i8 with the data shape.
TEST(type_prop, quantize_f32_to_i8_nchw_per_row_ok)
{
    const Shape data_shape{64, 3, 480, 640};
    const Shape params_shape{480};
    const element::Type real_et = element::f32;
    const element::Type quant_et = element::i8;

    const auto data = make_shared<op::Parameter>(real_et, data_shape);
    const auto scale = make_shared<op::Parameter>(real_et, params_shape);
    const auto zero_point = make_shared<op::Parameter>(quant_et, params_shape);
    const auto quantize =
        make_shared<op::Quantize>(data,
                                  scale,
                                  zero_point,
                                  quant_et,
                                  AxisSet{2},
                                  op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_INFINITY);

    ASSERT_EQ(quantize->get_output_element_type(0), quant_et);
    ASSERT_EQ(quantize->get_output_shape(0), data_shape);
}
|
||||
|
||||
// f32 -> i8 with scale / zero point given per image and channel (axes 0 and 1):
// output is i8 with the data shape.
TEST(type_prop, quantize_f32_to_i8_nchw_per_image_channel_ok)
{
    const Shape data_shape{64, 3, 480, 640};
    const Shape params_shape{64, 3};
    const element::Type real_et = element::f32;
    const element::Type quant_et = element::i8;

    const auto data = make_shared<op::Parameter>(real_et, data_shape);
    const auto scale = make_shared<op::Parameter>(real_et, params_shape);
    const auto zero_point = make_shared<op::Parameter>(quant_et, params_shape);
    const auto quantize =
        make_shared<op::Quantize>(data,
                                  scale,
                                  zero_point,
                                  quant_et,
                                  AxisSet{0, 1},
                                  op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_INFINITY);

    ASSERT_EQ(quantize->get_output_element_type(0), quant_et);
    ASSERT_EQ(quantize->get_output_shape(0), data_shape);
}
|
||||
|
||||
// f32 -> i8 with scalar scale / zero point and no quantization axes:
// output is i8 with the data shape.
TEST(type_prop, quantize_f32_to_i8_nchw_whole_batch_ok)
{
    const Shape data_shape{64, 3, 480, 640};
    const Shape params_shape{};
    const element::Type real_et = element::f32;
    const element::Type quant_et = element::i8;

    const auto data = make_shared<op::Parameter>(real_et, data_shape);
    const auto scale = make_shared<op::Parameter>(real_et, params_shape);
    const auto zero_point = make_shared<op::Parameter>(quant_et, params_shape);
    const auto quantize =
        make_shared<op::Quantize>(data,
                                  scale,
                                  zero_point,
                                  quant_et,
                                  AxisSet{},
                                  op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_INFINITY);

    ASSERT_EQ(quantize->get_output_element_type(0), quant_et);
    ASSERT_EQ(quantize->get_output_shape(0), data_shape);
}
|
||||
|
||||
// f64 -> i8 with scalar scale / zero point: output is i8 with the data shape.
TEST(type_prop, quantize_f64_to_i8_ok)
{
    const Shape data_shape{64, 3, 480, 640};
    const Shape params_shape{};
    const element::Type real_et = element::f64;
    const element::Type quant_et = element::i8;

    const auto data = make_shared<op::Parameter>(real_et, data_shape);
    const auto scale = make_shared<op::Parameter>(real_et, params_shape);
    const auto zero_point = make_shared<op::Parameter>(quant_et, params_shape);
    const auto quantize =
        make_shared<op::Quantize>(data,
                                  scale,
                                  zero_point,
                                  quant_et,
                                  AxisSet{},
                                  op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_INFINITY);

    ASSERT_EQ(quantize->get_output_element_type(0), quant_et);
    ASSERT_EQ(quantize->get_output_shape(0), data_shape);
}
|
||||
|
||||
// f64 -> u8 with scalar scale / zero point: output is u8 with the data shape.
TEST(type_prop, quantize_f64_to_u8_ok)
{
    const Shape data_shape{64, 3, 480, 640};
    const Shape params_shape{};
    const element::Type real_et = element::f64;
    const element::Type quant_et = element::u8;

    const auto data = make_shared<op::Parameter>(real_et, data_shape);
    const auto scale = make_shared<op::Parameter>(real_et, params_shape);
    const auto zero_point = make_shared<op::Parameter>(quant_et, params_shape);
    const auto quantize =
        make_shared<op::Quantize>(data,
                                  scale,
                                  zero_point,
                                  quant_et,
                                  AxisSet{},
                                  op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_INFINITY);

    ASSERT_EQ(quantize->get_output_element_type(0), quant_et);
    ASSERT_EQ(quantize->get_output_shape(0), data_shape);
}
|
||||
|
||||
// Requesting a dynamic output element type must be rejected during node validation.
TEST(type_prop, quantize_f64_to_dyn_fails)
{
    const Shape data_shape{64, 3, 480, 640};
    const Shape params_shape{};
    const element::Type real_et = element::f64;
    const element::Type quant_et = element::dynamic;
    const AxisSet no_axes{};
    const auto rounding = op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_INFINITY;

    const auto data = make_shared<op::Parameter>(real_et, data_shape);
    const auto scale = make_shared<op::Parameter>(real_et, params_shape);
    const auto zero_point = make_shared<op::Parameter>(quant_et, params_shape);

    try
    {
        auto quantize =
            make_shared<op::Quantize>(data, scale, zero_point, quant_et, no_axes, rounding);
        FAIL() << "Attempt to quantize to dynamic type not detected";
    }
    catch (const NodeValidationFailure& failure)
    {
        EXPECT_HAS_SUBSTRING(failure.what(), "Output element type must not be dynamic");
    }
    catch (...)
    {
        FAIL() << "Deduced type check failed for unexpected reason";
    }
}
|
||||
|
||||
// An integer (i8) input / scale element type must be rejected during node validation.
TEST(type_prop, quantize_i8_to_u8_fails)
{
    const Shape data_shape{64, 3, 480, 640};
    const Shape params_shape{};
    const element::Type source_et = element::i8;
    const element::Type quant_et = element::u8;
    const AxisSet no_axes{};
    const auto rounding = op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_INFINITY;

    const auto data = make_shared<op::Parameter>(source_et, data_shape);
    const auto scale = make_shared<op::Parameter>(source_et, params_shape);
    const auto zero_point = make_shared<op::Parameter>(quant_et, params_shape);

    try
    {
        auto quantize =
            make_shared<op::Quantize>(data, scale, zero_point, quant_et, no_axes, rounding);
        FAIL() << "Attempt to quantize non-floating point type not detected";
    }
    catch (const NodeValidationFailure& failure)
    {
        EXPECT_HAS_SUBSTRING(failure.what(),
                             "Scale / input element type (i8) must be a floating point number");
    }
    catch (...)
    {
        FAIL() << "Deduced type check failed for unexpected reason";
    }
}
|
||||
|
||||
// A non-quantized (f32) output element type must be rejected during node validation.
TEST(type_prop, quantize_f32_to_f32_fails)
{
    const Shape data_shape{64, 3, 480, 640};
    const Shape params_shape{};
    const element::Type real_et = element::f32;
    const element::Type target_et = element::f32;
    const AxisSet no_axes{};
    const auto rounding = op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_INFINITY;

    const auto data = make_shared<op::Parameter>(real_et, data_shape);
    const auto scale = make_shared<op::Parameter>(real_et, params_shape);
    const auto zero_point = make_shared<op::Parameter>(target_et, params_shape);

    try
    {
        auto quantize =
            make_shared<op::Quantize>(data, scale, zero_point, target_et, no_axes, rounding);
        FAIL() << "Attempt to quantize to non-quantized type not detected";
    }
    catch (const NodeValidationFailure& failure)
    {
        EXPECT_HAS_SUBSTRING(failure.what(), "Output element type (f32) must be a quantized type");
    }
    catch (...)
    {
        FAIL() << "Deduced type check failed for unexpected reason";
    }
}
|
||||
|
||||
// A scale element type (f64) that differs from the input element type (f32) must be
// rejected during node validation.
TEST(type_prop, quantize_batch_scale_type_mismatch_fails)
{
    const Shape data_shape{64, 3, 480, 640};
    const Shape params_shape{};
    const element::Type data_et = element::f32;
    const element::Type scale_et = element::f64;
    const element::Type quant_et = element::i8;
    const AxisSet no_axes{};
    const auto rounding = op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_INFINITY;

    const auto data = make_shared<op::Parameter>(data_et, data_shape);
    const auto scale = make_shared<op::Parameter>(scale_et, params_shape);
    const auto zero_point = make_shared<op::Parameter>(quant_et, params_shape);

    try
    {
        auto quantize =
            make_shared<op::Quantize>(data, scale, zero_point, quant_et, no_axes, rounding);
        FAIL() << "Mismatch of batch and scale element types not detected";
    }
    catch (const NodeValidationFailure& failure)
    {
        EXPECT_HAS_SUBSTRING(failure.what(),
                             "Scale element type (f64) must match input element type (f32)");
    }
    catch (...)
    {
        FAIL() << "Deduced type check failed for unexpected reason";
    }
}
|
||||
|
||||
// A zero point element type (u8) that differs from the requested output type (i8) must
// be rejected during node validation.
TEST(type_prop, quantize_zero_point_type_mismatch_fails)
{
    const Shape data_shape{64, 3, 480, 640};
    const Shape params_shape{};
    const element::Type real_et = element::f32;
    const element::Type quant_et = element::i8;
    const element::Type zero_point_et = element::u8;
    const AxisSet no_axes{};
    const auto rounding = op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_INFINITY;

    const auto data = make_shared<op::Parameter>(real_et, data_shape);
    const auto scale = make_shared<op::Parameter>(real_et, params_shape);
    const auto zero_point = make_shared<op::Parameter>(zero_point_et, params_shape);

    try
    {
        auto quantize =
            make_shared<op::Quantize>(data, scale, zero_point, quant_et, no_axes, rounding);
        FAIL() << "Mismatch of zero point element type with zero point argument not detected";
    }
    catch (const NodeValidationFailure& failure)
    {
        EXPECT_HAS_SUBSTRING(failure.what(),
                             "Zero point element type (u8) must match output element type (i8)");
    }
    catch (...)
    {
        FAIL() << "Deduced type check failed for unexpected reason";
    }
}
|
||||
|
||||
// Passes quantization axes {3, 4} together with a rank-4 input and expects a
// NodeValidationFailure that names axis 4 as not being less than the input rank.
TEST(type_prop, quantize_oob_axis_fails)
{
    const element::Type in_et = element::f32;
    const element::Type out_et = element::i8;
    const AxisSet q_axes{3, 4};
    const auto rounding = op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_INFINITY;

    auto input = make_shared<op::Parameter>(in_et, Shape{64, 3, 480, 640});
    auto scales = make_shared<op::Parameter>(in_et, Shape{320});
    auto offsets = make_shared<op::Parameter>(out_et, Shape{320});

    try
    {
        auto node = make_shared<op::Quantize>(input, scales, offsets, out_et, q_axes, rounding);
        FAIL() << "Out-of-bounds quantization axis not detected";
    }
    catch (const NodeValidationFailure& failure)
    {
        EXPECT_HAS_SUBSTRING(failure.what(),
                             "Quantization axis (4) must be less than input shape rank (4)");
    }
    catch (...)
    {
        FAIL() << "Deduced type check failed for unexpected reason";
    }
}
|
||||
|
||||
// Supplies a scale of shape {64,4} and a zero point of shape {64,3} (same rank,
// different dimension) and expects a NodeValidationFailure stating that the two
// shapes must match.
TEST(type_prop, quantize_scale_shape_mismatch_same_rank_fails)
{
    const element::Type in_et = element::f32;
    const element::Type out_et = element::i8;
    const AxisSet q_axes{0, 1};
    const auto rounding = op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_INFINITY;

    auto input = make_shared<op::Parameter>(in_et, Shape{64, 3, 480, 640});
    auto scales = make_shared<op::Parameter>(in_et, Shape{64, 4});
    auto offsets = make_shared<op::Parameter>(out_et, Shape{64, 3});

    try
    {
        auto node = make_shared<op::Quantize>(input, scales, offsets, out_et, q_axes, rounding);
        FAIL() << "Mismatch of scale argument shape with required shape not detected";
    }
    catch (const NodeValidationFailure& failure)
    {
        EXPECT_HAS_SUBSTRING(failure.what(),
                             "Scale shape ({64,4}) and zero point shape ({64,3}) must match");
    }
    catch (...)
    {
        FAIL() << "Deduced type check failed for unexpected reason";
    }
}
|
||||
|
||||
// Supplies a rank-3 scale ({64,3,2}) and a rank-2 zero point ({64,3}) and
// expects a NodeValidationFailure stating that the two shapes must match.
TEST(type_prop, quantize_scale_shape_mismatch_different_rank_fails)
{
    const element::Type in_et = element::f32;
    const element::Type out_et = element::i8;
    const AxisSet q_axes{0, 1};
    const auto rounding = op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_INFINITY;

    auto input = make_shared<op::Parameter>(in_et, Shape{64, 3, 480, 640});
    auto scales = make_shared<op::Parameter>(in_et, Shape{64, 3, 2});
    auto offsets = make_shared<op::Parameter>(out_et, Shape{64, 3});

    try
    {
        auto node = make_shared<op::Quantize>(input, scales, offsets, out_et, q_axes, rounding);
        FAIL() << "Mismatch of scale argument shape with required shape not detected";
    }
    catch (const NodeValidationFailure& failure)
    {
        EXPECT_HAS_SUBSTRING(failure.what(),
                             "Scale shape ({64,3,2}) and zero point shape ({64,3}) must match");
    }
    catch (...)
    {
        FAIL() << "Deduced type check failed for unexpected reason";
    }
}
|
||||
|
||||
// Supplies a scale of shape {64,3} and a zero point of shape {64,4} (same rank,
// different dimension) and expects a NodeValidationFailure stating that the two
// shapes must match.
TEST(type_prop, quantize_zero_point_shape_mismatch_same_rank_fails)
{
    const element::Type in_et = element::f32;
    const element::Type out_et = element::i8;
    const AxisSet q_axes{0, 1};
    const auto rounding = op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_INFINITY;

    auto input = make_shared<op::Parameter>(in_et, Shape{64, 3, 480, 640});
    auto scales = make_shared<op::Parameter>(in_et, Shape{64, 3});
    auto offsets = make_shared<op::Parameter>(out_et, Shape{64, 4});

    try
    {
        auto node = make_shared<op::Quantize>(input, scales, offsets, out_et, q_axes, rounding);
        FAIL() << "Mismatch of zero point argument shape with required shape not detected";
    }
    catch (const NodeValidationFailure& failure)
    {
        EXPECT_HAS_SUBSTRING(failure.what(),
                             "Scale shape ({64,3}) and zero point shape ({64,4}) must match");
    }
    catch (...)
    {
        FAIL() << "Deduced type check failed for unexpected reason";
    }
}
|
||||
|
||||
// Supplies a rank-2 scale ({64,3}) and a rank-3 zero point ({64,3,2}) and
// expects a NodeValidationFailure stating that the two shapes must match.
TEST(type_prop, quantize_zero_point_shape_mismatch_different_rank_fails)
{
    const element::Type in_et = element::f32;
    const element::Type out_et = element::i8;
    const AxisSet q_axes{0, 1};
    const auto rounding = op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_INFINITY;

    auto input = make_shared<op::Parameter>(in_et, Shape{64, 3, 480, 640});
    auto scales = make_shared<op::Parameter>(in_et, Shape{64, 3});
    auto offsets = make_shared<op::Parameter>(out_et, Shape{64, 3, 2});

    try
    {
        auto node = make_shared<op::Quantize>(input, scales, offsets, out_et, q_axes, rounding);
        FAIL() << "Mismatch of zero point argument shape with required shape not detected";
    }
    catch (const NodeValidationFailure& failure)
    {
        EXPECT_HAS_SUBSTRING(failure.what(),
                             "Scale shape ({64,3}) and zero point shape ({64,3,2}) must match");
    }
    catch (...)
    {
        FAIL() << "Deduced type check failed for unexpected reason";
    }
}
|
||||
|
||||
// With input, scale and zero point all of dynamic rank, Quantize construction
// is expected to succeed; the output must have the requested element type and
// a dynamic rank.
TEST(type_prop, quantize_partial_all_rank_dynamic_ok)
{
    const element::Type in_et = element::f32;
    const element::Type out_et = element::i8;
    const AxisSet q_axes{0, 1, 2000};
    const auto rounding = op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_INFINITY;

    auto input = make_shared<op::Parameter>(in_et, PartialShape::dynamic());
    auto scales = make_shared<op::Parameter>(in_et, PartialShape::dynamic());
    auto offsets = make_shared<op::Parameter>(out_et, PartialShape::dynamic());

    auto node = make_shared<op::Quantize>(input, scales, offsets, out_et, q_axes, rounding);

    ASSERT_EQ(node->get_output_element_type(0), out_et);
    ASSERT_TRUE(node->get_output_partial_shape(0).rank().is_dynamic());
}
|
||||
|
||||
// Input and zero point have dynamic rank, the scale is {64,?,96}, and three
// quantization axes are given. Construction is expected to succeed; the output
// must have the requested element type and a dynamic rank.
TEST(type_prop,
     quantize_partial_input_rank_dynamic_scale_rank_static_dynamic_zero_point_rank_dynamic_ok)
{
    const element::Type in_et = element::f32;
    const element::Type out_et = element::i8;
    const AxisSet q_axes{0, 1, 2000};
    const auto rounding = op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_INFINITY;
    const auto dyn = Dimension::dynamic();

    auto input = make_shared<op::Parameter>(in_et, PartialShape::dynamic());
    auto scales = make_shared<op::Parameter>(in_et, PartialShape{64, dyn, 96});
    auto offsets = make_shared<op::Parameter>(out_et, PartialShape::dynamic());

    auto node = make_shared<op::Quantize>(input, scales, offsets, out_et, q_axes, rounding);

    ASSERT_EQ(node->get_output_element_type(0), out_et);
    ASSERT_TRUE(node->get_output_partial_shape(0).rank().is_dynamic());
}
|
||||
|
||||
// The scale has rank 3 ({64,?,96}) but only two quantization axes are given;
// input and zero point have dynamic rank. Expects a NodeValidationFailure
// reporting that the scale / zero point rank does not match the axis count.
TEST(
    type_prop,
    quantize_partial_input_rank_dynamic_scale_rank_static_dynamic_zero_point_rank_dynamic_axis_count_inconsistent)
{
    const element::Type in_et = element::f32;
    const element::Type out_et = element::i8;
    const AxisSet q_axes{0, 1};
    const auto rounding = op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_INFINITY;
    const auto dyn = Dimension::dynamic();

    auto input = make_shared<op::Parameter>(in_et, PartialShape::dynamic());
    auto scales = make_shared<op::Parameter>(in_et, PartialShape{64, dyn, 96});
    auto offsets = make_shared<op::Parameter>(out_et, PartialShape::dynamic());

    try
    {
        auto node = make_shared<op::Quantize>(input, scales, offsets, out_et, q_axes, rounding);
        FAIL() << "Mismatch of scale / zero point rank with axis count not detected";
    }
    catch (const NodeValidationFailure& failure)
    {
        EXPECT_HAS_SUBSTRING(
            failure.what(),
            "Scale / zero point rank (3) does not match the number of quantization axes (2)");
    }
    catch (...)
    {
        FAIL() << "Deduced type check failed for unexpected reason";
    }
}
|
||||
|
||||
// Input has dynamic rank; scale ({64,?,96,?}) and zero point ({64,22,?,?}) are
// both rank 4 and four quantization axes are given. Construction is expected to
// succeed; the output must have the requested element type and a dynamic rank.
TEST(
    type_prop,
    quantize_partial_input_rank_dynamic_scale_rank_static_dynamic_zero_point_rank_static_dynamic_ok)
{
    const element::Type in_et = element::f32;
    const element::Type out_et = element::i8;
    const AxisSet q_axes{0, 1, 5, 88};
    const auto rounding = op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_INFINITY;
    const auto dyn = Dimension::dynamic();

    auto input = make_shared<op::Parameter>(in_et, PartialShape::dynamic());
    auto scales = make_shared<op::Parameter>(in_et, PartialShape{64, dyn, 96, dyn});
    auto offsets = make_shared<op::Parameter>(out_et, PartialShape{64, 22, dyn, dyn});

    auto node = make_shared<op::Quantize>(input, scales, offsets, out_et, q_axes, rounding);

    ASSERT_EQ(node->get_output_element_type(0), out_et);
    ASSERT_TRUE(node->get_output_partial_shape(0).rank().is_dynamic());
}
|
||||
|
||||
// Input has dynamic rank; the scale is rank 4 ({64,?,96,?}) while the zero
// point is rank 5 ({64,22,?,?,3}). Expects a NodeValidationFailure stating that
// the scale and zero point shapes must match.
TEST(
    type_prop,
    quantize_partial_input_rank_dynamic_scale_rank_static_dynamic_zero_point_rank_static_dynamic_ranks_inconsistent)
{
    const element::Type in_et = element::f32;
    const element::Type out_et = element::i8;
    const AxisSet q_axes{0, 1, 5, 88};
    const auto rounding = op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_INFINITY;
    const auto dyn = Dimension::dynamic();

    auto input = make_shared<op::Parameter>(in_et, PartialShape::dynamic());
    auto scales = make_shared<op::Parameter>(in_et, PartialShape{64, dyn, 96, dyn});
    auto offsets = make_shared<op::Parameter>(out_et, PartialShape{64, 22, dyn, dyn, 3});

    try
    {
        auto node = make_shared<op::Quantize>(input, scales, offsets, out_et, q_axes, rounding);
        FAIL() << "Inconsistent scale / zero point ranks not detected";
    }
    catch (const NodeValidationFailure& failure)
    {
        EXPECT_HAS_SUBSTRING(
            failure.what(),
            "Scale shape ({64,?,96,?}) and zero point shape ({64,22,?,?,3}) must match");
    }
    catch (...)
    {
        FAIL() << "Deduced type check failed for unexpected reason";
    }
}
|
||||
|
||||
// Input has dynamic rank; scale ({64,?,96,?}) and zero point ({65,22,?,?})
// share rank 4 but differ in their first dimension (64 vs 65). Expects a
// NodeValidationFailure stating that the two shapes must match.
TEST(
    type_prop,
    quantize_partial_input_rank_dynamic_scale_rank_static_dynamic_zero_point_rank_static_dynamic_dims_inconsistent)
{
    const element::Type in_et = element::f32;
    const element::Type out_et = element::i8;
    const AxisSet q_axes{0, 1, 5, 88};
    const auto rounding = op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_INFINITY;
    const auto dyn = Dimension::dynamic();

    auto input = make_shared<op::Parameter>(in_et, PartialShape::dynamic());
    auto scales = make_shared<op::Parameter>(in_et, PartialShape{64, dyn, 96, dyn});
    auto offsets = make_shared<op::Parameter>(out_et, PartialShape{65, 22, dyn, dyn});

    try
    {
        auto node = make_shared<op::Quantize>(input, scales, offsets, out_et, q_axes, rounding);
        FAIL() << "Inconsistent scale / zero point dims not detected";
    }
    catch (const NodeValidationFailure& failure)
    {
        EXPECT_HAS_SUBSTRING(
            failure.what(),
            "Scale shape ({64,?,96,?}) and zero point shape ({65,22,?,?}) must match");
    }
    catch (...)
    {
        FAIL() << "Deduced type check failed for unexpected reason";
    }
}
|
||||
|
||||
// Input is {2,4,6,?,10,?}, scale is {4,?,?}, zero point is {?,8,?} and the
// quantization axes are {1,3,5}. Construction is expected to succeed; the
// output must have the requested element type and shape {2,4,6,8,10,?}.
TEST(
    type_prop,
    quantize_partial_input_static_rank_dynamic_scale_rank_static_dynamic_zero_point_rank_static_dynamic_ok)
{
    const element::Type in_et = element::f32;
    const element::Type out_et = element::i8;
    const AxisSet q_axes{1, 3, 5};
    const auto rounding = op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_INFINITY;
    const auto dyn = Dimension::dynamic();

    auto input = make_shared<op::Parameter>(in_et, PartialShape{2, 4, 6, dyn, 10, dyn});
    auto scales = make_shared<op::Parameter>(in_et, PartialShape{4, dyn, dyn});
    auto offsets = make_shared<op::Parameter>(out_et, PartialShape{dyn, 8, dyn});

    auto node = make_shared<op::Quantize>(input, scales, offsets, out_et, q_axes, rounding);

    // Shape the test expects on the node's single output.
    const PartialShape expected{2, 4, 6, 8, 10, dyn};

    ASSERT_EQ(node->get_output_element_type(0), out_et);
    ASSERT_TRUE(node->get_output_partial_shape(0).same_scheme(expected));
}
|
||||
|
||||
// Input is rank 6 ({2,4,6,?,10,?}) and the quantization axes {1,3,6} include
// axis 6. Expects a NodeValidationFailure that names axis 6 as not being less
// than the input rank.
TEST(
    type_prop,
    quantize_partial_input_static_rank_dynamic_scale_rank_static_dynamic_zero_point_rank_static_dynamic_axis_oob)
{
    const element::Type in_et = element::f32;
    const element::Type out_et = element::i8;
    const AxisSet q_axes{1, 3, 6};
    const auto rounding = op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_INFINITY;
    const auto dyn = Dimension::dynamic();

    auto input = make_shared<op::Parameter>(in_et, PartialShape{2, 4, 6, dyn, 10, dyn});
    auto scales = make_shared<op::Parameter>(in_et, PartialShape{4, dyn, dyn});
    auto offsets = make_shared<op::Parameter>(out_et, PartialShape{dyn, 8, dyn});

    try
    {
        auto node = make_shared<op::Quantize>(input, scales, offsets, out_et, q_axes, rounding);
        FAIL() << "Out-of-bound quantization axis not detected";
    }
    catch (const NodeValidationFailure& failure)
    {
        EXPECT_HAS_SUBSTRING(failure.what(),
                             "Quantization axis (6) must be less than input shape rank (6)");
    }
    catch (...)
    {
        FAIL() << "Deduced type check failed for unexpected reason";
    }
}
|
||||
|
||||
// Input is {2,5,6,?,10,?} with quantization axes {1,3,5}; scale is {4,?,?} and
// zero point is {?,8,?}. Expects a NodeValidationFailure reporting that the
// scale / zero point shape {4,8,?} must match the input shape at those axes.
TEST(
    type_prop,
    quantize_partial_input_static_rank_dynamic_scale_rank_static_dynamic_zero_point_rank_static_dynamic_dims_inconsistent)
{
    const element::Type in_et = element::f32;
    const element::Type out_et = element::i8;
    const AxisSet q_axes{1, 3, 5};
    const auto rounding = op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_INFINITY;
    const auto dyn = Dimension::dynamic();

    auto input = make_shared<op::Parameter>(in_et, PartialShape{2, 5, 6, dyn, 10, dyn});
    auto scales = make_shared<op::Parameter>(in_et, PartialShape{4, dyn, dyn});
    auto offsets = make_shared<op::Parameter>(out_et, PartialShape{dyn, 8, dyn});

    try
    {
        auto node = make_shared<op::Quantize>(input, scales, offsets, out_et, q_axes, rounding);
        FAIL() << "Inconsistent dimensions not detected";
    }
    catch (const NodeValidationFailure& failure)
    {
        EXPECT_HAS_SUBSTRING(
            failure.what(),
            "Scale / zero point shape ({4,8,?}) must match input shape ({2,5,6,?,10,?}) "
            "at the quantization axes (AxisSet{1, 3, 5})");
    }
    catch (...)
    {
        FAIL() << "Deduced type check failed for unexpected reason";
    }
}
|
Loading…
Reference in New Issue
Block a user