[core] Migrate FakeQuantize operator to new API (#20895)

* Migrate FakeQuantize operator to new API

* Minor refactor in FakeQuantize reference
re-use existing functions in `get_inner_stride`
This commit is contained in:
Pawel Raasz 2023-11-08 10:52:21 +01:00 committed by GitHub
parent 87cef53088
commit 6210deba49
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 104 additions and 119 deletions

View File

@ -67,9 +67,7 @@ public:
m_auto_broadcast = auto_broadcast;
}
OPENVINO_SUPPRESS_DEPRECATED_START
bool evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const override;
OPENVINO_SUPPRESS_DEPRECATED_END
bool evaluate(TensorVector& outputs, const TensorVector& inputs) const override;
bool has_evaluate() const override;
bool constant_fold(OutputVector& output_values, const OutputVector& inputs_values) override {
return false;

View File

@ -318,19 +318,15 @@ std::tuple<size_t, size_t> get_inner_stride(size_t num_output_elements,
// NOTE(review): this span is the tail of get_inner_stride() as captured from a
// commit-diff view; the function signature/opening is elided into the hunk header
// above, and removed-side (pre-change) and added-side (post-change) lines are
// interleaved below without diff markers. Code bytes are preserved as-is.
return (last == 1 && dim > 1) || (last > 1 && dim == 1);
});
if (it == shape.rend()) {
// NOTE(review): removed-side return — superseded by the three lines that follow.
const size_t num_elements = shape_size(shape);
return std::tuple<size_t, size_t>{
num_elements,
last == 1 ? current_output_inner_stride : std::min(current_output_inner_stride, num_elements)};
// NOTE(review): added-side return — same value, expressed with list-initialization.
const auto num_elements = shape_size(shape);
return {num_elements,
last == 1 ? current_output_inner_stride : std::min(current_output_inner_stride, num_elements)};
}
// NOTE(review): removed-side computation of idx / inner_stride / output_inner_stride
// via std::accumulate — replaced by the shape_size(first, last) overloads below.
const size_t idx = std::distance(it, shape.rbegin()) + static_cast<int64_t>(shape.size());
const size_t inner_stride =
std::accumulate(shape.begin() + idx, shape.end(), static_cast<size_t>(1), std::multiplies<size_t>());
const size_t output_inner_stride = std::accumulate(output_shape.begin() + output_shape.size() - shape.size() + idx,
output_shape.end(),
static_cast<size_t>(1),
std::multiplies<size_t>());
return std::tuple<size_t, size_t>{inner_stride, std::min(current_output_inner_stride, output_inner_stride)};
// NOTE(review): added-side equivalent — re-uses shape_size() range overload and
// std::ptrdiff_t for the iterator-distance arithmetic.
const auto idx = std::distance(it, shape.rbegin()) + static_cast<std::ptrdiff_t>(shape.size());
const auto inner_stride = shape_size(shape.begin() + idx, shape.end());
const auto output_inner_stride =
shape_size(output_shape.begin() + (output_shape.size() - shape.size() + idx), output_shape.end());
return {inner_stride, std::min(current_output_inner_stride, output_inner_stride)};
}
template <typename T, typename F>

View File

@ -2,51 +2,81 @@
// SPDX-License-Identifier: Apache-2.0
//
#include "ngraph/op/fake_quantize.hpp"
#include <memory>
#include "openvino/op/fake_quantize.hpp"
#include "element_visitor.hpp"
#include "itt.hpp"
#include "ngraph/attribute_visitor.hpp"
#include "ngraph/op/constant.hpp"
#include "ngraph/op/convert.hpp"
#include "ngraph/op/select.hpp"
#include "ngraph/shape.hpp"
#include "ngraph/type/element_type.hpp"
#include "openvino/reference/fake_quantize.hpp"
using namespace std;
using namespace ngraph;
namespace ov {
namespace op {
namespace fake_quantize {
// Element-type dispatch functor for FakeQuantize::evaluate().
// NOTE: the removed-side (pre-migration) op::FakeQuantize constructor lines that a
// diff view interleaved here are dropped — those constructors are defined once,
// below, inside namespace v0; keeping both copies is a redefinition.
struct Evaluate : element::NoAction<bool> {
    using element::NoAction<bool>::visit;

    // Runs the typed reference implementation for element type ET over the five
    // inputs (data, input_low, input_high, output_low, output_high).
    // Returns true to signal the type was handled.
    template <element::Type_t ET, class T = fundamental_type_for<ET>>
    static result_type visit(const Tensor& arg0,
                             const Tensor& arg1,
                             const Tensor& arg2,
                             const Tensor& arg3,
                             const Tensor& arg4,
                             Tensor& out,
                             const Shape& shape0,
                             const Shape& shape1,
                             const Shape& shape2,
                             const Shape& shape3,
                             const Shape& shape4,
                             const size_t levels,
                             const AutoBroadcastSpec& broadcast_spec) {
        reference::fake_quantize(arg0.data<const T>(),
                                 arg1.data<const T>(),
                                 arg2.data<const T>(),
                                 arg3.data<const T>(),
                                 arg4.data<const T>(),
                                 out.data<T>(),
                                 shape0,
                                 shape1,
                                 shape2,
                                 shape3,
                                 shape4,
                                 levels,
                                 broadcast_spec);
        return true;
    }
};
} // namespace fake_quantize
namespace v0 {
// Default constructor; m_levels is value-initialized (zero). Inputs/attributes
// are expected to be populated later (e.g. by deserialization) before validation.
FakeQuantize::FakeQuantize() : Op(), m_levels() {}
// Constructs a FakeQuantize node over (data, input_low, input_high, output_low,
// output_high) with `levels` quantization levels and the given auto-broadcast
// rule, then immediately runs shape/type validation.
FakeQuantize::FakeQuantize(const Output<Node>& data,
const Output<Node>& input_low,
const Output<Node>& input_high,
const Output<Node>& output_low,
const Output<Node>& output_high,
size_t levels,
const AutoBroadcastSpec& auto_broadcast)
: Op({data, input_low, input_high, output_low, output_high}),
m_levels(levels),
m_auto_broadcast(auto_broadcast) {
constructor_validate_and_infer_types();
}
void op::FakeQuantize::validate_and_infer_types() {
void FakeQuantize::validate_and_infer_types() {
OV_OP_SCOPE(v0_FakeQuantize_validate_and_infer_types);
ov::PartialShape data_pshape = get_input_partial_shape(0);
auto data_pshape = get_input_partial_shape(0);
for (auto i = 1; i <= 4; i++) {
if (m_auto_broadcast.m_type == op::AutoBroadcastType::NONE) {
NODE_VALIDATION_CHECK(this,
ov::PartialShape::merge_into(data_pshape, get_input_partial_shape(i)),
PartialShape::merge_into(data_pshape, get_input_partial_shape(i)),
"Argument shapes are inconsistent.");
} else if (m_auto_broadcast.m_type == op::AutoBroadcastType::NUMPY ||
m_auto_broadcast.m_type == op::AutoBroadcastType::PDPD) {
NODE_VALIDATION_CHECK(
this,
ov::PartialShape::broadcast_merge_into(data_pshape, get_input_partial_shape(i), m_auto_broadcast),
PartialShape::broadcast_merge_into(data_pshape, get_input_partial_shape(i), m_auto_broadcast),
"Argument shapes are inconsistent.");
} else {
NODE_VALIDATION_CHECK(this, false, "Unsupported auto broadcast specification");
@ -55,103 +85,64 @@ void op::FakeQuantize::validate_and_infer_types() {
set_output_type(0, get_input_element_type(0), get_input_partial_shape(0));
}
bool ngraph::op::v0::FakeQuantize::visit_attributes(AttributeVisitor& visitor) {
bool FakeQuantize::visit_attributes(AttributeVisitor& visitor) {
OV_OP_SCOPE(v0_FakeQuantize_visit_attributes);
visitor.on_attribute("levels", m_levels);
visitor.on_attribute("auto_broadcast", m_auto_broadcast);
return true;
}
shared_ptr<Node> op::FakeQuantize::clone_with_new_inputs(const OutputVector& new_args) const {
std::shared_ptr<Node> FakeQuantize::clone_with_new_inputs(const OutputVector& new_args) const {
OV_OP_SCOPE(v0_FakeQuantize_clone_with_new_inputs);
check_new_args_count(this, new_args);
return make_shared<FakeQuantize>(new_args.at(0), // X
new_args.at(1), // input_low
new_args.at(2), // input_high
new_args.at(3), // output_low
new_args.at(4), // output_high
m_levels,
m_auto_broadcast);
return std::make_shared<FakeQuantize>(new_args.at(0), // X
new_args.at(1), // input_low
new_args.at(2), // input_high
new_args.at(3), // output_low
new_args.at(4), // output_high
m_levels,
m_auto_broadcast);
}
OPENVINO_SUPPRESS_DEPRECATED_START
namespace fakequantizeop {
namespace {
template <element::Type_t ET>
bool evaluate(const HostTensorPtr& arg0,
const HostTensorPtr& arg1,
const HostTensorPtr& arg2,
const HostTensorPtr& arg3,
const HostTensorPtr& arg4,
const HostTensorPtr& out,
const ngraph::op::FakeQuantize* parent) {
bool FakeQuantize::evaluate(TensorVector& outputs, const TensorVector& inputs) const {
OV_OP_SCOPE(v0_FakeQuantize_evaluate);
using T = typename element_type_traits<ET>::value_type;
out->set_shape(arg0->get_shape());
out->set_element_type(arg0->get_element_type());
ov::reference::fake_quantize<T>(arg0->get_data_ptr<const T>(),
arg1->get_data_ptr<const T>(),
arg2->get_data_ptr<const T>(),
arg3->get_data_ptr<const T>(),
arg4->get_data_ptr<const T>(),
out->get_data_ptr<T>(),
arg0->get_shape(),
arg1->get_shape(),
arg2->get_shape(),
arg3->get_shape(),
arg4->get_shape(),
parent->get_levels(),
parent->get_auto_broadcast());
return true;
OPENVINO_ASSERT(outputs.size() == 1);
OPENVINO_ASSERT(inputs.size() == 5);
const auto& shape0 = inputs[0].get_shape();
outputs[0].set_shape(shape0);
using namespace ov::element;
return IfTypeOf<f16, f32, i32, i64, u32, u64>::apply<fake_quantize::Evaluate>(inputs[0].get_element_type(),
inputs[0],
inputs[1],
inputs[2],
inputs[3],
inputs[4],
outputs[0],
shape0,
inputs[1].get_shape(),
inputs[2].get_shape(),
inputs[3].get_shape(),
inputs[4].get_shape(),
get_levels(),
get_auto_broadcast());
}
bool evaluate_fakequantize(const HostTensorPtr& arg0,
const HostTensorPtr& arg1,
const HostTensorPtr& arg2,
const HostTensorPtr& arg3,
const HostTensorPtr& arg4,
const HostTensorPtr& out,
const ngraph::op::FakeQuantize* parent) {
bool rc = true;
switch (arg0->get_element_type()) {
OPENVINO_TYPE_CASE(evaluate_fakequantize, i32, arg0, arg1, arg2, arg3, arg4, out, parent);
OPENVINO_TYPE_CASE(evaluate_fakequantize, i64, arg0, arg1, arg2, arg3, arg4, out, parent);
OPENVINO_TYPE_CASE(evaluate_fakequantize, u32, arg0, arg1, arg2, arg3, arg4, out, parent);
OPENVINO_TYPE_CASE(evaluate_fakequantize, u64, arg0, arg1, arg2, arg3, arg4, out, parent);
OPENVINO_TYPE_CASE(evaluate_fakequantize, f16, arg0, arg1, arg2, arg3, arg4, out, parent);
OPENVINO_TYPE_CASE(evaluate_fakequantize, f32, arg0, arg1, arg2, arg3, arg4, out, parent);
default:
rc = false;
break;
}
return rc;
}
} // namespace
} // namespace fakequantizeop
bool ngraph::op::FakeQuantize::evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const {
OV_OP_SCOPE(v0_FakeQuantize_evaluate);
return fakequantizeop::evaluate_fakequantize(inputs[0],
inputs[1],
inputs[2],
inputs[3],
inputs[4],
outputs[0],
this);
}
bool ngraph::op::FakeQuantize::has_evaluate() const {
bool FakeQuantize::has_evaluate() const {
OV_OP_SCOPE(v0_FakeQuantize_has_evaluate);
switch (get_input_element_type(0)) {
case ngraph::element::i32:
case ngraph::element::i64:
case ngraph::element::u32:
case ngraph::element::u64:
case ngraph::element::f16:
case ngraph::element::f32:
case element::f16:
case element::f32:
case element::i32:
case element::i64:
case element::u32:
case element::u64:
return true;
default:
break;
return false;
}
return false;
}
} // namespace v0
} // namespace op
} // namespace ov