[core] Migrate FakeQuantize operator to new API (#20895)

* Migrate FakeQuantize operator to new API
* Minor refactor in the FakeQuantize reference: re-use existing functions in `get_inner_stride`

parent 87cef53088
commit 6210deba49
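Not part of the patch below, just orientation: a minimal, hypothetical sketch of how the new Tensor-based evaluate entry point is called after this migration. The Parameter setup, shapes, levels and values are illustrative only; previously the same call had to go through the deprecated HostTensorVector overload.

#include <cstddef>
#include <iostream>
#include <memory>

#include "openvino/op/fake_quantize.hpp"
#include "openvino/op/parameter.hpp"
#include "openvino/runtime/tensor.hpp"

int main() {
    using namespace ov;

    // FakeQuantize takes five inputs: data, input_low, input_high, output_low, output_high.
    const auto scalar = [] {
        return std::make_shared<op::v0::Parameter>(element::f32, Shape{});
    };
    const auto data = std::make_shared<op::v0::Parameter>(element::f32, Shape{4});
    const auto fq = std::make_shared<op::v0::FakeQuantize>(data, scalar(), scalar(), scalar(), scalar(), 5);

    // New-style evaluate: plain ov::Tensor in and out, no HostTensor wrappers.
    float data_values[] = {1.0f, 2.0f, 6.0f, 9.0f};
    float low = 0.0f;
    float high = 10.0f;
    TensorVector inputs{Tensor(element::f32, Shape{4}, data_values),
                        Tensor(element::f32, Shape{}, &low),
                        Tensor(element::f32, Shape{}, &high),
                        Tensor(element::f32, Shape{}, &low),
                        Tensor(element::f32, Shape{}, &high)};
    TensorVector outputs{Tensor(element::f32, Shape{4})};

    if (fq->evaluate(outputs, inputs)) {
        for (size_t i = 0; i < outputs[0].get_size(); ++i) {
            std::cout << outputs[0].data<float>()[i] << ' ';
        }
        std::cout << '\n';
    }
    return 0;
}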
@@ -67,9 +67,7 @@ public:
         m_auto_broadcast = auto_broadcast;
     }

-    OPENVINO_SUPPRESS_DEPRECATED_START
-    bool evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const override;
-    OPENVINO_SUPPRESS_DEPRECATED_END
+    bool evaluate(TensorVector& outputs, const TensorVector& inputs) const override;
     bool has_evaluate() const override;
     bool constant_fold(OutputVector& output_values, const OutputVector& inputs_values) override {
         return false;
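The evaluate overload declared above ends up in ov::reference::fake_quantize. For orientation only, a hypothetical scalar sketch of the per-element behaviour described by the FakeQuantize-1 specification (the real kernel also handles broadcasting of the range inputs, which this sketch ignores):

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <iostream>

// Scalar version of the documented FakeQuantize-1 behaviour (illustrative only).
float fake_quantize_scalar(float x, float in_low, float in_high, float out_low, float out_high, size_t levels) {
    if (x <= std::min(in_low, in_high)) {
        return out_low;
    }
    if (x > std::max(in_low, in_high)) {
        return out_high;
    }
    const float steps = static_cast<float>(levels - 1);
    return std::round((x - in_low) / (in_high - in_low) * steps) / steps * (out_high - out_low) + out_low;
}

int main() {
    // Quantize to 5 levels over the range [0, 10].
    for (const float x : {-1.f, 1.f, 2.f, 6.f, 9.f, 11.f}) {
        std::cout << x << " -> " << fake_quantize_scalar(x, 0.f, 10.f, 0.f, 10.f, 5) << '\n';
    }
    return 0;
}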
@@ -318,19 +318,15 @@ std::tuple<size_t, size_t> get_inner_stride(size_t num_output_elements,
         return (last == 1 && dim > 1) || (last > 1 && dim == 1);
     });
     if (it == shape.rend()) {
-        const size_t num_elements = shape_size(shape);
-        return std::tuple<size_t, size_t>{
-            num_elements,
-            last == 1 ? current_output_inner_stride : std::min(current_output_inner_stride, num_elements)};
+        const auto num_elements = shape_size(shape);
+        return {num_elements,
+                last == 1 ? current_output_inner_stride : std::min(current_output_inner_stride, num_elements)};
     }
-    const size_t idx = std::distance(it, shape.rbegin()) + static_cast<int64_t>(shape.size());
-    const size_t inner_stride =
-        std::accumulate(shape.begin() + idx, shape.end(), static_cast<size_t>(1), std::multiplies<size_t>());
-    const size_t output_inner_stride = std::accumulate(output_shape.begin() + output_shape.size() - shape.size() + idx,
-                                                       output_shape.end(),
-                                                       static_cast<size_t>(1),
-                                                       std::multiplies<size_t>());
-    return std::tuple<size_t, size_t>{inner_stride, std::min(current_output_inner_stride, output_inner_stride)};
+    const auto idx = std::distance(it, shape.rbegin()) + static_cast<std::ptrdiff_t>(shape.size());
+    const auto inner_stride = shape_size(shape.begin() + idx, shape.end());
+    const auto output_inner_stride =
+        shape_size(output_shape.begin() + (output_shape.size() - shape.size() + idx), output_shape.end());
+    return {inner_stride, std::min(current_output_inner_stride, output_inner_stride)};
 }

 template <typename T, typename F>
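The refactor above replaces the hand-written std::accumulate products with the iterator overload of shape_size. A small sketch of the equivalence, assuming the shape_size(first, last) overload from openvino/core/shape.hpp; the shape and split point below are illustrative:

#include <cassert>
#include <cstddef>
#include <functional>
#include <numeric>

#include "openvino/core/shape.hpp"

int main() {
    const ov::Shape shape{2, 3, 4, 5};
    const size_t idx = 2;  // illustrative split point, as used in get_inner_stride

    // What the old code spelled out with std::accumulate ...
    const size_t via_accumulate =
        std::accumulate(shape.begin() + idx, shape.end(), static_cast<size_t>(1), std::multiplies<size_t>());

    // ... is exactly what the iterator overload of shape_size computes.
    const size_t via_shape_size = ov::shape_size(shape.begin() + idx, shape.end());

    assert(via_accumulate == via_shape_size);
    assert(via_shape_size == 4 * 5);
    return 0;
}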
@@ -2,51 +2,81 @@
 // SPDX-License-Identifier: Apache-2.0
 //

-#include "ngraph/op/fake_quantize.hpp"
-
-#include <memory>
+#include "openvino/op/fake_quantize.hpp"

+#include "element_visitor.hpp"
 #include "itt.hpp"
-#include "ngraph/attribute_visitor.hpp"
-#include "ngraph/op/constant.hpp"
-#include "ngraph/op/convert.hpp"
-#include "ngraph/op/select.hpp"
-#include "ngraph/shape.hpp"
-#include "ngraph/type/element_type.hpp"
 #include "openvino/reference/fake_quantize.hpp"

-using namespace std;
-using namespace ngraph;
-
-op::FakeQuantize::FakeQuantize() : Op(), m_levels() {}
-
-op::FakeQuantize::FakeQuantize(const Output<Node>& data,
-                               const Output<Node>& input_low,
-                               const Output<Node>& input_high,
-                               const Output<Node>& output_low,
-                               const Output<Node>& output_high,
-                               size_t levels,
-                               const AutoBroadcastSpec& auto_broadcast)
+namespace ov {
+namespace op {
+namespace fake_quantize {
+
+struct Evaluate : element::NoAction<bool> {
+    using element::NoAction<bool>::visit;
+
+    template <element::Type_t ET, class T = fundamental_type_for<ET>>
+    static result_type visit(const Tensor& arg0,
+                             const Tensor& arg1,
+                             const Tensor& arg2,
+                             const Tensor& arg3,
+                             const Tensor& arg4,
+                             Tensor& out,
+                             const Shape& shape0,
+                             const Shape& shape1,
+                             const Shape& shape2,
+                             const Shape& shape3,
+                             const Shape& shape4,
+                             const size_t levels,
+                             const AutoBroadcastSpec& broadcast_spec) {
+        reference::fake_quantize(arg0.data<const T>(),
+                                 arg1.data<const T>(),
+                                 arg2.data<const T>(),
+                                 arg3.data<const T>(),
+                                 arg4.data<const T>(),
+                                 out.data<T>(),
+                                 shape0,
+                                 shape1,
+                                 shape2,
+                                 shape3,
+                                 shape4,
+                                 levels,
+                                 broadcast_spec);
+        return true;
+    }
+};
+}  // namespace fake_quantize
+namespace v0 {
+
+FakeQuantize::FakeQuantize() : Op(), m_levels() {}
+
+FakeQuantize::FakeQuantize(const Output<Node>& data,
+                           const Output<Node>& input_low,
+                           const Output<Node>& input_high,
+                           const Output<Node>& output_low,
+                           const Output<Node>& output_high,
+                           size_t levels,
+                           const AutoBroadcastSpec& auto_broadcast)
     : Op({data, input_low, input_high, output_low, output_high}),
       m_levels(levels),
       m_auto_broadcast(auto_broadcast) {
     constructor_validate_and_infer_types();
 }

-void op::FakeQuantize::validate_and_infer_types() {
+void FakeQuantize::validate_and_infer_types() {
     OV_OP_SCOPE(v0_FakeQuantize_validate_and_infer_types);
-    ov::PartialShape data_pshape = get_input_partial_shape(0);
+    auto data_pshape = get_input_partial_shape(0);

     for (auto i = 1; i <= 4; i++) {
         if (m_auto_broadcast.m_type == op::AutoBroadcastType::NONE) {
             NODE_VALIDATION_CHECK(this,
-                                  ov::PartialShape::merge_into(data_pshape, get_input_partial_shape(i)),
+                                  PartialShape::merge_into(data_pshape, get_input_partial_shape(i)),
                                   "Argument shapes are inconsistent.");
         } else if (m_auto_broadcast.m_type == op::AutoBroadcastType::NUMPY ||
                    m_auto_broadcast.m_type == op::AutoBroadcastType::PDPD) {
             NODE_VALIDATION_CHECK(
                 this,
-                ov::PartialShape::broadcast_merge_into(data_pshape, get_input_partial_shape(i), m_auto_broadcast),
+                PartialShape::broadcast_merge_into(data_pshape, get_input_partial_shape(i), m_auto_broadcast),
                 "Argument shapes are inconsistent.");
         } else {
             NODE_VALIDATION_CHECK(this, false, "Unsupported auto broadcast specification");
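For the NUMPY/PDPD branch of validate_and_infer_types above, PartialShape::broadcast_merge_into both checks compatibility and merges the shapes. A small, hypothetical sketch of that behaviour; the shapes are illustrative (e.g. per-channel input_low/input_high):

#include <iostream>

#include "openvino/core/partial_shape.hpp"
#include "openvino/op/util/attr_types.hpp"

int main() {
    ov::PartialShape data{2, 3, 224, 224};
    const ov::PartialShape per_channel{1, 3, 1, 1};  // e.g. input_low/input_high given per channel

    // Mirrors the NUMPY/PDPD branch: the merge succeeds and leaves the data shape unchanged.
    const bool ok = ov::PartialShape::broadcast_merge_into(data, per_channel, ov::op::AutoBroadcastType::NUMPY);

    std::cout << std::boolalpha << ok << " -> " << data << '\n';  // true -> [2,3,224,224]
    return 0;
}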
@@ -55,103 +85,64 @@ void op::FakeQuantize::validate_and_infer_types() {
     set_output_type(0, get_input_element_type(0), get_input_partial_shape(0));
 }

-bool ngraph::op::v0::FakeQuantize::visit_attributes(AttributeVisitor& visitor) {
+bool FakeQuantize::visit_attributes(AttributeVisitor& visitor) {
     OV_OP_SCOPE(v0_FakeQuantize_visit_attributes);
     visitor.on_attribute("levels", m_levels);
     visitor.on_attribute("auto_broadcast", m_auto_broadcast);
     return true;
 }

-shared_ptr<Node> op::FakeQuantize::clone_with_new_inputs(const OutputVector& new_args) const {
+std::shared_ptr<Node> FakeQuantize::clone_with_new_inputs(const OutputVector& new_args) const {
     OV_OP_SCOPE(v0_FakeQuantize_clone_with_new_inputs);
     check_new_args_count(this, new_args);
-    return make_shared<FakeQuantize>(new_args.at(0),  // X
-                                     new_args.at(1),  // input_low
-                                     new_args.at(2),  // input_high
-                                     new_args.at(3),  // output_low
-                                     new_args.at(4),  // output_high
-                                     m_levels,
-                                     m_auto_broadcast);
+    return std::make_shared<FakeQuantize>(new_args.at(0),  // X
+                                          new_args.at(1),  // input_low
+                                          new_args.at(2),  // input_high
+                                          new_args.at(3),  // output_low
+                                          new_args.at(4),  // output_high
+                                          m_levels,
+                                          m_auto_broadcast);
 }

-OPENVINO_SUPPRESS_DEPRECATED_START
-namespace fakequantizeop {
-namespace {
-template <element::Type_t ET>
-bool evaluate(const HostTensorPtr& arg0,
-              const HostTensorPtr& arg1,
-              const HostTensorPtr& arg2,
-              const HostTensorPtr& arg3,
-              const HostTensorPtr& arg4,
-              const HostTensorPtr& out,
-              const ngraph::op::FakeQuantize* parent) {
-    using T = typename element_type_traits<ET>::value_type;
-    out->set_shape(arg0->get_shape());
-    out->set_element_type(arg0->get_element_type());
-    ov::reference::fake_quantize<T>(arg0->get_data_ptr<const T>(),
-                                    arg1->get_data_ptr<const T>(),
-                                    arg2->get_data_ptr<const T>(),
-                                    arg3->get_data_ptr<const T>(),
-                                    arg4->get_data_ptr<const T>(),
-                                    out->get_data_ptr<T>(),
-                                    arg0->get_shape(),
-                                    arg1->get_shape(),
-                                    arg2->get_shape(),
-                                    arg3->get_shape(),
-                                    arg4->get_shape(),
-                                    parent->get_levels(),
-                                    parent->get_auto_broadcast());
-    return true;
-}
-
-bool evaluate_fakequantize(const HostTensorPtr& arg0,
-                           const HostTensorPtr& arg1,
-                           const HostTensorPtr& arg2,
-                           const HostTensorPtr& arg3,
-                           const HostTensorPtr& arg4,
-                           const HostTensorPtr& out,
-                           const ngraph::op::FakeQuantize* parent) {
-    bool rc = true;
-    switch (arg0->get_element_type()) {
-        OPENVINO_TYPE_CASE(evaluate_fakequantize, i32, arg0, arg1, arg2, arg3, arg4, out, parent);
-        OPENVINO_TYPE_CASE(evaluate_fakequantize, i64, arg0, arg1, arg2, arg3, arg4, out, parent);
-        OPENVINO_TYPE_CASE(evaluate_fakequantize, u32, arg0, arg1, arg2, arg3, arg4, out, parent);
-        OPENVINO_TYPE_CASE(evaluate_fakequantize, u64, arg0, arg1, arg2, arg3, arg4, out, parent);
-        OPENVINO_TYPE_CASE(evaluate_fakequantize, f16, arg0, arg1, arg2, arg3, arg4, out, parent);
-        OPENVINO_TYPE_CASE(evaluate_fakequantize, f32, arg0, arg1, arg2, arg3, arg4, out, parent);
-    default:
-        rc = false;
-        break;
-    }
-    return rc;
-}
-} // namespace
-} // namespace fakequantizeop
-
-bool ngraph::op::FakeQuantize::evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const {
+bool FakeQuantize::evaluate(TensorVector& outputs, const TensorVector& inputs) const {
     OV_OP_SCOPE(v0_FakeQuantize_evaluate);
-    return fakequantizeop::evaluate_fakequantize(inputs[0],
-                                                 inputs[1],
-                                                 inputs[2],
-                                                 inputs[3],
-                                                 inputs[4],
-                                                 outputs[0],
-                                                 this);
+    OPENVINO_ASSERT(outputs.size() == 1);
+    OPENVINO_ASSERT(inputs.size() == 5);
+
+    const auto& shape0 = inputs[0].get_shape();
+    outputs[0].set_shape(shape0);
+
+    using namespace ov::element;
+    return IfTypeOf<f16, f32, i32, i64, u32, u64>::apply<fake_quantize::Evaluate>(inputs[0].get_element_type(),
+                                                                                  inputs[0],
+                                                                                  inputs[1],
+                                                                                  inputs[2],
+                                                                                  inputs[3],
+                                                                                  inputs[4],
+                                                                                  outputs[0],
+                                                                                  shape0,
+                                                                                  inputs[1].get_shape(),
+                                                                                  inputs[2].get_shape(),
+                                                                                  inputs[3].get_shape(),
+                                                                                  inputs[4].get_shape(),
+                                                                                  get_levels(),
+                                                                                  get_auto_broadcast());
 }

-bool ngraph::op::FakeQuantize::has_evaluate() const {
+bool FakeQuantize::has_evaluate() const {
     OV_OP_SCOPE(v0_FakeQuantize_has_evaluate);
     switch (get_input_element_type(0)) {
-    case ngraph::element::i32:
-    case ngraph::element::i64:
-    case ngraph::element::u32:
-    case ngraph::element::u64:
-    case ngraph::element::f16:
-    case ngraph::element::f32:
+    case element::f16:
+    case element::f32:
+    case element::i32:
+    case element::i64:
+    case element::u32:
+    case element::u64:
         return true;
     default:
-        break;
+        return false;
     }
-    return false;
 }
+} // namespace v0
+} // namespace op
+} // namespace ov
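The IfTypeOf/NoAction pair used in the new evaluate comes from OpenVINO's internal element_visitor.hpp. The sketch below is not that header; it is a simplified, hypothetical re-implementation of the same dispatch idea, shown only to make the pattern readable: walk a compile-time list of element types, call Visitor::visit<ET>(...) for the first runtime match, and otherwise fall back to the NoAction base, which replaces the old OPENVINO_TYPE_CASE switch.

#include <iostream>
#include <utility>
#include <vector>

// Hypothetical stand-in for OpenVINO's element_visitor.hpp machinery (names approximated).
enum class Et { f16, f32, i32, i64 };

template <class R>
struct NoAction {
    using result_type = R;
    template <class... Args>
    static R visit(Args&&...) {
        return R{};  // unsupported element type -> default result (false for bool)
    }
};

template <Et... Types>
struct IfTypeOf;

template <>
struct IfTypeOf<> {
    template <class Visitor, class... Args>
    static typename Visitor::result_type apply(Et, Args&&... args) {
        return Visitor::visit(std::forward<Args>(args)...);  // nothing matched -> NoAction fallback
    }
};

template <Et Head, Et... Tail>
struct IfTypeOf<Head, Tail...> {
    template <class Visitor, class... Args>
    static typename Visitor::result_type apply(Et et, Args&&... args) {
        return et == Head ? Visitor::template visit<Head>(std::forward<Args>(args)...)
                          : IfTypeOf<Tail...>::template apply<Visitor>(et, std::forward<Args>(args)...);
    }
};

// A visitor shaped like fake_quantize::Evaluate: one templated visit per element type.
struct SumAsDouble : NoAction<bool> {
    using NoAction<bool>::visit;

    template <Et ET>
    static bool visit(const std::vector<float>& data, double& sum) {
        sum = 0.0;
        for (const auto v : data) {
            sum += v;
        }
        return true;
    }
};

int main() {
    const std::vector<float> data{1.f, 2.f, 3.f};
    double sum = 0.0;

    // f32 is in the type list -> SumAsDouble::visit<Et::f32> runs and returns true.
    const bool handled = IfTypeOf<Et::f16, Et::f32, Et::i32>::apply<SumAsDouble>(Et::f32, data, sum);
    std::cout << std::boolalpha << handled << ' ' << sum << '\n';  // true 6

    // i64 is not in the list -> NoAction::visit -> false.
    std::cout << IfTypeOf<Et::f16, Et::f32, Et::i32>::apply<SumAsDouble>(Et::i64, data, sum) << '\n';
    return 0;
}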