Extend RIC fusion transformation to support quantized weights (#12028)
This commit is contained in:
@@ -24,13 +24,17 @@ namespace ngraph {
|
||||
namespace pass {
|
||||
namespace ric_attr {
|
||||
|
||||
namespace {
|
||||
std::shared_ptr<opset8::Constant> create_1d_const(const std::vector<int64_t>& values) {
|
||||
return opset8::Constant::create(ov::element::i64, ov::Shape{values.size()}, values);
|
||||
}
|
||||
} // namespace
|
||||
|
||||
// Attribute describes RIC type which we propagate.
|
||||
// Also, it contains callback which can expand this attribute to the real RIC sub-graph.
|
||||
// In addition, attribute has some functionality and properties for propagation.
|
||||
class Attribute {
|
||||
public:
|
||||
using callback_t = std::function<void(Input<Node>, const Attribute&)>;
|
||||
|
||||
Attribute(std::vector<int64_t> order, int64_t axis, bool is_final = false, bool is_initial = false)
|
||||
: m_order(std::move(order)),
|
||||
m_axis(axis),
|
||||
@@ -60,13 +64,27 @@ public:
|
||||
});
|
||||
}
|
||||
|
||||
void set_callback(callback_t callback) {
|
||||
m_callback = std::move(callback);
|
||||
}
|
||||
|
||||
// Apply callback to materialize RIC inside graph
|
||||
void operator()(Input<Node> input) const {
|
||||
m_callback(input, *this);
|
||||
void materialize(Input<Node> input) const {
|
||||
if (get_axis() >= input.get_partial_shape().size()) {
|
||||
NGRAPH_DEBUG << "Axis calculated to materialize RIC on input: " << input << " is out of range";
|
||||
return;
|
||||
}
|
||||
const auto& axis_dim = input.get_partial_shape()[get_axis()];
|
||||
if (axis_dim.is_dynamic()) {
|
||||
NGRAPH_DEBUG << "Axis calculated to materialize RIC on input: " << input << " is dynamic";
|
||||
return;
|
||||
}
|
||||
auto output = input.get_source_output();
|
||||
// Handle case when the RIC order is default
|
||||
auto order = get_order();
|
||||
if (order.empty()) {
|
||||
order.resize(axis_dim.get_length());
|
||||
std::iota(order.rbegin(), order.rend(), 0);
|
||||
}
|
||||
auto gather = std::make_shared<opset8::Gather>(output, create_1d_const(order), create_1d_const({get_axis()}));
|
||||
input.replace_source_output(gather);
|
||||
// TODO: copy runtime info from RIC sub-graph (ticket 88597)
|
||||
}
|
||||
|
||||
bool can_be_fused() const {
|
||||
@@ -129,10 +147,6 @@ private:
|
||||
// true - means that current RIC attribute is an initial attribute and belongs to real RIC output
|
||||
// false - means that current RIC attribute is temporary and need only for propagation
|
||||
bool m_is_initial;
|
||||
|
||||
// Callback specifies the action for RIC materialization for given input port.
|
||||
// In most cases it should insert Gather operation for the input.
|
||||
std::function<void(Input<Node>, const Attribute&)> m_callback = [](Input<Node>, const Attribute&) {};
|
||||
};
|
||||
|
||||
namespace {
|
||||
@@ -287,11 +301,6 @@ public:
|
||||
} // namespace init
|
||||
|
||||
namespace prop {
|
||||
namespace {
|
||||
std::shared_ptr<opset8::Constant> create_const(const std::vector<int64_t>& values) {
|
||||
return opset8::Constant::create(ov::element::i64, ov::Shape{values.size()}, values);
|
||||
}
|
||||
} // namespace
|
||||
|
||||
class Binary : public ngraph::pass::MatcherPass {
|
||||
public:
|
||||
@@ -366,19 +375,6 @@ public:
|
||||
auto ric_const = ric;
|
||||
ric_const.set_axis(new_axis);
|
||||
ric_const.set_is_final(true);
|
||||
ric_const.set_callback([axis_dim](Input<Node> input, const ric_attr::Attribute& attr) {
|
||||
auto output = input.get_source_output();
|
||||
// Handle case when the RIC order is default
|
||||
auto order = attr.get_order();
|
||||
if (order.empty()) {
|
||||
order.resize(axis_dim);
|
||||
std::iota(order.rbegin(), order.rend(), 0);
|
||||
}
|
||||
auto gather =
|
||||
std::make_shared<opset8::Gather>(output, create_const(order), create_const({attr.get_axis()}));
|
||||
input.replace_source_output(gather);
|
||||
// TODO: copy runtime info from RIC sub-graph
|
||||
});
|
||||
ric_attr::set(input, ric_const);
|
||||
}
|
||||
|
||||
@@ -395,48 +391,16 @@ class Convolution : public ngraph::pass::MatcherPass {
|
||||
public:
|
||||
Convolution() {
|
||||
MATCHER_SCOPE(Convolution);
|
||||
// Handle Convolution with Constant and FQ on weights. As Convolution is
|
||||
// a terminal node, so we do not propagate RIC attribute further and insert
|
||||
// final RIC attribute to the weights input.
|
||||
auto input_p = pattern::any_input(ric_attr::has<Output<Node>>);
|
||||
auto pattern_root =
|
||||
pattern::wrap_type<opset8::Convolution>({input_p,
|
||||
pattern::wrap_type<opset8::Constant, opset8::FakeQuantize>(
|
||||
pattern::has_static_dim(1 /*output channel*/))});
|
||||
auto pattern_root = pattern::wrap_type<opset8::Convolution>(
|
||||
{input_p, pattern::any_input(pattern::has_static_dim(1 /*output channel*/))});
|
||||
auto callback = [=](pattern::Matcher& m) {
|
||||
auto conv = m.get_match_root();
|
||||
auto ric = ric_attr::get(conv->input_value(0)).propagate();
|
||||
if (ric.get_axis() != 1)
|
||||
return false;
|
||||
|
||||
ric.set_is_final(true);
|
||||
ric.set_callback([](Input<Node> input, const ric_attr::Attribute& attr) {
|
||||
const auto output_channel_index = 1;
|
||||
auto order = attr.get_order();
|
||||
// Handle case when the RIC order is default
|
||||
if (order.empty()) {
|
||||
order.resize(input.get_partial_shape()[output_channel_index].get_length());
|
||||
std::iota(order.rbegin(), order.rend(), 0);
|
||||
}
|
||||
auto weights = input.get_source_output();
|
||||
auto gather = std::make_shared<opset8::Gather>(weights,
|
||||
create_const(order),
|
||||
create_const({output_channel_index}));
|
||||
input.replace_source_output(gather);
|
||||
// TODO: copy runtime info from RIC sub-graph
|
||||
});
|
||||
|
||||
if (auto fq = std::dynamic_pointer_cast<opset8::FakeQuantize>(conv->get_input_node_shared_ptr(1))) {
|
||||
// Set final RIC attr to the first FQ input
|
||||
ric_attr::set(fq->input(0), ric);
|
||||
|
||||
// Apply Binary transformation for FQ to handle 1..5 inputs
|
||||
ric.set_is_final(false);
|
||||
ric_attr::set(fq->input_value(0), ric); // set ric attr to simulate propagation flow
|
||||
Binary().apply(fq);
|
||||
} else {
|
||||
ric_attr::set(conv->input(1), ric);
|
||||
}
|
||||
ric_attr::set(conv->input(1), ric);
|
||||
return true;
|
||||
};
|
||||
|
||||
@@ -450,8 +414,8 @@ public:
|
||||
GroupConvolution() {
|
||||
MATCHER_SCOPE(GroupConvolution);
|
||||
auto input_p = pattern::any_input(ric_attr::has<Output<Node>>);
|
||||
auto pattern_root = pattern::wrap_type<opset8::GroupConvolution>(
|
||||
{input_p, pattern::wrap_type<opset8::Constant, opset8::FakeQuantize>(pattern::has_static_shape())});
|
||||
auto pattern_root =
|
||||
pattern::wrap_type<opset8::GroupConvolution>({input_p, pattern::any_input(pattern::has_static_shape())});
|
||||
|
||||
auto callback = [=](pattern::Matcher& m) {
|
||||
auto conv = m.get_match_root();
|
||||
@@ -476,28 +440,9 @@ public:
|
||||
|
||||
// Update weights with RIC attribute
|
||||
auto ric_weights = ric;
|
||||
ric_weights.set_is_final(true);
|
||||
ric_weights.set_axis(0);
|
||||
ric_weights.set_callback([](Input<Node> input, const ric_attr::Attribute& attr) {
|
||||
auto weights = input.get_source_output();
|
||||
auto gather = std::make_shared<opset8::Gather>(weights,
|
||||
create_const(attr.get_order()),
|
||||
create_const({0} /* output channel */));
|
||||
input.replace_source_output(gather);
|
||||
// TODO: copy runtime info from RIC sub-graph
|
||||
});
|
||||
|
||||
if (auto fq = std::dynamic_pointer_cast<opset8::FakeQuantize>(conv->get_input_node_shared_ptr(1))) {
|
||||
// Set final RIC attr to the first FQ input
|
||||
ric_attr::set(fq->input(0), ric_weights);
|
||||
|
||||
// Apply Binary transformation for FQ to handle 1..5 inputs
|
||||
ric_weights.set_is_final(false);
|
||||
ric_attr::set(fq->input_value(0), ric_weights); // set ric attr to simulate propagation flow
|
||||
Binary().apply(fq);
|
||||
} else {
|
||||
ric_attr::set(conv->input(1), ric_weights);
|
||||
}
|
||||
ric_attr::set(conv->input(1), ric_weights);
|
||||
|
||||
// Calculate new order for RIC propagation
|
||||
const int64_t output_channels = group * channels;
|
||||
@@ -594,6 +539,9 @@ public:
|
||||
for (const auto& input : m.get_match_root()->input_values()) {
|
||||
if (ric_attr::has(input)) {
|
||||
auto ric = ric_attr::get(input);
|
||||
if (ric.is_final()) {
|
||||
continue;
|
||||
}
|
||||
ric.set_can_be_fused(false);
|
||||
NGRAPH_DEBUG << "Node is unsupported by RIC Fusion: " << *m.get_match_root() << std::endl;
|
||||
}
|
||||
@@ -629,7 +577,7 @@ public:
|
||||
continue;
|
||||
const auto& ric = ric_attr::get(input);
|
||||
if (ric.can_be_fused() && ric.is_final()) {
|
||||
ric(input);
|
||||
ric.materialize(input);
|
||||
}
|
||||
}
|
||||
return false;
|
||||
@@ -682,6 +630,174 @@ public:
|
||||
};
|
||||
} // namespace fuse
|
||||
|
||||
namespace back_prop {
|
||||
class Binary : public ngraph::pass::MatcherPass {
|
||||
public:
|
||||
Binary() {
|
||||
MATCHER_SCOPE(Binary);
|
||||
auto fake_quantize_pattern =
|
||||
pattern::wrap_type<opset8::FakeQuantize>({pattern::any_input(pattern::has_static_rank()),
|
||||
pattern::any_input(pattern::has_static_rank()),
|
||||
pattern::any_input(pattern::has_static_rank()),
|
||||
pattern::any_input(pattern::has_static_rank()),
|
||||
pattern::any_input(pattern::has_static_rank())},
|
||||
pattern::has_static_rank());
|
||||
auto binary_elementwise_pattern = pattern::wrap_type<op::util::BinaryElementwiseArithmetic>(
|
||||
{pattern::any_input(pattern::has_static_rank()), pattern::any_input(pattern::has_static_rank())},
|
||||
pattern::has_static_rank());
|
||||
|
||||
auto pattern_root =
|
||||
std::make_shared<pattern::op::Or>(OutputVector{fake_quantize_pattern, binary_elementwise_pattern});
|
||||
|
||||
auto callback = [=](pattern::Matcher& m) {
|
||||
const auto& root = m.get_match_root();
|
||||
const auto& output = root->output(0);
|
||||
auto inputs = output.get_target_inputs();
|
||||
|
||||
// Check if an output of matched root is consumed as input labeled with reverse_input_channel_index
|
||||
std::vector<ric_attr::Attribute> attrs;
|
||||
for (const auto& input : inputs) {
|
||||
if (ric_attr::has(input)) {
|
||||
attrs.push_back(ric_attr::get(input).propagate());
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if (attrs.empty())
|
||||
return false;
|
||||
|
||||
// Check that all RIC attrs from consumers can be merged and then merge them
|
||||
auto ric = attrs[0];
|
||||
for (const auto& item : attrs) {
|
||||
if (ric.can_be_merged_with(item)) {
|
||||
ric.merge_with(item);
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
auto data_rank = root->get_output_partial_shape(0).rank().get_length();
|
||||
for (const auto& input : root->inputs()) {
|
||||
auto output = input.get_source_output();
|
||||
const auto& shape = output.get_partial_shape();
|
||||
const int64_t& shape_rank = shape.rank().get_length();
|
||||
if (shape_rank > data_rank) {
|
||||
// TODO: handle case when constant input broadcast another one
|
||||
return false;
|
||||
}
|
||||
|
||||
if (data_rank - shape_rank > ric.get_axis()) {
|
||||
// we don't have to insert RIC for constant, so we keep propagating
|
||||
continue;
|
||||
}
|
||||
|
||||
const int64_t& new_axis = ric.get_axis() - (data_rank - shape_rank);
|
||||
const auto& axis_dim = shape[new_axis];
|
||||
if (axis_dim.is_dynamic())
|
||||
return false;
|
||||
if (axis_dim == 1) {
|
||||
// we don't have to insert RIC, because the channel dimension is 1
|
||||
continue;
|
||||
}
|
||||
|
||||
// finally, insert RIC
|
||||
auto ric_const = ric;
|
||||
ric_const.set_axis(new_axis);
|
||||
ric_attr::set(input, ric_const);
|
||||
}
|
||||
return true;
|
||||
};
|
||||
|
||||
auto m = std::make_shared<pattern::Matcher>(pattern_root, matcher_name);
|
||||
register_matcher(m, callback);
|
||||
}
|
||||
};
|
||||
|
||||
class ConvertPassThrough : public ngraph::pass::MatcherPass {
|
||||
public:
|
||||
ConvertPassThrough() {
|
||||
MATCHER_SCOPE(ConvertPassThrough);
|
||||
auto pattern_root = pattern::wrap_type<opset8::Convert>(pattern::has_static_rank());
|
||||
auto callback = [=](pattern::Matcher& m) {
|
||||
auto root = m.get_match_root();
|
||||
const auto& output = root->output(0);
|
||||
auto consumers = output.get_target_inputs();
|
||||
std::vector<ric_attr::Attribute> attrs;
|
||||
|
||||
for (const auto& consumer : consumers) {
|
||||
if (ric_attr::has(consumer)) {
|
||||
attrs.push_back(ric_attr::get(consumer).propagate());
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
auto ric = attrs[0];
|
||||
auto data_rank = root->get_output_partial_shape(0).rank().get_length();
|
||||
|
||||
for (const auto& item : attrs) {
|
||||
if (ric.can_be_merged_with(item)) {
|
||||
ric.merge_with(item);
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
auto input = root->input(0);
|
||||
auto const_output = input.get_source_output();
|
||||
const auto& shape = const_output.get_partial_shape();
|
||||
if (shape.rank().is_dynamic())
|
||||
return false;
|
||||
|
||||
const int64_t& shape_rank = shape.rank().get_length();
|
||||
const int64_t& new_axis = ric.get_axis() - (data_rank - shape_rank);
|
||||
|
||||
// finally, insert RIC
|
||||
ric.set_axis(new_axis);
|
||||
ric_attr::set(input, ric);
|
||||
return true;
|
||||
};
|
||||
|
||||
auto m = std::make_shared<pattern::Matcher>(pattern_root, matcher_name);
|
||||
register_matcher(m, callback);
|
||||
}
|
||||
};
|
||||
|
||||
class Constant : public ov::pass::ModelPass {
|
||||
public:
|
||||
OPENVINO_RTTI("Constant", "0");
|
||||
Constant() = default;
|
||||
bool run_on_model(const std::shared_ptr<ov::Model>& model) override {
|
||||
RUN_ON_FUNCTION_SCOPE(Constant);
|
||||
for (const auto& node : model->get_ordered_ops()) {
|
||||
if ((std::dynamic_pointer_cast<op::util::BinaryElementwiseArithmetic>(node) ||
|
||||
std::dynamic_pointer_cast<opset8::FakeQuantize>(node) ||
|
||||
std::dynamic_pointer_cast<opset8::Convert>(node)) &&
|
||||
node->get_output_partial_shape(0).rank().is_static()) {
|
||||
continue;
|
||||
}
|
||||
for (const auto& output : node->outputs()) {
|
||||
for (const auto& consumer : output.get_target_inputs()) {
|
||||
if (ric_attr::has(consumer)) {
|
||||
auto ric = ric_attr::get(consumer);
|
||||
if (std::dynamic_pointer_cast<opset8::Constant>(node)) {
|
||||
ric.set_is_final(true);
|
||||
ric_attr::set(consumer, ric);
|
||||
} else { // Unsupported
|
||||
if (!ric.is_final()) {
|
||||
ric.set_can_be_fused(false);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace back_prop
|
||||
|
||||
bool ngraph::pass::ReverseInputChannelsFusion::run_on_model(const std::shared_ptr<ov::Model>& model) {
|
||||
Manager m;
|
||||
m.set_per_pass_validation(false);
|
||||
@@ -698,6 +814,11 @@ bool ngraph::pass::ReverseInputChannelsFusion::run_on_model(const std::shared_pt
|
||||
ric_prop->add_matcher<prop::PassThrough>();
|
||||
ric_prop->add_matcher<prop::Unsupported>();
|
||||
|
||||
// Handle quantized weights case (dequantize sub-graph is on the weights path)
|
||||
auto ric_back_prop = m.register_pass<ov::pass::BackwardGraphRewrite>();
|
||||
ric_back_prop->add_matcher<back_prop::Binary>();
|
||||
ric_back_prop->add_matcher<back_prop::ConvertPassThrough>();
|
||||
m.register_pass<back_prop::Constant>();
|
||||
// TODO: validate attributes by request
|
||||
|
||||
// Second we fuse available RIC into nodes and remove original nodes related to fused RIC
|
||||
|
||||
@@ -63,14 +63,18 @@ std::shared_ptr<GroupConvolution> create_group_conv_with_gather(Output<Node> inp
|
||||
ov::Strides{1, 1});
|
||||
}
|
||||
|
||||
std::shared_ptr<Convolution> create_conv_with_gather(Output<Node> input, const Shape & weigts_shape, const std::vector<int64_t> & order) {
|
||||
auto gather = std::make_shared<Gather>(create_weights(weigts_shape), Constant::create(element::i64, Shape{order.size()}, order),
|
||||
std::shared_ptr<Convolution> create_conv_with_gather(Output<Node> input, Output<Node> weigts, const std::vector<int64_t> & order) {
|
||||
auto gather = std::make_shared<Gather>(weigts, Constant::create(element::i64, Shape{order.size()}, order),
|
||||
Constant::create(element::i64, Shape{1}, {1}));
|
||||
return std::make_shared<Convolution>(input, gather, ov::Strides{1, 1},
|
||||
ov::CoordinateDiff{0, 0}, ov::CoordinateDiff{0, 0},
|
||||
ov::Strides{1, 1});
|
||||
}
|
||||
|
||||
std::shared_ptr<Convolution> create_conv_with_gather(Output<Node> input, const Shape & weigts_shape, const std::vector<int64_t> & order) {
|
||||
return create_conv_with_gather(input, create_weights(weigts_shape), order);
|
||||
}
|
||||
|
||||
std::shared_ptr<Parameter> create_param(const PartialShape & shape) {
|
||||
return std::make_shared<Parameter>(element::f32, shape);
|
||||
}
|
||||
@@ -781,3 +785,394 @@ TEST_F(TransformationTestsF, FuseScaleValues) {
|
||||
disable_rt_info_check();
|
||||
comparator.enable(FunctionsComparator::CmpValues::ACCURACY);
|
||||
}
|
||||
|
||||
TEST_F(TransformationTestsF, RICFusionConvertMultiply) {
|
||||
// Input graph:
|
||||
//
|
||||
// Parameter
|
||||
// |F32
|
||||
// |
|
||||
// FakeQuantize
|
||||
// |F32
|
||||
// |
|
||||
// Convert Constant
|
||||
// |U8 |I8
|
||||
// | |
|
||||
// Convert Constant Convert(DCF) Constant
|
||||
// \FP32 /FP32 \FP32 /F32
|
||||
// \ / \ /
|
||||
// Multiply Multiply
|
||||
// \FP32 /FP32
|
||||
// \ /
|
||||
// Convolution
|
||||
//
|
||||
{
|
||||
auto parameter = std::make_shared<opset8::Parameter>(element::f32, Shape{1, 3, 14, 14});
|
||||
std::shared_ptr<Node> activations = std::make_shared<opset8::FakeQuantize>(parameter,
|
||||
opset8::Constant::create(element::f32, Shape{}, {0}),
|
||||
opset8::Constant::create(element::f32, Shape{}, {20}),
|
||||
opset8::Constant::create(element::f32, Shape{}, {0}),
|
||||
opset8::Constant::create(element::f32, Shape{}, {254}), 255);
|
||||
{
|
||||
auto first_convert = std::make_shared<opset8::Convert>(activations, element::u8);
|
||||
auto second_convert = std::make_shared<opset8::Convert>(first_convert, element::f32);
|
||||
auto scale = opset8::Constant::create(element::f32, Shape{}, {0.2});
|
||||
auto multiply = std::make_shared<opset8::Multiply>(second_convert, scale);
|
||||
activations = multiply;
|
||||
}
|
||||
|
||||
std::shared_ptr<Node> weights = opset8::Constant::create(element::i8, Shape{4, 3, 1, 1}, {-2});
|
||||
{
|
||||
auto convert = std::make_shared<opset8::Convert>(weights, element::f32);
|
||||
auto scale = opset8::Constant::create(element::f32, Shape{}, {0.2});
|
||||
auto multiply = std::make_shared<opset8::Multiply>(convert, scale);
|
||||
weights = multiply;
|
||||
}
|
||||
|
||||
auto conv = std::make_shared<opset8::Convolution>(activations, weights, Strides{1, 1}, CoordinateDiff{0, 0}, CoordinateDiff{0, 0}, Strides{1, 1});
|
||||
function = std::make_shared<ngraph::Function>(conv, ParameterVector{parameter});
|
||||
apply_reverse_input_channels(function, {{0, "NCHW"}});
|
||||
}
|
||||
manager.register_pass<ngraph::pass::ReverseInputChannelsFusion>();
|
||||
disable_rt_info_check();
|
||||
{
|
||||
auto parameter = std::make_shared<opset8::Parameter>(element::f32, Shape{1, 3, 14, 14});
|
||||
std::shared_ptr<Node> activations = std::make_shared<opset8::FakeQuantize>(parameter,
|
||||
opset8::Constant::create(element::f32, Shape{}, {0}),
|
||||
opset8::Constant::create(element::f32, Shape{}, {20}),
|
||||
opset8::Constant::create(element::f32, Shape{}, {0}),
|
||||
opset8::Constant::create(element::f32, Shape{}, {254}), 255);
|
||||
{
|
||||
auto first_convert = std::make_shared<opset8::Convert>(activations, element::u8);
|
||||
auto second_convert = std::make_shared<opset8::Convert>(first_convert, element::f32);
|
||||
auto scale = opset8::Constant::create(element::f32, Shape{}, {0.2});
|
||||
auto multiply = std::make_shared<opset8::Multiply>(second_convert, scale);
|
||||
activations = multiply;
|
||||
}
|
||||
|
||||
std::shared_ptr<Node> weights = opset8::Constant::create(element::i8, Shape{4, 3, 1, 1}, {-2});
|
||||
{
|
||||
auto scale = opset8::Constant::create(element::f32, Shape{}, {0.2});
|
||||
auto gather = create_gather(weights, {2, 1, 0}, 1);
|
||||
auto convert = std::make_shared<opset8::Convert>(gather, element::f32);
|
||||
auto multiply = std::make_shared<opset8::Multiply>(convert, scale);
|
||||
weights = multiply;
|
||||
}
|
||||
auto conv = std::make_shared<opset8::Convolution>(activations, weights, Strides{1, 1}, CoordinateDiff{0, 0}, CoordinateDiff{0, 0}, Strides{1, 1});
|
||||
function_ref = std::make_shared<ngraph::Function>(conv, ParameterVector{parameter});
|
||||
}
|
||||
comparator.enable(FunctionsComparator::CmpValues::ACCURACY);
|
||||
}
|
||||
|
||||
TEST_F(TransformationTestsF, RICFusionConvertMultiplyGroupConv) {
|
||||
Shape data_shape{1, 3, 14, 14};
|
||||
{
|
||||
auto data = std::make_shared<opset8::Parameter>(element::f32, data_shape);
|
||||
std::shared_ptr<Node> weights = opset8::Constant::create(element::f32, Shape{3, 3, 1, 4, 4}, {-2});
|
||||
auto convert = std::make_shared<opset8::Convert>(weights, element::f32);
|
||||
auto scale = opset8::Constant::create(element::f32, Shape{}, {0.2});
|
||||
auto multiply = std::make_shared<opset8::Multiply>(convert, scale);
|
||||
|
||||
auto group_conv = std::make_shared<opset8::GroupConvolution>(data, multiply, Strides{1, 1},
|
||||
CoordinateDiff{1, 1}, CoordinateDiff{3, 3}, Shape{1, 1},
|
||||
op::PadType::EXPLICIT);
|
||||
auto relu = std::make_shared<Relu>(group_conv);
|
||||
auto conv = create_conv(relu, {6, 9, 3, 3});
|
||||
function = std::make_shared<Function>(NodeVector{conv}, ParameterVector{data});
|
||||
apply_reverse_input_channels(function, {{0, "NCHW"}});
|
||||
}
|
||||
manager.register_pass<ngraph::pass::ReverseInputChannelsFusion>();
|
||||
disable_rt_info_check();
|
||||
{
|
||||
auto data = std::make_shared<opset8::Parameter>(element::f32, data_shape);
|
||||
std::shared_ptr<Node> weights = opset8::Constant::create(element::f32, Shape{3, 3, 1, 4, 4}, {-2});
|
||||
auto gather = create_gather(weights, {2, 1, 0}, 1);
|
||||
auto convert = std::make_shared<opset8::Convert>(gather, element::f32);
|
||||
auto scale = opset8::Constant::create(element::f32, Shape{}, {0.2});
|
||||
auto multiply = std::make_shared<opset8::Multiply>(convert, scale);
|
||||
|
||||
auto group_conv = std::make_shared<opset8::GroupConvolution>(data, multiply, Strides{1, 1},
|
||||
CoordinateDiff{1, 1}, CoordinateDiff{3, 3}, Shape{1, 1},
|
||||
op::PadType::EXPLICIT);
|
||||
auto relu = std::make_shared<Relu>(group_conv);
|
||||
std::shared_ptr<Node> weights2 = opset8::Constant::create(element::f32, Shape{6, 9, 3, 3}, {-2});
|
||||
auto gather2 = create_gather(weights2, {6, 7, 8, 3, 4, 5, 0, 1, 2}, 1);
|
||||
auto conv = std::make_shared<opset8::Convolution>(relu, gather2, ov::Strides{1, 1},
|
||||
ov::CoordinateDiff{0, 0}, ov::CoordinateDiff{0, 0},
|
||||
ov::Strides{1, 1});
|
||||
function_ref = std::make_shared<Function>(NodeVector{conv}, ParameterVector{data});
|
||||
}
|
||||
comparator.enable(FunctionsComparator::CmpValues::ACCURACY);
|
||||
}
|
||||
|
||||
TEST_F(TransformationTestsF, RICFusionConvertMultiplyNegative1) {
|
||||
{
|
||||
auto parameter = std::make_shared<opset8::Parameter>(element::f32, Shape{1, 3, 14, 14});
|
||||
std::shared_ptr<Node> activations = std::make_shared<opset8::FakeQuantize>(parameter,
|
||||
opset8::Constant::create(element::f32, Shape{}, {0}),
|
||||
opset8::Constant::create(element::f32, Shape{}, {20}),
|
||||
opset8::Constant::create(element::f32, Shape{}, {0}),
|
||||
opset8::Constant::create(element::f32, Shape{}, {254}), 255);
|
||||
{
|
||||
auto first_convert = std::make_shared<opset8::Convert>(activations, element::u8);
|
||||
auto second_convert = std::make_shared<opset8::Convert>(first_convert, element::f32);
|
||||
auto scale = opset8::Constant::create(element::f32, Shape{}, {0.2});
|
||||
auto multiply = std::make_shared<opset8::Multiply>(second_convert, scale);
|
||||
activations = multiply;
|
||||
}
|
||||
|
||||
std::shared_ptr<Node> weights = opset8::Constant::create(element::i8, Shape{4, 3, 1, 1}, {-2});
|
||||
{
|
||||
auto convert = std::make_shared<opset8::Convert>(weights, element::f32);
|
||||
auto scale = opset8::Constant::create(element::f32, Shape{1, 1, 1, 1}, {0.2});
|
||||
auto multiply = std::make_shared<opset8::Multiply>(convert, scale);
|
||||
weights = multiply;
|
||||
}
|
||||
|
||||
auto conv = std::make_shared<opset8::Convolution>(activations, weights, Strides{1, 1}, CoordinateDiff{0, 0}, CoordinateDiff{0, 0}, Strides{1, 1});
|
||||
function = std::make_shared<ngraph::Function>(conv, ParameterVector{parameter});
|
||||
apply_reverse_input_channels(function, {{0, "NCHW"}});
|
||||
}
|
||||
manager.register_pass<ngraph::pass::ReverseInputChannelsFusion>();
|
||||
disable_rt_info_check();
|
||||
{
|
||||
auto parameter = std::make_shared<opset8::Parameter>(element::f32, Shape{1, 3, 14, 14});
|
||||
std::shared_ptr<Node> activations = std::make_shared<opset8::FakeQuantize>(parameter,
|
||||
opset8::Constant::create(element::f32, Shape{}, {0}),
|
||||
opset8::Constant::create(element::f32, Shape{}, {20}),
|
||||
opset8::Constant::create(element::f32, Shape{}, {0}),
|
||||
opset8::Constant::create(element::f32, Shape{}, {254}), 255);
|
||||
{
|
||||
auto first_convert = std::make_shared<opset8::Convert>(activations, element::u8);
|
||||
auto second_convert = std::make_shared<opset8::Convert>(first_convert, element::f32);
|
||||
auto scale = opset8::Constant::create(element::f32, Shape{}, {0.2});
|
||||
auto multiply = std::make_shared<opset8::Multiply>(second_convert, scale);
|
||||
activations = multiply;
|
||||
}
|
||||
|
||||
std::shared_ptr<Node> weights = opset8::Constant::create(element::i8, Shape{4, 3, 1, 1}, {-2});
|
||||
{
|
||||
auto gather = create_gather(weights, {2, 1, 0}, 1);
|
||||
auto convert = std::make_shared<opset8::Convert>(gather, element::f32);
|
||||
auto scale = opset8::Constant::create(element::f32, Shape{1, 1, 1, 1}, {0.2});
|
||||
auto multiply = std::make_shared<opset8::Multiply>(convert, scale);
|
||||
weights = multiply;
|
||||
}
|
||||
|
||||
auto conv = std::make_shared<opset8::Convolution>(activations, weights, Strides{1, 1}, CoordinateDiff{0, 0}, CoordinateDiff{0, 0}, Strides{1, 1});
|
||||
function_ref = std::make_shared<ngraph::Function>(conv, ParameterVector{parameter});
|
||||
}
|
||||
comparator.enable(FunctionsComparator::CmpValues::ACCURACY);
|
||||
}
|
||||
|
||||
TEST_F(TransformationTestsF, RICFusionConvertMultiplyNegativeBroadcast) {
|
||||
{
|
||||
auto parameter = std::make_shared<opset8::Parameter>(element::f32, Shape{1, 3, 14, 14});
|
||||
std::shared_ptr<Node> activations = std::make_shared<opset8::FakeQuantize>(parameter,
|
||||
opset8::Constant::create(element::f32, Shape{}, {0}),
|
||||
opset8::Constant::create(element::f32, Shape{}, {20}),
|
||||
opset8::Constant::create(element::f32, Shape{}, {0}),
|
||||
opset8::Constant::create(element::f32, Shape{}, {254}), 255);
|
||||
{
|
||||
auto first_convert = std::make_shared<opset8::Convert>(activations, element::u8);
|
||||
auto second_convert = std::make_shared<opset8::Convert>(first_convert, element::f32);
|
||||
auto scale = opset8::Constant::create(element::f32, Shape{}, {0.2});
|
||||
auto multiply = std::make_shared<opset8::Multiply>(second_convert, scale);
|
||||
activations = multiply;
|
||||
}
|
||||
|
||||
std::shared_ptr<Node> weights = opset8::Constant::create(element::i8, Shape{3, 1, 1}, {-2});
|
||||
{
|
||||
auto convert = std::make_shared<opset8::Convert>(weights, element::f32);
|
||||
auto scale = opset8::Constant::create(element::f32, Shape{4, 3, 1, 1}, {0.2});
|
||||
auto multiply = std::make_shared<opset8::Multiply>(convert, scale);
|
||||
weights = multiply;
|
||||
}
|
||||
|
||||
auto conv = std::make_shared<opset8::Convolution>(activations, weights, Strides{1, 1}, CoordinateDiff{0, 0}, CoordinateDiff{0, 0}, Strides{1, 1});
|
||||
function = std::make_shared<ngraph::Function>(conv, ParameterVector{parameter});
|
||||
apply_reverse_input_channels(function, {{0, "NCHW"}});
|
||||
}
|
||||
manager.register_pass<ngraph::pass::ReverseInputChannelsFusion>();
|
||||
disable_rt_info_check();
|
||||
{
|
||||
auto parameter = std::make_shared<opset8::Parameter>(element::f32, Shape{1, 3, 14, 14});
|
||||
std::shared_ptr<Node> activations = std::make_shared<opset8::FakeQuantize>(parameter,
|
||||
opset8::Constant::create(element::f32, Shape{}, {0}),
|
||||
opset8::Constant::create(element::f32, Shape{}, {20}),
|
||||
opset8::Constant::create(element::f32, Shape{}, {0}),
|
||||
opset8::Constant::create(element::f32, Shape{}, {254}), 255);
|
||||
{
|
||||
auto first_convert = std::make_shared<opset8::Convert>(activations, element::u8);
|
||||
auto second_convert = std::make_shared<opset8::Convert>(first_convert, element::f32);
|
||||
auto scale = opset8::Constant::create(element::f32, Shape{}, {0.2});
|
||||
auto multiply = std::make_shared<opset8::Multiply>(second_convert, scale);
|
||||
activations = multiply;
|
||||
}
|
||||
|
||||
std::shared_ptr<Node> weights = opset8::Constant::create(element::i8, Shape{3, 1, 1}, {-2});
|
||||
{
|
||||
auto gather = create_gather(weights, {2, 1, 0}, 0);
|
||||
auto convert = std::make_shared<opset8::Convert>(gather, element::f32);
|
||||
auto scale = opset8::Constant::create(element::f32, Shape{4, 3, 1, 1}, {0.2});
|
||||
auto gather2 = create_gather(scale, {2, 1, 0}, 1);
|
||||
auto multiply = std::make_shared<opset8::Multiply>(convert, gather2);
|
||||
weights = multiply;
|
||||
}
|
||||
|
||||
auto conv = std::make_shared<opset8::Convolution>(activations, weights, Strides{1, 1}, CoordinateDiff{0, 0}, CoordinateDiff{0, 0}, Strides{1, 1});
|
||||
function_ref = std::make_shared<ngraph::Function>(conv, ParameterVector{parameter});
|
||||
}
|
||||
comparator.enable(FunctionsComparator::CmpValues::ACCURACY);
|
||||
}
|
||||
|
||||
TEST_F(TransformationTestsF, RICFusionNegativeUnsupported) {
|
||||
{
|
||||
auto input = create_param({1, 3, 64, 64});
|
||||
auto relu = std::make_shared<Relu>(input);
|
||||
std::shared_ptr<Node> weights = opset8::Constant::create(element::i8, Shape{6, 3, 3, 3}, {-2});
|
||||
{
|
||||
auto convert = std::make_shared<opset8::Convert>(weights, element::f32);
|
||||
auto scale = opset8::Constant::create(element::f32, Shape{}, {0.2});
|
||||
auto multiply = std::make_shared<opset8::Multiply>(convert, scale);
|
||||
auto relu2 = std::make_shared<Relu>(multiply);
|
||||
weights = relu2;
|
||||
}
|
||||
auto conv = std::make_shared<opset8::Convolution>(relu, weights, Strides{1, 1}, CoordinateDiff{0, 0}, CoordinateDiff{0, 0}, Strides{1, 1});
|
||||
|
||||
function = std::make_shared<Function>(NodeVector{conv}, ParameterVector{input});
|
||||
apply_reverse_input_channels(function, {{0, "NCHW"}});
|
||||
manager.register_pass<pass::ReverseInputChannelsFusion>();
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(TransformationTestsF, RICFusionConvertMultiplyNonScalarFQInput) {
    // Original graph: FakeQuantize whose input_low is a full-shape (non-scalar)
    // constant, followed by a u8 quantize / f32 dequantize pair on activations
    // and quantized (i8 -> Convert -> Multiply) weights feeding a Convolution.
    {
        auto parameter = std::make_shared<opset8::Parameter>(element::f32, Shape{1, 3, 14, 14});
        std::shared_ptr<Node> activations = std::make_shared<opset8::FakeQuantize>(
            parameter,
            std::make_shared<opset8::Constant>(element::f32, Shape{1, 3, 14, 14}),
            opset8::Constant::create(element::f32, Shape{}, {20}),
            opset8::Constant::create(element::f32, Shape{}, {0}),
            opset8::Constant::create(element::f32, Shape{}, {254}),
            255);
        {
            // Quantize to u8 and dequantize back to f32 with a scalar scale.
            auto quantize = std::make_shared<opset8::Convert>(activations, element::u8);
            auto dequantize = std::make_shared<opset8::Convert>(quantize, element::f32);
            auto act_scale = opset8::Constant::create(element::f32, Shape{}, {0.2});
            activations = std::make_shared<opset8::Multiply>(dequantize, act_scale);
        }

        std::shared_ptr<Node> weights = opset8::Constant::create(element::i8, Shape{4, 3, 1, 1}, {-2});
        {
            auto w_convert = std::make_shared<opset8::Convert>(weights, element::f32);
            auto w_scale = opset8::Constant::create(element::f32, Shape{}, {0.2});
            weights = std::make_shared<opset8::Multiply>(w_convert, w_scale);
        }

        auto conv = std::make_shared<opset8::Convolution>(activations,
                                                          weights,
                                                          Strides{1, 1},
                                                          CoordinateDiff{0, 0},
                                                          CoordinateDiff{0, 0},
                                                          Strides{1, 1});
        function = std::make_shared<ngraph::Function>(conv, ParameterVector{parameter});
        apply_reverse_input_channels(function, {{0, "NCHW"}});
    }
    manager.register_pass<ngraph::pass::ReverseInputChannelsFusion>();
    disable_rt_info_check();
    // Reference graph: the channel-reversing Gather is expected on the
    // non-scalar FQ input_low constant and on the i8 weights (before Convert).
    {
        auto parameter = std::make_shared<opset8::Parameter>(element::f32, Shape{1, 3, 14, 14});
        auto il_gather =
            create_gather(std::make_shared<opset8::Constant>(element::f32, Shape{1, 3, 14, 14}), {2, 1, 0}, 1);
        std::shared_ptr<Node> activations = std::make_shared<opset8::FakeQuantize>(
            parameter,
            il_gather,
            opset8::Constant::create(element::f32, Shape{}, {20}),
            opset8::Constant::create(element::f32, Shape{}, {0}),
            opset8::Constant::create(element::f32, Shape{}, {254}),
            255);
        {
            auto quantize = std::make_shared<opset8::Convert>(activations, element::u8);
            auto dequantize = std::make_shared<opset8::Convert>(quantize, element::f32);
            auto act_scale = opset8::Constant::create(element::f32, Shape{}, {0.2});
            activations = std::make_shared<opset8::Multiply>(dequantize, act_scale);
        }

        std::shared_ptr<Node> weights = opset8::Constant::create(element::i8, Shape{4, 3, 1, 1}, {-2});
        {
            auto w_scale = opset8::Constant::create(element::f32, Shape{}, {0.2});
            // Gather is applied directly to the i8 constant, before dequantization.
            auto w_gather = create_gather(weights, {2, 1, 0}, 1);
            auto w_convert = std::make_shared<opset8::Convert>(w_gather, element::f32);
            weights = std::make_shared<opset8::Multiply>(w_convert, w_scale);
        }
        auto conv = std::make_shared<opset8::Convolution>(activations,
                                                          weights,
                                                          Strides{1, 1},
                                                          CoordinateDiff{0, 0},
                                                          CoordinateDiff{0, 0},
                                                          Strides{1, 1});
        function_ref = std::make_shared<ngraph::Function>(conv, ParameterVector{parameter});
    }
    comparator.enable(FunctionsComparator::CmpValues::ACCURACY);
}
|
||||
|
||||
TEST_F(TransformationTestsF, RICFusionConvertMultiplySkipIfFQLowNonConst) {
    // Negative case: the FakeQuantize input_low comes from a Parameter instead
    // of a Constant, so RIC fusion must skip the pattern and leave the graph
    // unchanged (no function_ref is provided).
    {
        auto parameter = std::make_shared<opset8::Parameter>(element::f32, Shape{1, 3, 14, 14});
        auto input_low = std::make_shared<opset8::Parameter>(element::f32, Shape{});
        std::shared_ptr<Node> activations = std::make_shared<opset8::FakeQuantize>(
            parameter,
            input_low,
            opset8::Constant::create(element::f32, Shape{}, {20}),
            opset8::Constant::create(element::f32, Shape{}, {0}),
            opset8::Constant::create(element::f32, Shape{}, {254}),
            255);
        {
            // Quantize to u8 and dequantize back to f32 with a scalar scale.
            auto quantize = std::make_shared<opset8::Convert>(activations, element::u8);
            auto dequantize = std::make_shared<opset8::Convert>(quantize, element::f32);
            auto act_scale = opset8::Constant::create(element::f32, Shape{}, {0.2});
            activations = std::make_shared<opset8::Multiply>(dequantize, act_scale);
        }

        // Quantized weights: i8 constant -> Convert -> Multiply.
        std::shared_ptr<Node> weights = opset8::Constant::create(element::i8, Shape{4, 3, 1, 1}, {-2});
        {
            auto w_convert = std::make_shared<opset8::Convert>(weights, element::f32);
            auto w_scale = opset8::Constant::create(element::f32, Shape{}, {0.2});
            weights = std::make_shared<opset8::Multiply>(w_convert, w_scale);
        }

        auto conv = std::make_shared<opset8::Convolution>(activations,
                                                          weights,
                                                          Strides{1, 1},
                                                          CoordinateDiff{0, 0},
                                                          CoordinateDiff{0, 0},
                                                          Strides{1, 1});
        function = std::make_shared<ngraph::Function>(conv, ParameterVector{parameter, input_low});
        apply_reverse_input_channels(function, {{0, "NCHW"}});
    }
    manager.register_pass<ngraph::pass::ReverseInputChannelsFusion>();
}
|
||||
|
||||
TEST_F(TransformationTestsF, RICFusionTwoConvolutions) {
    // Two chained convolutions: RIC is expected to be fused only into the
    // first convolution's weights (as a Gather); the second one is untouched.
    auto input = create_param({1, 3, 16, 16});
    {
        auto first_conv = create_conv(input, create_weights({3, 3, 1, 1}));
        auto second_conv = create_conv(first_conv, create_weights({3, 3, 1, 1}));
        function = std::make_shared<Function>(NodeVector{second_conv}, ParameterVector{input});
        apply_reverse_input_channels(function, {{0, "NCHW"}});

        manager.register_pass<pass::ReverseInputChannelsFusion>();
        disable_rt_info_check();
    }
    {
        auto first_conv = create_conv_with_gather(input, create_weights({3, 3, 1, 1}), {2, 1, 0});
        auto second_conv = create_conv(first_conv, create_weights({3, 3, 1, 1}));
        function_ref = std::make_shared<Function>(NodeVector{second_conv}, ParameterVector{input});
    }
    comparator.enable(FunctionsComparator::CmpValues::ACCURACY);
}
|
||||
|
||||
TEST_F(TransformationTestsF, RICFusionTwoConvolutionsTheSameWeights) {
    // Both convolutions consume the very same weights node. Fusion must insert
    // the Gather only on the first convolution's weights input while the shared
    // constant itself stays usable by the second convolution.
    auto input = create_param({1, 3, 16, 16});
    auto shared_weights = create_weights({3, 3, 1, 1});
    {
        auto first_conv = create_conv(input, shared_weights);
        auto second_conv = create_conv(first_conv, shared_weights);
        function = std::make_shared<Function>(NodeVector{second_conv}, ParameterVector{input});
        apply_reverse_input_channels(function, {{0, "NCHW"}});

        manager.register_pass<pass::ReverseInputChannelsFusion>();
        disable_rt_info_check();
    }
    {
        auto first_conv = create_conv_with_gather(input, shared_weights, {2, 1, 0});
        auto second_conv = create_conv(first_conv, shared_weights);
        function_ref = std::make_shared<Function>(NodeVector{second_conv}, ParameterVector{input});
    }
    comparator.enable(FunctionsComparator::CmpValues::ACCURACY);
}
|
||||
|
||||
Reference in New Issue
Block a user