Extend RIC fusion transformation to support quantized weights (#12028)

This commit is contained in:
Mateusz Bencer
2022-08-05 12:03:49 +02:00
committed by GitHub
parent 141e1f8dd7
commit 3d337ccbbb
2 changed files with 606 additions and 90 deletions

View File

@@ -24,13 +24,17 @@ namespace ngraph {
namespace pass {
namespace ric_attr {
namespace {
std::shared_ptr<opset8::Constant> create_1d_const(const std::vector<int64_t>& values) {
return opset8::Constant::create(ov::element::i64, ov::Shape{values.size()}, values);
}
} // namespace
// Attribute describes RIC type which we propagate.
// Also, it contains callback which can expand this attribute to the real RIC sub-graph.
// In addition, attribute has some functionality and properties for propagation.
class Attribute {
public:
using callback_t = std::function<void(Input<Node>, const Attribute&)>;
Attribute(std::vector<int64_t> order, int64_t axis, bool is_final = false, bool is_initial = false)
: m_order(std::move(order)),
m_axis(axis),
@@ -60,13 +64,27 @@ public:
});
}
void set_callback(callback_t callback) {
m_callback = std::move(callback);
}
// Apply callback to materialize RIC inside graph
void operator()(Input<Node> input) const {
m_callback(input, *this);
void materialize(Input<Node> input) const {
if (get_axis() >= input.get_partial_shape().size()) {
NGRAPH_DEBUG << "Axis calculated to materialize RIC on input: " << input << " is out of range";
return;
}
const auto& axis_dim = input.get_partial_shape()[get_axis()];
if (axis_dim.is_dynamic()) {
NGRAPH_DEBUG << "Axis calculated to materialize RIC on input: " << input << " is dynamic";
return;
}
auto output = input.get_source_output();
// Handle case when the RIC order is default
auto order = get_order();
if (order.empty()) {
order.resize(axis_dim.get_length());
std::iota(order.rbegin(), order.rend(), 0);
}
auto gather = std::make_shared<opset8::Gather>(output, create_1d_const(order), create_1d_const({get_axis()}));
input.replace_source_output(gather);
// TODO: copy runtime info from RIC sub-graph (ticket 88597)
}
bool can_be_fused() const {
@@ -129,10 +147,6 @@ private:
// true - means that current RIC attribute is an initial attribute and belongs to real RIC output
// false - means that current RIC attribute is temporary and need only for propagation
bool m_is_initial;
// Callback specifies the action for RIC materialization for given input port.
// In most cases it should insert Gather operation for the input.
std::function<void(Input<Node>, const Attribute&)> m_callback = [](Input<Node>, const Attribute&) {};
};
namespace {
@@ -287,11 +301,6 @@ public:
} // namespace init
namespace prop {
namespace {
std::shared_ptr<opset8::Constant> create_const(const std::vector<int64_t>& values) {
return opset8::Constant::create(ov::element::i64, ov::Shape{values.size()}, values);
}
} // namespace
class Binary : public ngraph::pass::MatcherPass {
public:
@@ -366,19 +375,6 @@ public:
auto ric_const = ric;
ric_const.set_axis(new_axis);
ric_const.set_is_final(true);
ric_const.set_callback([axis_dim](Input<Node> input, const ric_attr::Attribute& attr) {
auto output = input.get_source_output();
// Handle case when the RIC order is default
auto order = attr.get_order();
if (order.empty()) {
order.resize(axis_dim);
std::iota(order.rbegin(), order.rend(), 0);
}
auto gather =
std::make_shared<opset8::Gather>(output, create_const(order), create_const({attr.get_axis()}));
input.replace_source_output(gather);
// TODO: copy runtime info from RIC sub-graph
});
ric_attr::set(input, ric_const);
}
@@ -395,48 +391,16 @@ class Convolution : public ngraph::pass::MatcherPass {
public:
Convolution() {
MATCHER_SCOPE(Convolution);
// Handle Convolution with Constant and FQ on weights. As Convolution is
// a terminal node, so we do not propagate RIC attribute further and insert
// final RIC attribute to the weights input.
auto input_p = pattern::any_input(ric_attr::has<Output<Node>>);
auto pattern_root =
pattern::wrap_type<opset8::Convolution>({input_p,
pattern::wrap_type<opset8::Constant, opset8::FakeQuantize>(
pattern::has_static_dim(1 /*output channel*/))});
auto pattern_root = pattern::wrap_type<opset8::Convolution>(
{input_p, pattern::any_input(pattern::has_static_dim(1 /*output channel*/))});
auto callback = [=](pattern::Matcher& m) {
auto conv = m.get_match_root();
auto ric = ric_attr::get(conv->input_value(0)).propagate();
if (ric.get_axis() != 1)
return false;
ric.set_is_final(true);
ric.set_callback([](Input<Node> input, const ric_attr::Attribute& attr) {
const auto output_channel_index = 1;
auto order = attr.get_order();
// Handle case when the RIC order is default
if (order.empty()) {
order.resize(input.get_partial_shape()[output_channel_index].get_length());
std::iota(order.rbegin(), order.rend(), 0);
}
auto weights = input.get_source_output();
auto gather = std::make_shared<opset8::Gather>(weights,
create_const(order),
create_const({output_channel_index}));
input.replace_source_output(gather);
// TODO: copy runtime info from RIC sub-graph
});
if (auto fq = std::dynamic_pointer_cast<opset8::FakeQuantize>(conv->get_input_node_shared_ptr(1))) {
// Set final RIC attr to the first FQ input
ric_attr::set(fq->input(0), ric);
// Apply Binary transformation for FQ to handle 1..5 inputs
ric.set_is_final(false);
ric_attr::set(fq->input_value(0), ric); // set ric attr to simulate propagation flow
Binary().apply(fq);
} else {
ric_attr::set(conv->input(1), ric);
}
ric_attr::set(conv->input(1), ric);
return true;
};
@@ -450,8 +414,8 @@ public:
GroupConvolution() {
MATCHER_SCOPE(GroupConvolution);
auto input_p = pattern::any_input(ric_attr::has<Output<Node>>);
auto pattern_root = pattern::wrap_type<opset8::GroupConvolution>(
{input_p, pattern::wrap_type<opset8::Constant, opset8::FakeQuantize>(pattern::has_static_shape())});
auto pattern_root =
pattern::wrap_type<opset8::GroupConvolution>({input_p, pattern::any_input(pattern::has_static_shape())});
auto callback = [=](pattern::Matcher& m) {
auto conv = m.get_match_root();
@@ -476,28 +440,9 @@ public:
// Update weights with RIC attribute
auto ric_weights = ric;
ric_weights.set_is_final(true);
ric_weights.set_axis(0);
ric_weights.set_callback([](Input<Node> input, const ric_attr::Attribute& attr) {
auto weights = input.get_source_output();
auto gather = std::make_shared<opset8::Gather>(weights,
create_const(attr.get_order()),
create_const({0} /* output channel */));
input.replace_source_output(gather);
// TODO: copy runtime info from RIC sub-graph
});
if (auto fq = std::dynamic_pointer_cast<opset8::FakeQuantize>(conv->get_input_node_shared_ptr(1))) {
// Set final RIC attr to the first FQ input
ric_attr::set(fq->input(0), ric_weights);
// Apply Binary transformation for FQ to handle 1..5 inputs
ric_weights.set_is_final(false);
ric_attr::set(fq->input_value(0), ric_weights); // set ric attr to simulate propagation flow
Binary().apply(fq);
} else {
ric_attr::set(conv->input(1), ric_weights);
}
ric_attr::set(conv->input(1), ric_weights);
// Calculate new order for RIC propagation
const int64_t output_channels = group * channels;
@@ -594,6 +539,9 @@ public:
for (const auto& input : m.get_match_root()->input_values()) {
if (ric_attr::has(input)) {
auto ric = ric_attr::get(input);
if (ric.is_final()) {
continue;
}
ric.set_can_be_fused(false);
NGRAPH_DEBUG << "Node is unsupported by RIC Fusion: " << *m.get_match_root() << std::endl;
}
@@ -629,7 +577,7 @@ public:
continue;
const auto& ric = ric_attr::get(input);
if (ric.can_be_fused() && ric.is_final()) {
ric(input);
ric.materialize(input);
}
}
return false;
@@ -682,6 +630,174 @@ public:
};
} // namespace fuse
namespace back_prop {
// Propagates a RIC (reverse-input-channels) attribute BACKWARD — from consumers
// to producers — through FakeQuantize and binary element-wise arithmetic nodes.
// This covers the quantized-weights case where a dequantization sub-graph
// (e.g. Convert -> Multiply) sits between the weights Constant and the
// consuming Convolution.
class Binary : public ngraph::pass::MatcherPass {
public:
    Binary() {
        MATCHER_SCOPE(Binary);
        // FakeQuantize has exactly 5 inputs (data + 4 range inputs); all must
        // have a static rank so the axis arithmetic below is well-defined.
        auto fake_quantize_pattern =
            pattern::wrap_type<opset8::FakeQuantize>({pattern::any_input(pattern::has_static_rank()),
                                                      pattern::any_input(pattern::has_static_rank()),
                                                      pattern::any_input(pattern::has_static_rank()),
                                                      pattern::any_input(pattern::has_static_rank()),
                                                      pattern::any_input(pattern::has_static_rank())},
                                                     pattern::has_static_rank());
        auto binary_elementwise_pattern = pattern::wrap_type<op::util::BinaryElementwiseArithmetic>(
            {pattern::any_input(pattern::has_static_rank()), pattern::any_input(pattern::has_static_rank())},
            pattern::has_static_rank());
        auto pattern_root =
            std::make_shared<pattern::op::Or>(OutputVector{fake_quantize_pattern, binary_elementwise_pattern});
        auto callback = [=](pattern::Matcher& m) {
            const auto& root = m.get_match_root();
            const auto& output = root->output(0);
            auto inputs = output.get_target_inputs();
            // Check if an output of matched root is consumed as input labeled with reverse_input_channel_index.
            // Every consumer must carry a RIC attribute, otherwise back-propagation is unsafe.
            std::vector<ric_attr::Attribute> attrs;
            for (const auto& input : inputs) {
                if (ric_attr::has(input)) {
                    attrs.push_back(ric_attr::get(input).propagate());
                } else {
                    return false;
                }
            }
            if (attrs.empty())
                return false;
            // Check that all RIC attrs from consumers can be merged and then merge them
            auto ric = attrs[0];
            for (const auto& item : attrs) {
                if (ric.can_be_merged_with(item)) {
                    ric.merge_with(item);
                } else {
                    return false;
                }
            }
            auto data_rank = root->get_output_partial_shape(0).rank().get_length();
            for (const auto& input : root->inputs()) {
                auto output = input.get_source_output();
                const auto& shape = output.get_partial_shape();
                const int64_t& shape_rank = shape.rank().get_length();
                if (shape_rank > data_rank) {
                    // TODO: handle case when constant input broadcast another one
                    return false;
                }
                if (data_rank - shape_rank > ric.get_axis()) {
                    // we don't have to insert RIC for constant, so we keep propagating
                    continue;
                }
                // Re-base the RIC axis onto this (possibly lower-rank, broadcast) input.
                const int64_t& new_axis = ric.get_axis() - (data_rank - shape_rank);
                const auto& axis_dim = shape[new_axis];
                if (axis_dim.is_dynamic())
                    return false;
                if (axis_dim == 1) {
                    // we don't have to insert RIC, because the channel dimension is 1
                    continue;
                }
                // finally, insert RIC
                auto ric_const = ric;
                ric_const.set_axis(new_axis);
                ric_attr::set(input, ric_const);
            }
            return true;
        };
        auto m = std::make_shared<pattern::Matcher>(pattern_root, matcher_name);
        register_matcher(m, callback);
    }
};
// Propagates a RIC attribute backward through a Convert node (part of the
// dequantization sub-graph on a quantized-weights path). The merged RIC
// attribute of all Convert consumers is re-based onto the Convert input and
// attached there, so that the Gather is eventually materialized on the
// low-precision constant.
class ConvertPassThrough : public ngraph::pass::MatcherPass {
public:
    ConvertPassThrough() {
        MATCHER_SCOPE(ConvertPassThrough);
        auto pattern_root = pattern::wrap_type<opset8::Convert>(pattern::has_static_rank());
        auto callback = [=](pattern::Matcher& m) {
            auto root = m.get_match_root();
            const auto& output = root->output(0);
            auto consumers = output.get_target_inputs();
            // Every consumer must carry a RIC attribute, otherwise we cannot
            // safely move the RIC behind this Convert.
            std::vector<ric_attr::Attribute> attrs;
            for (const auto& consumer : consumers) {
                if (ric_attr::has(consumer)) {
                    attrs.push_back(ric_attr::get(consumer).propagate());
                } else {
                    return false;
                }
            }
            // Guard against a dead-end Convert with no consumers: without this
            // check attrs[0] below would be undefined behavior.
            if (attrs.empty())
                return false;
            auto ric = attrs[0];
            auto data_rank = root->get_output_partial_shape(0).rank().get_length();
            // All consumer attributes must be mergeable into a single RIC.
            for (const auto& item : attrs) {
                if (ric.can_be_merged_with(item)) {
                    ric.merge_with(item);
                } else {
                    return false;
                }
            }
            auto input = root->input(0);
            auto const_output = input.get_source_output();
            const auto& shape = const_output.get_partial_shape();
            if (shape.rank().is_dynamic())
                return false;
            // Store ranks by value: binding const int64_t& to a temporary
            // (as before) works but is needlessly fragile.
            const int64_t shape_rank = shape.rank().get_length();
            // Re-base the RIC axis onto the (possibly lower-rank) input shape.
            const int64_t new_axis = ric.get_axis() - (data_rank - shape_rank);
            // finally, insert RIC
            ric.set_axis(new_axis);
            ric_attr::set(input, ric);
            return true;
        };
        auto m = std::make_shared<pattern::Matcher>(pattern_root, matcher_name);
        register_matcher(m, callback);
    }
};
// Finalizes backward RIC propagation: walks the model in topological order and,
// for every node that is NOT one of the back-prop pass-through types
// (binary arithmetic / FakeQuantize / Convert with static rank), inspects the
// RIC attributes sitting on its consumers. A Constant producer marks the
// attribute as final (Gather will be materialized there); any other producer
// is unsupported, so a non-final attribute is flagged as not fusable.
class Constant : public ov::pass::ModelPass {
public:
    OPENVINO_RTTI("Constant", "0");
    Constant() = default;
    bool run_on_model(const std::shared_ptr<ov::Model>& model) override {
        RUN_ON_FUNCTION_SCOPE(Constant);
        for (const auto& op : model->get_ordered_ops()) {
            // Skip nodes already handled by the back-prop matchers.
            const bool is_pass_through = std::dynamic_pointer_cast<op::util::BinaryElementwiseArithmetic>(op) ||
                                         std::dynamic_pointer_cast<opset8::FakeQuantize>(op) ||
                                         std::dynamic_pointer_cast<opset8::Convert>(op);
            if (is_pass_through && op->get_output_partial_shape(0).rank().is_static())
                continue;
            const bool is_constant = std::dynamic_pointer_cast<opset8::Constant>(op) != nullptr;
            for (const auto& out : op->outputs()) {
                for (const auto& target : out.get_target_inputs()) {
                    if (!ric_attr::has(target))
                        continue;
                    auto attr = ric_attr::get(target);
                    if (is_constant) {
                        attr.set_is_final(true);
                        ric_attr::set(target, attr);
                    } else if (!attr.is_final()) {
                        // Unsupported producer on the weights path.
                        attr.set_can_be_fused(false);
                    }
                }
            }
        }
        return true;
    }
};
} // namespace back_prop
bool ngraph::pass::ReverseInputChannelsFusion::run_on_model(const std::shared_ptr<ov::Model>& model) {
Manager m;
m.set_per_pass_validation(false);
@@ -698,6 +814,11 @@ bool ngraph::pass::ReverseInputChannelsFusion::run_on_model(const std::shared_pt
ric_prop->add_matcher<prop::PassThrough>();
ric_prop->add_matcher<prop::Unsupported>();
// Handle quantized weights case (dequantize sub-graph is on the weights path)
auto ric_back_prop = m.register_pass<ov::pass::BackwardGraphRewrite>();
ric_back_prop->add_matcher<back_prop::Binary>();
ric_back_prop->add_matcher<back_prop::ConvertPassThrough>();
m.register_pass<back_prop::Constant>();
// TODO: validate attributes by request
// Second we fuse available RIC into nodes and remove original nodes related to fused RIC

View File

@@ -63,14 +63,18 @@ std::shared_ptr<GroupConvolution> create_group_conv_with_gather(Output<Node> inp
ov::Strides{1, 1});
}
std::shared_ptr<Convolution> create_conv_with_gather(Output<Node> input, const Shape & weigts_shape, const std::vector<int64_t> & order) {
auto gather = std::make_shared<Gather>(create_weights(weigts_shape), Constant::create(element::i64, Shape{order.size()}, order),
std::shared_ptr<Convolution> create_conv_with_gather(Output<Node> input, Output<Node> weigts, const std::vector<int64_t> & order) {
auto gather = std::make_shared<Gather>(weigts, Constant::create(element::i64, Shape{order.size()}, order),
Constant::create(element::i64, Shape{1}, {1}));
return std::make_shared<Convolution>(input, gather, ov::Strides{1, 1},
ov::CoordinateDiff{0, 0}, ov::CoordinateDiff{0, 0},
ov::Strides{1, 1});
}
// Convenience overload: builds fresh constant weights of the given shape and
// delegates to the Output<Node>-based overload.
std::shared_ptr<Convolution> create_conv_with_gather(Output<Node> input, const Shape & weigts_shape, const std::vector<int64_t> & order) {
    const auto weights = create_weights(weigts_shape);
    return create_conv_with_gather(input, weights, order);
}
std::shared_ptr<Parameter> create_param(const PartialShape & shape) {
return std::make_shared<Parameter>(element::f32, shape);
}
@@ -781,3 +785,394 @@ TEST_F(TransformationTestsF, FuseScaleValues) {
disable_rt_info_check();
comparator.enable(FunctionsComparator::CmpValues::ACCURACY);
}
// Quantized-weights case: the channel-reversing Gather must be inserted
// directly on the low-precision (i8) weights Constant, BEFORE the
// dequantization Convert -> Multiply sub-graph.
TEST_F(TransformationTestsF, RICFusionConvertMultiply) {
    // Input graph:
    //
    //      Parameter
    //        |F32
    //        |
    //      FakeQuantize
    //        |F32
    //        |
    //      Convert            Constant
    //        |U8                |I8
    //        |                  |
    //      Convert  Constant  Convert(DCF)  Constant
    //        \FP32   /FP32      \FP32       /F32
    //         \     /            \         /
    //         Multiply            Multiply
    //            \FP32           /FP32
    //             \             /
    //              Convolution
    //
    {
        auto parameter = std::make_shared<opset8::Parameter>(element::f32, Shape{1, 3, 14, 14});
        std::shared_ptr<Node> activations = std::make_shared<opset8::FakeQuantize>(parameter,
            opset8::Constant::create(element::f32, Shape{}, {0}),
            opset8::Constant::create(element::f32, Shape{}, {20}),
            opset8::Constant::create(element::f32, Shape{}, {0}),
            opset8::Constant::create(element::f32, Shape{}, {254}), 255);
        {
            // Quantize/dequantize the activations path (u8 round-trip + scale).
            auto first_convert = std::make_shared<opset8::Convert>(activations, element::u8);
            auto second_convert = std::make_shared<opset8::Convert>(first_convert, element::f32);
            auto scale = opset8::Constant::create(element::f32, Shape{}, {0.2});
            auto multiply = std::make_shared<opset8::Multiply>(second_convert, scale);
            activations = multiply;
        }
        std::shared_ptr<Node> weights = opset8::Constant::create(element::i8, Shape{4, 3, 1, 1}, {-2});
        {
            // Dequantization sub-graph on the weights path.
            auto convert = std::make_shared<opset8::Convert>(weights, element::f32);
            auto scale = opset8::Constant::create(element::f32, Shape{}, {0.2});
            auto multiply = std::make_shared<opset8::Multiply>(convert, scale);
            weights = multiply;
        }
        auto conv = std::make_shared<opset8::Convolution>(activations, weights, Strides{1, 1}, CoordinateDiff{0, 0}, CoordinateDiff{0, 0}, Strides{1, 1});
        function = std::make_shared<ngraph::Function>(conv, ParameterVector{parameter});
        apply_reverse_input_channels(function, {{0, "NCHW"}});
    }
    manager.register_pass<ngraph::pass::ReverseInputChannelsFusion>();
    disable_rt_info_check();
    {
        // Reference: identical graph except the Gather {2,1,0} over axis 1 is
        // placed on the i8 weights Constant, ahead of the Convert.
        auto parameter = std::make_shared<opset8::Parameter>(element::f32, Shape{1, 3, 14, 14});
        std::shared_ptr<Node> activations = std::make_shared<opset8::FakeQuantize>(parameter,
            opset8::Constant::create(element::f32, Shape{}, {0}),
            opset8::Constant::create(element::f32, Shape{}, {20}),
            opset8::Constant::create(element::f32, Shape{}, {0}),
            opset8::Constant::create(element::f32, Shape{}, {254}), 255);
        {
            auto first_convert = std::make_shared<opset8::Convert>(activations, element::u8);
            auto second_convert = std::make_shared<opset8::Convert>(first_convert, element::f32);
            auto scale = opset8::Constant::create(element::f32, Shape{}, {0.2});
            auto multiply = std::make_shared<opset8::Multiply>(second_convert, scale);
            activations = multiply;
        }
        std::shared_ptr<Node> weights = opset8::Constant::create(element::i8, Shape{4, 3, 1, 1}, {-2});
        {
            auto scale = opset8::Constant::create(element::f32, Shape{}, {0.2});
            auto gather = create_gather(weights, {2, 1, 0}, 1);
            auto convert = std::make_shared<opset8::Convert>(gather, element::f32);
            auto multiply = std::make_shared<opset8::Multiply>(convert, scale);
            weights = multiply;
        }
        auto conv = std::make_shared<opset8::Convolution>(activations, weights, Strides{1, 1}, CoordinateDiff{0, 0}, CoordinateDiff{0, 0}, Strides{1, 1});
        function_ref = std::make_shared<ngraph::Function>(conv, ParameterVector{parameter});
    }
    comparator.enable(FunctionsComparator::CmpValues::ACCURACY);
}
// Quantized-weights case for GroupConvolution: the Gather must be inserted on
// the 5D group-conv weights Constant (axis 1) ahead of the Convert->Multiply
// dequantization, and the downstream Convolution gets its channels reordered
// group-wise.
TEST_F(TransformationTestsF, RICFusionConvertMultiplyGroupConv) {
    Shape data_shape{1, 3, 14, 14};
    {
        auto data = std::make_shared<opset8::Parameter>(element::f32, data_shape);
        std::shared_ptr<Node> weights = opset8::Constant::create(element::f32, Shape{3, 3, 1, 4, 4}, {-2});
        auto convert = std::make_shared<opset8::Convert>(weights, element::f32);
        auto scale = opset8::Constant::create(element::f32, Shape{}, {0.2});
        auto multiply = std::make_shared<opset8::Multiply>(convert, scale);
        auto group_conv = std::make_shared<opset8::GroupConvolution>(data, multiply, Strides{1, 1},
                                                                     CoordinateDiff{1, 1}, CoordinateDiff{3, 3}, Shape{1, 1},
                                                                     op::PadType::EXPLICIT);
        auto relu = std::make_shared<Relu>(group_conv);
        auto conv = create_conv(relu, {6, 9, 3, 3});
        function = std::make_shared<Function>(NodeVector{conv}, ParameterVector{data});
        apply_reverse_input_channels(function, {{0, "NCHW"}});
    }
    manager.register_pass<ngraph::pass::ReverseInputChannelsFusion>();
    disable_rt_info_check();
    {
        auto data = std::make_shared<opset8::Parameter>(element::f32, data_shape);
        std::shared_ptr<Node> weights = opset8::Constant::create(element::f32, Shape{3, 3, 1, 4, 4}, {-2});
        // Gather on axis 1 of the group-conv weights reverses input channels.
        auto gather = create_gather(weights, {2, 1, 0}, 1);
        auto convert = std::make_shared<opset8::Convert>(gather, element::f32);
        auto scale = opset8::Constant::create(element::f32, Shape{}, {0.2});
        auto multiply = std::make_shared<opset8::Multiply>(convert, scale);
        auto group_conv = std::make_shared<opset8::GroupConvolution>(data, multiply, Strides{1, 1},
                                                                     CoordinateDiff{1, 1}, CoordinateDiff{3, 3}, Shape{1, 1},
                                                                     op::PadType::EXPLICIT);
        auto relu = std::make_shared<Relu>(group_conv);
        std::shared_ptr<Node> weights2 = opset8::Constant::create(element::f32, Shape{6, 9, 3, 3}, {-2});
        // 9 input channels of the next conv = 3 groups x 3 channels; the order
        // reverses whole per-group channel triples.
        auto gather2 = create_gather(weights2, {6, 7, 8, 3, 4, 5, 0, 1, 2}, 1);
        auto conv = std::make_shared<opset8::Convolution>(relu, gather2, ov::Strides{1, 1},
                                                          ov::CoordinateDiff{0, 0}, ov::CoordinateDiff{0, 0},
                                                          ov::Strides{1, 1});
        function_ref = std::make_shared<Function>(NodeVector{conv}, ParameterVector{data});
    }
    comparator.enable(FunctionsComparator::CmpValues::ACCURACY);
}
// Dequantization scale has shape {1,1,1,1} (channel dim == 1): fusion still
// applies, and the Gather is inserted only on the weights Constant — the
// scale needs no reordering.
TEST_F(TransformationTestsF, RICFusionConvertMultiplyNegative1) {
    {
        auto parameter = std::make_shared<opset8::Parameter>(element::f32, Shape{1, 3, 14, 14});
        std::shared_ptr<Node> activations = std::make_shared<opset8::FakeQuantize>(parameter,
            opset8::Constant::create(element::f32, Shape{}, {0}),
            opset8::Constant::create(element::f32, Shape{}, {20}),
            opset8::Constant::create(element::f32, Shape{}, {0}),
            opset8::Constant::create(element::f32, Shape{}, {254}), 255);
        {
            auto first_convert = std::make_shared<opset8::Convert>(activations, element::u8);
            auto second_convert = std::make_shared<opset8::Convert>(first_convert, element::f32);
            auto scale = opset8::Constant::create(element::f32, Shape{}, {0.2});
            auto multiply = std::make_shared<opset8::Multiply>(second_convert, scale);
            activations = multiply;
        }
        std::shared_ptr<Node> weights = opset8::Constant::create(element::i8, Shape{4, 3, 1, 1}, {-2});
        {
            auto convert = std::make_shared<opset8::Convert>(weights, element::f32);
            // Non-scalar scale, but its channel dimension is 1.
            auto scale = opset8::Constant::create(element::f32, Shape{1, 1, 1, 1}, {0.2});
            auto multiply = std::make_shared<opset8::Multiply>(convert, scale);
            weights = multiply;
        }
        auto conv = std::make_shared<opset8::Convolution>(activations, weights, Strides{1, 1}, CoordinateDiff{0, 0}, CoordinateDiff{0, 0}, Strides{1, 1});
        function = std::make_shared<ngraph::Function>(conv, ParameterVector{parameter});
        apply_reverse_input_channels(function, {{0, "NCHW"}});
    }
    manager.register_pass<ngraph::pass::ReverseInputChannelsFusion>();
    disable_rt_info_check();
    {
        auto parameter = std::make_shared<opset8::Parameter>(element::f32, Shape{1, 3, 14, 14});
        std::shared_ptr<Node> activations = std::make_shared<opset8::FakeQuantize>(parameter,
            opset8::Constant::create(element::f32, Shape{}, {0}),
            opset8::Constant::create(element::f32, Shape{}, {20}),
            opset8::Constant::create(element::f32, Shape{}, {0}),
            opset8::Constant::create(element::f32, Shape{}, {254}), 255);
        {
            auto first_convert = std::make_shared<opset8::Convert>(activations, element::u8);
            auto second_convert = std::make_shared<opset8::Convert>(first_convert, element::f32);
            auto scale = opset8::Constant::create(element::f32, Shape{}, {0.2});
            auto multiply = std::make_shared<opset8::Multiply>(second_convert, scale);
            activations = multiply;
        }
        std::shared_ptr<Node> weights = opset8::Constant::create(element::i8, Shape{4, 3, 1, 1}, {-2});
        {
            // Gather only on weights; the {1,1,1,1} scale is left untouched.
            auto gather = create_gather(weights, {2, 1, 0}, 1);
            auto convert = std::make_shared<opset8::Convert>(gather, element::f32);
            auto scale = opset8::Constant::create(element::f32, Shape{1, 1, 1, 1}, {0.2});
            auto multiply = std::make_shared<opset8::Multiply>(convert, scale);
            weights = multiply;
        }
        auto conv = std::make_shared<opset8::Convolution>(activations, weights, Strides{1, 1}, CoordinateDiff{0, 0}, CoordinateDiff{0, 0}, Strides{1, 1});
        function_ref = std::make_shared<ngraph::Function>(conv, ParameterVector{parameter});
    }
    comparator.enable(FunctionsComparator::CmpValues::ACCURACY);
}
// Broadcast case: rank-3 weights {3,1,1} are broadcast against a rank-4 scale
// {4,3,1,1}. The Gather lands on axis 0 of the weights (channel axis after
// rank re-basing) AND on axis 1 of the scale, since the scale's channel
// dimension is not 1.
TEST_F(TransformationTestsF, RICFusionConvertMultiplyNegativeBroadcast) {
    {
        auto parameter = std::make_shared<opset8::Parameter>(element::f32, Shape{1, 3, 14, 14});
        std::shared_ptr<Node> activations = std::make_shared<opset8::FakeQuantize>(parameter,
            opset8::Constant::create(element::f32, Shape{}, {0}),
            opset8::Constant::create(element::f32, Shape{}, {20}),
            opset8::Constant::create(element::f32, Shape{}, {0}),
            opset8::Constant::create(element::f32, Shape{}, {254}), 255);
        {
            auto first_convert = std::make_shared<opset8::Convert>(activations, element::u8);
            auto second_convert = std::make_shared<opset8::Convert>(first_convert, element::f32);
            auto scale = opset8::Constant::create(element::f32, Shape{}, {0.2});
            auto multiply = std::make_shared<opset8::Multiply>(second_convert, scale);
            activations = multiply;
        }
        // Rank-3 weights, broadcast by Multiply against a rank-4 scale.
        std::shared_ptr<Node> weights = opset8::Constant::create(element::i8, Shape{3, 1, 1}, {-2});
        {
            auto convert = std::make_shared<opset8::Convert>(weights, element::f32);
            auto scale = opset8::Constant::create(element::f32, Shape{4, 3, 1, 1}, {0.2});
            auto multiply = std::make_shared<opset8::Multiply>(convert, scale);
            weights = multiply;
        }
        auto conv = std::make_shared<opset8::Convolution>(activations, weights, Strides{1, 1}, CoordinateDiff{0, 0}, CoordinateDiff{0, 0}, Strides{1, 1});
        function = std::make_shared<ngraph::Function>(conv, ParameterVector{parameter});
        apply_reverse_input_channels(function, {{0, "NCHW"}});
    }
    manager.register_pass<ngraph::pass::ReverseInputChannelsFusion>();
    disable_rt_info_check();
    {
        auto parameter = std::make_shared<opset8::Parameter>(element::f32, Shape{1, 3, 14, 14});
        std::shared_ptr<Node> activations = std::make_shared<opset8::FakeQuantize>(parameter,
            opset8::Constant::create(element::f32, Shape{}, {0}),
            opset8::Constant::create(element::f32, Shape{}, {20}),
            opset8::Constant::create(element::f32, Shape{}, {0}),
            opset8::Constant::create(element::f32, Shape{}, {254}), 255);
        {
            auto first_convert = std::make_shared<opset8::Convert>(activations, element::u8);
            auto second_convert = std::make_shared<opset8::Convert>(first_convert, element::f32);
            auto scale = opset8::Constant::create(element::f32, Shape{}, {0.2});
            auto multiply = std::make_shared<opset8::Multiply>(second_convert, scale);
            activations = multiply;
        }
        std::shared_ptr<Node> weights = opset8::Constant::create(element::i8, Shape{3, 1, 1}, {-2});
        {
            // Axis re-based to 0 for the rank-3 weights; axis 1 for the scale.
            auto gather = create_gather(weights, {2, 1, 0}, 0);
            auto convert = std::make_shared<opset8::Convert>(gather, element::f32);
            auto scale = opset8::Constant::create(element::f32, Shape{4, 3, 1, 1}, {0.2});
            auto gather2 = create_gather(scale, {2, 1, 0}, 1);
            auto multiply = std::make_shared<opset8::Multiply>(convert, gather2);
            weights = multiply;
        }
        auto conv = std::make_shared<opset8::Convolution>(activations, weights, Strides{1, 1}, CoordinateDiff{0, 0}, CoordinateDiff{0, 0}, Strides{1, 1});
        function_ref = std::make_shared<ngraph::Function>(conv, ParameterVector{parameter});
    }
    comparator.enable(FunctionsComparator::CmpValues::ACCURACY);
}
// Negative case: a Relu sits on the weights path between the dequantization
// and the Convolution. Relu is not supported by RIC back-propagation, so the
// transformation must not modify the graph. No function_ref is set —
// presumably the fixture then compares against the untouched original
// (TODO confirm TransformationTestsF behavior).
TEST_F(TransformationTestsF, RICFusionNegativeUnsupported) {
    {
        auto input = create_param({1, 3, 64, 64});
        auto relu = std::make_shared<Relu>(input);
        std::shared_ptr<Node> weights = opset8::Constant::create(element::i8, Shape{6, 3, 3, 3}, {-2});
        {
            auto convert = std::make_shared<opset8::Convert>(weights, element::f32);
            auto scale = opset8::Constant::create(element::f32, Shape{}, {0.2});
            auto multiply = std::make_shared<opset8::Multiply>(convert, scale);
            // Unsupported op on the weights path blocks the fusion.
            auto relu2 = std::make_shared<Relu>(multiply);
            weights = relu2;
        }
        auto conv = std::make_shared<opset8::Convolution>(relu, weights, Strides{1, 1}, CoordinateDiff{0, 0}, CoordinateDiff{0, 0}, Strides{1, 1});
        function = std::make_shared<Function>(NodeVector{conv}, ParameterVector{input});
        apply_reverse_input_channels(function, {{0, "NCHW"}});
        manager.register_pass<pass::ReverseInputChannelsFusion>();
    }
}
// FakeQuantize with a non-scalar input_low constant ({1,3,14,14}): the fusion
// must insert a Gather on that FQ range input as well as on the quantized
// weights Constant.
TEST_F(TransformationTestsF, RICFusionConvertMultiplyNonScalarFQInput) {
    {
        auto parameter = std::make_shared<opset8::Parameter>(element::f32, Shape{1, 3, 14, 14});
        std::shared_ptr<Node> activations = std::make_shared<opset8::FakeQuantize>(parameter,
            std::make_shared<opset8::Constant>(element::f32, Shape{1, 3, 14, 14}),
            opset8::Constant::create(element::f32, Shape{}, {20}),
            opset8::Constant::create(element::f32, Shape{}, {0}),
            opset8::Constant::create(element::f32, Shape{}, {254}), 255);
        {
            auto first_convert = std::make_shared<opset8::Convert>(activations, element::u8);
            auto second_convert = std::make_shared<opset8::Convert>(first_convert, element::f32);
            auto scale = opset8::Constant::create(element::f32, Shape{}, {0.2});
            auto multiply = std::make_shared<opset8::Multiply>(second_convert, scale);
            activations = multiply;
        }
        std::shared_ptr<Node> weights = opset8::Constant::create(element::i8, Shape{4, 3, 1, 1}, {-2});
        {
            auto convert = std::make_shared<opset8::Convert>(weights, element::f32);
            auto scale = opset8::Constant::create(element::f32, Shape{}, {0.2});
            auto multiply = std::make_shared<opset8::Multiply>(convert, scale);
            weights = multiply;
        }
        auto conv = std::make_shared<opset8::Convolution>(activations, weights, Strides{1, 1}, CoordinateDiff{0, 0}, CoordinateDiff{0, 0}, Strides{1, 1});
        function = std::make_shared<ngraph::Function>(conv, ParameterVector{parameter});
        apply_reverse_input_channels(function, {{0, "NCHW"}});
    }
    manager.register_pass<ngraph::pass::ReverseInputChannelsFusion>();
    disable_rt_info_check();
    {
        auto parameter = std::make_shared<opset8::Parameter>(element::f32, Shape{1, 3, 14, 14});
        // Gather on the non-scalar input_low constant of the FakeQuantize.
        auto gather = create_gather(std::make_shared<opset8::Constant>(element::f32, Shape{1, 3, 14, 14}), {2, 1, 0}, 1);
        std::shared_ptr<Node> activations = std::make_shared<opset8::FakeQuantize>(parameter,
            gather,
            opset8::Constant::create(element::f32, Shape{}, {20}),
            opset8::Constant::create(element::f32, Shape{}, {0}),
            opset8::Constant::create(element::f32, Shape{}, {254}), 255);
        {
            auto first_convert = std::make_shared<opset8::Convert>(activations, element::u8);
            auto second_convert = std::make_shared<opset8::Convert>(first_convert, element::f32);
            auto scale = opset8::Constant::create(element::f32, Shape{}, {0.2});
            auto multiply = std::make_shared<opset8::Multiply>(second_convert, scale);
            activations = multiply;
        }
        std::shared_ptr<Node> weights = opset8::Constant::create(element::i8, Shape{4, 3, 1, 1}, {-2});
        {
            auto scale = opset8::Constant::create(element::f32, Shape{}, {0.2});
            // Reuses the outer `gather` variable for the weights Gather.
            gather = create_gather(weights, {2, 1, 0}, 1);
            auto convert = std::make_shared<opset8::Convert>(gather, element::f32);
            auto multiply = std::make_shared<opset8::Multiply>(convert, scale);
            weights = multiply;
        }
        auto conv = std::make_shared<opset8::Convolution>(activations, weights, Strides{1, 1}, CoordinateDiff{0, 0}, CoordinateDiff{0, 0}, Strides{1, 1});
        function_ref = std::make_shared<ngraph::Function>(conv, ParameterVector{parameter});
    }
    comparator.enable(FunctionsComparator::CmpValues::ACCURACY);
}
// Negative case: the FakeQuantize input_low is a Parameter (non-constant), so
// a Gather cannot be folded into it and the fusion must be skipped. No
// function_ref is set — presumably the fixture compares against the untouched
// original (TODO confirm TransformationTestsF behavior).
TEST_F(TransformationTestsF, RICFusionConvertMultiplySkipIfFQLowNonConst) {
    {
        auto parameter = std::make_shared<opset8::Parameter>(element::f32, Shape{1, 3, 14, 14});
        // Non-constant FQ range input blocks the fusion.
        auto input_low = std::make_shared<opset8::Parameter>(element::f32, Shape{});
        std::shared_ptr<Node> activations = std::make_shared<opset8::FakeQuantize>(parameter,
            input_low,
            opset8::Constant::create(element::f32, Shape{}, {20}),
            opset8::Constant::create(element::f32, Shape{}, {0}),
            opset8::Constant::create(element::f32, Shape{}, {254}), 255);
        {
            auto first_convert = std::make_shared<opset8::Convert>(activations, element::u8);
            auto second_convert = std::make_shared<opset8::Convert>(first_convert, element::f32);
            auto scale = opset8::Constant::create(element::f32, Shape{}, {0.2});
            auto multiply = std::make_shared<opset8::Multiply>(second_convert, scale);
            activations = multiply;
        }
        std::shared_ptr<Node> weights = opset8::Constant::create(element::i8, Shape{4, 3, 1, 1}, {-2});
        {
            auto convert = std::make_shared<opset8::Convert>(weights, element::f32);
            auto scale = opset8::Constant::create(element::f32, Shape{}, {0.2});
            auto multiply = std::make_shared<opset8::Multiply>(convert, scale);
            weights = multiply;
        }
        auto conv = std::make_shared<opset8::Convolution>(activations, weights, Strides{1, 1}, CoordinateDiff{0, 0}, CoordinateDiff{0, 0}, Strides{1, 1});
        function = std::make_shared<ngraph::Function>(conv, ParameterVector{parameter, input_low});
        apply_reverse_input_channels(function, {{0, "NCHW"}});
    }
    manager.register_pass<ngraph::pass::ReverseInputChannelsFusion>();
}
// RIC fusion over two stacked convolutions: the reversing Gather is expected
// only on the first convolution's weights.
TEST_F(TransformationTestsF, RICFusionTwoConvolutions) {
    auto input = create_param({1, 3, 16, 16});
    {
        // Original graph: two chained convolutions with independent weights.
        auto first_conv = create_conv(input, create_weights({3, 3, 1, 1}));
        auto second_conv = create_conv(first_conv, create_weights({3, 3, 1, 1}));
        function = std::make_shared<Function>(NodeVector{second_conv}, ParameterVector{input});
        apply_reverse_input_channels(function, {{0, "NCHW"}});
        manager.register_pass<pass::ReverseInputChannelsFusion>();
        disable_rt_info_check();
    }
    {
        // Reference graph: Gather {2,1,0} fused into the first convolution only.
        auto fused_first_conv = create_conv_with_gather(input, create_weights({3, 3, 1, 1}), {2, 1, 0});
        auto second_conv = create_conv(fused_first_conv, create_weights({3, 3, 1, 1}));
        function_ref = std::make_shared<Function>(NodeVector{second_conv}, ParameterVector{input});
    }
    comparator.enable(FunctionsComparator::CmpValues::ACCURACY);
}
// Same two-convolution topology but both convolutions SHARE one weights node:
// the Gather must be inserted for the first convolution without disturbing the
// shared constant used by the second one.
TEST_F(TransformationTestsF, RICFusionTwoConvolutionsTheSameWeights) {
    auto input = create_param({1, 3, 16, 16});
    auto shared_weights = create_weights({3, 3, 1, 1});
    {
        // Original graph: both convolutions consume the same weights output.
        auto first_conv = create_conv(input, shared_weights);
        auto second_conv = create_conv(first_conv, shared_weights);
        function = std::make_shared<Function>(NodeVector{second_conv}, ParameterVector{input});
        apply_reverse_input_channels(function, {{0, "NCHW"}});
        manager.register_pass<pass::ReverseInputChannelsFusion>();
        disable_rt_info_check();
    }
    {
        // Reference graph: Gather on the first conv only; the shared weights
        // reach the second conv unmodified.
        auto fused_first_conv = create_conv_with_gather(input, shared_weights, {2, 1, 0});
        auto second_conv = create_conv(fused_first_conv, shared_weights);
        function_ref = std::make_shared<Function>(NodeVector{second_conv}, ParameterVector{input});
    }
    comparator.enable(FunctionsComparator::CmpValues::ACCURACY);
}