Fix MarkDequantizationSubgraph when Mul has Convert on both inputs (#14997)

Ticket: 100042
This commit is contained in:
Mateusz Tabaka
2023-01-26 09:55:23 +01:00
committed by GitHub
parent c0f524e3bd
commit 3ad92084d6
4 changed files with 153 additions and 27 deletions

View File

@@ -4,7 +4,9 @@
#include <openvino/opsets/opset10.hpp>
#include <openvino/pass/constant_folding.hpp>
#include <transformations/disable_decompression_convert_constant_folding.hpp>
#include <transformations/low_precision/mark_dequantization_subgraph.hpp>
#include <transformations/rt_info/decompression.hpp>
#include <transformations/rt_info/dequantization_node.hpp>
#include "common_test_utils/ngraph_test_utils.hpp"
@@ -78,8 +80,8 @@ TEST_F(TransformationTestsF, MarkDequantizationSubgraphTransformation) {
function = std::make_shared<Model>(conv, ParameterVector{parameter});
}
manager.register_pass<ov::pass::MarkDequantizationSubgraph>();
manager.register_pass<ov::pass::ConstantFolding>();
manager.register_pass<pass::MarkDequantizationSubgraph>(element::TypeVector{element::u8, element::i8});
manager.register_pass<pass::ConstantFolding>();
{
auto parameter = std::make_shared<opset10::Parameter>(element::f32, Shape{1, 16, 14, 14});
@@ -95,27 +97,27 @@ TEST_F(TransformationTestsF, MarkDequantizationSubgraphTransformation) {
auto second_convert = std::make_shared<opset10::Convert>(first_convert, element::f32);
auto zero_point = opset10::Constant::create(element::u8, Shape{}, {127});
auto convert_on_zero_point = std::make_shared<opset10::Convert>(zero_point, element::f32);
ov::pass::disable_constant_folding(convert_on_zero_point);
pass::disable_constant_folding(convert_on_zero_point);
auto subtract = std::make_shared<opset10::Subtract>(second_convert, convert_on_zero_point);
ov::mark_as_dequantization_node(subtract);
mark_as_dequantization_node(subtract);
auto scale = opset10::Constant::create(element::f32, Shape{}, {0.2});
auto multiply = std::make_shared<opset10::Multiply>(subtract, scale);
ov::mark_as_dequantization_node(multiply);
mark_as_dequantization_node(multiply);
activations = multiply;
}
std::shared_ptr<Node> weights = opset10::Constant::create(element::i8, Shape{4, 16, 1, 1}, {-2});
{
auto convert = std::make_shared<opset10::Convert>(weights, element::f32);
ov::pass::disable_constant_folding(convert);
pass::disable_constant_folding(convert);
auto zero_point = opset10::Constant::create(element::i8, Shape{}, {127});
auto convert_on_zero_point = std::make_shared<opset10::Convert>(zero_point, element::f32);
ov::pass::disable_constant_folding(convert_on_zero_point);
pass::disable_constant_folding(convert_on_zero_point);
auto subtract = std::make_shared<opset10::Subtract>(convert, convert_on_zero_point);
ov::mark_as_dequantization_node(subtract);
mark_as_dequantization_node(subtract);
auto scale = opset10::Constant::create(element::f32, Shape{}, {0.2});
auto multiply = std::make_shared<opset10::Multiply>(subtract, scale);
ov::mark_as_dequantization_node(multiply);
mark_as_dequantization_node(multiply);
weights = multiply;
}
@@ -190,8 +192,8 @@ TEST_F(TransformationTestsF, MarkDequantizationSubgraphTransformationNoZeroPoint
function = std::make_shared<Model>(conv, ParameterVector{parameter});
}
manager.register_pass<ov::pass::MarkDequantizationSubgraph>();
manager.register_pass<ov::pass::ConstantFolding>();
manager.register_pass<pass::MarkDequantizationSubgraph>(element::TypeVector{element::u8, element::i8});
manager.register_pass<pass::ConstantFolding>();
{
auto parameter = std::make_shared<opset10::Parameter>(element::f32, Shape{1, 16, 14, 14});
@@ -207,17 +209,128 @@ TEST_F(TransformationTestsF, MarkDequantizationSubgraphTransformationNoZeroPoint
auto second_convert = std::make_shared<opset10::Convert>(first_convert, element::f32);
auto scale = opset10::Constant::create(element::f32, Shape{}, {0.2});
auto multiply = std::make_shared<opset10::Multiply>(second_convert, scale);
ov::mark_as_dequantization_node(multiply);
mark_as_dequantization_node(multiply);
activations = multiply;
}
std::shared_ptr<Node> weights = opset10::Constant::create(element::i8, Shape{4, 16, 1, 1}, {-2});
{
auto convert = std::make_shared<opset10::Convert>(weights, element::f32);
ov::pass::disable_constant_folding(convert);
pass::disable_constant_folding(convert);
auto scale = opset10::Constant::create(element::f32, Shape{}, {0.2});
auto multiply = std::make_shared<opset10::Multiply>(convert, scale);
ov::mark_as_dequantization_node(multiply);
mark_as_dequantization_node(multiply);
weights = multiply;
}
auto conv = std::make_shared<opset10::Convolution>(activations,
weights,
Strides{1, 1},
CoordinateDiff{0, 0},
CoordinateDiff{0, 0},
Strides{1, 1});
function_ref = std::make_shared<Model>(conv, ParameterVector{parameter});
}
comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES);
comparator.enable(FunctionsComparator::CmpValues::RUNTIME_KEYS);
}
TEST_F(TransformationTestsF, MarkDequantizationSubgraphTransformationNoZeroPointFP16) {
// Input graph:
//
// Parameter
// |F32
// |
// FakeQuantize
// |F32
// |
// Convert Constant Constant Constant
// |U8 |FP16 |I8 /FP16
// | | | /
// Convert Convert Convert(DCF) Convert
// \FP32 /FP32 \FP32 /FP32
// \ / \ /
// Multiply Multiply
// \FP32 /FP32
// \ /
// Convolution
//
// After MarkDequantizationSubgraph all Multiply nodes from above graph
// are marked with 'DequantizationNode' attribute.
// Also 'Convert(DCF)' node from above graph is marked with 'DisableConstantFolding' attribute
{
auto parameter = std::make_shared<opset10::Parameter>(element::f32, Shape{1, 16, 14, 14});
std::shared_ptr<Node> activations =
std::make_shared<opset10::FakeQuantize>(parameter,
opset10::Constant::create(element::f32, Shape{}, {0}),
opset10::Constant::create(element::f32, Shape{}, {20}),
opset10::Constant::create(element::f32, Shape{}, {0}),
opset10::Constant::create(element::f32, Shape{}, {254}),
255);
{
auto first_convert = std::make_shared<opset10::Convert>(activations, element::u8);
auto second_convert = std::make_shared<opset10::Convert>(first_convert, element::f32);
auto scale = opset10::Constant::create(element::f16, Shape{}, {0.2});
auto scale_convert = std::make_shared<opset10::Convert>(scale, element::f32);
mark_as_decompression(scale_convert);
auto multiply = std::make_shared<opset10::Multiply>(second_convert, scale_convert);
activations = multiply;
}
std::shared_ptr<Node> weights = opset10::Constant::create(element::i8, Shape{4, 16, 1, 1}, {-2});
{
auto convert = std::make_shared<opset10::Convert>(weights, element::f32);
auto scale = opset10::Constant::create(element::f16, Shape{}, {0.2});
auto scale_convert = std::make_shared<opset10::Convert>(scale, element::f32);
mark_as_decompression(scale_convert);
auto multiply = std::make_shared<opset10::Multiply>(convert, scale_convert);
weights = multiply;
}
auto conv = std::make_shared<opset10::Convolution>(activations,
weights,
Strides{1, 1},
CoordinateDiff{0, 0},
CoordinateDiff{0, 0},
Strides{1, 1});
function = std::make_shared<Model>(conv, ParameterVector{parameter});
}
manager.register_pass<pass::MarkDequantizationSubgraph>(element::TypeVector{element::u8, element::i8});
manager.register_pass<pass::DisableDecompressionConvertConstantFolding>();
manager.register_pass<pass::ConstantFolding>();
{
auto parameter = std::make_shared<opset10::Parameter>(element::f32, Shape{1, 16, 14, 14});
std::shared_ptr<Node> activations =
std::make_shared<opset10::FakeQuantize>(parameter,
opset10::Constant::create(element::f32, Shape{}, {0}),
opset10::Constant::create(element::f32, Shape{}, {20}),
opset10::Constant::create(element::f32, Shape{}, {0}),
opset10::Constant::create(element::f32, Shape{}, {254}),
255);
{
auto first_convert = std::make_shared<opset10::Convert>(activations, element::u8);
auto second_convert = std::make_shared<opset10::Convert>(first_convert, element::f32);
auto scale = opset10::Constant::create(element::f16, Shape{}, {0.2});
auto scale_convert = std::make_shared<opset10::Convert>(scale, element::f32);
mark_as_decompression(scale_convert);
auto multiply = std::make_shared<opset10::Multiply>(second_convert, scale_convert);
mark_as_dequantization_node(multiply);
activations = multiply;
}
std::shared_ptr<Node> weights = opset10::Constant::create(element::i8, Shape{4, 16, 1, 1}, {-2});
{
auto convert = std::make_shared<opset10::Convert>(weights, element::f32);
pass::disable_constant_folding(convert);
auto scale = opset10::Constant::create(element::f16, Shape{}, {0.2});
auto scale_convert = std::make_shared<opset10::Convert>(scale, element::f32);
mark_as_decompression(scale_convert);
auto multiply = std::make_shared<opset10::Multiply>(convert, scale_convert);
mark_as_dequantization_node(multiply);
weights = multiply;
}

View File

@@ -22,7 +22,7 @@ namespace pass {
class TRANSFORMATIONS_API MarkDequantizationSubgraph : public MatcherPass {
public:
OPENVINO_RTTI("MarkDequantizationSubgraph", "0");
MarkDequantizationSubgraph(const element::TypeVector& precisions = {});
MarkDequantizationSubgraph(const element::TypeVector& precisions);
};
} // namespace pass
} // namespace ov

View File

@@ -34,39 +34,49 @@ ov::pass::MarkDequantizationSubgraph::MarkDequantizationSubgraph(const element::
ov::matcher_pass_callback callback = [=](pattern::Matcher& m) -> bool {
const auto& pattern_map = m.get_pattern_value_map();
const auto& convert = pattern_map.at(convert_pattern).get_node_shared_ptr();
const auto& input_precision = convert->get_input_element_type(0);
auto convert = pattern_map.at(convert_pattern).get_node_shared_ptr();
auto input = pattern_map.at(input_pattern).get_node_shared_ptr();
const auto multiply = m.get_match_root();
// validation by Convert operation input precisions
if (!precisions.empty()) {
if (std::find(precisions.begin(), precisions.end(), input_precision) == precisions.end()) {
return false;
auto subtract_it = pattern_map.find(subtract_pattern);
if (subtract_it == pattern_map.end()) {
for (size_t i = 0; i < multiply->get_input_size(); i++) {
const auto node = ov::as_type_ptr<opset10::Convert>(multiply->get_input_node_shared_ptr(i));
if (node && std::find(precisions.begin(), precisions.end(), node->get_input_element_type(0)) !=
precisions.end()) {
convert = node;
input = convert->get_input_node_shared_ptr(0);
}
}
}
const auto& input = pattern_map.at(input_pattern);
// validation by Convert operation input precisions
const auto& input_precision = input->get_output_element_type(0);
if (std::find(precisions.begin(), precisions.end(), input_precision) == precisions.end()) {
return false;
}
std::vector<Node*> tmp;
if (ngraph::could_propagate(input, tmp)) {
// disable ConstantFolding if dequantization subgraph is on constant data
ov::disable_constant_folding(convert);
}
auto subtract_it = pattern_map.find(subtract_pattern);
if (subtract_it != pattern_map.end()) {
// mark Subtract as dequantization node
ov::mark_as_dequantization_node(subtract_it->second.get_node_shared_ptr());
auto zero_point = pattern_map.at(zero_point_pattern).get_node();
auto zero_point = pattern_map.at(zero_point_pattern).get_node_shared_ptr();
if (ov::is_type<opset10::Convert>(zero_point) && input_precision == zero_point->get_input_element_type(0) &&
ov::is_type<opset10::Constant>(zero_point->get_input_node_ptr(0))) {
// disable ConstantFolding also for Convert on zero_point
// so we don't have to constantfold it and then convert it back to
// low precision in LP transformations
ov::disable_constant_folding(zero_point->shared_from_this());
ov::disable_constant_folding(zero_point);
}
}
// mark Multiply as dequantization node
ov::mark_as_dequantization_node(m.get_match_root());
ov::mark_as_dequantization_node(multiply);
return false;
};

View File

@@ -519,7 +519,10 @@ void CNNNetworkNGraphImpl::reshape(const std::map<std::string, ngraph::PartialSh
REGISTER_PASS(manager, ConvertMatrixNmsToMatrixNmsIE, false)
REGISTER_PASS(manager, ConvertNMS9ToNMSIEInternal)
REGISTER_PASS(manager, ConvertGP9ToGPIEInternal)
REGISTER_PASS(manager, MarkDequantizationSubgraph)
REGISTER_PASS(
manager,
MarkDequantizationSubgraph,
ov::element::TypeVector{ov::element::i8, ov::element::u8, ov::element::i4, ov::element::u4})
REGISTER_PASS(manager, DisableDecompressionConvertConstantFolding)
REGISTER_PASS(manager, ConstantFolding)