Fix MarkDequantizationSubgraph when Mul has Convert on both inputs (#14997)
Ticket: 100042
This commit is contained in:
@@ -4,7 +4,9 @@
|
||||
|
||||
#include <openvino/opsets/opset10.hpp>
|
||||
#include <openvino/pass/constant_folding.hpp>
|
||||
#include <transformations/disable_decompression_convert_constant_folding.hpp>
|
||||
#include <transformations/low_precision/mark_dequantization_subgraph.hpp>
|
||||
#include <transformations/rt_info/decompression.hpp>
|
||||
#include <transformations/rt_info/dequantization_node.hpp>
|
||||
|
||||
#include "common_test_utils/ngraph_test_utils.hpp"
|
||||
@@ -78,8 +80,8 @@ TEST_F(TransformationTestsF, MarkDequantizationSubgraphTransformation) {
|
||||
function = std::make_shared<Model>(conv, ParameterVector{parameter});
|
||||
}
|
||||
|
||||
manager.register_pass<ov::pass::MarkDequantizationSubgraph>();
|
||||
manager.register_pass<ov::pass::ConstantFolding>();
|
||||
manager.register_pass<pass::MarkDequantizationSubgraph>(element::TypeVector{element::u8, element::i8});
|
||||
manager.register_pass<pass::ConstantFolding>();
|
||||
|
||||
{
|
||||
auto parameter = std::make_shared<opset10::Parameter>(element::f32, Shape{1, 16, 14, 14});
|
||||
@@ -95,27 +97,27 @@ TEST_F(TransformationTestsF, MarkDequantizationSubgraphTransformation) {
|
||||
auto second_convert = std::make_shared<opset10::Convert>(first_convert, element::f32);
|
||||
auto zero_point = opset10::Constant::create(element::u8, Shape{}, {127});
|
||||
auto convert_on_zero_point = std::make_shared<opset10::Convert>(zero_point, element::f32);
|
||||
ov::pass::disable_constant_folding(convert_on_zero_point);
|
||||
pass::disable_constant_folding(convert_on_zero_point);
|
||||
auto subtract = std::make_shared<opset10::Subtract>(second_convert, convert_on_zero_point);
|
||||
ov::mark_as_dequantization_node(subtract);
|
||||
mark_as_dequantization_node(subtract);
|
||||
auto scale = opset10::Constant::create(element::f32, Shape{}, {0.2});
|
||||
auto multiply = std::make_shared<opset10::Multiply>(subtract, scale);
|
||||
ov::mark_as_dequantization_node(multiply);
|
||||
mark_as_dequantization_node(multiply);
|
||||
activations = multiply;
|
||||
}
|
||||
|
||||
std::shared_ptr<Node> weights = opset10::Constant::create(element::i8, Shape{4, 16, 1, 1}, {-2});
|
||||
{
|
||||
auto convert = std::make_shared<opset10::Convert>(weights, element::f32);
|
||||
ov::pass::disable_constant_folding(convert);
|
||||
pass::disable_constant_folding(convert);
|
||||
auto zero_point = opset10::Constant::create(element::i8, Shape{}, {127});
|
||||
auto convert_on_zero_point = std::make_shared<opset10::Convert>(zero_point, element::f32);
|
||||
ov::pass::disable_constant_folding(convert_on_zero_point);
|
||||
pass::disable_constant_folding(convert_on_zero_point);
|
||||
auto subtract = std::make_shared<opset10::Subtract>(convert, convert_on_zero_point);
|
||||
ov::mark_as_dequantization_node(subtract);
|
||||
mark_as_dequantization_node(subtract);
|
||||
auto scale = opset10::Constant::create(element::f32, Shape{}, {0.2});
|
||||
auto multiply = std::make_shared<opset10::Multiply>(subtract, scale);
|
||||
ov::mark_as_dequantization_node(multiply);
|
||||
mark_as_dequantization_node(multiply);
|
||||
weights = multiply;
|
||||
}
|
||||
|
||||
@@ -190,8 +192,8 @@ TEST_F(TransformationTestsF, MarkDequantizationSubgraphTransformationNoZeroPoint
|
||||
function = std::make_shared<Model>(conv, ParameterVector{parameter});
|
||||
}
|
||||
|
||||
manager.register_pass<ov::pass::MarkDequantizationSubgraph>();
|
||||
manager.register_pass<ov::pass::ConstantFolding>();
|
||||
manager.register_pass<pass::MarkDequantizationSubgraph>(element::TypeVector{element::u8, element::i8});
|
||||
manager.register_pass<pass::ConstantFolding>();
|
||||
|
||||
{
|
||||
auto parameter = std::make_shared<opset10::Parameter>(element::f32, Shape{1, 16, 14, 14});
|
||||
@@ -207,17 +209,128 @@ TEST_F(TransformationTestsF, MarkDequantizationSubgraphTransformationNoZeroPoint
|
||||
auto second_convert = std::make_shared<opset10::Convert>(first_convert, element::f32);
|
||||
auto scale = opset10::Constant::create(element::f32, Shape{}, {0.2});
|
||||
auto multiply = std::make_shared<opset10::Multiply>(second_convert, scale);
|
||||
ov::mark_as_dequantization_node(multiply);
|
||||
mark_as_dequantization_node(multiply);
|
||||
activations = multiply;
|
||||
}
|
||||
|
||||
std::shared_ptr<Node> weights = opset10::Constant::create(element::i8, Shape{4, 16, 1, 1}, {-2});
|
||||
{
|
||||
auto convert = std::make_shared<opset10::Convert>(weights, element::f32);
|
||||
ov::pass::disable_constant_folding(convert);
|
||||
pass::disable_constant_folding(convert);
|
||||
auto scale = opset10::Constant::create(element::f32, Shape{}, {0.2});
|
||||
auto multiply = std::make_shared<opset10::Multiply>(convert, scale);
|
||||
ov::mark_as_dequantization_node(multiply);
|
||||
mark_as_dequantization_node(multiply);
|
||||
weights = multiply;
|
||||
}
|
||||
|
||||
auto conv = std::make_shared<opset10::Convolution>(activations,
|
||||
weights,
|
||||
Strides{1, 1},
|
||||
CoordinateDiff{0, 0},
|
||||
CoordinateDiff{0, 0},
|
||||
Strides{1, 1});
|
||||
function_ref = std::make_shared<Model>(conv, ParameterVector{parameter});
|
||||
}
|
||||
|
||||
comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES);
|
||||
comparator.enable(FunctionsComparator::CmpValues::RUNTIME_KEYS);
|
||||
}
|
||||
|
||||
TEST_F(TransformationTestsF, MarkDequantizationSubgraphTransformationNoZeroPointFP16) {
|
||||
// Input graph:
|
||||
//
|
||||
// Parameter
|
||||
// |F32
|
||||
// |
|
||||
// FakeQuantize
|
||||
// |F32
|
||||
// |
|
||||
// Convert Constant Constant Constant
|
||||
// |U8 |FP16 |I8 /FP16
|
||||
// | | | /
|
||||
// Convert Convert Convert(DCF) Convert
|
||||
// \FP32 /FP32 \FP32 /F32
|
||||
// \ / \ /
|
||||
// Multiply Multiply
|
||||
// \FP32 /FP32
|
||||
// \ /
|
||||
// Convolution
|
||||
//
|
||||
// After MarkDequantizationSubgraph, all Multiply nodes from the graph above
|
||||
// are marked with 'DequantizationNode' attribute.
|
||||
// Also, the 'Convert(DCF)' node from the graph above is marked with the 'DisableConstantFolding' attribute.
|
||||
|
||||
{
|
||||
auto parameter = std::make_shared<opset10::Parameter>(element::f32, Shape{1, 16, 14, 14});
|
||||
std::shared_ptr<Node> activations =
|
||||
std::make_shared<opset10::FakeQuantize>(parameter,
|
||||
opset10::Constant::create(element::f32, Shape{}, {0}),
|
||||
opset10::Constant::create(element::f32, Shape{}, {20}),
|
||||
opset10::Constant::create(element::f32, Shape{}, {0}),
|
||||
opset10::Constant::create(element::f32, Shape{}, {254}),
|
||||
255);
|
||||
{
|
||||
auto first_convert = std::make_shared<opset10::Convert>(activations, element::u8);
|
||||
auto second_convert = std::make_shared<opset10::Convert>(first_convert, element::f32);
|
||||
auto scale = opset10::Constant::create(element::f16, Shape{}, {0.2});
|
||||
auto scale_convert = std::make_shared<opset10::Convert>(scale, element::f32);
|
||||
mark_as_decompression(scale_convert);
|
||||
auto multiply = std::make_shared<opset10::Multiply>(second_convert, scale_convert);
|
||||
activations = multiply;
|
||||
}
|
||||
|
||||
std::shared_ptr<Node> weights = opset10::Constant::create(element::i8, Shape{4, 16, 1, 1}, {-2});
|
||||
{
|
||||
auto convert = std::make_shared<opset10::Convert>(weights, element::f32);
|
||||
auto scale = opset10::Constant::create(element::f16, Shape{}, {0.2});
|
||||
auto scale_convert = std::make_shared<opset10::Convert>(scale, element::f32);
|
||||
mark_as_decompression(scale_convert);
|
||||
auto multiply = std::make_shared<opset10::Multiply>(convert, scale_convert);
|
||||
weights = multiply;
|
||||
}
|
||||
|
||||
auto conv = std::make_shared<opset10::Convolution>(activations,
|
||||
weights,
|
||||
Strides{1, 1},
|
||||
CoordinateDiff{0, 0},
|
||||
CoordinateDiff{0, 0},
|
||||
Strides{1, 1});
|
||||
function = std::make_shared<Model>(conv, ParameterVector{parameter});
|
||||
}
|
||||
|
||||
manager.register_pass<pass::MarkDequantizationSubgraph>(element::TypeVector{element::u8, element::i8});
|
||||
manager.register_pass<pass::DisableDecompressionConvertConstantFolding>();
|
||||
manager.register_pass<pass::ConstantFolding>();
|
||||
|
||||
{
|
||||
auto parameter = std::make_shared<opset10::Parameter>(element::f32, Shape{1, 16, 14, 14});
|
||||
std::shared_ptr<Node> activations =
|
||||
std::make_shared<opset10::FakeQuantize>(parameter,
|
||||
opset10::Constant::create(element::f32, Shape{}, {0}),
|
||||
opset10::Constant::create(element::f32, Shape{}, {20}),
|
||||
opset10::Constant::create(element::f32, Shape{}, {0}),
|
||||
opset10::Constant::create(element::f32, Shape{}, {254}),
|
||||
255);
|
||||
{
|
||||
auto first_convert = std::make_shared<opset10::Convert>(activations, element::u8);
|
||||
auto second_convert = std::make_shared<opset10::Convert>(first_convert, element::f32);
|
||||
auto scale = opset10::Constant::create(element::f16, Shape{}, {0.2});
|
||||
auto scale_convert = std::make_shared<opset10::Convert>(scale, element::f32);
|
||||
mark_as_decompression(scale_convert);
|
||||
auto multiply = std::make_shared<opset10::Multiply>(second_convert, scale_convert);
|
||||
mark_as_dequantization_node(multiply);
|
||||
activations = multiply;
|
||||
}
|
||||
|
||||
std::shared_ptr<Node> weights = opset10::Constant::create(element::i8, Shape{4, 16, 1, 1}, {-2});
|
||||
{
|
||||
auto convert = std::make_shared<opset10::Convert>(weights, element::f32);
|
||||
pass::disable_constant_folding(convert);
|
||||
auto scale = opset10::Constant::create(element::f16, Shape{}, {0.2});
|
||||
auto scale_convert = std::make_shared<opset10::Convert>(scale, element::f32);
|
||||
mark_as_decompression(scale_convert);
|
||||
auto multiply = std::make_shared<opset10::Multiply>(convert, scale_convert);
|
||||
mark_as_dequantization_node(multiply);
|
||||
weights = multiply;
|
||||
}
|
||||
|
||||
|
||||
@@ -22,7 +22,7 @@ namespace pass {
|
||||
class TRANSFORMATIONS_API MarkDequantizationSubgraph : public MatcherPass {
|
||||
public:
|
||||
OPENVINO_RTTI("MarkDequantizationSubgraph", "0");
|
||||
MarkDequantizationSubgraph(const element::TypeVector& precisions = {});
|
||||
MarkDequantizationSubgraph(const element::TypeVector& precisions);
|
||||
};
|
||||
} // namespace pass
|
||||
} // namespace ov
|
||||
|
||||
@@ -34,39 +34,49 @@ ov::pass::MarkDequantizationSubgraph::MarkDequantizationSubgraph(const element::
|
||||
|
||||
ov::matcher_pass_callback callback = [=](pattern::Matcher& m) -> bool {
|
||||
const auto& pattern_map = m.get_pattern_value_map();
|
||||
const auto& convert = pattern_map.at(convert_pattern).get_node_shared_ptr();
|
||||
const auto& input_precision = convert->get_input_element_type(0);
|
||||
auto convert = pattern_map.at(convert_pattern).get_node_shared_ptr();
|
||||
auto input = pattern_map.at(input_pattern).get_node_shared_ptr();
|
||||
const auto multiply = m.get_match_root();
|
||||
|
||||
// validation by Convert operation input precisions
|
||||
if (!precisions.empty()) {
|
||||
if (std::find(precisions.begin(), precisions.end(), input_precision) == precisions.end()) {
|
||||
return false;
|
||||
auto subtract_it = pattern_map.find(subtract_pattern);
|
||||
if (subtract_it == pattern_map.end()) {
|
||||
for (size_t i = 0; i < multiply->get_input_size(); i++) {
|
||||
const auto node = ov::as_type_ptr<opset10::Convert>(multiply->get_input_node_shared_ptr(i));
|
||||
if (node && std::find(precisions.begin(), precisions.end(), node->get_input_element_type(0)) !=
|
||||
precisions.end()) {
|
||||
convert = node;
|
||||
input = convert->get_input_node_shared_ptr(0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const auto& input = pattern_map.at(input_pattern);
|
||||
// validation by Convert operation input precisions
|
||||
const auto& input_precision = input->get_output_element_type(0);
|
||||
if (std::find(precisions.begin(), precisions.end(), input_precision) == precisions.end()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
std::vector<Node*> tmp;
|
||||
if (ngraph::could_propagate(input, tmp)) {
|
||||
// disable ConstantFolding if dequantization subgraph is on constant data
|
||||
ov::disable_constant_folding(convert);
|
||||
}
|
||||
|
||||
auto subtract_it = pattern_map.find(subtract_pattern);
|
||||
if (subtract_it != pattern_map.end()) {
|
||||
// mark Subtract as dequantization node
|
||||
ov::mark_as_dequantization_node(subtract_it->second.get_node_shared_ptr());
|
||||
auto zero_point = pattern_map.at(zero_point_pattern).get_node();
|
||||
auto zero_point = pattern_map.at(zero_point_pattern).get_node_shared_ptr();
|
||||
if (ov::is_type<opset10::Convert>(zero_point) && input_precision == zero_point->get_input_element_type(0) &&
|
||||
ov::is_type<opset10::Constant>(zero_point->get_input_node_ptr(0))) {
|
||||
// disable ConstantFolding also for Convert on zero_point
|
||||
// so we don't have to constant-fold it and then convert it back to
|
||||
// low precision in LP transformations
|
||||
ov::disable_constant_folding(zero_point->shared_from_this());
|
||||
ov::disable_constant_folding(zero_point);
|
||||
}
|
||||
}
|
||||
|
||||
// mark Multiply as dequantization node
|
||||
ov::mark_as_dequantization_node(m.get_match_root());
|
||||
ov::mark_as_dequantization_node(multiply);
|
||||
|
||||
return false;
|
||||
};
|
||||
|
||||
@@ -519,7 +519,10 @@ void CNNNetworkNGraphImpl::reshape(const std::map<std::string, ngraph::PartialSh
|
||||
REGISTER_PASS(manager, ConvertMatrixNmsToMatrixNmsIE, false)
|
||||
REGISTER_PASS(manager, ConvertNMS9ToNMSIEInternal)
|
||||
REGISTER_PASS(manager, ConvertGP9ToGPIEInternal)
|
||||
REGISTER_PASS(manager, MarkDequantizationSubgraph)
|
||||
REGISTER_PASS(
|
||||
manager,
|
||||
MarkDequantizationSubgraph,
|
||||
ov::element::TypeVector{ov::element::i8, ov::element::u8, ov::element::i4, ov::element::u4})
|
||||
REGISTER_PASS(manager, DisableDecompressionConvertConstantFolding)
|
||||
REGISTER_PASS(manager, ConstantFolding)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user