diff --git a/tools/pot/openvino/tools/pot/algorithms/quantization/utils.py b/tools/pot/openvino/tools/pot/algorithms/quantization/utils.py
index 25c4458ed51..361930837a2 100644
--- a/tools/pot/openvino/tools/pot/algorithms/quantization/utils.py
+++ b/tools/pot/openvino/tools/pot/algorithms/quantization/utils.py
@@ -33,7 +33,7 @@ def load_hardware_config(config):
         raise ValueError('Unsupported target_device : {}'.format(config['target_device']))
 
     hardware_config_path = __HARDWARE_CONFIG_DIR / __HARDWARE_CONFIGS_MAP.get(config['target_device'], "cpu.json")
-    return HardwareConfig.from_json(hardware_config_path.as_posix())
+    return HardwareConfig.from_json(hardware_config_path.as_posix(), config['target_device'])
 
 
 def append_estimator_configs(quantization_configs, is_weights, config, opt_conf=None):
diff --git a/tools/pot/openvino/tools/pot/app/argparser.py b/tools/pot/openvino/tools/pot/app/argparser.py
index d5f8fdf8f77..c570a172381 100644
--- a/tools/pot/openvino/tools/pot/app/argparser.py
+++ b/tools/pot/openvino/tools/pot/app/argparser.py
@@ -110,7 +110,7 @@ def get_common_argument_parser():
         '--keep-uncompressed-weights',
         action='store_true',
         default=False,
-        help='Keep Convolution, Deconvolution and FullyConnected weights uncompressed')
+        help='Keep Convolution, ConvolutionBackpropData and MatMul weights uncompressed')
 
     parser.add_argument(
         '--data-source',
diff --git a/tools/pot/openvino/tools/pot/configs/hardware_config.py b/tools/pot/openvino/tools/pot/configs/hardware_config.py
index ee005c0819a..0a03dedd517 100644
--- a/tools/pot/openvino/tools/pot/configs/hardware_config.py
+++ b/tools/pot/openvino/tools/pot/configs/hardware_config.py
@@ -33,11 +33,13 @@ class HardwareConfig(list):
         return config
 
     @classmethod
-    def from_json(cls, path):
+    def from_json(cls, path, target_device=None):
         with open(path) as f:
             json_config = json.load(f, object_pairs_hook=OrderedDict)
+            if target_device is None:
+                target_device = json_config['target_device']
 
             hw_config = cls()
-            hw_config.append(Dict(('target_device', json_config['target_device'])))
+            hw_config.append(Dict(('target_device', target_device)))
             hw_config.append(Dict(('primary_bitwidth', json_config.get('primary_bitwidth', 8)),
                                   ('input_priority_types', json_config.get('input_priority_types', []))))
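The optional `target_device` argument lets the caller keep the original device name (for example CPU_SPR, which shares `cpu.json` via `__HARDWARE_CONFIGS_MAP`) instead of having it overwritten by the value stored in the JSON file. A minimal standalone sketch of that fallback, not the full method (which also builds the per-operation config entries); `resolve_target_device` is an illustrative name, not part of the tool:

```python
import json
from collections import OrderedDict


def resolve_target_device(path, target_device=None):
    """Return the explicit device if one was passed, else the one stored in the JSON."""
    with open(path) as f:
        json_config = json.load(f, object_pairs_hook=OrderedDict)
    if target_device is None:
        target_device = json_config['target_device']
    return target_device

# Assuming cpu.json declares "target_device": "CPU", the override keeps the SPR name:
# resolve_target_device('cpu.json')            -> 'CPU'
# resolve_target_device('cpu.json', 'CPU_SPR') -> 'CPU_SPR'
```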
diff --git a/tools/pot/openvino/tools/pot/graph/cpu_patterns.py b/tools/pot/openvino/tools/pot/graph/cpu_patterns.py
index abfe96fdf99..77be537502b 100644
--- a/tools/pot/openvino/tools/pot/graph/cpu_patterns.py
+++ b/tools/pot/openvino/tools/pot/graph/cpu_patterns.py
@@ -2,10 +2,24 @@
 # SPDX-License-Identifier: Apache-2.0
 
 from .pattern_utils import check_fused_scale_shift_patterns, get_fused_scale_shift_patterns, \
-    check_fused_op_const_patterns, get_fused_op_const_pattern, get_clamp_mult_const_pattern
+    check_fused_op_const_patterns, get_fused_op_const_pattern, get_clamp_mult_const_pattern, \
+    get_softmax_reshape_transpose_gather_matmul_pattern
 
 
 def get_cpu_ignored_patterns():
+    return {
+        'blocks': [(pattern, check_fused_scale_shift_patterns) for pattern in get_fused_scale_shift_patterns()] +
+                  [(pattern, check_fused_op_const_patterns) for pattern in get_fused_op_const_pattern()] +
+                  [get_softmax_reshape_transpose_gather_matmul_pattern()],
+        'activations': [get_clamp_mult_const_pattern()],
+        'inputs': []
+    }
+
+
+# For CPU_SPR we should quantize the self-attention block with
+# FQ propagated before the Reshape to remove quantization overhead
+# For details look at ticket: 97884
+def get_cpu_spr_ignored_patterns():
     return {
         'blocks': [(pattern, check_fused_scale_shift_patterns) for pattern in get_fused_scale_shift_patterns()] +
                   [(pattern, check_fused_op_const_patterns) for pattern in get_fused_op_const_pattern()],
diff --git a/tools/pot/openvino/tools/pot/graph/gna_patterns.py b/tools/pot/openvino/tools/pot/graph/gna_patterns.py
index 6de9fc4187c..ffa8bc48ac1 100644
--- a/tools/pot/openvino/tools/pot/graph/gna_patterns.py
+++ b/tools/pot/openvino/tools/pot/graph/gna_patterns.py
@@ -1,12 +1,13 @@
 # Copyright (C) 2020-2022 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
-from openvino.tools.pot.graph.pattern_utils import get_assign_result_pattern
+from openvino.tools.pot.graph.pattern_utils import get_assign_result_pattern, \
+    get_softmax_reshape_transpose_gather_matmul_pattern
 
 
 def get_gna_ignored_patterns():
     return {
-        'blocks': [get_assign_result_pattern()],
+        'blocks': [get_assign_result_pattern(), get_softmax_reshape_transpose_gather_matmul_pattern()],
         'activations': [],
         'inputs': []
     }
@@ -14,7 +15,7 @@ def get_gna_ignored_patterns():
 
 def get_gna3_ignored_patterns():
     return {
-        'blocks': [get_assign_result_pattern()],
+        'blocks': [get_assign_result_pattern(), get_softmax_reshape_transpose_gather_matmul_pattern()],
         'activations': [],
         'inputs': []
     }
diff --git a/tools/pot/openvino/tools/pot/graph/gpu_patterns.py b/tools/pot/openvino/tools/pot/graph/gpu_patterns.py
index 12cc8345dd7..d59bf1b7846 100644
--- a/tools/pot/openvino/tools/pot/graph/gpu_patterns.py
+++ b/tools/pot/openvino/tools/pot/graph/gpu_patterns.py
@@ -2,13 +2,15 @@
 # SPDX-License-Identifier: Apache-2.0
 
 from .pattern_utils import check_fused_scale_shift_patterns, get_fused_scale_shift_patterns, \
-    check_fused_op_const_patterns, get_fused_op_const_pattern, get_clamp_mult_const_pattern
+    check_fused_op_const_patterns, get_fused_op_const_pattern, get_clamp_mult_const_pattern, \
+    get_softmax_reshape_transpose_gather_matmul_pattern
 
 
 def get_gpu_ignored_patterns():
     return {
         'blocks': [(pattern, check_fused_scale_shift_patterns) for pattern in get_fused_scale_shift_patterns()] +
-                  [(pattern, check_fused_op_const_patterns) for pattern in get_fused_op_const_pattern()],
+                  [(pattern, check_fused_op_const_patterns) for pattern in get_fused_op_const_pattern()] +
+                  [get_softmax_reshape_transpose_gather_matmul_pattern()],
         'activations': [get_clamp_mult_const_pattern()],
         'inputs': []
     }
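After this change a device's 'blocks' list can mix two shapes of entry: a `(pattern, check_fn)` tuple for the fused patterns and a bare pattern dict for the new self-attention block. A small sketch, with illustrative names rather than POT's actual matcher, of how such a mixed list could be normalized before matching:

```python
def normalize_blocks(blocks):
    """Yield (pattern, check_fn) pairs; bare patterns get a check that always passes."""
    for entry in blocks:
        if isinstance(entry, tuple):
            yield entry
        else:
            yield entry, lambda match: True

# Mixed list, as in get_cpu_ignored_patterns(): one checked entry, one unconditional one.
blocks = [
    ({'name': 'fused_scale_shift'}, lambda match: len(match) > 1),
    {'name': 'softmax_reshape_transpose_gather_matmul'},
]

for pattern, check in normalize_blocks(blocks):
    print(pattern['name'], check(['some', 'matched', 'nodes']))
```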
diff --git a/tools/pot/openvino/tools/pot/graph/model_utils.py b/tools/pot/openvino/tools/pot/graph/model_utils.py
index de4d1a3ac5e..2d1b660e9a0 100644
--- a/tools/pot/openvino/tools/pot/graph/model_utils.py
+++ b/tools/pot/openvino/tools/pot/graph/model_utils.py
@@ -102,7 +102,7 @@ def get_all_operation_nodes(model: CompressedModel, recursively: bool = True):
 def build_model_for_node(nx_model, input_name, input_shape, node, remove_bias=False,
                          remove_fake_quantize=False, target_device='ANY'):
     """ Build Model containing Subgraph of CompressedModel (input - node - output).
-    The Convolution, FullyConnected node types are supported.
+    The Convolution, MatMul node types are supported.
     :param nx_model: CompressedModel model
     :param input_name: name of the input node in the generated graph
     :param input_shape: shape of the input node in the generated graph
diff --git a/tools/pot/openvino/tools/pot/graph/passes.py b/tools/pot/openvino/tools/pot/graph/passes.py
index fc9ed6ec392..89149e739d2 100644
--- a/tools/pot/openvino/tools/pot/graph/passes.py
+++ b/tools/pot/openvino/tools/pot/graph/passes.py
@@ -576,10 +576,10 @@ class RemoveFakeQuantize:
         check_is_inputs_fq = lambda node: all([op.type == 'FakeQuantize' for op in node])
         for op in get_nodes_by_type(graph, ['Add']):
             if not nu.check_const_input(op):
-                inputs_node = np.array(get_node_inputs(op))
+                inputs_node = get_node_inputs(op)
                 count_outputs_node = np.array([len(get_all_node_outputs(node)) for node in inputs_node])
                 indices = count_outputs_node.argsort()[::-1]
-                inputs_node = inputs_node[indices]
+                inputs_node = [inputs_node[idx] for idx in indices]
                 if check_is_inputs_fq(inputs_node):
                     delete_one_fq(inputs_node)
 
@@ -831,7 +831,7 @@ def create_fake_quantize_node(graph: Graph, name, data_type=np.float32, **kwargs
 
 
 def insert_fake_quantize(graph, node, ports=None, names=None, fq_types=None, hw_config=None, input_priority_types=[]):
-    blobs_as_inputs_nodes_type = ['Convolution', 'Deconvolution', 'MatMul']
+    blobs_as_inputs_nodes_type = ['Convolution', 'ConvolutionBackpropData', 'MatMul']
     gru_node_types = ['GRUCell', 'GRUSequence']
 
     port_name = None
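The RemoveFakeQuantize change stops wrapping graph nodes in a NumPy object array: the output counts still go through `argsort`, but the reordering is now a plain list comprehension. A self-contained sketch of the same reordering; `FakeNode` is a stand-in for a real graph node:

```python
import numpy as np


class FakeNode:
    """Stand-in for a graph node; only the output count matters here."""
    def __init__(self, name, n_outputs):
        self.name = name
        self.n_outputs = n_outputs


inputs_node = [FakeNode('a', 1), FakeNode('b', 3), FakeNode('c', 2)]

# Sort descending by number of outputs, exactly as in the patched pass.
count_outputs_node = np.array([node.n_outputs for node in inputs_node])
indices = count_outputs_node.argsort()[::-1]
inputs_node = [inputs_node[idx] for idx in indices]

print([node.name for node in inputs_node])  # ['b', 'c', 'a']
```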
diff --git a/tools/pot/openvino/tools/pot/graph/pattern_utils.py b/tools/pot/openvino/tools/pot/graph/pattern_utils.py
index e21e1b8cc57..ec624dfb3a7 100644
--- a/tools/pot/openvino/tools/pot/graph/pattern_utils.py
+++ b/tools/pot/openvino/tools/pot/graph/pattern_utils.py
@@ -82,3 +82,18 @@ def get_fq_result_pattern():
     pattern.append_single_op('FakeQuantize', 'fq')
     pattern.append_single_op('Result', 'result')
     return pattern.set_name('fq_result').pattern
+
+
+# Self-attention block in vision transformers (Swin, Twins, ViTPose)
+def get_softmax_reshape_transpose_gather_matmul_pattern():
+    pattern = PatternBuilder()
+    pattern_2 = PatternBuilder()
+    softmax_out = pattern.append_single_op('SoftMax', 'softmax').get_last_node()
+    pattern_2.append_single_op('Add', 'add').get_last_node()
+    pattern_2.append_op_const('Reshape', 'reshape')
+    pattern_2.append_single_op('Transpose', 'transpose').get_last_node()
+    gather_out = pattern_2.append_single_op('Gather', 'gather').get_last_node()
+    pattern.pattern['nodes'] += pattern_2.pattern['nodes']
+    pattern.pattern['edges'] += pattern_2.pattern['edges']
+    pattern.insert_single_op([softmax_out, gather_out], None, 'MatMul', 'matmul')
+    return pattern.set_name('softmax_reshape_transpose_gather_matmul').pattern
diff --git a/tools/pot/openvino/tools/pot/graph/special_patterns.py b/tools/pot/openvino/tools/pot/graph/special_patterns.py
index d0810a99622..91313bddb33 100644
--- a/tools/pot/openvino/tools/pot/graph/special_patterns.py
+++ b/tools/pot/openvino/tools/pot/graph/special_patterns.py
@@ -308,21 +308,6 @@ def create_stable_diffusion_pattern():
     return pattern.set_name('stable_diffusion').pattern
 
 
-@registry_ignore_patterns('blocks')
-def create_softmax_reshape_transpose_gather_matmul_pattern():
-    pattern = PatternBuilder()
-    pattern_2 = PatternBuilder()
-    softmax_out = pattern.append_single_op('SoftMax', 'softmax').get_last_node()
-    pattern_2.append_single_op('Add', 'add').get_last_node()
-    pattern_2.append_op_const('Reshape', 'reshape')
-    pattern_2.append_single_op('Transpose', 'transpose').get_last_node()
-    gather_out = pattern_2.append_single_op('Gather', 'gather').get_last_node()
-    pattern.pattern['nodes'] += pattern_2.pattern['nodes']
-    pattern.pattern['edges'] += pattern_2.pattern['edges']
-    pattern.insert_single_op([softmax_out, gather_out], None, 'MatMul', 'matmul')
-    return pattern.set_name('softmax_reshape_transpose_gather_matmul').pattern
-
-
 @registry_ignore_patterns('blocks')
 def create_hswish_without_denominator_pattern():
     pattern = PatternBuilder()
diff --git a/tools/pot/openvino/tools/pot/graph/utils.py b/tools/pot/openvino/tools/pot/graph/utils.py
index a1dc38b4e9e..30844760d58 100644
--- a/tools/pot/openvino/tools/pot/graph/utils.py
+++ b/tools/pot/openvino/tools/pot/graph/utils.py
@@ -8,7 +8,7 @@ import json
 import numpy as np
 
 from openvino.tools.pot.version import get_version
-from .cpu_patterns import get_cpu_ignored_patterns
+from .cpu_patterns import get_cpu_ignored_patterns, get_cpu_spr_ignored_patterns
 from .gpu_patterns import get_gpu_ignored_patterns
 from .vpu_patterns import get_vpu_ignored_patterns
 from .gna_patterns import get_gna_ignored_patterns, get_gna3_ignored_patterns
@@ -16,13 +16,14 @@ from .special_operations import QUANTIZE_AGNOSTIC_OPERATIONS
 from .node_utils import get_all_node_outputs, get_input_shape
 
 HARDWARE_AWARE_IGNORED_PATTERNS = {
+    'ANY': get_cpu_ignored_patterns(),
     'CPU': get_cpu_ignored_patterns(),
     'GPU': get_gpu_ignored_patterns(),
     'VPU': get_vpu_ignored_patterns(),
     'GNA': get_gna_ignored_patterns(),
     'GNA3': get_gna3_ignored_patterns(),
     'GNA3.5': get_gna3_ignored_patterns(),
-    'CPU_SPR': get_cpu_ignored_patterns()
+    'CPU_SPR': get_cpu_spr_ignored_patterns()
 }
 
 DEFAULT_PATH = 'PATH'
diff --git a/tools/pot/openvino/tools/pot/graph/vpu_patterns.py b/tools/pot/openvino/tools/pot/graph/vpu_patterns.py
index 0770876736d..5da9861a34e 100644
--- a/tools/pot/openvino/tools/pot/graph/vpu_patterns.py
+++ b/tools/pot/openvino/tools/pot/graph/vpu_patterns.py
@@ -1,11 +1,12 @@
 # Copyright (C) 2020-2022 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
-from openvino.tools.pot.graph.pattern_utils import get_clamp_mult_const_pattern
+from openvino.tools.pot.graph.pattern_utils import get_clamp_mult_const_pattern, \
+    get_softmax_reshape_transpose_gather_matmul_pattern
 
 def get_vpu_ignored_patterns():
     return {
-        'blocks': [],
+        'blocks': [get_softmax_reshape_transpose_gather_matmul_pattern()],
         'activations': [get_clamp_mult_const_pattern()],
         'inputs': []
     }
diff --git a/tools/pot/tests/data/reference_models/resnet_example_pytorch.xml b/tools/pot/tests/data/reference_models/resnet_example_pytorch.xml
index fc89d9dfdf2..8cf6afcdcfa 100644
--- a/tools/pot/tests/data/reference_models/resnet_example_pytorch.xml
+++ b/tools/pot/tests/data/reference_models/resnet_example_pytorch.xml
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ef7c4f5fdfc04ec0b5d0091310682cc9ff1b9f3ebabdfb9f5c33f056bb7adcec
-size 121728
+oid sha256:e314f836c4d7a148e25835b3cda0ffa7c69417ccdf9cf418e200cff3a5fed84f
+size 107188
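For reference, the subgraph the relocated pattern describes, reconstructed from the PatternBuilder calls above: the SoftMax branch and the Add → Reshape → Transpose → Gather branch both feed the final MatMul. The node names below are the labels passed to the builder; the edge list is an illustrative schematic, not the dict the builder actually returns:

```python
# Illustrative edge list of the ignored self-attention block
# (the Reshape's Const input from append_op_const is omitted):
#
#   SoftMax ------------------------------------\
#                                                MatMul
#   Add --> Reshape --> Transpose --> Gather ---/
#
edges = [
    ('softmax', 'matmul'),
    ('add', 'reshape'),
    ('reshape', 'transpose'),
    ('transpose', 'gather'),
    ('gather', 'matmul'),
]

# Sanity check: both branches terminate in the MatMul node.
sinks = {dst for _, dst in edges} - {src for src, _ in edges}
assert sinks == {'matmul'}
```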
diff --git a/tools/pot/tests/test_graph.py b/tools/pot/tests/test_graph.py
index beac6494d7e..09ebc7a2d27 100755
--- a/tools/pot/tests/test_graph.py
+++ b/tools/pot/tests/test_graph.py
@@ -19,7 +19,6 @@ GNA_CONFIG_PATH = HARDWARE_CONFIG_PATH / 'gna.json'
 
 TEST_MODELS = [
     ('mobilenetv2_example', 'pytorch', 'ANY'),
-    ('resnet_example', 'pytorch', 'ANY'),
     ('googlenet_example', 'pytorch', 'ANY'),
     ('mobilenetv2_ssd_example', 'pytorch', 'ANY'),
     ('densenet121_example', 'pytorch', 'ANY'),
@@ -53,9 +52,9 @@ def test_build_quantization_graph(tmp_path, models, model_name, model_framework,
     model = load_model(model.model_params, target_device=target_device)
 
     if target_device == 'GNA':
-        hardware_config = HardwareConfig.from_json(GNA_CONFIG_PATH.as_posix())
+        hardware_config = HardwareConfig.from_json(GNA_CONFIG_PATH.as_posix(), target_device)
     else:
-        hardware_config = HardwareConfig.from_json(CPU_CONFIG_PATH.as_posix())
+        hardware_config = HardwareConfig.from_json(CPU_CONFIG_PATH.as_posix(), target_device)
 
     quantization_model = GraphTransformer(hardware_config).insert_fake_quantize(model)
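The updated test shows the intended call pattern for the two-argument `from_json`. A hedged usage sketch, assuming the POT package is installed and the JSON path is adjusted to your checkout (in the tests it comes from `CPU_CONFIG_PATH` / `GNA_CONFIG_PATH`):

```python
from openvino.tools.pot.configs.hardware_config import HardwareConfig

# Illustrative path; CPU_SPR reuses the CPU hardware config file.
hardware_config = HardwareConfig.from_json('hardware_configs/cpu.json', 'CPU_SPR')
# The resulting config keeps 'CPU_SPR' as its target_device, so the SPR-specific
# ignored patterns (get_cpu_spr_ignored_patterns) can be selected later.
```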