[POT] Update CPU_SPR transformer quantization (#15241)

* [POT] Update CPU_SPR transformer quantization

* Add comments
Liubov Talamanova, 2023-01-24 11:56:22 +00:00 (committed by GitHub)
parent 54ea2612ae
commit 5fca707ebd
14 changed files with 58 additions and 38 deletions


@@ -33,7 +33,7 @@ def load_hardware_config(config):
         raise ValueError('Unsupported target_device : {}'.format(config['target_device']))
     hardware_config_path = __HARDWARE_CONFIG_DIR / __HARDWARE_CONFIGS_MAP.get(config['target_device'], "cpu.json")
-    return HardwareConfig.from_json(hardware_config_path.as_posix())
+    return HardwareConfig.from_json(hardware_config_path.as_posix(), config['target_device'])
 
 
 def append_estimator_configs(quantization_configs, is_weights, config, opt_conf=None):


@@ -110,7 +110,7 @@ def get_common_argument_parser():
         '--keep-uncompressed-weights',
         action='store_true',
         default=False,
-        help='Keep Convolution, Deconvolution and FullyConnected weights uncompressed')
+        help='Keep Convolution, ConvolutionBackpropData and MatMul weights uncompressed')
     parser.add_argument(
         '--data-source',


@@ -33,11 +33,13 @@ class HardwareConfig(list):
         return config
 
     @classmethod
-    def from_json(cls, path):
+    def from_json(cls, path, target_device=None):
         with open(path) as f:
             json_config = json.load(f, object_pairs_hook=OrderedDict)
+            if target_device is None:
+                target_device = json_config['target_device']
 
             hw_config = cls()
-            hw_config.append(Dict(('target_device', json_config['target_device'])))
+            hw_config.append(Dict(('target_device', target_device)))
             hw_config.append(Dict(('primary_bitwidth', json_config.get('primary_bitwidth', 8)),
                              ('input_priority_types', json_config.get('input_priority_types', []))))
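The optional target_device argument lets a device without a dedicated JSON description (here CPU_SPR, which falls back to cpu.json in the loader above) keep its requested name in the resulting config. A minimal sketch of the two call shapes, assuming cpu.json declares "target_device": "CPU":

    # Override: load the CPU description but record CPU_SPR as the device.
    hw_config = HardwareConfig.from_json('cpu.json', target_device='CPU_SPR')

    # No override: behaves as before, the device comes from the JSON file.
    hw_config = HardwareConfig.from_json('cpu.json')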


@@ -2,10 +2,24 @@
 # SPDX-License-Identifier: Apache-2.0
 
 from .pattern_utils import check_fused_scale_shift_patterns, get_fused_scale_shift_patterns, \
-    check_fused_op_const_patterns, get_fused_op_const_pattern, get_clamp_mult_const_pattern
+    check_fused_op_const_patterns, get_fused_op_const_pattern, get_clamp_mult_const_pattern, \
+    get_softmax_reshape_transpose_gather_matmul_pattern
 
 
 def get_cpu_ignored_patterns():
+    return {
+        'blocks': [(pattern, check_fused_scale_shift_patterns) for pattern in get_fused_scale_shift_patterns()] +
+                  [(pattern, check_fused_op_const_patterns) for pattern in get_fused_op_const_pattern()] +
+                  [get_softmax_reshape_transpose_gather_matmul_pattern()],
+        'activations': [get_clamp_mult_const_pattern()],
+        'inputs': []
+    }
+
+
+# For CPU_SPR we should quantize the self-attention block, with the
+# FakeQuantize propagated back before the Reshape, to remove quantization
+# overhead. For details see ticket 97884.
+def get_cpu_spr_ignored_patterns():
     return {
         'blocks': [(pattern, check_fused_scale_shift_patterns) for pattern in get_fused_scale_shift_patterns()] +
                   [(pattern, check_fused_op_const_patterns) for pattern in get_fused_op_const_pattern()],
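The intent of the split, as the function names suggest: patterns listed under 'blocks' are skipped during quantization, so the self-attention pattern stays unquantized on CPU but gets quantized on CPU_SPR. An illustration (not part of the commit) of the one-pattern difference between the two sets:

    cpu_blocks = get_cpu_ignored_patterns()['blocks']
    spr_blocks = get_cpu_spr_ignored_patterns()['blocks']
    assert len(cpu_blocks) == len(spr_blocks) + 1  # only the self-attention pattern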


@@ -1,12 +1,13 @@
 # Copyright (C) 2020-2022 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
-from openvino.tools.pot.graph.pattern_utils import get_assign_result_pattern
+from openvino.tools.pot.graph.pattern_utils import get_assign_result_pattern, \
+    get_softmax_reshape_transpose_gather_matmul_pattern
 
 
 def get_gna_ignored_patterns():
     return {
-        'blocks': [get_assign_result_pattern()],
+        'blocks': [get_assign_result_pattern(), get_softmax_reshape_transpose_gather_matmul_pattern()],
         'activations': [],
         'inputs': []
     }
@@ -14,7 +15,7 @@ def get_gna_ignored_patterns():
 
 def get_gna3_ignored_patterns():
     return {
-        'blocks': [get_assign_result_pattern()],
+        'blocks': [get_assign_result_pattern(), get_softmax_reshape_transpose_gather_matmul_pattern()],
         'activations': [],
         'inputs': []
     }


@@ -2,13 +2,15 @@
 # SPDX-License-Identifier: Apache-2.0
 
 from .pattern_utils import check_fused_scale_shift_patterns, get_fused_scale_shift_patterns, \
-    check_fused_op_const_patterns, get_fused_op_const_pattern, get_clamp_mult_const_pattern
+    check_fused_op_const_patterns, get_fused_op_const_pattern, get_clamp_mult_const_pattern, \
+    get_softmax_reshape_transpose_gather_matmul_pattern
 
 
 def get_gpu_ignored_patterns():
     return {
         'blocks': [(pattern, check_fused_scale_shift_patterns) for pattern in get_fused_scale_shift_patterns()] +
-                  [(pattern, check_fused_op_const_patterns) for pattern in get_fused_op_const_pattern()],
+                  [(pattern, check_fused_op_const_patterns) for pattern in get_fused_op_const_pattern()] +
+                  [get_softmax_reshape_transpose_gather_matmul_pattern()],
         'activations': [get_clamp_mult_const_pattern()],
         'inputs': []
     }


@@ -102,7 +102,7 @@ def get_all_operation_nodes(model: CompressedModel, recursively: bool = True):
 def build_model_for_node(nx_model, input_name, input_shape, node, remove_bias=False,
                          remove_fake_quantize=False, target_device='ANY'):
     """ Build Model containing Subgraph of CompressedModel (input - node - output).
-    The Convolution, FullyConnected node types are supported.
+    The Convolution, MatMul node types are supported.
     :param nx_model: CompressedModel model
     :param input_name: name of the input node in the generated graph
     :param input_shape: shape of the input node in the generated graph


@@ -576,10 +576,10 @@ class RemoveFakeQuantize:
         check_is_inputs_fq = lambda node: all([op.type == 'FakeQuantize' for op in node])
         for op in get_nodes_by_type(graph, ['Add']):
             if not nu.check_const_input(op):
-                inputs_node = np.array(get_node_inputs(op))
+                inputs_node = get_node_inputs(op)
                 count_outputs_node = np.array([len(get_all_node_outputs(node)) for node in inputs_node])
                 indices = count_outputs_node.argsort()[::-1]
-                inputs_node = inputs_node[indices]
+                inputs_node = [inputs_node[idx] for idx in indices]
                 if check_is_inputs_fq(inputs_node):
                     delete_one_fq(inputs_node)
@@ -831,7 +831,7 @@ def create_fake_quantize_node(graph: Graph, name, data_type=np.float32, **kwargs):
 def insert_fake_quantize(graph, node, ports=None, names=None, fq_types=None, hw_config=None, input_priority_types=[]):
-    blobs_as_inputs_nodes_type = ['Convolution', 'Deconvolution', 'MatMul']
+    blobs_as_inputs_nodes_type = ['Convolution', 'ConvolutionBackpropData', 'MatMul']
     gru_node_types = ['GRUCell', 'GRUSequence']
     port_name = None
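In the first hunk, inputs_node becomes a plain Python list instead of a numpy object array, while the fan-out sort is kept by indexing the list with the argsort result. A self-contained sketch of that idiom, with toy stand-ins for graph nodes:

    import numpy as np

    nodes = ['a', 'b', 'c']                 # stand-ins for the Add inputs
    fan_out = np.array([3, 1, 2])           # number of outputs per node
    order = fan_out.argsort()[::-1]         # indices by descending fan-out
    nodes = [nodes[idx] for idx in order]   # ['a', 'c', 'b']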


@@ -82,3 +82,18 @@ def get_fq_result_pattern():
     pattern.append_single_op('FakeQuantize', 'fq')
     pattern.append_single_op('Result', 'result')
     return pattern.set_name('fq_result').pattern
+
+
+# Self-attention block in vision transformers (Swin, Twins, ViTPose)
+def get_softmax_reshape_transpose_gather_matmul_pattern():
+    pattern = PatternBuilder()
+    pattern_2 = PatternBuilder()
+    softmax_out = pattern.append_single_op('SoftMax', 'softmax').get_last_node()
+    pattern_2.append_single_op('Add', 'add').get_last_node()
+    pattern_2.append_op_const('Reshape', 'reshape')
+    pattern_2.append_single_op('Transpose', 'transpose').get_last_node()
+    gather_out = pattern_2.append_single_op('Gather', 'gather').get_last_node()
+    pattern.pattern['nodes'] += pattern_2.pattern['nodes']
+    pattern.pattern['edges'] += pattern_2.pattern['edges']
+    pattern.insert_single_op([softmax_out, gather_out], None, 'MatMul', 'matmul')
+    return pattern.set_name('softmax_reshape_transpose_gather_matmul').pattern
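As I read the builder calls above, the matched subgraph feeds the SoftMax output and the Gather branch into a single MatMul (a reconstruction from the code, not a diagram from the PR):

    SoftMax ----------------------------------.
                                              +--> MatMul
    Add --> Reshape --> Transpose --> Gather -'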


@@ -308,21 +308,6 @@ def create_stable_diffusion_pattern():
     return pattern.set_name('stable_diffusion').pattern
 
 
-@registry_ignore_patterns('blocks')
-def create_softmax_reshape_transpose_gather_matmul_pattern():
-    pattern = PatternBuilder()
-    pattern_2 = PatternBuilder()
-    softmax_out = pattern.append_single_op('SoftMax', 'softmax').get_last_node()
-    pattern_2.append_single_op('Add', 'add').get_last_node()
-    pattern_2.append_op_const('Reshape', 'reshape')
-    pattern_2.append_single_op('Transpose', 'transpose').get_last_node()
-    gather_out = pattern_2.append_single_op('Gather', 'gather').get_last_node()
-    pattern.pattern['nodes'] += pattern_2.pattern['nodes']
-    pattern.pattern['edges'] += pattern_2.pattern['edges']
-    pattern.insert_single_op([softmax_out, gather_out], None, 'MatMul', 'matmul')
-    return pattern.set_name('softmax_reshape_transpose_gather_matmul').pattern
-
-
 @registry_ignore_patterns('blocks')
 def create_hswish_without_denominator_pattern():
     pattern = PatternBuilder()


@@ -8,7 +8,7 @@ import json
 import numpy as np
 
 from openvino.tools.pot.version import get_version
-from .cpu_patterns import get_cpu_ignored_patterns
+from .cpu_patterns import get_cpu_ignored_patterns, get_cpu_spr_ignored_patterns
 from .gpu_patterns import get_gpu_ignored_patterns
 from .vpu_patterns import get_vpu_ignored_patterns
 from .gna_patterns import get_gna_ignored_patterns, get_gna3_ignored_patterns
@@ -16,13 +16,14 @@ from .special_operations import QUANTIZE_AGNOSTIC_OPERATIONS
 from .node_utils import get_all_node_outputs, get_input_shape
 
 HARDWARE_AWARE_IGNORED_PATTERNS = {
+    'ANY': get_cpu_ignored_patterns(),
     'CPU': get_cpu_ignored_patterns(),
     'GPU': get_gpu_ignored_patterns(),
     'VPU': get_vpu_ignored_patterns(),
     'GNA': get_gna_ignored_patterns(),
     'GNA3': get_gna3_ignored_patterns(),
     'GNA3.5': get_gna3_ignored_patterns(),
-    'CPU_SPR': get_cpu_ignored_patterns()
+    'CPU_SPR': get_cpu_spr_ignored_patterns()
 }
 
 DEFAULT_PATH = 'PATH'


@@ -1,11 +1,12 @@
 # Copyright (C) 2020-2022 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
-from openvino.tools.pot.graph.pattern_utils import get_clamp_mult_const_pattern
+from openvino.tools.pot.graph.pattern_utils import get_clamp_mult_const_pattern, \
+    get_softmax_reshape_transpose_gather_matmul_pattern
 
 
 def get_vpu_ignored_patterns():
     return {
-        'blocks': [],
+        'blocks': [get_softmax_reshape_transpose_gather_matmul_pattern()],
         'activations': [get_clamp_mult_const_pattern()],
         'inputs': []
     }


@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ef7c4f5fdfc04ec0b5d0091310682cc9ff1b9f3ebabdfb9f5c33f056bb7adcec
-size 121728
+oid sha256:e314f836c4d7a148e25835b3cda0ffa7c69417ccdf9cf418e200cff3a5fed84f
+size 107188


@@ -19,7 +19,6 @@ GNA_CONFIG_PATH = HARDWARE_CONFIG_PATH / 'gna.json'
 
 TEST_MODELS = [
     ('mobilenetv2_example', 'pytorch', 'ANY'),
-    ('resnet_example', 'pytorch', 'ANY'),
     ('googlenet_example', 'pytorch', 'ANY'),
     ('mobilenetv2_ssd_example', 'pytorch', 'ANY'),
     ('densenet121_example', 'pytorch', 'ANY'),
@@ -53,9 +52,9 @@ def test_build_quantization_graph(tmp_path, models, model_name, model_framework,
     model = load_model(model.model_params, target_device=target_device)
 
     if target_device == 'GNA':
-        hardware_config = HardwareConfig.from_json(GNA_CONFIG_PATH.as_posix())
+        hardware_config = HardwareConfig.from_json(GNA_CONFIG_PATH.as_posix(), target_device)
     else:
-        hardware_config = HardwareConfig.from_json(CPU_CONFIG_PATH.as_posix())
+        hardware_config = HardwareConfig.from_json(CPU_CONFIG_PATH.as_posix(), target_device)
 
     quantization_model = GraphTransformer(hardware_config).insert_fake_quantize(model)