[POT] Update CPU_SPR transformer quantization (#15241)
* [POT] Update CPU_SPR transformer quantization * Add comments
This commit is contained in:
parent
54ea2612ae
commit
5fca707ebd
@ -33,7 +33,7 @@ def load_hardware_config(config):
|
|||||||
raise ValueError('Unsupported target_device : {}'.format(config['target_device']))
|
raise ValueError('Unsupported target_device : {}'.format(config['target_device']))
|
||||||
|
|
||||||
hardware_config_path = __HARDWARE_CONFIG_DIR / __HARDWARE_CONFIGS_MAP.get(config['target_device'], "cpu.json")
|
hardware_config_path = __HARDWARE_CONFIG_DIR / __HARDWARE_CONFIGS_MAP.get(config['target_device'], "cpu.json")
|
||||||
return HardwareConfig.from_json(hardware_config_path.as_posix())
|
return HardwareConfig.from_json(hardware_config_path.as_posix(), config['target_device'])
|
||||||
|
|
||||||
|
|
||||||
def append_estimator_configs(quantization_configs, is_weights, config, opt_conf=None):
|
def append_estimator_configs(quantization_configs, is_weights, config, opt_conf=None):
|
||||||
|
@ -110,7 +110,7 @@ def get_common_argument_parser():
|
|||||||
'--keep-uncompressed-weights',
|
'--keep-uncompressed-weights',
|
||||||
action='store_true',
|
action='store_true',
|
||||||
default=False,
|
default=False,
|
||||||
help='Keep Convolution, Deconvolution and FullyConnected weights uncompressed')
|
help='Keep Convolution, ConvolutionBackpropData and MatMul weights uncompressed')
|
||||||
|
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
'--data-source',
|
'--data-source',
|
||||||
|
@ -33,11 +33,13 @@ class HardwareConfig(list):
|
|||||||
return config
|
return config
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def from_json(cls, path):
|
def from_json(cls, path, target_device=None):
|
||||||
with open(path) as f:
|
with open(path) as f:
|
||||||
json_config = json.load(f, object_pairs_hook=OrderedDict)
|
json_config = json.load(f, object_pairs_hook=OrderedDict)
|
||||||
|
if target_device is None:
|
||||||
|
target_device = json_config['target_device']
|
||||||
hw_config = cls()
|
hw_config = cls()
|
||||||
hw_config.append(Dict(('target_device', json_config['target_device'])))
|
hw_config.append(Dict(('target_device', target_device)))
|
||||||
hw_config.append(Dict(('primary_bitwidth', json_config.get('primary_bitwidth', 8)),
|
hw_config.append(Dict(('primary_bitwidth', json_config.get('primary_bitwidth', 8)),
|
||||||
('input_priority_types', json_config.get('input_priority_types', []))))
|
('input_priority_types', json_config.get('input_priority_types', []))))
|
||||||
|
|
||||||
|
@ -2,10 +2,24 @@
|
|||||||
# SPDX-License-Identifier: Apache-2.0
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
from .pattern_utils import check_fused_scale_shift_patterns, get_fused_scale_shift_patterns, \
|
from .pattern_utils import check_fused_scale_shift_patterns, get_fused_scale_shift_patterns, \
|
||||||
check_fused_op_const_patterns, get_fused_op_const_pattern, get_clamp_mult_const_pattern
|
check_fused_op_const_patterns, get_fused_op_const_pattern, get_clamp_mult_const_pattern, \
|
||||||
|
get_softmax_reshape_transpose_gather_matmul_pattern
|
||||||
|
|
||||||
|
|
||||||
def get_cpu_ignored_patterns():
|
def get_cpu_ignored_patterns():
|
||||||
|
return {
|
||||||
|
'blocks': [(pattern, check_fused_scale_shift_patterns) for pattern in get_fused_scale_shift_patterns()] +
|
||||||
|
[(pattern, check_fused_op_const_patterns) for pattern in get_fused_op_const_pattern()] +
|
||||||
|
[get_softmax_reshape_transpose_gather_matmul_pattern()],
|
||||||
|
'activations': [get_clamp_mult_const_pattern()],
|
||||||
|
'inputs': []
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# For CPU_SPR we should quantize self-attention block with
|
||||||
|
# FQ propagated to before the Reshape to remove quantization overhead
|
||||||
|
# For details, see ticket 97884
|
||||||
|
def get_cpu_spr_ignored_patterns():
|
||||||
return {
|
return {
|
||||||
'blocks': [(pattern, check_fused_scale_shift_patterns) for pattern in get_fused_scale_shift_patterns()] +
|
'blocks': [(pattern, check_fused_scale_shift_patterns) for pattern in get_fused_scale_shift_patterns()] +
|
||||||
[(pattern, check_fused_op_const_patterns) for pattern in get_fused_op_const_pattern()],
|
[(pattern, check_fused_op_const_patterns) for pattern in get_fused_op_const_pattern()],
|
||||||
|
@ -1,12 +1,13 @@
|
|||||||
# Copyright (C) 2020-2022 Intel Corporation
|
# Copyright (C) 2020-2022 Intel Corporation
|
||||||
# SPDX-License-Identifier: Apache-2.0
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
from openvino.tools.pot.graph.pattern_utils import get_assign_result_pattern
|
from openvino.tools.pot.graph.pattern_utils import get_assign_result_pattern, \
|
||||||
|
get_softmax_reshape_transpose_gather_matmul_pattern
|
||||||
|
|
||||||
|
|
||||||
def get_gna_ignored_patterns():
|
def get_gna_ignored_patterns():
|
||||||
return {
|
return {
|
||||||
'blocks': [get_assign_result_pattern()],
|
'blocks': [get_assign_result_pattern(), get_softmax_reshape_transpose_gather_matmul_pattern()],
|
||||||
'activations': [],
|
'activations': [],
|
||||||
'inputs': []
|
'inputs': []
|
||||||
}
|
}
|
||||||
@ -14,7 +15,7 @@ def get_gna_ignored_patterns():
|
|||||||
|
|
||||||
def get_gna3_ignored_patterns():
|
def get_gna3_ignored_patterns():
|
||||||
return {
|
return {
|
||||||
'blocks': [get_assign_result_pattern()],
|
'blocks': [get_assign_result_pattern(), get_softmax_reshape_transpose_gather_matmul_pattern()],
|
||||||
'activations': [],
|
'activations': [],
|
||||||
'inputs': []
|
'inputs': []
|
||||||
}
|
}
|
||||||
|
@ -2,13 +2,15 @@
|
|||||||
# SPDX-License-Identifier: Apache-2.0
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
from .pattern_utils import check_fused_scale_shift_patterns, get_fused_scale_shift_patterns, \
|
from .pattern_utils import check_fused_scale_shift_patterns, get_fused_scale_shift_patterns, \
|
||||||
check_fused_op_const_patterns, get_fused_op_const_pattern, get_clamp_mult_const_pattern
|
check_fused_op_const_patterns, get_fused_op_const_pattern, get_clamp_mult_const_pattern, \
|
||||||
|
get_softmax_reshape_transpose_gather_matmul_pattern
|
||||||
|
|
||||||
|
|
||||||
def get_gpu_ignored_patterns():
|
def get_gpu_ignored_patterns():
|
||||||
return {
|
return {
|
||||||
'blocks': [(pattern, check_fused_scale_shift_patterns) for pattern in get_fused_scale_shift_patterns()] +
|
'blocks': [(pattern, check_fused_scale_shift_patterns) for pattern in get_fused_scale_shift_patterns()] +
|
||||||
[(pattern, check_fused_op_const_patterns) for pattern in get_fused_op_const_pattern()],
|
[(pattern, check_fused_op_const_patterns) for pattern in get_fused_op_const_pattern()] +
|
||||||
|
[get_softmax_reshape_transpose_gather_matmul_pattern()],
|
||||||
'activations': [get_clamp_mult_const_pattern()],
|
'activations': [get_clamp_mult_const_pattern()],
|
||||||
'inputs': []
|
'inputs': []
|
||||||
}
|
}
|
||||||
|
@ -102,7 +102,7 @@ def get_all_operation_nodes(model: CompressedModel, recursively: bool = True):
|
|||||||
def build_model_for_node(nx_model, input_name, input_shape, node, remove_bias=False,
|
def build_model_for_node(nx_model, input_name, input_shape, node, remove_bias=False,
|
||||||
remove_fake_quantize=False, target_device='ANY'):
|
remove_fake_quantize=False, target_device='ANY'):
|
||||||
""" Build Model containing Subgraph of CompressedModel (input - node - output).
|
""" Build Model containing Subgraph of CompressedModel (input - node - output).
|
||||||
The Convolution, FullyConnected node types are supported.
|
The Convolution, MatMul node types are supported.
|
||||||
:param nx_model: CompressedModel model
|
:param nx_model: CompressedModel model
|
||||||
:param input_name: name of the input node in the generated graph
|
:param input_name: name of the input node in the generated graph
|
||||||
:param input_shape: shape of the input node in the generated graph
|
:param input_shape: shape of the input node in the generated graph
|
||||||
|
@ -576,10 +576,10 @@ class RemoveFakeQuantize:
|
|||||||
check_is_inputs_fq = lambda node: all([op.type == 'FakeQuantize' for op in node])
|
check_is_inputs_fq = lambda node: all([op.type == 'FakeQuantize' for op in node])
|
||||||
for op in get_nodes_by_type(graph, ['Add']):
|
for op in get_nodes_by_type(graph, ['Add']):
|
||||||
if not nu.check_const_input(op):
|
if not nu.check_const_input(op):
|
||||||
inputs_node = np.array(get_node_inputs(op))
|
inputs_node = get_node_inputs(op)
|
||||||
count_outputs_node = np.array([len(get_all_node_outputs(node)) for node in inputs_node])
|
count_outputs_node = np.array([len(get_all_node_outputs(node)) for node in inputs_node])
|
||||||
indices = count_outputs_node.argsort()[::-1]
|
indices = count_outputs_node.argsort()[::-1]
|
||||||
inputs_node = inputs_node[indices]
|
inputs_node = [inputs_node[idx] for idx in indices]
|
||||||
if check_is_inputs_fq(inputs_node):
|
if check_is_inputs_fq(inputs_node):
|
||||||
delete_one_fq(inputs_node)
|
delete_one_fq(inputs_node)
|
||||||
|
|
||||||
@ -831,7 +831,7 @@ def create_fake_quantize_node(graph: Graph, name, data_type=np.float32, **kwargs
|
|||||||
|
|
||||||
|
|
||||||
def insert_fake_quantize(graph, node, ports=None, names=None, fq_types=None, hw_config=None, input_priority_types=[]):
|
def insert_fake_quantize(graph, node, ports=None, names=None, fq_types=None, hw_config=None, input_priority_types=[]):
|
||||||
blobs_as_inputs_nodes_type = ['Convolution', 'Deconvolution', 'MatMul']
|
blobs_as_inputs_nodes_type = ['Convolution', 'ConvolutionBackpropData', 'MatMul']
|
||||||
gru_node_types = ['GRUCell', 'GRUSequence']
|
gru_node_types = ['GRUCell', 'GRUSequence']
|
||||||
|
|
||||||
port_name = None
|
port_name = None
|
||||||
|
@ -82,3 +82,18 @@ def get_fq_result_pattern():
|
|||||||
pattern.append_single_op('FakeQuantize', 'fq')
|
pattern.append_single_op('FakeQuantize', 'fq')
|
||||||
pattern.append_single_op('Result', 'result')
|
pattern.append_single_op('Result', 'result')
|
||||||
return pattern.set_name('fq_result').pattern
|
return pattern.set_name('fq_result').pattern
|
||||||
|
|
||||||
|
|
||||||
|
# Self-attention block in vision transformers (Swin, Twins, ViTPose)
|
||||||
|
def get_softmax_reshape_transpose_gather_matmul_pattern():
|
||||||
|
pattern = PatternBuilder()
|
||||||
|
pattern_2 = PatternBuilder()
|
||||||
|
softmax_out = pattern.append_single_op('SoftMax', 'softmax').get_last_node()
|
||||||
|
pattern_2.append_single_op('Add', 'add').get_last_node()
|
||||||
|
pattern_2.append_op_const('Reshape', 'reshape')
|
||||||
|
pattern_2.append_single_op('Transpose', 'transpose').get_last_node()
|
||||||
|
gather_out = pattern_2.append_single_op('Gather', 'gather').get_last_node()
|
||||||
|
pattern.pattern['nodes'] += pattern_2.pattern['nodes']
|
||||||
|
pattern.pattern['edges'] += pattern_2.pattern['edges']
|
||||||
|
pattern.insert_single_op([softmax_out, gather_out], None, 'MatMul', 'matmul')
|
||||||
|
return pattern.set_name('softmax_reshape_transpose_gather_matmul').pattern
|
||||||
|
@ -308,21 +308,6 @@ def create_stable_diffusion_pattern():
|
|||||||
return pattern.set_name('stable_diffusion').pattern
|
return pattern.set_name('stable_diffusion').pattern
|
||||||
|
|
||||||
|
|
||||||
@registry_ignore_patterns('blocks')
|
|
||||||
def create_softmax_reshape_transpose_gather_matmul_pattern():
|
|
||||||
pattern = PatternBuilder()
|
|
||||||
pattern_2 = PatternBuilder()
|
|
||||||
softmax_out = pattern.append_single_op('SoftMax', 'softmax').get_last_node()
|
|
||||||
pattern_2.append_single_op('Add', 'add').get_last_node()
|
|
||||||
pattern_2.append_op_const('Reshape', 'reshape')
|
|
||||||
pattern_2.append_single_op('Transpose', 'transpose').get_last_node()
|
|
||||||
gather_out = pattern_2.append_single_op('Gather', 'gather').get_last_node()
|
|
||||||
pattern.pattern['nodes'] += pattern_2.pattern['nodes']
|
|
||||||
pattern.pattern['edges'] += pattern_2.pattern['edges']
|
|
||||||
pattern.insert_single_op([softmax_out, gather_out], None, 'MatMul', 'matmul')
|
|
||||||
return pattern.set_name('softmax_reshape_transpose_gather_matmul').pattern
|
|
||||||
|
|
||||||
|
|
||||||
@registry_ignore_patterns('blocks')
|
@registry_ignore_patterns('blocks')
|
||||||
def create_hswish_without_denominator_pattern():
|
def create_hswish_without_denominator_pattern():
|
||||||
pattern = PatternBuilder()
|
pattern = PatternBuilder()
|
||||||
|
@ -8,7 +8,7 @@ import json
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
from openvino.tools.pot.version import get_version
|
from openvino.tools.pot.version import get_version
|
||||||
from .cpu_patterns import get_cpu_ignored_patterns
|
from .cpu_patterns import get_cpu_ignored_patterns, get_cpu_spr_ignored_patterns
|
||||||
from .gpu_patterns import get_gpu_ignored_patterns
|
from .gpu_patterns import get_gpu_ignored_patterns
|
||||||
from .vpu_patterns import get_vpu_ignored_patterns
|
from .vpu_patterns import get_vpu_ignored_patterns
|
||||||
from .gna_patterns import get_gna_ignored_patterns, get_gna3_ignored_patterns
|
from .gna_patterns import get_gna_ignored_patterns, get_gna3_ignored_patterns
|
||||||
@ -16,13 +16,14 @@ from .special_operations import QUANTIZE_AGNOSTIC_OPERATIONS
|
|||||||
from .node_utils import get_all_node_outputs, get_input_shape
|
from .node_utils import get_all_node_outputs, get_input_shape
|
||||||
|
|
||||||
HARDWARE_AWARE_IGNORED_PATTERNS = {
|
HARDWARE_AWARE_IGNORED_PATTERNS = {
|
||||||
|
'ANY': get_cpu_ignored_patterns(),
|
||||||
'CPU': get_cpu_ignored_patterns(),
|
'CPU': get_cpu_ignored_patterns(),
|
||||||
'GPU': get_gpu_ignored_patterns(),
|
'GPU': get_gpu_ignored_patterns(),
|
||||||
'VPU': get_vpu_ignored_patterns(),
|
'VPU': get_vpu_ignored_patterns(),
|
||||||
'GNA': get_gna_ignored_patterns(),
|
'GNA': get_gna_ignored_patterns(),
|
||||||
'GNA3': get_gna3_ignored_patterns(),
|
'GNA3': get_gna3_ignored_patterns(),
|
||||||
'GNA3.5': get_gna3_ignored_patterns(),
|
'GNA3.5': get_gna3_ignored_patterns(),
|
||||||
'CPU_SPR': get_cpu_ignored_patterns()
|
'CPU_SPR': get_cpu_spr_ignored_patterns()
|
||||||
}
|
}
|
||||||
|
|
||||||
DEFAULT_PATH = 'PATH'
|
DEFAULT_PATH = 'PATH'
|
||||||
|
@ -1,11 +1,12 @@
|
|||||||
# Copyright (C) 2020-2022 Intel Corporation
|
# Copyright (C) 2020-2022 Intel Corporation
|
||||||
# SPDX-License-Identifier: Apache-2.0
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
from openvino.tools.pot.graph.pattern_utils import get_clamp_mult_const_pattern
|
from openvino.tools.pot.graph.pattern_utils import get_clamp_mult_const_pattern, \
|
||||||
|
get_softmax_reshape_transpose_gather_matmul_pattern
|
||||||
|
|
||||||
def get_vpu_ignored_patterns():
|
def get_vpu_ignored_patterns():
|
||||||
return {
|
return {
|
||||||
'blocks': [],
|
'blocks': [get_softmax_reshape_transpose_gather_matmul_pattern()],
|
||||||
'activations': [get_clamp_mult_const_pattern()],
|
'activations': [get_clamp_mult_const_pattern()],
|
||||||
'inputs': []
|
'inputs': []
|
||||||
}
|
}
|
||||||
|
@ -1,3 +1,3 @@
|
|||||||
version https://git-lfs.github.com/spec/v1
|
version https://git-lfs.github.com/spec/v1
|
||||||
oid sha256:ef7c4f5fdfc04ec0b5d0091310682cc9ff1b9f3ebabdfb9f5c33f056bb7adcec
|
oid sha256:e314f836c4d7a148e25835b3cda0ffa7c69417ccdf9cf418e200cff3a5fed84f
|
||||||
size 121728
|
size 107188
|
||||||
|
@ -19,7 +19,6 @@ GNA_CONFIG_PATH = HARDWARE_CONFIG_PATH / 'gna.json'
|
|||||||
|
|
||||||
TEST_MODELS = [
|
TEST_MODELS = [
|
||||||
('mobilenetv2_example', 'pytorch', 'ANY'),
|
('mobilenetv2_example', 'pytorch', 'ANY'),
|
||||||
('resnet_example', 'pytorch', 'ANY'),
|
|
||||||
('googlenet_example', 'pytorch', 'ANY'),
|
('googlenet_example', 'pytorch', 'ANY'),
|
||||||
('mobilenetv2_ssd_example', 'pytorch', 'ANY'),
|
('mobilenetv2_ssd_example', 'pytorch', 'ANY'),
|
||||||
('densenet121_example', 'pytorch', 'ANY'),
|
('densenet121_example', 'pytorch', 'ANY'),
|
||||||
@ -53,9 +52,9 @@ def test_build_quantization_graph(tmp_path, models, model_name, model_framework,
|
|||||||
model = load_model(model.model_params, target_device=target_device)
|
model = load_model(model.model_params, target_device=target_device)
|
||||||
|
|
||||||
if target_device == 'GNA':
|
if target_device == 'GNA':
|
||||||
hardware_config = HardwareConfig.from_json(GNA_CONFIG_PATH.as_posix())
|
hardware_config = HardwareConfig.from_json(GNA_CONFIG_PATH.as_posix(), target_device)
|
||||||
else:
|
else:
|
||||||
hardware_config = HardwareConfig.from_json(CPU_CONFIG_PATH.as_posix())
|
hardware_config = HardwareConfig.from_json(CPU_CONFIG_PATH.as_posix(), target_device)
|
||||||
|
|
||||||
quantization_model = GraphTransformer(hardware_config).insert_fake_quantize(model)
|
quantization_model = GraphTransformer(hardware_config).insert_fake_quantize(model)
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user