[POT] Optimize quantization scheme for SPR/ATS (#9564)
* feat: delete fq from branch * delete transformation * pylint * delete gpu_ats * add tests * pylint
This commit is contained in:
@@ -18,7 +18,8 @@ __HARDWARE_CONFIGS_MAP = {'ANY': 'cpu.json',
|
||||
'CPU': 'cpu.json',
|
||||
'GNA': 'gna.json',
|
||||
'GPU': 'cpu.json',
|
||||
'VPU': 'vpu.json'}
|
||||
'VPU': 'vpu.json',
|
||||
'CPU_SPR': 'cpu.json'}
|
||||
|
||||
|
||||
def load_hardware_config(config):
|
||||
|
||||
@@ -25,6 +25,7 @@ from openvino.tools.mo.middle.passes.convert_data_type import convert_blob
|
||||
|
||||
from . import editor as ge
|
||||
from . import node_utils as nu
|
||||
from .editor import get_nodes_by_type
|
||||
from .pattern_utils import get_fq_result_pattern
|
||||
from .special_operations import OPERATIONS_WITH_WEIGHTS, DETECTION_OUTPUT_FINAL_TYPES, SPLIT_OPERATIONS
|
||||
from .utils import find_operation_matches, is_ignored, get_hw_aware_ignored_patterns
|
||||
@@ -487,6 +488,53 @@ class RemoveFakeQuantize:
|
||||
if parent_node.type == 'Const':
|
||||
parent_node['need_shape_inference'] = True
|
||||
|
||||
def optimize_for_gp_hw(self, graph, target_device):
    """
    Removing redundant FQs before operation Add for SPR(CPU) platform.

    Nothing is done unless target_device is 'CPU_SPR'. Otherwise every 'Add'
    node for which nu.check_const_input is falsy and whose two inputs are
    both FakeQuantize nodes is inspected, and at most one of those
    FakeQuantize nodes is detached through self.disconnect_fq_node.

    :param graph: graph whose 'Add' nodes are inspected
    :param target_device: name of the target device
    :return: None
    """
    special_target_device = ['CPU_SPR']
    if target_device not in special_target_device:
        return

    def _walk_for_branch(node):
        """
        Walk upwards from node through the single non-Const input of each
        visited node. Returns True as soon as a Convolution, GroupConvolution
        or MatMul is reached, and False when a visited node does not have
        exactly one non-Const input.
        """
        input_node = node
        while True:
            input_nodes = [op for op in get_node_inputs(input_node)
                           if op is not None and op.type != 'Const']
            # Zero candidates means the chain ended without meeting a target
            # operation; indexing [0] here used to raise IndexError.
            if len(input_nodes) != 1:
                return False
            input_node = input_nodes[0]
            if input_node.type in ['Convolution', 'GroupConvolution', 'MatMul']:
                return True

    def _check_const_input(node):
        """ Apply nu.check_const_input to the first input of node. """
        input_node = get_node_inputs(node)[0]
        return nu.check_const_input(input_node)

    def delete_one_fq(inputs_node):
        """
        Disconnect at most one FakeQuantize out of the given pair.
        The pair is expected to be ordered by descending number of outputs.
        """
        fq_1, fq_2 = inputs_node
        # fq_1 has several outputs, fq_2 has exactly one and the constant
        # check on its first input passes: drop fq_2.
        if len(get_all_node_outputs(fq_1)) > 1 \
                and len(get_all_node_outputs(fq_2)) == 1 and _check_const_input(fq_2):
            self.disconnect_fq_node(fq_2)
            return
        # Both branches lead to Convolution/GroupConvolution/MatMul:
        # drop the FQ with the larger output (fq_1 on equal sizes).
        if _walk_for_branch(fq_1) and _walk_for_branch(fq_2):
            if np.prod(nu.get_output_shape(fq_1, 0)) >= np.prod(nu.get_output_shape(fq_2, 0)):
                self.disconnect_fq_node(fq_1)
            else:
                self.disconnect_fq_node(fq_2)
            return

    for op in get_nodes_by_type(graph, ['Add']):
        if nu.check_const_input(op):
            continue
        inputs_node = get_node_inputs(op)
        # delete_one_fq unpacks exactly two nodes; skip anything else.
        if len(inputs_node) != 2:
            continue
        # None entries are tolerated here in the same way as in
        # _walk_for_branch instead of failing on attribute access.
        if not all(node is not None and node.type == 'FakeQuantize' for node in inputs_node):
            continue
        inputs_node = np.array(inputs_node)
        count_outputs_node = np.array([len(get_all_node_outputs(node)) for node in inputs_node])
        # Reversed ascending argsort: the input with more outputs goes first.
        indices = count_outputs_node.argsort()[::-1]
        delete_one_fq(inputs_node[indices])
|
||||
|
||||
@staticmethod
|
||||
def undo_bias_correction(conv_node):
|
||||
bias_node = nu.get_bias_for_node(conv_node)
|
||||
|
||||
@@ -65,6 +65,9 @@ class GraphTransformer:
|
||||
self.fq_name_swapper.rename_fqs_in_the_end(graph)
|
||||
graph.clean_up()
|
||||
|
||||
self.fq_removal.optimize_for_gp_hw(graph, self.target_device)
|
||||
graph.clean_up()
|
||||
|
||||
return graph
|
||||
|
||||
def insert_fake_quantize(self, model, ignored_params=None):
|
||||
|
||||
@@ -19,7 +19,8 @@ HARDWARE_AWARE_IGNORED_PATTERNS = {
|
||||
'CPU': get_cpu_ignored_patterns(),
|
||||
'GPU': get_gpu_ignored_patterns(),
|
||||
'VPU': get_vpu_ignored_patterns(),
|
||||
'GNA': get_gna_ignored_patterns()
|
||||
'GNA': get_gna_ignored_patterns(),
|
||||
'CPU_SPR': get_cpu_ignored_patterns()
|
||||
}
|
||||
|
||||
DEFAULT_PATH = 'PATH'
|
||||
|
||||
@@ -26,9 +26,11 @@ TEST_MODELS = [
|
||||
('multiple_out_ports_net', 'tf', 'ANY'),
|
||||
('lstm_example', 'pytorch', 'GNA'),
|
||||
('multiple_outputs_net_example', 'dldt', 'GNA'),
|
||||
# ('tensor_iterator_example', 'tf', 'ANY'),
|
||||
('resnet_example', 'pytorch', 'CPU_SPR'),
|
||||
#('tensor_iterator_example', 'tf', 'ANY'),
|
||||
]
|
||||
|
||||
|
||||
CASCADE_MAP = Dict({
|
||||
'mtcnn': {
|
||||
'model_names': ['mtcnn-p', 'mtcnn-r', 'mtcnn-o'],
|
||||
|
||||
Reference in New Issue
Block a user