[POT] Optimize quantization scheme for SPR/ATS (#9564)

* feat: delete fq from branch

* delete transformation

* pylint

* delete gpu_ats

* add tests

* pylint
Author: Indira Salyahova
Date: 2022-01-25 17:37:45 +03:00
Committed by: GitHub
Parent: 6927547f9c
Commit: 0ecbe50bf5

5 changed files with 58 additions and 3 deletions

View File

@@ -18,7 +18,8 @@ __HARDWARE_CONFIGS_MAP = {'ANY': 'cpu.json',
                           'CPU': 'cpu.json',
                           'GNA': 'gna.json',
                           'GPU': 'cpu.json',
-                          'VPU': 'vpu.json'}
+                          'VPU': 'vpu.json',
+                          'CPU_SPR': 'cpu.json'}
 
 
 def load_hardware_config(config):
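Net effect of this hunk: the new CPU_SPR target name resolves to the same hardware description file as the generic CPU target. A minimal sketch of that lookup, with a hypothetical helper name (the real load_hardware_config takes the whole POT config object and does more than a dict lookup):

import os

# Illustrative copy of the mapping above: 'CPU_SPR' reuses cpu.json.
HARDWARE_CONFIGS_MAP = {'ANY': 'cpu.json',
                        'CPU': 'cpu.json',
                        'GPU': 'cpu.json',
                        'VPU': 'vpu.json',
                        'CPU_SPR': 'cpu.json'}

def resolve_hardware_config_path(target_device, config_dir='hardware_configs'):
    # Hypothetical helper, not the POT API: map a device name to its
    # hardware config file, falling back to the generic CPU description.
    return os.path.join(config_dir, HARDWARE_CONFIGS_MAP.get(target_device, 'cpu.json'))

assert resolve_hardware_config_path('CPU_SPR') == resolve_hardware_config_path('CPU')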

View File

@@ -25,6 +25,7 @@ from openvino.tools.mo.middle.passes.convert_data_type import convert_blob
 from . import editor as ge
 from . import node_utils as nu
 from .editor import get_nodes_by_type
+from .pattern_utils import get_fq_result_pattern
 from .special_operations import OPERATIONS_WITH_WEIGHTS, DETECTION_OUTPUT_FINAL_TYPES, SPLIT_OPERATIONS
 from .utils import find_operation_matches, is_ignored, get_hw_aware_ignored_patterns
@@ -487,6 +488,53 @@ class RemoveFakeQuantize:
         if parent_node.type == 'Const':
             parent_node['need_shape_inference'] = True
 
+    def optimize_for_gp_hw(self, graph, target_device):
+        """
+        Remove redundant FQs before Add operations for the SPR (CPU) platform
+        """
+        def _walk_for_branch(node):
+            input_node = node
+            delete_const = lambda node: ([op for op in node if op is not None and op.type != 'Const'])
+            while True:
+                input_node = get_node_inputs(input_node)
+                input_node = delete_const(input_node)
+                if len(input_node) > 1:
+                    return False
+                input_node = input_node[0]
+                if input_node.type in ['Convolution', 'GroupConvolution', 'MatMul']:
+                    return True
+
+        def _check_const_input(node):
+            input_node = get_node_inputs(node)[0]
+            return nu.check_const_input(input_node)
+
+        def delete_one_fq(inputs_node):
+            fq_1, fq_2 = inputs_node
+            if len(get_all_node_outputs(fq_1)) > 1 \
+                    and len(get_all_node_outputs(fq_2)) == 1 and _check_const_input(fq_2):
+                self.disconnect_fq_node(fq_2)
+                return
+            if _walk_for_branch(fq_1) and _walk_for_branch(fq_2):
+                if np.prod(nu.get_output_shape(fq_1, 0)) >= np.prod(nu.get_output_shape(fq_2, 0)):
+                    self.disconnect_fq_node(fq_1)
+                else:
+                    self.disconnect_fq_node(fq_2)
+                return
+
+        special_target_device = ['CPU_SPR']
+        if target_device not in special_target_device:
+            return
+        check_is_inputs_fq = lambda node: all([op.type == 'FakeQuantize' for op in node])
+        for op in get_nodes_by_type(graph, ['Add']):
+            if not nu.check_const_input(op):
+                inputs_node = np.array(get_node_inputs(op))
+                count_outputs_node = np.array([len(get_all_node_outputs(node)) for node in inputs_node])
+                indices = count_outputs_node.argsort()[::-1]
+                inputs_node = inputs_node[indices]
+                if check_is_inputs_fq(inputs_node):
+                    delete_one_fq(inputs_node)
+
     @staticmethod
     def undo_bias_correction(conv_node):
         bias_node = nu.get_bias_for_node(conv_node)
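In short: for the CPU_SPR target the pass visits every Add without a constant input, orders its two producers by fan-out, and, when both are FakeQuantize nodes, drops one of them. If one FQ feeds several consumers while the other feeds only this Add and its producer has a constant input, the single-consumer FQ is disconnected; otherwise, when each branch walks back through single-input chains to a Convolution, GroupConvolution, or MatMul, the FQ covering the larger tensor is disconnected. A standalone sketch of that last tie-breaking rule, with plain shapes standing in for graph nodes (illustrative names, not the POT API):

import numpy as np

def pick_fq_to_disconnect(shape_fq_1, shape_fq_2):
    # Mirrors the np.prod comparison in optimize_for_gp_hw: the
    # FakeQuantize with the larger (or equal) element count loses.
    return 1 if np.prod(shape_fq_1) >= np.prod(shape_fq_2) else 2

# The FQ on the wider branch is removed; the narrower branch keeps its FQ.
assert pick_fq_to_disconnect((1, 64, 56, 56), (1, 64, 1, 1)) == 1
assert pick_fq_to_disconnect((1, 16), (1, 256)) == 2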

View File

@@ -65,6 +65,9 @@ class GraphTransformer:
         self.fq_name_swapper.rename_fqs_in_the_end(graph)
         graph.clean_up()
 
+        self.fq_removal.optimize_for_gp_hw(graph, self.target_device)
+        graph.clean_up()
+
         return graph
 
     def insert_fake_quantize(self, model, ignored_params=None):
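The SPR-specific removal runs at the very end of the FQ-insertion pipeline, and the second clean_up is what actually drops the FQs the pass disconnected. A condensed sketch of the call order (attribute names as in the hunks above; the method name and surrounding class plumbing are assumed):

def _rename_and_optimize(self, graph):
    # Assumed condensation of the pipeline tail around this change:
    # rename FQs, clean up, run the SPR-only FQ removal (a no-op for
    # every target_device other than 'CPU_SPR'), then clean up again
    # so the disconnected FakeQuantize nodes leave the graph.
    self.fq_name_swapper.rename_fqs_in_the_end(graph)
    graph.clean_up()
    self.fq_removal.optimize_for_gp_hw(graph, self.target_device)
    graph.clean_up()
    return graph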

View File

@@ -19,7 +19,8 @@ HARDWARE_AWARE_IGNORED_PATTERNS = {
     'CPU': get_cpu_ignored_patterns(),
     'GPU': get_gpu_ignored_patterns(),
     'VPU': get_vpu_ignored_patterns(),
-    'GNA': get_gna_ignored_patterns()
+    'GNA': get_gna_ignored_patterns(),
+    'CPU_SPR': get_cpu_ignored_patterns()
 }
 
 DEFAULT_PATH = 'PATH'

View File

@@ -26,9 +26,11 @@ TEST_MODELS = [
     ('multiple_out_ports_net', 'tf', 'ANY'),
     ('lstm_example', 'pytorch', 'GNA'),
     ('multiple_outputs_net_example', 'dldt', 'GNA'),
-    # ('tensor_iterator_example', 'tf', 'ANY'),
+    ('resnet_example', 'pytorch', 'CPU_SPR'),
+    # ('tensor_iterator_example', 'tf', 'ANY'),
 ]
 
 CASCADE_MAP = Dict({
     'mtcnn': {
         'model_names': ['mtcnn-p', 'mtcnn-r', 'mtcnn-o'],
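The added ('resnet_example', 'pytorch', 'CPU_SPR') tuple feeds the same parametrized graph tests as the existing entries. A hedged sketch of how such a tuple is typically consumed (fixture and reference-comparison details are assumed, not taken from this test file):

import pytest

TEST_MODELS_SKETCH = [
    ('resnet_example', 'pytorch', 'CPU_SPR'),
]

@pytest.mark.parametrize('model_name, framework, target_device', TEST_MODELS_SKETCH)
def test_fq_placement_sketch(model_name, framework, target_device):
    # A real test would load model_name from the given framework, apply
    # GraphTransformer with target_device, and compare the transformed
    # graph against a reference IR; only the parametrization shape is
    # shown here.
    assert target_device == 'CPU_SPR'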