[POT] Optimize quantization scheme for SPR/ATS (#9564)

* feat: delete fq from branch * delete transformation * pylint * delete gpu_ats * add tests * pylint
2022-01-25 17:37:45 +03:00
parent 6927547f9c
commit 0ecbe50bf5
5 changed files with 58 additions and 3 deletions
--- a/tools/pot/openvino/tools/pot/algorithms/quantization/utils.py
+++ b/tools/pot/openvino/tools/pot/algorithms/quantization/utils.py
@@ -18,7 +18,8 @@ __HARDWARE_CONFIGS_MAP = {'ANY': 'cpu.json',
                          'CPU': 'cpu.json',
                          'GNA': 'gna.json',
                          'GPU': 'cpu.json',
-                          'VPU': 'vpu.json'}
+                          'VPU': 'vpu.json',
+                          'CPU_SPR': 'cpu.json'}


 def load_hardware_config(config):
--- a/tools/pot/openvino/tools/pot/graph/passes.py
+++ b/tools/pot/openvino/tools/pot/graph/passes.py
@@ -25,6 +25,7 @@ from openvino.tools.mo.middle.passes.convert_data_type import convert_blob

 from . import editor as ge
 from . import node_utils as nu
+from .editor import get_nodes_by_type
 from .pattern_utils import get_fq_result_pattern
 from .special_operations import OPERATIONS_WITH_WEIGHTS, DETECTION_OUTPUT_FINAL_TYPES, SPLIT_OPERATIONS
 from .utils import find_operation_matches, is_ignored, get_hw_aware_ignored_patterns
@@ -487,6 +488,53 @@ class RemoveFakeQuantize:
        if parent_node.type == 'Const':
            parent_node['need_shape_inference'] = True

+    def optimize_for_gp_hw(self, graph, target_device):
+        """
+        Disconnects one of the two FQs feeding an Add operation (CPU_SPR target only)
+        :param graph: graph whose Add operations are inspected
+        :param target_device: device name; any value other than 'CPU_SPR' makes this a no-op
+        """
+        def _walk_for_branch(node):
+            # True when the chain of single non-Const inputs above node reaches Conv/GroupConv/MatMul
+            input_node = node
+            while True:
+                inputs = [op for op in get_node_inputs(input_node) if op is not None and op.type != 'Const']
+                if len(inputs) != 1:  # branch point, or nothing left to follow (avoids IndexError)
+                    return False
+                input_node = inputs[0]
+                if input_node.type in ['Convolution', 'GroupConvolution', 'MatMul']:
+                    return True
+
+        def _check_const_input(node):
+            input_node = get_node_inputs(node)[0]
+            return nu.check_const_input(input_node)
+
+        def delete_one_fq(inputs_node):
+            fq_1, fq_2 = inputs_node
+            # fq_1 has more than one output, fq_2 exactly one: disconnect fq_2 if its first input has a const input
+            if len(get_all_node_outputs(fq_1)) > 1 \
+                and len(get_all_node_outputs(fq_2)) == 1 and _check_const_input(fq_2):
+                self.disconnect_fq_node(fq_2)
+                return
+            if _walk_for_branch(fq_1) and _walk_for_branch(fq_2):
+                # both walks succeeded: disconnect the FQ with the larger output shape product (fq_1 on ties)
+                if np.prod(nu.get_output_shape(fq_1, 0)) >= np.prod(nu.get_output_shape(fq_2, 0)):
+                    self.disconnect_fq_node(fq_1)
+                else:
+                    self.disconnect_fq_node(fq_2)
+
+        if target_device not in ['CPU_SPR']:
+            return
+
+        for op in get_nodes_by_type(graph, ['Add']):
+            if not nu.check_const_input(op):
+                inputs_node = np.array(get_node_inputs(op))
+                count_outputs_node = np.array([len(get_all_node_outputs(node)) for node in inputs_node])
+                # order the Add inputs by descending number of outputs before choosing which FQ to drop
+                inputs_node = inputs_node[count_outputs_node.argsort()[::-1]]
+                if all(fq.type == 'FakeQuantize' for fq in inputs_node):
+                    delete_one_fq(inputs_node)
+
    @staticmethod
    def undo_bias_correction(conv_node):
        bias_node = nu.get_bias_for_node(conv_node)
--- a/tools/pot/openvino/tools/pot/graph/transformer.py
+++ b/tools/pot/openvino/tools/pot/graph/transformer.py
@@ -65,6 +65,9 @@ class GraphTransformer:
        self.fq_name_swapper.rename_fqs_in_the_end(graph)
        graph.clean_up()

+        self.fq_removal.optimize_for_gp_hw(graph, self.target_device)
+        graph.clean_up()
+
        return graph

    def insert_fake_quantize(self, model, ignored_params=None):
--- a/tools/pot/openvino/tools/pot/graph/utils.py
+++ b/tools/pot/openvino/tools/pot/graph/utils.py
@@ -19,7 +19,8 @@ HARDWARE_AWARE_IGNORED_PATTERNS = {
    'CPU': get_cpu_ignored_patterns(),
    'GPU': get_gpu_ignored_patterns(),
    'VPU': get_vpu_ignored_patterns(),
-    'GNA': get_gna_ignored_patterns()
+    'GNA': get_gna_ignored_patterns(),
+    'CPU_SPR': get_cpu_ignored_patterns()
 }

 DEFAULT_PATH = 'PATH'
--- a/tools/pot/tests/test_graph.py
+++ b/tools/pot/tests/test_graph.py
@@ -26,9 +26,11 @@ TEST_MODELS = [
    ('multiple_out_ports_net', 'tf', 'ANY'),
    ('lstm_example', 'pytorch', 'GNA'),
    ('multiple_outputs_net_example', 'dldt', 'GNA'),
-    # ('tensor_iterator_example', 'tf', 'ANY'),
+    ('resnet_example', 'pytorch', 'CPU_SPR'),
+    #('tensor_iterator_example', 'tf', 'ANY'),
 ]

+
 CASCADE_MAP = Dict({
    'mtcnn': {
        'model_names': ['mtcnn-p', 'mtcnn-r', 'mtcnn-o'],