# openvino/model-optimizer/extensions/ops/MatMul.py
"""
Copyright (C) 2018-2020 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import logging as log
import numpy as np
from mo.front.common.partial_infer.utils import assign_dims_to_weights, int64_array
from mo.graph.graph import Node, Graph
from mo.ops.op import Op
class MatMul(Op):
    """
    MatMul operation takes two tensors and performs usual matrix-matrix multiplication, matrix-vector multiplication
    or vector-matrix multiplication depending on argument shapes.

    Input tensors can have any rank >= 1.
    Two right-most axes in each tensor are interpreted as matrix rows and columns dimensions while
    all left-most axes (if present) are interpreted as multi-dimensional batch:
    [BATCH_DIM_1, BATCH_DIM_2,..., BATCH_DIM_K, ROW_INDEX_DIM, COL_INDEX_DIM]
    The operation supports usual broadcast semantics for batch dimensions.
    It enables multiplication of batch of pairs of matrices in a single shot.

    Before matrix multiplication, there is an implicit shape alignment for input arguments.
    It consists of the following steps:
    1. If rank of an input less than 2 it is unsqueezed to 2D tensor by adding axes with size 1 to the left of
       the shape. For example, if input has shape [S] it will be reshaped to [1, S].
       It is applied for each input independently
    2. Applied transpositions specified by optional transpose_a and transpose_b attributes
    3. If ranks of input arguments are different after steps 1 and 2, each is unsqueezed from the left side of
       the shape by necessary number of axes to make both shapes of the same rank
    4. Usual rules of the broadcasting are applied for batch dimensions

    Two attributes, transpose_a and transpose_b specifies embedded transposition for two right-most dimension for the
    first and the second input tensors correspondingly. It implies swapping of ROW_INDEX_DIM and COL_INDEX_DIM in
    the corresponding input tensor. Batch dimensions are not affected by these attributes.

    Shape inference mechanism:
        0-port aligned input shape: [BATCH_DIM_1, BATCH_DIM_2,..., BATCH_DIM_K, A_ROW_INDEX_DIM, A_COL_INDEX_DIM]
        1-port aligned input shape: [BATCH_DIM_1, BATCH_DIM_2,..., BATCH_DIM_K, B_ROW_INDEX_DIM, B_COL_INDEX_DIM]
        where A_COL_INDEX_DIM == B_ROW_INDEX_DIM
        Output shape: [BATCH_DIM_1, BATCH_DIM_2,..., BATCH_DIM_K, A_ROW_INDEX_DIM, B_COL_INDEX_DIM]
    """
    op = 'MatMul'

    def __init__(self, graph: Graph, attrs: dict):
        mandatory_props = {
            'type': self.op,
            'op': self.op,
            'version': 'opset1',
            'transpose_a': False,
            'transpose_b': False,
            'infer': __class__.infer,
            'in_ports_count': 2,
            'out_ports_count': 1,
        }
        super().__init__(graph, mandatory_props, attrs)

    def supported_attrs(self):
        return [
            'transpose_a',
            'transpose_b',
        ]

    @staticmethod
    def shape_alignment(node: Node):
        """
        Specification of MatMul operation allows inputs to be aligned together before matrix multiplication.
        Alignment steps described in MatMul operation doc-string upper in current file.
        Current method raises an error if input shapes are not valid at any step of alignment process
        :param node: MatMul node with both input shapes already inferred
        :return: aligned copies of both input shapes
        """
        node_name = node.soft_get('name', str(node.id))
        input_shapes = [node.in_port(i).data.get_shape() for i in range(2)]
        transpose_a = node.has_and_set('transpose_a')
        transpose_b = node.has_and_set('transpose_b')

        transformed_shapes = []
        for i, shape in enumerate(input_shapes):
            # prerequisites check
            # BUGFIX: the `None` check must run BEFORE `shape.copy()`; the previous order raised
            # AttributeError on a `None` shape instead of the intended assertion message
            assert shape is not None, "MatMul has shape=`None` for {} input of `{}` node".format(i, node_name)
            input_shape = shape.copy()
            assert input_shape.ndim == 1, "MatMul doesn't support scalar inputs. {} input of `{}` node has shape {}" \
                                          "".format(i, node_name, input_shape)
            assert input_shape.size >= 1, "MatMul doesn't support inputs with rank lower than 1. {} input of `{}` " \
                                          "node has shape {}".format(i, node_name, input_shape)

            # shape alignment
            if input_shape.size < 2:
                # step 1: unsqueeze a 1D input to 2D by prepending an axis of size 1: [S] -> [1, S]
                input_shape = np.insert(input_shape, 0, 1)
            if (i == 0 and transpose_a) or (i == 1 and transpose_b):
                # step 2: embedded transposition swaps the two right-most (row/column) dimensions
                input_shape[-2], input_shape[-1] = input_shape[-1], input_shape[-2]
            # step 3: prepend axes of size 1 until both shapes are of the same rank
            max_shape_length = max(input_shapes[0].size, input_shapes[1].size)
            input_shape = np.insert(input_shape, 0, [1] * (max_shape_length - input_shape.size))
            transformed_shapes.append(input_shape)

        A_shape = transformed_shapes[0]
        B_shape = transformed_shapes[1]

        assert A_shape.size == B_shape.size, \
            "Shapes were not aligned by length for MatMul `{}`. Shapes: `{}`".format(node_name, transformed_shapes)

        # step 4: batch broadcasting - a batch dimension of size 1 takes the size of the other input's dimension
        batch_len = A_shape.size - 2
        for i in range(batch_len):
            if A_shape[i] != B_shape[i]:
                if A_shape[i] == 1:
                    A_shape[i] = B_shape[i]
                if B_shape[i] == 1:
                    B_shape[i] = A_shape[i]

        assert np.array_equal(A_shape[:-2], B_shape[:-2]), \
            "MatMul input shapes are incorrect. BATCH_DIMs are not equal. Node: {}. Aligned shapes: {}" \
            "".format(node_name, transformed_shapes)

        return A_shape, B_shape

    @staticmethod
    def value_propagation(node: Node):
        """
        This function performs a value propagation for MatMul layer: when both inputs carry known
        constant values, the output value is computed with np.matmul (constant folding).
        :param node: MatMul layer
        :return: None
        """
        a_value = node.in_port(0).get_source().data.get_value()
        b_value = node.in_port(1).get_source().data.get_value()
        if a_value is not None and b_value is not None:
            # transpose_a/transpose_b are applied to the values explicitly before multiplication
            if node.transpose_a:
                a_value = transpose(a_value)
            if node.transpose_b:
                b_value = transpose(b_value)
            node.out_port(0).data.set_value(np.matmul(a_value, b_value))

    @staticmethod
    def infer(node: Node):
        """
        Performs shape inference of MatMul node as operation doc-string says
        Raises on any shape inconsistency
        """
        name = node.soft_get('name', str(node.id))
        connected_in_ports = {idx: port for idx, port in node.in_ports().items() if not port.disconnected()}
        assert len(connected_in_ports) == 2 and 0 in connected_in_ports and 1 in connected_in_ports, \
            "MatMul should have 2 connected input ports, but it doesn't for node: `{}`. Ports: {}" \
            "".format(name, connected_in_ports)

        log.debug('MatMul `{}` input shapes: {}'.format(name, [node.in_port(i).data.get_shape() for i in range(2)]))
        A_shape, B_shape = MatMul.shape_alignment(node)
        log.debug('MatMul `{}` aligned input shapes: {}'.format(name, [A_shape, B_shape]))

        # inner dimensions must agree: A_COL_INDEX_DIM == B_ROW_INDEX_DIM
        assert A_shape[-1] == B_shape[-2], \
            "MatMul input shapes are incorrect. COL_INDEX_DIMs are not equal. Node: {}. Shapes: {}" \
            "".format(name, [A_shape, B_shape])

        # output: batch dims + A rows + B columns
        output_shape = np.concatenate((A_shape[:-1], B_shape[-1:]))
        node.out_port(0).data.set_shape(output_shape)

        # mark which axis of the 1-port (weights) input is input/output channels; depends on transpose_b
        in_ch = 0 if not node.transpose_b else 1
        out_ch = 1 if not node.transpose_b else 0
        assign_dims_to_weights(node.in_node(1), None, in_ch, out_ch, node.in_port(1).data.get_shape().size)

        MatMul.value_propagation(node)
def transpose(value):
    """
    Swap the two innermost axes of `value`.
    A 1D input of length N is turned into a column vector of shape (N, 1);
    higher-rank inputs keep all leading (batch) axes untouched.
    """
    rank = value.ndim
    if rank == 1:
        return value.reshape(len(value), 1)
    if rank == 2:
        return np.transpose(value, [1, 0])
    permutation = list(range(rank))
    permutation[-2], permutation[-1] = permutation[-1], permutation[-2]
    return np.transpose(value, permutation)
# MatMul-like operation from frameworks
class GemmONNX(Op):
    """
    Represents Gemm operation from ONNX
    Missing `type` and `infer` attributes on purpose - node should be decomposed on front phase
    and should never be inferred or translated to IR as is
    """
    op = 'Gemm'
    enabled = False

    def __init__(self, graph: Graph, attrs: dict):
        super().__init__(graph, {
            'op': GemmONNX.op,
            'transpose_a': False,
            'transpose_b': False,
            'alpha': 1,
            'beta': 1,
            'broadcast_c': True,
            'in_ports_count': 3,
            'out_ports_count': 1,
        }, attrs)
class FullyConnected(Op):
    # TODO: remove `infer`, `type` and supported_attrs after op removal from IR Spec
    op = 'FullyConnected'
    enabled = False

    def __init__(self, graph: Graph, attrs: dict):
        mandatory_props = {
            'op': FullyConnected.op,
            'type': FullyConnected.op,
            'infer': FullyConnected.infer,
            'in_ports_count': 3,
            'out_ports_count': 1,
        }
        super().__init__(graph, mandatory_props, attrs)

    def supported_attrs(self):
        return ['out-size']

    @staticmethod
    def infer(node: Node):
        """
        Shape inference for FullyConnected: the output shape equals the 0-port input shape with its
        last dimension replaced by the `out-size` attribute. Validates that weights are 2D with the
        first dimension equal to `out-size` and, when port 2 is connected, that the constant bias
        has shape [out-size] or [1, out-size].
        """
        name = node.soft_get('name', node.id)

        connected = {idx: port for idx, port in node.in_ports().items() if not port.disconnected()}
        assert len(connected) >= 2 and 0 in connected and 1 in connected, \
            'FullyConnected should have 2 connected input ports, but it doesn\'t for node: `{}`. Ports: {}' \
            ''.format(name, connected)
        assert node.has_valid('out-size')
        out_size = node.soft_get('out-size')

        input_shape = node.in_port(0).data.get_shape()
        weights_shape = node.in_port(1).data.get_shape()
        assert input_shape is not None and weights_shape is not None, \
            'Incorrect FullyConnected input shapes. Node: {}. Shapes: {}'.format(name, [input_shape, weights_shape])
        assert weights_shape.size == 2
        assert weights_shape[0] == out_size, 'weights_shape={}, out-size={}'.format(weights_shape, out_size)

        if 2 in connected:
            # bias input is optional but, when present, must be a constant of a compatible shape
            bias_value = node.in_port(2).data.get_value()
            bias_shape = node.in_port(2).data.get_shape()
            assert bias_shape is not None, 'Shape was not inferred for biases of FullyConnected {}'.format(name)
            assert bias_value is not None, 'Value was not inferred for biases of FullyConnected {}'.format(name)
            assert np.array_equal(bias_shape, [out_size]) or np.array_equal(bias_shape, [1, out_size]), \
                'Incorrect FullyConnected bias shape `{}` for node {}. `out-size`={}'.format(bias_shape, node, out_size)

        node.out_port(0).data.set_shape(int64_array([*input_shape[:-1], out_size]))
# MatMul-like operations for IR V6
class Gemm(MatMul):
    """
    GEMM operation permitted to appear in v6 IRs.
    Inherits the full MatMul semantics so it is re-inferred on the back phase and is
    successfully serialized to IR (v6) under the `GEMM` type.
    """
    op = 'GEMM'
    enabled = False