fix TSGatherForward transformation (#18537)

* add comment * rewrite new_order generation code * add unit tests * code review fix * fix windows build * code review fixes --------- Co-authored-by: Ivan Tikhonov <ivan.tikhonov@intel.com>
2023-07-24 16:18:22 +02:00 · 2023-07-24 16:18:22 +02:00 · 6ecbdaea08
commit 6ecbdaea08
parent 5eab1be682
2 changed files with 79 additions and 17 deletions
--- a/src/common/transformations/src/transformations/transpose_sinking/ts_gather.cpp
+++ b/src/common/transformations/src/transformations/transpose_sinking/ts_gather.cpp
@ -67,20 +67,66 @@ TSGatherForward::TSGatherForward() {
        } else {
            axis = static_cast<size_t>(axes[0]);
        }
        /*
            https://docs.openvino.ai/2023.0/openvino_docs_ops_movement_Gather_8.html
            The Gather output shape has the same shape as the input,
            with the indexed-axis replaced by the shape of the indices
            Gather input shape | Gather indexes shape | axis | Gather output shape
                {1, 2, 3}      |        {}            |   1  |      {1, 3}
                {1, 2, 3}      |        {7}           |   1  |      {1, 7, 3}
                {1, 2, 3}      |        {7,5}         |   1  |      {1, 7, 5, 3}
            New transpose order length equals to output Gather shape size.
            As gather modifies input shape within axis dimension, our transpose order
            will be modified with axis dimension.
            New transpose order values:
                - values before axis will be original
                - values in [axis, axis + indexes_rank_size - 1] will be original + [0 1 ...]
                  if indexes_rank_size == 0, there will be no such items
                - values after axis will be original + indexes_rank_size - 1
                  (as one dim[axis] will be substituted with new indexes_rank_size dimensions)
                  if indexes_rank_size == 0, values will be original - 1
        */
        const auto& indices_rank_val = indices_rank.get_length();
        std::vector<size_t> new_transpose_order(order_val.size() + indices_rank_val - 1);
-        for (size_t i = 0, j = 0; i < new_transpose_order.size(); ++i) {
+        const int n_axis_dims = static_cast<int>(indices_rank_val) - 1;
-            if (i > axis && i < (axis + indices_rank_val)) {
+        /*
-                new_transpose_order[i] = new_transpose_order[j - 1] + 1;
+            i - new_transpose_order index
-            } else if (order_val[i] > axis) {
+            j - order_val index
-                new_transpose_order[i] = order_val[j] + indices_rank_val - 1;
+            k - substituted dims by Gather index
-                j++;
+            - There might be a situation when output Gather shape has one dimension
-            } else {
+                less than input shape. In such a case n_axis_dims < 0 and we should
                skip order_val[axis] and all the next order_val[j] will be reduced.
            - On the other hand, in a case with a multidimensional index the Gather output
                shape has more dimensions than the input shape. We need to add these
                dimensions into the transpose order and increase all next order_val[j]
        */
        for (size_t i = 0, j = 0, k = 0; i < new_transpose_order.size(); ++i) {
            if (order_val[j] == axis && static_cast<int>(k) > n_axis_dims) {
                /*
                    We added all new dimensions into the order.
                    We should go to the next order_val value.
                */
                ++j;
            }
            if (order_val[j] < axis) {
                // transpose order values that are less than the axis remain the same
                new_transpose_order[i] = order_val[j];
-                j++;
+                ++j;
            } else if (order_val[j] == axis && static_cast<int>(k) <= n_axis_dims) {
                // these are new dims and they are not involved in the transposition. They have to stay in the same
                // place.
                new_transpose_order[i] = order_val[j] + k;
                ++k;
            } else {  // order_val[j] > axis
                /*
                    Transpose order values that are greater than the axis are shifted by N, where N is the
                    number of newly added dimensions
                */
                new_transpose_order[i] = order_val[j] + n_axis_dims;
                ++j;
            }
        }
        auto new_order_const = ov::op::v0::Constant::create(transpose_order->get_element_type(),
                                                            {new_transpose_order.size()},
                                                            new_transpose_order);
--- a/src/common/transformations/tests/transpose_sinking/ts_gather_test.cpp
+++ b/src/common/transformations/tests/transpose_sinking/ts_gather_test.cpp
@ -63,7 +63,8 @@ auto wrapper = [](const TestCase& test_case) {
 struct GatherForwardArguments {
    OutputVector inputs_to_main;
-    Output<Node> new_input_to_Gather_1;
+    Output<Node> new_Gather_first_input;
    AxisVector new_transpose_order;
 };
 auto test_forward_gather = [](const GatherForwardArguments& test_arguments) {
@ -80,9 +81,11 @@ auto test_forward_gather = [](const GatherForwardArguments& test_arguments) {
    test_case.model.model_template = create_model;
    // Reference model description:
-    auto new_transpose = [](const vector<size_t>& idxs, const OutputVector& out_vec) -> OutputVector {
+    auto new_transpose = [&test_arguments](const vector<size_t>& idxs, const OutputVector& out_vec) -> OutputVector {
        OutputVector new_out_vec(out_vec.size());
-        auto order = make_shared<Constant>(element::i32, Shape{4}, std::vector<int64_t>{3, 2, 1, 0});
+        auto order = make_shared<Constant>(i32,
                                           Shape{test_arguments.new_transpose_order.size()},
                                           test_arguments.new_transpose_order);
        new_out_vec[0] = make_shared<Transpose>(out_vec[0], order);
        return new_out_vec;
    };
@ -90,7 +93,7 @@ auto test_forward_gather = [](const GatherForwardArguments& test_arguments) {
        OutputVector new_out_vec(out_vec.size());
        new_out_vec[0] = out_vec[0];
        new_out_vec[1] = out_vec[1];
-        new_out_vec[2] = test_arguments.new_input_to_Gather_1;
+        new_out_vec[2] = test_arguments.new_Gather_first_input;
        return new_out_vec;
    };
    test_case.model_ref.preprocess_inputs_to_main = {{new_constant}, {{2}}};
@ -103,13 +106,26 @@ auto test_forward_gather = [](const GatherForwardArguments& test_arguments) {
 vector<GatherForwardArguments> tests_arguments_fw{
    {{{parameter(f32, {3, 4, 5, 6}), constant<int>(i32, {2}, {0, 2}), constant<int>(i32, {1}, {2})}},
-     constant<int>(i32, {1}, {1})}};
+     constant<int>(i32, {1}, {1}),
     AxisVector{3, 2, 1, 0}},
    {{parameter(f32, {2, 4}), constant<int>(i32, {}, {0}), constant<int>(i32, {1}, {1})},
     constant<int>(i32, {1}, {0}),
     AxisVector{0}},
    {{parameter(f32, {2, 4}), constant<int>(i32, {1}, {0}), constant<int>(i32, {1}, {1})},
     constant<int>(i32, {1}, {0}),
     AxisVector{1, 0}},
    {{parameter(f32, {2, 3, 4}), constant<int>(i32, {2, 3}, {0, 1, 0, 1, 0, 1}), constant<int>(i32, {1}, {1})},
     constant<int>(i32, {1}, {1}),
     AxisVector{3, 1, 2, 0}}};
 INSTANTIATE_TEST_SUITE_P(TSCommonGatherForward_0, TSTestFixture, test_forward_gather(tests_arguments_fw[0]));
 INSTANTIATE_TEST_SUITE_P(TSCommonGatherForward_1, TSTestFixture, test_forward_gather(tests_arguments_fw[1]));
 INSTANTIATE_TEST_SUITE_P(TSCommonGatherForward_2, TSTestFixture, test_forward_gather(tests_arguments_fw[2]));
 INSTANTIATE_TEST_SUITE_P(TSCommonGatherForward_3, TSTestFixture, test_forward_gather(tests_arguments_fw[3]));
 struct GatherBackwardArguments {
    OutputVector inputs_to_main;
-    Output<Node> new_input_to_Gather_1;
+    Output<Node> new_Gather_first_input;
 };
 auto test_backward_gather = [](const GatherBackwardArguments& test_arguments) {
@ -130,7 +146,7 @@ auto test_backward_gather = [](const GatherBackwardArguments& test_arguments) {
        OutputVector new_out_vec(out_vec.size());
        new_out_vec[0] = out_vec[0];
        new_out_vec[1] = out_vec[1];
-        new_out_vec[2] = test_arguments.new_input_to_Gather_1;
+        new_out_vec[2] = test_arguments.new_Gather_first_input;
        return new_out_vec;
    };
    test_case.model_ref.preprocess_inputs_to_main = {{set_transpose_for, new_constant}, {{0}, {2}}};
@ -167,7 +183,7 @@ auto test_backward_gather_optimization = [](const GatherBackwardArguments& test_
        OutputVector new_out_vec(out_vec.size());
        new_out_vec[0] = out_vec[0];
        new_out_vec[1] = make_shared<Squeeze>(out_vec[1]);
-        new_out_vec[2] = test_arguments.new_input_to_Gather_1;
+        new_out_vec[2] = test_arguments.new_Gather_first_input;
        return new_out_vec;
    };