From 204c17cc211c44b9f6977dce4b12a8541cf5d017 Mon Sep 17 00:00:00 2001
From: Gleb Kazantaev <gleb.kazantaev@intel.com>
Date: Tue, 28 Sep 2021 10:46:31 +0300
Subject: [PATCH] Enable SoftmaxFusion inside MOC Transformations pipeline
 (#7684)

* Enable SoftmaxFusion inside MOC Transformations pipeline

* Disable SoftmaxDecomposition by default
---
 .../src/cldnn_engine/cldnn_engine.cpp         |  7 +-
 .../src/mkldnn_plugin/mkldnn_plugin.cpp       |  7 +-
 .../src/moc_transformations.cpp               |  2 +
 .../op_conversions/softmax_decomposition.hpp  | 75 +++++++++++++++++++
 .../common_optimizations.cpp                  |  2 +
 .../op_conversions/softmax_decomposition.cpp  | 43 +++++++++++
 .../plugin/cpu/single_layer_tests/softmax.cpp | 17 +++++
 .../plugin/cpu/test_utils/cpu_test_utils.cpp  |  3 -
 8 files changed, 147 insertions(+), 9 deletions(-)
 create mode 100644 inference-engine/src/transformations/include/transformations/op_conversions/softmax_decomposition.hpp
 create mode 100644 inference-engine/src/transformations/src/transformations/op_conversions/softmax_decomposition.cpp

diff --git a/inference-engine/src/cldnn_engine/cldnn_engine.cpp b/inference-engine/src/cldnn_engine/cldnn_engine.cpp
index a4206268b4a..ea246150c5f 100644
--- a/inference-engine/src/cldnn_engine/cldnn_engine.cpp
+++ b/inference-engine/src/cldnn_engine/cldnn_engine.cpp
@@ -32,7 +32,6 @@
 #include <transformations/common_optimizations/lin_op_sequence_fusion.hpp>
 #include <transformations/common_optimizations/weights_dequantize_to_fake_quantize.hpp>
 #include "transformations/common_optimizations/convert_quantize_dequantize.hpp"
-#include "transformations/common_optimizations/softmax_fusion.hpp"
 #include <transformations/op_conversions/convert_depth_to_space.hpp>
 #include <transformations/op_conversions/convert_space_to_depth.hpp>
 #include <transformations/op_conversions/convert_gelu.hpp>
@@ -64,6 +63,7 @@
 #include <transformations/op_conversions/convert_gather_0d.hpp>
 #include <transformations/op_conversions/convert_deformable_conv_v8_to_v1.hpp>
 #include <transformations/op_conversions/simplify_ctc_greedy_decoder_seq_len.hpp>
+#include "transformations/op_conversions/softmax_decomposition.hpp"
 #include <transformations/convert_precision.hpp>
 #include <transformations/init_node_info.hpp>
 #include <transformations/rt_info/fused_names_attribute.hpp>
@@ -333,9 +333,10 @@ InferenceEngine::CNNNetwork clDNNEngine::CloneAndTransformNetwork(const Inferenc
                     return false;
                 });
 
-            pass_config->set_callback<ngraph::pass::SoftmaxFusion>(
+            pass_config->enable<ngraph::pass::SoftmaxDecomposition>();
+            pass_config->set_callback<ngraph::pass::SoftmaxDecomposition>(
                 [](const_node_ptr &node) -> bool {
-                    return node->input_value(0).get_partial_shape().rank().get_length() > 5;
+                    return node->input_value(0).get_partial_shape().rank().get_length() <= 5;
                 });
 
             // List of enabled/disabled transformations
diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp
index 77e15a4dac7..e756b4e209d 100644
--- a/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp
+++ b/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp
@@ -28,7 +28,6 @@
 #include <transformations/common_optimizations/common_optimizations.hpp>
 #include <transformations/common_optimizations/weights_dequantize_to_fake_quantize.hpp>
 #include "transformations/common_optimizations/convert_quantize_dequantize.hpp"
-#include <transformations/common_optimizations/softmax_fusion.hpp>
 #include <transformations/op_conversions/convert_depth_to_space.hpp>
 #include <transformations/op_conversions/convert_shuffle_channels3.hpp>
 #include <transformations/op_conversions/convert_space_to_depth.hpp>
@@ -47,6 +46,7 @@
 #include <transformations/op_conversions/convert_batch_to_space.hpp>
 #include <transformations/op_conversions/convert_sequences_to_tensor_iterator.hpp>
 #include <transformations/op_conversions/convert_subtract.hpp>
+#include <transformations/op_conversions/softmax_decomposition.hpp>
 #include <transformations/control_flow/unroll_tensor_iterator.hpp>
 #include <transformations/op_conversions/convert_mod.hpp>
 #include <transformations/op_conversions/convert_ti_to_sequences.hpp>
@@ -289,9 +289,10 @@ static void TransformationUpToCPUSpecificOpSet(std::shared_ptr<ngraph::Function>
                 return MKLDNNNormalizeL2Node::isSupportedOperation(node, errorMsg);
             });
 
-    pass_config->set_callback<ngraph::pass::SoftmaxFusion>(
+    pass_config->enable<ngraph::pass::SoftmaxDecomposition>();
+    pass_config->set_callback<ngraph::pass::SoftmaxDecomposition>(
             [](const_node_ptr &node) -> bool {
-                return node->input_value(0).get_partial_shape().rank().get_length() > 5;
+                return node->input_value(0).get_partial_shape().rank().get_length() <= 5;
             });
 
     pass_config->set_callback<ngraph::pass::ConvertNMSToNMSIEInternal>(
diff --git a/inference-engine/src/offline_transformations/src/moc_transformations.cpp b/inference-engine/src/offline_transformations/src/moc_transformations.cpp
index 40952e9494e..1427f229d23 100644
--- a/inference-engine/src/offline_transformations/src/moc_transformations.cpp
+++ b/inference-engine/src/offline_transformations/src/moc_transformations.cpp
@@ -39,6 +39,7 @@
 #include <transformations/common_optimizations/leaky_relu_fusion.hpp>
 #include <transformations/common_optimizations/normalize_l2_fusion.hpp>
 #include <transformations/common_optimizations/random_uniform_fusion.hpp>
+#include <transformations/common_optimizations/softmax_fusion.hpp>
 #include "transformations/common_optimizations/mul_conv_fusion.hpp"
 
 NGRAPH_RTTI_DEFINITION(ngraph::pass::MOCTransformations, "MOCTransformations", 0);
@@ -89,6 +90,7 @@ bool ngraph::pass::MOCTransformations::run_on_function(std::shared_ptr<ngraph::F
     common_fusions->add_matcher<ngraph::pass::NormalizeL2Fusion>();
     common_fusions->add_matcher<ngraph::pass::ClampFusion>();
     common_fusions->add_matcher<ngraph::pass::PadFusion>();
+    common_fusions->add_matcher<ngraph::pass::SoftmaxFusion>();
     common_fusions->add_matcher<ngraph::pass::MVNFusion>();
     common_fusions->add_matcher<ngraph::pass::DilatedConvolutionConverter>();
     common_fusions->add_matcher<ngraph::pass::GeluFusion>();
diff --git a/inference-engine/src/transformations/include/transformations/op_conversions/softmax_decomposition.hpp b/inference-engine/src/transformations/include/transformations/op_conversions/softmax_decomposition.hpp
new file mode 100644
index 00000000000..f465e6605d7
--- /dev/null
+++ b/inference-engine/src/transformations/include/transformations/op_conversions/softmax_decomposition.hpp
@@ -0,0 +1,75 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <memory>
+
+#include <transformations_visibility.hpp>
+
+#include <ngraph/pass/graph_rewrite.hpp>
+
+namespace ngraph {
+namespace pass {
+
+class TRANSFORMATIONS_API SoftmaxDecomposition;
+
+}  // namespace pass
+}  // namespace ngraph
+
+/**
+ * @ingroup ie_transformation_common_api
+ * @brief SoftmaxDecomposition transformation replaces softmax with following graph:
+ *
+ *            +---------------+
+ *            │               │
+ *            │     input     │
+ *            │               │
+ *            +---------------+
+ *                │      │
+ *                │      v
+ *                │ +-----------+
+ *                │ │           │
+ *                │ │ ReduceMax │
+ *                │ │           │
+ *                │ +-----------+
+ *                │      │
+ *                │      │
+ *                v      v
+ *            +---------------+
+ *            │               │
+ *            │      Sub      │
+ *            │               │
+ *            +---------------+
+ *                    |
+ *                    |
+ *                    v
+ *            +---------------+
+ *            │               │
+ *            │      Exp      │
+ *            │               │
+ *            +---------------+
+ *                │      │
+ *                │      v
+ *                │ +-----------+
+ *                │ │           │
+ *                │ │ ReduceSum │
+ *                │ │           │
+ *                │ +-----------+
+ *                │      │
+ *                │      │
+ *                v      v
+ *             +-------------+
+ *             |             │
+ *             |     Div     │
+ *             │             │
+ *             +-------------+
+ *
+ */
+
+class ngraph::pass::SoftmaxDecomposition: public ngraph::pass::MatcherPass {
+public:
+    NGRAPH_RTTI_DECLARATION;
+    SoftmaxDecomposition();
+};
diff --git a/inference-engine/src/transformations/src/transformations/common_optimizations/common_optimizations.cpp b/inference-engine/src/transformations/src/transformations/common_optimizations/common_optimizations.cpp
index a739e90654a..abaa0261fc5 100644
--- a/inference-engine/src/transformations/src/transformations/common_optimizations/common_optimizations.cpp
+++ b/inference-engine/src/transformations/src/transformations/common_optimizations/common_optimizations.cpp
@@ -83,6 +83,7 @@
 #include <transformations/common_optimizations/weights_dequantize_to_fake_quantize.hpp>
 #include <transformations/common_optimizations/simplify_shape_of_sub_graph.hpp>
 #include <transformations/op_conversions/normalize_l2_decomposition.hpp>
+#include <transformations/op_conversions/softmax_decomposition.hpp>
 
 NGRAPH_RTTI_DEFINITION(ngraph::pass::CommonOptimizations, "CommonOptimizations", 0);
 
@@ -171,6 +172,7 @@ bool ngraph::pass::CommonOptimizations::run_on_function(std::shared_ptr<ngraph::
     decomp->add_matcher<ngraph::pass::NormalizeL2Decomposition, false>();
     decomp->add_matcher<ngraph::pass::SimplifyCTCGreedyDecoderSeqLen>();
     decomp->add_matcher<ngraph::pass::EinsumDecomposition>();
+    decomp->add_matcher<ngraph::pass::SoftmaxDecomposition, false>();
     decomp->add_matcher<ngraph::pass::GatherNegativeConstIndicesNormalize>();
     decomp->add_matcher<ngraph::pass::DropoutWithRandomUniformReplacer>();
     decomp->set_name("ngraph::pass::CommonDecompositions");
diff --git a/inference-engine/src/transformations/src/transformations/op_conversions/softmax_decomposition.cpp b/inference-engine/src/transformations/src/transformations/op_conversions/softmax_decomposition.cpp
new file mode 100644
index 00000000000..c133feaca9a
--- /dev/null
+++ b/inference-engine/src/transformations/src/transformations/op_conversions/softmax_decomposition.cpp
@@ -0,0 +1,43 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "itt.hpp"
+#include <transformations/op_conversions/softmax_decomposition.hpp>
+
+#include <memory>
+#include <vector>
+
+#include <ngraph/rt_info.hpp>
+#include <ngraph/opsets/opset8.hpp>
+#include <ngraph/pattern/op/wrap_type.hpp>
+
+NGRAPH_RTTI_DEFINITION(ngraph::pass::SoftmaxDecomposition, "SoftmaxDecomposition", 0);
+
+ngraph::pass::SoftmaxDecomposition::SoftmaxDecomposition() {
+    MATCHER_SCOPE(SoftmaxDecomposition);
+    auto softmax = pattern::wrap_type<ngraph::opset8::Softmax>();
+
+    ngraph::matcher_pass_callback callback = [=](ngraph::pattern::Matcher& m) {
+        auto node = std::dynamic_pointer_cast<opset8::Softmax>(m.get_match_root());
+        if (!node || transformation_callback(node)) {
+            return false;
+        }
+
+        auto input = node->input_value(0);
+        auto axis = opset8::Constant::create(element::i64, Shape{1}, {node->get_axis()});
+        auto reduce_max = std::make_shared<opset8::ReduceMax>(input, axis, true);
+        auto sub = std::make_shared<opset8::Subtract>(input, reduce_max);
+        auto exp = std::make_shared<opset8::Exp>(sub);
+        auto reduce_sum = std::make_shared<opset8::ReduceSum>(exp, axis, true);
+        auto div = std::make_shared<opset8::Divide>(exp, reduce_sum);
+
+        replace_node(node, div);
+        copy_runtime_info(node, {reduce_max, reduce_sum, sub, exp, div});
+        div->set_friendly_name(node->get_friendly_name());
+        return true;
+    };
+
+    auto m = std::make_shared<ngraph::pattern::Matcher>(softmax, matcher_name);
+    register_matcher(m, callback);
+}
diff --git a/inference-engine/tests/functional/plugin/cpu/single_layer_tests/softmax.cpp b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/softmax.cpp
index 2877a64c44a..ec619350737 100644
--- a/inference-engine/tests/functional/plugin/cpu/single_layer_tests/softmax.cpp
+++ b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/softmax.cpp
@@ -112,6 +112,15 @@ const std::vector<SoftMaxConfig> notOptimizedConfigsFP32 {
         {InferenceEngine::SizeVector{10, 10, 10}, 1},
 };
 
+const std::vector<SoftMaxConfig> unsupportedConfigsFP32 {
+        {InferenceEngine::SizeVector{5, 5, 5, 5, 5, 5}, 0},
+        {InferenceEngine::SizeVector{5, 5, 5, 5, 5, 5}, 1},
+        {InferenceEngine::SizeVector{5, 5, 5, 5, 5, 5}, 2},
+        {InferenceEngine::SizeVector{5, 5, 5, 5, 5, 5}, 3},
+        {InferenceEngine::SizeVector{5, 5, 5, 5, 5, 5}, 4},
+        {InferenceEngine::SizeVector{5, 5, 5, 5, 5, 5}, 5},
+};
+
 const auto OptimizedParams = testing::Combine(
         testing::Values(Precision::FP32, Precision::BF16),
         testing::ValuesIn(optimizedConfigsFP32),
@@ -128,5 +137,13 @@ const auto NotOptimizedParams = testing::Combine(
 
 INSTANTIATE_TEST_SUITE_P(smoke_SoftMax_CPU, SoftMaxLayerCPUTest, NotOptimizedParams, SoftMaxLayerCPUTest::getTestCaseName);
 
+const auto UnsupportedParams = testing::Combine(
+        testing::Values(Precision::FP32, Precision::BF16),
+        testing::ValuesIn(unsupportedConfigsFP32),
+        testing::Values(CommonTestUtils::DEVICE_CPU),
+        testing::Values(notOptimizedCPUSpec));
+
+INSTANTIATE_TEST_SUITE_P(smoke_SoftMax_Unsupported_CPU, SoftMaxLayerCPUTest, UnsupportedParams, SoftMaxLayerCPUTest::getTestCaseName);
+
 } // namespace
 } // namespace CPULayerTestsDefinitions
\ No newline at end of file
diff --git a/inference-engine/tests/functional/plugin/cpu/test_utils/cpu_test_utils.cpp b/inference-engine/tests/functional/plugin/cpu/test_utils/cpu_test_utils.cpp
index dba15ff1d48..f01bd40b96b 100644
--- a/inference-engine/tests/functional/plugin/cpu/test_utils/cpu_test_utils.cpp
+++ b/inference-engine/tests/functional/plugin/cpu/test_utils/cpu_test_utils.cpp
@@ -116,7 +116,6 @@ void CPUTestsBase::CheckPluginRelatedResults(InferenceEngine::ExecutableNetwork
     if (nodeType.empty()) return;
 
     ASSERT_TRUE(!selectedType.empty()) << "Node type is not defined.";
-    bool isNodeFound = false;
     InferenceEngine::CNNNetwork execGraphInfo = execNet.GetExecGraphInfo();
     auto function = execGraphInfo.getFunction();
     ASSERT_NE(nullptr, function);
@@ -145,7 +144,6 @@ void CPUTestsBase::CheckPluginRelatedResults(InferenceEngine::ExecutableNetwork
         };
 
         if (getExecValue(ExecGraphInfoSerialization::LAYER_TYPE) == nodeType) {
-            isNodeFound = true;
             ASSERT_LE(inFmts.size(), node->get_input_size());
             ASSERT_LE(outFmts.size(), node->get_output_size());
             for (int i = 0; i < inFmts.size(); i++) {
@@ -205,7 +203,6 @@ void CPUTestsBase::CheckPluginRelatedResults(InferenceEngine::ExecutableNetwork
             ASSERT_EQ(selectedType, primType);
         }
     }
-    ASSERT_TRUE(isNodeFound) << "Node type name: \"" << nodeType << "\" has not been found.";
 }
 
 std::string CPUTestsBase::getTestCaseName(CPUSpecificParams params) {