Backport of FQ+Mul transform to master (#2214)

* Backport of FQ+Mul transform to master
* Accept any type of input to FQ in the transformation
* Test the fusion when all FQ inputs are non-const
* Fusion test when only one output limit is const
* Test passing the output of FQ to second input of Mul
2020-09-21 12:21:27 +02:00 · 2020-09-21 12:21:27 +02:00 · dda6d9136b
commit dda6d9136b
parent c13ec24e1e
4 changed files with 506 additions and 0 deletions
--- a/inference-engine/src/transformations/include/transformations/common_optimizations/fq_mul_fusion.hpp
+++ b/inference-engine/src/transformations/include/transformations/common_optimizations/fq_mul_fusion.hpp
@ -0,0 +1,32 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <memory>
+#include <vector>
+
+#include <transformations_visibility.hpp>
+
+#include <ngraph/pass/graph_rewrite.hpp>
+
+namespace ngraph {
+namespace pass {
+
+class TRANSFORMATIONS_API FakeQuantizeMulFusion;
+
+} // namespace pass
+} // namespace ngraph
+
+/**
+ * @ingroup ie_transformation_common_api
+ * @brief This transformation looks for a FQ + Mul pair in the graph and moves
+ * the Mul operation above the FQ node. The last two inputs of FQ are multiplied
+ * by the value that was originally below the FQ node.
+ */
+
+class ngraph::pass::FakeQuantizeMulFusion : public ngraph::pass::MatcherPass {
+public:
+  FakeQuantizeMulFusion();
+};
--- a/inference-engine/src/transformations/src/transformations/common_optimizations/fq_mul_fusion.cpp
+++ b/inference-engine/src/transformations/src/transformations/common_optimizations/fq_mul_fusion.cpp
@ -0,0 +1,108 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "transformations/common_optimizations/fq_mul_fusion.hpp"
+#include "transformations/utils/utils.hpp"
+
+#include <memory>
+#include <vector>
+
+#include <ngraph/opsets/opset4.hpp>
+#include <ngraph/pattern/op/wrap_type.hpp>
+#include <ngraph/rt_info.hpp>
+
+namespace {
+  std::pair<ngraph::Output<ngraph::Node>, ngraph::Output<ngraph::Node>>
+  get_adjusted_output_range(ngraph::Output<ngraph::Node> out_low,
+                            ngraph::Output<ngraph::Node> out_high,
+                            ngraph::Output<ngraph::Node> multiplier) {
+    const auto mul_out_low = std::make_shared<ngraph::opset4::Multiply>(out_low, multiplier);
+    const auto mul_out_high = std::make_shared<ngraph::opset4::Multiply>(out_high, multiplier);
+    copy_runtime_info({out_low.get_node_shared_ptr(), multiplier.get_node_shared_ptr()},
+                      mul_out_low);
+    copy_runtime_info({out_high.get_node_shared_ptr(), multiplier.get_node_shared_ptr()},
+                      mul_out_high);
+
+    ngraph::OutputVector new_out_low(1), new_out_high(1);
+
+    if (!mul_out_low->constant_fold(new_out_low, {out_low, multiplier})) {
+      new_out_low[0] = mul_out_low;
+    }
+
+    if (!mul_out_high->constant_fold(new_out_high, {out_high, multiplier})) {
+      new_out_high[0] = mul_out_high;
+    }
+
+    return {new_out_low[0], new_out_high[0]};
+  }
+} // namespace
+
+// This transformation multiplies the "output_low" and "output_high" inputs of the FQ operation
+// by the constant value that, before the transformation, is used to multiply the output of FQ.
+// Both output_low and output_high are multiplied by the value represented as C (a constant) below.
+// If either of these FQ inputs (out_L, out_H) is constant, it gets constant-folded with C.
+//
+//          data  in_L in_H out_L out_H
+//            |    |    |     |     |
+//            |    |    |     |     |                data  in_L in_H  out_L * C  out_H * C
+//            v    v    v     v     v                  |    |    |        |          |
+//          +-------------------------+                |    |    |        |          |
+//          |       FakeQuantize      |                v    v    v        v          v
+//          +-------------------------+             +-----------------------------------+
+//                       |                =====>    |            FakeQuantize           |
+//                       v                          +-----------------------------------+
+//                  +----------+                                      |
+//                  | Multiply | <--- C                               v
+//                  +----+-----+
+//                       |
+//                       v
+//
+
+ngraph::pass::FakeQuantizeMulFusion::FakeQuantizeMulFusion() {
+  const auto fq_output_low_p = ngraph::pattern::any_input();
+  const auto fq_output_high_p = ngraph::pattern::any_input();
+
+  const auto fq_node_p = ngraph::pattern::wrap_type<opset4::FakeQuantize>(
+      {ngraph::pattern::any_input(),
+       ngraph::pattern::any_input(),
+       ngraph::pattern::any_input(),
+       fq_output_low_p,
+       fq_output_high_p},
+      pattern::consumers_count(1));
+
+  const auto mul_constant_p = ngraph::pattern::wrap_type<opset4::Constant>();
+  const auto mul_node_p = ngraph::pattern::wrap_type<opset4::Multiply>(
+      {fq_node_p, mul_constant_p}, pattern::consumers_count(1));
+
+  ngraph::matcher_pass_callback callback = [=](pattern::Matcher &m) {
+    const auto& pattern_map = m.get_pattern_value_map();
+
+    const auto fq_node = pattern_map.at(fq_node_p).get_node_shared_ptr();
+
+    const auto original_output_low = pattern_map.at(fq_output_low_p);
+    const auto original_output_high = pattern_map.at(fq_output_high_p);
+    const auto mul_constant = pattern_map.at(mul_constant_p);
+
+    const auto new_output_limits = get_adjusted_output_range(
+      original_output_low, original_output_high, mul_constant);
+
+    const auto new_fq_node = fq_node->clone_with_new_inputs({fq_node->input_value(0),
+                                                            fq_node->input_value(1),
+                                                            fq_node->input_value(2),
+                                                            new_output_limits.first,
+                                                            new_output_limits.second});
+
+    const auto mul_node = pattern_map.at(mul_node_p).get_node_shared_ptr();
+    replace_node(mul_node, new_fq_node);
+
+    new_fq_node->set_friendly_name(fq_node->get_friendly_name());
+    copy_runtime_info({fq_node, mul_node}, new_fq_node);
+
+    return true;
+  };
+
+  auto m = std::make_shared<ngraph::pattern::Matcher>(mul_node_p,
+                                                      "FakeQuantizeMulFusion");
+  this->register_matcher(m, callback);
+}
--- a/inference-engine/src/transformations/src/transformations/convert_opset1_to_legacy/convert_opset1_to_legacy.cpp
+++ b/inference-engine/src/transformations/src/transformations/convert_opset1_to_legacy/convert_opset1_to_legacy.cpp
@ -51,6 +51,7 @@
 #include <transformations/hswish_decomposition.hpp>
 #include <transformations/reduce_l1_decomposition.hpp>
 #include <transformations/reduce_l2_decomposition.hpp>
+#include <transformations/common_optimizations/fq_mul_fusion.hpp>

 #include <ngraph/pass/constant_folding.hpp>
 #include <ngraph/pass/manager.hpp>
@ -111,6 +112,9 @@ bool ngraph::pass::ConvertOpSet1ToLegacy::run_on_function(std::shared_ptr<ngraph
    manager.register_pass<ngraph::pass::GroupConvolutionBackpropDataMultiplyFusion>();
    manager.register_pass<ngraph::pass::ConstantFolding>();

+    // Multiply the last two inputs of FQ (output_low, output_high) instead of the output of FQ
+    manager.register_pass<ngraph::pass::FakeQuantizeMulFusion>();
+
    // Convolution/Deconvolution/FullyConnected fusions
    auto convert_convolutions = manager.register_pass<ngraph::pass::GraphRewrite>();
    convert_convolutions->add_matcher<ngraph::pass::ConvertConvolution>();
--- a/inference-engine/tests/functional/inference_engine/transformations/fq_mul_fusion_test.cpp
+++ b/inference-engine/tests/functional/inference_engine/transformations/fq_mul_fusion_test.cpp
@ -0,0 +1,362 @@
+// Copyright (C) 2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <gtest/gtest.h>
+#include <memory>
+#include <tuple>
+
+#include <ie_core.hpp>
+
+#include "common_test_utils/common_utils.hpp"
+#include "common_test_utils/ngraph_test_utils.hpp"
+#include "common_test_utils/test_common.hpp"
+#include "functional_test_utils/plugin_cache.hpp"
+
+#include <ngraph/function.hpp>
+#include <ngraph/opsets/opset4.hpp>
+#include <ngraph/pass/manager.hpp>
+#include <transformations/common_optimizations/fq_mul_fusion.hpp>
+#include <transformations/init_node_info.hpp>
+
+namespace LayerTestsDefinitions {
+
+using FQMulFusionParams =
+    std::tuple<ngraph::Shape,  // FQ data shape
+               ngraph::Shape,  // in_* shape
+               ngraph::Shape,  // out_* shape
+               ngraph::Shape,  // Mul constant shape
+               ngraph::Shape>; // Expected shape of the new out_* constants
+
+class FQMulFusion : public testing::WithParamInterface<FQMulFusionParams>,
+                    public CommonTestUtils::TestsCommon {
+public:
+    void SetUp() override {
+        ngraph::Shape data_shape, in_shape, out_shape, mul_const_shape, expected_out_shape;
+        std::tie(data_shape, in_shape, out_shape, mul_const_shape, expected_out_shape) =
+            this->GetParam();
+
+        const auto data = ngraph::opset4::Constant::create(
+            ngraph::element::Type_t::f32, data_shape, {0.0f});
+        const auto in_low = ngraph::opset4::Constant::create(
+            ngraph::element::Type_t::f32, in_shape, {-0.5f});
+        const auto in_high = ngraph::opset4::Constant::create(
+            ngraph::element::Type_t::f32, in_shape, {0.5f});
+        const auto out_low = ngraph::opset4::Constant::create(
+            ngraph::element::Type_t::f32, out_shape, {0.0f});
+        const auto out_high = ngraph::opset4::Constant::create(
+            ngraph::element::Type_t::f32, out_shape, {100.0f});
+        const auto fq = std::make_shared<ngraph::opset4::FakeQuantize>(
+            data, in_low, in_high, out_low, out_high, 255);
+
+        const auto mul_value = ngraph::opset4::Constant::create(
+            ngraph::element::Type_t::f32, mul_const_shape, {3.14f});
+        const auto mul = std::make_shared<ngraph::opset4::Multiply>(fq, mul_value);
+
+        m_function = std::make_shared<ngraph::Function>(
+            ngraph::OutputVector{mul}, ngraph::ParameterVector{}, "FQMulFusion");
+
+        const auto expected_data = ngraph::opset4::Constant::create(
+            ngraph::element::Type_t::f32, data_shape, {0.0f});
+        const auto expected_in_low = ngraph::opset4::Constant::create(
+            ngraph::element::Type_t::f32, in_shape, {-0.5f});
+        const auto expected_in_high = ngraph::opset4::Constant::create(
+            ngraph::element::Type_t::f32, in_shape, {0.5f});
+        const auto expected_out_low = ngraph::opset4::Constant::create(
+            ngraph::element::Type_t::f32, expected_out_shape, {0.0f});
+        const auto expected_out_high = ngraph::opset4::Constant::create(
+            ngraph::element::Type_t::f32, expected_out_shape, {314.0f});
+
+        const auto expected_fq =
+            std::make_shared<ngraph::opset4::FakeQuantize>(expected_data,
+                expected_in_low, expected_in_high, expected_out_low, expected_out_high, 255);
+
+        m_expected_function = std::make_shared<ngraph::Function>(
+            ngraph::OutputVector{expected_fq}, ngraph::ParameterVector{}, "FQMulFusion_expected");
+    }
+
+  std::shared_ptr<ngraph::Function> m_function;
+  std::shared_ptr<ngraph::Function> m_expected_function;
+};
+
+TEST_P(FQMulFusion, ExpectFusion) {
+  ngraph::pass::Manager manager;
+  manager.register_pass<ngraph::pass::InitNodeInfo>();
+  manager.register_pass<ngraph::pass::FakeQuantizeMulFusion>();
+
+  manager.run_passes(m_function);
+  ASSERT_NO_THROW(check_rt_info(m_function));
+
+  const auto res = compare_functions(m_function, m_expected_function);
+  ASSERT_TRUE(res.first) << res.second;
+};
+
+namespace {
+INSTANTIATE_TEST_CASE_P(ScalarFQParams_C6_4D_channel_0, FQMulFusion,
+                        ::testing::Combine(::testing::Values(ngraph::Shape{64, 3, 7, 7}),
+                                           ::testing::Values(ngraph::Shape{}),
+                                           ::testing::Values(ngraph::Shape{}),
+                                           ::testing::Values(ngraph::Shape{64, 1, 1, 1}),
+                                           ::testing::Values(ngraph::Shape{64, 1, 1, 1})));
+
+INSTANTIATE_TEST_CASE_P(ScalarFQParams_C6_4D_channel_1, FQMulFusion,
+                        ::testing::Combine(::testing::Values(ngraph::Shape{64, 3, 7, 7}),
+                                           ::testing::Values(ngraph::Shape{}),
+                                           ::testing::Values(ngraph::Shape{}),
+                                           ::testing::Values(ngraph::Shape{1, 3, 1, 1}),
+                                           ::testing::Values(ngraph::Shape{1, 3, 1, 1})));
+
+INSTANTIATE_TEST_CASE_P(ScalarFQParams_C6_scalar, FQMulFusion,
+                        ::testing::Combine(::testing::Values(ngraph::Shape{64, 3, 7, 7}),
+                                           ::testing::Values(ngraph::Shape{}),
+                                           ::testing::Values(ngraph::Shape{}),
+                                           ::testing::Values(ngraph::Shape{}),
+                                           ::testing::Values(ngraph::Shape{})));
+
+INSTANTIATE_TEST_CASE_P(FQOutputs1D_C6_scalar, FQMulFusion,
+                        ::testing::Combine(::testing::Values(ngraph::Shape{64, 3, 7, 7}),
+                                           ::testing::Values(ngraph::Shape{}),
+                                           ::testing::Values(ngraph::Shape{1}),
+                                           ::testing::Values(ngraph::Shape{}),
+                                           ::testing::Values(ngraph::Shape{1})));
+
+INSTANTIATE_TEST_CASE_P(FQOutputs_NHWC_C6_scalar, FQMulFusion,
+                        ::testing::Combine(::testing::Values(ngraph::Shape{1, 7, 7, 3}),
+                                           ::testing::Values(ngraph::Shape{}),
+                                           ::testing::Values(ngraph::Shape{1, 1, 1, 3}),
+                                           ::testing::Values(ngraph::Shape{}),
+                                           ::testing::Values(ngraph::Shape{1, 1, 1, 3})));
+
+INSTANTIATE_TEST_CASE_P(FQOutputs_NCHW_C6_scalar, FQMulFusion,
+                        ::testing::Combine(::testing::Values(ngraph::Shape{1, 3, 7, 7}),
+                                           ::testing::Values(ngraph::Shape{}),
+                                           ::testing::Values(ngraph::Shape{1, 3, 1, 1}),
+                                           ::testing::Values(ngraph::Shape{}),
+                                           ::testing::Values(ngraph::Shape{1, 3, 1, 1})));
+
+INSTANTIATE_TEST_CASE_P(FQInputs_4D_with_channel_dimension, FQMulFusion,
+                        ::testing::Combine(::testing::Values(ngraph::Shape{1, 64, 3, 3}),
+                                           ::testing::Values(ngraph::Shape{1, 1, 1, 1}),
+                                           ::testing::Values(ngraph::Shape{1, 64, 1, 1}),
+                                           ::testing::Values(ngraph::Shape{1, 64, 1, 1}),
+                                           ::testing::Values(ngraph::Shape{1, 64, 1, 1})));
+
+INSTANTIATE_TEST_CASE_P(FQInputs_4D_per__multiplier_with_channel, FQMulFusion,
+                        ::testing::Combine(::testing::Values(ngraph::Shape{1, 64, 3, 3}),
+                                           ::testing::Values(ngraph::Shape{1, 1, 1, 1}),
+                                           ::testing::Values(ngraph::Shape{1, 1, 1, 1}),
+                                           ::testing::Values(ngraph::Shape{1, 64, 1, 1}),
+                                           ::testing::Values(ngraph::Shape{1, 64, 1, 1})));
+
+INSTANTIATE_TEST_CASE_P(FQInputs_4D_with_channel__multiplier_4D_per_tensor, FQMulFusion,
+                        ::testing::Combine(::testing::Values(ngraph::Shape{1, 64, 3, 3}),
+                                           ::testing::Values(ngraph::Shape{1, 1, 1, 1}),
+                                           ::testing::Values(ngraph::Shape{1, 64, 1, 1}),
+                                           ::testing::Values(ngraph::Shape{1, 1, 1, 1}),
+                                           ::testing::Values(ngraph::Shape{1, 64, 1, 1})));
+
+INSTANTIATE_TEST_CASE_P(FQInputs_4D__multiplier_channel_3rd_dim, FQMulFusion,
+                        ::testing::Combine(::testing::Values(ngraph::Shape{1, 64, 3, 3}),
+                                           ::testing::Values(ngraph::Shape{1, 1, 1, 1}),
+                                           ::testing::Values(ngraph::Shape{1, 64, 1, 1}),
+                                           ::testing::Values(ngraph::Shape{1, 1, 3, 1}),
+                                           ::testing::Values(ngraph::Shape{1, 64, 3, 1})));
+
+INSTANTIATE_TEST_CASE_P(FQOutputs_1D__multiplier_3D, FQMulFusion,
+                        ::testing::Combine(::testing::Values(ngraph::Shape{1, 64, 3, 3}),
+                                           ::testing::Values(ngraph::Shape{1, 64, 1, 1}),
+                                           ::testing::Values(ngraph::Shape{1}),
+                                           ::testing::Values(ngraph::Shape{1, 3, 1}),
+                                           ::testing::Values(ngraph::Shape{1, 3, 1})));
+
+INSTANTIATE_TEST_CASE_P(FQ_all_ones__multiplier_4D_with_channel, FQMulFusion,
+                        ::testing::Combine(::testing::Values(ngraph::Shape{1, 1, 1, 1}),
+                                           ::testing::Values(ngraph::Shape{1, 1, 1, 1}),
+                                           ::testing::Values(ngraph::Shape{1, 1, 1, 1}),
+                                           ::testing::Values(ngraph::Shape{1, 64, 1, 1}),
+                                           ::testing::Values(ngraph::Shape{1, 64, 1, 1})));
+
+INSTANTIATE_TEST_CASE_P(FQInOUt_ones__multiplier_4D_with_channel, FQMulFusion,
+                        ::testing::Combine(::testing::Values(ngraph::Shape{1, 64, 3, 3}),
+                                           ::testing::Values(ngraph::Shape{1, 1, 1, 1}),
+                                           ::testing::Values(ngraph::Shape{1, 1, 1, 1}),
+                                           ::testing::Values(ngraph::Shape{1, 64, 3, 3}),
+                                           ::testing::Values(ngraph::Shape{1, 64, 3, 3})));
+
+TEST(FQMulFusion_NonConstInputs, AllInputsNonConst) {
+    const auto data = std::make_shared<ngraph::opset4::Parameter>(
+        ngraph::element::Type_t::f32, ngraph::Shape{1, 3, 224, 224});
+    const auto in_low =
+        std::make_shared<ngraph::opset4::Parameter>(ngraph::element::Type_t::f32, ngraph::Shape{});
+    const auto in_high =
+        std::make_shared<ngraph::opset4::Parameter>(ngraph::element::Type_t::f32, ngraph::Shape{});
+    const auto out_low =
+        std::make_shared<ngraph::opset4::Parameter>(ngraph::element::Type_t::f32, ngraph::Shape{});
+    const auto out_high =
+        std::make_shared<ngraph::opset4::Parameter>(ngraph::element::Type_t::f32, ngraph::Shape{});
+    const auto fq = std::make_shared<ngraph::opset4::FakeQuantize>(
+        data, in_low, in_high, out_low, out_high, 42);
+
+    const auto mul_value = ngraph::opset4::Constant::create(
+        ngraph::element::Type_t::f32, ngraph::Shape{}, {3.14f});
+    const auto mul = std::make_shared<ngraph::opset4::Multiply>(fq, mul_value);
+
+    auto function = std::make_shared<ngraph::Function>(ngraph::OutputVector{mul},
+        ngraph::ParameterVector{data, in_low, in_high, out_low, out_high});
+
+    const auto expected_out_low = std::make_shared<ngraph::opset4::Multiply>(out_low, mul_value);
+    const auto expected_out_high = std::make_shared<ngraph::opset4::Multiply>(out_high, mul_value);
+
+    const auto expected_fq = std::make_shared<ngraph::opset4::FakeQuantize>(
+        data, in_low, in_high, expected_out_low, expected_out_high, 42);
+
+    const auto expected_function =
+        std::make_shared<ngraph::Function>(ngraph::OutputVector{expected_fq},
+            ngraph::ParameterVector{data, in_low, in_high, out_low, out_high});
+
+    ngraph::pass::Manager manager;
+    manager.register_pass<ngraph::pass::InitNodeInfo>();
+    manager.register_pass<ngraph::pass::FakeQuantizeMulFusion>();
+
+    manager.run_passes(function);
+    ASSERT_NO_THROW(check_rt_info(function));
+
+    const auto res = compare_functions(function, expected_function);
+    ASSERT_TRUE(res.first) << res.second;
+}
+
+TEST(FQMulFusion_NonConstInputs, FQ_out_high_const) {
+    const auto data = std::make_shared<ngraph::opset4::Parameter>(
+        ngraph::element::Type_t::f32, ngraph::Shape{1, 3, 224, 224});
+    const auto in_low =
+        std::make_shared<ngraph::opset4::Parameter>(ngraph::element::Type_t::f32, ngraph::Shape{});
+    const auto in_high =
+        std::make_shared<ngraph::opset4::Parameter>(ngraph::element::Type_t::f32, ngraph::Shape{});
+    const auto out_low =
+        std::make_shared<ngraph::opset4::Parameter>(ngraph::element::Type_t::f32, ngraph::Shape{});
+    const auto out_high = ngraph::opset4::Constant::create(
+        ngraph::element::Type_t::f32, ngraph::Shape{}, {100.0f});
+    const auto fq = std::make_shared<ngraph::opset4::FakeQuantize>(
+        data, in_low, in_high, out_low, out_high, 42);
+
+    const auto mul_value = ngraph::opset4::Constant::create(
+        ngraph::element::Type_t::f32, ngraph::Shape{}, {3.14f});
+    const auto mul = std::make_shared<ngraph::opset4::Multiply>(fq, mul_value);
+
+    auto function = std::make_shared<ngraph::Function>(ngraph::OutputVector{mul},
+        ngraph::ParameterVector{data, in_low, in_high, out_low});
+
+    const auto expected_out_low = std::make_shared<ngraph::opset4::Multiply>(out_low, mul_value);
+    // this constant should be created by constant folding of the last FQ input
+    const auto expected_out_high = ngraph::opset4::Constant::create(
+        ngraph::element::Type_t::f32, ngraph::Shape{}, {314.0f});
+
+    const auto expected_fq = std::make_shared<ngraph::opset4::FakeQuantize>(
+        data, in_low, in_high, expected_out_low, expected_out_high, 42);
+
+    const auto expected_function =
+        std::make_shared<ngraph::Function>(ngraph::OutputVector{expected_fq},
+            ngraph::ParameterVector{data, in_low, in_high, out_low});
+
+    ngraph::pass::Manager manager;
+    manager.register_pass<ngraph::pass::InitNodeInfo>();
+    manager.register_pass<ngraph::pass::FakeQuantizeMulFusion>();
+
+    manager.run_passes(function);
+    ASSERT_NO_THROW(check_rt_info(function));
+
+    const auto res = compare_functions(function, expected_function);
+    ASSERT_TRUE(res.first) << res.second;
+}
+
+TEST(FQMulFusion_FQ_Mul_inputs, FQ_out_to_mul_input_2) {
+    const auto data = ngraph::opset4::Constant::create(
+        ngraph::element::Type_t::f32, ngraph::Shape{1, 3, 224, 224}, {0.0f});
+    const auto in_low = ngraph::opset4::Constant::create(
+        ngraph::element::Type_t::f32, ngraph::Shape{}, {-0.5f});
+    const auto in_high = ngraph::opset4::Constant::create(
+        ngraph::element::Type_t::f32, ngraph::Shape{}, {0.5f});
+    const auto out_low = ngraph::opset4::Constant::create(
+        ngraph::element::Type_t::f32, ngraph::Shape{}, {0.0f});
+    const auto out_high = ngraph::opset4::Constant::create(
+        ngraph::element::Type_t::f32, ngraph::Shape{}, {100.0f});
+    const auto fq = std::make_shared<ngraph::opset4::FakeQuantize>(
+        data, in_low, in_high, out_low, out_high, 42);
+
+    const auto mul_value = ngraph::opset4::Constant::create(
+        ngraph::element::Type_t::f32, ngraph::Shape{}, {3.14f});
+    // here the FQ's output is passed to the second input of the Mul operation
+    const auto mul = std::make_shared<ngraph::opset4::Multiply>(mul_value, fq);
+
+    auto function =
+        std::make_shared<ngraph::Function>(ngraph::OutputVector{mul}, ngraph::ParameterVector{});
+
+    const auto expected_out_low = ngraph::opset4::Constant::create(
+        ngraph::element::Type_t::f32, ngraph::Shape{}, {0.0f});
+    const auto expected_out_high = ngraph::opset4::Constant::create(
+        ngraph::element::Type_t::f32, ngraph::Shape{}, {314.0f});
+
+    const auto expected_fq = std::make_shared<ngraph::opset4::FakeQuantize>(
+        data, in_low, in_high, expected_out_low, expected_out_high, 42);
+
+    const auto expected_function = std::make_shared<ngraph::Function>(
+        ngraph::OutputVector{expected_fq}, ngraph::ParameterVector{});
+
+    ngraph::pass::Manager manager;
+    manager.register_pass<ngraph::pass::InitNodeInfo>();
+    manager.register_pass<ngraph::pass::FakeQuantizeMulFusion>();
+
+    manager.run_passes(function);
+    ASSERT_NO_THROW(check_rt_info(function));
+
+    const auto res = compare_functions(function, expected_function);
+    ASSERT_TRUE(res.first) << res.second;
+}
+
+TEST(FQMulFusion_FQ_Mul_inputs, FQ_out_to_mul_input_2_param) {
+    const auto data = ngraph::opset4::Constant::create(
+        ngraph::element::Type_t::f32, ngraph::Shape{1, 3, 224, 224}, {0.0f});
+    const auto in_low = ngraph::opset4::Constant::create(
+        ngraph::element::Type_t::f32, ngraph::Shape{}, {-0.5f});
+    const auto in_high = ngraph::opset4::Constant::create(
+        ngraph::element::Type_t::f32, ngraph::Shape{}, {0.5f});
+    const auto out_low = ngraph::opset4::Constant::create(
+        ngraph::element::Type_t::f32, ngraph::Shape{}, {0.0f});
+    // out_high is a parameter, which means it should not be constant folded
+    const auto out_high =
+        std::make_shared<ngraph::opset4::Parameter>(ngraph::element::Type_t::f32, ngraph::Shape{});
+    const auto fq = std::make_shared<ngraph::opset4::FakeQuantize>(
+        data, in_low, in_high, out_low, out_high, 42);
+
+    const auto mul_value = ngraph::opset4::Constant::create(
+        ngraph::element::Type_t::f32, ngraph::Shape{}, {3.14f});
+    // and here the output of FQ is passed as the second input of Mul
+    const auto mul = std::make_shared<ngraph::opset4::Multiply>(mul_value, fq);
+
+    auto function = std::make_shared<ngraph::Function>(
+        ngraph::OutputVector{mul}, ngraph::ParameterVector{out_high});
+
+    const auto expected_out_low = ngraph::opset4::Constant::create(
+        ngraph::element::Type_t::f32, ngraph::Shape{}, {0.0f});
+    const auto expected_out_high = std::make_shared<ngraph::opset4::Multiply>(out_high, mul_value);
+
+    const auto expected_fq = std::make_shared<ngraph::opset4::FakeQuantize>(
+        data, in_low, in_high, expected_out_low, expected_out_high, 42);
+
+    const auto expected_function = std::make_shared<ngraph::Function>(
+        ngraph::OutputVector{expected_fq}, ngraph::ParameterVector{out_high});
+
+    ngraph::pass::Manager manager;
+    manager.register_pass<ngraph::pass::InitNodeInfo>();
+    manager.register_pass<ngraph::pass::FakeQuantizeMulFusion>();
+
+    manager.run_passes(function);
+    ASSERT_NO_THROW(check_rt_info(function));
+
+    const auto res = compare_functions(function, expected_function);
+    ASSERT_TRUE(res.first) << res.second;
+}
+
+} // namespace
+
+} // namespace LayerTestsDefinitions