[CPU] Fixed zero-point fusion transformation (#18435)
Proper handling for subgraphs where ZP parent node has more than one output port
This commit is contained in:
parent
510f578aab
commit
38dec7b8cf
@ -756,8 +756,10 @@ void GraphOptimizer::FuseConvolutionAndZeroPoints(Graph &graph) {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto subtractArg0 = parent0->getParentEdgesAtPort(0)[0]->getParent();
|
const auto& parentEdge = parent0->getParentEdgeAt(0);
|
||||||
if (subtractArg0->getOriginalOutputPrecisionAtPort(0) != Precision::U8)
|
const auto& subtractArg0 = parentEdge->getParent();
|
||||||
|
const size_t portNum = parentEdge->getInputNum();
|
||||||
|
if (subtractArg0->getOriginalOutputPrecisionAtPort(portNum) != Precision::U8)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
auto zeroPointsConstant = dynamic_cast<node::Input*>(subtractArg1.get());
|
auto zeroPointsConstant = dynamic_cast<node::Input*>(subtractArg1.get());
|
||||||
|
@ -0,0 +1,56 @@
|
|||||||
|
// Copyright (C) 2018-2023 Intel Corporation
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
//
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <tuple>
|
||||||
|
#include <vector>
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
#include "test_utils/cpu_test_utils.hpp"
|
||||||
|
#include "shared_test_classes/base/layer_test_utils.hpp"
|
||||||
|
#include "ngraph_functions/utils/ngraph_helpers.hpp"
|
||||||
|
#include "ngraph_functions/builders.hpp"
|
||||||
|
|
||||||
|
using namespace CPUTestUtils;
|
||||||
|
|
||||||
|
namespace SubgraphTestsDefinitions {
|
||||||
|
|
||||||
|
using convConcatCPUParams = std::tuple<
|
||||||
|
nodeType, // Ngraph convolution type
|
||||||
|
InferenceEngine::SizeVector // Input shapes
|
||||||
|
>;
|
||||||
|
|
||||||
|
// Subgraph:
|
||||||
|
/*
|
||||||
|
 * Parameter    Constant
|
||||||
|
* | | i8
|
||||||
|
* | |
|
||||||
|
 * FakeQuantize  Convert
|
||||||
|
* / \ | f32
|
||||||
|
* / \ |
|
||||||
|
 * MaxPool FakeQuantize  Multiply
|
||||||
|
* \ \ /
|
||||||
|
* \ \ /
|
||||||
|
* \ Convolution
|
||||||
|
* \ /
|
||||||
|
* \ /
|
||||||
|
* Concat
|
||||||
|
* |
|
||||||
|
* |
|
||||||
|
* Result
|
||||||
|
*/
|
||||||
|
|
||||||
|
class ConvWithZeroPointFuseSubgraphTest : public testing::WithParamInterface<convConcatCPUParams>,
|
||||||
|
public CPUTestsBase,
|
||||||
|
virtual public LayerTestsUtils::LayerTestsCommon {
|
||||||
|
public:
|
||||||
|
static std::string getTestCaseName(testing::TestParamInfo<convConcatCPUParams> obj);
|
||||||
|
|
||||||
|
protected:
|
||||||
|
void SetUp() override;
|
||||||
|
std::string pluginTypeNode;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace SubgraphTestsDefinitions
|
@ -0,0 +1,152 @@
|
|||||||
|
// Copyright (C) 2018-2023 Intel Corporation
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
//
|
||||||
|
|
||||||
|
#include "ngraph/opsets/opset1.hpp"
|
||||||
|
#include "test_utils/convolution_params.hpp"
|
||||||
|
#include "subgraph_tests/include/conv_with_zero_point_fuse.hpp"
|
||||||
|
|
||||||
|
using namespace InferenceEngine;
|
||||||
|
using namespace CPUTestUtils;
|
||||||
|
|
||||||
|
namespace SubgraphTestsDefinitions {
|
||||||
|
|
||||||
|
std::string ConvWithZeroPointFuseSubgraphTest::getTestCaseName(testing::TestParamInfo<convConcatCPUParams> obj) {
|
||||||
|
std::ostringstream result;
|
||||||
|
nodeType type;
|
||||||
|
SizeVector inputShapes;
|
||||||
|
std::tie(type, inputShapes) = obj.param;
|
||||||
|
|
||||||
|
result << "Type=" << nodeType2str(type) << "_";
|
||||||
|
result << "IS=" << CommonTestUtils::vec2str(inputShapes) << "_";
|
||||||
|
|
||||||
|
return result.str();
|
||||||
|
}
|
||||||
|
|
||||||
|
void ConvWithZeroPointFuseSubgraphTest::SetUp() {
|
||||||
|
targetDevice = CommonTestUtils::DEVICE_CPU;
|
||||||
|
nodeType type;
|
||||||
|
SizeVector inputShapes;
|
||||||
|
std::tie(type, inputShapes) = this->GetParam();
|
||||||
|
pluginTypeNode = nodeType2PluginType(type);
|
||||||
|
|
||||||
|
const ngraph::op::PadType paddingType { ngraph::op::PadType::EXPLICIT };
|
||||||
|
const size_t numOutChannels = 256;
|
||||||
|
const SizeVector dilation { 1, 1 };
|
||||||
|
const SizeVector kernelSize { 1, 1 };
|
||||||
|
const SizeVector strides { 1, 1 };
|
||||||
|
const std::vector<ptrdiff_t> padBegin { 0, 0 };
|
||||||
|
const std::vector<ptrdiff_t> padEnd { 0, 0 };
|
||||||
|
|
||||||
|
selectedType = ".*_I8";
|
||||||
|
|
||||||
|
auto inputParams = ngraph::builder::makeParams(ngraph::element::f32, {inputShapes});
|
||||||
|
const auto fq = ngraph::builder::makeFakeQuantize(
|
||||||
|
inputParams[0],
|
||||||
|
ov::element::f32,
|
||||||
|
256,
|
||||||
|
{1, 1, 1, 1},
|
||||||
|
{-12.8f},
|
||||||
|
{12.7f},
|
||||||
|
{-12.8f},
|
||||||
|
{12.7f});
|
||||||
|
|
||||||
|
auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes<ngraph::op::Parameter>(inputParams));
|
||||||
|
|
||||||
|
std::vector<std::shared_ptr<ngraph::Node>> branches(2);
|
||||||
|
{
|
||||||
|
ngraph::Strides strides{1, 1};
|
||||||
|
ngraph::Shape pads_begin{0, 0}, pads_end{0, 0}, kernel{1, 1};
|
||||||
|
branches[0] = std::make_shared<ngraph::opset1::MaxPool>(fq,
|
||||||
|
strides,
|
||||||
|
pads_begin,
|
||||||
|
pads_end,
|
||||||
|
kernel);
|
||||||
|
}
|
||||||
|
{
|
||||||
|
const auto fq_conv_data = ngraph::builder::makeFakeQuantize(
|
||||||
|
fq,
|
||||||
|
ov::element::f32,
|
||||||
|
256,
|
||||||
|
{1, 1, 1, 1},
|
||||||
|
{-12.8f},
|
||||||
|
{12.7f},
|
||||||
|
{-12.8f},
|
||||||
|
{12.7f});
|
||||||
|
|
||||||
|
const InferenceEngine::SizeVector weights_const_shape = {numOutChannels, inputShapes[1], kernelSize[0], kernelSize[1]};
|
||||||
|
const auto weights_const_values = std::vector<int>(ngraph::shape_size(weights_const_shape), 1);
|
||||||
|
const auto weights_const = ngraph::builder::makeConstant(ov::element::i8, weights_const_shape, weights_const_values);
|
||||||
|
|
||||||
|
const auto weights_convert = ngraph::builder::makeConversion(
|
||||||
|
weights_const,
|
||||||
|
ov::element::f32,
|
||||||
|
ngraph::helpers::ConversionTypes::CONVERT);
|
||||||
|
|
||||||
|
const auto weights_multiply = std::make_shared<ov::opset10::Multiply>(
|
||||||
|
weights_convert,
|
||||||
|
ngraph::builder::makeConstant(ov::element::f32,
|
||||||
|
{numOutChannels, 1, 1, 1},
|
||||||
|
std::vector<float>(numOutChannels, 1.0)));
|
||||||
|
|
||||||
|
switch (type) {
|
||||||
|
case nodeType::convolution: {
|
||||||
|
branches[1] = ngraph::builder::makeConvolution(fq_conv_data,
|
||||||
|
weights_multiply,
|
||||||
|
ngraph::element::f32,
|
||||||
|
kernelSize,
|
||||||
|
strides,
|
||||||
|
padBegin,
|
||||||
|
padEnd,
|
||||||
|
dilation,
|
||||||
|
paddingType,
|
||||||
|
numOutChannels);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case nodeType::groupConvolution: {
|
||||||
|
branches[1] = ngraph::builder::makeGroupConvolution(
|
||||||
|
fq_conv_data,
|
||||||
|
std::make_shared<ov::opset10::Reshape>(
|
||||||
|
weights_multiply,
|
||||||
|
ngraph::builder::makeConstant(
|
||||||
|
ov::element::i32,
|
||||||
|
{5},
|
||||||
|
std::vector<size_t>{1, numOutChannels, inputShapes[1], kernelSize[0], kernelSize[1]}),
|
||||||
|
true),
|
||||||
|
ngraph::element::f32,
|
||||||
|
strides,
|
||||||
|
padBegin,
|
||||||
|
padEnd,
|
||||||
|
dilation,
|
||||||
|
paddingType);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
default: {
|
||||||
|
throw std::runtime_error("Subgraph concat test doesn't support this type of operation");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
auto concat = ngraph::builder::makeConcat(ngraph::OutputVector{branches[0], branches[1]}, 1);
|
||||||
|
|
||||||
|
ngraph::ResultVector results{std::make_shared<ngraph::opset4::Result>(concat)};
|
||||||
|
function = std::make_shared<ngraph::Function>(results, inputParams, "ConvWithZeroPointFuseSubgraphTest");
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_P(ConvWithZeroPointFuseSubgraphTest, CompareWithRefs) {
|
||||||
|
Run();
|
||||||
|
|
||||||
|
CheckPluginRelatedResults(executableNetwork, pluginTypeNode);
|
||||||
|
};
|
||||||
|
|
||||||
|
const SizeVector inputShapes2D = {1, 32, 136, 136};
|
||||||
|
|
||||||
|
const auto params2DConv = ::testing::Combine(::testing::ValuesIn({nodeType::convolution, nodeType::groupConvolution}),
|
||||||
|
::testing::Values(inputShapes2D));
|
||||||
|
|
||||||
|
INSTANTIATE_TEST_SUITE_P(smoke_ConvWithZeroPointFuse,
|
||||||
|
ConvWithZeroPointFuseSubgraphTest,
|
||||||
|
params2DConv,
|
||||||
|
ConvWithZeroPointFuseSubgraphTest::getTestCaseName);
|
||||||
|
|
||||||
|
} // namespace SubgraphTestsDefinitions
|
@ -224,7 +224,7 @@ void CPUTestsBase::CheckPluginRelatedResultsImpl(const std::shared_ptr<const ov:
|
|||||||
}
|
}
|
||||||
|
|
||||||
bool CPUTestsBase::primTypeCheck(std::string primType) const {
|
bool CPUTestsBase::primTypeCheck(std::string primType) const {
|
||||||
return selectedType.find(CPUTestsBase::any_type) != std::string::npos || selectedType == primType;
|
return selectedType.find(CPUTestsBase::any_type) != std::string::npos || std::regex_match(primType, std::regex(selectedType));
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string CPUTestsBase::getTestCaseName(CPUSpecificParams params) {
|
std::string CPUTestsBase::getTestCaseName(CPUSpecificParams params) {
|
||||||
|
@ -96,6 +96,19 @@ std::shared_ptr<ngraph::Node> makeConvolution(const ngraph::Output<Node> &in,
|
|||||||
const std::vector<float> &filterWeights = {},
|
const std::vector<float> &filterWeights = {},
|
||||||
const std::vector<float> &biasesWeights = {});
|
const std::vector<float> &biasesWeights = {});
|
||||||
|
|
||||||
|
std::shared_ptr<ngraph::Node> makeConvolution(const ngraph::Output<Node>& in_data,
|
||||||
|
const ngraph::Output<Node>& in_weights,
|
||||||
|
const element::Type& type,
|
||||||
|
const std::vector<size_t>& filterSize,
|
||||||
|
const std::vector<size_t>& strides,
|
||||||
|
const std::vector<ptrdiff_t>& padsBegin,
|
||||||
|
const std::vector<ptrdiff_t>& padsEnd,
|
||||||
|
const std::vector<size_t>& dilations,
|
||||||
|
const op::PadType& autoPad,
|
||||||
|
size_t numOutChannels,
|
||||||
|
bool addBiases = false,
|
||||||
|
const std::vector<float>& biasesWeights = {});
|
||||||
|
|
||||||
std::shared_ptr<ngraph::Node> makeGroupConvolution(const ngraph::Output<Node> &in,
|
std::shared_ptr<ngraph::Node> makeGroupConvolution(const ngraph::Output<Node> &in,
|
||||||
const element::Type &type,
|
const element::Type &type,
|
||||||
const std::vector<size_t> &filterSize,
|
const std::vector<size_t> &filterSize,
|
||||||
|
@ -39,5 +39,35 @@ std::shared_ptr<Node> makeConvolution(const ngraph::Output<Node> &in,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::shared_ptr<Node> makeConvolution(const ngraph::Output<Node>& in_data,
|
||||||
|
const ngraph::Output<Node>& in_weights,
|
||||||
|
const element::Type &type,
|
||||||
|
const std::vector<size_t> &filterSize,
|
||||||
|
const std::vector<size_t> &strides,
|
||||||
|
const std::vector<ptrdiff_t> &padsBegin,
|
||||||
|
const std::vector<ptrdiff_t> &padsEnd,
|
||||||
|
const std::vector<size_t> &dilations,
|
||||||
|
const op::PadType &autoPad,
|
||||||
|
size_t numOutChannels,
|
||||||
|
bool addBiases,
|
||||||
|
const std::vector<float> &biasesWeights) {
|
||||||
|
auto shape = in_data.get_partial_shape();
|
||||||
|
auto conv = std::make_shared<opset1::Convolution>(in_data,
|
||||||
|
in_weights,
|
||||||
|
strides,
|
||||||
|
padsBegin,
|
||||||
|
padsEnd,
|
||||||
|
dilations,
|
||||||
|
autoPad);
|
||||||
|
if (addBiases) {
|
||||||
|
bool randomBiases = biasesWeights.empty();
|
||||||
|
auto biasesWeightsNode = makeConstant(type, {1, numOutChannels , 1, 1}, biasesWeights, randomBiases);
|
||||||
|
auto add = std::make_shared<ngraph::opset1::Add>(conv, biasesWeightsNode);
|
||||||
|
return add;
|
||||||
|
} else {
|
||||||
|
return conv;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace builder
|
} // namespace builder
|
||||||
} // namespace ngraph
|
} // namespace ngraph
|
||||||
|
Loading…
Reference in New Issue
Block a user