[CPU] Fixed zero-point fusion transformation (#18435)

Proper handling of subgraphs where the zero-point (ZP) parent node has more than one output port.
Edward Shogulin 2023-07-19 08:30:21 +01:00 committed by GitHub
parent 510f578aab
commit 38dec7b8cf
6 changed files with 256 additions and 3 deletions

View File

@ -756,8 +756,10 @@ void GraphOptimizer::FuseConvolutionAndZeroPoints(Graph &graph) {
return false;
}
- auto subtractArg0 = parent0->getParentEdgesAtPort(0)[0]->getParent();
- if (subtractArg0->getOriginalOutputPrecisionAtPort(0) != Precision::U8)
+ const auto& parentEdge = parent0->getParentEdgeAt(0);
+ const auto& subtractArg0 = parentEdge->getParent();
+ const size_t portNum = parentEdge->getInputNum();
+ if (subtractArg0->getOriginalOutputPrecisionAtPort(portNum) != Precision::U8)
return false;
auto zeroPointsConstant = dynamic_cast<node::Input*>(subtractArg1.get());
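For context, a minimal sketch of why the hunk above replaces the hard-coded output port 0 with the edge's actual input port number. The Node/Edge types below are simplified stand-ins rather than the real CPU plugin classes: when the Subtract's parent has more than one output port, the zero-point precision has to be read from the port that actually feeds the edge, not from port 0.

#include <cstddef>
#include <vector>

enum class Precision { U8, FP32 };

// Simplified stand-in for a graph node with one precision per output port.
struct Node {
    std::vector<Precision> outputPrecisions;
    Precision precisionAtPort(size_t port) const { return outputPrecisions[port]; }
};

// Simplified stand-in for an edge: the parent node plus the parent output port it comes from.
struct Edge {
    const Node* parent;
    size_t parentOutputPort;
};

bool zeroPointParentIsU8(const Edge& edge) {
    // Old check (wrong for multi-output parents): edge.parent->precisionAtPort(0)
    // Fixed check: use the port that actually feeds the Subtract input.
    return edge.parent->precisionAtPort(edge.parentOutputPort) == Precision::U8;
}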

View File

@ -0,0 +1,56 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <tuple>
#include <vector>
#include <string>
#include "test_utils/cpu_test_utils.hpp"
#include "shared_test_classes/base/layer_test_utils.hpp"
#include "ngraph_functions/utils/ngraph_helpers.hpp"
#include "ngraph_functions/builders.hpp"
using namespace CPUTestUtils;
namespace SubgraphTestsDefinitions {
using convConcatCPUParams = std::tuple<
nodeType, // Ngraph convolution type
InferenceEngine::SizeVector // Input shapes
>;
// Subgraph:
/*
* Parameter Constant
* | | i8
* | |
* FakeQuantize Convert
* / \ | f32
* / \ |
* MaxPool FakeQuantize Multiply
* \ \ /
* \ \ /
* \ Convolution
* \ /
* \ /
* Concat
* |
* |
* Result
*/
class ConvWithZeroPointFuseSubgraphTest : public testing::WithParamInterface<convConcatCPUParams>,
public CPUTestsBase,
virtual public LayerTestsUtils::LayerTestsCommon {
public:
static std::string getTestCaseName(testing::TestParamInfo<convConcatCPUParams> obj);
protected:
void SetUp() override;
std::string pluginTypeNode;
};
} // namespace SubgraphTestsDefinitions

View File

@ -0,0 +1,152 @@
// Copyright (C) 2018-2023 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "ngraph/opsets/opset1.hpp"
#include "test_utils/convolution_params.hpp"
#include "subgraph_tests/include/conv_with_zero_point_fuse.hpp"
using namespace InferenceEngine;
using namespace CPUTestUtils;
namespace SubgraphTestsDefinitions {
std::string ConvWithZeroPointFuseSubgraphTest::getTestCaseName(testing::TestParamInfo<convConcatCPUParams> obj) {
std::ostringstream result;
nodeType type;
SizeVector inputShapes;
std::tie(type, inputShapes) = obj.param;
result << "Type=" << nodeType2str(type) << "_";
result << "IS=" << CommonTestUtils::vec2str(inputShapes) << "_";
return result.str();
}
void ConvWithZeroPointFuseSubgraphTest::SetUp() {
targetDevice = CommonTestUtils::DEVICE_CPU;
nodeType type;
SizeVector inputShapes;
std::tie(type, inputShapes) = this->GetParam();
pluginTypeNode = nodeType2PluginType(type);
const ngraph::op::PadType paddingType { ngraph::op::PadType::EXPLICIT };
const size_t numOutChannels = 256;
const SizeVector dilation { 1, 1 };
const SizeVector kernelSize { 1, 1 };
const SizeVector strides { 1, 1 };
const std::vector<ptrdiff_t> padBegin { 0, 0 };
const std::vector<ptrdiff_t> padEnd { 0, 0 };
selectedType = ".*_I8";
auto inputParams = ngraph::builder::makeParams(ngraph::element::f32, {inputShapes});
const auto fq = ngraph::builder::makeFakeQuantize(
inputParams[0],
ov::element::f32,
256,
{1, 1, 1, 1},
{-12.8f},
{12.7f},
{-12.8f},
{12.7f});
auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes<ngraph::op::Parameter>(inputParams));
std::vector<std::shared_ptr<ngraph::Node>> branches(2);
{
ngraph::Strides strides{1, 1};
ngraph::Shape pads_begin{0, 0}, pads_end{0, 0}, kernel{1, 1};
branches[0] = std::make_shared<ngraph::opset1::MaxPool>(fq,
strides,
pads_begin,
pads_end,
kernel);
}
{
const auto fq_conv_data = ngraph::builder::makeFakeQuantize(
fq,
ov::element::f32,
256,
{1, 1, 1, 1},
{-12.8f},
{12.7f},
{-12.8f},
{12.7f});
const InferenceEngine::SizeVector weights_const_shape = {numOutChannels, inputShapes[1], kernelSize[0], kernelSize[1]};
const auto weights_const_values = std::vector<int>(ngraph::shape_size(weights_const_shape), 1);
const auto weights_const = ngraph::builder::makeConstant(ov::element::i8, weights_const_shape, weights_const_values);
const auto weights_convert = ngraph::builder::makeConversion(
weights_const,
ov::element::f32,
ngraph::helpers::ConversionTypes::CONVERT);
const auto weights_multiply = std::make_shared<ov::opset10::Multiply>(
weights_convert,
ngraph::builder::makeConstant(ov::element::f32,
{numOutChannels, 1, 1, 1},
std::vector<float>(numOutChannels, 1.0)));
switch (type) {
case nodeType::convolution: {
branches[1] = ngraph::builder::makeConvolution(fq_conv_data,
weights_multiply,
ngraph::element::f32,
kernelSize,
strides,
padBegin,
padEnd,
dilation,
paddingType,
numOutChannels);
break;
}
case nodeType::groupConvolution: {
branches[1] = ngraph::builder::makeGroupConvolution(
fq_conv_data,
std::make_shared<ov::opset10::Reshape>(
weights_multiply,
ngraph::builder::makeConstant(
ov::element::i32,
{5},
std::vector<size_t>{1, numOutChannels, inputShapes[1], kernelSize[0], kernelSize[1]}),
true),
ngraph::element::f32,
strides,
padBegin,
padEnd,
dilation,
paddingType);
break;
}
default: {
throw std::runtime_error("Subgraph concat test doesn't support this type of operation");
}
}
}
auto concat = ngraph::builder::makeConcat(ngraph::OutputVector{branches[0], branches[1]}, 1);
ngraph::ResultVector results{std::make_shared<ngraph::opset4::Result>(concat)};
function = std::make_shared<ngraph::Function>(results, inputParams, "ConvWithZeroPointFuseSubgraphTest");
}
TEST_P(ConvWithZeroPointFuseSubgraphTest, CompareWithRefs) {
Run();
CheckPluginRelatedResults(executableNetwork, pluginTypeNode);
};
const SizeVector inputShapes2D = {1, 32, 136, 136};
const auto params2DConv = ::testing::Combine(::testing::ValuesIn({nodeType::convolution, nodeType::groupConvolution}),
::testing::Values(inputShapes2D));
INSTANTIATE_TEST_SUITE_P(smoke_ConvWithZeroPointFuse,
ConvWithZeroPointFuseSubgraphTest,
params2DConv,
ConvWithZeroPointFuseSubgraphTest::getTestCaseName);
} // namespace SubgraphTestsDefinitions

View File

@ -224,7 +224,7 @@ void CPUTestsBase::CheckPluginRelatedResultsImpl(const std::shared_ptr<const ov:
}
bool CPUTestsBase::primTypeCheck(std::string primType) const {
- return selectedType.find(CPUTestsBase::any_type) != std::string::npos || selectedType == primType;
+ return selectedType.find(CPUTestsBase::any_type) != std::string::npos || std::regex_match(primType, std::regex(selectedType));
}
std::string CPUTestsBase::getTestCaseName(CPUSpecificParams params) {
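With this change, selectedType may be a regular expression instead of an exact primitive type string; the new subgraph test above sets selectedType = ".*_I8". A standalone sketch of the matching behaviour (the primitive type names below are made up for illustration, not taken from the plugin):

#include <iostream>
#include <regex>
#include <string>

int main() {
    const std::string selectedType = ".*_I8";  // pattern, as used by the new test
    for (const std::string primType : {"jit_avx512_I8", "brgconv_avx512_I8", "jit_avx512_FP32"}) {
        // Plain equality (the old check) would reject all of these;
        // regex matching accepts any primitive type ending in "_I8".
        const bool matches = std::regex_match(primType, std::regex(selectedType));
        std::cout << primType << " -> " << (matches ? "match" : "no match") << "\n";
    }
    return 0;
}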

View File

@ -96,6 +96,19 @@ std::shared_ptr<ngraph::Node> makeConvolution(const ngraph::Output<Node> &in,
const std::vector<float> &filterWeights = {},
const std::vector<float> &biasesWeights = {});
std::shared_ptr<ngraph::Node> makeConvolution(const ngraph::Output<Node>& in_data,
const ngraph::Output<Node>& in_weights,
const element::Type& type,
const std::vector<size_t>& filterSize,
const std::vector<size_t>& strides,
const std::vector<ptrdiff_t>& padsBegin,
const std::vector<ptrdiff_t>& padsEnd,
const std::vector<size_t>& dilations,
const op::PadType& autoPad,
size_t numOutChannels,
bool addBiases = false,
const std::vector<float>& biasesWeights = {});
std::shared_ptr<ngraph::Node> makeGroupConvolution(const ngraph::Output<Node> &in,
const element::Type &type,
const std::vector<size_t> &filterSize,

View File

@ -39,5 +39,35 @@ std::shared_ptr<Node> makeConvolution(const ngraph::Output<Node> &in,
}
}
std::shared_ptr<Node> makeConvolution(const ngraph::Output<Node>& in_data,
const ngraph::Output<Node>& in_weights,
const element::Type &type,
const std::vector<size_t> &filterSize,
const std::vector<size_t> &strides,
const std::vector<ptrdiff_t> &padsBegin,
const std::vector<ptrdiff_t> &padsEnd,
const std::vector<size_t> &dilations,
const op::PadType &autoPad,
size_t numOutChannels,
bool addBiases,
const std::vector<float> &biasesWeights) {
auto shape = in_data.get_partial_shape();
auto conv = std::make_shared<opset1::Convolution>(in_data,
in_weights,
strides,
padsBegin,
padsEnd,
dilations,
autoPad);
if (addBiases) {
bool randomBiases = biasesWeights.empty();
auto biasesWeightsNode = makeConstant(type, {1, numOutChannels , 1, 1}, biasesWeights, randomBiases);
auto add = std::make_shared<ngraph::opset1::Add>(conv, biasesWeightsNode);
return add;
} else {
return conv;
}
}
} // namespace builder
} // namespace ngraph
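For illustration, a minimal usage sketch of the new makeConvolution overload, mirroring the subgraph test above and assuming the same includes and ngraph::builder helpers; the weights are passed in as a pre-built Output<Node>, so a dequantization subgraph (i8 constant converted to f32) can be attached to them:

// Sketch only; shapes and values are illustrative.
auto input = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::f32,
                                                         ngraph::Shape{1, 32, 136, 136});
auto weights_i8 = ngraph::builder::makeConstant(ov::element::i8,
                                                std::vector<size_t>{256, 32, 1, 1},
                                                std::vector<int>(256 * 32, 1));
auto weights_f32 = ngraph::builder::makeConversion(weights_i8, ov::element::f32,
                                                   ngraph::helpers::ConversionTypes::CONVERT);
// The builder no longer creates the weights constant itself: any weights subgraph can be supplied.
auto conv = ngraph::builder::makeConvolution(input, weights_f32, ngraph::element::f32,
                                             {1, 1},          // kernel
                                             {1, 1},          // strides
                                             {0, 0}, {0, 0},  // pads begin / end
                                             {1, 1},          // dilations
                                             ngraph::op::PadType::EXPLICIT,
                                             256);            // numOutChannels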