[CPU] Fixed zero-point fusion transformation (#18435)
Proper handling for subgraphs where ZP parent node has more than one output port
This commit is contained in:
parent
510f578aab
commit
38dec7b8cf
@ -756,8 +756,10 @@ void GraphOptimizer::FuseConvolutionAndZeroPoints(Graph &graph) {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto subtractArg0 = parent0->getParentEdgesAtPort(0)[0]->getParent();
|
const auto& parentEdge = parent0->getParentEdgeAt(0);
|
||||||
if (subtractArg0->getOriginalOutputPrecisionAtPort(0) != Precision::U8)
|
const auto& subtractArg0 = parentEdge->getParent();
|
||||||
|
const size_t portNum = parentEdge->getInputNum();
|
||||||
|
if (subtractArg0->getOriginalOutputPrecisionAtPort(portNum) != Precision::U8)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
auto zeroPointsConstant = dynamic_cast<node::Input*>(subtractArg1.get());
|
auto zeroPointsConstant = dynamic_cast<node::Input*>(subtractArg1.get());
|
||||||
|
@ -0,0 +1,56 @@
|
|||||||
|
// Copyright (C) 2018-2023 Intel Corporation
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
//
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <tuple>
|
||||||
|
#include <vector>
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
#include "test_utils/cpu_test_utils.hpp"
|
||||||
|
#include "shared_test_classes/base/layer_test_utils.hpp"
|
||||||
|
#include "ngraph_functions/utils/ngraph_helpers.hpp"
|
||||||
|
#include "ngraph_functions/builders.hpp"
|
||||||
|
|
||||||
|
using namespace CPUTestUtils;
|
||||||
|
|
||||||
|
namespace SubgraphTestsDefinitions {
|
||||||
|
|
||||||
|
using convConcatCPUParams = std::tuple<
|
||||||
|
nodeType, // Ngraph convolution type
|
||||||
|
InferenceEngine::SizeVector // Input shapes
|
||||||
|
>;
|
||||||
|
|
||||||
|
// Subgraph:
|
||||||
|
/*
|
||||||
|
 * Parameter    Constant
|
||||||
|
* | | i8
|
||||||
|
* | |
|
||||||
|
 * FakeQuantize  Convert
|
||||||
|
* / \ | f32
|
||||||
|
* / \ |
|
||||||
|
 * MaxPool FakeQuantize  Multiply
|
||||||
|
* \ \ /
|
||||||
|
* \ \ /
|
||||||
|
* \ Convolution
|
||||||
|
* \ /
|
||||||
|
* \ /
|
||||||
|
* Concat
|
||||||
|
* |
|
||||||
|
* |
|
||||||
|
* Result
|
||||||
|
*/
|
||||||
|
|
||||||
|
class ConvWithZeroPointFuseSubgraphTest : public testing::WithParamInterface<convConcatCPUParams>,
|
||||||
|
public CPUTestsBase,
|
||||||
|
virtual public LayerTestsUtils::LayerTestsCommon {
|
||||||
|
public:
|
||||||
|
static std::string getTestCaseName(testing::TestParamInfo<convConcatCPUParams> obj);
|
||||||
|
|
||||||
|
protected:
|
||||||
|
void SetUp() override;
|
||||||
|
std::string pluginTypeNode;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace SubgraphTestsDefinitions
|
@ -0,0 +1,152 @@
|
|||||||
|
// Copyright (C) 2018-2023 Intel Corporation
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
//
|
||||||
|
|
||||||
|
#include "ngraph/opsets/opset1.hpp"
|
||||||
|
#include "test_utils/convolution_params.hpp"
|
||||||
|
#include "subgraph_tests/include/conv_with_zero_point_fuse.hpp"
|
||||||
|
|
||||||
|
using namespace InferenceEngine;
|
||||||
|
using namespace CPUTestUtils;
|
||||||
|
|
||||||
|
namespace SubgraphTestsDefinitions {
|
||||||
|
|
||||||
|
std::string ConvWithZeroPointFuseSubgraphTest::getTestCaseName(testing::TestParamInfo<convConcatCPUParams> obj) {
|
||||||
|
std::ostringstream result;
|
||||||
|
nodeType type;
|
||||||
|
SizeVector inputShapes;
|
||||||
|
std::tie(type, inputShapes) = obj.param;
|
||||||
|
|
||||||
|
result << "Type=" << nodeType2str(type) << "_";
|
||||||
|
result << "IS=" << CommonTestUtils::vec2str(inputShapes) << "_";
|
||||||
|
|
||||||
|
return result.str();
|
||||||
|
}
|
||||||
|
|
||||||
|
void ConvWithZeroPointFuseSubgraphTest::SetUp() {
|
||||||
|
targetDevice = CommonTestUtils::DEVICE_CPU;
|
||||||
|
nodeType type;
|
||||||
|
SizeVector inputShapes;
|
||||||
|
std::tie(type, inputShapes) = this->GetParam();
|
||||||
|
pluginTypeNode = nodeType2PluginType(type);
|
||||||
|
|
||||||
|
const ngraph::op::PadType paddingType { ngraph::op::PadType::EXPLICIT };
|
||||||
|
const size_t numOutChannels = 256;
|
||||||
|
const SizeVector dilation { 1, 1 };
|
||||||
|
const SizeVector kernelSize { 1, 1 };
|
||||||
|
const SizeVector strides { 1, 1 };
|
||||||
|
const std::vector<ptrdiff_t> padBegin { 0, 0 };
|
||||||
|
const std::vector<ptrdiff_t> padEnd { 0, 0 };
|
||||||
|
|
||||||
|
selectedType = ".*_I8";
|
||||||
|
|
||||||
|
auto inputParams = ngraph::builder::makeParams(ngraph::element::f32, {inputShapes});
|
||||||
|
const auto fq = ngraph::builder::makeFakeQuantize(
|
||||||
|
inputParams[0],
|
||||||
|
ov::element::f32,
|
||||||
|
256,
|
||||||
|
{1, 1, 1, 1},
|
||||||
|
{-12.8f},
|
||||||
|
{12.7f},
|
||||||
|
{-12.8f},
|
||||||
|
{12.7f});
|
||||||
|
|
||||||
|
auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes<ngraph::op::Parameter>(inputParams));
|
||||||
|
|
||||||
|
std::vector<std::shared_ptr<ngraph::Node>> branches(2);
|
||||||
|
{
|
||||||
|
ngraph::Strides strides{1, 1};
|
||||||
|
ngraph::Shape pads_begin{0, 0}, pads_end{0, 0}, kernel{1, 1};
|
||||||
|
branches[0] = std::make_shared<ngraph::opset1::MaxPool>(fq,
|
||||||
|
strides,
|
||||||
|
pads_begin,
|
||||||
|
pads_end,
|
||||||
|
kernel);
|
||||||
|
}
|
||||||
|
{
|
||||||
|
const auto fq_conv_data = ngraph::builder::makeFakeQuantize(
|
||||||
|
fq,
|
||||||
|
ov::element::f32,
|
||||||
|
256,
|
||||||
|
{1, 1, 1, 1},
|
||||||
|
{-12.8f},
|
||||||
|
{12.7f},
|
||||||
|
{-12.8f},
|
||||||
|
{12.7f});
|
||||||
|
|
||||||
|
const InferenceEngine::SizeVector weights_const_shape = {numOutChannels, inputShapes[1], kernelSize[0], kernelSize[1]};
|
||||||
|
const auto weights_const_values = std::vector<int>(ngraph::shape_size(weights_const_shape), 1);
|
||||||
|
const auto weights_const = ngraph::builder::makeConstant(ov::element::i8, weights_const_shape, weights_const_values);
|
||||||
|
|
||||||
|
const auto weights_convert = ngraph::builder::makeConversion(
|
||||||
|
weights_const,
|
||||||
|
ov::element::f32,
|
||||||
|
ngraph::helpers::ConversionTypes::CONVERT);
|
||||||
|
|
||||||
|
const auto weights_multiply = std::make_shared<ov::opset10::Multiply>(
|
||||||
|
weights_convert,
|
||||||
|
ngraph::builder::makeConstant(ov::element::f32,
|
||||||
|
{numOutChannels, 1, 1, 1},
|
||||||
|
std::vector<float>(numOutChannels, 1.0)));
|
||||||
|
|
||||||
|
switch (type) {
|
||||||
|
case nodeType::convolution: {
|
||||||
|
branches[1] = ngraph::builder::makeConvolution(fq_conv_data,
|
||||||
|
weights_multiply,
|
||||||
|
ngraph::element::f32,
|
||||||
|
kernelSize,
|
||||||
|
strides,
|
||||||
|
padBegin,
|
||||||
|
padEnd,
|
||||||
|
dilation,
|
||||||
|
paddingType,
|
||||||
|
numOutChannels);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case nodeType::groupConvolution: {
|
||||||
|
branches[1] = ngraph::builder::makeGroupConvolution(
|
||||||
|
fq_conv_data,
|
||||||
|
std::make_shared<ov::opset10::Reshape>(
|
||||||
|
weights_multiply,
|
||||||
|
ngraph::builder::makeConstant(
|
||||||
|
ov::element::i32,
|
||||||
|
{5},
|
||||||
|
std::vector<size_t>{1, numOutChannels, inputShapes[1], kernelSize[0], kernelSize[1]}),
|
||||||
|
true),
|
||||||
|
ngraph::element::f32,
|
||||||
|
strides,
|
||||||
|
padBegin,
|
||||||
|
padEnd,
|
||||||
|
dilation,
|
||||||
|
paddingType);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
default: {
|
||||||
|
throw std::runtime_error("Subgraph concat test doesn't support this type of operation");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
auto concat = ngraph::builder::makeConcat(ngraph::OutputVector{branches[0], branches[1]}, 1);
|
||||||
|
|
||||||
|
ngraph::ResultVector results{std::make_shared<ngraph::opset4::Result>(concat)};
|
||||||
|
function = std::make_shared<ngraph::Function>(results, inputParams, "ConvWithZeroPointFuseSubgraphTest");
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_P(ConvWithZeroPointFuseSubgraphTest, CompareWithRefs) {
|
||||||
|
Run();
|
||||||
|
|
||||||
|
CheckPluginRelatedResults(executableNetwork, pluginTypeNode);
|
||||||
|
};
|
||||||
|
|
||||||
|
const SizeVector inputShapes2D = {1, 32, 136, 136};
|
||||||
|
|
||||||
|
const auto params2DConv = ::testing::Combine(::testing::ValuesIn({nodeType::convolution, nodeType::groupConvolution}),
|
||||||
|
::testing::Values(inputShapes2D));
|
||||||
|
|
||||||
|
INSTANTIATE_TEST_SUITE_P(smoke_ConvWithZeroPointFuse,
|
||||||
|
ConvWithZeroPointFuseSubgraphTest,
|
||||||
|
params2DConv,
|
||||||
|
ConvWithZeroPointFuseSubgraphTest::getTestCaseName);
|
||||||
|
|
||||||
|
} // namespace SubgraphTestsDefinitions
|
@ -224,7 +224,7 @@ void CPUTestsBase::CheckPluginRelatedResultsImpl(const std::shared_ptr<const ov:
|
|||||||
}
|
}
|
||||||
|
|
||||||
bool CPUTestsBase::primTypeCheck(std::string primType) const {
|
bool CPUTestsBase::primTypeCheck(std::string primType) const {
|
||||||
return selectedType.find(CPUTestsBase::any_type) != std::string::npos || selectedType == primType;
|
return selectedType.find(CPUTestsBase::any_type) != std::string::npos || std::regex_match(primType, std::regex(selectedType));
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string CPUTestsBase::getTestCaseName(CPUSpecificParams params) {
|
std::string CPUTestsBase::getTestCaseName(CPUSpecificParams params) {
|
||||||
|
@ -96,6 +96,19 @@ std::shared_ptr<ngraph::Node> makeConvolution(const ngraph::Output<Node> &in,
|
|||||||
const std::vector<float> &filterWeights = {},
|
const std::vector<float> &filterWeights = {},
|
||||||
const std::vector<float> &biasesWeights = {});
|
const std::vector<float> &biasesWeights = {});
|
||||||
|
|
||||||
|
std::shared_ptr<ngraph::Node> makeConvolution(const ngraph::Output<Node>& in_data,
|
||||||
|
const ngraph::Output<Node>& in_weights,
|
||||||
|
const element::Type& type,
|
||||||
|
const std::vector<size_t>& filterSize,
|
||||||
|
const std::vector<size_t>& strides,
|
||||||
|
const std::vector<ptrdiff_t>& padsBegin,
|
||||||
|
const std::vector<ptrdiff_t>& padsEnd,
|
||||||
|
const std::vector<size_t>& dilations,
|
||||||
|
const op::PadType& autoPad,
|
||||||
|
size_t numOutChannels,
|
||||||
|
bool addBiases = false,
|
||||||
|
const std::vector<float>& biasesWeights = {});
|
||||||
|
|
||||||
std::shared_ptr<ngraph::Node> makeGroupConvolution(const ngraph::Output<Node> &in,
|
std::shared_ptr<ngraph::Node> makeGroupConvolution(const ngraph::Output<Node> &in,
|
||||||
const element::Type &type,
|
const element::Type &type,
|
||||||
const std::vector<size_t> &filterSize,
|
const std::vector<size_t> &filterSize,
|
||||||
|
@ -39,5 +39,35 @@ std::shared_ptr<Node> makeConvolution(const ngraph::Output<Node> &in,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::shared_ptr<Node> makeConvolution(const ngraph::Output<Node>& in_data,
|
||||||
|
const ngraph::Output<Node>& in_weights,
|
||||||
|
const element::Type &type,
|
||||||
|
const std::vector<size_t> &filterSize,
|
||||||
|
const std::vector<size_t> &strides,
|
||||||
|
const std::vector<ptrdiff_t> &padsBegin,
|
||||||
|
const std::vector<ptrdiff_t> &padsEnd,
|
||||||
|
const std::vector<size_t> &dilations,
|
||||||
|
const op::PadType &autoPad,
|
||||||
|
size_t numOutChannels,
|
||||||
|
bool addBiases,
|
||||||
|
const std::vector<float> &biasesWeights) {
|
||||||
|
auto shape = in_data.get_partial_shape();
|
||||||
|
auto conv = std::make_shared<opset1::Convolution>(in_data,
|
||||||
|
in_weights,
|
||||||
|
strides,
|
||||||
|
padsBegin,
|
||||||
|
padsEnd,
|
||||||
|
dilations,
|
||||||
|
autoPad);
|
||||||
|
if (addBiases) {
|
||||||
|
bool randomBiases = biasesWeights.empty();
|
||||||
|
auto biasesWeightsNode = makeConstant(type, {1, numOutChannels , 1, 1}, biasesWeights, randomBiases);
|
||||||
|
auto add = std::make_shared<ngraph::opset1::Add>(conv, biasesWeightsNode);
|
||||||
|
return add;
|
||||||
|
} else {
|
||||||
|
return conv;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace builder
|
} // namespace builder
|
||||||
} // namespace ngraph
|
} // namespace ngraph
|
||||||
|
Loading…
Reference in New Issue
Block a user