[LPT] [GPU] Multiply to group convolution (#9971)

* [LPT] MultiplyToGroupConvolution optimization for GPU

* [LPT] Support for the GPU workaround that keeps MatMul in FP32

* [LPT] GPU plugin tests
Edward Shogulin 2022-02-01 08:10:27 +03:00 committed by GitHub
parent 8c7e0d9479
commit cc19ff74f1
12 changed files with 102 additions and 35 deletions


@@ -239,9 +239,11 @@ public:
 public:
     Params(
         const bool updatePrecisions = true,
-        element::Type deqPrecision = element::f32) :
+        element::Type deqPrecision = element::f32,
+        const bool reshapeIgnorePerTensorQuantizationCheck = false) :
         updatePrecisions(updatePrecisions),
-        deqPrecision(deqPrecision) {}
+        deqPrecision(deqPrecision),
+        reshapeIgnorePerTensorQuantizationCheck(reshapeIgnorePerTensorQuantizationCheck) {}

     Params& setUpdatePrecisions(const bool updatePrecisions) {
         this->updatePrecisions = updatePrecisions;
@@ -255,6 +257,8 @@ public:
     bool updatePrecisions;
     element::Type deqPrecision;
+    // to support the GPU workaround that keeps Reshape and MatMul in FP32
+    bool reshapeIgnorePerTensorQuantizationCheck;
 };

 class PrecisionDetails {
@@ -322,6 +326,7 @@ protected:
     bool updatePrecisions;
     element::Type deqPrecision;
+    bool reshapeIgnorePerTensorQuantizationCheck;
     static constexpr char originalLayerPostfix[] = "_original";

     TransformationContext* context;
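For context, a minimal sketch of how a caller opts into the new flag; the third constructor argument mirrors the GPU pipeline change further below, while the include path and namespace follow the OpenVINO LPT layout (everything else here is illustrative):

```cpp
#include "low_precision/layer_transformation.hpp"

using namespace ngraph::pass::low_precision;

// Assumption: built inside a plugin's transformation pipeline and then
// handed to the LowPrecision pass, as the GPU pipeline does below.
const auto lptParams = LayerTransformation::Params(
    true,                  // updatePrecisions
    ngraph::element::f32,  // deqPrecision
    true);                 // reshapeIgnorePerTensorQuantizationCheck (GPU-only workaround)
```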


@@ -87,7 +87,7 @@ public:
     static std::shared_ptr<opset1::Constant> toScalar(std::shared_ptr<opset1::Constant> constant);

-    static std::shared_ptr<Node> getConstantInput(const std::shared_ptr<Node>& node, const bool convertIsExpected = false);
+    static std::shared_ptr<Node> getConstantInput(const std::shared_ptr<const Node>& node, const bool convertIsExpected = false);

     static std::vector<size_t> updateReshapeValues(
         const Shape& elementwiseConstantShape,


@@ -30,6 +30,7 @@ std::mutex LayerTransformation::defaultPrecisionsMutex;
 LayerTransformation::LayerTransformation(const Params& params) :
     updatePrecisions(params.updatePrecisions),
     deqPrecision(params.deqPrecision),
+    reshapeIgnorePerTensorQuantizationCheck(params.reshapeIgnorePerTensorQuantizationCheck),
     context(nullptr) {}

 void LayerTransformation::setContext(TransformationContext* context) noexcept {


@@ -373,7 +373,7 @@ std::shared_ptr<opset1::Constant> NetworkHelper::toScalar(std::shared_ptr<opset1
     return std::make_shared<opset1::Constant>(constant->get_element_type(), Shape{}, constant->get_data_ptr());
 }

-std::shared_ptr<Node> NetworkHelper::getConstantInput(const std::shared_ptr<Node>& node, const bool convertIsExpected) {
+std::shared_ptr<Node> NetworkHelper::getConstantInput(const std::shared_ptr<const Node>& node, const bool convertIsExpected) {
     std::shared_ptr<Node> parent = ov::as_type_ptr<opset1::Constant>(node->input_value(0).get_node_shared_ptr());
     if (parent != nullptr) {
         return parent;


@@ -195,8 +195,18 @@ bool ReshapeTransformation::canBeTransformed(const TransformationContext& contex
         return false;
     }

-    if (((dequantization.subtract == nullptr) || NetworkHelper::isScalarLike(dequantization.subtractConstant)) &&
-        ((dequantization.multiply == nullptr) || NetworkHelper::isScalarLike(dequantization.multiplyConstant))) {
+    bool ignorePerTensorQuantizationCheck = false;
+    if (reshapeIgnorePerTensorQuantizationCheck) {
+        const auto inputs = op->get_output_target_inputs(0);
+        if (inputs.size() == 1ul) {
+            const auto consumer = inputs.begin()->get_node();
+            ignorePerTensorQuantizationCheck = ngraph::as_type<ngraph::opset1::MatMul>(consumer) != nullptr;
+        }
+    }
+
+    if (!ignorePerTensorQuantizationCheck &&
+        (((dequantization.subtract == nullptr) || NetworkHelper::isScalarLike(dequantization.subtractConstant)) &&
+        ((dequantization.multiply == nullptr) || NetworkHelper::isScalarLike(dequantization.multiplyConstant)))) {
         return true;
     }
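The new branch only fires when the Reshape's single consumer is a MatMul; in that case the per-tensor quantization check is bypassed and per-channel dequantization is allowed to stay on the Reshape. A minimal sketch of that pattern, with made-up shapes, using ngraph opset1:

```cpp
#include <ngraph/ngraph.hpp>
#include <ngraph/opsets/opset1.hpp>

// Illustrative subgraph only: a Reshape whose sole consumer is a MatMul.
// With reshapeIgnorePerTensorQuantizationCheck enabled, canBeTransformed()
// accepts this even when the dequantization scales are per-channel.
std::shared_ptr<ngraph::Function> makeReshapeMatMulPattern() {
    using namespace ngraph;
    const auto input = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 16, 64});
    const auto targetShape = opset1::Constant::create(element::i64, Shape{2}, {16, 64});
    const auto reshape = std::make_shared<opset1::Reshape>(input, targetShape, false);
    const auto weights = opset1::Constant::create(element::f32, Shape{64, 32}, {0.5f});
    const auto matMul = std::make_shared<opset1::MatMul>(reshape, weights);  // single consumer
    return std::make_shared<Function>(NodeVector{matMul}, ParameterVector{input});
}
```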


@@ -409,13 +409,47 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Model> func) {
         return LayerTransformation::isAsymmetricQuantization(node) || WeightableLayerTransformation::isAsymmetricOnWeights(node);
     });

+    if (!use_onednn) {
+        lptPassConfig->set_callback<MatMulTransformation>([](const_node_ptr& node) -> bool {
+            return MatMulTransformation::is3DTensorOnActivations(node);
+        });
+    }

-    lptManager.register_pass<LowPrecision>(supportedPrecisions, perTensorQuantization);
+    lptPassConfig->set_callback<MultiplyToGroupConvolutionTransformation>([&](const_node_ptr& node) -> bool {
+        // disable MultiplyToGroupConvolution if Multiply with Constant can be fused
+        const auto dequantization = NetworkHelper::getDequantization(node, 0, true);
+        std::shared_ptr<ov::Node> parent = dequantization.empty() ? nullptr : dequantization.data.get_node()->shared_from_this();
+        if (parent == nullptr) {
+            const auto constantNode = NetworkHelper::getConstantInput(node);
+            const auto constant = constantNode == nullptr ? nullptr : ngraph::as_type_ptr<ngraph::opset1::Constant>(constantNode);
+            if (constant != nullptr) {
+                parent = node->get_input_node_shared_ptr(0);
+                if (parent == constant) {
+                    parent = node->get_input_node_shared_ptr(1);
+                }
+            }
+        }
+
+        if (parent != nullptr) {
+            const auto parentHasOneConsumer = parent->get_output_target_inputs(0).size() == 1ul;
+            if (parentHasOneConsumer) {
+                return true;
+            }
+        }
+
+        // disable MultiplyToGroupConvolution for Multiply with scalar
+        if (MultiplyToGroupConvolutionTransformation::isDynamicOrScalar(node)) {
+            return true;
+        }
+
+        return false;
+    });
+
+    auto params = LayerTransformation::Params(true, element::f32, true);
+    lptManager.register_pass<LowPrecision>(supportedPrecisions, perTensorQuantization, params);
     lptManager.run_passes(func);
 }
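To make the callback's first case concrete, a hedged sketch (names and shapes are illustrative, modeled on the maxPool pattern in the test builder below): when the Multiply's data-side parent has exactly one consumer, the Multiply-by-constant can be fused into that parent, so the callback returns true and the transformation is skipped; with a second consumer, conversion to GroupConvolution proceeds.

```cpp
#include <ngraph/ngraph.hpp>
#include <ngraph/opsets/opset1.hpp>

void multiplyFusionCases() {
    using namespace ngraph;
    const auto input = std::make_shared<opset1::Parameter>(element::f32, Shape{1, 3, 16, 16});
    const auto scale = opset1::Constant::create(element::f32, Shape{1, 3, 1, 1}, {2.f, 3.f, 4.f});

    // Case 1: MaxPool feeds only the Multiply; one consumer, so the
    // Multiply can be fused and the callback disables the transformation.
    const auto pool = std::make_shared<opset1::MaxPool>(
        input, Strides{1, 1}, Shape{0, 0}, Shape{0, 0}, Shape{1, 1});
    const auto fusable = std::make_shared<opset1::Multiply>(pool, scale);

    // Case 2: the pool output is also a model Result; two consumers, so
    // fusion is impossible and MultiplyToGroupConvolution is applied.
    const auto extraConsumer = std::make_shared<opset1::Result>(pool);
    const auto transformed = std::make_shared<opset1::Multiply>(pool, scale);
}
```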


@@ -23,35 +23,40 @@ const std::vector<MultiplyToGroupConvolutionTransformationParam> params = {
         { 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 25.5f }, { 0.f }, { 25.5f } },
         {{1.f, 2.f, 3.f}, element::f32, Shape{1, 3, 1, 1}},
         "output/GroupConvolution",
-        "U8"
+        "U8",
+        true
     },
     // Multiply with scalar is not transformed to GroupConvolution
     {
         { 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 25.5f }, { 0.f }, { 25.5f } },
         {{4.f}, element::f32, Shape{1, 1, 1, 1}},
         "output/GroupConvolution",
-        ""
+        "",
+        true
     },
     // Multiply with scalar is not transformed to GroupConvolution
     {
         { 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 25.5f }, { 0.f }, { 25.5f } },
         {{4.f}, element::f32, Shape{}},
         "output/GroupConvolution",
-        ""
+        "",
+        true
     },
     // Zero point
     {
         { 256ul, ngraph::Shape { 1, 1, 1, 1 }, { -1.28f }, { 1.27f }, { -1.28f }, { 1.27f } },
         {{1.f, 2.f, 3.f}, element::f32, Shape{1, 3, 1, 1}},
         "output/GroupConvolution",
-        "U8"
+        "U8",
+        true
     },
     // Zero point
     {
         { 256ul, ngraph::Shape { 1, 1, 1, 1 }, { -1.28f }, { 1.27f / 2.f }, { -1.28f }, { 1.27f / 2.f} },
         {{1.f, 2.f, 3.f}, element::f32, Shape{1, 3, 1, 1}},
         "output/GroupConvolution",
-        "U8"
+        "U8",
+        true
     }
 };


@@ -23,36 +23,41 @@ const std::vector<MultiplyToGroupConvolutionTransformationParam> params = {
         { 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 25.5f }, { 0.f }, { 25.5f } },
         {{1.f, 2.f, 3.f}, element::f32, Shape{1, 3, 1, 1}},
         "output/GroupConvolution",
         "U8"
     },
     // Multiply with scalar is transformed to GroupConvolution
     {
         { 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 25.5f }, { 0.f }, { 25.5f } },
         {{4.f}, element::f32, Shape{1, 1, 1, 1}},
         "output/GroupConvolution",
         "U8"
     },
     // multiply with scalar is transformed to groupconvolution
     {
         { 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 25.5f }, { 0.f }, { 25.5f } },
         {{4.f}, element::f32, Shape{}},
         "output/GroupConvolution",
-        "U8"
+        "U8",
+        false
     },
     // zero point
     {
         { 256ul, ngraph::Shape { 1, 1, 1, 1 }, { -1.28f }, { 1.27f }, { -1.28f }, { 1.27f } },
         {{1.f, 2.f, 3.f}, element::f32, Shape{1, 3, 1, 1}},
         "output/GroupConvolution",
-        "I8"
+        "I8",
+        false
     },
     // zero point
     {
         { 256ul, ngraph::Shape { 1, 1, 1, 1 }, { -1.28f }, { 1.27f / 2.f }, { -1.28f }, { 1.27f / 2.f} },
         {{1.f, 2.f, 3.f}, element::f32, Shape{1, 3, 1, 1}},
         "output/GroupConvolution",
-        "U8"
-    }
+        "U8",
+        false
+    },
+    // Multiply => GroupConvolution optimizations
+    {
+        { 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 25.5f }, { 0.f }, { 25.5f } },
+        {{3.f}, element::f32, Shape{1, 1, 1, 1}},
+        "output/GroupConvolution",
+        "",
+        false
+    },
+    {
+        { 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 25.5f }, { 0.f }, { 25.5f } },
+        {{3.f}, element::f32, Shape{1, 1, 1, 1}},
+        "output/GroupConvolution",
+        "",
+        true
+    },
 };
INSTANTIATE_TEST_SUITE_P(smoke_LPT, MultiplyToGroupConvolutionTransformation,


@@ -22,6 +22,7 @@ public:
     builder::subgraph::Constant constant;
     std::string layerName;
     std::string expectedKernelType;
+    bool parentHasOneConsumer;
 };

 typedef std::tuple <


@@ -34,7 +34,8 @@ std::string MultiplyToGroupConvolutionTransformation::getTestCaseName(const test
         param.fqOnData << "_" <<
         param.constant << "_" <<
         param.layerName << "_" <<
-        param.expectedKernelType;
+        param.expectedKernelType << "_" <<
+        param.parentHasOneConsumer;
     return result.str();
 }

@@ -48,7 +49,8 @@ void MultiplyToGroupConvolutionTransformation::SetUp() {
         precision,
         shape,
         param.fqOnData,
-        param.constant);
+        param.constant,
+        param.parentHasOneConsumer);
 }

 void MultiplyToGroupConvolutionTransformation::Run() {


@@ -29,7 +29,8 @@ public:
         const ngraph::element::Type precision,
         const ngraph::PartialShape& inputShape,
         const FakeQuantizeOnData& fqOnData,
-        const Constant& constant);
+        const Constant& constant,
+        const bool parentHasOneConsumer = true);

     static std::shared_ptr<ngraph::Function> getReference(
         const ngraph::PartialShape& inputShape,


@@ -39,7 +39,8 @@ std::shared_ptr<ngraph::Function> MultiplyToGroupConvolutionFunction::getOrigina
     const ngraph::element::Type precision,
     const ngraph::PartialShape& inputShape,
     const FakeQuantizeOnData& fqOnData,
-    const Constant& constant) {
+    const Constant& constant,
+    const bool parentHasOneConsumer) {
     const auto input = std::make_shared<ngraph::opset1::Parameter>(precision, inputShape);
     const auto fakeQuantize = makeFakeQuantize(input, precision, fqOnData);

@@ -58,7 +59,9 @@ std::shared_ptr<ngraph::Function> MultiplyToGroupConvolutionFunction::getOrigina
         std::make_shared<ngraph::opset1::Constant>(constant.outPrecision, constant.shape, constant.values));
     multiply->set_friendly_name("output");

-    ngraph::ResultVector results{std::make_shared<ngraph::opset1::Result>(multiply)};
+    ngraph::ResultVector results = parentHasOneConsumer ?
+        ngraph::ResultVector{std::make_shared<ngraph::opset1::Result>(multiply)} :
+        ngraph::ResultVector{std::make_shared<ngraph::opset1::Result>(maxPool), std::make_shared<ngraph::opset1::Result>(multiply)};

     return std::make_shared<ngraph::Function>(results, ngraph::ParameterVector{ input }, "MultiplyToGroupConvolutionFunction");
 }