[LPT] [GPU] Multiply to group convolution (#9971)
* [LPT] MultiplyToGroupConvolution optimization for GPU * [LPT] MatMul in FP32 in GPU workaround support * [LPT] GPU plugin tests
This commit is contained in:
parent
8c7e0d9479
commit
cc19ff74f1
@ -239,9 +239,11 @@ public:
|
||||
public:
|
||||
Params(
|
||||
const bool updatePrecisions = true,
|
||||
element::Type deqPrecision = element::f32) :
|
||||
element::Type deqPrecision = element::f32,
|
||||
const bool reshapeIgnorePerTensorQuantizationCheck = false) :
|
||||
updatePrecisions(updatePrecisions),
|
||||
deqPrecision(deqPrecision) {}
|
||||
deqPrecision(deqPrecision),
|
||||
reshapeIgnorePerTensorQuantizationCheck(reshapeIgnorePerTensorQuantizationCheck) {}
|
||||
|
||||
Params& setUpdatePrecisions(const bool updatePrecisions) {
|
||||
this->updatePrecisions = updatePrecisions;
|
||||
@ -255,6 +257,8 @@ public:
|
||||
|
||||
bool updatePrecisions;
|
||||
element::Type deqPrecision;
|
||||
// to support GPU workaround to keep Reshape and MatMul in FP32
|
||||
bool reshapeIgnorePerTensorQuantizationCheck;
|
||||
};
|
||||
|
||||
class PrecisionDetails {
|
||||
@ -322,6 +326,7 @@ protected:
|
||||
|
||||
bool updatePrecisions;
|
||||
element::Type deqPrecision;
|
||||
bool reshapeIgnorePerTensorQuantizationCheck;
|
||||
|
||||
static constexpr char originalLayerPostfix[] = "_original";
|
||||
TransformationContext* context;
|
||||
|
@ -87,7 +87,7 @@ public:
|
||||
|
||||
static std::shared_ptr<opset1::Constant> toScalar(std::shared_ptr<opset1::Constant> constant);
|
||||
|
||||
static std::shared_ptr<Node> getConstantInput(const std::shared_ptr<Node>& node, const bool convertIsExpected = false);
|
||||
static std::shared_ptr<Node> getConstantInput(const std::shared_ptr<const Node>& node, const bool convertIsExpected = false);
|
||||
|
||||
static std::vector<size_t> updateReshapeValues(
|
||||
const Shape& elementwiseConstantShape,
|
||||
|
@ -30,6 +30,7 @@ std::mutex LayerTransformation::defaultPrecisionsMutex;
|
||||
// Builds a LayerTransformation from Params: copies updatePrecisions, deqPrecision and
// reshapeIgnorePerTensorQuantizationCheck from `params`, and starts with a null context pointer.
LayerTransformation::LayerTransformation(const Params& params) :
|
||||
updatePrecisions(params.updatePrecisions),
|
||||
deqPrecision(params.deqPrecision),
|
||||
reshapeIgnorePerTensorQuantizationCheck(params.reshapeIgnorePerTensorQuantizationCheck),
|
||||
context(nullptr) {}
|
||||
|
||||
void LayerTransformation::setContext(TransformationContext* context) noexcept {
|
||||
|
@ -373,7 +373,7 @@ std::shared_ptr<opset1::Constant> NetworkHelper::toScalar(std::shared_ptr<opset1
|
||||
return std::make_shared<opset1::Constant>(constant->get_element_type(), Shape{}, constant->get_data_ptr());
|
||||
}
|
||||
|
||||
std::shared_ptr<Node> NetworkHelper::getConstantInput(const std::shared_ptr<Node>& node, const bool convertIsExpected) {
|
||||
std::shared_ptr<Node> NetworkHelper::getConstantInput(const std::shared_ptr<const Node>& node, const bool convertIsExpected) {
|
||||
std::shared_ptr<Node> parent = ov::as_type_ptr<opset1::Constant>(node->input_value(0).get_node_shared_ptr());
|
||||
if (parent != nullptr) {
|
||||
return parent;
|
||||
|
@ -195,8 +195,18 @@ bool ReshapeTransformation::canBeTransformed(const TransformationContext& contex
|
||||
return false;
|
||||
}
|
||||
|
||||
if (((dequantization.subtract == nullptr) || NetworkHelper::isScalarLike(dequantization.subtractConstant)) &&
|
||||
((dequantization.multiply == nullptr) || NetworkHelper::isScalarLike(dequantization.multiplyConstant))) {
|
||||
bool ignorePerTensorQuantizationCheck = false;
|
||||
if (reshapeIgnorePerTensorQuantizationCheck) {
|
||||
const auto inputs = op->get_output_target_inputs(0);
|
||||
if (inputs.size() == 1ul) {
|
||||
const auto consumer = inputs.begin()->get_node();
|
||||
ignorePerTensorQuantizationCheck = ngraph::as_type<ngraph::opset1::MatMul>(consumer) != nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
if (!ignorePerTensorQuantizationCheck &&
|
||||
(((dequantization.subtract == nullptr) || NetworkHelper::isScalarLike(dequantization.subtractConstant)) &&
|
||||
((dequantization.multiply == nullptr) || NetworkHelper::isScalarLike(dequantization.multiplyConstant)))) {
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -409,13 +409,47 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Model> func) {
|
||||
|
||||
return LayerTransformation::isAsymmetricQuantization(node) || WeightableLayerTransformation::isAsymmetricOnWeights(node);
|
||||
});
|
||||
|
||||
if (!use_onednn) {
|
||||
lptPassConfig->set_callback<MatMulTransformation>([](const_node_ptr& node) -> bool {
|
||||
return MatMulTransformation::is3DTensorOnActivations(node);
|
||||
});
|
||||
}
|
||||
|
||||
lptManager.register_pass<LowPrecision>(supportedPrecisions, perTensorQuantization);
|
||||
// Callback for MultiplyToGroupConvolutionTransformation. Per the in-body notes, a `true`
// result disables the transformation for the given Multiply node. It returns true when:
//   1. the data parent of the Multiply has exactly one consumer (the Multiply with Constant
//      can be fused instead), or
//   2. MultiplyToGroupConvolutionTransformation::isDynamicOrScalar(node) holds.
lptPassConfig->set_callback<MultiplyToGroupConvolutionTransformation>([&](const_node_ptr& node) -> bool {
    // disable MultiplyToGroupConvolution if Multiply with Constant can be fused

    const auto dequantization = NetworkHelper::getDequantization(node, 0, true);
    std::shared_ptr<ov::Node> parent = dequantization.empty() ? nullptr : dequantization.data.get_node()->shared_from_this();
    if (parent == nullptr) {
        // No dequantization found: fall back to the non-constant input of the Multiply.
        const auto constantNode = NetworkHelper::getConstantInput(node);
        const auto constant = constantNode == nullptr ? nullptr : ngraph::as_type_ptr<ngraph::opset1::Constant>(constantNode);
        if (constant != nullptr) {
            // Assign to the outer `parent`. A nested `auto parent` declaration here would
            // shadow it, discard the lookup result and skip the consumer check below.
            parent = node->get_input_node_shared_ptr(0);
            if (parent == constant) {
                parent = node->get_input_node_shared_ptr(1);
            }
        }
    }

    if (parent != nullptr) {
        const auto parentHasOneConsumer = parent->get_output_target_inputs(0).size() == 1ul;
        if (parentHasOneConsumer) {
            return true;
        }
    }

    // disable MultiplyToGroupConvolution for Multiply with scalar

    if (MultiplyToGroupConvolutionTransformation::isDynamicOrScalar(node)) {
        return true;
    }

    return false;
});
|
||||
|
||||
auto params = LayerTransformation::Params(true, element::f32, true);
|
||||
lptManager.register_pass<LowPrecision>(supportedPrecisions, perTensorQuantization, params);
|
||||
lptManager.run_passes(func);
|
||||
}
|
||||
|
||||
|
@ -23,35 +23,40 @@ const std::vector<MultiplyToGroupConvolutionTransformationParam> params = {
|
||||
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 25.5f }, { 0.f }, { 25.5f } },
|
||||
{{1.f, 2.f, 3.f}, element::f32, Shape{1, 3, 1, 1}},
|
||||
"output/GroupConvolution",
|
||||
"U8"
|
||||
"U8",
|
||||
true
|
||||
},
|
||||
// Multiply with scalar is not transformed to GroupConvolution
|
||||
{
|
||||
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 25.5f }, { 0.f }, { 25.5f } },
|
||||
{{4.f}, element::f32, Shape{1, 1, 1, 1}},
|
||||
"output/GroupConvolution",
|
||||
""
|
||||
"",
|
||||
true
|
||||
},
|
||||
// Multiply with scalar is not transformed to GroupConvolution
|
||||
{
|
||||
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 25.5f }, { 0.f }, { 25.5f } },
|
||||
{{4.f}, element::f32, Shape{}},
|
||||
"output/GroupConvolution",
|
||||
""
|
||||
"",
|
||||
true
|
||||
},
|
||||
// Zero point
|
||||
{
|
||||
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { -1.28f }, { 1.27f }, { -1.28f }, { 1.27f } },
|
||||
{{1.f, 2.f, 3.f}, element::f32, Shape{1, 3, 1, 1}},
|
||||
"output/GroupConvolution",
|
||||
"U8"
|
||||
"U8",
|
||||
true
|
||||
},
|
||||
// Zero point
|
||||
{
|
||||
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { -1.28f }, { 1.27f / 2.f }, { -1.28f }, { 1.27f / 2.f} },
|
||||
{{1.f, 2.f, 3.f}, element::f32, Shape{1, 3, 1, 1}},
|
||||
"output/GroupConvolution",
|
||||
"U8"
|
||||
"U8",
|
||||
true
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -23,36 +23,41 @@ const std::vector<MultiplyToGroupConvolutionTransformationParam> params = {
|
||||
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 25.5f }, { 0.f }, { 25.5f } },
|
||||
{{1.f, 2.f, 3.f}, element::f32, Shape{1, 3, 1, 1}},
|
||||
"output/GroupConvolution",
|
||||
"U8"
|
||||
},
|
||||
// Multiply with scalar is transformed to GroupConvolution
|
||||
{
|
||||
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 25.5f }, { 0.f }, { 25.5f } },
|
||||
{{4.f}, element::f32, Shape{1, 1, 1, 1}},
|
||||
"output/GroupConvolution",
|
||||
"U8"
|
||||
},
|
||||
// Multiply with scalar is transformed to GroupConvolution
|
||||
{
|
||||
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 25.5f }, { 0.f }, { 25.5f } },
|
||||
{{4.f}, element::f32, Shape{}},
|
||||
"output/GroupConvolution",
|
||||
"U8"
|
||||
"U8",
|
||||
false
|
||||
},
|
||||
// zero point
|
||||
{
|
||||
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { -1.28f }, { 1.27f }, { -1.28f }, { 1.27f } },
|
||||
{{1.f, 2.f, 3.f}, element::f32, Shape{1, 3, 1, 1}},
|
||||
"output/GroupConvolution",
|
||||
"I8"
|
||||
"I8",
|
||||
false
|
||||
},
|
||||
// zero point
|
||||
{
|
||||
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { -1.28f }, { 1.27f / 2.f }, { -1.28f }, { 1.27f / 2.f} },
|
||||
{{1.f, 2.f, 3.f}, element::f32, Shape{1, 3, 1, 1}},
|
||||
"output/GroupConvolution",
|
||||
"U8"
|
||||
}
|
||||
"U8",
|
||||
false
|
||||
},
|
||||
|
||||
// Multiply => GroupConvolution optimizations
|
||||
{
|
||||
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 25.5f }, { 0.f }, { 25.5f } },
|
||||
{{3.f}, element::f32, Shape{1, 1, 1, 1}},
|
||||
"output/GroupConvolution",
|
||||
"",
|
||||
false
|
||||
},
|
||||
{
|
||||
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 25.5f }, { 0.f }, { 25.5f } },
|
||||
{{3.f}, element::f32, Shape{1, 1, 1, 1}},
|
||||
"output/GroupConvolution",
|
||||
"",
|
||||
true
|
||||
},
|
||||
};
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(smoke_LPT, MultiplyToGroupConvolutionTransformation,
|
||||
|
@ -22,6 +22,7 @@ public:
|
||||
builder::subgraph::Constant constant;
|
||||
std::string layerName;
|
||||
std::string expectedKernelType;
|
||||
bool parentHasOneConsumer;
|
||||
};
|
||||
|
||||
typedef std::tuple <
|
||||
|
@ -34,7 +34,8 @@ std::string MultiplyToGroupConvolutionTransformation::getTestCaseName(const test
|
||||
param.fqOnData << "_" <<
|
||||
param.constant << "_" <<
|
||||
param.layerName << "_" <<
|
||||
param.expectedKernelType;
|
||||
param.expectedKernelType << "_" <<
|
||||
param.parentHasOneConsumer;
|
||||
return result.str();
|
||||
}
|
||||
|
||||
@ -48,7 +49,8 @@ void MultiplyToGroupConvolutionTransformation::SetUp() {
|
||||
precision,
|
||||
shape,
|
||||
param.fqOnData,
|
||||
param.constant);
|
||||
param.constant,
|
||||
param.parentHasOneConsumer);
|
||||
}
|
||||
|
||||
void MultiplyToGroupConvolutionTransformation::Run() {
|
||||
|
@ -29,7 +29,8 @@ public:
|
||||
const ngraph::element::Type precision,
|
||||
const ngraph::PartialShape& inputShape,
|
||||
const FakeQuantizeOnData& fqOnData,
|
||||
const Constant& constant);
|
||||
const Constant& constant,
|
||||
const bool parentHasOneConsumer = true);
|
||||
|
||||
static std::shared_ptr<ngraph::Function> getReference(
|
||||
const ngraph::PartialShape& inputShape,
|
||||
|
@ -39,7 +39,8 @@ std::shared_ptr<ngraph::Function> MultiplyToGroupConvolutionFunction::getOrigina
|
||||
const ngraph::element::Type precision,
|
||||
const ngraph::PartialShape& inputShape,
|
||||
const FakeQuantizeOnData& fqOnData,
|
||||
const Constant& constant) {
|
||||
const Constant& constant,
|
||||
const bool parentHasOneConsumer) {
|
||||
const auto input = std::make_shared<ngraph::opset1::Parameter>(precision, inputShape);
|
||||
const auto fakeQuantize = makeFakeQuantize(input, precision, fqOnData);
|
||||
|
||||
@ -58,7 +59,9 @@ std::shared_ptr<ngraph::Function> MultiplyToGroupConvolutionFunction::getOrigina
|
||||
std::make_shared<ngraph::opset1::Constant>(constant.outPrecision, constant.shape, constant.values));
|
||||
multiply->set_friendly_name("output");
|
||||
|
||||
ngraph::ResultVector results{std::make_shared<ngraph::opset1::Result>(multiply)};
|
||||
ngraph::ResultVector results = parentHasOneConsumer ?
|
||||
ngraph::ResultVector{std::make_shared<ngraph::opset1::Result>(multiply)} :
|
||||
ngraph::ResultVector{std::make_shared<ngraph::opset1::Result>(maxPool), std::make_shared<ngraph::opset1::Result>(multiply)};
|
||||
return std::make_shared<ngraph::Function>(results, ngraph::ParameterVector{ input }, "MultiplyToGroupConvolutionFunction");
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user