[LPT] [GPU] Multiply to group convolution (#9971)
* [LPT] MultiplyToGroupConvolution optimization for GPU * [LPT] MatMul in FP32 in GPU workaround support * [LPT] GPU plugin tests
This commit is contained in:
parent
8c7e0d9479
commit
cc19ff74f1
@ -239,9 +239,11 @@ public:
|
|||||||
public:
|
public:
|
||||||
Params(
|
Params(
|
||||||
const bool updatePrecisions = true,
|
const bool updatePrecisions = true,
|
||||||
element::Type deqPrecision = element::f32) :
|
element::Type deqPrecision = element::f32,
|
||||||
|
const bool reshapeIgnorePerTensorQuantizationCheck = false) :
|
||||||
updatePrecisions(updatePrecisions),
|
updatePrecisions(updatePrecisions),
|
||||||
deqPrecision(deqPrecision) {}
|
deqPrecision(deqPrecision),
|
||||||
|
reshapeIgnorePerTensorQuantizationCheck(reshapeIgnorePerTensorQuantizationCheck) {}
|
||||||
|
|
||||||
Params& setUpdatePrecisions(const bool updatePrecisions) {
|
Params& setUpdatePrecisions(const bool updatePrecisions) {
|
||||||
this->updatePrecisions = updatePrecisions;
|
this->updatePrecisions = updatePrecisions;
|
||||||
@ -255,6 +257,8 @@ public:
|
|||||||
|
|
||||||
bool updatePrecisions;
|
bool updatePrecisions;
|
||||||
element::Type deqPrecision;
|
element::Type deqPrecision;
|
||||||
|
// to support GPU workaround to keep Reshape and MatMul in FP32
|
||||||
|
bool reshapeIgnorePerTensorQuantizationCheck;
|
||||||
};
|
};
|
||||||
|
|
||||||
class PrecisionDetails {
|
class PrecisionDetails {
|
||||||
@ -322,6 +326,7 @@ protected:
|
|||||||
|
|
||||||
bool updatePrecisions;
|
bool updatePrecisions;
|
||||||
element::Type deqPrecision;
|
element::Type deqPrecision;
|
||||||
|
bool reshapeIgnorePerTensorQuantizationCheck;
|
||||||
|
|
||||||
static constexpr char originalLayerPostfix[] = "_original";
|
static constexpr char originalLayerPostfix[] = "_original";
|
||||||
TransformationContext* context;
|
TransformationContext* context;
|
||||||
|
@ -87,7 +87,7 @@ public:
|
|||||||
|
|
||||||
static std::shared_ptr<opset1::Constant> toScalar(std::shared_ptr<opset1::Constant> constant);
|
static std::shared_ptr<opset1::Constant> toScalar(std::shared_ptr<opset1::Constant> constant);
|
||||||
|
|
||||||
static std::shared_ptr<Node> getConstantInput(const std::shared_ptr<Node>& node, const bool convertIsExpected = false);
|
static std::shared_ptr<Node> getConstantInput(const std::shared_ptr<const Node>& node, const bool convertIsExpected = false);
|
||||||
|
|
||||||
static std::vector<size_t> updateReshapeValues(
|
static std::vector<size_t> updateReshapeValues(
|
||||||
const Shape& elementwiseConstantShape,
|
const Shape& elementwiseConstantShape,
|
||||||
|
@ -30,6 +30,7 @@ std::mutex LayerTransformation::defaultPrecisionsMutex;
|
|||||||
LayerTransformation::LayerTransformation(const Params& params) :
|
LayerTransformation::LayerTransformation(const Params& params) :
|
||||||
updatePrecisions(params.updatePrecisions),
|
updatePrecisions(params.updatePrecisions),
|
||||||
deqPrecision(params.deqPrecision),
|
deqPrecision(params.deqPrecision),
|
||||||
|
reshapeIgnorePerTensorQuantizationCheck(params.reshapeIgnorePerTensorQuantizationCheck),
|
||||||
context(nullptr) {}
|
context(nullptr) {}
|
||||||
|
|
||||||
void LayerTransformation::setContext(TransformationContext* context) noexcept {
|
void LayerTransformation::setContext(TransformationContext* context) noexcept {
|
||||||
|
@ -373,7 +373,7 @@ std::shared_ptr<opset1::Constant> NetworkHelper::toScalar(std::shared_ptr<opset1
|
|||||||
return std::make_shared<opset1::Constant>(constant->get_element_type(), Shape{}, constant->get_data_ptr());
|
return std::make_shared<opset1::Constant>(constant->get_element_type(), Shape{}, constant->get_data_ptr());
|
||||||
}
|
}
|
||||||
|
|
||||||
std::shared_ptr<Node> NetworkHelper::getConstantInput(const std::shared_ptr<Node>& node, const bool convertIsExpected) {
|
std::shared_ptr<Node> NetworkHelper::getConstantInput(const std::shared_ptr<const Node>& node, const bool convertIsExpected) {
|
||||||
std::shared_ptr<Node> parent = ov::as_type_ptr<opset1::Constant>(node->input_value(0).get_node_shared_ptr());
|
std::shared_ptr<Node> parent = ov::as_type_ptr<opset1::Constant>(node->input_value(0).get_node_shared_ptr());
|
||||||
if (parent != nullptr) {
|
if (parent != nullptr) {
|
||||||
return parent;
|
return parent;
|
||||||
|
@ -195,8 +195,18 @@ bool ReshapeTransformation::canBeTransformed(const TransformationContext& contex
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (((dequantization.subtract == nullptr) || NetworkHelper::isScalarLike(dequantization.subtractConstant)) &&
|
bool ignorePerTensorQuantizationCheck = false;
|
||||||
((dequantization.multiply == nullptr) || NetworkHelper::isScalarLike(dequantization.multiplyConstant))) {
|
if (reshapeIgnorePerTensorQuantizationCheck) {
|
||||||
|
const auto inputs = op->get_output_target_inputs(0);
|
||||||
|
if (inputs.size() == 1ul) {
|
||||||
|
const auto consumer = inputs.begin()->get_node();
|
||||||
|
ignorePerTensorQuantizationCheck = ngraph::as_type<ngraph::opset1::MatMul>(consumer) != nullptr;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!ignorePerTensorQuantizationCheck &&
|
||||||
|
(((dequantization.subtract == nullptr) || NetworkHelper::isScalarLike(dequantization.subtractConstant)) &&
|
||||||
|
((dequantization.multiply == nullptr) || NetworkHelper::isScalarLike(dequantization.multiplyConstant)))) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -409,13 +409,47 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Model> func) {
|
|||||||
|
|
||||||
return LayerTransformation::isAsymmetricQuantization(node) || WeightableLayerTransformation::isAsymmetricOnWeights(node);
|
return LayerTransformation::isAsymmetricQuantization(node) || WeightableLayerTransformation::isAsymmetricOnWeights(node);
|
||||||
});
|
});
|
||||||
|
|
||||||
if (!use_onednn) {
|
if (!use_onednn) {
|
||||||
lptPassConfig->set_callback<MatMulTransformation>([](const_node_ptr& node) -> bool {
|
lptPassConfig->set_callback<MatMulTransformation>([](const_node_ptr& node) -> bool {
|
||||||
return MatMulTransformation::is3DTensorOnActivations(node);
|
return MatMulTransformation::is3DTensorOnActivations(node);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
lptManager.register_pass<LowPrecision>(supportedPrecisions, perTensorQuantization);
|
// Callback deciding whether MultiplyToGroupConvolution is disabled for `node`
// (returns true to disable it, false to let the transformation run).
lptPassConfig->set_callback<MultiplyToGroupConvolutionTransformation>([&](const_node_ptr& node) -> bool {
    // disable MultiplyToGroupConvolution if Multiply with Constant can be fused

    // Find the data-producing parent of the Multiply: take it from the dequantization
    // chain when there is one, otherwise fall back to the input that is not the Constant.
    const auto dequantization = NetworkHelper::getDequantization(node, 0, true);
    std::shared_ptr<ov::Node> parent = dequantization.empty() ? nullptr : dequantization.data.get_node()->shared_from_this();
    if (parent == nullptr) {
        const auto constantNode = NetworkHelper::getConstantInput(node);
        const auto constant = constantNode == nullptr ? nullptr : ngraph::as_type_ptr<ngraph::opset1::Constant>(constantNode);
        if (constant != nullptr) {
            // Assign to the outer `parent`. Declaring a new local here would shadow it,
            // discard the lookup and leave the single-consumer check below unreachable
            // for a Multiply without dequantization.
            parent = node->get_input_node_shared_ptr(0);
            if (parent == constant) {
                parent = node->get_input_node_shared_ptr(1);
            }
        }
    }

    if (parent != nullptr) {
        // The Multiply is the only consumer of output 0 of its parent.
        const auto parentHasOneConsumer = parent->get_output_target_inputs(0).size() == 1ul;
        if (parentHasOneConsumer) {
            return true;
        }
    }

    // disable MultiplyToGroupConvolution for Multiply with scalar
    if (MultiplyToGroupConvolutionTransformation::isDynamicOrScalar(node)) {
        return true;
    }

    return false;
});
|
||||||
|
|
||||||
|
auto params = LayerTransformation::Params(true, element::f32, true);
|
||||||
|
lptManager.register_pass<LowPrecision>(supportedPrecisions, perTensorQuantization, params);
|
||||||
lptManager.run_passes(func);
|
lptManager.run_passes(func);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -23,35 +23,40 @@ const std::vector<MultiplyToGroupConvolutionTransformationParam> params = {
|
|||||||
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 25.5f }, { 0.f }, { 25.5f } },
|
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 25.5f }, { 0.f }, { 25.5f } },
|
||||||
{{1.f, 2.f, 3.f}, element::f32, Shape{1, 3, 1, 1}},
|
{{1.f, 2.f, 3.f}, element::f32, Shape{1, 3, 1, 1}},
|
||||||
"output/GroupConvolution",
|
"output/GroupConvolution",
|
||||||
"U8"
|
"U8",
|
||||||
|
true
|
||||||
},
|
},
|
||||||
// Multiply with scalar is not transformed to GroupConvolution
|
// Multiply with scalar is not transformed to GroupConvolution
|
||||||
{
|
{
|
||||||
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 25.5f }, { 0.f }, { 25.5f } },
|
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 25.5f }, { 0.f }, { 25.5f } },
|
||||||
{{4.f}, element::f32, Shape{1, 1, 1, 1}},
|
{{4.f}, element::f32, Shape{1, 1, 1, 1}},
|
||||||
"output/GroupConvolution",
|
"output/GroupConvolution",
|
||||||
""
|
"",
|
||||||
|
true
|
||||||
},
|
},
|
||||||
// Multiply with scalar is not transformed to GroupConvolution
|
// Multiply with scalar is not transformed to GroupConvolution
|
||||||
{
|
{
|
||||||
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 25.5f }, { 0.f }, { 25.5f } },
|
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 25.5f }, { 0.f }, { 25.5f } },
|
||||||
{{4.f}, element::f32, Shape{}},
|
{{4.f}, element::f32, Shape{}},
|
||||||
"output/GroupConvolution",
|
"output/GroupConvolution",
|
||||||
""
|
"",
|
||||||
|
true
|
||||||
},
|
},
|
||||||
// Zero point
|
// Zero point
|
||||||
{
|
{
|
||||||
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { -1.28f }, { 1.27f }, { -1.28f }, { 1.27f } },
|
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { -1.28f }, { 1.27f }, { -1.28f }, { 1.27f } },
|
||||||
{{1.f, 2.f, 3.f}, element::f32, Shape{1, 3, 1, 1}},
|
{{1.f, 2.f, 3.f}, element::f32, Shape{1, 3, 1, 1}},
|
||||||
"output/GroupConvolution",
|
"output/GroupConvolution",
|
||||||
"U8"
|
"U8",
|
||||||
|
true
|
||||||
},
|
},
|
||||||
// Zero point
|
// Zero point
|
||||||
{
|
{
|
||||||
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { -1.28f }, { 1.27f / 2.f }, { -1.28f }, { 1.27f / 2.f} },
|
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { -1.28f }, { 1.27f / 2.f }, { -1.28f }, { 1.27f / 2.f} },
|
||||||
{{1.f, 2.f, 3.f}, element::f32, Shape{1, 3, 1, 1}},
|
{{1.f, 2.f, 3.f}, element::f32, Shape{1, 3, 1, 1}},
|
||||||
"output/GroupConvolution",
|
"output/GroupConvolution",
|
||||||
"U8"
|
"U8",
|
||||||
|
true
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -23,36 +23,41 @@ const std::vector<MultiplyToGroupConvolutionTransformationParam> params = {
|
|||||||
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 25.5f }, { 0.f }, { 25.5f } },
|
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 25.5f }, { 0.f }, { 25.5f } },
|
||||||
{{1.f, 2.f, 3.f}, element::f32, Shape{1, 3, 1, 1}},
|
{{1.f, 2.f, 3.f}, element::f32, Shape{1, 3, 1, 1}},
|
||||||
"output/GroupConvolution",
|
"output/GroupConvolution",
|
||||||
"U8"
|
"U8",
|
||||||
},
|
false
|
||||||
// Multiply with scalar is transformed to GroupConvolution
|
|
||||||
{
|
|
||||||
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 25.5f }, { 0.f }, { 25.5f } },
|
|
||||||
{{4.f}, element::f32, Shape{1, 1, 1, 1}},
|
|
||||||
"output/GroupConvolution",
|
|
||||||
"U8"
|
|
||||||
},
|
|
||||||
// multiply with scalar is transformed to groupconvolution
|
|
||||||
{
|
|
||||||
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 25.5f }, { 0.f }, { 25.5f } },
|
|
||||||
{{4.f}, element::f32, Shape{}},
|
|
||||||
"output/GroupConvolution",
|
|
||||||
"U8"
|
|
||||||
},
|
},
|
||||||
// zero point
|
// zero point
|
||||||
{
|
{
|
||||||
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { -1.28f }, { 1.27f }, { -1.28f }, { 1.27f } },
|
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { -1.28f }, { 1.27f }, { -1.28f }, { 1.27f } },
|
||||||
{{1.f, 2.f, 3.f}, element::f32, Shape{1, 3, 1, 1}},
|
{{1.f, 2.f, 3.f}, element::f32, Shape{1, 3, 1, 1}},
|
||||||
"output/GroupConvolution",
|
"output/GroupConvolution",
|
||||||
"I8"
|
"I8",
|
||||||
|
false
|
||||||
},
|
},
|
||||||
// zero point
|
// zero point
|
||||||
{
|
{
|
||||||
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { -1.28f }, { 1.27f / 2.f }, { -1.28f }, { 1.27f / 2.f} },
|
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { -1.28f }, { 1.27f / 2.f }, { -1.28f }, { 1.27f / 2.f} },
|
||||||
{{1.f, 2.f, 3.f}, element::f32, Shape{1, 3, 1, 1}},
|
{{1.f, 2.f, 3.f}, element::f32, Shape{1, 3, 1, 1}},
|
||||||
"output/GroupConvolution",
|
"output/GroupConvolution",
|
||||||
"U8"
|
"U8",
|
||||||
}
|
false
|
||||||
|
},
|
||||||
|
|
||||||
|
// Multiply => GroupConvolution optimizations
|
||||||
|
{
|
||||||
|
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 25.5f }, { 0.f }, { 25.5f } },
|
||||||
|
{{3.f}, element::f32, Shape{1, 1, 1, 1}},
|
||||||
|
"output/GroupConvolution",
|
||||||
|
"",
|
||||||
|
false
|
||||||
|
},
|
||||||
|
{
|
||||||
|
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 25.5f }, { 0.f }, { 25.5f } },
|
||||||
|
{{3.f}, element::f32, Shape{1, 1, 1, 1}},
|
||||||
|
"output/GroupConvolution",
|
||||||
|
"",
|
||||||
|
true
|
||||||
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
INSTANTIATE_TEST_SUITE_P(smoke_LPT, MultiplyToGroupConvolutionTransformation,
|
INSTANTIATE_TEST_SUITE_P(smoke_LPT, MultiplyToGroupConvolutionTransformation,
|
||||||
|
@ -22,6 +22,7 @@ public:
|
|||||||
builder::subgraph::Constant constant;
|
builder::subgraph::Constant constant;
|
||||||
std::string layerName;
|
std::string layerName;
|
||||||
std::string expectedKernelType;
|
std::string expectedKernelType;
|
||||||
|
bool parentHasOneConsumer;
|
||||||
};
|
};
|
||||||
|
|
||||||
typedef std::tuple <
|
typedef std::tuple <
|
||||||
|
@ -34,7 +34,8 @@ std::string MultiplyToGroupConvolutionTransformation::getTestCaseName(const test
|
|||||||
param.fqOnData << "_" <<
|
param.fqOnData << "_" <<
|
||||||
param.constant << "_" <<
|
param.constant << "_" <<
|
||||||
param.layerName << "_" <<
|
param.layerName << "_" <<
|
||||||
param.expectedKernelType;
|
param.expectedKernelType << "_" <<
|
||||||
|
param.parentHasOneConsumer;
|
||||||
return result.str();
|
return result.str();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -48,7 +49,8 @@ void MultiplyToGroupConvolutionTransformation::SetUp() {
|
|||||||
precision,
|
precision,
|
||||||
shape,
|
shape,
|
||||||
param.fqOnData,
|
param.fqOnData,
|
||||||
param.constant);
|
param.constant,
|
||||||
|
param.parentHasOneConsumer);
|
||||||
}
|
}
|
||||||
|
|
||||||
void MultiplyToGroupConvolutionTransformation::Run() {
|
void MultiplyToGroupConvolutionTransformation::Run() {
|
||||||
|
@ -29,7 +29,8 @@ public:
|
|||||||
const ngraph::element::Type precision,
|
const ngraph::element::Type precision,
|
||||||
const ngraph::PartialShape& inputShape,
|
const ngraph::PartialShape& inputShape,
|
||||||
const FakeQuantizeOnData& fqOnData,
|
const FakeQuantizeOnData& fqOnData,
|
||||||
const Constant& constant);
|
const Constant& constant,
|
||||||
|
const bool parentHasOneConsumer = true);
|
||||||
|
|
||||||
static std::shared_ptr<ngraph::Function> getReference(
|
static std::shared_ptr<ngraph::Function> getReference(
|
||||||
const ngraph::PartialShape& inputShape,
|
const ngraph::PartialShape& inputShape,
|
||||||
|
@ -39,7 +39,8 @@ std::shared_ptr<ngraph::Function> MultiplyToGroupConvolutionFunction::getOrigina
|
|||||||
const ngraph::element::Type precision,
|
const ngraph::element::Type precision,
|
||||||
const ngraph::PartialShape& inputShape,
|
const ngraph::PartialShape& inputShape,
|
||||||
const FakeQuantizeOnData& fqOnData,
|
const FakeQuantizeOnData& fqOnData,
|
||||||
const Constant& constant) {
|
const Constant& constant,
|
||||||
|
const bool parentHasOneConsumer) {
|
||||||
const auto input = std::make_shared<ngraph::opset1::Parameter>(precision, inputShape);
|
const auto input = std::make_shared<ngraph::opset1::Parameter>(precision, inputShape);
|
||||||
const auto fakeQuantize = makeFakeQuantize(input, precision, fqOnData);
|
const auto fakeQuantize = makeFakeQuantize(input, precision, fqOnData);
|
||||||
|
|
||||||
@ -58,7 +59,9 @@ std::shared_ptr<ngraph::Function> MultiplyToGroupConvolutionFunction::getOrigina
|
|||||||
std::make_shared<ngraph::opset1::Constant>(constant.outPrecision, constant.shape, constant.values));
|
std::make_shared<ngraph::opset1::Constant>(constant.outPrecision, constant.shape, constant.values));
|
||||||
multiply->set_friendly_name("output");
|
multiply->set_friendly_name("output");
|
||||||
|
|
||||||
ngraph::ResultVector results{std::make_shared<ngraph::opset1::Result>(multiply)};
|
ngraph::ResultVector results = parentHasOneConsumer ?
|
||||||
|
ngraph::ResultVector{std::make_shared<ngraph::opset1::Result>(multiply)} :
|
||||||
|
ngraph::ResultVector{std::make_shared<ngraph::opset1::Result>(maxPool), std::make_shared<ngraph::opset1::Result>(multiply)};
|
||||||
return std::make_shared<ngraph::Function>(results, ngraph::ParameterVector{ input }, "MultiplyToGroupConvolutionFunction");
|
return std::make_shared<ngraph::Function>(results, ngraph::ParameterVector{ input }, "MultiplyToGroupConvolutionFunction");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user