[CPU] Optimize quantization scheme for SPR/ATS (#7549)

* [CPU] Optimize quantization scheme for SPR/ATS

* [CPU] [LPT] plugin tests

* [GPU] [LPT] plugin tests

* [CPU] limitation was removed

* [CPU] optimization FP32 old way support

* [LPT] comment fix

* [LPT] Multiply plugin test improvement

* [LPT] Multiply support

* [LPT] GPU tests fix

* [LPT] test quick fix

* [LPT] new ppi fix

* looks like time was spent on tests refactoring
This commit is contained in:
Edward Shogulin 2021-12-20 11:19:40 +03:00 committed by GitHub
parent abee3ea4d4
commit 37ad512d98
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
23 changed files with 892 additions and 118 deletions

View File

@ -1134,8 +1134,38 @@ void MKLDNNGraphOptimizer::FuseConvolutionSumAndConvolutionSumActivation(MKLDNNG
if (!isSuitableParent1 && !isSuitableParent2)
continue;
auto mergedConv = isSuitableParent1 ? parent1 : parent2;
auto peerNode = isSuitableParent1 ? parent2 : parent1;
std::shared_ptr<MKLDNNNode> mergedConv;
std::shared_ptr<MKLDNNNode> peerNode;
if (isSuitableParent1 && isSuitableParent2) {
// not merged operation (peerNode) has to be in low precision
const auto isBranchQuantized = [](const MKLDNNNodePtr& branchParent) {
const auto& fused = branchParent->getFusedWith();
const auto branchPrecision = fused.empty() ?
branchParent->getOriginalOutputPrecisionAtPort(0) :
fused[fused.size() - 1]->getOriginalOutputPrecisionAtPort(0);
return (branchPrecision == Precision::I8) || (branchPrecision == Precision::U8);
};
const auto isBranch1Quantized = isBranchQuantized(graphNode->getParentEdgesAtPort(0)[0]->getParent());
const auto isBranch2Quantized = isBranchQuantized(graphNode->getParentEdgesAtPort(1)[0]->getParent());
if (isBranch1Quantized || isBranch2Quantized) {
// INT8
const auto parent1CanBeMerged = parent1->getChildEdges().size() == 1ul;
// if both branches are quantized, then parent1 is selected (result is not changed)
mergedConv = isBranch2Quantized && parent1CanBeMerged ? parent1 : parent2;
peerNode = isBranch2Quantized && parent1CanBeMerged ? parent2 : parent1;
} else {
// original FP32
mergedConv = isSuitableParent1 ? parent1 : parent2;
peerNode = isSuitableParent1 ? parent2 : parent1;
}
} else {
mergedConv = isSuitableParent1 ? parent1 : parent2;
peerNode = isSuitableParent1 ? parent2 : parent1;
}
if (isSuitableParent1 && isSuitableParent2) {
if ((peerNode->getType() == Convolution || peerNode->getType() == BinaryConvolution) &&
mergedConv->getChildEdges().size() != 1) {

View File

@ -97,18 +97,14 @@ bool MultiplyTransformation::transform(TransformationContext& context, ngraph::p
} else {
const int emptyPathIndex = fullPathIndex == 0 ? 1 : 0;
FakeQuantizeDequantization dequantizationEmptyPath = NetworkHelper::getDequantization(multiply, emptyPathIndex);
if ((updatePrecisions && !dequantizationEmptyPath.empty() && !dequantizationEmptyPath.isLowPrecision()) ||
(dequantizationEmptyPath.multiply == nullptr && dequantizationEmptyPath.subtract == nullptr)) {
if (updatePrecisions) {
const FakeQuantizeDequantization dequantizationEmptyPath = NetworkHelper::getDequantization(multiply, emptyPathIndex);
if (!dequantizationEmptyPath.empty() && !dequantizationEmptyPath.isLowPrecision()) {
return false;
}
FakeQuantizeDequantization dequantizationFullPath = NetworkHelper::getDequantization(multiply, fullPathIndex);
if (updatePrecisions && !dequantizationFullPath.empty() && !dequantizationFullPath.isLowPrecision()) {
return false;
}
dequantizationEmptyPath = NetworkHelper::foldDequantization(multiply, emptyPathIndex);
FakeQuantizeDequantization dequantizationEmptyPath = NetworkHelper::foldDequantization(multiply, emptyPathIndex);
std::shared_ptr<Node> subtractValuesEmptyPath;
std::shared_ptr<Node> multiplyValuesEmptyPath;
std::tie(subtractValuesEmptyPath, multiplyValuesEmptyPath) = NetworkHelper::createEmptyValues(dequantizationEmptyPath, deqPrecision);
@ -118,7 +114,7 @@ bool MultiplyTransformation::transform(TransformationContext& context, ngraph::p
return false;
}
dequantizationFullPath = NetworkHelper::foldDequantization(multiply, fullPathIndex);
FakeQuantizeDequantization dequantizationFullPath = NetworkHelper::foldDequantization(multiply, fullPathIndex);
std::shared_ptr<Node> subtractValuesFullPath;
std::shared_ptr<Node> multiplyValuesFullPath;
std::tie(subtractValuesFullPath, multiplyValuesFullPath) = NetworkHelper::createEmptyValues(dequantizationFullPath, deqPrecision);

View File

@ -0,0 +1,100 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <vector>
#include "low_precision_transformations/elementwise_branch_selection_transformation.hpp"
#include "common_test_utils/test_constants.hpp"
using namespace LayerTestsDefinitions;
using namespace InferenceEngine::details;
// Test data and instantiation of the ElementwiseBranchSelectionTransformation suite for the CPU device.
namespace {
// Network precisions the suite is instantiated with.
const std::vector<ngraph::element::Type> netPrecisions = {
ngraph::element::f32,
};
// Elementwise operation kinds passed to the test as its last parameter.
const std::vector<std::string> elementwiseTypes = {
"add",
"multiply"
};
// Each entry is brace-initialized in member order of ElementwiseBranchSelectionTestValues:
// branch1, branch2, fakeQuantizeAfter, expectedReorders, expectedPrecisions.
// Each branch is { fakeQuantizeBefore, convolution, fakeQuantizeAfter }.
const std::vector<LayerTestsDefinitions::ElementwiseBranchSelectionTestValues> params = {
// case 1: branch1 has a FakeQuantize after its convolution, branch2 does not
{
// branch1
{
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 2.55f }, { 0.f }, { 2.55f } },
// convolution: { zero point on activations, constant on weights, dequantization on weights }
{
{},
{ std::vector<float>(9, 1.f), ngraph::element::i8, {3, 3, 1, 1} },
{ {ngraph::element::f32}, {}, {std::vector<float>(3, 1.f), ngraph::element::f32, {3, 1, 1, 1}} }
},
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 2.55f }, { 0.f }, { 2.55f } },
},
// branch2
{
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 2.55f }, { 0.f }, { 2.55f } },
{
{},
{ std::vector<float>(9, 1.f), ngraph::element::i8, {3, 3, 1, 1} },
{ {ngraph::element::f32}, {}, {std::vector<float>(3, 1.f), ngraph::element::f32, {3, 1, 1, 1}} }
},
{}
},
// fakeQuantizeAfter (top-level member of the test values)
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 2.55f }, { 0.f }, { 2.55f } },
// expectedReorders: { prefix before the first '_' of the Reorder name, suffix after the last '_' }
{
{"Constant", "convolution1"},
{"Constant", "convolution2"},
{"fakeQuantizeBefore1", "convolution1"},
{"fakeQuantizeBefore2", "convolution2"},
{"maxPool", "result"}
},
// expectedPrecisions: { operation name, expected runtime precision }
{
{"convolution1", "U8"},
{"convolution2", "U8"},
{"eltwise", "U8"}
}
},
// case 2: mirrored, branch2 has a FakeQuantize after its convolution, branch1 does not
{
{
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 2.55f }, { 0.f }, { 2.55f } },
{
{},
{ std::vector<float>(9, 1.f), ngraph::element::i8, {3, 3, 1, 1} },
{ {ngraph::element::f32}, {}, {std::vector<float>(3, 1.f), ngraph::element::f32, {3, 1, 1, 1}} }
},
{}
},
{
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 2.55f }, { 0.f }, { 2.55f } },
{
{},
{ std::vector<float>(9, 1.f), ngraph::element::i8, {3, 3, 1, 1} },
{ {ngraph::element::f32}, {}, {std::vector<float>(3, 1.f), ngraph::element::f32, {3, 1, 1, 1}} }
},
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 2.55f }, { 0.f }, { 2.55f } },
},
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 2.55f }, { 0.f }, { 2.55f } },
{
{"Constant", "convolution1"},
{"Constant", "convolution2"},
{"fakeQuantizeBefore1", "convolution1"},
{"fakeQuantizeBefore2", "convolution2"},
{"maxPool", "result"}
},
{
{"convolution1", "U8"},
{"convolution2", "U8"},
{"eltwise", "U8"}
}
}
};
// Parameter order matches ElementwiseBranchSelectionTransformationParams:
// network precision, input shape, target device, test values, elementwise type.
INSTANTIATE_TEST_SUITE_P(smoke_LPT, ElementwiseBranchSelectionTransformation,
::testing::Combine(
::testing::ValuesIn(netPrecisions),
::testing::Values(ngraph::PartialShape({ 1, 3, 16, 16 })),
::testing::Values(CommonTestUtils::DEVICE_CPU),
::testing::ValuesIn(params),
::testing::ValuesIn(elementwiseTypes)),
ElementwiseBranchSelectionTransformation::getTestCaseName);
} // namespace

View File

@ -17,54 +17,63 @@ const std::vector<ngraph::element::Type> netPrecisions = {
const std::vector<LayerTestsDefinitions::MultiplyTestValues> params = {
{
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 255.f }, { 0.f }, { 255.f } },
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 255.f }, { -128.f }, { 127.f } },
false,
{ngraph::element::i8}, {ngraph::element::f32, ngraph::element::i8}
},
{
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { -128.f }, { 127.f }, { -128.f }, { 127.f } },
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 255.f }, { 0.f }, { 255.f } },
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 2.55f }, { 0.f }, { 2.55f } },
false,
{ngraph::element::i8}, {ngraph::element::f32, ngraph::element::f32}
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { -1.28f }, { 1.27f }, { -1.28f }, { 1.27f } },
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { -1.28f }, { 1.27f }, { -1.28f }, { 1.27f } },
ngraph::element::i8
},
{
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 255.f }, { 0.f }, { 255.f } },
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 255.f }, { -128.f }, { 127.f } },
true,
{ngraph::element::i8}, {ngraph::element::f32, ngraph::element::f32}
},
{
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { -128.f }, { 127.f }, { -128.f }, { 127.f } },
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 255.f }, { 0.f }, { 255.f } },
true,
{ngraph::element::i8}, {ngraph::element::i8, ngraph::element::f32}
},
{
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 255.f }, { 0.f }, { 255.f } },
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 255.f }, { -127.f }, { 128.f } },
false,
{ngraph::element::u8}, {ngraph::element::f32, ngraph::element::f32}
},
{
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { -128.f }, { 127.f }, { -128.f }, { 127.f } },
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 255.f }, { 0.f }, { 255.f } },
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 2.55f }, { 0.f }, { 2.55f } },
false,
{ngraph::element::u8}, {ngraph::element::f32, ngraph::element::u8}
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 2.55f }, { 0.f }, { 2.55f } },
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 2.55f }, { 0.f }, { 2.55f } },
ngraph::element::u8
},
{
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 255.f }, { 0.f }, { 255.f } },
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 255.f }, { -127.f }, { 128.f } },
true,
{ngraph::element::u8}, {ngraph::element::u8, ngraph::element::f32}
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { -1.28f }, { 1.27f }, { -1.28f }, { 1.27f } },
false,
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 2.55f }, { 0.f }, { 2.55f } },
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 2.55f }, { 0.f }, { 2.55f } },
ngraph::element::u8
},
{
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { -128.f }, { 127.f }, { -128.f }, { 127.f } },
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 255.f }, { 0.f }, { 255.f } },
true,
{ngraph::element::u8}, {ngraph::element::f32, ngraph::element::f32}
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 2.55f }, { 0.f }, { 2.55f } },
false,
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { -1.28f }, { 1.27f }, { -1.28f }, { 1.27f } },
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { -1.28f }, { 1.27f }, { -1.28f }, { 1.27f } },
ngraph::element::i8
},
{ {}, {}, false }, { {}, {}, true },
{
false,
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { -1.28f }, { 1.27f }, { -1.28f }, { 1.27f } },
true,
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 2.55f }, { 0.f }, { 2.55f } },
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { -1.28f }, { 1.27f }, { -1.28f }, { 1.27f } },
ngraph::element::i8
},
{
false,
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { -1.28f }, { 1.27f }, { -128.f }, { 1.27f } },
false,
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 2.55f }, { 0.f }, { 2.55f } },
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 2.55f }, { 0.f }, { 2.55f } },
ngraph::element::u8
},
{
false,
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 2.55f }, { 0.f }, { 2.55f } },
true,
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { -1.27f }, { 1.28f }, { -1.27f }, { 1.28f } },
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 2.55f }, { 0.f }, { 2.55f } },
ngraph::element::u8
},
{ false, {}, false, {}, {}, ngraph::element::f32 },
{ true, {}, true, {}, {}, ngraph::element::f32 },
};
INSTANTIATE_TEST_SUITE_P(smoke_LPT, MultiplyTransformation,

View File

@ -0,0 +1,88 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <vector>
#include "low_precision_transformations/elementwise_branch_selection_transformation.hpp"
#include "common_test_utils/test_constants.hpp"
using namespace LayerTestsDefinitions;
using namespace InferenceEngine::details;
// Test data and instantiation of the ElementwiseBranchSelectionTransformation suite for the GPU device.
namespace {
// Network precisions the suite is instantiated with.
const std::vector<ngraph::element::Type> netPrecisions = {
ngraph::element::f32,
};
// Elementwise operation kinds passed to the test as its last parameter.
const std::vector<std::string> elementwiseTypes = {
"add",
"multiply"
};
// Each entry is brace-initialized in member order of ElementwiseBranchSelectionTestValues:
// branch1, branch2, fakeQuantizeAfter, expectedReorders, expectedPrecisions.
// Each branch is { fakeQuantizeBefore, convolution, fakeQuantizeAfter }.
const std::vector<LayerTestsDefinitions::ElementwiseBranchSelectionTestValues> params = {
// case 1: branch1 has a FakeQuantize after its convolution, branch2 does not
{
{
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 2.55f }, { 0.f }, { 2.55f } },
{
{},
{ std::vector<float>(9, 1.f), ngraph::element::i8, {3, 3, 1, 1} },
{ {ngraph::element::f32}, {}, {std::vector<float>(3, 1.f), ngraph::element::f32, {3, 1, 1, 1}} }
},
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 2.55f }, { 0.f }, { 2.55f } },
},
{
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 2.55f }, { 0.f }, { 2.55f } },
{
{},
{ std::vector<float>(9, 1.f), ngraph::element::i8, {3, 3, 1, 1} },
{ {ngraph::element::f32}, {}, {std::vector<float>(3, 1.f), ngraph::element::f32, {3, 1, 1, 1}} }
},
{}
},
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 2.55f }, { 0.f }, { 2.55f } },
// expectedReorders is left empty, so the Reorder check in Run() is skipped
{}, // GPU doesn't return Reorders in performance counters
// expectedPrecisions: { operation name, expected runtime precision }
{
{"convolution1", "U8"},
{"convolution2", "U8"},
{"eltwise", "U8"}
}
},
// case 2: mirrored, branch2 has a FakeQuantize after its convolution, branch1 does not
{
{
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 2.55f }, { 0.f }, { 2.55f } },
{
{},
{ std::vector<float>(9, 1.f), ngraph::element::i8, {3, 3, 1, 1} },
{ {ngraph::element::f32}, {}, {std::vector<float>(3, 1.f), ngraph::element::f32, {3, 1, 1, 1}} }
},
{}
},
{
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 2.55f }, { 0.f }, { 2.55f } },
{
{},
{ std::vector<float>(9, 1.f), ngraph::element::i8, {3, 3, 1, 1} },
{ {ngraph::element::f32}, {}, {std::vector<float>(3, 1.f), ngraph::element::f32, {3, 1, 1, 1}} }
},
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 2.55f }, { 0.f }, { 2.55f } },
},
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 2.55f }, { 0.f }, { 2.55f } },
{}, // GPU doesn't return Reorders in performance counters
{
{"convolution1", "U8"},
{"convolution2", "U8"},
{"eltwise", "U8"}
}
}
};
// Parameter order matches ElementwiseBranchSelectionTransformationParams:
// network precision, input shape, target device, test values, elementwise type.
INSTANTIATE_TEST_SUITE_P(smoke_LPT, ElementwiseBranchSelectionTransformation,
::testing::Combine(
::testing::ValuesIn(netPrecisions),
::testing::Values(ngraph::PartialShape({ 1, 3, 16, 16 })),
::testing::Values(CommonTestUtils::DEVICE_GPU),
::testing::ValuesIn(params),
::testing::ValuesIn(elementwiseTypes)),
ElementwiseBranchSelectionTransformation::getTestCaseName);
} // namespace

View File

@ -12,59 +12,68 @@ using namespace LayerTestsDefinitions;
namespace {
const std::vector<ngraph::element::Type> netPrecisions = {
ngraph::element::f32,
ngraph::element::f16
//ngraph::element::f16
};
const std::vector<LayerTestsDefinitions::MultiplyTestValues> params = {
{
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 255.f }, { 0.f }, { 255.f } },
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 255.f }, { -128.f }, { 127.f } },
false,
{ngraph::element::i8}, {ngraph::element::f32, ngraph::element::i8}
},
{
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { -128.f }, { 127.f }, { -128.f }, { 127.f } },
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 255.f }, { 0.f }, { 255.f } },
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 2.55f }, { 0.f }, { 2.55f } },
false,
{ngraph::element::i8}, {ngraph::element::f32, ngraph::element::f32}
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { -1.28f }, { 1.27f }, { -1.28f }, { 1.27f } },
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { -1.28f }, { 1.27f }, { -1.28f }, { 1.27f } },
ngraph::element::undefined // ngraph::element::i8
},
{
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 255.f }, { 0.f }, { 255.f } },
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 255.f }, { -128.f }, { 127.f } },
true,
{ngraph::element::i8}, {ngraph::element::f32, ngraph::element::f32}
},
{
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { -128.f }, { 127.f }, { -128.f }, { 127.f } },
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 255.f }, { 0.f }, { 255.f } },
true,
{ngraph::element::i8}, {ngraph::element::i8, ngraph::element::f32}
},
{
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 255.f }, { 0.f }, { 255.f } },
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 255.f }, { -127.f }, { 128.f } },
false,
{ngraph::element::u8}, {ngraph::element::f32, ngraph::element::f32}
},
{
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { -128.f }, { 127.f }, { -128.f }, { 127.f } },
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 255.f }, { 0.f }, { 255.f } },
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 2.55f }, { 0.f }, { 2.55f } },
false,
{ngraph::element::u8}, {ngraph::element::f32, ngraph::element::u8}
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 2.55f }, { 0.f }, { 2.55f } },
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 2.55f }, { 0.f }, { 2.55f } },
ngraph::element::undefined // ngraph::element::u8
},
{
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 255.f }, { 0.f }, { 255.f } },
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 255.f }, { -127.f }, { 128.f } },
true,
{ngraph::element::u8}, {ngraph::element::u8, ngraph::element::f32}
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { -1.28f }, { 1.27f }, { -1.28f }, { 1.27f } },
false,
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 2.55f }, { 0.f }, { 2.55f } },
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 2.55f }, { 0.f }, { 2.55f } },
ngraph::element::undefined //ngraph::element::u8
},
{
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { -128.f }, { 127.f }, { -128.f }, { 127.f } },
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 255.f }, { 0.f }, { 255.f } },
true,
{ngraph::element::u8}, {ngraph::element::f32, ngraph::element::f32}
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 2.55f }, { 0.f }, { 2.55f } },
false,
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { -1.28f }, { 1.27f }, { -1.28f }, { 1.27f } },
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { -1.28f }, { 1.27f }, { -1.28f }, { 1.27f } },
ngraph::element::undefined // ngraph::element::i8
},
{ {}, {}, false }, { {}, {}, true },
{
false,
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { -1.28f }, { 1.27f }, { -1.28f }, { 1.27f } },
true,
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 2.55f }, { 0.f }, { 2.55f } },
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { -1.28f }, { 1.27f }, { -1.28f }, { 1.27f } },
ngraph::element::undefined // ngraph::element::i8
},
{
false,
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { -1.28f }, { 1.27f }, { -128.f }, { 1.27f } },
false,
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 2.55f }, { 0.f }, { 2.55f } },
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 2.55f }, { 0.f }, { 2.55f } },
ngraph::element::undefined // ngraph::element::u8
},
{
false,
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 2.55f }, { 0.f }, { 2.55f } },
true,
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { -1.27f }, { 1.28f }, { -1.27f }, { 1.28f } },
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 2.55f }, { 0.f }, { 2.55f } },
ngraph::element::undefined // ngraph::element::u8
},
{ false, {}, false, {}, {}, ngraph::element::undefined /* ngraph::element::f32 */ },
{ true, {}, true, {}, {}, ngraph::element::undefined /* ngraph::element::f32 */ },
};
INSTANTIATE_TEST_SUITE_P(smoke_LPT, MultiplyTransformation,

View File

@ -0,0 +1,51 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <string>
#include <memory>
#include "shared_test_classes/base/low_precision_transformations/layer_transformation.hpp"
#include "lpt_ngraph_functions/common/convolution.hpp"
#include "lpt_ngraph_functions/common/fake_quantize_on_data.hpp"
namespace LayerTestsDefinitions {
// Values describing one elementwise branch selection test case:
// two input branches, a FakeQuantize after the elementwise operation and the expectations checked in Run().
class ElementwiseBranchSelectionTestValues{
public:
// One input branch of the elementwise operation. SetUp() forwards the members to
// getOriginalSubgraphWithConvolutions as fqOnDataBefore, convolution and fqOnDataAfter of that branch.
class Branch {
public:
ngraph::builder::subgraph::FakeQuantizeOnData fakeQuantizeBefore;
ngraph::builder::subgraph::Convolution convolution;
ngraph::builder::subgraph::FakeQuantizeOnData fakeQuantizeAfter;
};
Branch branch1;
Branch branch2;
// forwarded to getOriginalSubgraphWithConvolutions as fqOnDataAfter
ngraph::builder::subgraph::FakeQuantizeOnData fakeQuantizeAfter;
// expected Reorder operations: first is matched against the Reorder name prefix before the first '_',
// second against the suffix after the last '_'; an empty list disables the Reorder check
std::vector<std::pair<std::string, std::string>> expectedReorders;
// expected operation name + expected operation precision
std::vector<std::pair<std::string, std::string>> expectedPrecisions;
};
// Test parameter tuple, in order: network precision, input shape, target device,
// test values, elementwise operation type.
using ElementwiseBranchSelectionTransformationParams = std::tuple<
    ngraph::element::Type,
    ngraph::PartialShape,
    std::string,
    ElementwiseBranchSelectionTestValues,
    std::string>;
// Value-parameterized layer test parameterized by ElementwiseBranchSelectionTransformationParams.
class ElementwiseBranchSelectionTransformation :
public testing::WithParamInterface<ElementwiseBranchSelectionTransformationParams>,
public LayerTestsUtils::LayerTransformation {
public:
// Builds the test case name from the test parameters (used as the name generator in INSTANTIATE_TEST_SUITE_P).
static std::string getTestCaseName(const testing::TestParamInfo<ElementwiseBranchSelectionTransformationParams>& obj);
protected:
// Builds the tested function from the test parameters.
void SetUp() override;
// Runs the base layer test, then checks expected Reorder operations and runtime precisions.
void Run() override;
};
} // namespace LayerTestsDefinitions

View File

@ -14,11 +14,12 @@ namespace LayerTestsDefinitions {
class MultiplyTestValues {
public:
bool broadcast1;
ngraph::builder::subgraph::FakeQuantizeOnData fakeQuantize1;
bool broadcast2;
ngraph::builder::subgraph::FakeQuantizeOnData fakeQuantize2;
bool broadcast;
std::vector<ngraph::element::Type> precisionOnActivations;
std::vector<ngraph::element::Type> expectedPrecisions;
ngraph::builder::subgraph::FakeQuantizeOnData fakeQuantizeAfter;
ngraph::element::Type expectedPrecisions;
};
typedef std::tuple<
@ -36,6 +37,7 @@ public:
protected:
void SetUp() override;
void Run() override;
};
} // namespace LayerTestsDefinitions

View File

@ -0,0 +1,120 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "low_precision_transformations/elementwise_branch_selection_transformation.hpp"
#include <memory>
#include <tuple>
#include <transformations/init_node_info.hpp>
#include "lpt_ngraph_functions/add_function.hpp"
namespace LayerTestsDefinitions {
// Builds a test case name from the test parameters.
//
// The name contains the common layer-transformation prefix, the elementwise type and the
// FakeQuantize intervals of branch1, branch2 and of the FakeQuantize after the elementwise operation.
// Empty FakeQuantize descriptions contribute nothing to the name.
std::string ElementwiseBranchSelectionTransformation::getTestCaseName(const testing::TestParamInfo<ElementwiseBranchSelectionTransformationParams>& obj) {
    ngraph::element::Type netPrecision;
    ngraph::PartialShape inputShapes;
    std::string targetDevice;
    auto params = LayerTestsUtils::LayerTransformationParamsNGraphFactory::createParamsU8I8();
    ElementwiseBranchSelectionTestValues param;
    std::string elementwiseType;
    std::tie(netPrecision, inputShapes, targetDevice, param, elementwiseType) = obj.param;

    // Serializes the first input/output low/high values of a FakeQuantize description.
    // The branch label is deliberately NOT emitted here: the caller knows which branch it is printing.
    const auto toString = [](const ngraph::builder::subgraph::FakeQuantizeOnData& fqOnData) -> std::string {
        if (fqOnData.empty()) {
            return "";
        }

        std::ostringstream ss;
        ss << "_" <<
            fqOnData.inputLowValues[0] << "_" <<
            fqOnData.inputHighValues[0] << "_" <<
            fqOnData.outputLowValues[0] << "_" <<
            fqOnData.outputHighValues[0];
        return ss.str();
    };

    std::ostringstream result;
    result << getTestCaseNameByParams(netPrecision, inputShapes, targetDevice, params) <<
        "_elementwiseType_" << elementwiseType <<
        "_on_branch1" << toString(param.branch1.fakeQuantizeBefore) << toString(param.branch1.fakeQuantizeAfter) <<
        // branch2 was previously printed with branch1 values and the branch1 label
        "_on_branch2" << toString(param.branch2.fakeQuantizeBefore) << toString(param.branch2.fakeQuantizeAfter) <<
        "_after" << toString(param.fakeQuantizeAfter);
    return result.str();
}
void ElementwiseBranchSelectionTransformation::SetUp() {
ngraph::element::Type precision;
ngraph::PartialShape inputShape;
ElementwiseBranchSelectionTestValues param;
std::string elementwiseType;
std::tie(precision, inputShape, targetDevice, param, elementwiseType) = this->GetParam();
function = ngraph::builder::subgraph::AddFunction::getOriginalSubgraphWithConvolutions(
precision,
inputShape,
false,
elementwiseType,
param.branch1.fakeQuantizeBefore,
param.branch1.convolution,
param.branch1.fakeQuantizeAfter,
param.branch2.fakeQuantizeBefore,
param.branch2.convolution,
param.branch2.fakeQuantizeAfter,
param.fakeQuantizeAfter);
ngraph::pass::InitNodeInfo().run_on_function(function);
}
// Runs the base layer test and then validates the execution graph:
//  - if expected Reorders are specified, every Reorder found in the runtime info has to match exactly one
//    expected (parent, child) pair, every expected pair has to be found, and every Convolution has to
//    report "U8" runtime precision;
//  - every expected (operation, precision) pair has to match the runtime precision looked up by fused name,
//    where the placeholder name "eltwise" is replaced by the elementwise type under test.
void ElementwiseBranchSelectionTransformation::Run() {
    LayerTestsCommon::Run();

    const auto& params = std::get<3>(GetParam());
    const auto& elementwiseType = std::get<4>(GetParam());

    // working copy: matched entries are erased so that leftovers can be reported
    std::vector<std::pair<std::string, std::string>> expectedReorders = params.expectedReorders;
    if (!expectedReorders.empty()) {
        const auto rtInfo = LayerTestsCommon::getRuntimeInfo();
        for (const auto& node : rtInfo) {
            const auto& name = node.first;
            const auto& nodeInfo = node.second;

            const auto typeIt = nodeInfo.find("layerType");
            ASSERT_TRUE(typeIt != nodeInfo.end()) << "layerType is not found for node: " << name;
            const auto type = typeIt->second.as<std::string>();

            if (type == "Reorder") {
                // the name prefix before the first '_' is the parent, the suffix after the last '_' is the child
                const std::string parent = name.substr(0, name.find("_"));
                const std::string child = name.substr(name.rfind("_") + 1);

                bool wasFound = false;
                for (auto expectedIt = expectedReorders.begin(); expectedIt != expectedReorders.end(); ++expectedIt) {
                    if ((expectedIt->first == parent) && (expectedIt->second == child)) {
                        expectedReorders.erase(expectedIt);
                        wasFound = true;
                        break;
                    }
                }
                ASSERT_TRUE(wasFound) << name << " was not found in expected list";
            } else if (type == "Convolution") {
                const auto precisionIt = nodeInfo.find("runtimePrecision");
                ASSERT_TRUE(precisionIt != nodeInfo.end()) << "runtimePrecision is not found for node: " << name;
                const auto precision = precisionIt->second.as<std::string>();
                ASSERT_EQ("U8", precision);
            }
        }

        ASSERT_TRUE(expectedReorders.empty()) << "Some Reorder operations were not found in execution graph";
    }

    for (const auto& expected : params.expectedPrecisions) {
        const auto actualPrecision = getRuntimePrecisionByFusedName(expected.first == "eltwise" ? elementwiseType : expected.first);
        ASSERT_EQ(expected.second, actualPrecision) << "actual precision for operation '" << expected.first << "' is not correct";
    }
}
// Single parameterized test: all checks live in Run().
TEST_P(ElementwiseBranchSelectionTransformation, CompareWithRefImpl) {
    Run();
}
} // namespace LayerTestsDefinitions

View File

@ -27,14 +27,10 @@ std::string MultiplyTransformation::getTestCaseName(const testing::TestParamInfo
std::ostringstream result;
result << getTestCaseNameByParams(precision, inputShapes, targetDevice, params) <<
(param.broadcast ? "_broadcast" : "");
for (const auto& elem : param.precisionOnActivations) {
result << "_" << elem << "_";
}
result << "expected_precisions_";
for (const auto& elem : param.expectedPrecisions) {
result << "_" << elem << "_";
}
(param.broadcast1 ? "_broadcast1" : "") <<
(param.broadcast2 ? "_broadcast2" : "");
result << "_" << param.expectedPrecisions << "_";
if (!param.fakeQuantize1.empty()) {
result << "_on_branch1_" <<
@ -62,13 +58,42 @@ void MultiplyTransformation::SetUp() {
function = ngraph::builder::subgraph::MultiplyFunction::getOriginal(
precision,
inputShape,
param.broadcast,
param.broadcast1,
param.fakeQuantize1,
param.fakeQuantize2);
param.broadcast2,
param.fakeQuantize2,
param.fakeQuantizeAfter);
ngraph::pass::InitNodeInfo().run_on_function(function);
}
// Runs the base layer test and then compares the runtime precision reported for the
// operation named "multiply" with the expected precision from the test values.
void MultiplyTransformation::Run() {
    LayerTestsCommon::Run();

    // Maps an element type to the string it is compared against; unlisted types map to an empty string.
    const auto precisionName = [](const ngraph::element::Type& type) -> std::string {
        switch (type) {
            case ngraph::element::f32:
                return "FP32";
            case ngraph::element::i8:
                return "I8";
            case ngraph::element::u8:
                return "U8";
            default:
                return "";
        }
    };

    const auto testValues = std::get<3>(GetParam());
    const std::string expected = precisionName(testValues.expectedPrecisions);
    const std::string actual = getRuntimePrecision("multiply");
    EXPECT_EQ(expected, actual);
}
// Single parameterized test: all checks live in Run().
TEST_P(MultiplyTransformation, CompareWithRefImpl) {
    Run();
}

View File

@ -85,9 +85,17 @@ public:
std::map<std::string, std::string>& GetConfiguration();
// get runtime precision by operation friendly name
std::string getRuntimePrecision(const std::string& layerName);
// get runtime precision by operation type
std::string getRuntimePrecisionByType(const std::string& layerType);
// get runtime precision by operation friendly name which can be fused
std::string getRuntimePrecisionByFusedName(const std::string& layerName);
std::map<std::string, ngraph::Node::RTMap> getRuntimeInfo();
#ifndef NDEBUG
void showRuntimePrecisions();
#endif

View File

@ -516,6 +516,54 @@ std::string LayerTestsCommon::getRuntimePrecisionByType(const std::string& layer
return "";
}
// Returns the "runtimePrecision" of the first execution graph node whose comma-separated
// "originalLayersNames" attribute contains layerName, or an empty string if no node matches.
// Asserts if a visited node has no "originalLayersNames", or if the matching node has no "runtimePrecision".
std::string LayerTestsCommon::getRuntimePrecisionByFusedName(const std::string& layerName) {
    const auto execGraph = executableNetwork.GetExecGraphInfo();
    const auto execFunction = execGraph.getFunction();

    // Splits a comma-separated list into a set of names (no trimming is performed).
    const auto parse = [](const std::string& originalLayersNames) -> std::set<std::string> {
        std::set<std::string> names;

        size_t beginPosition = 0ul;
        size_t endPosition;
        while ((endPosition = originalLayersNames.find(',', beginPosition)) != std::string::npos) {
            names.insert(originalLayersNames.substr(beginPosition, endPosition - beginPosition));
            beginPosition = endPosition + 1;
        }

        // the tail after the last comma (or the whole string when there is no comma)
        names.insert(originalLayersNames.substr(beginPosition));
        return names;
    };

    for (const auto& op : execFunction->get_ops()) {
        const auto& rtInfo = op->get_rt_info();

        const auto nameIt = rtInfo.find("originalLayersNames");
        // report the node that actually lacks the attribute, not the name being searched for
        IE_ASSERT(nameIt != rtInfo.end()) << "originalLayersNames is not found for node: " << op->get_friendly_name();
        const auto fusedNames = parse(nameIt->second.as<std::string>());
        if (fusedNames.find(layerName) == fusedNames.end()) {
            continue;
        }

        const auto precisionIt = rtInfo.find("runtimePrecision");
        IE_ASSERT(precisionIt != rtInfo.end()) <<
            "runtimePrecision is not found for node: " << op->get_friendly_name() <<
            " (fused layer: " << layerName << ")";
        return precisionIt->second.as<std::string>();
    }

    return "";
}
// Collects the runtime info map of every execution graph operation, keyed by the operation friendly name.
// If several operations share a friendly name, the one visited last wins.
std::map<std::string, ngraph::Node::RTMap> LayerTestsCommon::getRuntimeInfo() {
    std::map<std::string, ngraph::Node::RTMap> infoByName;

    const auto execNetworkInfo = executableNetwork.GetExecGraphInfo();
    const auto execFunction = execNetworkInfo.getFunction();
    for (const auto& node : execFunction->get_ops()) {
        infoByName[node->get_friendly_name()] = node->get_rt_info();
    }

    return infoByName;
}
#ifndef NDEBUG
void LayerTestsCommon::showRuntimePrecisions() {
const auto execGraph = executableNetwork.GetExecGraphInfo();
@ -523,13 +571,17 @@ void LayerTestsCommon::showRuntimePrecisions() {
for (const auto& op : execFunction->get_ops()) {
const auto& rtInfo = op->get_rt_info();
const auto& nameIt = rtInfo.find("originalLayersNames");
const auto name = nameIt->second.as<std::string>();
const auto& typeIt = rtInfo.find("layerType");
const auto type = typeIt->second.as<std::string>();
const auto& it = rtInfo.find("runtimePrecision");
const auto& it = rtInfo.find("runtimePrecision");
const auto rtPrecisionPtr = it->second.as<std::string>();
std::cout << type << ": " << rtPrecisionPtr << std::endl;
std::cout << type << "(" << name << "): " << rtPrecisionPtr << std::endl;
}
}
#endif

View File

@ -8,8 +8,11 @@
#include <ngraph/ngraph.hpp>
#include <low_precision/layer_transformation.hpp>
#include "lpt_ngraph_functions/common/dequantization_operations.hpp"
#include "elementwise_function.hpp"
#include "lpt_ngraph_functions/common/builders.hpp"
#include "lpt_ngraph_functions/common/convolution.hpp"
#include "lpt_ngraph_functions/common/dequantization_operations.hpp"
#include "lpt_ngraph_functions/common/fake_quantize_on_data.hpp"
namespace ngraph {
namespace builder {
@ -53,7 +56,7 @@ inline std::ostream& operator<<(std::ostream& out, const AddExpectedValues& valu
"_mutliply" << values.mutliplyValuesAfter.size();
}
class AddFunction {
class AddFunction : public ElementwiseFunction {
public:
static std::shared_ptr<ngraph::Function> getOriginal(
const ngraph::element::Type precision,

View File

@ -15,8 +15,9 @@
#include "low_precision/network_helper.hpp"
#include "lpt_ngraph_functions/common/add.hpp"
#include "lpt_ngraph_functions/common/fake_quantize_on_data.hpp"
#include "lpt_ngraph_functions/common/convolution.hpp"
#include "lpt_ngraph_functions/common/dequantization_operations.hpp"
#include "lpt_ngraph_functions/common/fake_quantize_on_data.hpp"
#include "lpt_ngraph_functions/common/reshape.hpp"
#include "lpt_ngraph_functions/common/transpose.hpp"
@ -78,6 +79,8 @@ std::shared_ptr<ngraph::opset1::FakeQuantize> makeFakeQuantize(
const ngraph::element::Type precision,
const FakeQuantizeOnData& fqOnData);
std::shared_ptr<ngraph::opset1::Convolution> makeConvolution(const Output<Node>& output, const Convolution& convolution);
std::shared_ptr<ngraph::opset1::FakeQuantize> makeFakeQuantizeTypeRelaxed(
const Output<ngraph::Node>& output,
const ngraph::element::Type precision,

View File

@ -0,0 +1,36 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <memory>
#include <vector>
#include <ngraph/ngraph.hpp>
#include "constant.hpp"
#include "dequantization_operations.hpp"
namespace ngraph {
namespace builder {
namespace subgraph {
// Test-side description of a convolution: zero point on activations, weights constant
// and dequantization operations on weights.
class Convolution {
public:
Convolution();
// Parameter order matches the member order below, which is also the order used by
// brace-initialized test values.
Convolution(
const DequantizationOperations::Subtract zeroPointOnActivations,
const Constant& constantOnWeights,
const DequantizationOperations& dequantizationOnWeights);
// NOTE(review): the definition is not visible here; presumably reports whether the description is unset — confirm.
bool empty() const;
DequantizationOperations::Subtract zeroPointOnActivations;
Constant constantOnWeights;
DequantizationOperations dequantizationOnWeights;
};
} // namespace subgraph
} // namespace builder
} // namespace ngraph

View File

@ -0,0 +1,38 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <memory>
#include <ngraph/ngraph.hpp>
#include <low_precision/layer_transformation.hpp>
#include "lpt_ngraph_functions/common/builders.hpp"
#include "lpt_ngraph_functions/common/convolution.hpp"
#include "lpt_ngraph_functions/common/dequantization_operations.hpp"
#include "lpt_ngraph_functions/common/fake_quantize_on_data.hpp"
namespace ngraph {
namespace builder {
namespace subgraph {
class ElementwiseFunction {
public:
static std::shared_ptr<ngraph::Function> getOriginalSubgraphWithConvolutions(
const ngraph::element::Type precision,
const ngraph::PartialShape& inputShape,
const bool broadcast,
const std::string& elementWiseType,
const ngraph::builder::subgraph::FakeQuantizeOnData& fqOnDataBefore1,
const ngraph::builder::subgraph::Convolution& convolution1,
const ngraph::builder::subgraph::FakeQuantizeOnData& fqOnDataAfter1,
const ngraph::builder::subgraph::FakeQuantizeOnData& fqOnDataBefore2,
const ngraph::builder::subgraph::Convolution& convolution2,
const ngraph::builder::subgraph::FakeQuantizeOnData& fqOnDataAfter2,
const ngraph::builder::subgraph::FakeQuantizeOnData& fqOnDataAfter);
};
} // namespace subgraph
} // namespace builder
} // namespace ngraph

View File

@ -7,6 +7,7 @@
#include <memory>
#include <ngraph/ngraph.hpp>
#include "elementwise_function.hpp"
#include "lpt_ngraph_functions/common/constant.hpp"
#include "lpt_ngraph_functions/common/dequantization_operations.hpp"
@ -37,7 +38,7 @@ inline std::ostream& operator<<(std::ostream& out, const MultiplyValues& values)
return out << "_" << values.branch1 << "_" << values.branch2 << (values.isDequantization ? "_isDequantization" : "");
}
class MultiplyFunction {
class MultiplyFunction : public ElementwiseFunction {
public:
static std::shared_ptr<ngraph::Function> get(
const element::Type precision,
@ -46,9 +47,11 @@ public:
static std::shared_ptr<ngraph::Function> getOriginal(
const ngraph::element::Type precision,
const ngraph::PartialShape& inputShape,
const bool broadcast,
const ngraph::builder::subgraph::FakeQuantizeOnData& fqOnData1,
const ngraph::builder::subgraph::FakeQuantizeOnData& fqOnData2);
const bool broadcast1,
const ngraph::builder::subgraph::FakeQuantizeOnData& fq1,
const bool broadcast2,
const ngraph::builder::subgraph::FakeQuantizeOnData& fq2,
const ngraph::builder::subgraph::FakeQuantizeOnData& fqAfter);
};
} // namespace subgraph

View File

@ -2,6 +2,8 @@
// SPDX-License-Identifier: Apache-2.0
//
#include "lpt_ngraph_functions/add_function.hpp"
#include "low_precision/network_helper.hpp"
#include "low_precision/layer_transformation.hpp"
@ -9,7 +11,6 @@
#include "lpt_ngraph_functions/common/dequantization_operations.hpp"
#include "ngraph_functions/subgraph_builders.hpp"
#include "lpt_ngraph_functions/add_function.hpp"
using namespace ngraph::pass::low_precision;

View File

@ -205,6 +205,36 @@ std::shared_ptr<ngraph::opset1::FakeQuantize> makeFakeQuantize(
fqOnData.outputHighValues));
}
std::shared_ptr<ngraph::opset1::Convolution> makeConvolution(const Output<Node>& output, const Convolution& convolution) {
auto parentOnActivations = output;
if (!convolution.zeroPointOnActivations.empty()) {
auto constant = std::make_shared<ngraph::opset1::Constant>(
convolution.zeroPointOnActivations.outPrecision,
convolution.zeroPointOnActivations.constantShape,
convolution.zeroPointOnActivations.values);
parentOnActivations = std::make_shared<ngraph::opset1::Subtract>(parentOnActivations, constant);
}
assert(!convolution.constantOnWeights.empty());
ngraph::Output<ngraph::Node> weights = std::make_shared<ngraph::opset1::Constant>(
convolution.constantOnWeights.outPrecision,
convolution.constantOnWeights.shape,
convolution.constantOnWeights.values);
if (!convolution.dequantizationOnWeights.empty()) {
weights = makeDequantization(weights, convolution.dequantizationOnWeights);
}
return std::make_shared<ngraph::opset1::Convolution>(
parentOnActivations,
weights,
ngraph::Strides{ 1, 1 },
ngraph::CoordinateDiff{ 0, 0 },
ngraph::CoordinateDiff{ 0, 0 },
ngraph::Strides{ 1, 1 });
}
std::shared_ptr<ngraph::opset1::FakeQuantize> makeFakeQuantizeTypeRelaxed(
const Output<ngraph::Node>& output,
const ngraph::element::Type precision,

View File

@ -0,0 +1,29 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "lpt_ngraph_functions/common/convolution.hpp"
namespace ngraph {
namespace builder {
namespace subgraph {
// Default constructor: every member is default-constructed.
Convolution::Convolution() = default;
// Stores a copy of each argument in the data member of the same name.
Convolution::Convolution(
    const DequantizationOperations::Subtract zeroPointOnActivations,
    const Constant& constantOnWeights,
    const DequantizationOperations& dequantizationOnWeights)
    : zeroPointOnActivations(zeroPointOnActivations),
      constantOnWeights(constantOnWeights),
      dequantizationOnWeights(dequantizationOnWeights) {}
// Returns true only when the zero point, the weights constant and the weights
// dequantization are all empty; members are checked in that order.
bool Convolution::empty() const {
    if (!zeroPointOnActivations.empty()) {
        return false;
    }
    if (!constantOnWeights.empty()) {
        return false;
    }
    return dequantizationOnWeights.empty();
}
} // namespace subgraph
} // namespace builder
} // namespace ngraph

View File

@ -0,0 +1,119 @@
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "lpt_ngraph_functions/elementwise_function.hpp"
#include "low_precision/layer_transformation.hpp"
#include "ngraph/opsets/opset1.hpp"
#include "lpt_ngraph_functions/common/dequantization_operations.hpp"
using namespace ngraph::pass::low_precision;
namespace ngraph {
namespace builder {
namespace subgraph {
namespace {
// Creates a FakeQuantize on `parent` and gives stable friendly names to the
// operation (`name`) and to the nodes feeding its inputs 1..4
// (`name` + "/inputLow", "/inputHigh", "/outputLow", "/outputHigh").
std::shared_ptr<ngraph::opset1::FakeQuantize> makeFakeQuantizeWithNames(
    const Output<Node>& parent,
    const ngraph::element::Type precision,
    const ngraph::builder::subgraph::FakeQuantizeOnData& fqOnData,
    const std::string name) {
    auto fakeQuantize = ngraph::builder::subgraph::makeFakeQuantize(parent, precision, fqOnData);
    fakeQuantize->set_friendly_name(name);

    // Suffixes are listed in input order, starting from input 1 (input 0 is the data).
    const char* const intervalSuffixes[] = { "/inputLow", "/inputHigh", "/outputLow", "/outputHigh" };
    size_t port = 1;
    for (const char* const suffix : intervalSuffixes) {
        fakeQuantize->get_input_node_ptr(port)->set_friendly_name(name + suffix);
        ++port;
    }

    return fakeQuantize;
}
} // namespace
// Builds a test function with two input branches joined by an element-wise operation.
//
// Each branch is: Parameter -> [FakeQuantize] -> [Convolution] -> [FakeQuantize],
// where a bracketed stage is created only when its description is not empty.
// The branches are combined by Add ("add") or Multiply ("multiply"); when
// `fqOnDataAfter` is not empty the result is followed by FakeQuantize and MaxPool.
//
// precision        element type of both Parameters and of all FakeQuantize operations
// inputShape       shape of the first input; also of the second one unless `broadcast` is set
// broadcast        when true, dimensions 2 and 3 of the second input are set to 1
//                  (inputShape is indexed at 2 and 3, so it must have at least 4 dimensions)
// elementWiseType  "add" or "multiply"; any other value throws a transformation exception
//
// Returns the function with parameters { input1, input2 } and a single Result.
std::shared_ptr<ngraph::Function> ElementwiseFunction::getOriginalSubgraphWithConvolutions(
    const ngraph::element::Type precision,
    const ngraph::PartialShape& inputShape,
    const bool broadcast,
    const std::string& elementWiseType,
    const ngraph::builder::subgraph::FakeQuantizeOnData& fqOnDataBefore1,
    const ngraph::builder::subgraph::Convolution& convolution1,
    const ngraph::builder::subgraph::FakeQuantizeOnData& fqOnDataAfter1,
    const ngraph::builder::subgraph::FakeQuantizeOnData& fqOnDataBefore2,
    const ngraph::builder::subgraph::Convolution& convolution2,
    const ngraph::builder::subgraph::FakeQuantizeOnData& fqOnDataAfter2,
    const ngraph::builder::subgraph::FakeQuantizeOnData& fqOnDataAfter) {
    // Shape of the second input: spatial dimensions collapse to 1 when broadcasting.
    ngraph::PartialShape inputShape2 = inputShape;
    if (broadcast) {
        inputShape2[2] = 1;
        inputShape2[3] = 1;
    }

    // Creates one branch and returns its Parameter together with the last node of the branch.
    const auto makeBranch = [&precision](
        const ngraph::PartialShape& branchShape,
        const size_t index,
        const ngraph::builder::subgraph::FakeQuantizeOnData& fqOnDataBefore,
        const ngraph::builder::subgraph::Convolution& convolution,
        const ngraph::builder::subgraph::FakeQuantizeOnData& fqOnDataAfterBranch) ->
            std::pair<std::shared_ptr<ngraph::opset1::Parameter>, std::shared_ptr<ngraph::Node>> {
        const auto input = std::make_shared<ngraph::opset1::Parameter>(precision, branchShape);
        input->set_friendly_name("input" + std::to_string(index));
        std::shared_ptr<ngraph::Node> parent = input;

        if (!fqOnDataBefore.empty()) {
            parent = makeFakeQuantizeWithNames(parent, precision, fqOnDataBefore, "fakeQuantizeBefore" + std::to_string(index));
        }

        if (!convolution.empty()) {
            parent = makeConvolution(parent, convolution);
            parent->set_friendly_name("convolution" + std::to_string(index));
        }

        if (!fqOnDataAfterBranch.empty()) {
            parent = makeFakeQuantizeWithNames(parent, precision, fqOnDataAfterBranch, "fakeQuantizeAfter" + std::to_string(index));
        }

        return std::make_pair(input, parent);
    };

    const auto branch1 = makeBranch(inputShape, 1, fqOnDataBefore1, convolution1, fqOnDataAfter1);
    // The second branch uses inputShape2 so that the `broadcast` flag takes effect.
    const auto branch2 = makeBranch(inputShape2, 2, fqOnDataBefore2, convolution2, fqOnDataAfter2);

    std::shared_ptr<ngraph::Node> result;
    if (elementWiseType == "add") {
        result = std::make_shared<ngraph::opset1::Add>(branch1.second, branch2.second);
        result->set_friendly_name("add");
    } else if (elementWiseType == "multiply") {
        result = std::make_shared<ngraph::opset1::Multiply>(branch1.second, branch2.second);
        result->set_friendly_name("multiply");
    } else {
        THROW_TRANSFORMATION_EXCEPTION << "not supported element-wise operation type " << elementWiseType;
    }

    if (!fqOnDataAfter.empty()) {
        result = makeFakeQuantizeWithNames(result, precision, fqOnDataAfter, "fakeQuantizeAfter");

        // An extra operation is needed to move the dequantization operations away
        // from FakeQuantize, otherwise the cleanup passes fuse them back.
        result = std::make_shared<ngraph::opset1::MaxPool>(
            result,
            Strides{ 1, 1 },
            Shape{ 1, 1 },
            Shape{ 0, 0 },
            Shape{ 2, 2 },
            op::RoundingType::FLOOR);
        result->set_friendly_name("maxPool");
    }

    // Keep the Result strongly typed so that no downcast is needed for the ResultVector.
    const auto resultNode = std::make_shared<ngraph::opset1::Result>(result);
    resultNode->set_friendly_name("result");

    return std::make_shared<ngraph::Function>(
        ngraph::ResultVector{ resultNode },
        ngraph::ParameterVector{ branch1.first, branch2.first },
        "AddTransformation");
}
} // namespace subgraph
} // namespace builder
} // namespace ngraph

View File

@ -169,25 +169,25 @@ std::shared_ptr<ngraph::Function> GroupConvolutionFunction::getOriginal(
if (!fakeQuantizeOnData.empty()) {
parent = std::make_shared<ngraph::opset1::FakeQuantize>(
input,
std::make_shared<Constant>(
std::make_shared<ngraph::opset1::Constant>(
precision,
rankLength == 3 ?
Shape{ 1, fakeQuantizeOnData.inputLowValues.size(), 1 } :
Shape{ 1, fakeQuantizeOnData.inputLowValues.size(), 1, 1 },
fakeQuantizeOnData.inputLowValues),
std::make_shared<Constant>(
std::make_shared<ngraph::opset1::Constant>(
precision,
rankLength == 3 ?
Shape{ 1, fakeQuantizeOnData.inputHighValues.size(), 1 } :
Shape{ 1, fakeQuantizeOnData.inputHighValues.size(), 1, 1 },
fakeQuantizeOnData.inputHighValues),
std::make_shared<Constant>(
std::make_shared<ngraph::opset1::Constant>(
precision,
rankLength == 3 ?
Shape{ 1, fakeQuantizeOnData.outputLowValues.size(), 1 } :
Shape{ 1, fakeQuantizeOnData.outputLowValues.size(), 1, 1 },
fakeQuantizeOnData.outputLowValues),
std::make_shared<Constant>(
std::make_shared<ngraph::opset1::Constant>(
precision,
rankLength == 3 ?
Shape{ 1, fakeQuantizeOnData.outputHighValues.size(), 1 } :

View File

@ -90,22 +90,32 @@ std::shared_ptr<ngraph::Function> MultiplyFunction::get(
std::shared_ptr<ngraph::Function> MultiplyFunction::getOriginal(
const ngraph::element::Type precision,
const ngraph::PartialShape& inputShape,
const bool broadcast,
const bool broadcast1,
const ngraph::builder::subgraph::FakeQuantizeOnData& fq1,
const ngraph::builder::subgraph::FakeQuantizeOnData& fq2) {
auto inputShape2 = inputShape;
const bool broadcast2,
const ngraph::builder::subgraph::FakeQuantizeOnData& fq2,
const ngraph::builder::subgraph::FakeQuantizeOnData& fqAfter) {
auto inputShape1 = inputShape;
if (broadcast1) {
inputShape1[2] = 1;
inputShape1[3] = 1;
}
if (broadcast) {
auto inputShape2 = inputShape;
if (broadcast2) {
inputShape2[2] = 1;
inputShape2[3] = 1;
}
const auto input1 = std::make_shared<ngraph::opset1::Parameter>(precision, inputShape);
const auto input1 = std::make_shared<ngraph::opset1::Parameter>(precision, inputShape1);
const auto fakeQuantize1 = fq1.empty() ?
nullptr :
ngraph::builder::makeFakeQuantize(
input1, precision, fq1.quantizationLevel, fq1.constantShape,
fq1.inputLowValues, fq1.inputHighValues, fq1.outputLowValues, fq1.outputHighValues);
if (fakeQuantize1 != nullptr) {
fakeQuantize1->set_friendly_name("fakeQuantize1");
}
const auto input2 = std::make_shared<ngraph::opset1::Parameter>(precision, inputShape2);
const auto fakeQuantize2 = fq2.empty() ?
@ -113,12 +123,24 @@ std::shared_ptr<ngraph::Function> MultiplyFunction::getOriginal(
ngraph::builder::makeFakeQuantize(
input2, precision, fq2.quantizationLevel, fq2.constantShape,
fq2.inputLowValues, fq2.inputHighValues, fq2.outputLowValues, fq2.outputHighValues);
if (fakeQuantize2 != nullptr) {
fakeQuantize2->set_friendly_name("fakeQuantize2");
}
const auto multiply = std::make_shared<ngraph::opset1::Multiply>(
fq1.empty() ? input1 : fakeQuantize1,
fq2.empty() ? input2 : fakeQuantize2);
multiply->set_friendly_name("multiply");
ngraph::ResultVector results{ std::make_shared<ngraph::opset1::Result>(multiply) };
auto const fakeQuantizeAfter = fqAfter.empty() ?
nullptr :
makeFakeQuantize(multiply, precision, fqAfter);
if (fakeQuantizeAfter != nullptr) {
fakeQuantizeAfter->set_friendly_name("fakeQuantizeAfter");
}
const std::shared_ptr<Node> result = fakeQuantizeAfter == nullptr ? std::dynamic_pointer_cast<Node>(multiply) : fakeQuantizeAfter;
ngraph::ResultVector results{ std::make_shared<ngraph::opset1::Result>(result) };
std::shared_ptr<ngraph::Function> function = std::make_shared<ngraph::Function>(
results,
ngraph::ParameterVector{ input1, input2 },