[LPT] Copy constant with several outputs before blob update (cherry-pick to master) (#2198)
* [LPT] Copy constant implementation
* [LPT] the same Constant ops as FQ interval boundaries
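The second bullet refers to how the test subgraphs are built: a single Constant node is wired into several FakeQuantize inputs (the same Constant is used for both the input and the output boundary on each side), so the Constant ends up with several consumers. That is the multi-output-constant case the copy-constant-before-blob-update logic has to handle, and it is what the updated MatMulWithOptimizedConstantFakeQuantizeFunction in the diff below exercises. A minimal, self-contained sketch of the idea (shapes, values and names here are illustrative, not taken from the repository):

#include <memory>
#include <vector>
#include <ngraph/ngraph.hpp>
#include <ngraph/opsets/opset1.hpp>

// Build a FakeQuantize whose interval boundaries reuse the same Constant ops,
// so each Constant has more than one consumer in the graph.
std::shared_ptr<ngraph::Function> makeFqWithSharedConstants() {
    using namespace ngraph;
    const auto input = std::make_shared<opset1::Parameter>(element::f32, Shape{ 1, 16 });

    // One Constant per boundary value; each is wired into two FakeQuantize inputs.
    const auto low = std::make_shared<opset1::Constant>(element::f32, Shape{ 1 }, std::vector<float>{ 0.f });
    const auto high = std::make_shared<opset1::Constant>(element::f32, Shape{ 1 }, std::vector<float>{ 2.55f });

    const auto fq = std::make_shared<opset1::FakeQuantize>(
        input,
        low,    // input_low
        high,   // input_high
        low,    // output_low  -- same Constant as input_low
        high,   // output_high -- same Constant as input_high
        256);   // quantization levels

    ResultVector results{ std::make_shared<opset1::Result>(fq) };
    return std::make_shared<Function>(results, ParameterVector{ input }, "FqWithSharedConstants");
}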
@@ -11,8 +11,7 @@ using namespace LayerTestsDefinitions;
 namespace {
 
 const std::vector<InferenceEngine::Precision> netPrecisions = {
-    InferenceEngine::Precision::FP32,
-    InferenceEngine::Precision::FP16
+    InferenceEngine::Precision::FP32
 };
 
 const std::vector<LayerTestsDefinitions::MatMulWithOptimizedConstantFakeQuantizeTransformationTestValues> params = {
@@ -22,10 +21,15 @@ const std::vector<LayerTestsDefinitions::MatMulWithOptimizedConstantFakeQuantize
     },
 };
 
+const std::vector<std::pair<InferenceEngine::SizeVector, InferenceEngine::SizeVector>> inputShapes = {
+    std::pair<InferenceEngine::SizeVector, InferenceEngine::SizeVector>({ InferenceEngine::SizeVector({ 1, 16 }), InferenceEngine::SizeVector({ 10, 16 }) }),
+    std::pair<InferenceEngine::SizeVector, InferenceEngine::SizeVector>({ InferenceEngine::SizeVector({ 1, 16 }), InferenceEngine::SizeVector({ 16, 10 }) })
+};
+
 INSTANTIATE_TEST_CASE_P(LPT, MatMulWithOptimizedConstantFakeQuantizeTransformation,
     ::testing::Combine(
         ::testing::ValuesIn(netPrecisions),
-        ::testing::Values(InferenceEngine::SizeVector({ 1, 16 })),
+        ::testing::ValuesIn(inputShapes),
         ::testing::Values(CommonTestUtils::DEVICE_CPU),
         ::testing::ValuesIn(params)),
     MatMulWithOptimizedConstantFakeQuantizeTransformation::getTestCaseName);
@@ -21,10 +21,15 @@ const std::vector<LayerTestsDefinitions::MatMulWithOptimizedConstantFakeQuantize
     }
 };
 
+const std::vector<std::pair<InferenceEngine::SizeVector, InferenceEngine::SizeVector>> inputShapes = {
+    std::pair<InferenceEngine::SizeVector, InferenceEngine::SizeVector>({ InferenceEngine::SizeVector({ 1, 16 }), InferenceEngine::SizeVector({ 10, 16 }) }),
+    std::pair<InferenceEngine::SizeVector, InferenceEngine::SizeVector>({ InferenceEngine::SizeVector({ 1, 16 }), InferenceEngine::SizeVector({ 16, 10 }) })
+};
+
 INSTANTIATE_TEST_CASE_P(LPT, MatMulWithOptimizedConstantFakeQuantizeTransformation,
     ::testing::Combine(
         ::testing::ValuesIn(netPrecisions),
-        ::testing::Values(InferenceEngine::SizeVector({ 1, 16 })),
+        ::testing::ValuesIn(inputShapes),
         ::testing::Values(CommonTestUtils::DEVICE_GPU),
         ::testing::ValuesIn(params)),
     MatMulWithOptimizedConstantFakeQuantizeTransformation::getTestCaseName);
@@ -20,7 +20,7 @@ public:
 
 typedef std::tuple<
     InferenceEngine::Precision,
-    InferenceEngine::SizeVector,
+    std::pair<InferenceEngine::SizeVector, InferenceEngine::SizeVector>,
     std::string,
     MatMulWithOptimizedConstantFakeQuantizeTransformationTestValues
 > MatMulWithOptimizedConstantFakeQuantizeTransformationTransformationParams;
@@ -23,16 +23,16 @@ namespace LayerTestsDefinitions {
 std::string MatMulWithOptimizedConstantFakeQuantizeTransformation::getTestCaseName(
     testing::TestParamInfo<MatMulWithOptimizedConstantFakeQuantizeTransformationTransformationParams> obj) {
     InferenceEngine::Precision netPrecision;
-    InferenceEngine::SizeVector inputShape;
+    std::pair<InferenceEngine::SizeVector, InferenceEngine::SizeVector> shapes;
     std::string targetDevice;
     InferenceEngine::details::LayerTransformation::Params params;
     MatMulWithOptimizedConstantFakeQuantizeTransformationTestValues param;
 
-    std::tie(netPrecision, inputShape, targetDevice, param) = obj.param;
+    std::tie(netPrecision, shapes, targetDevice, param) = obj.param;
 
     std::ostringstream result;
     result << netPrecision.name() << "_" <<
-        CommonTestUtils::vec2str(inputShape) << "_" <<
+        CommonTestUtils::vec2str(shapes.first) << "_" << CommonTestUtils::vec2str(shapes.second) << "_" <<
         targetDevice << "_" <<
         param.fqOnData << "_" <<
         param.fqOnWeights;
@@ -43,15 +43,16 @@ void MatMulWithOptimizedConstantFakeQuantizeTransformation::SetUp() {
     threshold = 0.01f;
 
     InferenceEngine::Precision netPrecision;
-    InferenceEngine::SizeVector inputShape;
+    std::pair<InferenceEngine::SizeVector, InferenceEngine::SizeVector> shapes;
     InferenceEngine::details::LayerTransformation::Params params;
     MatMulWithOptimizedConstantFakeQuantizeTransformationTestValues param;
-    std::tie(netPrecision, inputShape, targetDevice, param) = this->GetParam();
+    std::tie(netPrecision, shapes, targetDevice, param) = this->GetParam();
     auto precision = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
 
     function = ngraph::builder::subgraph::MatMulWithOptimizedConstantFakeQuantizeFunction::getOriginal(
         precision,
-        inputShape,
+        shapes.first,
+        shapes.second,
         param.fqOnData,
         param.fqOnWeights);
 }
@@ -16,7 +16,8 @@ class MatMulWithOptimizedConstantFakeQuantizeFunction {
 public:
     static std::shared_ptr<ngraph::Function> getOriginal(
         const ngraph::element::Type precision,
-        const ngraph::Shape& inputShape,
+        const ngraph::Shape& inputShape1,
+        const ngraph::Shape& inputShape2,
         const FakeQuantizeOnData& fqOnData,
         const FakeQuantizeOnData& fqOnWeights);
 };
@@ -13,34 +13,41 @@ namespace subgraph {
 
 std::shared_ptr<ngraph::Function> MatMulWithOptimizedConstantFakeQuantizeFunction::getOriginal(
     const ngraph::element::Type precision,
-    const ngraph::Shape& inputShape,
+    const ngraph::Shape& inputShape1,
+    const ngraph::Shape& inputShape2,
     const FakeQuantizeOnData& fqOnData,
     const FakeQuantizeOnData& fqOnWeights) {
-    const auto input = std::make_shared<ngraph::opset1::Parameter>(precision, ngraph::Shape(inputShape));
-    const auto fakeQuantizeOnActivations = fqOnData.empty() ?
-        nullptr :
-        ngraph::builder::makeFakeQuantize(
-            input, precision, fqOnData.quantizationLevel, fqOnData.constantShape,
-            fqOnData.inputLowValues, fqOnData.inputHighValues, fqOnData.outputLowValues, fqOnData.outputHighValues);
+    const auto input = std::make_shared<ngraph::opset1::Parameter>(precision, ngraph::Shape(inputShape1));
 
-    const ngraph::Shape weightsShape = { inputShape[1], 10 };
+    const auto lowConstantOnActivations = std::make_shared<ngraph::opset1::Constant>(precision, fqOnData.constantShape, fqOnData.inputLowValues);
+    const auto highConstantOnActivations = std::make_shared<ngraph::opset1::Constant>(precision, fqOnData.constantShape, fqOnData.inputHighValues);
+    const auto fakeQuantizeOnActivations = std::make_shared<ngraph::opset1::FakeQuantize>(
+        input,
+        lowConstantOnActivations,
+        highConstantOnActivations,
+        lowConstantOnActivations,
+        highConstantOnActivations,
+        fqOnWeights.quantizationLevel);
 
+    const ngraph::Shape weightsShape = { inputShape2[0], inputShape1[1] };
     const std::vector<float> weigths(weightsShape[0] * weightsShape[1], 10.f);
 
     const auto weightsConst = std::make_shared<ngraph::opset1::Constant>(precision, weightsShape, weigths);
-    const auto lowConstant = std::make_shared<ngraph::opset1::Constant>(precision, fqOnWeights.constantShape, fqOnWeights.inputLowValues);
-    const auto highConstant = std::make_shared<ngraph::opset1::Constant>(precision, fqOnWeights.constantShape, fqOnWeights.inputHighValues);
+    const auto lowConstantOnWeights = std::make_shared<ngraph::opset1::Constant>(precision, fqOnWeights.constantShape, fqOnWeights.inputLowValues);
+    const auto highConstantOnWeights = std::make_shared<ngraph::opset1::Constant>(precision, fqOnWeights.constantShape, fqOnWeights.inputHighValues);
     const auto fakeQuantizeOnWeights = std::make_shared<ngraph::opset1::FakeQuantize>(
         weightsConst,
-        lowConstant,
-        highConstant,
-        lowConstant,
-        highConstant,
+        lowConstantOnWeights,
+        highConstantOnWeights,
+        lowConstantOnWeights,
+        highConstantOnWeights,
         fqOnWeights.quantizationLevel);
 
     const auto matMul = std::make_shared<ngraph::opset1::MatMul>(
-        fqOnData.empty() ? input : fakeQuantizeOnActivations,
+        fakeQuantizeOnActivations,
         fakeQuantizeOnWeights,
         false,
-        false);
+        inputShape1[1] != inputShape2[0]);
 
     ngraph::ResultVector results{ std::make_shared<ngraph::opset1::Result>(matMul) };
     return std::make_shared<ngraph::Function>(results, ngraph::ParameterVector{ input }, "MatMulWithOptimizedConstantFakeQuantizeFunction");