[LPT] Multiinput with one parent and FQ with three Constant (#2066)

* [LPT] FakeQuantize with three constants

* [LPT] Dequantization ops on the inputs with one parent
This commit is contained in:
Edward Shogulin 2020-09-07 20:31:45 +03:00 committed by GitHub
parent b225ddf414
commit dc8bbd930f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
17 changed files with 616 additions and 74 deletions

View File

@ -140,7 +140,9 @@ public:
static void replaceLayer(TransformationContext& context, const CNNLayerPtr source, const CNNLayerPtr target);
static CNNLayerPtr addScaleShiftBetween(
// Add ScaleShift between parent and child layers. Affected edges (output and input ports) are not specified.
// As result ScaleShift will be added for all edges between parent and children.
static std::vector<CNNLayerPtr> addScaleShiftBetween(
TransformationContext& context,
const CNNLayerPtr parent,
const CNNLayerPtr child,
@ -158,7 +160,8 @@ public:
DataPtr parentOutData,
CNNLayer::Ptr layer,
const std::string& nextLayerName,
ICNNNetwork& net);
ICNNNetwork& net,
const int childInsDataIndex = -1);
IE_SUPPRESS_DEPRECATED_START
static void fillInScaleShift(ScaleShiftLayer* layer, const size_t channels, const float* scales, const float* shifts);

View File

@ -105,8 +105,14 @@ void ActivationTransformation::transform(TransformationContext& context, CNNLaye
const std::vector<CNNLayerPtr> children = CNNNetworkHelper::getChildren(*activationLayer);
for (const CNNLayerPtr& child : children) {
CNNLayerPtr dequantizationLayer = CNNNetworkHelper::addScaleShiftBetween(context, activationLayer, child,
DequantizationDetails(scales, shifts));
context.dequantizationLayersNames.insert(dequantizationLayer->name);
const std::vector<CNNLayerPtr> dequantizationLayers = CNNNetworkHelper::addScaleShiftBetween(
context,
activationLayer,
child,
DequantizationDetails(scales, shifts));
for (const auto& dequantizationLayer : dequantizationLayers) {
context.dequantizationLayersNames.insert(dequantizationLayer->name);
}
}
}

View File

@ -253,12 +253,15 @@ void ConcatTransformation::addDequantizationLayers(
getLayerDequantizationCallback(*layer, layer->name, layerDequantizationScales, layerDequantizationShifts);
}
CNNLayerPtr dequantizationLayer = CNNNetworkHelper::addScaleShiftBetween(
const std::vector<CNNLayerPtr> dequantizationLayers = CNNNetworkHelper::addScaleShiftBetween(
context,
std::make_shared<CNNLayer>(*layer),
child,
DequantizationDetails(layerDequantizationScales, layerDequantizationShifts, layerDequantizationScales.size()));
context.dequantizationLayersNames.insert(dequantizationLayer->name);
for (const CNNLayerPtr& dequantizationLayer : dequantizationLayers) {
context.dequantizationLayersNames.insert(dequantizationLayer->name);
}
}
}
@ -275,14 +278,17 @@ void ConcatTransformation::addDequantizationLayers(
getLayerDequantizationCallback(*layer, originalName, layerDequantizationScales, layerDequantizationShifts);
}
CNNLayerPtr dequantizationLayer = CNNNetworkHelper::addScaleShiftBetween(
const std::vector<CNNLayerPtr> dequantizationLayers = CNNNetworkHelper::addScaleShiftBetween(
context,
std::make_shared<CNNLayer>(*layer),
nullptr,
DequantizationDetails(layerDequantizationScales, layerDequantizationShifts, layerDequantizationScales.size()),
originalName);
context.dequantizationLayersNames.insert(dequantizationLayer->name);
subgraph.layers[dequantizationLayer->name] = dequantizationLayer.get();
for (const CNNLayerPtr& dequantizationLayer : dequantizationLayers) {
context.dequantizationLayersNames.insert(dequantizationLayer->name);
subgraph.layers[dequantizationLayer->name] = dequantizationLayer.get();
}
}
}
}

View File

@ -254,12 +254,15 @@ void LayerTransformation::addDequantizationLayer(
const std::vector<CNNLayerPtr> children = CNNNetworkHelper::getChildren(layer);
for (const CNNLayerPtr& child : children) {
const CNNLayerPtr dequantizationLayer = CNNNetworkHelper::addScaleShiftBetween(
const std::vector<CNNLayerPtr> dequantizationLayers = CNNNetworkHelper::addScaleShiftBetween(
context,
std::make_shared<CNNLayer>(layer),
child,
DequantizationDetails(dequantizationScales, dequantizationShifts, outputChannelsCount));
context.dequantizationLayersNames.insert(dequantizationLayer->name);
for (const auto& dequantizationLayer : dequantizationLayers) {
context.dequantizationLayersNames.insert(dequantizationLayer->name);
}
}
OutputsDataMap outputs;
@ -269,13 +272,16 @@ void LayerTransformation::addDequantizationLayer(
const std::string dequantizationLayerName = layer.name;
CNNNetworkHelper::renameLayer(context.network, layer.name, layer.name + LayerTransformation::lastLayerPostfix);
const CNNLayerPtr dequantizationLayer = CNNNetworkHelper::addScaleShiftBetween(
const std::vector<CNNLayerPtr> dequantizationLayers = CNNNetworkHelper::addScaleShiftBetween(
context,
std::make_shared<CNNLayer>(layer),
nullptr,
DequantizationDetails(dequantizationScales, dequantizationShifts, outputChannelsCount),
dequantizationLayerName);
context.dequantizationLayersNames.insert(dequantizationLayer->name);
for (const auto& dequantizationLayer : dequantizationLayers) {
context.dequantizationLayersNames.insert(dequantizationLayer->name);
}
}
}

View File

@ -439,8 +439,7 @@ std::vector<CNNLayerPtr> CNNNetworkHelper::transformFakeQuantizeToConst(Transfor
const CNNLayerPtr fakeQuantize,
const Blob::Ptr weights,
const std::string& constLayerName) {
std::vector<CNNLayerPtr> constLayersToRemove;
constLayersToRemove.reserve(fakeQuantize->insData.size());
std::set<CNNLayerPtr> constLayersToRemove;
for (const DataWeakPtr& insDataWeak : fakeQuantize->insData) {
const DataPtr insData = insDataWeak.lock();
@ -456,7 +455,7 @@ std::vector<CNNLayerPtr> CNNNetworkHelper::transformFakeQuantizeToConst(Transfor
<< fakeQuantize->name << "' is nullable";
}
constLayersToRemove.push_back(parent);
constLayersToRemove.insert(parent);
}
for (const CNNLayerPtr& parent : constLayersToRemove) {
@ -1049,7 +1048,7 @@ void CNNNetworkHelper::replaceLayer(TransformationContext& context, const CNNLay
networkImpl->addLayer(target);
}
CNNLayerPtr CNNNetworkHelper::addScaleShiftBetween(TransformationContext& context, const CNNLayerPtr parent,
std::vector<CNNLayerPtr> CNNNetworkHelper::addScaleShiftBetween(TransformationContext& context, const CNNLayerPtr parent,
const CNNLayerPtr child,
const DequantizationDetails& dequantizationDetails,
const std::string& name) {
@ -1078,66 +1077,92 @@ CNNLayerPtr CNNNetworkHelper::addScaleShiftBetween(TransformationContext& contex
CNNNetworkHelper::updateBlobs(*child, "biases", updatedShifts);
}
return child;
return { child };
}
// Searching the connection between the layers
int l1_out_i = 0;
// specify parent/child edges here and manipulate with them below
std::vector<int> parentOutDataIndexes;
std::vector<int> childInsDataIndexes;
if (child != nullptr) {
for (; l1_out_i < parent->outData.size(); l1_out_i++) {
if (getInputTo(parent->outData[l1_out_i]).find(child->name) !=
getInputTo(parent->outData[l1_out_i]).end()) {
break;
for (int l1_out_i = 0; l1_out_i < parent->outData.size(); l1_out_i++) {
auto& inputTo = getInputTo(parent->outData[l1_out_i]);
if (inputTo.find(child->name) != inputTo.end()) {
parentOutDataIndexes.push_back(l1_out_i);
}
}
for (size_t i = 0; i < child->insData.size(); ++i) {
const auto& insData = child->insData[i];
const CNNLayerPtr& creatorLayer = getCreatorLayer(insData.lock()).lock();
if (creatorLayer->name == parent->name) {
childInsDataIndexes.push_back(i);
}
}
} else {
parentOutDataIndexes.push_back(0);
childInsDataIndexes.push_back(0);
}
if (l1_out_i == parent->outData.size()) {
if (childInsDataIndexes.empty()) {
if (child != nullptr)
THROW_IE_EXCEPTION << "Can't find layer " << child->name << " among layer " << parent->name << " outputs";
else
THROW_IE_EXCEPTION << "Layer '" << parent->name << "' has invalid output";
}
DataPtr outData = parent->outData[l1_out_i];
std::vector<CNNLayerPtr> ssCnnLayers;
ssCnnLayers.reserve(childInsDataIndexes.size());
for (int l1_out_i : parentOutDataIndexes) {
DataPtr outData = parent->outData[l1_out_i];
std::string layerName = name.empty() ? (child != nullptr ? (parent->name + "_ScaleShift_" + child->name)
: (parent->name + "_ScaleShift"))
: name;
for (int i = 0; i < childInsDataIndexes.size(); ++i) {
const int childInsDataIndex = childInsDataIndexes[i];
std::string layerName = name.empty() ?
(child != nullptr ?
(parent->name + "_ScaleShift" + (childInsDataIndexes.size() == 1 ? "" : std::to_string(childInsDataIndex)) + "_" + child->name) :
(parent->name + "_ScaleShift" + (childInsDataIndexes.size() == 1 ? "" : std::to_string(childInsDataIndex))))
: name;
Precision ssPrecision = context.getOriginalLayerPrecision(parent->name, outData->getName());
if (ssPrecision == Precision::UNSPECIFIED) {
if (child != nullptr)
ssPrecision = child->precision;
else
ssPrecision = Precision::FP32;
}
Precision ssPrecision = context.getOriginalLayerPrecision(parent->name, outData->getName());
if (ssPrecision == Precision::UNSPECIFIED) {
if (child != nullptr)
ssPrecision = child->precision;
else
ssPrecision = Precision::FP32;
}
LayerParams ssCnnLayerParams {layerName, "ScaleShift", ssPrecision};
CNNLayerPtr ssCnnLayer(new ScaleShiftLayer(ssCnnLayerParams));
LayerParams ssCnnLayerParams{ layerName, "ScaleShift", ssPrecision };
CNNLayerPtr ssCnnLayer(new ScaleShiftLayer(ssCnnLayerParams));
const std::vector<size_t> dims = outData->getDims();
const std::vector<size_t> dims = outData->getDims();
if ((dims.size() != 2ul) || ((dims.size() == 2ul) && (dims[0] != dequantizationDetails.channelsCount))) {
if ((dims.size() > 1) && (dims[1] != dequantizationDetails.channelsCount)) {
THROW_IE_EXCEPTION << "unexpected parent channels count " << dims[1];
if ((dims.size() != 2ul) || ((dims.size() == 2ul) && (dims[0] != dequantizationDetails.channelsCount))) {
if ((dims.size() > 1) && (dims[1] != dequantizationDetails.channelsCount)) {
THROW_IE_EXCEPTION << "unexpected parent channels count " << dims[1];
}
}
addLayerToCNNNetworkAfterData(outData, ssCnnLayer, child != nullptr ? child->name : "", context.network, childInsDataIndex);
{
ScaleShiftLayer* scshLayer = dynamic_cast<ScaleShiftLayer*>(ssCnnLayer.get());
if (scshLayer == nullptr) {
THROW_IE_EXCEPTION << "Layer " << ssCnnLayer->name << " is not instance of ScaleShiftLayer class";
}
fillInScaleShift(
scshLayer,
dequantizationDetails.channelsCount,
dequantizationDetails.scales.data(),
dequantizationDetails.shifts.data());
}
CNNNetworkHelper::setOutDataPrecision(*ssCnnLayer, ssPrecision);
ssCnnLayers.push_back(ssCnnLayer);
}
}
addLayerToCNNNetworkAfterData(outData, ssCnnLayer, child != nullptr ? child->name : "", context.network);
{
ScaleShiftLayer* scshLayer = dynamic_cast<ScaleShiftLayer*>(ssCnnLayer.get());
if (scshLayer == nullptr) {
THROW_IE_EXCEPTION << "Layer " << ssCnnLayer->name << " is not instance of ScaleShiftLayer class";
}
fillInScaleShift(
scshLayer,
dequantizationDetails.channelsCount,
dequantizationDetails.scales.data(),
dequantizationDetails.shifts.data());
}
CNNNetworkHelper::setOutDataPrecision(*ssCnnLayer, ssPrecision);
return ssCnnLayer;
return ssCnnLayers;
}
CNNLayerPtr CNNNetworkHelper::addConstBetween(ICNNNetwork& net, const CNNLayerPtr layer1, const CNNLayerPtr layer2,
@ -1177,7 +1202,8 @@ void CNNNetworkHelper::addLayerToCNNNetworkAfterData(
DataPtr parentOutData,
CNNLayer::Ptr layer,
const std::string& nextLayerName,
ICNNNetwork& net) {
ICNNNetwork& net,
const int childInsDataIndex) {
CNNNetworkImpl* netImpl = dynamic_cast<CNNNetworkImpl*>(&net);
if (netImpl == nullptr) {
THROW_IE_EXCEPTION << "unexpected network type";
@ -1188,7 +1214,7 @@ void CNNNetworkHelper::addLayerToCNNNetworkAfterData(
netImpl->getLayerByName(nextLayerName.c_str(), nextLayer, nullptr);
}
if (layer && (nextLayerName.empty() || (parentOutData == nullptr) ||
if (layer && (nextLayerName.empty() || (parentOutData == nullptr) || (childInsDataIndex != -1) ||
(getInputTo(parentOutData).find(nextLayerName) != getInputTo(parentOutData).end()))) {
auto getTensorDesc = [](CNNLayerPtr& nextLayer) {
const DataPtr insData = nextLayer->insData[0].lock();
@ -1222,12 +1248,18 @@ void CNNNetworkHelper::addLayerToCNNNetworkAfterData(
if (!nextLayerName.empty()) {
// CNNLayerPtr nextLayer = getInputTo(parentOutData)[nextLayerName];
getInputTo(newEdgeAfterLayer)[nextLayerName] = nextLayer;
if (parentOutData != nullptr) {
getInputTo(parentOutData).erase(nextLayerName);
for (size_t i = 0; i < nextLayer->insData.size(); i++) {
if (nextLayer->insData[i].lock() == parentOutData) {
nextLayer->insData[i] = newEdgeAfterLayer;
if (childInsDataIndex == -1) {
for (size_t i = 0; i < nextLayer->insData.size(); i++) {
if (nextLayer->insData[i].lock() == parentOutData) {
nextLayer->insData[i] = newEdgeAfterLayer;
}
}
} else {
nextLayer->insData[childInsDataIndex] = newEdgeAfterLayer;
}
} else {
// TODO: why new?
@ -1348,20 +1380,21 @@ size_t CNNNetworkHelper::disconnectLayers(CNNNetworkImpl* network, const CNNLaye
bool wasFound = false;
for (auto dataIt = parentLayer->outData.begin(); dataIt != parentLayer->outData.end(); ++dataIt) {
auto data = *dataIt;
for (auto inputIt = getInputTo(data).begin(); inputIt != getInputTo(data).end(); ++inputIt) {
auto inputIt = getInputTo(data).begin();
while (inputIt != getInputTo(data).end()) {
auto currentChildLayer = inputIt->second;
if (currentChildLayer == nullptr) {
THROW_IE_EXCEPTION << "Output layer for '" << parentLayer->name << "'is absent";
}
if (currentChildLayer->name == childLayer->name) {
getInputTo(data).erase(inputIt);
wasFound = true;
break;
}
}
if (wasFound) {
break;
if (currentChildLayer->name == childLayer->name) {
inputIt = getInputTo(data).erase(inputIt);
wasFound = true;
continue;
}
++inputIt;
}
}
if (!wasFound) {
@ -1370,7 +1403,8 @@ size_t CNNNetworkHelper::disconnectLayers(CNNNetworkImpl* network, const CNNLaye
}
wasFound = false;
for (auto it = childLayer->insData.begin(); it != childLayer->insData.end(); ++it) {
auto it = childLayer->insData.begin();
while (it != childLayer->insData.end()) {
auto data = it->lock();
if (data == nullptr) {
THROW_IE_EXCEPTION << "Input layer data for '" << childLayer->name << "'is absent";
@ -1379,11 +1413,14 @@ size_t CNNNetworkHelper::disconnectLayers(CNNNetworkImpl* network, const CNNLaye
if (currentParentLayer == nullptr) {
THROW_IE_EXCEPTION << "Input layer for '" << childLayer->name << "'is absent";
}
if (currentParentLayer->name == parentLayer->name) {
childLayer->insData.erase(it);
it = childLayer->insData.erase(it);
wasFound = true;
break;
continue;
}
++it;
}
if (!wasFound) {
THROW_IE_EXCEPTION << "Input layer '" << parentLayer->name << "' was not found for '" << childLayer->name

View File

@ -0,0 +1,32 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <vector>
#include "low_precision_transformations/mat_mul_with_optimized_constant_fake_quantize_transformation.hpp"
#include "common_test_utils/test_constants.hpp"
using namespace LayerTestsDefinitions;
namespace {
// Network precisions the transformation test is instantiated with.
const std::vector<InferenceEngine::Precision> netPrecisions = {
InferenceEngine::Precision::FP32,
InferenceEngine::Precision::FP16
};
// Test values: the first initializer is the FakeQuantize on activations (fqOnData),
// the second the FakeQuantize on weights (fqOnWeights); each is
// { levels, constant shape, input low, input high, output low, output high }.
const std::vector<LayerTestsDefinitions::MatMulWithOptimizedConstantFakeQuantizeTransformationTestValues> params = {
{
{ 256ul, ngraph::Shape { 1 }, { 0.f }, { 25.5f }, { 0.f }, { 25.5f } },
{ 255ul, ngraph::Shape { 1 }, { -12.8f }, { 12.7f }, { -12.8f }, { 12.7f } }
},
};
// Registers the parameterized test on CPU for the cross product of
// precisions x a fixed 1x16 input shape x the values above.
INSTANTIATE_TEST_CASE_P(LPT, MatMulWithOptimizedConstantFakeQuantizeTransformation,
::testing::Combine(
::testing::ValuesIn(netPrecisions),
::testing::Values(InferenceEngine::SizeVector({ 1, 16 })),
::testing::Values(CommonTestUtils::DEVICE_CPU),
::testing::ValuesIn(params)),
MatMulWithOptimizedConstantFakeQuantizeTransformation::getTestCaseName);
} // namespace

View File

@ -0,0 +1,31 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <vector>
#include "low_precision_transformations/multiply_with_one_parent_transformation.hpp"
#include "common_test_utils/test_constants.hpp"
using namespace LayerTestsDefinitions;
namespace {
// Network precisions the transformation test is instantiated with.
const std::vector<InferenceEngine::Precision> netPrecisions = {
InferenceEngine::Precision::FP32,
InferenceEngine::Precision::FP16
};
// FakeQuantize descriptor applied before the Multiply:
// { levels, constant shape, input low, input high, output low, output high }.
const std::vector<MultiplyWithOneParentTransformationValues> values = {
{
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 255.f }, { 0.f }, { 255.f } }
}
};
// Registers the parameterized test on CPU for the cross product of
// precisions x a fixed 1x3x16x16 input shape x the values above.
INSTANTIATE_TEST_CASE_P(LPT, MultiplyWithOneParentTransformation,
::testing::Combine(
::testing::ValuesIn(netPrecisions),
::testing::Values(InferenceEngine::SizeVector({ 1, 3, 16, 16 })),
::testing::Values(CommonTestUtils::DEVICE_CPU),
::testing::ValuesIn(values)),
MultiplyWithOneParentTransformation::getTestCaseName);
} // namespace

View File

@ -0,0 +1,31 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <vector>
#include "low_precision_transformations/mat_mul_with_optimized_constant_fake_quantize_transformation.hpp"
#include "common_test_utils/test_constants.hpp"
using namespace LayerTestsDefinitions;
namespace {
// Network precisions the transformation test is instantiated with.
// NOTE(review): only FP32 here, unlike the CPU instantiation which also covers FP16.
const std::vector<InferenceEngine::Precision> netPrecisions = {
InferenceEngine::Precision::FP32
};
// Test values: first initializer is the FakeQuantize on activations (fqOnData),
// second the FakeQuantize on weights (fqOnWeights); each is
// { levels, constant shape, input low, input high, output low, output high }.
const std::vector<LayerTestsDefinitions::MatMulWithOptimizedConstantFakeQuantizeTransformationTestValues> params = {
{
{ 256ul, ngraph::Shape { 1 }, { 0.f }, { 25.5f }, { 0.f }, { 25.5f } },
{ 255ul, ngraph::Shape { 1 }, { -12.8f }, { 12.7f }, { -12.8f }, { 12.7f } }
}
};
// Registers the parameterized test on GPU for the cross product of
// precisions x a fixed 1x16 input shape x the values above.
INSTANTIATE_TEST_CASE_P(LPT, MatMulWithOptimizedConstantFakeQuantizeTransformation,
::testing::Combine(
::testing::ValuesIn(netPrecisions),
::testing::Values(InferenceEngine::SizeVector({ 1, 16 })),
::testing::Values(CommonTestUtils::DEVICE_GPU),
::testing::ValuesIn(params)),
MatMulWithOptimizedConstantFakeQuantizeTransformation::getTestCaseName);
} // namespace

View File

@ -0,0 +1,31 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include <vector>
#include "low_precision_transformations/multiply_with_one_parent_transformation.hpp"
#include "common_test_utils/test_constants.hpp"
using namespace LayerTestsDefinitions;
namespace {
// Network precisions the transformation test is instantiated with.
const std::vector<InferenceEngine::Precision> netPrecisions = {
InferenceEngine::Precision::FP32,
InferenceEngine::Precision::FP16
};
// FakeQuantize descriptor applied before the Multiply:
// { levels, constant shape, input low, input high, output low, output high }.
const std::vector<MultiplyWithOneParentTransformationValues> values = {
{
{ 256ul, ngraph::Shape { 1, 1, 1, 1 }, { 0.f }, { 255.f }, { 0.f }, { 255.f } }
}
};
// Registers the parameterized test on GPU for the cross product of
// precisions x a fixed 1x3x16x16 input shape x the values above.
INSTANTIATE_TEST_CASE_P(LPT, MultiplyWithOneParentTransformation,
::testing::Combine(
::testing::ValuesIn(netPrecisions),
::testing::Values(InferenceEngine::SizeVector({ 1, 3, 16, 16 })),
::testing::Values(CommonTestUtils::DEVICE_GPU),
::testing::ValuesIn(values)),
MultiplyWithOneParentTransformation::getTestCaseName);
} // namespace

View File

@ -0,0 +1,38 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <string>
#include <memory>
#include "functional_test_utils/low_precision_transformations/layer_transformation.hpp"
#include "ngraph_functions/low_precision_transformations/common/fake_quantize_on_data.hpp"
namespace LayerTestsDefinitions {
// Per-case test values: FakeQuantize settings for the MatMul activations
// and for its constant weights.
class MatMulWithOptimizedConstantFakeQuantizeTransformationTestValues {
public:
ngraph::builder::subgraph::FakeQuantizeOnData fqOnData;
ngraph::builder::subgraph::FakeQuantizeOnData fqOnWeights;
};
// Full parameter tuple: precision, input shape, target device, test values.
typedef std::tuple<
InferenceEngine::Precision,
InferenceEngine::SizeVector,
std::string,
MatMulWithOptimizedConstantFakeQuantizeTransformationTestValues
> MatMulWithOptimizedConstantFakeQuantizeTransformationTransformationParams;
// Parameterized LPT functional test: MatMul whose constant weights are
// quantized by a FakeQuantize built from the values above.
class MatMulWithOptimizedConstantFakeQuantizeTransformation :
public testing::WithParamInterface<MatMulWithOptimizedConstantFakeQuantizeTransformationTransformationParams>,
public LayerTestsUtils::LayerTransformation {
public:
static std::string getTestCaseName(testing::TestParamInfo<MatMulWithOptimizedConstantFakeQuantizeTransformationTransformationParams> obj);
protected:
void SetUp() override;
};
} // namespace LayerTestsDefinitions

View File

@ -0,0 +1,40 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <string>
#include <memory>
#include "functional_test_utils/low_precision_transformations/layer_transformation.hpp"
#include "ngraph_functions/low_precision_transformations/common/fake_quantize_on_data.hpp"
namespace LayerTestsDefinitions {
// Per-case test values: the FakeQuantize that feeds both Multiply inputs.
class MultiplyWithOneParentTransformationValues {
public:
ngraph::builder::subgraph::FakeQuantizeOnData fakeQuantize;
};
// Full parameter tuple: precision, input shape, target device, test values.
typedef std::tuple<
InferenceEngine::Precision,
InferenceEngine::SizeVector,
std::string,
MultiplyWithOneParentTransformationValues
> MultiplyWithOneParentTransformationParams;
// Parameterized LPT functional test: Multiply whose two inputs share a
// single FakeQuantize parent.
class MultiplyWithOneParentTransformation :
public testing::WithParamInterface<MultiplyWithOneParentTransformationParams>,
public LayerTestsUtils::LayerTransformation {
public:
static std::string getTestCaseName(testing::TestParamInfo<MultiplyWithOneParentTransformationParams> obj);
protected:
void SetUp() override;
private:
// Checks the transformed CNNNetwork structure (see .cpp for the exact checks).
void validate();
};
} // namespace LayerTestsDefinitions

View File

@ -0,0 +1,63 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "low_precision_transformations/mat_mul_with_optimized_constant_fake_quantize_transformation.hpp"
#include <memory>
#include <tuple>
#include <vector>
#include <string>
#include <ie_core.hpp>
#include "common_test_utils/common_utils.hpp"
#include "functional_test_utils/plugin_cache.hpp"
#include "functional_test_utils/layer_test_utils.hpp"
#include "functional_test_utils/blob_utils.hpp"
#include "ngraph_functions/pass/convert_prc.hpp"
#include "ngraph_functions/low_precision_transformations/mat_mul_with_optimized_constant_fake_quantize_function.hpp"
namespace LayerTestsDefinitions {

// Builds a human-readable test-case name from the run parameters:
// precision, input shape, target device and both FakeQuantize descriptors.
std::string MatMulWithOptimizedConstantFakeQuantizeTransformation::getTestCaseName(
    testing::TestParamInfo<MatMulWithOptimizedConstantFakeQuantizeTransformationTransformationParams> obj) {
    InferenceEngine::Precision netPrecision;
    InferenceEngine::SizeVector inputShape;
    std::string targetDevice;
    MatMulWithOptimizedConstantFakeQuantizeTransformationTestValues param;
    std::tie(netPrecision, inputShape, targetDevice, param) = obj.param;

    std::ostringstream result;
    result << netPrecision.name() << "_" <<
        CommonTestUtils::vec2str(inputShape) << "_" <<
        targetDevice << "_" <<
        param.fqOnData << "_" <<
        param.fqOnWeights;
    return result.str();
}

// Creates the nGraph function under test from the test parameters.
// `targetDevice` and `function` are members inherited from the test base class.
void MatMulWithOptimizedConstantFakeQuantizeTransformation::SetUp() {
    // Accuracy threshold for the reference comparison performed by Run().
    threshold = 0.01f;

    InferenceEngine::Precision netPrecision;
    InferenceEngine::SizeVector inputShape;
    MatMulWithOptimizedConstantFakeQuantizeTransformationTestValues param;
    std::tie(netPrecision, inputShape, targetDevice, param) = this->GetParam();

    const auto precision = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
    function = ngraph::builder::subgraph::MatMulWithOptimizedConstantFakeQuantizeFunction::getOriginal(
        precision,
        inputShape,
        param.fqOnData,
        param.fqOnWeights);
}

TEST_P(MatMulWithOptimizedConstantFakeQuantizeTransformation, CompareWithRefImpl) {
    Run();
}

}  // namespace LayerTestsDefinitions

View File

@ -0,0 +1,84 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "low_precision_transformations/multiply_with_one_parent_transformation.hpp"
#include <memory>
#include <string>
#include <tuple>
#include <vector>
#include <ie_core.hpp>
#include "common_test_utils/common_utils.hpp"
#include "ngraph_functions/low_precision_transformations/multiply_with_one_parent_function.hpp"
namespace LayerTestsDefinitions {
std::string MultiplyWithOneParentTransformation::getTestCaseName(testing::TestParamInfo<MultiplyWithOneParentTransformationParams> obj) {
InferenceEngine::Precision netPrecision;
InferenceEngine::SizeVector inputShape;
std::string targetDevice;
MultiplyWithOneParentTransformationValues values;
std::tie(netPrecision, inputShape, targetDevice, values) = obj.param;
std::ostringstream result;
result << netPrecision.name() << "_" << CommonTestUtils::vec2str(inputShape);
return result.str();
}
void MultiplyWithOneParentTransformation::SetUp() {
threshold = 0.01f;
InferenceEngine::Precision netPrecision;
InferenceEngine::SizeVector inputShape;
InferenceEngine::details::LayerTransformation::Params params;
MultiplyWithOneParentTransformationValues values;
std::tie(netPrecision, inputShape, targetDevice, values) = this->GetParam();
auto precision = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
function = ngraph::builder::subgraph::MultiplyWithOneParentFunction::getOriginal(precision, inputShape, values.fakeQuantize);
validate();
}
void MultiplyWithOneParentTransformation::validate() {
InferenceEngine::Precision netPrecision;
InferenceEngine::SizeVector inputShape;
std::string targetDevice;
InferenceEngine::details::LayerTransformation::Params params = LayerTestsUtils::LayerTransformationParamsFactory::createParams();
MultiplyWithOneParentTransformationValues values;
std::tie(netPrecision, inputShape, targetDevice, values) = this->GetParam();
const InferenceEngine::CNNNetwork network = transform(params);
IE_SUPPRESS_DEPRECATED_START
InferenceEngine::OutputsDataMap outputs = network.getOutputsInfo();
EXPECT_EQ(1, outputs.size());
std::map<std::string, InferenceEngine::DataPtr>::iterator it = outputs.begin();
const InferenceEngine::CNNLayerPtr outputLayer = getCreatorLayer(it->second).lock();
EXPECT_TRUE(outputLayer != nullptr);
EXPECT_EQ("Eltwise", outputLayer->type);
// check #1: successful transformation execution
EXPECT_EQ(2ul, outputLayer->insData.size());
const auto parents = InferenceEngine::details::CNNNetworkHelper::getParents(*outputLayer);
EXPECT_EQ(2ul, parents.size());
EXPECT_EQ("ScaleShift", parents[0]->type);
// check #2: successful graph handling
EXPECT_EQ("FakeQuantize", parents[1]->type);
EXPECT_EQ(1ul, InferenceEngine::details::CNNNetworkHelper::getParents(*parents[0]).size());
EXPECT_EQ("FakeQuantize", InferenceEngine::details::CNNNetworkHelper::getParents(*parents[0])[0]->type);
IE_SUPPRESS_DEPRECATED_END
}
TEST_P(MultiplyWithOneParentTransformation, CompareWithRefImpl) {
Run();
};
} // namespace LayerTestsDefinitions

View File

@ -0,0 +1,26 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <memory>
#include <ngraph/ngraph.hpp>
#include "ngraph_functions/low_precision_transformations/common/fake_quantize_on_data.hpp"
namespace ngraph {
namespace builder {
namespace subgraph {
// Builder of the test subgraph: Parameter [-> FakeQuantize] -> MatMul,
// with constant weights quantized by a FakeQuantize.
class MatMulWithOptimizedConstantFakeQuantizeFunction {
public:
// Returns the original (untransformed) function.
// fqOnData describes the FakeQuantize on activations, fqOnWeights the one on weights.
static std::shared_ptr<ngraph::Function> getOriginal(
const ngraph::element::Type precision,
const ngraph::Shape& inputShape,
const FakeQuantizeOnData& fqOnData,
const FakeQuantizeOnData& fqOnWeights);
};
}  // namespace subgraph
}  // namespace builder
}  // namespace ngraph

View File

@ -0,0 +1,25 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#pragma once
#include <memory>
#include <ngraph/ngraph.hpp>
#include "ngraph_functions/low_precision_transformations/common/fake_quantize_on_data.hpp"
namespace ngraph {
namespace builder {
namespace subgraph {
// Builder of the test subgraph: Parameter -> FakeQuantize -> Multiply,
// where both Multiply inputs come from the same FakeQuantize (one parent).
class MultiplyWithOneParentFunction {
public:
// Returns the original (untransformed) function.
static std::shared_ptr<ngraph::Function> getOriginal(
const ngraph::element::Type precision,
const ngraph::Shape& inputShape,
const FakeQuantizeOnData& fakeQuantize);
};
}  // namespace subgraph
}  // namespace builder
}  // namespace ngraph

View File

@ -0,0 +1,51 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "ngraph_functions/low_precision_transformations/mat_mul_with_optimized_constant_fake_quantize_function.hpp"
#include <ngraph/opsets/opset1.hpp>
#include "ngraph_functions/builders.hpp"
namespace ngraph {
namespace builder {
namespace subgraph {

// Builds: Parameter [-> FakeQuantize(fqOnData)] -> MatMul <- FakeQuantize(fqOnWeights) <- Constant.
// The weights are a [inputShape[1] x 10] constant filled with 10.f.
std::shared_ptr<ngraph::Function> MatMulWithOptimizedConstantFakeQuantizeFunction::getOriginal(
    const ngraph::element::Type precision,
    const ngraph::Shape& inputShape,
    const FakeQuantizeOnData& fqOnData,
    const FakeQuantizeOnData& fqOnWeights) {
    const auto input = std::make_shared<ngraph::opset1::Parameter>(precision, ngraph::Shape(inputShape));

    // FakeQuantize on activations is optional: skipped when fqOnData is empty.
    const auto fakeQuantizeOnActivations = fqOnData.empty() ?
        nullptr :
        ngraph::builder::makeFakeQuantize(
            input, precision, fqOnData.quantizationLevel, fqOnData.constantShape,
            fqOnData.inputLowValues, fqOnData.inputHighValues, fqOnData.outputLowValues, fqOnData.outputHighValues);

    const ngraph::Shape weightsShape = { inputShape[1], 10 };
    const std::vector<float> weights(weightsShape[0] * weightsShape[1], 10.f);
    const auto weightsConst = std::make_shared<ngraph::opset1::Constant>(precision, weightsShape, weights);

    // NOTE(review): the same low/high constants are shared between the input and
    // output intervals of the weights FakeQuantize, so
    // fqOnWeights.outputLowValues/outputHighValues are never read here — this looks
    // intentional ("optimized constant" case where in == out), but confirm.
    const auto lowConstant = std::make_shared<ngraph::opset1::Constant>(precision, fqOnWeights.constantShape, fqOnWeights.inputLowValues);
    const auto highConstant = std::make_shared<ngraph::opset1::Constant>(precision, fqOnWeights.constantShape, fqOnWeights.inputHighValues);
    const auto fakeQuantizeOnWeights = std::make_shared<ngraph::opset1::FakeQuantize>(
        weightsConst,
        lowConstant,
        highConstant,
        lowConstant,
        highConstant,
        fqOnWeights.quantizationLevel);

    const auto matMul = std::make_shared<ngraph::opset1::MatMul>(
        fqOnData.empty() ? input : fakeQuantizeOnActivations,
        fakeQuantizeOnWeights,
        false,
        false);

    ngraph::ResultVector results{ std::make_shared<ngraph::opset1::Result>(matMul) };
    return std::make_shared<ngraph::Function>(results, ngraph::ParameterVector{ input }, "MatMulWithOptimizedConstantFakeQuantizeFunction");
}

}  // namespace subgraph
}  // namespace builder
}  // namespace ngraph

View File

@ -0,0 +1,32 @@
// Copyright (C) 2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "ngraph_functions/low_precision_transformations/multiply_with_one_parent_function.hpp"
#include <ngraph/opsets/opset1.hpp>
#include "ngraph_functions/builders.hpp"
namespace ngraph {
namespace builder {
namespace subgraph {

// Builds: Parameter -> FakeQuantize -> Multiply, where both Multiply inputs
// are the same FakeQuantize output (a single shared parent).
std::shared_ptr<ngraph::Function> MultiplyWithOneParentFunction::getOriginal(
    const ngraph::element::Type precision,
    const ngraph::Shape& inputShape,
    const FakeQuantizeOnData& fqOnData) {
    const auto parameter = std::make_shared<ngraph::opset1::Parameter>(precision, ngraph::Shape(inputShape));

    const auto quantized = ngraph::builder::makeFakeQuantize(
        parameter, precision, fqOnData.quantizationLevel, fqOnData.constantShape,
        fqOnData.inputLowValues, fqOnData.inputHighValues, fqOnData.outputLowValues, fqOnData.outputHighValues);

    // Both operands deliberately reference the same output port.
    const auto sharedParent = quantized->output(0);
    const auto product = std::make_shared<ngraph::opset1::Multiply>(sharedParent, sharedParent);

    return std::make_shared<ngraph::Function>(
        ngraph::ResultVector{ std::make_shared<ngraph::opset1::Result>(product) },
        ngraph::ParameterVector{ parameter },
        "MultiplyWithOneParentFunction");
}

}  // namespace subgraph
}  // namespace builder
}  // namespace ngraph