[LPT] Copy constant with several outputs before blob update (cherry-pick to master) (#2198)

* [LPT] Copy constant implementation
* [LPT] Support for the same Constant ops used as FQ interval boundaries
parent ff3c5fce99
commit ac2370b420
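Why the copy is needed, in brief: when one Const layer feeds several consumers (or several boundary inputs of the same FakeQuantize), updating its blob in place rewrites the interval seen by every consumer at once. The commit fixes this by cloning the shared constant before the update. Below is a minimal standalone sketch of that copy-before-update pattern; the ConstNode/updateBlob names are illustrative stand-ins, not the Inference Engine API.

#include <iostream>
#include <memory>
#include <string>
#include <vector>

// Simplified stand-in for a Const layer: one float blob plus a count of
// consumer ports that read it (hypothetical types, not the IE API).
struct ConstNode {
    std::string name;
    std::vector<float> blob;   // plays the role of the "custom" blob
    int consumers;             // number of ports that read this constant
};

using ConstNodePtr = std::shared_ptr<ConstNode>;

// Copy-before-update: if the constant is shared, clone it and detach one
// consumer, so the remaining consumers keep the original values.
ConstNodePtr updateBlob(ConstNodePtr node, const std::vector<float>& values) {
    if (node->consumers > 1) {
        auto copy = std::make_shared<ConstNode>(*node);
        copy->name += "/new";
        copy->consumers = 1;
        --node->consumers;
        node = copy;           // the caller now owns a private constant
    }
    node->blob = values;       // safe: no other consumer sees this blob
    return node;
}

int main() {
    // One constant used as the output-low boundary of two FakeQuantize ops.
    auto shared = std::make_shared<ConstNode>(ConstNode{"outputLow", {0.0f}, 2});
    auto mine = updateBlob(shared, {-1.28f});
    // The original stays intact; only the copy is rewritten.
    std::cout << shared->blob[0] << " vs " << mine->blob[0] << "\n";  // 0 vs -1.28
}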
@@ -48,16 +48,22 @@ public:
 
     static Blob::Ptr makeNewBlobPtr(const TensorDesc& desc);
 
-    static void invertFakeQuantize(const CNNLayer& fakeQuantize);
-
-    static void updateBlobs(CNNLayer& layer, const std::string& blobName, float value);
-
     static void updateBlobs(const CNNLayer& quantizeLayer, int constLayerIndex, float value);
 
     static void updateBlobs(const CNNLayer& quantizeLayer, int constLayerIndex, const std::vector<float>& values);
 
+    static void updateBlobs(TransformationContext& context, const CNNLayer& quantizeLayer, int constLayerIndex, float value);
+
+    static void updateBlobs(TransformationContext& context, const CNNLayer& quantizeLayer, int constLayerIndex, const std::vector<float>& values);
+
     static void updateBlobs(CNNLayer& layer, const std::string& blobName, const std::vector<float>& values);
 
+    static CNNLayerPtr copyConstant(
+        TransformationContext& context,
+        const CNNLayer& quantizeLayer,
+        const CNNLayerPtr& blobLayer,
+        const size_t constLayerIndex);
+
     // return true if at least one child uses layer on weights
     static bool onWeights(const CNNLayer& layer);
 
@@ -47,6 +47,7 @@ protected:
         std::vector<float>& biasesShifts) const;
 
     void updateWeights(
+        TransformationContext& context,
        const CNNLayerPtr fakeQuantize,
        std::vector<float>& outputLowValues,
        std::vector<float>& outputHighValues) const;
@@ -68,6 +69,7 @@ protected:
         const bool onWeights) const;
 
     DataPrecision fillDequantizationsForWeightsPath(
+        TransformationContext& context,
        const CNNLayer& weightableLayer,
        const bool supportAsymmetricQuantization,
        std::vector<float>& dequantizationScales,
@@ -148,10 +148,10 @@ void ConcatTransformation::transform(TransformationContext& context, CNNLayer& c
         switch (quantizedTensorAlignmentOnActivations) {
             case QuantizedTensorAlignment::None: {
                 const float updatedOutputLowValue = quantizationDetails.outputLowValues[0] * quantizationScale + quantizationShift;
-                CNNNetworkHelper::updateBlobs(fakeQuantizeLayer, 3, updatePrecisions ? roundf(updatedOutputLowValue) : updatedOutputLowValue);
+                CNNNetworkHelper::updateBlobs(context, fakeQuantizeLayer, 3, updatePrecisions ? roundf(updatedOutputLowValue) : updatedOutputLowValue);
 
                 const float updatedOutputHighValue = quantizationDetails.outputHighValues[0] * quantizationScale + quantizationShift;
-                CNNNetworkHelper::updateBlobs(fakeQuantizeLayer, 4, updatePrecisions ? roundf(updatedOutputHighValue) : updatedOutputHighValue);
+                CNNNetworkHelper::updateBlobs(context, fakeQuantizeLayer, 4, updatePrecisions ? roundf(updatedOutputHighValue) : updatedOutputHighValue);
 
                 break;
             }
@@ -165,18 +165,18 @@ void ConcatTransformation::transform(TransformationContext& context, CNNLayer& c
                         (outputHighValue / quantizationDetails.outputHighValues[0]))
                     : outputHighValue;
 
-                CNNNetworkHelper::updateBlobs(fakeQuantizeLayer, 1, inputLowValue);
-                CNNNetworkHelper::updateBlobs(fakeQuantizeLayer, 2, inputHighValue);
-                CNNNetworkHelper::updateBlobs(fakeQuantizeLayer, 3, dataPrecision.min);
-                CNNNetworkHelper::updateBlobs(fakeQuantizeLayer, 4, dataPrecision.max);
+                CNNNetworkHelper::updateBlobs(context, fakeQuantizeLayer, 1, inputLowValue);
+                CNNNetworkHelper::updateBlobs(context, fakeQuantizeLayer, 2, inputHighValue);
+                CNNNetworkHelper::updateBlobs(context, fakeQuantizeLayer, 3, dataPrecision.min);
+                CNNNetworkHelper::updateBlobs(context, fakeQuantizeLayer, 4, dataPrecision.max);
                 break;
             }
             case QuantizedTensorAlignment::UpdateLevel: {
                 const float updatedOutputLowValue = quantizationDetails.outputLowValues[0] * quantizationScale + quantizationShift;
-                CNNNetworkHelper::updateBlobs(fakeQuantizeLayer, 3, updatePrecisions ? roundf(updatedOutputLowValue) : updatedOutputLowValue);
+                CNNNetworkHelper::updateBlobs(context, fakeQuantizeLayer, 3, updatePrecisions ? roundf(updatedOutputLowValue) : updatedOutputLowValue);
 
                 const float updatedOutputHighValue = quantizationDetails.outputHighValues[0] * quantizationScale + quantizationShift;
-                CNNNetworkHelper::updateBlobs(fakeQuantizeLayer, 4, updatePrecisions ? roundf(updatedOutputHighValue) : updatedOutputHighValue);
+                CNNNetworkHelper::updateBlobs(context, fakeQuantizeLayer, 4, updatePrecisions ? roundf(updatedOutputHighValue) : updatedOutputHighValue);
 
                 const int levels = static_cast<int>(fabs(roundf(updatedOutputHighValue) - roundf(updatedOutputLowValue)) + 1.0);
                 fakeQuantizeLayer.params["levels"] = std::to_string(levels);
@@ -106,8 +106,8 @@ void ConcatMultiChannelsTransformation::transform(TransformationContext& context
         dequantizationScalesLayers[fakeQuantizeLayer->name] = dequantizationScales;
         dequantizationShiftsLayers[fakeQuantizeLayer->name] = dequantizationShifts;
 
-        CNNNetworkHelper::updateBlobs(*fakeQuantizeLayer, 3, dataPrecision.min);
-        CNNNetworkHelper::updateBlobs(*fakeQuantizeLayer, 4, dataPrecision.max);
+        CNNNetworkHelper::updateBlobs(context, *fakeQuantizeLayer, 3, dataPrecision.min);
+        CNNNetworkHelper::updateBlobs(context, *fakeQuantizeLayer, 4, dataPrecision.max);
     }
 
     if (updatePrecisions) {
@@ -105,6 +105,7 @@ void ConvolutionTransformation::transform(TransformationContext& context, CNNLay
     const CNNLayerPtr parentOnData = CNNNetworkHelper::getParent(layer, 0ul);
 
     const DataPrecision dataPrecisionOnWeights = fillDequantizationsForWeightsPath(
+        context,
         layer,
         supportAsymmetricQuantization,
         originalWeightsDequantizationScales,
@@ -34,8 +34,6 @@ void FakeQuantizeTransformation::transform(TransformationContext& context, CNNLa
         THROW_IE_EXCEPTION << "Layer '" << layer.insData.size() << "' has invalid inputs number. 5 is expected.";
     }
 
-    // CNNNetworkHelper::invertFakeQuantize(layer);
-
     // FakeQuantize on weights are used without dequantization ScaleShifts
     const bool onWeights = CNNNetworkHelper::onConstWeightsPath(layer) && CNNNetworkHelper::onWeights(layer);
     if (onWeights) {
@@ -77,8 +75,8 @@ void FakeQuantizeTransformation::transform(TransformationContext& context, CNNLa
     printDequantizationValues(dequantizationScales, dequantizationShifts);
 #endif
 
-    CNNNetworkHelper::updateBlobs(layer, 3, dataPrecision.min);
-    CNNNetworkHelper::updateBlobs(layer, 4, dataPrecision.max);
+    CNNNetworkHelper::updateBlobs(context, layer, 3, dataPrecision.min);
+    CNNNetworkHelper::updateBlobs(context, layer, 4, dataPrecision.max);
 
     if (updatePrecisions) {
         CNNNetworkHelper::setOutDataPrecision(layer, dataPrecision.precision);
@@ -135,6 +135,7 @@ void FullyConnectedTransformation::transform(TransformationContext& context, CNN
     }
 
     fillDequantizationsForWeightsPath(
+        context,
         fullyConnected,
         supportAsymmetricQuantization,
         originalWeightsDequantizationScales,
@@ -183,54 +183,6 @@ Blob::Ptr CNNNetworkHelper::makeNewBlobPtr(const TensorDesc& desc) {
     return newBlob;
 }
 
-void CNNNetworkHelper::updateBlobs(CNNLayer& layer, const std::string& blobName, float value) {
-    const auto existingBlobIt = layer.blobs.find(blobName);
-    if (existingBlobIt == layer.blobs.end()) {
-        THROW_IE_EXCEPTION << "blob '" << blobName << "' was not found in layer " << layer.name;
-    }
-    const auto& existingBlobTensorDesc = existingBlobIt->second->getTensorDesc();
-    Blob::Ptr newBlob = makeNewBlobPtr(existingBlobTensorDesc);
-
-    newBlob->allocate();
-    fillBlobByFP32(newBlob, value);
-    layer.blobs[existingBlobIt->first] = newBlob;
-}
-
-void CNNNetworkHelper::invertFakeQuantize(const CNNLayer& fakeQuantize) {
-    if (fakeQuantize.type != "FakeQuantize") {
-        THROW_IE_EXCEPTION << "invalid layer type " << fakeQuantize.type;
-    }
-    const QuantizationDetails quantizationDetails = QuantizationDetails::getDetails(fakeQuantize);
-    const size_t valuesCount =
-        std::max(quantizationDetails.inputLowValues.size(), quantizationDetails.outputLowValues.size());
-    std::vector<float> inputLowValues(valuesCount);
-    std::vector<float> inputHightValues(valuesCount);
-    std::vector<float> outputLowValues(valuesCount);
-    std::vector<float> outputHighValues(valuesCount);
-    bool wasInverted = false;
-    for (size_t i = 0ul; i < valuesCount; ++i) {
-        if ((quantizationDetails.getInputLowValue(i) > quantizationDetails.getInputHighValue(i)) &&
-            (quantizationDetails.getOutputLowValue(i) > quantizationDetails.getOutputHighValue(i))) {
-            inputLowValues[i] = quantizationDetails.getInputHighValue(i);
-            inputHightValues[i] = quantizationDetails.getInputLowValue(i);
-            outputLowValues[i] = quantizationDetails.getOutputHighValue(i);
-            outputHighValues[i] = quantizationDetails.getOutputLowValue(i);
-            wasInverted = true;
-        } else {
-            inputLowValues[i] = quantizationDetails.getInputLowValue(i);
-            inputHightValues[i] = quantizationDetails.getInputHighValue(i);
-            outputLowValues[i] = quantizationDetails.getOutputLowValue(i);
-            outputHighValues[i] = quantizationDetails.getOutputHighValue(i);
-        }
-    }
-
-    if (wasInverted) {
-        CNNNetworkHelper::updateBlobs(fakeQuantize, 1, inputLowValues);
-        CNNNetworkHelper::updateBlobs(fakeQuantize, 2, inputHightValues);
-        CNNNetworkHelper::updateBlobs(fakeQuantize, 3, outputLowValues);
-        CNNNetworkHelper::updateBlobs(fakeQuantize, 4, outputHighValues);
-    }
-}
-
 void CNNNetworkHelper::updateBlobs(const CNNLayer& quantizeLayer, int constLayerIndex,
                                    const std::vector<float>& values) {
     CNNLayerPtr blobLayer = CNNNetworkHelper::getParent(quantizeLayer, constLayerIndex);
@@ -288,6 +240,25 @@ void CNNNetworkHelper::updateBlobs(const CNNLayer& quantizeLayer, int constLayer
     fillBlobByFP32(newBlob, values.data());
 }
 
+void CNNNetworkHelper::updateBlobs(
+    TransformationContext& context,
+    const CNNLayer& quantizeLayer,
+    int constLayerIndex,
+    const std::vector<float>& values) {
+    CNNLayerPtr blobLayer = CNNNetworkHelper::getParent(quantizeLayer, constLayerIndex);
+    if (blobLayer == nullptr) {
+        THROW_IE_EXCEPTION << "layer is absent";
+    }
+
+    const auto existingBlobIt = blobLayer->blobs.find("custom");
+    if (existingBlobIt == blobLayer->blobs.end()) {
+        THROW_IE_EXCEPTION << "custom blob was not found ";
+    }
+
+    blobLayer = copyConstant(context, quantizeLayer, blobLayer, constLayerIndex);
+    updateBlobs(quantizeLayer, constLayerIndex, values);
+}
+
 void CNNNetworkHelper::updateBlobs(CNNLayer& layer, const std::string& blobName, const std::vector<float>& values) {
     const auto existingBlobIt = layer.blobs.find(blobName);
     if (existingBlobIt == layer.blobs.end()) {
@@ -377,6 +348,96 @@ void CNNNetworkHelper::updateBlobs(const CNNLayer& quantizeLayer, int constLayer
     blobLayer->blobs[existingBlobIt->first] = newBlob;
 }
 
+void CNNNetworkHelper::updateBlobs(TransformationContext& context, const CNNLayer& quantizeLayer, int constLayerIndex, float value) {
+    auto inData = quantizeLayer.insData[constLayerIndex].lock();
+    if (inData == nullptr) {
+        THROW_IE_EXCEPTION << "data is absent";
+    }
+
+    CNNLayerPtr blobLayer = getCreatorLayer(inData).lock();
+    if (blobLayer == nullptr) {
+        THROW_IE_EXCEPTION << "layer is absent";
+    }
+
+    if (blobLayer->blobs.size() != 1) {
+        THROW_IE_EXCEPTION << "unexpected blobs size";
+    }
+
+    blobLayer = copyConstant(context, quantizeLayer, blobLayer, constLayerIndex);
+    updateBlobs(quantizeLayer, constLayerIndex, value);
+}
+
+CNNLayerPtr CNNNetworkHelper::copyConstant(
+    TransformationContext& context,
+    const CNNLayer& quantizeLayer,
+    const CNNLayerPtr& blobLayer,
+    const size_t constLayerIndex) {
+    size_t repeatsCount = 0ul;
+    for (size_t i = 0; i < quantizeLayer.insData.size(); ++i) {
+        auto parentInData = quantizeLayer.insData[i].lock();
+        if (parentInData == nullptr) {
+            continue;
+        }
+        const auto quantizeLayerParent = getCreatorLayer(parentInData).lock();
+        if (quantizeLayerParent == nullptr) {
+            continue;
+        }
+        if (quantizeLayerParent->name == blobLayer->name) {
+            repeatsCount++;
+        }
+    }
+
+    if (repeatsCount < 2ul) {
+        return blobLayer;
+    }
+
+    details::CNNNetworkImpl* networkImpl = dynamic_cast<details::CNNNetworkImpl*>(&context.network);
+    if (networkImpl == nullptr) {
+        THROW_IE_EXCEPTION << "Unexpected network type";
+    }
+
+    const DataPtr outData = blobLayer->outData[0];
+    const std::map<std::string, CNNLayerPtr>& inputTo = getInputTo(outData);
+    const auto quantizeLayerIt = inputTo.find(quantizeLayer.name);
+    if (quantizeLayerIt == inputTo.end()) {
+        THROW_IE_EXCEPTION << "Layer was not found";
+    }
+
+    const auto blobIt = blobLayer->blobs.find("custom");
+    if (blobIt == blobLayer->blobs.end()) {
+        THROW_IE_EXCEPTION << "Blob was not found";
+    }
+
+    const Blob::Ptr blob = blobIt->second;
+    Blob::Ptr newBlob = makeNewBlobPtr(blob->getTensorDesc());
+    newBlob->allocate();
+
+    const std::shared_ptr<float> blobValues = CNNNetworkHelper::getFloatData(blob);
+    fillBlobByFP32(newBlob, blobValues.get());
+
+    auto newBlobValues = CNNNetworkHelper::getFloatData(newBlob);
+
+    const std::string layerName = blobLayer->name + "/new" + std::to_string(repeatsCount);
+    CNNLayerPtr newBlobLayer = CNNLayerPtr(new CNNLayer({ layerName, "Const", blob->getTensorDesc().getPrecision() }));
+    newBlobLayer->blobs.emplace("custom", newBlob);
+
+    const TensorDesc& tensorDesc = blobLayer->outData[0]->getTensorDesc();
+    DataPtr newEdgeAfterLayer(new Data(newBlobLayer->name, tensorDesc));
+    newEdgeAfterLayer->setName(newBlobLayer->name);
+    newEdgeAfterLayer->setPrecision(blob->getTensorDesc().getPrecision());
+    quantizeLayerIt->second->insData[constLayerIndex] = newEdgeAfterLayer;
+    getInputTo(newEdgeAfterLayer)[quantizeLayer.name] = quantizeLayerIt->second;
+
+    getCreatorLayer(newEdgeAfterLayer) = newBlobLayer;
+    newBlobLayer->outData.push_back(newEdgeAfterLayer);
+
+    CNNNetworkImpl* netImpl = dynamic_cast<CNNNetworkImpl*>(&context.network);
+    netImpl->addData(newBlobLayer->name.c_str(), newEdgeAfterLayer);
+    netImpl->addLayer(newBlobLayer);
+
+    return newBlobLayer;
+}
+
 int CNNNetworkHelper::onWeightsInDepth(const CNNLayer& layer) {
     const std::vector<CNNLayerPtr> children = getChildren(layer);
     for (const CNNLayerPtr& child : children) {
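Worth noting about the repeatsCount loop in copyConstant above: it walks every input port of the FakeQuantize and counts how many are fed by the same constant, because a single Constant can be wired to more than one boundary port; the clone happens only when the count reaches two. A tiny sketch of the same counting idea over a toy node (QuantizeNode/countRepeats are hypothetical stand-ins, not the IE graph API):

#include <cstddef>
#include <iostream>
#include <string>
#include <vector>

// Toy stand-in: a quantize node lists the names of the layers feeding
// each of its input ports.
struct QuantizeNode {
    std::vector<std::string> parentNames;  // one entry per input port
};

// Mirrors the repeatsCount loop: how many ports of this quantize node
// are fed by the constant with the given name?
std::size_t countRepeats(const QuantizeNode& q, const std::string& constName) {
    std::size_t repeats = 0;
    for (const auto& name : q.parentNames) {
        if (name == constName) {
            ++repeats;
        }
    }
    return repeats;  // copyConstant clones the constant only when this is >= 2
}

int main() {
    // Same constant wired to two boundary ports of one FakeQuantize.
    QuantizeNode fq{{"data", "sharedConst", "sharedConst", "outLow", "outHigh"}};
    std::cout << countRepeats(fq, "sharedConst") << "\n";  // 2 -> clone before updating
}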
@@ -250,14 +250,14 @@ void WeightableLayerTransformation::updateLayerBiasesFcSpecific(
     CNNNetworkHelper::updateBlobs(*biasesLayer, "custom", biases);
 }
 
-void WeightableLayerTransformation::updateWeights(const CNNLayerPtr parent, std::vector<float>& outputLowValues,
+void WeightableLayerTransformation::updateWeights(TransformationContext& context, const CNNLayerPtr parent, std::vector<float>& outputLowValues,
                                                   std::vector<float>& outputHighValues) const {
     const QuantizationDetails quantizationDetails = QuantizationDetails::getDetails(*parent);
     // TODO: refactor: move to standalone method
     switch (quantizedTensorAlignmentOnWeights) {
         case LayerTransformation::QuantizedTensorAlignment::None: {
-            CNNNetworkHelper::updateBlobs(*parent, 3, outputLowValues);
-            CNNNetworkHelper::updateBlobs(*parent, 4, outputHighValues);
+            CNNNetworkHelper::updateBlobs(context, *parent, 3, outputLowValues);
+            CNNNetworkHelper::updateBlobs(context, *parent, 4, outputHighValues);
             break;
         }
         case LayerTransformation::QuantizedTensorAlignment::UpdateIntervals:
@@ -300,10 +300,10 @@ void WeightableLayerTransformation::updateWeights(const CNNLayerPtr parent, std:
                 outputHighValues[i] = roundf(outputHighValues[i] * maxK);
             }
 
-            CNNNetworkHelper::updateBlobs(*parent, 1, inputLowValues);
-            CNNNetworkHelper::updateBlobs(*parent, 2, inputHighValues);
-            CNNNetworkHelper::updateBlobs(*parent, 3, outputLowValues);
-            CNNNetworkHelper::updateBlobs(*parent, 4, outputHighValues);
+            CNNNetworkHelper::updateBlobs(context, *parent, 1, inputLowValues);
+            CNNNetworkHelper::updateBlobs(context, *parent, 2, inputHighValues);
+            CNNNetworkHelper::updateBlobs(context, *parent, 3, outputLowValues);
+            CNNNetworkHelper::updateBlobs(context, *parent, 4, outputHighValues);
 
             const size_t levels = static_cast<size_t>(roundf(minOutputIntervalLowValue + maxOutputIntervalHighValue + 1.0));
             parent->params["levels"] = std::to_string(levels);
@@ -411,6 +411,7 @@ void WeightableLayerTransformation::createAsymmetric(TransformationContext& cont
 }
 
 DataPrecision WeightableLayerTransformation::fillDequantizationsForWeightsPath(
+    TransformationContext& context,
     const CNNLayer& weightableLayer,
     const bool supportAsymmetricQuantization,
     std::vector<float>& dequantizationScales,
@@ -461,7 +462,7 @@ DataPrecision WeightableLayerTransformation::fillDequantizationsForWeightsPath(
         }
     }
 
-    updateWeights(parent, outputLowValues, outputHighValues);
+    updateWeights(context, parent, outputLowValues, outputHighValues);
     return dataPrecision;
 }
 
@@ -11,8 +11,7 @@ using namespace LayerTestsDefinitions;
 
 namespace {
 const std::vector<InferenceEngine::Precision> netPrecisions = {
-    InferenceEngine::Precision::FP32,
-    InferenceEngine::Precision::FP16
+    InferenceEngine::Precision::FP32
 };
 
 const std::vector<LayerTestsDefinitions::MatMulWithOptimizedConstantFakeQuantizeTransformationTestValues> params = {
@@ -22,10 +21,15 @@ const std::vector<LayerTestsDefinitions::MatMulWithOptimizedConstantFakeQuantize
     },
 };
 
+const std::vector<std::pair<InferenceEngine::SizeVector, InferenceEngine::SizeVector>> inputShapes = {
+    std::pair<InferenceEngine::SizeVector, InferenceEngine::SizeVector>({ InferenceEngine::SizeVector({ 1, 16 }), InferenceEngine::SizeVector({ 10, 16 }) }),
+    std::pair<InferenceEngine::SizeVector, InferenceEngine::SizeVector>({ InferenceEngine::SizeVector({ 1, 16 }), InferenceEngine::SizeVector({ 16, 10 }) })
+};
+
 INSTANTIATE_TEST_CASE_P(LPT, MatMulWithOptimizedConstantFakeQuantizeTransformation,
     ::testing::Combine(
         ::testing::ValuesIn(netPrecisions),
-        ::testing::Values(InferenceEngine::SizeVector({ 1, 16 })),
+        ::testing::ValuesIn(inputShapes),
         ::testing::Values(CommonTestUtils::DEVICE_CPU),
         ::testing::ValuesIn(params)),
     MatMulWithOptimizedConstantFakeQuantizeTransformation::getTestCaseName);
@@ -21,10 +21,15 @@ const std::vector<LayerTestsDefinitions::MatMulWithOptimizedConstantFakeQuantize
     }
 };
 
+const std::vector<std::pair<InferenceEngine::SizeVector, InferenceEngine::SizeVector>> inputShapes = {
+    std::pair<InferenceEngine::SizeVector, InferenceEngine::SizeVector>({ InferenceEngine::SizeVector({ 1, 16 }), InferenceEngine::SizeVector({ 10, 16 }) }),
+    std::pair<InferenceEngine::SizeVector, InferenceEngine::SizeVector>({ InferenceEngine::SizeVector({ 1, 16 }), InferenceEngine::SizeVector({ 16, 10 }) })
+};
+
 INSTANTIATE_TEST_CASE_P(LPT, MatMulWithOptimizedConstantFakeQuantizeTransformation,
     ::testing::Combine(
         ::testing::ValuesIn(netPrecisions),
-        ::testing::Values(InferenceEngine::SizeVector({ 1, 16 })),
+        ::testing::ValuesIn(inputShapes),
         ::testing::Values(CommonTestUtils::DEVICE_GPU),
         ::testing::ValuesIn(params)),
     MatMulWithOptimizedConstantFakeQuantizeTransformation::getTestCaseName);
@@ -20,7 +20,7 @@ public:
 
 typedef std::tuple<
     InferenceEngine::Precision,
-    InferenceEngine::SizeVector,
+    std::pair<InferenceEngine::SizeVector, InferenceEngine::SizeVector>,
     std::string,
     MatMulWithOptimizedConstantFakeQuantizeTransformationTestValues
 > MatMulWithOptimizedConstantFakeQuantizeTransformationTransformationParams;
@@ -23,16 +23,16 @@ namespace LayerTestsDefinitions {
 std::string MatMulWithOptimizedConstantFakeQuantizeTransformation::getTestCaseName(
     testing::TestParamInfo<MatMulWithOptimizedConstantFakeQuantizeTransformationTransformationParams> obj) {
     InferenceEngine::Precision netPrecision;
-    InferenceEngine::SizeVector inputShape;
+    std::pair<InferenceEngine::SizeVector, InferenceEngine::SizeVector> shapes;
     std::string targetDevice;
     InferenceEngine::details::LayerTransformation::Params params;
     MatMulWithOptimizedConstantFakeQuantizeTransformationTestValues param;
 
-    std::tie(netPrecision, inputShape, targetDevice, param) = obj.param;
+    std::tie(netPrecision, shapes, targetDevice, param) = obj.param;
 
     std::ostringstream result;
     result << netPrecision.name() << "_" <<
-        CommonTestUtils::vec2str(inputShape) << "_" <<
+        CommonTestUtils::vec2str(shapes.first) << "_" << CommonTestUtils::vec2str(shapes.second) << "_" <<
         targetDevice << "_" <<
         param.fqOnData << "_" <<
         param.fqOnWeights;
@@ -43,15 +43,16 @@ void MatMulWithOptimizedConstantFakeQuantizeTransformation::SetUp() {
     threshold = 0.01f;
 
     InferenceEngine::Precision netPrecision;
-    InferenceEngine::SizeVector inputShape;
+    std::pair<InferenceEngine::SizeVector, InferenceEngine::SizeVector> shapes;
     InferenceEngine::details::LayerTransformation::Params params;
     MatMulWithOptimizedConstantFakeQuantizeTransformationTestValues param;
-    std::tie(netPrecision, inputShape, targetDevice, param) = this->GetParam();
+    std::tie(netPrecision, shapes, targetDevice, param) = this->GetParam();
     auto precision = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
 
     function = ngraph::builder::subgraph::MatMulWithOptimizedConstantFakeQuantizeFunction::getOriginal(
         precision,
-        inputShape,
+        shapes.first,
+        shapes.second,
         param.fqOnData,
         param.fqOnWeights);
 }
@@ -16,7 +16,8 @@ class MatMulWithOptimizedConstantFakeQuantizeFunction {
 public:
     static std::shared_ptr<ngraph::Function> getOriginal(
         const ngraph::element::Type precision,
-        const ngraph::Shape& inputShape,
+        const ngraph::Shape& inputShape1,
+        const ngraph::Shape& inputShape2,
         const FakeQuantizeOnData& fqOnData,
         const FakeQuantizeOnData& fqOnWeights);
 };
@@ -13,34 +13,41 @@ namespace subgraph {
 
 std::shared_ptr<ngraph::Function> MatMulWithOptimizedConstantFakeQuantizeFunction::getOriginal(
     const ngraph::element::Type precision,
-    const ngraph::Shape& inputShape,
+    const ngraph::Shape& inputShape1,
+    const ngraph::Shape& inputShape2,
     const FakeQuantizeOnData& fqOnData,
     const FakeQuantizeOnData& fqOnWeights) {
-    const auto input = std::make_shared<ngraph::opset1::Parameter>(precision, ngraph::Shape(inputShape));
-    const auto fakeQuantizeOnActivations = fqOnData.empty() ?
-        nullptr :
-        ngraph::builder::makeFakeQuantize(
-            input, precision, fqOnData.quantizationLevel, fqOnData.constantShape,
-            fqOnData.inputLowValues, fqOnData.inputHighValues, fqOnData.outputLowValues, fqOnData.outputHighValues);
-
-    const ngraph::Shape weightsShape = { inputShape[1], 10 };
+    const auto input = std::make_shared<ngraph::opset1::Parameter>(precision, ngraph::Shape(inputShape1));
+    const auto lowConstantOnActivations = std::make_shared<ngraph::opset1::Constant>(precision, fqOnData.constantShape, fqOnData.inputLowValues);
+    const auto highConstantOnActivations = std::make_shared<ngraph::opset1::Constant>(precision, fqOnData.constantShape, fqOnData.inputHighValues);
+    const auto fakeQuantizeOnActivations = std::make_shared<ngraph::opset1::FakeQuantize>(
+        input,
+        lowConstantOnActivations,
+        highConstantOnActivations,
+        lowConstantOnActivations,
+        highConstantOnActivations,
+        fqOnWeights.quantizationLevel);
+
+    const ngraph::Shape weightsShape = { inputShape2[0], inputShape1[1] };
     const std::vector<float> weigths(weightsShape[0] * weightsShape[1], 10.f);
 
     const auto weightsConst = std::make_shared<ngraph::opset1::Constant>(precision, weightsShape, weigths);
-    const auto lowConstant = std::make_shared<ngraph::opset1::Constant>(precision, fqOnWeights.constantShape, fqOnWeights.inputLowValues);
-    const auto highConstant = std::make_shared<ngraph::opset1::Constant>(precision, fqOnWeights.constantShape, fqOnWeights.inputHighValues);
+    const auto lowConstantOnWeights = std::make_shared<ngraph::opset1::Constant>(precision, fqOnWeights.constantShape, fqOnWeights.inputLowValues);
+    const auto highConstantOnWeights = std::make_shared<ngraph::opset1::Constant>(precision, fqOnWeights.constantShape, fqOnWeights.inputHighValues);
     const auto fakeQuantizeOnWeights = std::make_shared<ngraph::opset1::FakeQuantize>(
         weightsConst,
-        lowConstant,
-        highConstant,
-        lowConstant,
-        highConstant,
+        lowConstantOnWeights,
+        highConstantOnWeights,
+        lowConstantOnWeights,
+        highConstantOnWeights,
         fqOnWeights.quantizationLevel);
 
     const auto matMul = std::make_shared<ngraph::opset1::MatMul>(
-        fqOnData.empty() ? input : fakeQuantizeOnActivations,
+        fakeQuantizeOnActivations,
         fakeQuantizeOnWeights,
         false,
-        false);
+        inputShape1[1] != inputShape2[0]);
 
     ngraph::ResultVector results{ std::make_shared<ngraph::opset1::Result>(matMul) };
     return std::make_shared<ngraph::Function>(results, ngraph::ParameterVector{ input }, "MatMulWithOptimizedConstantFakeQuantizeFunction");
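A note on the new fourth MatMul argument (transpose_b) in the builder above: my reading of the shape arithmetic is that the weights constant is built as { inputShape2[0], inputShape1[1] }, so the second MatMul input has to be transposed exactly when its first dimension does not already match the reduction dimension. Evaluating the predicate for the two shape pairs the tests add (a quick check under that reading, not text from the commit):

#include <cstddef>
#include <iostream>
#include <vector>

int main() {
    const std::vector<std::size_t> a{1, 16};    // activations shape
    const std::vector<std::size_t> b1{10, 16};  // first test pair's second shape
    const std::vector<std::size_t> b2{16, 10};  // second test pair's second shape
    std::cout << std::boolalpha
              << (a[1] != b1[0]) << "\n"   // true:  16 != 10, so transpose the weights
              << (a[1] != b2[0]) << "\n";  // false: 16 == 16, weights already line up
}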
|
Loading…
Reference in New Issue
Block a user