[IE COMMON] [LPT] Support 3D layout for FullyConnected transformation

Edward Shogulin 2020-05-30 12:57:36 +03:00 committed by Alexander Peskov
parent bc98d17121
commit e544dd1e28
13 changed files with 501 additions and 111 deletions
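In brief: FullyConnected inputs may now be rank 2 ([N, C]) or rank 3 ([N, H, C]). For the 3D case the transformation only fires when the preceding ScaleShift carries one repeated scale and one repeated shift, so every per-channel lookup in the dequantization math collapses to element 0. A minimal sketch of that dispatch, assuming the blob values were already validated as equal (the helper name below is hypothetical):

float dequantizationValueFor(const std::vector<float>& values, size_t channel, bool broadcasted) {
    // 3D case: all values were verified equal, so element 0 represents the whole blob
    return broadcasted ? values[0] : values[channel];
}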

View File

@@ -18,6 +18,7 @@ class INFERENCE_ENGINE_API_CLASS(FullyConnectedTransformation) : public Weightab
public:
FullyConnectedTransformation(const Params& params) : WeightableLayerTransformation(params) {}
~FullyConnectedTransformation() override {};
bool canBeTransformed(const TransformationContext& context, const CNNLayer& layer) const override;
void transform(TransformationContext& context, CNNLayer& layer) const override;
private:

View File

@@ -89,7 +89,9 @@ public:
static Blob::Ptr getBlob(CNNLayerPtr layer, const std::string& blobName);
static Blob::Ptr getBlob(CNNLayer* layer, const std::string& blobName);
static Blob::Ptr getBlob(const CNNLayer* layer, const std::string& blobName);
static bool blobValuesAreEqual(const CNNLayer& layer, const std::string& blobName);
static std::shared_ptr<float> getFloatData(const CNNLayerPtr& layer, const std::string& blobName);

View File

@@ -33,6 +33,7 @@ protected:
void updateLayerBiases(
TransformationContext& context,
const CNNLayer& convolution,
const bool biasesDimsAsOutput,
std::vector<float>& dequantizationScales,
std::vector<float>& dequantizationShifts,
std::vector<float>& biasesShifts) const;

View File

@@ -180,7 +180,7 @@ void ConvolutionTransformation::transform(TransformationContext& context, CNNLay
if (this->updateBiases) {
std::vector<float> biasesShifts(dequantizationShifts.size(), 0.f);
updateLayerBiases(context, layer, dequantizationScales, dequantizationShifts, biasesShifts);
updateLayerBiases(context, layer, false, dequantizationScales, dequantizationShifts, biasesShifts);
}
CNNNetworkHelper::removeLayer(context.network, scaleShiftOnData);

View File

@@ -25,8 +25,72 @@
using namespace InferenceEngine;
using namespace InferenceEngine::details;
void FullyConnectedTransformation::transform(TransformationContext& context, CNNLayer& fullyConnected) const {
bool getDequantizationValuesAreBroadcasted(const CNNLayer& fullyConnected) {
const DataPtr inputData = fullyConnected.insData[0].lock();
if (inputData == nullptr) {
THROW_IE_LPT_EXCEPTION(fullyConnected) << "input data is absent";
}
return inputData->getDims().size() == 3ul;
}
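// 3D input ([N, H, C]): the ScaleShift dequantization values act as one broadcasted
// scalar; canBeTransformed below rejects 3D cases where the blob values differ.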
bool FullyConnectedTransformation::canBeTransformed(const TransformationContext& context, const CNNLayer& fullyConnected) const {
if (!WeightableLayerTransformation::canBeTransformed(context, fullyConnected)) {
return false;
}
const DataPtr inputData = fullyConnected.insData[0].lock();
if (inputData == nullptr) {
return false;
}
const std::vector<size_t> inTensorDims = inputData->getDims();
if ((inTensorDims.size() != 2) && (inTensorDims.size() != 3)) {
return false;
}
const DataPtr outputData = fullyConnected.outData[0];
if (outputData == nullptr) {
return false;
}
const std::vector<size_t> outTensorDims = outputData->getTensorDesc().getDims();
if (inTensorDims.size() != outTensorDims.size()) {
return false;
}
if (inTensorDims[0] != outTensorDims[0]) {
return false;
}
CNNLayerPtr scaleShift = CNNNetworkHelper::getParent(fullyConnected);
if (scaleShift->type != "ScaleShift") {
return false;
}
// 3D tensor custom validation
if ((inTensorDims.size() == 3ul) &&
((!CNNNetworkHelper::blobValuesAreEqual(*scaleShift, "weights")) || (!CNNNetworkHelper::blobValuesAreEqual(*scaleShift, "biases")))) {
return false;
}
const Blob::Ptr prevDequantizationScaleBlob = CNNNetworkHelper::getBlob(scaleShift, "weights");
const size_t prevDequantizationScaleBlobSize = prevDequantizationScaleBlob->size();
if (prevDequantizationScaleBlobSize != inTensorDims[1]) {
return false;
}
const Blob::Ptr prevDequantizationShiftBlob = CNNNetworkHelper::getBlob(scaleShift, "biases");
const size_t prevDequantizationShiftBlobSize = prevDequantizationShiftBlob->size();
if (prevDequantizationShiftBlobSize != inTensorDims[1]) {
return false;
}
return true;
}
void FullyConnectedTransformation::transform(TransformationContext& context, CNNLayer& fullyConnected) const {
if (!canBeTransformed(context, fullyConnected)) {
return;
}
@@ -146,7 +210,7 @@ void FullyConnectedTransformation::transform(TransformationContext& context, CNN
}
if (this->updateBiases) {
updateLayerBiases(context, fullyConnected, dequantizationScales, dequantizationShifts, biasesShifts);
updateLayerBiases(context, fullyConnected, fullyConnected.type == "GEMM", dequantizationScales, dequantizationShifts, biasesShifts);
}
if ((parentOnWeights != nullptr) && (parentOnWeights->type == "FakeQuantize")) {
@@ -205,23 +269,14 @@ void FullyConnectedTransformation::calculateDequantizationForSymmetric(
const DataPtr inputData = fullyConnected.insData[0].lock();
if (inputData == nullptr) {
THROW_IE_EXCEPTION << "input data is absent for layer " << fullyConnected.name;
}
const Layout inputLayout = inputData->getLayout();
if (inputLayout != Layout::NC) {
THROW_IE_EXCEPTION << "Unexpected input layout " << inputLayout;
THROW_IE_LPT_EXCEPTION(fullyConnected) << "input data is absent";
}
const DataPtr insData = fullyConnected.insData[0].lock();
if (insData == nullptr) {
THROW_IE_LPT_EXCEPTION(fullyConnected) << "insert data ia absent";
const DataPtr outputData = fullyConnected.outData[0];
if (outputData == nullptr) {
THROW_IE_LPT_EXCEPTION(fullyConnected) << "output data is absent";
}
const size_t inputChannelsCount = insData->getDims()[1];
const Layout outputLayout = fullyConnected.outData[0]->getLayout();
if (outputLayout != Layout::NC) {
THROW_IE_EXCEPTION << "Unexpected output layout " << outputLayout;
}
const size_t outputChannelsCount = fullyConnected.outData[0]->getDims()[1];
dequantizationScales.resize(outputChannelsCount);
dequantizationShifts.resize(outputChannelsCount);
@@ -232,38 +287,45 @@ void FullyConnectedTransformation::calculateDequantizationForSymmetric(
THROW_IE_EXCEPTION << "Unexpected layer type to calculate quantization values " << scaleShift->type;
}
const Blob::Ptr prevDequantizationScaleBlob = CNNNetworkHelper::getBlob(scaleShift, "weights");
const auto prevDequantizationScaleBuffer = CNNNetworkHelper::getFloatData(prevDequantizationScaleBlob);
const Blob::Ptr prevDequantizationShiftBlob = CNNNetworkHelper::getBlob(scaleShift, "biases");
const auto prevDequantizationShiftBuffer = CNNNetworkHelper::getFloatData(prevDequantizationShiftBlob);
const auto prevDequantizationScaleBuffer = CNNNetworkHelper::getFloatData(CNNNetworkHelper::getBlob(scaleShift, "weights"));
const auto prevDequantizationShiftBuffer = CNNNetworkHelper::getFloatData(CNNNetworkHelper::getBlob(scaleShift, "biases"));
const Blob::Ptr weightsBlob = CNNNetworkHelper::getWeights(fullyConnected, roundQuantizedValues);
const auto weightsBuffer = CNNNetworkHelper::getFloatData(weightsBlob);
const Blob::Ptr biasesBlob = CNNNetworkHelper::getBiases(fullyConnected);
const auto biasesBuffer = biasesBlob == nullptr ? nullptr : CNNNetworkHelper::getFloatData(biasesBlob);
const float prevDequantizationScale = prevDequantizationScaleBuffer.get()[0];
const bool dequantizationValuesAreBroadcasted = getDequantizationValuesAreBroadcasted(fullyConnected);
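// Broadcasted (3D) case: only element [0] of the previous dequantization blobs is
// meaningful; canBeTransformed has already verified that all values are equal.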
for (size_t i = 0; i < outputChannelsCount; ++i) {
dequantizationScales[i] = prevDequantizationScale *
dequantizationScales[i] =
(dequantizationValuesAreBroadcasted ? prevDequantizationScaleBuffer.get()[0] : prevDequantizationScaleBuffer.get()[i]) *
(originalWeightsDequantizationScales.size() == 0 ?
1.0 :
(originalWeightsDequantizationScales.size() == 1 ? originalWeightsDequantizationScales[0] : originalWeightsDequantizationScales[i]));
}
const DataPtr insData = fullyConnected.insData[0].lock();
if (insData == nullptr) {
THROW_IE_LPT_EXCEPTION(fullyConnected) << "insert data is absent";
}
const size_t inputChannelsCount = insData->getDims().size() == 3ul ? insData->getDims()[2] : insData->getDims()[1];
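// For a 3D input the channel count is the last dimension: [N, H, C].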
for (size_t channel = 0lu; channel < outputChannelsCount; ++channel) {
float sum = 0.0;
const float weightsDequantizationScale = originalWeightsDequantizationScales.size() == 0 ?
1.0 :
(originalWeightsDequantizationScales.size() == 1 ? originalWeightsDequantizationScales[0] : originalWeightsDequantizationScales[channel]);
((originalWeightsDequantizationScales.size() == 1) ? originalWeightsDequantizationScales[0] : originalWeightsDequantizationScales[channel]);
for (size_t inputChannel = 0; inputChannel < inputChannelsCount; ++inputChannel) {
const float w = weightsBuffer.get()[channel * inputChannelsCount + inputChannel];
sum += w * prevDequantizationShiftBuffer.get()[inputChannel] * weightsDequantizationScale;
const float shift = dequantizationValuesAreBroadcasted ? prevDequantizationShiftBuffer.get()[0] : prevDequantizationShiftBuffer.get()[inputChannel];
sum += w * shift * weightsDequantizationScale;
}
dequantizationShifts[channel] = biasesBuffer == nullptr ?
sum :
(sum + biasesBuffer.get()[channel] - prevDequantizationScale * biasesBuffer.get()[channel] * weightsDequantizationScale);
(sum + biasesBuffer.get()[channel] -
(dequantizationValuesAreBroadcasted ? prevDequantizationScaleBuffer.get()[0] : prevDequantizationScaleBuffer.get()[channel]) *
biasesBuffer.get()[channel] * weightsDequantizationScale);
biasesShifts[channel] = sum;
}
}
@@ -276,69 +338,63 @@ void FullyConnectedTransformation::calculateDequantizationForAsymmetric(
std::vector<float>& dequantizationShifts) const {
const DataPtr inputData = fullyConnected.insData[0].lock();
if (inputData == nullptr) {
THROW_IE_EXCEPTION << "input data is absent for layer " << fullyConnected.name;
THROW_IE_LPT_EXCEPTION(fullyConnected) << "input data is absent";
}
const Layout inputLayout = inputData->getLayout();
if (inputLayout != Layout::NC) {
THROW_IE_EXCEPTION << "Unexpected input layout " << inputLayout;
}
const DataPtr insData = fullyConnected.insData[0].lock();
if (insData == nullptr) {
THROW_IE_LPT_EXCEPTION(fullyConnected) << "insert data is absent";
}
const size_t inputChannelsCount = insData->getDims()[1];
// const Layout inputLayout = inputData->getLayout();
// if (inputLayout != Layout::NC) {
// THROW_IE_EXCEPTION << "Unexpected input layout " << inputLayout;
// }
const size_t inputChannelsCount = inputData->getDims()[1];
const Layout outputLayout = fullyConnected.outData[0]->getLayout();
if (outputLayout != Layout::NC) {
THROW_IE_EXCEPTION << "Unexpected output layout " << outputLayout;
const DataPtr outputData = fullyConnected.outData[0];
if (outputData == nullptr) {
THROW_IE_LPT_EXCEPTION(fullyConnected) << "output data is absent";
}
const size_t outputChannelsCount = fullyConnected.outData[0]->getDims()[1];
dequantizationScales.resize(outputChannelsCount);
dequantizationShifts.resize(outputChannelsCount);
CNNLayerPtr scaleShift = CNNNetworkHelper::getParent(fullyConnected);
if (scaleShift->type != "ScaleShift") {
THROW_IE_EXCEPTION << "Unexpected layer type to calculate quantization values " << scaleShift->type;
}
const Blob::Ptr prevDequantizationScaleBlob = CNNNetworkHelper::getBlob(scaleShift, "weights");
const auto prevDequantizationScaleBuffer = CNNNetworkHelper::getFloatData(prevDequantizationScaleBlob);
const Blob::Ptr prevDequantizationShiftBlob = CNNNetworkHelper::getBlob(scaleShift, "biases");
const auto prevDequantizationShiftBuffer = CNNNetworkHelper::getFloatData(prevDequantizationShiftBlob);
const bool dequantizationValuesAreBroadcasted = getDequantizationValuesAreBroadcasted(fullyConnected);
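// Same broadcast rule as the symmetric path: for 3D inputs only element [0] is read.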
const Blob::Ptr weightsBlob = CNNNetworkHelper::getWeights(fullyConnected, roundQuantizedValues);
const auto weightsBuffer = CNNNetworkHelper::getFloatData(weightsBlob);
const Blob::Ptr biasesBlob = CNNNetworkHelper::getBiases(fullyConnected);
const auto biasesBuffer = biasesBlob == nullptr ? nullptr : CNNNetworkHelper::getFloatData(biasesBlob);
const size_t outputChannelsCount = outputData->getDims()[1];
dequantizationScales.resize(outputChannelsCount);
dequantizationShifts.resize(outputChannelsCount);
const std::shared_ptr<float> prevDequantizationScaleBuffer = CNNNetworkHelper::getFloatData(CNNNetworkHelper::getBlob(scaleShift, "weights"));
for (size_t i = 0; i < outputChannelsCount; ++i) {
dequantizationScales[i] =
prevDequantizationScaleBuffer.get()[0] *
(originalWeightsDequantizationScales.size() == 0
? 1.0
: (originalWeightsDequantizationScales.size() == 1 ? originalWeightsDequantizationScales[0]
: originalWeightsDequantizationScales[i]));
(dequantizationValuesAreBroadcasted ? prevDequantizationScaleBuffer.get()[0] : prevDequantizationScaleBuffer.get()[i]) *
(originalWeightsDequantizationScales.size() == 0 ?
1.0 :
(originalWeightsDequantizationScales.size() == 1 ? originalWeightsDequantizationScales[0] : originalWeightsDequantizationScales[i]));
}
const auto weightsBuffer = CNNNetworkHelper::getFloatData(CNNNetworkHelper::getWeights(fullyConnected, roundQuantizedValues));
const Blob::Ptr biasesBlob = CNNNetworkHelper::getBiases(fullyConnected);
const auto biasesBuffer = biasesBlob == nullptr ? nullptr : CNNNetworkHelper::getFloatData(CNNNetworkHelper::getBiases(fullyConnected));
const std::shared_ptr<float> prevDequantizationShiftBuffer = CNNNetworkHelper::getFloatData(CNNNetworkHelper::getBlob(scaleShift, "biases"));
for (size_t channel = 0lu; channel < outputChannelsCount; ++channel) {
float sum1 = 0.0;
float sum2 = 0.0;
const float weightsDequantizationScale =
originalWeightsDequantizationScales.size() == 0
? 1.0
: (originalWeightsDequantizationScales.size() == 1 ? originalWeightsDequantizationScales[0]
: originalWeightsDequantizationScales[channel]);
const float weightsDequantizationScale = originalWeightsDequantizationScales.size() == 0 ?
1.0 :
((originalWeightsDequantizationScales.size() == 1) ? originalWeightsDequantizationScales[0] : originalWeightsDequantizationScales[channel]);
for (size_t w = 0; w < inputChannelsCount; ++w) {
const float kernel = weightsBuffer.get()[channel * inputChannelsCount + w];
sum1 += kernel * prevDequantizationShiftBuffer.get()[channel] * weightsDequantizationScale;
const float shift = dequantizationValuesAreBroadcasted ? prevDequantizationShiftBuffer.get()[0] : prevDequantizationShiftBuffer.get()[channel];
sum1 += kernel * shift * weightsDequantizationScale;
sum2 += kernel * dataZeroPoints[w] * weightsDequantizationScale;
}
dequantizationShifts[channel] = biasesBuffer == nullptr
? sum1
: (sum1 + biasesBuffer.get()[channel] -
prevDequantizationScaleBuffer.get()[channel] *
biasesBuffer.get()[channel] * weightsDequantizationScale);
dequantizationShifts[channel] = biasesBuffer == nullptr ?
sum1 :
(sum1 + biasesBuffer.get()[channel] -
(dequantizationValuesAreBroadcasted ? prevDequantizationScaleBuffer.get()[0] : prevDequantizationScaleBuffer.get()[channel]) *
biasesBuffer.get()[channel] * weightsDequantizationScale);
}
}

View File

@@ -582,18 +582,42 @@ std::vector<CNNLayerPtr> CNNNetworkHelper::getLayers(const CNNLayer& parent, con
return layers;
}
Blob::Ptr CNNNetworkHelper::getBlob(CNNLayer* layer, const std::string& blobName) {
Blob::Ptr CNNNetworkHelper::getBlob(const CNNLayer* layer, const std::string& blobName) {
if (layer == nullptr) {
THROW_IE_EXCEPTION << "layer is nullptr";
}
if (layer->blobs.empty()) {
THROW_IE_EXCEPTION << "Layer '" << layer->name << "' does not have any blob";
if (blobName.empty()) {
if (layer->blobs.empty()) {
THROW_IE_LPT_EXCEPTION(*layer) << "does not have any blob";
}
if (layer->blobs.size() != 1) {
THROW_IE_LPT_EXCEPTION(*layer) << "there are several blobs";
}
return layer->blobs.begin()->second;
}
if (blobName.empty() && (layer->blobs.size() != 1)) {
THROW_IE_EXCEPTION << "several blobs";
const auto it = layer->blobs.find(blobName);
if (it == layer->blobs.end()) {
THROW_IE_LPT_EXCEPTION(*layer) << " does not have blob " << blobName;
}
Blob::Ptr blob = blobName.empty() ? layer->blobs.begin()->second : layer->blobs[blobName];
return blob;
return it->second;
}
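// Returns true when every value in the named blob is identical, i.e. the blob can be
// treated as a single broadcasted scalar (used for the 3D FullyConnected validation).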
bool CNNNetworkHelper::blobValuesAreEqual(const CNNLayer& layer, const std::string& blobName) {
const Blob::Ptr blob = CNNNetworkHelper::getBlob(&layer, blobName);
const std::shared_ptr<float> buffer = CNNNetworkHelper::getFloatData(blob);
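// All values have to be equal to each other: compare each element with its predecessor.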
if (!std::equal(
buffer.get() + 1,
buffer.get() + blob->size(),
buffer.get(),
[](const float value1, const float value2) { return value1 == value2; })) {
return false;
}
return true;
}
Blob::Ptr CNNNetworkHelper::getBlob(CNNLayerPtr layer, const std::string& blobName) {
@@ -1086,8 +1110,12 @@ CNNLayerPtr CNNNetworkHelper::addScaleShiftBetween(TransformationContext& contex
CNNLayerPtr ssCnnLayer(new ScaleShiftLayer(ssCnnLayerParams));
const std::vector<size_t> dims = outData->getDims();
if ((dims.size() > 1) && (dims[1] != dequantizationDetails.channelsCount)) {
THROW_IE_EXCEPTION << "unexpected parent channels count " << dims[1];
// TODO: just to test
if ((dims.size() != 2ul) || ((dims.size() == 2ul) && (dims[0] != dequantizationDetails.channelsCount))) {
if ((dims.size() > 1) && (dims[1] != dequantizationDetails.channelsCount)) {
THROW_IE_EXCEPTION << "unexpected parent channels count " << dims[1];
}
}
addLayerToCNNNetworkAfterData(outData, ssCnnLayer, child != nullptr ? child->name : "", context.network);

View File

@@ -115,7 +115,7 @@ void ScaleShiftToConvolutionTransformation::transform(TransformationContext& con
if (this->updateBiases) {
std::vector<float> biasesShifts(dequantizationShifts.size(), 0.f);
updateLayerBiases(context, *convolutionLayerPtr, dequantizationScales, dequantizationShifts, biasesShifts);
updateLayerBiases(context, *convolutionLayerPtr, false, dequantizationScales, dequantizationShifts, biasesShifts);
}
addDequantizationLayer(context, *convolutionLayerPtr, dequantizationScales, dequantizationShifts);

View File

@@ -3,7 +3,6 @@
//
#include "low_precision_transformations/weightable_layer_transformation.hpp"
#include "low_precision_transformations/network_helper.hpp"
#include <algorithm>
#include <details/caseless.hpp>
@@ -11,6 +10,9 @@
#include <string>
#include <vector>
#include "low_precision_transformations/common/ie_lpt_exception.hpp"
#include "low_precision_transformations/network_helper.hpp"
using namespace InferenceEngine;
using namespace InferenceEngine::details;
@@ -123,50 +125,72 @@ bool WeightableLayerTransformation::isPrecisionPreserved(const CNNLayer& layer)
return false;
}
void WeightableLayerTransformation::updateLayerBiases(
TransformationContext& context,
const CNNLayer& convolution,
const CNNLayer& weightableLayer,
const bool biasesDimsAsOutput,
std::vector<float>& dequantizationScales,
std::vector<float>& dequantizationShifts,
std::vector<float>& biasesShifts) const {
if (!std::all_of(dequantizationShifts.begin(), dequantizationShifts.end(), [](float value) { return value == 0.0; })) {
const DataPtr insData = weightableLayer.insData[0].lock();
if (insData == nullptr) {
THROW_IE_LPT_EXCEPTION(weightableLayer) << "input data is absent";
}
const std::vector<size_t> insDataDims = insData->getTensorDesc().getDims();
std::shared_ptr<float> biasesBufferPtr;
Blob::Ptr biasesBlob;
CNNLayerPtr biasesLayer = CNNNetworkHelper::getParent(convolution, 2);
CNNLayerPtr biasesLayer = CNNNetworkHelper::getParent(weightableLayer, 2);
if (biasesLayer == nullptr) {
const std::vector<size_t> dims = CaselessEq<std::string>()(convolution.type, "Convolution") ?
std::vector<size_t>({ dequantizationShifts.size() }) :
std::vector<size_t>({ 1ul, dequantizationShifts.size() });
const Layout layout = CaselessEq<std::string>()(convolution.type, "Convolution") ? Layout::C : Layout::NC;
if (weightableLayer.outData.size() != 1ul) {
THROW_IE_LPT_EXCEPTION(weightableLayer) << "unexpected output data count " << weightableLayer.outData.size();
}
const DataPtr outData = weightableLayer.outData[0];
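// GEMM case (biasesDimsAsOutput): biases repeat the full output shape; otherwise a
// 1D vector sized by the channel count (the last dimension for 3D inputs).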
const std::vector<size_t> biasesDims = biasesDimsAsOutput ?
outData->getDims() :
std::vector<size_t>({ insDataDims.size() == 3ul ? insDataDims[2] : dequantizationShifts.size() });
const Layout biasesLayout = InferenceEngine::TensorDesc::getLayoutByDims(biasesDims);
biasesBlob = CNNNetworkHelper::makeNewBlobPtr(TensorDesc(Precision::FP32, dims, layout));
biasesBlob = CNNNetworkHelper::makeNewBlobPtr(TensorDesc(Precision::FP32, biasesDims, biasesLayout));
biasesBlob->allocate();
biasesBufferPtr = CNNNetworkHelper::getFloatData(biasesBlob);
float* biasesBuffer = biasesBufferPtr.get();
std::fill(biasesBuffer, biasesBuffer + biasesBlob->size(), 0.f);
LayerParams constLayerParams{ convolution.name + "_Biases", "Const", convolution.outData[0]->getTensorDesc().getPrecision() };
LayerParams biasesLayerParams{ weightableLayer.name + "_Biases", "Const", outData->getTensorDesc().getPrecision() };
biasesLayer = CNNNetworkHelper::addLayer(
context,
nullptr,
std::make_shared<CNNLayer>(convolution),
std::make_shared<CNNLayer>(constLayerParams));
std::make_shared<CNNLayer>(weightableLayer),
std::make_shared<CNNLayer>(biasesLayerParams));
biasesLayer->blobs["custom"] = biasesBlob;
biasesLayer->outData[0]->reshape(dims, layout);
biasesLayer->outData[0]->reshape(biasesDims, biasesLayout);
} else {
biasesBlob = CNNNetworkHelper::getBlob(biasesLayer, "custom");
if (biasesBlob->size() != dequantizationShifts.size()) {
THROW_IE_EXCEPTION << "dequantization shifts size " << dequantizationShifts.size() << " is not equal biases blob size " << biasesBlob->size();
DataPtr insData = weightableLayer.insData[0].lock();
if (insData == nullptr) {
THROW_IE_LPT_EXCEPTION(weightableLayer) << "input data is absent";
}
if ((insData->getDims().size() != 3) && (biasesBlob->size() != dequantizationShifts.size())) {
THROW_IE_LPT_EXCEPTION(weightableLayer) <<
"dequantization shifts size " << dequantizationShifts.size() <<
" is not equal biases blob size " << biasesBlob->size();
}
biasesBufferPtr = CNNNetworkHelper::getFloatData(biasesBlob);
}
const float* biasesBuffer = biasesBufferPtr.get();
std::vector<float> biases(biasesBlob->size());
const bool broadcast = insDataDims.size() == 3ul;
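// 3D case: one bias/scale pair serves all channels, so element 0 is reused.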
for (size_t channel = 0ul; channel < biases.size(); ++channel) {
biases[channel] = (biasesShifts[channel] + biasesBuffer[channel]) / dequantizationScales[channel];
dequantizationShifts[channel] = 0.0;
biases[channel] = broadcast ?
(biasesShifts[0] + biasesBuffer[0]) / dequantizationScales[0] :
(biasesShifts[channel] + biasesBuffer[channel]) / dequantizationScales[channel];
}
std::fill(dequantizationShifts.begin(), dequantizationShifts.end(), 0.f);
CNNNetworkHelper::updateBlobs(*biasesLayer, "custom", biases);
}
}
@@ -287,10 +311,9 @@ void WeightableLayerTransformation::createAsymmetric(TransformationContext& cont
THROW_IE_EXCEPTION << "insert data is absent for layer " << child.name;
}
if (insData->getTensorDesc().getLayout() != Layout::NC &&
insData->getTensorDesc().getLayout() != Layout::NCHW &&
insData->getTensorDesc().getLayout() != Layout::NCDHW) {
THROW_IE_EXCEPTION << "unexpected layout '" << insData->getTensorDesc().getLayout() << "' layer " << child.name;
const size_t dimsSize = insData->getDims().size();
if ((dimsSize != 2ul) && (dimsSize != 3ul) && (dimsSize != 4ul) && (dimsSize != 5ul)) {
THROW_IE_EXCEPTION << "unexpected dimensions size " << dimsSize << " layer " << child.type << " " << child.name;
}
LayerParams eltwiseLayerParams {child.name + "_Sub_" + parent.name, "Eltwise", precisionsInfo.original};
@@ -312,15 +335,15 @@ void WeightableLayerTransformation::createAsymmetric(TransformationContext& cont
}
const TensorDesc constTensorDesc = constLayer->outData[0]->getTensorDesc();
if (constTensorDesc.getLayout() != insData->getTensorDesc().getLayout()) {
if ((dimsSize != 3) && (constTensorDesc.getLayout() != insData->getTensorDesc().getLayout())) {
THROW_IE_EXCEPTION << "unexpected Const layer layout " << constTensorDesc.getLayout();
}
const SizeVector& constDims = constTensorDesc.getDims();
if (constDims.size() != insData->getTensorDesc().getDims().size()) {
if ((dimsSize != 3) && (constDims.size() != insData->getTensorDesc().getDims().size())) {
THROW_IE_EXCEPTION << "unexpected dimension size " << constDims.size();
}
SizeVector dims(insData->getTensorDesc().getDims().size(), 1);
SizeVector dims(constLayer->outData[0]->getTensorDesc().getDims().size(), 1);
if (onWeights) {
dims[0] = constDims[0];
} else {

View File

@@ -32,7 +32,7 @@ TEST(TransformationTests, FullyConnectedBiasFusionTest3D) {
auto empty_bias = ngraph::opset1::Constant::create(ngraph::element::f32, ngraph::Shape{786}, {0});
auto fc = std::make_shared<ngraph::op::FullyConnected>(input1, weights, empty_bias, ngraph::Shape{1, 128, 786});
auto const_bias = ngraph::opset1::Constant::create(ngraph::element::f32, ngraph::Shape{1, 1, 786}, {1});
auto const_bias = ngraph::opset1::Constant::create(ngraph::element::f32, ngraph::Shape{786}, {1});
auto add = std::make_shared<ngraph::opset1::Add>(fc, const_bias);
f = std::make_shared<ngraph::Function>(ngraph::NodeVector{add}, ngraph::ParameterVector{input1});
@@ -84,4 +84,4 @@ TEST(TransformationTests, FullyConnectedBiasFusionTest2D) {
auto res = compare_functions(f, f_ref);
ASSERT_TRUE(res.first) << res.second;
}
}

View File

@@ -35,7 +35,7 @@ INSTANTIATE_TEST_CASE_P(
PowerTestModel::Ptr(new PowerTestModel(1.f, -32.f, 0)),
{ { 1, 3, 299, 299 } },
{ { 1, 3, 299, 299 } }),
SingleLayerTransformationsTestParams(
"CPU",
PowerTestModel::Ptr(new PowerTestModel(1.f, 1.f, -64.f)),
@@ -60,6 +60,19 @@ INSTANTIATE_TEST_CASE_P(
{ { 1, 2048 } },
{ { 1, 1000 } }),
// TODO: uncomment later
//SingleLayerTransformationsTestParams(
// "MKLDNNPlugin",
// SingleLayerTestModel::Ptr(new FullyConnectedTestModel({ 1, 128, 12, 64 }, { 128, 768 })),
// { { 1, 128, 12, 64 } },
// { { 128, 768 } }),
SingleLayerTransformationsTestParams(
"CPU",
SingleLayerTestModel::Ptr(new FullyConnectedTestModel({ 1, 128, 12, 64 }, { 1, 128, 768 })),
{ { 1, 128, 12, 64 } },
{ { 1, 128, 768 } }),
SingleLayerTransformationsTestParams(
"CPU",
SingleLayerTestModel::Ptr(new ConvolutionAndQuantizeOnSignedActivationsAndWeightsPositiveTestModel()),
@@ -512,13 +525,21 @@ INSTANTIATE_TEST_CASE_P(
"CPU",
SingleLayerTestModel::Ptr(new UpdateBiasesConvolutionTestModel(true)),
{ { 1, 32, 112, 112 } },
{ { 1, 32, 112, 112 } }),
{ { 1, 32, 112, 112 } })
SingleLayerTransformationsTestParams(
"CPU",
SingleLayerTestModel::Ptr(new UpdateBiasesFullyConnectedTestModel(true)),
{ { 1, 32, 112, 112 } },
{ { 1, 100 } })
// TODO: uncomment later
//SingleLayerTransformationsTestParams(
// "CPU",
// SingleLayerTestModel::Ptr(new UpdateBiasesFullyConnectedTestModel(false)),
// { { 1, 32, 112, 112 } },
// { { 1, 100 } }),
// TODO: uncomment later
//SingleLayerTransformationsTestParams(
// "CPU",
// SingleLayerTestModel::Ptr(new UpdateBiasesFullyConnectedTestModel(true)),
// { { 1, 32, 112, 112 } },
// { { 1, 100 } })
),
SingleLayerTransformationsTestParams::getLowPrecisionTransformerSingleLayerTestName);

View File

@@ -0,0 +1,240 @@
// Copyright (C) 2018-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "low_precision_transformer_single_layer_tests.hpp"
#include "low_precision_transformations/fake_quantize.hpp"
#include "low_precision_transformations/convolution.hpp"
#include "low_precision_transformations/fully_connected.hpp"
#include "low_precision_transformations/scaleshift_to_convolution.hpp"
FullyConnectedTestModel::FullyConnectedTestModel(
const std::vector<size_t>& inputDimentions,
const std::vector<size_t>& outputDimentions) :
addBiasesLayer(false),
inputDimentions(inputDimentions),
outputDimentions(outputDimentions) {}
std::string FullyConnectedTestModel::getName() const {
return std::string("FullyConnectedTestModel") +
(addBiasesLayer ? "WithBiases" : "") +
"_D" + std::to_string(inputDimentions.size()) +
"_D" + std::to_string(outputDimentions.size());
}
void FullyConnectedTestModel::initInput(Blob::Ptr input) const {
fillDataWithInitValue(input, -1.f);
}
bool FullyConnectedTestModel::transform(CNNNetwork& network, LayerTransformation::Params& params) const {
params.updatePrecisions = true;
// TODO: use getLowPrecisionTransformer(params) instead
LowPrecisionTransformer transformer(LowPrecisionTransformer::getAllTransformations(params).
add<FullyConnectedTransformation>(LayerTransformation::Params(params).setSupportAsymmetricQuantization(false), "FullyConnected").
add<ConvolutionTransformation>(LayerTransformation::Params(params).setPrecisionsOnActivations({ Precision::U8 }), "Convolution").
addCleanup<ScaleShiftToConvolutionTransformation>(
LayerTransformation::Params(params).setPrecisionsOnActivations({ Precision::U8 }),
"ScaleShift"));
// network.serialize("c:\\Projects\\temp\\fully_connected.original.xml", "c:\\Projects\\temp\\fully_connected.original.bin");
transformer.transform(network);
// network.serialize("c:\\Projects\\temp\\fully_connected.transformed.xml", "c:\\Projects\\temp\\fully_connected.transformed.bin");
if (params.quantizeOutputs) {
const CNNLayerPtr dequantizationLayer = getLayer(network, "fullyConnected");
if (dequantizationLayer->type != "ScaleShift") {
THROW_IE_EXCEPTION << "was not quantized";
}
const Blob::Ptr biases = CNNNetworkHelper::getBiases(*dequantizationLayer);
const std::shared_ptr<float> biasesData = CNNNetworkHelper::getFloatData(biases);
if (params.updateBiases) {
for (size_t i = 0ul; i < biases->size(); ++i) {
if (biasesData.get()[i] != 0.f) {
THROW_IE_EXCEPTION << "biases value is not zero";
}
}
} else {
// FakeQuantize layer has to have shift
for (size_t i = 0ul; i < biases->size(); ++i) {
if (biasesData.get()[i] == 0.f) {
THROW_IE_EXCEPTION << "biases value is zero";
}
}
}
}
return true;
}
std::string FullyConnectedTestModel::getModel(SingleLayerTransformationsTestParams& p) const {
size_t type_size = sizeof(InferenceEngine::PrecisionTrait<InferenceEngine::Precision::FP32>::value_type);
if (p._network_precision == "FP16")
type_size = sizeof(InferenceEngine::PrecisionTrait<InferenceEngine::Precision::FP16>::value_type);
const size_t inputChannelsCount = p.inputDimensions[0][1];
const size_t outputChannelsCount = p.outputDimensions[0][1];
std::vector<size_t> weightsConstInputDims = {
p.inputDimensions[0][2] * p.inputDimensions[0][3],
p.outputDimensions[0][p.outputDimensions[0].size() == 2ul ? 1ul : 2ul] };
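// Weights shape: [input features = H * W of the activation, output features].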
std::map<std::string, std::string> const_params = {};
std::map<std::string, std::string> fake_quantize_params = { {"levels", "256"} };
std::map<std::string, std::string> fake_quantize_params2 = { {"levels", "255"} };
std::map<std::string, std::string> power_params = { {"power", "1"}, {"scale", "1"}, {"shift", "0"} };
std::map<std::string, std::string> poolingParams = { {"kernel", "112,112"}, {"pool-method", "max"} };
std::map<std::string, std::string> reshapeParams = { };
std::map<std::string, std::string> fullyConnectedParams = { {"out-size", std::to_string(p.outputDimensions[0][1])} };
std::vector<size_t> biasesConstDims = { p.outputDimensions[0][1] };
const std::vector<std::vector<size_t>> convolutionDims = addBiasesLayer ?
std::vector<std::vector<size_t>>({ p.inputDimensions[0], weightsConstInputDims, biasesConstDims }) :
std::vector<std::vector<size_t>>({ p.inputDimensions[0], weightsConstInputDims });
std::vector<std::pair<std::string, std::string>> edges = {
{"0,0", "1,1"}, {"1,2", "6,7"}, // Power
{"2,3", "6,8"}, {"3,4", "6,9"}, {"4,5", "6,10"}, {"5,6", "6,11"}, // Const layers
{"6,12", "7,13"}, // FakeQuantize to Pooling
{"7,14", "8,15"}, // Pooling to Reshape
{"8,16", "15,28"}, // Reshape to FullyConnected
{"9,17", "14,22"}, {"10,18", "14,23"}, {"11,19", "14,24"}, {"12,20", "14,25"}, {"13,21", "14,26"}, // Const layers
{"14,27", "15,29"}
};
if (addBiasesLayer) {
edges.push_back({ "16,32", "15,30" }); // biases to Conv
}
const std::vector<std::vector<size_t>> fullyConnectedDims = addBiasesLayer ?
std::vector<std::vector<size_t>>({ p.outputDimensions[0], weightsConstInputDims, biasesConstDims }) :
std::vector<std::vector<size_t>>({ p.outputDimensions[0], weightsConstInputDims });
std::vector<size_t> quantizationParamsDims(p.inputDimensions[0].size(), 1);
quantizationParamsDims[1] = inputChannelsCount;
const std::vector<size_t> reshape1OuputDims = { p.inputDimensions[0][0], p.inputDimensions[0][1], p.inputDimensions[0][2] * p.inputDimensions[0][3] };
const std::vector<size_t> reshape2OuputDims = p.outputDimensions[0].size() == 2ul ?
std::vector<size_t>({ p.inputDimensions[0][0] * p.inputDimensions[0][1], p.inputDimensions[0][2] * p.inputDimensions[0][3] }) :
std::vector<size_t>({ p.inputDimensions[0][0], p.inputDimensions[0][1], p.inputDimensions[0][2] * p.inputDimensions[0][3] });
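// Reshape #2: a 2D FullyConnected folds N and C into the batch; a 3D one keeps rank 3.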
CommonTestUtils::DefaultNetBuilder builder = CommonTestUtils::DefaultNetBuilder::buildNetworkWithOneInput(
"FullyConnectedTestModel", p.inputDimensions[0], p._network_precision)
// 1
.addLayer("Power", p._network_precision, &power_params, { {p.inputDimensions[0]}, {p.inputDimensions[0]} })
// 2
.addLayer("Const", p._network_precision, &const_params, { {}, {quantizationParamsDims} }, inputChannelsCount * type_size, "dataInputLowConst")
// 3
.addLayer("Const", p._network_precision, &const_params, { {}, {quantizationParamsDims} }, inputChannelsCount * type_size, "dataInputHighConst")
// 4
.addLayer("Const", p._network_precision, &const_params, { {}, {quantizationParamsDims} }, inputChannelsCount * type_size, "dataOutputLowConst")
// 5
.addLayer("Const", p._network_precision, &const_params, { {}, {quantizationParamsDims} }, inputChannelsCount * type_size, "dataOutputHighConst")
// 6
.addLayer("FakeQuantize",
p._network_precision,
&fake_quantize_params,
{ {p.inputDimensions[0], quantizationParamsDims, quantizationParamsDims, quantizationParamsDims, quantizationParamsDims}, {{p.inputDimensions[0]}} },
"fakeQuantize")
// 7
.addLayer("Reshape", p._network_precision, &reshapeParams, { { p.inputDimensions[0] }, { reshape1OuputDims } }, "reshape1")
// 8
.addLayer("Reshape", p._network_precision, &reshapeParams, { {{ reshape1OuputDims }}, { reshape2OuputDims } }, "reshape2")
// 9
.addLayer("Const", p._network_precision, &const_params, { {}, {weightsConstInputDims} },
std::accumulate(weightsConstInputDims.begin(), weightsConstInputDims.end(), 1lu, std::multiplies<size_t>()) * type_size, "weigthsConst")
// 10
.addLayer("Const", p._network_precision, &const_params, { {}, {{1}} }, type_size, "weigthsInputLowConst")
// 11
.addLayer("Const", p._network_precision, &const_params, { {}, {{1}} }, type_size, "weigthsInputHighConst")
// 12
.addLayer("Const", p._network_precision, &const_params, { {}, {{1}} }, type_size, "weigthsOutputLowConst")
// 13
.addLayer("Const", p._network_precision, &const_params, { {}, {{1}} }, type_size, "weigthsOutputHighConst")
// 14
.addLayer(
"FakeQuantize",
p._network_precision,
&fake_quantize_params,
{ {weightsConstInputDims, {1}, {1}, {1}, {1}}, {{weightsConstInputDims}} },
"fakeQuantizeOnWeights")
// 15
.addLayer("FullyConnected", p._network_precision, &fullyConnectedParams, { fullyConnectedDims, {p.outputDimensions[0]} }, "fullyConnected");
if (addBiasesLayer) {
// 16
builder.addLayer("Const", p._network_precision, &const_params, { {}, {biasesConstDims} }, type_size * biasesConstDims[0], "biasesConst");
}
return builder.finish(&edges);
}
void FullyConnectedTestModel::resetTransformation(CNNNetwork& network) const {
CNNLayerPtr fakeQuantize = CNNNetworkHelper::getLayer(network, "fakeQuantize");
const size_t inputChannels = fakeQuantize->outData[0]->getTensorDesc().getDims()[1];
CNNLayerPtr fullyConnected = CNNNetworkHelper::getLayer(network, "fullyConnected");
const size_t outputChannels = fullyConnected->outData[0]->getTensorDesc().getDims()[1];
// Const on activations
//std::vector<float> lowValues(inputChannels, 1.0); // to have shifts
//std::vector<float> highValues(inputChannels);
//if (areScalesOnActivationsDifferent()) {
// for (size_t inputChannel = 0; inputChannel < highValues.size(); ++inputChannel) {
// highValues[inputChannel] = static_cast<float>(inputChannel);
// }
//}
//else {
// highValues = std::vector<float>(inputChannels, 255.f);
//}
//std::vector<float> lowValues(inputChannels, 1.275f);
//std::vector<float> highValues(inputChannels, 2.55f);
std::vector<float> lowValues(inputChannels, 127.5f);
std::vector<float> highValues(inputChannels, 255.f);
fillData(getLayer(network, "dataInputLowConst"), lowValues, "custom");
fillData(getLayer(network, "dataInputHighConst"), highValues, "custom");
fillData(getLayer(network, "dataOutputLowConst"), lowValues, "custom");
fillData(getLayer(network, "dataOutputHighConst"), highValues, "custom");
const size_t fakeQuantizeInputChannel = outputChannels;
// Const on weights
//std::vector<float> weights(
// fakeQuantize->outData[0]->getTensorDesc().getDims()[2] *
// fakeQuantize->outData[0]->getTensorDesc().getDims()[3] *
// fullyConnected->outData[0]->getTensorDesc().getDims()[fullyConnected->outData[0]->getTensorDesc().getDims().size() == 2ul ? 1 : 2]);
//for (size_t outputChannel = 0ul; outputChannel < outputChannels; ++outputChannel) {
// for (size_t inputChannel = 0ul; inputChannel < fakeQuantizeInputChannel; ++inputChannel) {
// weights[outputChannel * fakeQuantizeInputChannel + inputChannel] = inputChannel;
// }
//}
const std::vector<size_t> dims = fakeQuantize->outData[0]->getTensorDesc().getDims();
// const size_t weightsSize = dims[2] * dims[3] * dims[dims.size() == 2ul ? 1 : 2];
const size_t weightsSize = (dims[2] * dims[3]) * (dims[2] * dims[3]);
std::vector<float> weights(weightsSize, 2.f);
fillData(getLayer(network, "weigthsConst"), weights, "custom");
fillData(getLayer(network, "weigthsInputLowConst"), -128.f, "custom");
fillData(getLayer(network, "weigthsInputHighConst"), 127.f, "custom");
fillData(getLayer(network, "weigthsOutputLowConst"), -128.f, "custom");
fillData(getLayer(network, "weigthsOutputHighConst"), 127.f, "custom");
if (addBiasesLayer) {
std::vector<float> biases(outputChannels);
for (size_t i = 0ul; i < outputChannels; ++i) {
biases[i] = static_cast<float>(i);
}
fillData(getLayer(network, "biasesConst"), biases, "custom");
}
}
bool FullyConnectedTestModel::areScalesOnActivationsDifferent() const {
return false;
}

View File

@@ -1007,6 +1007,23 @@ public:
void initInput(Blob::Ptr input) const override;
};
class FullyConnectedTestModel : public SingleLayerTestModel {
public:
FullyConnectedTestModel(const std::vector<size_t>& inputDimentions, const std::vector<size_t>& outputDimentions);
std::string getName() const override;
bool transform(CNNNetwork& network, LayerTransformation::Params& params) const override;
void initInput(Blob::Ptr input) const override;
std::string getModel(SingleLayerTransformationsTestParams& p) const override;
void resetTransformation(CNNNetwork& network) const override;
protected:
virtual bool areScalesOnActivationsDifferent() const;
const bool addBiasesLayer;
private:
const std::vector<size_t> inputDimentions;
const std::vector<size_t> outputDimentions;
};
class EltwiseTestModel : public SingleLayerTestModel {
public:
EltwiseTestModel(
@@ -1895,7 +1912,7 @@ class SingleLayerTransformationsTest : public TestsCommon, public WithParamInter
std::unordered_map<std::string, InferenceEngine::Blob::Ptr> infer(
CNNNetwork& network,
std::unordered_map<std::string, Blob::Ptr>& inputBlobs,
Core & plugin, const std::string & device_name,
Core & plugin, const std::string & device_name,
ExecutableNetwork & executableNetwork,
InferRequest & inferRequest);

View File

@@ -210,7 +210,7 @@ void SingleLayerTransformationsTest::SetUp() {
Core core;
ExecutableNetwork executableNetwork;
InferRequest inferRequest;
const auto originalOutputMap = infer(network, inputBlobs, core,
const auto originalOutputMap = infer(network, inputBlobs, core,
p.device_name, executableNetwork, inferRequest);
const std::vector<bool> updatePrecisionsValues = { false };
@@ -228,6 +228,7 @@ void SingleLayerTransformationsTest::SetUp() {
const std::vector<bool> updateBiasesValues = { true, false };
const std::vector<bool> supportAsymmetricQuantizationValues = { true /*, false*/ };
const std::vector<std::vector<Precision>> precisionOnActivationsValues = {
// TODO: just to debug
{ Precision::I8 },
{ Precision::I8, Precision::U8 },
{ Precision::U8 },
@@ -304,7 +305,7 @@ void SingleLayerTransformationsTest::SetUp() {
const float threshold = p.model->getThreshold(p.device_name, net_precision, param);
const float zeroThreshold = p.model->getZeroThreshold();
const auto outName = transformedOutput.find(name);
if (outName == transformedOutput.end()) {
THROW_IE_EXCEPTION << "Original output name " + name + " doesn't exist in transformed model";