[IE COMMON] [LPT] Support 3D layout for FullyConnected transformation
This commit is contained in:
parent
bc98d17121
commit
e544dd1e28
@ -18,6 +18,7 @@ class INFERENCE_ENGINE_API_CLASS(FullyConnectedTransformation) : public Weightab
|
||||
public:
|
||||
FullyConnectedTransformation(const Params& params) : WeightableLayerTransformation(params) {}
|
||||
~FullyConnectedTransformation() override {};
|
||||
bool canBeTransformed(const TransformationContext& context, const CNNLayer& layer) const override;
|
||||
void transform(TransformationContext& context, CNNLayer& layer) const override;
|
||||
|
||||
private:
|
||||
|
@ -89,7 +89,9 @@ public:
|
||||
|
||||
static Blob::Ptr getBlob(CNNLayerPtr layer, const std::string& blobName);
|
||||
|
||||
static Blob::Ptr getBlob(CNNLayer* layer, const std::string& blobName);
|
||||
static Blob::Ptr getBlob(const CNNLayer* layer, const std::string& blobName);
|
||||
|
||||
static bool blobValuesAreEqual(const CNNLayer& layer, const std::string& blobName);
|
||||
|
||||
static std::shared_ptr<float> getFloatData(const CNNLayerPtr& layer, const std::string& blobName);
|
||||
|
||||
|
@ -33,6 +33,7 @@ protected:
|
||||
void updateLayerBiases(
|
||||
TransformationContext& context,
|
||||
const CNNLayer& convolution,
|
||||
const bool biasesDimsAsOutput,
|
||||
std::vector<float>& dequantizationScales,
|
||||
std::vector<float>& dequantizationShifts,
|
||||
std::vector<float>& biasesShifts) const;
|
||||
|
@ -180,7 +180,7 @@ void ConvolutionTransformation::transform(TransformationContext& context, CNNLay
|
||||
|
||||
if (this->updateBiases) {
|
||||
std::vector<float> biasesShifts(dequantizationShifts.size(), 0.f);
|
||||
updateLayerBiases(context, layer, dequantizationScales, dequantizationShifts, biasesShifts);
|
||||
updateLayerBiases(context, layer, false, dequantizationScales, dequantizationShifts, biasesShifts);
|
||||
}
|
||||
|
||||
CNNNetworkHelper::removeLayer(context.network, scaleShiftOnData);
|
||||
|
@ -25,8 +25,72 @@
|
||||
using namespace InferenceEngine;
|
||||
using namespace InferenceEngine::details;
|
||||
|
||||
void FullyConnectedTransformation::transform(TransformationContext& context, CNNLayer& fullyConnected) const {
|
||||
bool getDequantizationValuesAreBroadcasted(const CNNLayer& fullyConnected) {
|
||||
const DataPtr inputData = fullyConnected.insData[0].lock();
|
||||
if (inputData == nullptr) {
|
||||
THROW_IE_LPT_EXCEPTION(fullyConnected) << "input data is absent";
|
||||
}
|
||||
|
||||
return inputData->getDims().size() == 3ul;
|
||||
}
|
||||
|
||||
bool FullyConnectedTransformation::canBeTransformed(const TransformationContext& context, const CNNLayer& fullyConnected) const {
|
||||
if (!WeightableLayerTransformation::canBeTransformed(context, fullyConnected)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const DataPtr inputData = fullyConnected.insData[0].lock();
|
||||
if (inputData == nullptr) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const std::vector<size_t> inTensorDims = inputData->getDims();
|
||||
if ((inTensorDims.size() != 2) && (inTensorDims.size() != 3)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const DataPtr outputData = fullyConnected.outData[0];
|
||||
if (outputData == nullptr) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const std::vector<size_t> outTensorDims = outputData->getTensorDesc().getDims();
|
||||
if (inTensorDims.size() != outTensorDims.size()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (inTensorDims[0] != outTensorDims[0]) {
|
||||
return false;
|
||||
}
|
||||
|
||||
CNNLayerPtr scaleShift = CNNNetworkHelper::getParent(fullyConnected);
|
||||
if (scaleShift->type != "ScaleShift") {
|
||||
return false;
|
||||
}
|
||||
|
||||
// 3D tensor custom validation
|
||||
if ((inTensorDims.size() == 3ul) &&
|
||||
((!CNNNetworkHelper::blobValuesAreEqual(*scaleShift, "weights")) || (!CNNNetworkHelper::blobValuesAreEqual(*scaleShift, "biases")))) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const Blob::Ptr prevDequantizationScaleBlob = CNNNetworkHelper::getBlob(scaleShift, "weights");
|
||||
const size_t prevDequantizationScaleBlobSize = prevDequantizationScaleBlob->size();
|
||||
if (prevDequantizationScaleBlobSize != inTensorDims[1]) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const Blob::Ptr prevDequantizationShiftBlob = CNNNetworkHelper::getBlob(scaleShift, "biases");
|
||||
const size_t prevDequantizationShiftBlobSize = prevDequantizationShiftBlob->size();
|
||||
if (prevDequantizationShiftBlobSize != inTensorDims[1]) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void FullyConnectedTransformation::transform(TransformationContext& context, CNNLayer& fullyConnected) const {
|
||||
if (!canBeTransformed(context, fullyConnected)) {
|
||||
return;
|
||||
}
|
||||
|
||||
@ -146,7 +210,7 @@ void FullyConnectedTransformation::transform(TransformationContext& context, CNN
|
||||
}
|
||||
|
||||
if (this->updateBiases) {
|
||||
updateLayerBiases(context, fullyConnected, dequantizationScales, dequantizationShifts, biasesShifts);
|
||||
updateLayerBiases(context, fullyConnected, fullyConnected.type == "GEMM", dequantizationScales, dequantizationShifts, biasesShifts);
|
||||
}
|
||||
|
||||
if ((parentOnWeights != nullptr) && (parentOnWeights->type == "FakeQuantize")) {
|
||||
@ -205,23 +269,14 @@ void FullyConnectedTransformation::calculateDequantizationForSymmetric(
|
||||
|
||||
const DataPtr inputData = fullyConnected.insData[0].lock();
|
||||
if (inputData == nullptr) {
|
||||
THROW_IE_EXCEPTION << "input data is absent for layer " << fullyConnected.name;
|
||||
}
|
||||
const Layout inputLayout = inputData->getLayout();
|
||||
if (inputLayout != Layout::NC) {
|
||||
THROW_IE_EXCEPTION << "Unexpected input layout " << inputLayout;
|
||||
THROW_IE_LPT_EXCEPTION(fullyConnected) << "input data is absent";
|
||||
}
|
||||
|
||||
const DataPtr insData = fullyConnected.insData[0].lock();
|
||||
if (insData == nullptr) {
|
||||
THROW_IE_LPT_EXCEPTION(fullyConnected) << "insert data ia absent";
|
||||
const DataPtr outputData = fullyConnected.outData[0];
|
||||
if (outputData == nullptr) {
|
||||
THROW_IE_LPT_EXCEPTION(fullyConnected) << "output data is absent";
|
||||
}
|
||||
const size_t inputChannelsCount = insData->getDims()[1];
|
||||
|
||||
const Layout outputLayout = fullyConnected.outData[0]->getLayout();
|
||||
if (outputLayout != Layout::NC) {
|
||||
THROW_IE_EXCEPTION << "Unexpected output layout " << outputLayout;
|
||||
}
|
||||
const size_t outputChannelsCount = fullyConnected.outData[0]->getDims()[1];
|
||||
dequantizationScales.resize(outputChannelsCount);
|
||||
dequantizationShifts.resize(outputChannelsCount);
|
||||
@ -232,38 +287,45 @@ void FullyConnectedTransformation::calculateDequantizationForSymmetric(
|
||||
THROW_IE_EXCEPTION << "Unexpected layer type to calculate quantization values " << scaleShift->type;
|
||||
}
|
||||
|
||||
const Blob::Ptr prevDequantizationScaleBlob = CNNNetworkHelper::getBlob(scaleShift, "weights");
|
||||
const auto prevDequantizationScaleBuffer = CNNNetworkHelper::getFloatData(prevDequantizationScaleBlob);
|
||||
const Blob::Ptr prevDequantizationShiftBlob = CNNNetworkHelper::getBlob(scaleShift, "biases");
|
||||
const auto prevDequantizationShiftBuffer = CNNNetworkHelper::getFloatData(prevDequantizationShiftBlob);
|
||||
const auto prevDequantizationScaleBuffer = CNNNetworkHelper::getFloatData(CNNNetworkHelper::getBlob(scaleShift, "weights"));
|
||||
const auto prevDequantizationShiftBuffer = CNNNetworkHelper::getFloatData(CNNNetworkHelper::getBlob(scaleShift, "biases"));
|
||||
|
||||
const Blob::Ptr weightsBlob = CNNNetworkHelper::getWeights(fullyConnected, roundQuantizedValues);
|
||||
const auto weightsBuffer = CNNNetworkHelper::getFloatData(weightsBlob);
|
||||
const Blob::Ptr biasesBlob = CNNNetworkHelper::getBiases(fullyConnected);
|
||||
const auto biasesBuffer = biasesBlob == nullptr ? nullptr : CNNNetworkHelper::getFloatData(biasesBlob);
|
||||
|
||||
const float prevDequantizationScale = prevDequantizationScaleBuffer.get()[0];
|
||||
const bool dequantizationValuesAreBroadcasted = getDequantizationValuesAreBroadcasted(fullyConnected);
|
||||
for (size_t i = 0; i < outputChannelsCount; ++i) {
|
||||
dequantizationScales[i] = prevDequantizationScale *
|
||||
dequantizationScales[i] =
|
||||
(dequantizationValuesAreBroadcasted ? prevDequantizationScaleBuffer.get()[0] : prevDequantizationScaleBuffer.get()[i]) *
|
||||
(originalWeightsDequantizationScales.size() == 0 ?
|
||||
1.0 :
|
||||
(originalWeightsDequantizationScales.size() == 1 ? originalWeightsDequantizationScales[0] : originalWeightsDequantizationScales[i]));
|
||||
}
|
||||
|
||||
const DataPtr insData = fullyConnected.insData[0].lock();
|
||||
if (insData == nullptr) {
|
||||
THROW_IE_LPT_EXCEPTION(fullyConnected) << "insert data ia absent";
|
||||
}
|
||||
const size_t inputChannelsCount = insData->getDims().size() == 3ul ? insData->getDims()[2] : insData->getDims()[1];
|
||||
for (size_t channel = 0lu; channel < outputChannelsCount; ++channel) {
|
||||
float sum = 0.0;
|
||||
const float weightsDequantizationScale = originalWeightsDequantizationScales.size() == 0 ?
|
||||
1.0 :
|
||||
(originalWeightsDequantizationScales.size() == 1 ? originalWeightsDequantizationScales[0] : originalWeightsDequantizationScales[channel]);
|
||||
((originalWeightsDequantizationScales.size() == 1) ? originalWeightsDequantizationScales[0] : originalWeightsDequantizationScales[channel]);
|
||||
|
||||
for (size_t inputChannel = 0; inputChannel < inputChannelsCount; ++inputChannel) {
|
||||
const float w = weightsBuffer.get()[channel * inputChannelsCount + inputChannel];
|
||||
sum += w * prevDequantizationShiftBuffer.get()[inputChannel] * weightsDequantizationScale;
|
||||
const float shift = dequantizationValuesAreBroadcasted ? prevDequantizationShiftBuffer.get()[0] : prevDequantizationShiftBuffer.get()[inputChannel];
|
||||
sum += w * shift * weightsDequantizationScale;
|
||||
}
|
||||
|
||||
dequantizationShifts[channel] = biasesBuffer == nullptr ?
|
||||
sum :
|
||||
(sum + biasesBuffer.get()[channel] - prevDequantizationScale * biasesBuffer.get()[channel] * weightsDequantizationScale);
|
||||
(sum + biasesBuffer.get()[channel] -
|
||||
(dequantizationValuesAreBroadcasted ? prevDequantizationScaleBuffer.get()[0] : prevDequantizationScaleBuffer.get()[channel]) *
|
||||
biasesBuffer.get()[channel] * weightsDequantizationScale);
|
||||
biasesShifts[channel] = sum;
|
||||
}
|
||||
}
|
||||
@ -276,69 +338,63 @@ void FullyConnectedTransformation::calculateDequantizationForAsymmetric(
|
||||
std::vector<float>& dequantizationShifts) const {
|
||||
const DataPtr inputData = fullyConnected.insData[0].lock();
|
||||
if (inputData == nullptr) {
|
||||
THROW_IE_EXCEPTION << "input data is absent for layer " << fullyConnected.name;
|
||||
THROW_IE_LPT_EXCEPTION(fullyConnected) << "input data is absent";
|
||||
}
|
||||
const Layout inputLayout = inputData->getLayout();
|
||||
if (inputLayout != Layout::NC) {
|
||||
THROW_IE_EXCEPTION << "Unexpected input layout " << inputLayout;
|
||||
}
|
||||
const DataPtr insData = fullyConnected.insData[0].lock();
|
||||
if (insData == nullptr) {
|
||||
THROW_IE_LPT_EXCEPTION(fullyConnected) << "insert data is absent";
|
||||
}
|
||||
const size_t inputChannelsCount = insData->getDims()[1];
|
||||
// const Layout inputLayout = inputData->getLayout();
|
||||
// if (inputLayout != Layout::NC) {
|
||||
// THROW_IE_EXCEPTION << "Unexpected input layout " << inputLayout;
|
||||
// }
|
||||
const size_t inputChannelsCount = inputData->getDims()[1];
|
||||
|
||||
const Layout outputLayout = fullyConnected.outData[0]->getLayout();
|
||||
if (outputLayout != Layout::NC) {
|
||||
THROW_IE_EXCEPTION << "Unexpected output layout " << outputLayout;
|
||||
const DataPtr outputData = fullyConnected.outData[0];
|
||||
if (outputData == nullptr) {
|
||||
THROW_IE_LPT_EXCEPTION(fullyConnected) << "output data is absent";
|
||||
}
|
||||
const size_t outputChannelsCount = fullyConnected.outData[0]->getDims()[1];
|
||||
dequantizationScales.resize(outputChannelsCount);
|
||||
dequantizationShifts.resize(outputChannelsCount);
|
||||
|
||||
CNNLayerPtr scaleShift = CNNNetworkHelper::getParent(fullyConnected);
|
||||
if (scaleShift->type != "ScaleShift") {
|
||||
THROW_IE_EXCEPTION << "Unexpected layer type to calculate quantization values " << scaleShift->type;
|
||||
}
|
||||
|
||||
const Blob::Ptr prevDequantizationScaleBlob = CNNNetworkHelper::getBlob(scaleShift, "weights");
|
||||
const auto prevDequantizationScaleBuffer = CNNNetworkHelper::getFloatData(prevDequantizationScaleBlob);
|
||||
const Blob::Ptr prevDequantizationShiftBlob = CNNNetworkHelper::getBlob(scaleShift, "biases");
|
||||
const auto prevDequantizationShiftBuffer = CNNNetworkHelper::getFloatData(prevDequantizationShiftBlob);
|
||||
const bool dequantizationValuesAreBroadcasted = getDequantizationValuesAreBroadcasted(fullyConnected);
|
||||
|
||||
const Blob::Ptr weightsBlob = CNNNetworkHelper::getWeights(fullyConnected, roundQuantizedValues);
|
||||
const auto weightsBuffer = CNNNetworkHelper::getFloatData(weightsBlob);
|
||||
const Blob::Ptr biasesBlob = CNNNetworkHelper::getBiases(fullyConnected);
|
||||
const auto biasesBuffer = biasesBlob == nullptr ? nullptr : CNNNetworkHelper::getFloatData(biasesBlob);
|
||||
const size_t outputChannelsCount = outputData->getDims()[1];
|
||||
dequantizationScales.resize(outputChannelsCount);
|
||||
dequantizationShifts.resize(outputChannelsCount);
|
||||
|
||||
const std::shared_ptr<float> prevDequantizationScaleBuffer = CNNNetworkHelper::getFloatData(CNNNetworkHelper::getBlob(scaleShift, "weights"));
|
||||
for (size_t i = 0; i < outputChannelsCount; ++i) {
|
||||
dequantizationScales[i] =
|
||||
prevDequantizationScaleBuffer.get()[0] *
|
||||
(originalWeightsDequantizationScales.size() == 0
|
||||
? 1.0
|
||||
: (originalWeightsDequantizationScales.size() == 1 ? originalWeightsDequantizationScales[0]
|
||||
: originalWeightsDequantizationScales[i]));
|
||||
(dequantizationValuesAreBroadcasted ? prevDequantizationScaleBuffer.get()[0] : prevDequantizationScaleBuffer.get()[i]) *
|
||||
(originalWeightsDequantizationScales.size() == 0 ?
|
||||
1.0 :
|
||||
(originalWeightsDequantizationScales.size() == 1 ? originalWeightsDequantizationScales[0] : originalWeightsDequantizationScales[i]));
|
||||
}
|
||||
|
||||
const auto weightsBuffer = CNNNetworkHelper::getFloatData(CNNNetworkHelper::getWeights(fullyConnected, roundQuantizedValues));
|
||||
const Blob::Ptr biasesBlob = CNNNetworkHelper::getBiases(fullyConnected);
|
||||
const auto biasesBuffer = biasesBlob == nullptr ? nullptr : CNNNetworkHelper::getFloatData(CNNNetworkHelper::getBiases(fullyConnected));
|
||||
|
||||
const std::shared_ptr<float> prevDequantizationShiftBuffer = CNNNetworkHelper::getFloatData(CNNNetworkHelper::getBlob(scaleShift, "biases"));
|
||||
|
||||
for (size_t channel = 0lu; channel < outputChannelsCount; ++channel) {
|
||||
float sum1 = 0.0;
|
||||
float sum2 = 0.0;
|
||||
const float weightsDequantizationScale =
|
||||
originalWeightsDequantizationScales.size() == 0
|
||||
? 1.0
|
||||
: (originalWeightsDequantizationScales.size() == 1 ? originalWeightsDequantizationScales[0]
|
||||
: originalWeightsDequantizationScales[channel]);
|
||||
const float weightsDequantizationScale = originalWeightsDequantizationScales.size() == 0 ?
|
||||
1.0 :
|
||||
((originalWeightsDequantizationScales.size() == 1) ? originalWeightsDequantizationScales[0] : originalWeightsDequantizationScales[channel]);
|
||||
|
||||
for (size_t w = 0; w < inputChannelsCount; ++w) {
|
||||
const float kernel = weightsBuffer.get()[channel * inputChannelsCount + w];
|
||||
sum1 += kernel * prevDequantizationShiftBuffer.get()[channel] * weightsDequantizationScale;
|
||||
const float shift = dequantizationValuesAreBroadcasted ? prevDequantizationShiftBuffer.get()[0] : prevDequantizationShiftBuffer.get()[channel];
|
||||
sum1 += kernel * shift * weightsDequantizationScale;
|
||||
sum2 += kernel * dataZeroPoints[w] * weightsDequantizationScale;
|
||||
}
|
||||
|
||||
dequantizationShifts[channel] = biasesBuffer == nullptr
|
||||
? sum1
|
||||
: (sum1 + biasesBuffer.get()[channel] -
|
||||
prevDequantizationScaleBuffer.get()[channel] *
|
||||
biasesBuffer.get()[channel] * weightsDequantizationScale);
|
||||
dequantizationShifts[channel] = biasesBuffer == nullptr ?
|
||||
sum1 :
|
||||
(sum1 + biasesBuffer.get()[channel] -
|
||||
(dequantizationValuesAreBroadcasted ? prevDequantizationScaleBuffer.get()[0] : prevDequantizationScaleBuffer.get()[channel]) *
|
||||
biasesBuffer.get()[channel] * weightsDequantizationScale);
|
||||
}
|
||||
}
|
||||
|
@ -582,18 +582,42 @@ std::vector<CNNLayerPtr> CNNNetworkHelper::getLayers(const CNNLayer& parent, con
|
||||
return layers;
|
||||
}
|
||||
|
||||
Blob::Ptr CNNNetworkHelper::getBlob(CNNLayer* layer, const std::string& blobName) {
|
||||
Blob::Ptr CNNNetworkHelper::getBlob(const CNNLayer* layer, const std::string& blobName) {
|
||||
if (layer == nullptr) {
|
||||
THROW_IE_EXCEPTION << "layer is nullable";
|
||||
}
|
||||
if (layer->blobs.empty()) {
|
||||
THROW_IE_EXCEPTION << "Layer '" << layer->name << "' does not have any blob";
|
||||
|
||||
if (blobName.empty()) {
|
||||
if (layer->blobs.empty()) {
|
||||
THROW_IE_LPT_EXCEPTION(*layer) << "does not have any blob";
|
||||
}
|
||||
|
||||
if (layer->blobs.size() != 1) {
|
||||
THROW_IE_LPT_EXCEPTION(*layer) << "there are several blobs";
|
||||
}
|
||||
return layer->blobs.begin()->second;
|
||||
}
|
||||
if (blobName.empty() && (layer->blobs.size() != 1)) {
|
||||
THROW_IE_EXCEPTION << "several blobs";
|
||||
|
||||
const auto it = layer->blobs.find(blobName);
|
||||
if (it == layer->blobs.end()) {
|
||||
THROW_IE_LPT_EXCEPTION(*layer) << " does not have blob " << blobName;
|
||||
}
|
||||
Blob::Ptr blob = blobName.empty() ? layer->blobs.begin()->second : layer->blobs[blobName];
|
||||
return blob;
|
||||
|
||||
return it->second;
|
||||
}
|
||||
|
||||
bool CNNNetworkHelper::blobValuesAreEqual(const CNNLayer& layer, const std::string& blobName) {
|
||||
const Blob::Ptr blob = CNNNetworkHelper::getBlob(&layer, blobName);
|
||||
const std::shared_ptr<float> buffer = CNNNetworkHelper::getFloatData(blob);
|
||||
if (!std::equal(
|
||||
buffer.get(),
|
||||
buffer.get() + blob->size(),
|
||||
buffer.get(),
|
||||
[](const float value1, const float value2) { return value1 == value2; })) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
Blob::Ptr CNNNetworkHelper::getBlob(CNNLayerPtr layer, const std::string& blobName) {
|
||||
@ -1086,8 +1110,12 @@ CNNLayerPtr CNNNetworkHelper::addScaleShiftBetween(TransformationContext& contex
|
||||
CNNLayerPtr ssCnnLayer(new ScaleShiftLayer(ssCnnLayerParams));
|
||||
|
||||
const std::vector<size_t> dims = outData->getDims();
|
||||
if ((dims.size() > 1) && (dims[1] != dequantizationDetails.channelsCount)) {
|
||||
THROW_IE_EXCEPTION << "unexpected parent channels count " << dims[1];
|
||||
|
||||
// TODO: just to test
|
||||
if ((dims.size() != 2ul) || ((dims.size() == 2ul) && (dims[0] != dequantizationDetails.channelsCount))) {
|
||||
if ((dims.size() > 1) && (dims[1] != dequantizationDetails.channelsCount)) {
|
||||
THROW_IE_EXCEPTION << "unexpected parent channels count " << dims[1];
|
||||
}
|
||||
}
|
||||
addLayerToCNNNetworkAfterData(outData, ssCnnLayer, child != nullptr ? child->name : "", context.network);
|
||||
|
||||
|
@ -115,7 +115,7 @@ void ScaleShiftToConvolutionTransformation::transform(TransformationContext& con
|
||||
|
||||
if (this->updateBiases) {
|
||||
std::vector<float> biasesShifts(dequantizationShifts.size(), 0.f);
|
||||
updateLayerBiases(context, *convolutionLayerPtr, dequantizationScales, dequantizationShifts, biasesShifts);
|
||||
updateLayerBiases(context, *convolutionLayerPtr, false, dequantizationScales, dequantizationShifts, biasesShifts);
|
||||
}
|
||||
|
||||
addDequantizationLayer(context, *convolutionLayerPtr, dequantizationScales, dequantizationShifts);
|
||||
|
@ -3,7 +3,6 @@
|
||||
//
|
||||
|
||||
#include "low_precision_transformations/weightable_layer_transformation.hpp"
|
||||
#include "low_precision_transformations/network_helper.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
#include <details/caseless.hpp>
|
||||
@ -11,6 +10,9 @@
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "low_precision_transformations/common/ie_lpt_exception.hpp"
|
||||
#include "low_precision_transformations/network_helper.hpp"
|
||||
|
||||
using namespace InferenceEngine;
|
||||
using namespace InferenceEngine::details;
|
||||
|
||||
@ -123,50 +125,72 @@ bool WeightableLayerTransformation::isPrecisionPreserved(const CNNLayer& layer)
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
void WeightableLayerTransformation::updateLayerBiases(
|
||||
TransformationContext& context,
|
||||
const CNNLayer& convolution,
|
||||
const CNNLayer& weightableLayer,
|
||||
const bool biasesDimsAsOutput,
|
||||
std::vector<float>& dequantizationScales,
|
||||
std::vector<float>& dequantizationShifts,
|
||||
std::vector<float>& biasesShifts) const {
|
||||
if (!std::all_of(dequantizationShifts.begin(), dequantizationShifts.end(), [](float value) { return value == 0.0; })) {
|
||||
const DataPtr insData = weightableLayer.insData[0].lock();
|
||||
if (insData == nullptr) {
|
||||
THROW_IE_LPT_EXCEPTION(weightableLayer) << "input data is absent";
|
||||
}
|
||||
const std::vector<size_t> insDataDims = insData->getTensorDesc().getDims();
|
||||
|
||||
std::shared_ptr<float> biasesBufferPtr;
|
||||
Blob::Ptr biasesBlob;
|
||||
CNNLayerPtr biasesLayer = CNNNetworkHelper::getParent(convolution, 2);
|
||||
CNNLayerPtr biasesLayer = CNNNetworkHelper::getParent(weightableLayer, 2);
|
||||
if (biasesLayer == nullptr) {
|
||||
const std::vector<size_t> dims = CaselessEq<std::string>()(convolution.type, "Convolution") ?
|
||||
std::vector<size_t>({ dequantizationShifts.size() }) :
|
||||
std::vector<size_t>({ 1ul, dequantizationShifts.size() });
|
||||
const Layout layout = CaselessEq<std::string>()(convolution.type, "Convolution") ? Layout::C : Layout::NC;
|
||||
if (weightableLayer.outData.size() != 1ul) {
|
||||
THROW_IE_LPT_EXCEPTION(weightableLayer) << "unexpected output data count " << weightableLayer.outData.size();
|
||||
}
|
||||
const DataPtr outData = weightableLayer.outData[0];
|
||||
const std::vector<size_t> biasesDims = biasesDimsAsOutput ?
|
||||
outData->getDims() :
|
||||
std::vector<size_t>({ insDataDims.size() == 3ul ? insDataDims[2] : dequantizationShifts.size() });
|
||||
const Layout biasesLayout = InferenceEngine::TensorDesc::getLayoutByDims(biasesDims);
|
||||
|
||||
biasesBlob = CNNNetworkHelper::makeNewBlobPtr(TensorDesc(Precision::FP32, dims, layout));
|
||||
biasesBlob = CNNNetworkHelper::makeNewBlobPtr(TensorDesc(Precision::FP32, biasesDims, biasesLayout));
|
||||
biasesBlob->allocate();
|
||||
|
||||
biasesBufferPtr = CNNNetworkHelper::getFloatData(biasesBlob);
|
||||
float* biasesBuffer = biasesBufferPtr.get();
|
||||
std::fill(biasesBuffer, biasesBuffer + biasesBlob->size(), 0.f);
|
||||
|
||||
LayerParams constLayerParams{ convolution.name + "_Biases", "Const", convolution.outData[0]->getTensorDesc().getPrecision() };
|
||||
LayerParams biasesLayerParams{ weightableLayer.name + "_Biases", "Const", outData->getTensorDesc().getPrecision() };
|
||||
biasesLayer = CNNNetworkHelper::addLayer(
|
||||
context,
|
||||
nullptr,
|
||||
std::make_shared<CNNLayer>(convolution),
|
||||
std::make_shared<CNNLayer>(constLayerParams));
|
||||
std::make_shared<CNNLayer>(weightableLayer),
|
||||
std::make_shared<CNNLayer>(biasesLayerParams));
|
||||
biasesLayer->blobs["custom"] = biasesBlob;
|
||||
biasesLayer->outData[0]->reshape(dims, layout);
|
||||
biasesLayer->outData[0]->reshape(biasesDims, biasesLayout);
|
||||
} else {
|
||||
biasesBlob = CNNNetworkHelper::getBlob(biasesLayer, "custom");
|
||||
if (biasesBlob->size() != dequantizationShifts.size()) {
|
||||
THROW_IE_EXCEPTION << "dequantization shifts size " << dequantizationShifts.size() << " is not equal biases blob size " << biasesBlob->size();
|
||||
DataPtr insData = weightableLayer.insData[0].lock();
|
||||
if (insData == nullptr) {
|
||||
THROW_IE_LPT_EXCEPTION(weightableLayer) << "input data is absent";
|
||||
}
|
||||
|
||||
if ((insData->getDims().size() != 3) && (biasesBlob->size() != dequantizationShifts.size())) {
|
||||
THROW_IE_LPT_EXCEPTION(weightableLayer) <<
|
||||
"dequantization shifts size " << dequantizationShifts.size() <<
|
||||
" is not equal biases blob size " << biasesBlob->size();
|
||||
}
|
||||
biasesBufferPtr = CNNNetworkHelper::getFloatData(biasesBlob);
|
||||
}
|
||||
const float* biasesBuffer = biasesBufferPtr.get();
|
||||
std::vector<float> biases(biasesBlob->size());
|
||||
const bool broadcast = insDataDims.size() == 3ul;
|
||||
for (size_t channel = 0ul; channel < biases.size(); ++channel) {
|
||||
biases[channel] = (biasesShifts[channel] + biasesBuffer[channel]) / dequantizationScales[channel];
|
||||
dequantizationShifts[channel] = 0.0;
|
||||
biases[channel] = broadcast ?
|
||||
(biasesShifts[0] + biasesBuffer[0]) / dequantizationScales[0] :
|
||||
(biasesShifts[channel] + biasesBuffer[channel]) / dequantizationScales[channel];
|
||||
}
|
||||
std::fill(dequantizationShifts.begin(), dequantizationShifts.end(), 0.f);
|
||||
CNNNetworkHelper::updateBlobs(*biasesLayer, "custom", biases);
|
||||
}
|
||||
}
|
||||
@ -287,10 +311,9 @@ void WeightableLayerTransformation::createAsymmetric(TransformationContext& cont
|
||||
THROW_IE_EXCEPTION << "insert data is absent for layer " << child.name;
|
||||
}
|
||||
|
||||
if (insData->getTensorDesc().getLayout() != Layout::NC &&
|
||||
insData->getTensorDesc().getLayout() != Layout::NCHW &&
|
||||
insData->getTensorDesc().getLayout() != Layout::NCDHW) {
|
||||
THROW_IE_EXCEPTION << "unexpected layout '" << insData->getTensorDesc().getLayout() << "' layer " << child.name;
|
||||
const size_t dimsSize = insData->getDims().size();
|
||||
if ((dimsSize != 2ul) && (dimsSize != 3ul) && (dimsSize != 4ul) && (dimsSize != 5ul)) {
|
||||
THROW_IE_EXCEPTION << "unexpected dimensions size " << dimsSize << " layer " << child.type << " " << child.name;
|
||||
}
|
||||
|
||||
LayerParams eltwiseLayerParams {child.name + "_Sub_" + parent.name, "Eltwise", precisionsInfo.original};
|
||||
@ -312,15 +335,15 @@ void WeightableLayerTransformation::createAsymmetric(TransformationContext& cont
|
||||
}
|
||||
|
||||
const TensorDesc constTensorDesc = constLayer->outData[0]->getTensorDesc();
|
||||
if (constTensorDesc.getLayout() != insData->getTensorDesc().getLayout()) {
|
||||
if ((dimsSize != 3) && (constTensorDesc.getLayout() != insData->getTensorDesc().getLayout())) {
|
||||
THROW_IE_EXCEPTION << "unexpected Const layer layout " << constTensorDesc.getLayout();
|
||||
}
|
||||
const SizeVector& constDims = constTensorDesc.getDims();
|
||||
if (constDims.size() != insData->getTensorDesc().getDims().size()) {
|
||||
if ((dimsSize != 3) && (constDims.size() != insData->getTensorDesc().getDims().size())) {
|
||||
THROW_IE_EXCEPTION << "unexpected dimension size " << constDims.size();
|
||||
}
|
||||
|
||||
SizeVector dims(insData->getTensorDesc().getDims().size(), 1);
|
||||
SizeVector dims(constLayer->outData[0]->getTensorDesc().getDims().size(), 1);
|
||||
if (onWeights) {
|
||||
dims[0] = constDims[0];
|
||||
} else {
|
||||
|
@ -32,7 +32,7 @@ TEST(TransformationTests, FullyConnectedBiasFusionTest3D) {
|
||||
auto empty_bias = ngraph::opset1::Constant::create(ngraph::element::f32, ngraph::Shape{786}, {0});
|
||||
auto fc = std::make_shared<ngraph::op::FullyConnected>(input1, weights, empty_bias, ngraph::Shape{1, 128, 786});
|
||||
|
||||
auto const_bias = ngraph::opset1::Constant::create(ngraph::element::f32, ngraph::Shape{1, 1, 786}, {1});
|
||||
auto const_bias = ngraph::opset1::Constant::create(ngraph::element::f32, ngraph::Shape{786}, {1});
|
||||
auto add = std::make_shared<ngraph::opset1::Add>(fc, const_bias);
|
||||
|
||||
f = std::make_shared<ngraph::Function>(ngraph::NodeVector{add}, ngraph::ParameterVector{input1});
|
||||
@ -84,4 +84,4 @@ TEST(TransformationTests, FullyConnectedBiasFusionTest2D) {
|
||||
|
||||
auto res = compare_functions(f, f_ref);
|
||||
ASSERT_TRUE(res.first) << res.second;
|
||||
}
|
||||
}
|
||||
|
@ -35,7 +35,7 @@ INSTANTIATE_TEST_CASE_P(
|
||||
PowerTestModel::Ptr(new PowerTestModel(1.f, -32.f, 0)),
|
||||
{ { 1, 3, 299, 299 } },
|
||||
{ { 1, 3, 299, 299 } }),
|
||||
|
||||
|
||||
SingleLayerTransformationsTestParams(
|
||||
"CPU",
|
||||
PowerTestModel::Ptr(new PowerTestModel(1.f, 1.f, -64.f)),
|
||||
@ -60,6 +60,19 @@ INSTANTIATE_TEST_CASE_P(
|
||||
{ { 1, 2048 } },
|
||||
{ { 1, 1000 } }),
|
||||
|
||||
// TODO: uncomment later
|
||||
//SingleLayerTransformationsTestParams(
|
||||
// "MKLDNNPlugin",
|
||||
// SingleLayerTestModel::Ptr(new FullyConnectedTestModel({ 1, 128, 12, 64 }, { 128, 768 })),
|
||||
// { { 1, 128, 12, 64 } },
|
||||
// { { 128, 768 } }),
|
||||
|
||||
SingleLayerTransformationsTestParams(
|
||||
"CPU",
|
||||
SingleLayerTestModel::Ptr(new FullyConnectedTestModel({ 1, 128, 12, 64 }, { 1, 128, 768 })),
|
||||
{ { 1, 128, 12, 64 } },
|
||||
{ { 1, 128, 768 } }),
|
||||
|
||||
SingleLayerTransformationsTestParams(
|
||||
"CPU",
|
||||
SingleLayerTestModel::Ptr(new ConvolutionAndQuantizeOnSignedActivationsAndWeightsPositiveTestModel()),
|
||||
@ -512,13 +525,21 @@ INSTANTIATE_TEST_CASE_P(
|
||||
"CPU",
|
||||
SingleLayerTestModel::Ptr(new UpdateBiasesConvolutionTestModel(true)),
|
||||
{ { 1, 32, 112, 112 } },
|
||||
{ { 1, 32, 112, 112 } }),
|
||||
{ { 1, 32, 112, 112 } })
|
||||
|
||||
SingleLayerTransformationsTestParams(
|
||||
"CPU",
|
||||
SingleLayerTestModel::Ptr(new UpdateBiasesFullyConnectedTestModel(true)),
|
||||
{ { 1, 32, 112, 112 } },
|
||||
{ { 1, 100 } })
|
||||
// TODO: uncomment later
|
||||
//SingleLayerTransformationsTestParams(
|
||||
// "CPU",
|
||||
// SingleLayerTestModel::Ptr(new UpdateBiasesFullyConnectedTestModel(false)),
|
||||
// { { 1, 32, 112, 112 } },
|
||||
// { { 1, 100 } }),
|
||||
|
||||
// TODO: uncomment later
|
||||
//SingleLayerTransformationsTestParams(
|
||||
// "CPU",
|
||||
// SingleLayerTestModel::Ptr(new UpdateBiasesFullyConnectedTestModel(true)),
|
||||
// { { 1, 32, 112, 112 } },
|
||||
// { { 1, 100 } })
|
||||
),
|
||||
SingleLayerTransformationsTestParams::getLowPrecisionTransformerSingleLayerTestName);
|
||||
|
||||
|
@ -0,0 +1,240 @@
|
||||
// Copyright (C) 2018-2020 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "low_precision_transformer_single_layer_tests.hpp"
|
||||
#include "low_precision_transformations/fake_quantize.hpp"
|
||||
#include "low_precision_transformations/convolution.hpp"
|
||||
#include "low_precision_transformations/fully_connected.hpp"
|
||||
#include "low_precision_transformations/scaleshift_to_convolution.hpp"
|
||||
|
||||
FullyConnectedTestModel::FullyConnectedTestModel(
|
||||
const std::vector<size_t>& inputDimentions,
|
||||
const std::vector<size_t>& outputDimentions) :
|
||||
addBiasesLayer(false),
|
||||
inputDimentions(inputDimentions),
|
||||
outputDimentions(outputDimentions) {}
|
||||
|
||||
std::string FullyConnectedTestModel::getName() const {
|
||||
return std::string("FullyConnectedTestModel") +
|
||||
(addBiasesLayer ? "WithBiases" : "") +
|
||||
"_D" + std::to_string(inputDimentions.size()) +
|
||||
"_D" + std::to_string(outputDimentions.size());
|
||||
}
|
||||
|
||||
void FullyConnectedTestModel::initInput(Blob::Ptr input) const {
|
||||
fillDataWithInitValue(input, -1.f);
|
||||
}
|
||||
|
||||
// Runs the low precision transformations on the network and validates the
// result: the FullyConnected layer must have been replaced by a
// dequantization ScaleShift, and its biases must be zero exactly when the
// transformation was configured to move shifts into the layer biases.
bool FullyConnectedTestModel::transform(CNNNetwork& network, LayerTransformation::Params& params) const {
    params.updatePrecisions = true;

    // TODO: use getLowPrecisionTransformer(params) instead
    LowPrecisionTransformer transformer(LowPrecisionTransformer::getAllTransformations(params).
        add<FullyConnectedTransformation>(LayerTransformation::Params(params).setSupportAsymmetricQuantization(false), "FullyConnected").
        add<ConvolutionTransformation>(LayerTransformation::Params(params).setPrecisionsOnActivations({ Precision::U8 }), "Convolution").
        addCleanup<ScaleShiftToConvolutionTransformation>(
            LayerTransformation::Params(params).setPrecisionsOnActivations({ Precision::U8 }),
            "ScaleShift"));

    transformer.transform(network);

    if (!params.quantizeOutputs) {
        return true;
    }

    // The dequantization ScaleShift keeps the original layer name.
    const CNNLayerPtr dequantizationLayer = getLayer(network, "fullyConnected");
    if (dequantizationLayer->type != "ScaleShift") {
        THROW_IE_EXCEPTION << "was not quantized";
    }

    const Blob::Ptr biases = CNNNetworkHelper::getBiases(*dequantizationLayer);
    const std::shared_ptr<float> biasesData = CNNNetworkHelper::getFloatData(biases);
    // updateBiases == true: shifts were folded into the layer biases, so the
    // dequantization ScaleShift biases must all be zero. Otherwise the
    // FakeQuantize-induced shift must remain, i.e. no bias may be zero.
    const bool biasesExpectedToBeZero = params.updateBiases;
    for (size_t i = 0ul; i < biases->size(); ++i) {
        const bool valueIsZero = biasesData.get()[i] == 0.f;
        if (biasesExpectedToBeZero && !valueIsZero) {
            THROW_IE_EXCEPTION << "biases value is not zero";
        }
        if (!biasesExpectedToBeZero && valueIsZero) {
            THROW_IE_EXCEPTION << "biases value is zero";
        }
    }

    return true;
}
|
||||
|
||||
// Generates the IR description of the test network:
//   Input -> Power -> FakeQuantize (on data) -> Reshape -> Reshape -> FullyConnected
// with a parallel FakeQuantize branch on the constant weights and an optional
// biases Const input. Layer indices in the `edges` list correspond to the
// numbered .addLayer(...) calls below.
std::string FullyConnectedTestModel::getModel(SingleLayerTransformationsTestParams& p) const {
    // Byte size of one weight/const element depends on the network precision.
    size_t type_size = sizeof(InferenceEngine::PrecisionTrait<InferenceEngine::Precision::FP32>::value_type);
    if (p._network_precision == "FP16")
        type_size = sizeof(InferenceEngine::PrecisionTrait<InferenceEngine::Precision::FP16>::value_type);

    const size_t inputChannelsCount = p.inputDimensions[0][1];
    // NOTE(review): outputChannelsCount is currently unused in this method.
    const size_t outputChannelsCount = p.outputDimensions[0][1];
    // Weights shape: [H*W x out-size]. Assumes a 4D (N,C,H,W) input; the
    // out-size index depends on whether the output is 2D or 3D (3D layout support).
    std::vector<size_t> weightsConstInputDims = {
        p.inputDimensions[0][2] * p.inputDimensions[0][3],
        p.outputDimensions[0][p.outputDimensions[0].size() == 2ul ? 1ul : 2ul] };

    std::map<std::string, std::string> const_params = {};
    std::map<std::string, std::string> fake_quantize_params = { {"levels", "256"} };
    std::map<std::string, std::string> fake_quantize_params2 = { {"levels", "255"} };
    std::map<std::string, std::string> power_params = { {"power", "1"}, {"scale", "1"}, {"shift", "0"} };
    std::map<std::string, std::string> poolingParams = { {"kernel", "112,112"}, {"pool-method", "max"} };
    std::map<std::string, std::string> reshapeParams = { };
    std::map<std::string, std::string> fullyConnectedParams = { {"out-size", std::to_string(p.outputDimensions[0][1])} };

    // One bias value per output channel.
    std::vector<size_t> biasesConstDims = { p.outputDimensions[0][1] };

    // NOTE(review): convolutionDims appears to be unused below — the
    // FullyConnected layer uses fullyConnectedDims instead; candidate for removal.
    const std::vector<std::vector<size_t>> convolutionDims = addBiasesLayer ?
        std::vector<std::vector<size_t>>({ p.inputDimensions[0], weightsConstInputDims, biasesConstDims }) :
        std::vector<std::vector<size_t>>({ p.inputDimensions[0], weightsConstInputDims });

    // Edges are "<layerId>,<portId>" pairs matching the numbered layers below.
    std::vector<std::pair<std::string, std::string>> edges = {
        {"0,0", "1,1"}, {"1,2", "6,7"}, // Power
        {"2,3", "6,8"}, {"3,4", "6,9"}, {"4,5", "6,10"}, {"5,6", "6,11"}, // Const layers
        {"6,12", "7,13"}, // FakeQuantize to Pooling
        {"7,14", "8,15"}, // Pooling to Reshape
        {"8,16", "15,28"}, // Reshape to FullyConnected
        {"9,17", "14,22"}, {"10,18", "14,23"}, {"11,19", "14,24"}, {"12,20", "14,25"}, {"13,21", "14,26"}, // Const layers
        {"14,27", "15,29"}
    };

    if (addBiasesLayer) {
        edges.push_back({ "16,32", "15,30" }); // biases to Conv
    }

    // Inputs of the FullyConnected layer: data, weights, and optionally biases.
    // NOTE(review): the first entry is p.outputDimensions[0] rather than the
    // reshape2 output dims — presumably accepted by the builder; verify.
    const std::vector<std::vector<size_t>> fullyConnectedDims = addBiasesLayer ?
        std::vector<std::vector<size_t>>({ p.outputDimensions[0], weightsConstInputDims, biasesConstDims }) :
        std::vector<std::vector<size_t>>({ p.outputDimensions[0], weightsConstInputDims });

    // Per-channel quantization parameter shape: all ones except the channel dim.
    std::vector<size_t> quantizationParamsDims(p.inputDimensions[0].size(), 1);
    quantizationParamsDims[1] = inputChannelsCount;

    // reshape1 flattens the spatial dims; reshape2 produces either a 2D
    // [N*C x H*W] or a 3D [N x C x H*W] tensor depending on the output rank.
    const std::vector<size_t> reshape1OuputDims = { p.inputDimensions[0][0], p.inputDimensions[0][1], p.inputDimensions[0][2] * p.inputDimensions[0][3] };
    const std::vector<size_t> reshape2OuputDims = p.outputDimensions[0].size() == 2ul ?
        std::vector<size_t>({ p.inputDimensions[0][0] * p.inputDimensions[0][1], p.inputDimensions[0][2] * p.inputDimensions[0][3] }) :
        std::vector<size_t>({ p.inputDimensions[0][0], p.inputDimensions[0][1], p.inputDimensions[0][2] * p.inputDimensions[0][3] });

    CommonTestUtils::DefaultNetBuilder builder = CommonTestUtils::DefaultNetBuilder::buildNetworkWithOneInput(
        "FullyConnectedTestModel", p.inputDimensions[0], p._network_precision)
        // 1
        .addLayer("Power", p._network_precision, &power_params, { {p.inputDimensions[0]}, {p.inputDimensions[0]} })
        // 2
        .addLayer("Const", p._network_precision, &const_params, { {}, {quantizationParamsDims} }, inputChannelsCount * type_size, "dataInputLowConst")
        // 3
        .addLayer("Const", p._network_precision, &const_params, { {}, {quantizationParamsDims} }, inputChannelsCount * type_size, "dataInputHighConst")
        // 4
        .addLayer("Const", p._network_precision, &const_params, { {}, {quantizationParamsDims} }, inputChannelsCount * type_size, "dataOutputLowConst")
        // 5
        .addLayer("Const", p._network_precision, &const_params, { {}, {quantizationParamsDims} }, inputChannelsCount * type_size, "dataOutputHighConst")
        // 6: FakeQuantize on activations (256 levels)
        .addLayer("FakeQuantize",
            p._network_precision,
            &fake_quantize_params,
            { {p.inputDimensions[0], quantizationParamsDims, quantizationParamsDims, quantizationParamsDims, quantizationParamsDims}, {{p.inputDimensions[0]}} },
            "fakeQuantize")
        // 7
        .addLayer("Reshape", p._network_precision, &reshapeParams, { { p.inputDimensions[0] }, { reshape1OuputDims } }, "reshape1")
        // 8
        .addLayer("Reshape", p._network_precision, &reshapeParams, { {{ reshape1OuputDims }}, { reshape2OuputDims } }, "reshape2")
        // 9: constant weights (note: "weigths" typo is the established layer name)
        .addLayer("Const", p._network_precision, &const_params, { {}, {weightsConstInputDims} },
            std::accumulate(weightsConstInputDims.begin(), weightsConstInputDims.end(), 1lu, std::multiplies<size_t>()) * type_size, "weigthsConst")
        // 10
        .addLayer("Const", p._network_precision, &const_params, { {}, {{1}} }, type_size, "weigthsInputLowConst")
        // 11
        .addLayer("Const", p._network_precision, &const_params, { {}, {{1}} }, type_size, "weigthsInputHighConst")
        // 12
        .addLayer("Const", p._network_precision, &const_params, { {}, {{1}} }, type_size, "weigthsOutputLowConst")
        // 13
        .addLayer("Const", p._network_precision, &const_params, { {}, {{1}} }, type_size, "weigthsOutputHighConst")
        // 14: FakeQuantize on weights (per-tensor intervals)
        .addLayer(
            "FakeQuantize",
            p._network_precision,
            &fake_quantize_params,
            { {weightsConstInputDims, {1}, {1}, {1}, {1}}, {{weightsConstInputDims}} },
            "fakeQuantizeOnWeights")
        // 15
        .addLayer("FullyConnected", p._network_precision, &fullyConnectedParams, { fullyConnectedDims, {p.outputDimensions[0]} }, "fullyConnected");

    if (addBiasesLayer) {
        // 16: optional biases input to the FullyConnected layer
        builder.addLayer("Const", p._network_precision, &const_params, { {}, {biasesConstDims} }, type_size * biasesConstDims[0], "biasesConst");
    }

    return builder.finish(&edges);
}
|
||||
|
||||
// Fills every Const layer of the generated network with deterministic data:
// activation quantization intervals, constant weights, weight quantization
// intervals, and (optionally) per-channel biases.
void FullyConnectedTestModel::resetTransformation(CNNNetwork& network) const {
    // Channel counts are read back from the already-built network.
    CNNLayerPtr fakeQuantize = CNNNetworkHelper::getLayer(network, "fakeQuantize");
    const size_t inputChannels = fakeQuantize->outData[0]->getTensorDesc().getDims()[1];

    CNNLayerPtr fullyConnected = CNNNetworkHelper::getLayer(network, "fullyConnected");
    const size_t outputChannels = fullyConnected->outData[0]->getTensorDesc().getDims()[1];

    // Per-channel quantization interval on activations: [127.5, 255].
    const std::vector<float> lowValues(inputChannels, 127.5f);
    const std::vector<float> highValues(inputChannels, 255.f);

    fillData(getLayer(network, "dataInputLowConst"), lowValues, "custom");
    fillData(getLayer(network, "dataInputHighConst"), highValues, "custom");
    fillData(getLayer(network, "dataOutputLowConst"), lowValues, "custom");
    fillData(getLayer(network, "dataOutputHighConst"), highValues, "custom");

    // Constant weights: a square (H*W) x (H*W) matrix filled with 2.
    // NOTE(review): assumes the FullyConnected out-size equals H*W of the
    // 4D FakeQuantize output — TODO confirm for other configurations.
    const std::vector<size_t> dims = fakeQuantize->outData[0]->getTensorDesc().getDims();
    const size_t weightsSize = (dims[2] * dims[3]) * (dims[2] * dims[3]);
    const std::vector<float> weights(weightsSize, 2.f);
    fillData(getLayer(network, "weigthsConst"), weights, "custom");

    // Symmetric signed quantization interval on weights: [-128, 127].
    fillData(getLayer(network, "weigthsInputLowConst"), -128.f, "custom");
    fillData(getLayer(network, "weigthsInputHighConst"), 127.f, "custom");
    fillData(getLayer(network, "weigthsOutputLowConst"), -128.f, "custom");
    fillData(getLayer(network, "weigthsOutputHighConst"), 127.f, "custom");

    if (addBiasesLayer) {
        // Biases 0, 1, 2, ... give every output channel a distinct shift.
        std::vector<float> biases(outputChannels);
        for (size_t channel = 0ul; channel < outputChannels; ++channel) {
            biases[channel] = static_cast<float>(channel);
        }
        fillData(getLayer(network, "biasesConst"), biases, "custom");
    }
}
|
||||
|
||||
// Base model uses identical per-channel scales on activations; the method is
// virtual so derived test models may override it to exercise per-channel scales.
bool FullyConnectedTestModel::areScalesOnActivationsDifferent() const {
    return false;
}
|
@ -1007,6 +1007,23 @@ public:
|
||||
void initInput(Blob::Ptr input) const override;
|
||||
};
|
||||
|
||||
// Single-layer test model for the FullyConnected low precision
// transformation, covering both 2D and 3D output layouts.
// NOTE(review): the "Dimentions" spelling is kept for source compatibility.
class FullyConnectedTestModel : public SingleLayerTestModel {
public:
    FullyConnectedTestModel(const std::vector<size_t>& inputDimentions, const std::vector<size_t>& outputDimentions);
    std::string getName() const override;
    bool transform(CNNNetwork& network, LayerTransformation::Params& params) const override;
    void initInput(Blob::Ptr input) const override;
    std::string getModel(SingleLayerTransformationsTestParams& p) const override;
    void resetTransformation(CNNNetwork& network) const override;
protected:
    // Hook for derived models: true when activation scales differ per channel.
    virtual bool areScalesOnActivationsDifferent() const;
    // Whether a biases Const input is generated for the FullyConnected layer.
    const bool addBiasesLayer;

private:
    const std::vector<size_t> inputDimentions;
    const std::vector<size_t> outputDimentions;
};
|
||||
|
||||
class EltwiseTestModel : public SingleLayerTestModel {
|
||||
public:
|
||||
EltwiseTestModel(
|
||||
@ -1895,7 +1912,7 @@ class SingleLayerTransformationsTest : public TestsCommon, public WithParamInter
|
||||
std::unordered_map<std::string, InferenceEngine::Blob::Ptr> infer(
|
||||
CNNNetwork& network,
|
||||
std::unordered_map<std::string, Blob::Ptr>& inputBlobs,
|
||||
Core & plugin, const std::string & device_name,
|
||||
Core & plugin, const std::string & device_name,
|
||||
ExecutableNetwork & executableNetwork,
|
||||
InferRequest & inferRequest);
|
||||
|
||||
|
@ -210,7 +210,7 @@ void SingleLayerTransformationsTest::SetUp() {
|
||||
Core core;
|
||||
ExecutableNetwork executableNetwork;
|
||||
InferRequest inferRequest;
|
||||
const auto originalOutputMap = infer(network, inputBlobs, core,
|
||||
const auto originalOutputMap = infer(network, inputBlobs, core,
|
||||
p.device_name, executableNetwork, inferRequest);
|
||||
|
||||
const std::vector<bool> updatePrecisionsValues = { false };
|
||||
@ -228,6 +228,7 @@ void SingleLayerTransformationsTest::SetUp() {
|
||||
const std::vector<bool> updateBiasesValues = { true, false };
|
||||
const std::vector<bool> supportAsymmetricQuantizationValues = { true /*, false*/ };
|
||||
const std::vector<std::vector<Precision>> precisionOnActivationsValues = {
|
||||
// TODO: just to debug
|
||||
{ Precision::I8 },
|
||||
{ Precision::I8, Precision::U8 },
|
||||
{ Precision::U8 },
|
||||
@ -304,7 +305,7 @@ void SingleLayerTransformationsTest::SetUp() {
|
||||
|
||||
const float threshold = p.model->getThreshold(p.device_name, net_precision, param);
|
||||
const float zeroThreshold = p.model->getZeroThreshold();
|
||||
|
||||
|
||||
const auto outName = transformedOutput.find(name);
|
||||
if (outName == transformedOutput.end()) {
|
||||
THROW_IE_EXCEPTION << "Original output name " + name + " doesn't exist in transformed model";
|
||||
|
Loading…
Reference in New Issue
Block a user