From e544dd1e282994873fa8e243f32df7f5b2a5099e Mon Sep 17 00:00:00 2001
From: Edward Shogulin <edward.shogulin@intel.com>
Date: Sat, 30 May 2020 12:57:36 +0300
Subject: [PATCH] [IE COMMON] [LPT] Support 3D layout for FullyConnected
 transformation

---
 .../fully_connected.hpp                       |   1 +
 .../network_helper.hpp                        |   4 +-
 .../weightable_layer_transformation.hpp       |   1 +
 .../src/convolution.cpp                       |   2 +-
 .../src/fully_connected.cpp                   | 184 +++++++++-----
 .../src/network_helper.cpp                    |  46 +++-
 .../src/scaleshift_to_convolution.cpp         |   2 +-
 .../src/weightable_layer_transformation.cpp   |  69 +++--
 .../transformations/fc_bias_fusion_test.cpp   |   4 +-
 ...ecision_transformer_single_layer_tests.cpp |  35 ++-
 .../transformations/fully_connected_test.cpp  | 240 ++++++++++++++++++
 ...ecision_transformer_single_layer_tests.hpp |  19 +-
 .../single_layer_transformations_test.cpp     |   5 +-
 13 files changed, 501 insertions(+), 111 deletions(-)
 create mode 100644 inference-engine/tests_deprecated/functional/shared_tests/transformations/fully_connected_test.cpp
diff --git a/inference-engine/src/low_precision_transformations/include/low_precision_transformations/fully_connected.hpp b/inference-engine/src/low_precision_transformations/include/low_precision_transformations/fully_connected.hpp
index 7a4989c3efb..f3d8db8c64e 100644
--- a/inference-engine/src/low_precision_transformations/include/low_precision_transformations/fully_connected.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision_transformations/fully_connected.hpp
@@ -18,6 +18,7 @@ class INFERENCE_ENGINE_API_CLASS(FullyConnectedTransformation) : public Weightab
 public:
     FullyConnectedTransformation(const Params& params) : WeightableLayerTransformation(params) {}
     ~FullyConnectedTransformation() override {};
+    bool canBeTransformed(const TransformationContext& context, const CNNLayer& layer) const override;
     void transform(TransformationContext& context, CNNLayer& layer) const override;
 
 private:
diff --git a/inference-engine/src/low_precision_transformations/include/low_precision_transformations/network_helper.hpp b/inference-engine/src/low_precision_transformations/include/low_precision_transformations/network_helper.hpp
index 9d87a1dfdc5..14469d17290 100644
--- a/inference-engine/src/low_precision_transformations/include/low_precision_transformations/network_helper.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision_transformations/network_helper.hpp
@@ -89,7 +89,9 @@ public:
 
     static Blob::Ptr getBlob(CNNLayerPtr layer, const std::string& blobName);
 
-    static Blob::Ptr getBlob(CNNLayer* layer, const std::string& blobName);
+    static Blob::Ptr getBlob(const CNNLayer* layer, const std::string& blobName);
+
+    static bool blobValuesAreEqual(const CNNLayer& layer, const std::string& blobName);
 
     static std::shared_ptr<float> getFloatData(const CNNLayerPtr& layer, const std::string& blobName);
 
diff --git a/inference-engine/src/low_precision_transformations/include/low_precision_transformations/weightable_layer_transformation.hpp b/inference-engine/src/low_precision_transformations/include/low_precision_transformations/weightable_layer_transformation.hpp
index ee694221336..4ea4e2d9171 100644
--- a/inference-engine/src/low_precision_transformations/include/low_precision_transformations/weightable_layer_transformation.hpp
+++ b/inference-engine/src/low_precision_transformations/include/low_precision_transformations/weightable_layer_transformation.hpp
@@ -33,6 +33,7 @@ protected:
     void updateLayerBiases(
         TransformationContext& context,
         const CNNLayer& convolution,
+        const bool biasesDimsAsOutput,
         std::vector<float>& dequantizationScales,
         std::vector<float>& dequantizationShifts,
         std::vector<float>& biasesShifts) const;
diff --git a/inference-engine/src/low_precision_transformations/src/convolution.cpp b/inference-engine/src/low_precision_transformations/src/convolution.cpp
index 331847646d1..a2e4a18d262 100644
--- a/inference-engine/src/low_precision_transformations/src/convolution.cpp
+++ b/inference-engine/src/low_precision_transformations/src/convolution.cpp
@@ -180,7 +180,7 @@ void ConvolutionTransformation::transform(TransformationContext& context, CNNLay
 
     if (this->updateBiases) {
         std::vector<float> biasesShifts(dequantizationShifts.size(), 0.f);
-        updateLayerBiases(context, layer, dequantizationScales, dequantizationShifts, biasesShifts);
+        updateLayerBiases(context, layer, false, dequantizationScales, dequantizationShifts, biasesShifts);
     }
 
     CNNNetworkHelper::removeLayer(context.network, scaleShiftOnData);
diff --git a/inference-engine/src/low_precision_transformations/src/fully_connected.cpp b/inference-engine/src/low_precision_transformations/src/fully_connected.cpp
index f83de891773..b015db379b9 100644
--- a/inference-engine/src/low_precision_transformations/src/fully_connected.cpp
+++ b/inference-engine/src/low_precision_transformations/src/fully_connected.cpp
@@ -25,8 +25,72 @@
 using namespace InferenceEngine;
 using namespace InferenceEngine::details;
 
-void FullyConnectedTransformation::transform(TransformationContext& context, CNNLayer& fullyConnected) const {
+bool getDequantizationValuesAreBroadcasted(const CNNLayer& fullyConnected) {
+    const DataPtr inputData = fullyConnected.insData[0].lock();
+    if (inputData == nullptr) {
+        THROW_IE_LPT_EXCEPTION(fullyConnected) << "input data is absent";
+    }
+
+    return inputData->getDims().size() == 3ul;
+}
+
+bool FullyConnectedTransformation::canBeTransformed(const TransformationContext& context, const CNNLayer& fullyConnected) const {
     if (!WeightableLayerTransformation::canBeTransformed(context, fullyConnected)) {
+        return false;
+    }
+
+    const DataPtr inputData = fullyConnected.insData[0].lock();
+    if (inputData == nullptr) {
+        return false;
+    }
+
+    const std::vector<size_t> inTensorDims = inputData->getDims();
+    if ((inTensorDims.size() != 2) && (inTensorDims.size() != 3)) {
+        return false;
+    }
+
+    const DataPtr outputData = fullyConnected.outData[0];
+    if (outputData == nullptr) {
+        return false;
+    }
+
+    const std::vector<size_t> outTensorDims = outputData->getTensorDesc().getDims();
+    if (inTensorDims.size() != outTensorDims.size()) {
+        return false;
+    }
+
+    if (inTensorDims[0] != outTensorDims[0]) {
+        return false;
+    }
+
+    CNNLayerPtr scaleShift = CNNNetworkHelper::getParent(fullyConnected);
+    if (scaleShift->type != "ScaleShift") {
+        return false;
+    }
+
+    // 3D tensor custom validation
+    if ((inTensorDims.size() == 3ul) &&
+        ((!CNNNetworkHelper::blobValuesAreEqual(*scaleShift, "weights")) || (!CNNNetworkHelper::blobValuesAreEqual(*scaleShift, "biases")))) {
+        return false;
+    }
+
+    const Blob::Ptr prevDequantizationScaleBlob = CNNNetworkHelper::getBlob(scaleShift, "weights");
+    const size_t prevDequantizationScaleBlobSize = prevDequantizationScaleBlob->size();
+    if (prevDequantizationScaleBlobSize != inTensorDims[1]) {
+        return false;
+    }
+
+    const Blob::Ptr prevDequantizationShiftBlob = CNNNetworkHelper::getBlob(scaleShift, "biases");
+    const size_t prevDequantizationShiftBlobSize = prevDequantizationShiftBlob->size();
+    if (prevDequantizationShiftBlobSize != inTensorDims[1]) {
+        return false;
+    }
+
+    return true;
+}
+
+void FullyConnectedTransformation::transform(TransformationContext& context, CNNLayer& fullyConnected) const {
+    if (!canBeTransformed(context, fullyConnected)) {
         return;
     }
 
@@ -146,7 +210,7 @@ void FullyConnectedTransformation::transform(TransformationContext& context, CNN
     }
 
     if (this->updateBiases) {
-        updateLayerBiases(context, fullyConnected, dequantizationScales, dequantizationShifts, biasesShifts);
+        updateLayerBiases(context, fullyConnected, fullyConnected.type == "GEMM", dequantizationScales, dequantizationShifts, biasesShifts);
     }
 
     if ((parentOnWeights != nullptr) && (parentOnWeights->type == "FakeQuantize")) {
@@ -205,23 +269,14 @@ void FullyConnectedTransformation::calculateDequantizationForSymmetric(
 
     const DataPtr inputData = fullyConnected.insData[0].lock();
     if (inputData == nullptr) {
-        THROW_IE_EXCEPTION << "input data is absent for layer " << fullyConnected.name;
-    }
-    const Layout inputLayout = inputData->getLayout();
-    if (inputLayout != Layout::NC) {
-        THROW_IE_EXCEPTION << "Unexpected input layout " << inputLayout;
+        THROW_IE_LPT_EXCEPTION(fullyConnected) << "input data is absent";
     }
 
-    const DataPtr insData = fullyConnected.insData[0].lock();
-    if (insData == nullptr) {
-        THROW_IE_LPT_EXCEPTION(fullyConnected) << "insert data ia absent";
+    const DataPtr outputData = fullyConnected.outData[0];
+    if (outputData == nullptr) {
+        THROW_IE_LPT_EXCEPTION(fullyConnected) << "output data is absent";
     }
-    const size_t inputChannelsCount = insData->getDims()[1];
 
-    const Layout outputLayout = fullyConnected.outData[0]->getLayout();
-    if (outputLayout != Layout::NC) {
-        THROW_IE_EXCEPTION << "Unexpected output layout " << outputLayout;
-    }
     const size_t outputChannelsCount = fullyConnected.outData[0]->getDims()[1];
     dequantizationScales.resize(outputChannelsCount);
     dequantizationShifts.resize(outputChannelsCount);
@@ -232,38 +287,45 @@ void FullyConnectedTransformation::calculateDequantizationForSymmetric(
         THROW_IE_EXCEPTION << "Unexpected layer type to calculate quantization values " << scaleShift->type;
     }
 
-    const Blob::Ptr prevDequantizationScaleBlob = CNNNetworkHelper::getBlob(scaleShift, "weights");
-    const auto prevDequantizationScaleBuffer = CNNNetworkHelper::getFloatData(prevDequantizationScaleBlob);
-    const Blob::Ptr prevDequantizationShiftBlob = CNNNetworkHelper::getBlob(scaleShift, "biases");
-    const auto prevDequantizationShiftBuffer = CNNNetworkHelper::getFloatData(prevDequantizationShiftBlob);
+    const auto prevDequantizationScaleBuffer = CNNNetworkHelper::getFloatData(CNNNetworkHelper::getBlob(scaleShift, "weights"));
+    const auto prevDequantizationShiftBuffer = CNNNetworkHelper::getFloatData(CNNNetworkHelper::getBlob(scaleShift, "biases"));
 
     const Blob::Ptr weightsBlob = CNNNetworkHelper::getWeights(fullyConnected, roundQuantizedValues);
     const auto weightsBuffer = CNNNetworkHelper::getFloatData(weightsBlob);
     const Blob::Ptr biasesBlob = CNNNetworkHelper::getBiases(fullyConnected);
     const auto biasesBuffer = biasesBlob == nullptr ? nullptr : CNNNetworkHelper::getFloatData(biasesBlob);
 
-    const float prevDequantizationScale = prevDequantizationScaleBuffer.get()[0];
+    const bool dequantizationValuesAreBroadcasted = getDequantizationValuesAreBroadcasted(fullyConnected);
     for (size_t i = 0; i < outputChannelsCount; ++i) {
-        dequantizationScales[i] = prevDequantizationScale *
+        dequantizationScales[i] =
+            (dequantizationValuesAreBroadcasted ? prevDequantizationScaleBuffer.get()[0] : prevDequantizationScaleBuffer.get()[i]) *
             (originalWeightsDequantizationScales.size() == 0 ?
                 1.0 :
                 (originalWeightsDequantizationScales.size() == 1 ? originalWeightsDequantizationScales[0] : originalWeightsDequantizationScales[i]));
     }
 
+    const DataPtr insData = fullyConnected.insData[0].lock();
+    if (insData == nullptr) {
+        THROW_IE_LPT_EXCEPTION(fullyConnected) << "insert data ia absent";
+    }
+    const size_t inputChannelsCount = insData->getDims().size() == 3ul ? insData->getDims()[2] : insData->getDims()[1];
     for (size_t channel = 0lu; channel < outputChannelsCount; ++channel) {
         float sum = 0.0;
         const float weightsDequantizationScale = originalWeightsDequantizationScales.size() == 0 ?
             1.0 :
-            (originalWeightsDequantizationScales.size() == 1 ? originalWeightsDequantizationScales[0] : originalWeightsDequantizationScales[channel]);
+            ((originalWeightsDequantizationScales.size() == 1) ? originalWeightsDequantizationScales[0] : originalWeightsDequantizationScales[channel]);
 
         for (size_t inputChannel = 0; inputChannel < inputChannelsCount; ++inputChannel) {
             const float w = weightsBuffer.get()[channel * inputChannelsCount + inputChannel];
-            sum += w * prevDequantizationShiftBuffer.get()[inputChannel] * weightsDequantizationScale;
+            const float shift = dequantizationValuesAreBroadcasted ? prevDequantizationShiftBuffer.get()[0] : prevDequantizationShiftBuffer.get()[inputChannel];
+            sum += w * shift * weightsDequantizationScale;
         }
 
         dequantizationShifts[channel] = biasesBuffer == nullptr ?
             sum :
-            (sum + biasesBuffer.get()[channel] - prevDequantizationScale * biasesBuffer.get()[channel] * weightsDequantizationScale);
+            (sum + biasesBuffer.get()[channel] -
+                (dequantizationValuesAreBroadcasted ? prevDequantizationScaleBuffer.get()[0] : prevDequantizationScaleBuffer.get()[channel]) *
+                biasesBuffer.get()[channel] * weightsDequantizationScale);
         biasesShifts[channel] = sum;
     }
 }
@@ -276,69 +338,63 @@ void FullyConnectedTransformation::calculateDequantizationForAsymmetric(
     std::vector<float>& dequantizationShifts) const {
     const DataPtr inputData = fullyConnected.insData[0].lock();
     if (inputData == nullptr) {
-        THROW_IE_EXCEPTION << "input data is absent for layer " << fullyConnected.name;
+        THROW_IE_LPT_EXCEPTION(fullyConnected) << "input data is absent";
     }
-    const Layout inputLayout = inputData->getLayout();
-    if (inputLayout != Layout::NC) {
-        THROW_IE_EXCEPTION << "Unexpected input layout " << inputLayout;
-    }
-    const DataPtr insData = fullyConnected.insData[0].lock();
-    if (insData == nullptr) {
-        THROW_IE_LPT_EXCEPTION(fullyConnected) << "insert data is absent";
-    }
-    const size_t inputChannelsCount = insData->getDims()[1];
+    // const Layout inputLayout = inputData->getLayout();
+    // if (inputLayout != Layout::NC) {
+    //     THROW_IE_EXCEPTION << "Unexpected input layout " << inputLayout;
+    // }
+    const size_t inputChannelsCount = inputData->getDims()[1];
 
-    const Layout outputLayout = fullyConnected.outData[0]->getLayout();
-    if (outputLayout != Layout::NC) {
-        THROW_IE_EXCEPTION << "Unexpected output layout " << outputLayout;
+    const DataPtr outputData = fullyConnected.outData[0];
+    if (outputData == nullptr) {
+        THROW_IE_LPT_EXCEPTION(fullyConnected) << "output data is absent";
     }
-    const size_t outputChannelsCount = fullyConnected.outData[0]->getDims()[1];
-    dequantizationScales.resize(outputChannelsCount);
-    dequantizationShifts.resize(outputChannelsCount);
 
     CNNLayerPtr scaleShift = CNNNetworkHelper::getParent(fullyConnected);
     if (scaleShift->type != "ScaleShift") {
         THROW_IE_EXCEPTION << "Unexpected layer type to calculate quantization values " << scaleShift->type;
     }
 
-    const Blob::Ptr prevDequantizationScaleBlob = CNNNetworkHelper::getBlob(scaleShift, "weights");
-    const auto prevDequantizationScaleBuffer = CNNNetworkHelper::getFloatData(prevDequantizationScaleBlob);
-    const Blob::Ptr prevDequantizationShiftBlob = CNNNetworkHelper::getBlob(scaleShift, "biases");
-    const auto prevDequantizationShiftBuffer = CNNNetworkHelper::getFloatData(prevDequantizationShiftBlob);
+    const bool dequantizationValuesAreBroadcasted = getDequantizationValuesAreBroadcasted(fullyConnected);
 
-    const Blob::Ptr weightsBlob = CNNNetworkHelper::getWeights(fullyConnected, roundQuantizedValues);
-    const auto weightsBuffer = CNNNetworkHelper::getFloatData(weightsBlob);
-    const Blob::Ptr biasesBlob = CNNNetworkHelper::getBiases(fullyConnected);
-    const auto biasesBuffer = biasesBlob == nullptr ? nullptr : CNNNetworkHelper::getFloatData(biasesBlob);
+    const size_t outputChannelsCount = outputData->getDims()[1];
+    dequantizationScales.resize(outputChannelsCount);
+    dequantizationShifts.resize(outputChannelsCount);
 
+    const std::shared_ptr<float> prevDequantizationScaleBuffer = CNNNetworkHelper::getFloatData(CNNNetworkHelper::getBlob(scaleShift, "weights"));
     for (size_t i = 0; i < outputChannelsCount; ++i) {
         dequantizationScales[i] =
-            prevDequantizationScaleBuffer.get()[0] *
-            (originalWeightsDequantizationScales.size() == 0
-                 ? 1.0
-                 : (originalWeightsDequantizationScales.size() == 1 ? originalWeightsDequantizationScales[0]
-                                                                    : originalWeightsDequantizationScales[i]));
+            (dequantizationValuesAreBroadcasted ? prevDequantizationScaleBuffer.get()[0] : prevDequantizationScaleBuffer.get()[i]) *
+            (originalWeightsDequantizationScales.size() == 0 ?
+                1.0 :
+                (originalWeightsDequantizationScales.size() == 1 ? originalWeightsDequantizationScales[0] : originalWeightsDequantizationScales[i]));
     }
 
+    const auto weightsBuffer = CNNNetworkHelper::getFloatData(CNNNetworkHelper::getWeights(fullyConnected, roundQuantizedValues));
+    const Blob::Ptr biasesBlob = CNNNetworkHelper::getBiases(fullyConnected);
+    const auto biasesBuffer = biasesBlob == nullptr ? nullptr : CNNNetworkHelper::getFloatData(CNNNetworkHelper::getBiases(fullyConnected));
+
+    const std::shared_ptr<float> prevDequantizationShiftBuffer = CNNNetworkHelper::getFloatData(CNNNetworkHelper::getBlob(scaleShift, "biases"));
+
     for (size_t channel = 0lu; channel < outputChannelsCount; ++channel) {
         float sum1 = 0.0;
         float sum2 = 0.0;
-        const float weightsDequantizationScale =
-            originalWeightsDequantizationScales.size() == 0
-                ? 1.0
-                : (originalWeightsDequantizationScales.size() == 1 ? originalWeightsDequantizationScales[0]
-                                                                   : originalWeightsDequantizationScales[channel]);
+        const float weightsDequantizationScale = originalWeightsDequantizationScales.size() == 0 ?
+            1.0 :
+            ((originalWeightsDequantizationScales.size() == 1) ? originalWeightsDequantizationScales[0] : originalWeightsDequantizationScales[channel]);
 
         for (size_t w = 0; w < inputChannelsCount; ++w) {
             const float kernel = weightsBuffer.get()[channel * inputChannelsCount + w];
-            sum1 += kernel * prevDequantizationShiftBuffer.get()[channel] * weightsDequantizationScale;
+            const float shift = dequantizationValuesAreBroadcasted ? prevDequantizationShiftBuffer.get()[0] : prevDequantizationShiftBuffer.get()[channel];
+            sum1 += kernel * shift * weightsDequantizationScale;
             sum2 += kernel * dataZeroPoints[w] * weightsDequantizationScale;
         }
 
-        dequantizationShifts[channel] = biasesBuffer == nullptr
-                                            ? sum1
-                                            : (sum1 + biasesBuffer.get()[channel] -
-                                               prevDequantizationScaleBuffer.get()[channel] *
-                                                   biasesBuffer.get()[channel] * weightsDequantizationScale);
+        dequantizationShifts[channel] = biasesBuffer == nullptr ?
+            sum1 :
+            (sum1 + biasesBuffer.get()[channel] -
+                (dequantizationValuesAreBroadcasted ? prevDequantizationScaleBuffer.get()[0] : prevDequantizationScaleBuffer.get()[channel]) *
+                biasesBuffer.get()[channel] * weightsDequantizationScale);
     }
 }
diff --git a/inference-engine/src/low_precision_transformations/src/network_helper.cpp b/inference-engine/src/low_precision_transformations/src/network_helper.cpp
index 06347ecb4b3..29502449715 100644
--- a/inference-engine/src/low_precision_transformations/src/network_helper.cpp
+++ b/inference-engine/src/low_precision_transformations/src/network_helper.cpp
@@ -582,18 +582,42 @@ std::vector<CNNLayerPtr> CNNNetworkHelper::getLayers(const CNNLayer& parent, con
     return layers;
 }
 
-Blob::Ptr CNNNetworkHelper::getBlob(CNNLayer* layer, const std::string& blobName) {
+Blob::Ptr CNNNetworkHelper::getBlob(const CNNLayer* layer, const std::string& blobName) {
     if (layer == nullptr) {
         THROW_IE_EXCEPTION << "layer is nullable";
     }
-    if (layer->blobs.empty()) {
-        THROW_IE_EXCEPTION << "Layer '" << layer->name << "' does not have any blob";
+
+    if (blobName.empty()) {
+        if (layer->blobs.empty()) {
+            THROW_IE_LPT_EXCEPTION(*layer) << "does not have any blob";
+        }
+
+        if (layer->blobs.size() != 1) {
+            THROW_IE_LPT_EXCEPTION(*layer) << "there are several blobs";
+        }
+        return layer->blobs.begin()->second;
     }
-    if (blobName.empty() && (layer->blobs.size() != 1)) {
-        THROW_IE_EXCEPTION << "several blobs";
+
+    const auto it = layer->blobs.find(blobName);
+    if (it == layer->blobs.end()) {
+        THROW_IE_LPT_EXCEPTION(*layer) << " does not have blob " << blobName;
     }
-    Blob::Ptr blob = blobName.empty() ? layer->blobs.begin()->second : layer->blobs[blobName];
-    return blob;
+
+    return it->second;
+}
+
+bool CNNNetworkHelper::blobValuesAreEqual(const CNNLayer& layer, const std::string& blobName) {
+    const Blob::Ptr blob = CNNNetworkHelper::getBlob(&layer, blobName);
+    const std::shared_ptr<float> buffer = CNNNetworkHelper::getFloatData(blob);
+    if (!std::equal(
+        buffer.get(),
+        buffer.get() + blob->size(),
+        buffer.get(),
+        [](const float value1, const float value2) { return value1 == value2; })) {
+        return false;
+    }
+
+    return true;
 }
 
 Blob::Ptr CNNNetworkHelper::getBlob(CNNLayerPtr layer, const std::string& blobName) {
@@ -1086,8 +1110,12 @@ CNNLayerPtr CNNNetworkHelper::addScaleShiftBetween(TransformationContext& contex
     CNNLayerPtr ssCnnLayer(new ScaleShiftLayer(ssCnnLayerParams));
 
     const std::vector<size_t> dims = outData->getDims();
-    if ((dims.size() > 1) && (dims[1] != dequantizationDetails.channelsCount)) {
-        THROW_IE_EXCEPTION << "unexpected parent channels count " << dims[1];
+
+    // TODO: just to test
+    if ((dims.size() != 2ul) || ((dims.size() == 2ul) && (dims[0] != dequantizationDetails.channelsCount))) {
+        if ((dims.size() > 1) && (dims[1] != dequantizationDetails.channelsCount)) {
+            THROW_IE_EXCEPTION << "unexpected parent channels count " << dims[1];
+        }
     }
     addLayerToCNNNetworkAfterData(outData, ssCnnLayer, child != nullptr ? child->name : "", context.network);
 
diff --git a/inference-engine/src/low_precision_transformations/src/scaleshift_to_convolution.cpp b/inference-engine/src/low_precision_transformations/src/scaleshift_to_convolution.cpp
index 0d9e5faf827..a2aa5ddc356 100644
--- a/inference-engine/src/low_precision_transformations/src/scaleshift_to_convolution.cpp
+++ b/inference-engine/src/low_precision_transformations/src/scaleshift_to_convolution.cpp
@@ -115,7 +115,7 @@ void ScaleShiftToConvolutionTransformation::transform(TransformationContext& con
 
         if (this->updateBiases) {
             std::vector<float> biasesShifts(dequantizationShifts.size(), 0.f);
-            updateLayerBiases(context, *convolutionLayerPtr, dequantizationScales, dequantizationShifts, biasesShifts);
+            updateLayerBiases(context, *convolutionLayerPtr, false, dequantizationScales, dequantizationShifts, biasesShifts);
         }
 
         addDequantizationLayer(context, *convolutionLayerPtr, dequantizationScales, dequantizationShifts);
diff --git a/inference-engine/src/low_precision_transformations/src/weightable_layer_transformation.cpp b/inference-engine/src/low_precision_transformations/src/weightable_layer_transformation.cpp
index 91473def7e3..ce8a3f3b044 100644
--- a/inference-engine/src/low_precision_transformations/src/weightable_layer_transformation.cpp
+++ b/inference-engine/src/low_precision_transformations/src/weightable_layer_transformation.cpp
@@ -3,7 +3,6 @@
 //
 
 #include "low_precision_transformations/weightable_layer_transformation.hpp"
-#include "low_precision_transformations/network_helper.hpp"
 
 #include <algorithm>
 #include <details/caseless.hpp>
@@ -11,6 +10,9 @@
 #include <string>
 #include <vector>
 
+#include "low_precision_transformations/common/ie_lpt_exception.hpp"
+#include "low_precision_transformations/network_helper.hpp"
+
 using namespace InferenceEngine;
 using namespace InferenceEngine::details;
 
@@ -123,50 +125,72 @@ bool WeightableLayerTransformation::isPrecisionPreserved(const CNNLayer& layer)
     return false;
 }
 
+
 void WeightableLayerTransformation::updateLayerBiases(
     TransformationContext& context,
-    const CNNLayer& convolution,
+    const CNNLayer& weightableLayer,
+    const bool biasesDimsAsOutput,
     std::vector<float>& dequantizationScales,
     std::vector<float>& dequantizationShifts,
     std::vector<float>& biasesShifts) const {
     if (!std::all_of(dequantizationShifts.begin(), dequantizationShifts.end(), [](float value) { return value == 0.0; })) {
+        const DataPtr insData = weightableLayer.insData[0].lock();
+        if (insData == nullptr) {
+            THROW_IE_LPT_EXCEPTION(weightableLayer) << "input data is absent";
+        }
+        const std::vector<size_t> insDataDims = insData->getTensorDesc().getDims();
+
         std::shared_ptr<float> biasesBufferPtr;
         Blob::Ptr biasesBlob;
-        CNNLayerPtr biasesLayer = CNNNetworkHelper::getParent(convolution, 2);
+        CNNLayerPtr biasesLayer = CNNNetworkHelper::getParent(weightableLayer, 2);
         if (biasesLayer == nullptr) {
-            const std::vector<size_t> dims = CaselessEq<std::string>()(convolution.type, "Convolution") ?
-                std::vector<size_t>({ dequantizationShifts.size() }) :
-                std::vector<size_t>({ 1ul, dequantizationShifts.size() });
-            const Layout layout = CaselessEq<std::string>()(convolution.type, "Convolution") ? Layout::C : Layout::NC;
+            if (weightableLayer.outData.size() != 1ul) {
+                THROW_IE_LPT_EXCEPTION(weightableLayer) << "unexpected output data count " << weightableLayer.outData.size();
+            }
+            const DataPtr outData = weightableLayer.outData[0];
+            const std::vector<size_t> biasesDims = biasesDimsAsOutput ?
+                outData->getDims() :
+                std::vector<size_t>({ insDataDims.size() == 3ul ? insDataDims[2] : dequantizationShifts.size() });
+            const Layout biasesLayout = InferenceEngine::TensorDesc::getLayoutByDims(biasesDims);
 
-            biasesBlob = CNNNetworkHelper::makeNewBlobPtr(TensorDesc(Precision::FP32, dims, layout));
+            biasesBlob = CNNNetworkHelper::makeNewBlobPtr(TensorDesc(Precision::FP32, biasesDims, biasesLayout));
             biasesBlob->allocate();
 
             biasesBufferPtr = CNNNetworkHelper::getFloatData(biasesBlob);
             float* biasesBuffer = biasesBufferPtr.get();
             std::fill(biasesBuffer, biasesBuffer + biasesBlob->size(), 0.f);
 
-            LayerParams constLayerParams{ convolution.name + "_Biases", "Const", convolution.outData[0]->getTensorDesc().getPrecision() };
+            LayerParams biasesLayerParams{ weightableLayer.name + "_Biases", "Const", outData->getTensorDesc().getPrecision() };
             biasesLayer = CNNNetworkHelper::addLayer(
                 context,
                 nullptr,
-                std::make_shared<CNNLayer>(convolution),
-                std::make_shared<CNNLayer>(constLayerParams));
+                std::make_shared<CNNLayer>(weightableLayer),
+                std::make_shared<CNNLayer>(biasesLayerParams));
             biasesLayer->blobs["custom"] = biasesBlob;
-            biasesLayer->outData[0]->reshape(dims, layout);
+            biasesLayer->outData[0]->reshape(biasesDims, biasesLayout);
         } else {
             biasesBlob = CNNNetworkHelper::getBlob(biasesLayer, "custom");
-            if (biasesBlob->size() != dequantizationShifts.size()) {
-                THROW_IE_EXCEPTION << "dequantization shifts size " << dequantizationShifts.size() << " is not equal biases blob size " << biasesBlob->size();
+            DataPtr insData = weightableLayer.insData[0].lock();
+            if (insData == nullptr) {
+                THROW_IE_LPT_EXCEPTION(weightableLayer) << "input data is absent";
+            }
+
+            if ((insData->getDims().size() != 3) && (biasesBlob->size() != dequantizationShifts.size())) {
+                THROW_IE_LPT_EXCEPTION(weightableLayer) <<
+                    "dequantization shifts size " << dequantizationShifts.size() <<
+                    " is not equal biases blob size " << biasesBlob->size();
             }
             biasesBufferPtr = CNNNetworkHelper::getFloatData(biasesBlob);
         }
         const float* biasesBuffer = biasesBufferPtr.get();
         std::vector<float> biases(biasesBlob->size());
+        const bool broadcast = insDataDims.size() == 3ul;
         for (size_t channel = 0ul; channel < biases.size(); ++channel) {
-            biases[channel] = (biasesShifts[channel] + biasesBuffer[channel]) / dequantizationScales[channel];
-            dequantizationShifts[channel] = 0.0;
+            biases[channel] = broadcast ?
+                (biasesShifts[0] + biasesBuffer[0]) / dequantizationScales[0] :
+                (biasesShifts[channel] + biasesBuffer[channel]) / dequantizationScales[channel];
         }
+        std::fill(dequantizationShifts.begin(), dequantizationShifts.end(), 0.f);
         CNNNetworkHelper::updateBlobs(*biasesLayer, "custom", biases);
     }
 }
@@ -287,10 +311,9 @@ void WeightableLayerTransformation::createAsymmetric(TransformationContext& cont
         THROW_IE_EXCEPTION << "insert data is absent for layer " << child.name;
     }
 
-    if (insData->getTensorDesc().getLayout() != Layout::NC &&
-        insData->getTensorDesc().getLayout() != Layout::NCHW &&
-        insData->getTensorDesc().getLayout() != Layout::NCDHW) {
-        THROW_IE_EXCEPTION << "unexpected layout '" << insData->getTensorDesc().getLayout() << "' layer " << child.name;
+    const size_t dimsSize = insData->getDims().size();
+    if ((dimsSize != 2ul) && (dimsSize != 3ul) && (dimsSize != 4ul) && (dimsSize != 5ul)) {
+        THROW_IE_EXCEPTION << "unexpected dimensions size " << dimsSize << " layer " << child.type << " " << child.name;
     }
 
     LayerParams eltwiseLayerParams {child.name + "_Sub_" + parent.name, "Eltwise", precisionsInfo.original};
@@ -312,15 +335,15 @@ void WeightableLayerTransformation::createAsymmetric(TransformationContext& cont
     }
 
     const TensorDesc constTensorDesc = constLayer->outData[0]->getTensorDesc();
-    if (constTensorDesc.getLayout() != insData->getTensorDesc().getLayout()) {
+    if ((dimsSize != 3) && (constTensorDesc.getLayout() != insData->getTensorDesc().getLayout())) {
         THROW_IE_EXCEPTION << "unexpected Const layer layout " << constTensorDesc.getLayout();
     }
     const SizeVector& constDims = constTensorDesc.getDims();
-    if (constDims.size() != insData->getTensorDesc().getDims().size()) {
+    if ((dimsSize != 3) && (constDims.size() != insData->getTensorDesc().getDims().size())) {
         THROW_IE_EXCEPTION << "unexpected dimension size " << constDims.size();
     }
 
-    SizeVector dims(insData->getTensorDesc().getDims().size(), 1);
+    SizeVector dims(constLayer->outData[0]->getTensorDesc().getDims().size(), 1);
     if (onWeights) {
         dims[0] = constDims[0];
     } else {
diff --git a/inference-engine/tests/functional/inference_engine/transformations/fc_bias_fusion_test.cpp b/inference-engine/tests/functional/inference_engine/transformations/fc_bias_fusion_test.cpp
index f9d3fd6898c..9bfdc2a27f9 100644
--- a/inference-engine/tests/functional/inference_engine/transformations/fc_bias_fusion_test.cpp
+++ b/inference-engine/tests/functional/inference_engine/transformations/fc_bias_fusion_test.cpp
@@ -32,7 +32,7 @@ TEST(TransformationTests, FullyConnectedBiasFusionTest3D) {
         auto empty_bias = ngraph::opset1::Constant::create(ngraph::element::f32, ngraph::Shape{786}, {0});
         auto fc = std::make_shared<ngraph::op::FullyConnected>(input1, weights, empty_bias, ngraph::Shape{1, 128, 786});
 
-        auto const_bias = ngraph::opset1::Constant::create(ngraph::element::f32, ngraph::Shape{1, 1, 786}, {1});
+        auto const_bias = ngraph::opset1::Constant::create(ngraph::element::f32, ngraph::Shape{786}, {1});
         auto add = std::make_shared<ngraph::opset1::Add>(fc, const_bias);
 
         f = std::make_shared<ngraph::Function>(ngraph::NodeVector{add}, ngraph::ParameterVector{input1});
@@ -84,4 +84,4 @@ TEST(TransformationTests, FullyConnectedBiasFusionTest2D) {
 
     auto res = compare_functions(f, f_ref);
     ASSERT_TRUE(res.first) << res.second;
-}
\ No newline at end of file
+}
diff --git a/inference-engine/tests_deprecated/functional/mkldnn/shared_tests_instance/transformations/low_precision_transformer_single_layer_tests.cpp b/inference-engine/tests_deprecated/functional/mkldnn/shared_tests_instance/transformations/low_precision_transformer_single_layer_tests.cpp
index 5819f382548..5cff70d15b5 100644
--- a/inference-engine/tests_deprecated/functional/mkldnn/shared_tests_instance/transformations/low_precision_transformer_single_layer_tests.cpp
+++ b/inference-engine/tests_deprecated/functional/mkldnn/shared_tests_instance/transformations/low_precision_transformer_single_layer_tests.cpp
@@ -35,7 +35,7 @@ INSTANTIATE_TEST_CASE_P(
             PowerTestModel::Ptr(new PowerTestModel(1.f, -32.f, 0)),
             { { 1, 3, 299, 299 } },
             { { 1, 3, 299, 299 } }),
-            
+
         SingleLayerTransformationsTestParams(
             "CPU",
             PowerTestModel::Ptr(new PowerTestModel(1.f, 1.f, -64.f)),
@@ -60,6 +60,19 @@ INSTANTIATE_TEST_CASE_P(
             { { 1, 2048 } },
             { { 1, 1000 } }),
 
+        // TODO: uncomment later
+        //SingleLayerTransformationsTestParams(
+        //    "MKLDNNPlugin",
+        //    SingleLayerTestModel::Ptr(new FullyConnectedTestModel({ 1, 128, 12, 64 }, { 128, 768 })),
+        //    { { 1, 128, 12, 64 } },
+        //    { { 128, 768 } }),
+
+        SingleLayerTransformationsTestParams(
+            "CPU",
+            SingleLayerTestModel::Ptr(new FullyConnectedTestModel({ 1, 128, 12, 64 }, { 1, 128, 768 })),
+            { { 1, 128, 12, 64 } },
+            { { 1, 128, 768 } }),
+
         SingleLayerTransformationsTestParams(
             "CPU",
             SingleLayerTestModel::Ptr(new ConvolutionAndQuantizeOnSignedActivationsAndWeightsPositiveTestModel()),
@@ -512,13 +525,21 @@ INSTANTIATE_TEST_CASE_P(
             "CPU",
             SingleLayerTestModel::Ptr(new UpdateBiasesConvolutionTestModel(true)),
             { { 1, 32, 112, 112 } },
-            { { 1, 32, 112, 112 } }),
+            { { 1, 32, 112, 112 } })
 
-        SingleLayerTransformationsTestParams(
-            "CPU",
-            SingleLayerTestModel::Ptr(new UpdateBiasesFullyConnectedTestModel(true)),
-            { { 1, 32, 112, 112 } },
-            { { 1, 100 } })
+        // TODO: uncomment later
+        //SingleLayerTransformationsTestParams(
+        //    "CPU",
+        //    SingleLayerTestModel::Ptr(new UpdateBiasesFullyConnectedTestModel(false)),
+        //    { { 1, 32, 112, 112 } },
+        //    { { 1, 100 } }),
+
+        // TODO: uncomment later
+        //SingleLayerTransformationsTestParams(
+        //    "CPU",
+        //    SingleLayerTestModel::Ptr(new UpdateBiasesFullyConnectedTestModel(true)),
+        //    { { 1, 32, 112, 112 } },
+        //    { { 1, 100 } })
     ),
     SingleLayerTransformationsTestParams::getLowPrecisionTransformerSingleLayerTestName);
 
diff --git a/inference-engine/tests_deprecated/functional/shared_tests/transformations/fully_connected_test.cpp b/inference-engine/tests_deprecated/functional/shared_tests/transformations/fully_connected_test.cpp
new file mode 100644
index 00000000000..69c2843e09c
--- /dev/null
+++ b/inference-engine/tests_deprecated/functional/shared_tests/transformations/fully_connected_test.cpp
@@ -0,0 +1,240 @@
+// Copyright (C) 2018-2020 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "low_precision_transformer_single_layer_tests.hpp"
+#include "low_precision_transformations/fake_quantize.hpp"
+#include "low_precision_transformations/convolution.hpp"
+#include "low_precision_transformations/fully_connected.hpp"
+#include "low_precision_transformations/scaleshift_to_convolution.hpp"
+
+FullyConnectedTestModel::FullyConnectedTestModel(
+    const std::vector<size_t>& inputDimentions,
+    const std::vector<size_t>& outputDimentions) :
+    addBiasesLayer(false),
+    inputDimentions(inputDimentions),
+    outputDimentions(outputDimentions) {}
+
+std::string FullyConnectedTestModel::getName() const {
+    return std::string("FullyConnectedTestModel") +
+        (addBiasesLayer ? "WithBiases" : "") +
+        "_D" + std::to_string(inputDimentions.size()) +
+        "_D" + std::to_string(outputDimentions.size());
+}
+
+void FullyConnectedTestModel::initInput(Blob::Ptr input) const {
+    fillDataWithInitValue(input, -1.f);
+}
+
+bool FullyConnectedTestModel::transform(CNNNetwork& network, LayerTransformation::Params& params) const {
+    params.updatePrecisions = true;
+
+    // TODO: use getLowPrecisionTransformer(params) instead
+    LowPrecisionTransformer transformer(LowPrecisionTransformer::getAllTransformations(params).
+        add<FullyConnectedTransformation>(LayerTransformation::Params(params).setSupportAsymmetricQuantization(false), "FullyConnected").
+        add<ConvolutionTransformation>(LayerTransformation::Params(params).setPrecisionsOnActivations({ Precision::U8 }), "Convolution").
+        addCleanup<ScaleShiftToConvolutionTransformation>(
+            LayerTransformation::Params(params).setPrecisionsOnActivations({ Precision::U8 }),
+            "ScaleShift"));
+
+    // network.serialize("c:\\Projects\\temp\\fully_connected.original.xml", "c:\\Projects\\temp\\fully_connected.original.bin");
+    transformer.transform(network);
+    // network.serialize("c:\\Projects\\temp\\fully_connected.transformed.xml", "c:\\Projects\\temp\\fully_connected.transformed.bin");
+
+    if (params.quantizeOutputs) {
+        const CNNLayerPtr dequantizationLayer = getLayer(network, "fullyConnected");
+        if (dequantizationLayer->type != "ScaleShift") {
+            THROW_IE_EXCEPTION << "was not quantized";
+        }
+
+        const Blob::Ptr biases = CNNNetworkHelper::getBiases(*dequantizationLayer);
+        const std::shared_ptr<float> biasesData = CNNNetworkHelper::getFloatData(biases);
+        if (params.updateBiases) {
+            for (size_t i = 0ul; i < biases->size(); ++i) {
+                if (biasesData.get()[i] != 0.f) {
+                    THROW_IE_EXCEPTION << "biases value is not zero";
+                }
+            }
+        } else {
+            // FakeQuantize layer has to have shift
+            for (size_t i = 0ul; i < biases->size(); ++i) {
+                if (biasesData.get()[i] == 0.f) {
+                    THROW_IE_EXCEPTION << "biases value is zero";
+                }
+            }
+        }
+    }
+
+    return true;
+}
+
+std::string FullyConnectedTestModel::getModel(SingleLayerTransformationsTestParams& p) const {
+    size_t type_size = sizeof(InferenceEngine::PrecisionTrait<InferenceEngine::Precision::FP32>::value_type);
+    if (p._network_precision == "FP16")
+        type_size = sizeof(InferenceEngine::PrecisionTrait<InferenceEngine::Precision::FP16>::value_type);
+
+    const size_t inputChannelsCount = p.inputDimensions[0][1];
+    const size_t outputChannelsCount = p.outputDimensions[0][1];
+    std::vector<size_t> weightsConstInputDims = {
+        p.inputDimensions[0][2] * p.inputDimensions[0][3],
+        p.outputDimensions[0][p.outputDimensions[0].size() == 2ul ? 1ul : 2ul] };
+
+    std::map<std::string, std::string> const_params = {};
+    std::map<std::string, std::string> fake_quantize_params = { {"levels", "256"} };
+    std::map<std::string, std::string> fake_quantize_params2 = { {"levels", "255"} };
+    std::map<std::string, std::string> power_params = { {"power", "1"}, {"scale", "1"}, {"shift", "0"} };
+    std::map<std::string, std::string> poolingParams = { {"kernel", "112,112"}, {"pool-method", "max"} };
+    std::map<std::string, std::string> reshapeParams = { };
+    std::map<std::string, std::string> fullyConnectedParams = { {"out-size", std::to_string(p.outputDimensions[0][1])} };
+
+    std::vector<size_t> biasesConstDims = { p.outputDimensions[0][1] };
+
+    const std::vector<std::vector<size_t>> convolutionDims = addBiasesLayer ?
+        std::vector<std::vector<size_t>>({ p.inputDimensions[0], weightsConstInputDims, biasesConstDims }) :
+        std::vector<std::vector<size_t>>({ p.inputDimensions[0], weightsConstInputDims });
+
+    std::vector<std::pair<std::string, std::string>> edges = {
+        {"0,0", "1,1"}, {"1,2", "6,7"}, // Power
+        {"2,3", "6,8"}, {"3,4", "6,9"}, {"4,5", "6,10"}, {"5,6", "6,11"}, // Const layers
+        {"6,12", "7,13"},  // FakeQuantize to Pooling
+        {"7,14", "8,15"},  // Pooling to Reshape
+        {"8,16", "15,28"},  // Reshape to FullyConnected
+        {"9,17", "14,22"}, {"10,18", "14,23"}, {"11,19", "14,24"}, {"12,20", "14,25"}, {"13,21", "14,26"}, // Const layers
+        {"14,27", "15,29"}
+    };
+
+    if (addBiasesLayer) {
+        edges.push_back({ "16,32", "15,30" }); // biases to Conv
+    }
+
+    const std::vector<std::vector<size_t>> fullyConnectedDims = addBiasesLayer ?
+        std::vector<std::vector<size_t>>({ p.outputDimensions[0], weightsConstInputDims, biasesConstDims }) :
+        std::vector<std::vector<size_t>>({ p.outputDimensions[0], weightsConstInputDims });
+
+    std::vector<size_t> quantizationParamsDims(p.inputDimensions[0].size(), 1);
+    quantizationParamsDims[1] = inputChannelsCount;
+
+    const std::vector<size_t> reshape1OuputDims = { p.inputDimensions[0][0], p.inputDimensions[0][1], p.inputDimensions[0][2] * p.inputDimensions[0][3] };
+    const std::vector<size_t> reshape2OuputDims = p.outputDimensions[0].size() == 2ul ?
+        std::vector<size_t>({ p.inputDimensions[0][0] * p.inputDimensions[0][1], p.inputDimensions[0][2] * p.inputDimensions[0][3] }) :
+        std::vector<size_t>({ p.inputDimensions[0][0], p.inputDimensions[0][1], p.inputDimensions[0][2] * p.inputDimensions[0][3] });
+
+    CommonTestUtils::DefaultNetBuilder builder = CommonTestUtils::DefaultNetBuilder::buildNetworkWithOneInput(
+        "FullyConnectedTestModel", p.inputDimensions[0], p._network_precision)
+        // 1
+        .addLayer("Power", p._network_precision, &power_params, { {p.inputDimensions[0]}, {p.inputDimensions[0]} })
+        // 2
+        .addLayer("Const", p._network_precision, &const_params, { {}, {quantizationParamsDims} }, inputChannelsCount * type_size, "dataInputLowConst")
+        // 3
+        .addLayer("Const", p._network_precision, &const_params, { {}, {quantizationParamsDims} }, inputChannelsCount * type_size, "dataInputHighConst")
+        // 4
+        .addLayer("Const", p._network_precision, &const_params, { {}, {quantizationParamsDims} }, inputChannelsCount * type_size, "dataOutputLowConst")
+        // 5
+        .addLayer("Const", p._network_precision, &const_params, { {}, {quantizationParamsDims} }, inputChannelsCount * type_size, "dataOutputHighConst")
+        // 6
+        .addLayer("FakeQuantize",
+            p._network_precision,
+            &fake_quantize_params,
+            { {p.inputDimensions[0], quantizationParamsDims, quantizationParamsDims, quantizationParamsDims, quantizationParamsDims}, {{p.inputDimensions[0]}} },
+            "fakeQuantize")
+        // 7
+        .addLayer("Reshape", p._network_precision, &reshapeParams, { { p.inputDimensions[0] }, { reshape1OuputDims } }, "reshape1")
+        // 8
+        .addLayer("Reshape", p._network_precision, &reshapeParams, { {{ reshape1OuputDims }}, { reshape2OuputDims } }, "reshape2")
+        // 9
+        .addLayer("Const", p._network_precision, &const_params, { {}, {weightsConstInputDims} },
+            std::accumulate(weightsConstInputDims.begin(), weightsConstInputDims.end(), 1lu, std::multiplies<size_t>()) * type_size, "weigthsConst")
+        // 10
+        .addLayer("Const", p._network_precision, &const_params, { {}, {{1}} }, type_size, "weigthsInputLowConst")
+        // 11
+        .addLayer("Const", p._network_precision, &const_params, { {}, {{1}} }, type_size, "weigthsInputHighConst")
+        // 12
+        .addLayer("Const", p._network_precision, &const_params, { {}, {{1}} }, type_size, "weigthsOutputLowConst")
+        // 13
+        .addLayer("Const", p._network_precision, &const_params, { {}, {{1}} }, type_size, "weigthsOutputHighConst")
+        // 14
+        .addLayer(
+            "FakeQuantize",
+            p._network_precision,
+            &fake_quantize_params,
+            { {weightsConstInputDims, {1}, {1}, {1}, {1}}, {{weightsConstInputDims}} },
+            "fakeQuantizeOnWeights")
+        // 15
+        .addLayer("FullyConnected", p._network_precision, &fullyConnectedParams, { fullyConnectedDims, {p.outputDimensions[0]} }, "fullyConnected");
+
+    if (addBiasesLayer) {
+        // 16
+        builder.addLayer("Const", p._network_precision, &const_params, { {}, {biasesConstDims} }, type_size * biasesConstDims[0], "biasesConst");
+    }
+
+    return builder.finish(&edges);
+}
+
+void FullyConnectedTestModel::resetTransformation(CNNNetwork& network) const {
+    CNNLayerPtr fakeQuantize = CNNNetworkHelper::getLayer(network, "fakeQuantize");
+    const size_t inputChannels = fakeQuantize->outData[0]->getTensorDesc().getDims()[1];
+
+    CNNLayerPtr fullyConnected = CNNNetworkHelper::getLayer(network, "fullyConnected");
+    const size_t outputChannels = fullyConnected->outData[0]->getTensorDesc().getDims()[1];
+
+    // Const on activations
+    //std::vector<float> lowValues(inputChannels, 1.0);  // to have shifts
+    //std::vector<float> highValues(inputChannels);
+    //if (areScalesOnActivationsDifferent()) {
+    //    for (size_t inputChannel = 0; inputChannel < highValues.size(); ++inputChannel) {
+    //        highValues[inputChannel] = static_cast<float>(inputChannel);
+    //    }
+    //}
+    //else {
+    //    highValues = std::vector<float>(inputChannels, 255.f);
+    //}
+
+    //std::vector<float> lowValues(inputChannels, 1.275f);
+    //std::vector<float> highValues(inputChannels, 2.55f);
+
+    std::vector<float> lowValues(inputChannels, 127.5f);
+    std::vector<float> highValues(inputChannels, 255.f);
+
+    fillData(getLayer(network, "dataInputLowConst"), lowValues, "custom");
+    fillData(getLayer(network, "dataInputHighConst"), highValues, "custom");
+    fillData(getLayer(network, "dataOutputLowConst"), lowValues, "custom");
+    fillData(getLayer(network, "dataOutputHighConst"), highValues, "custom");
+
+
+    const size_t fakeQuantizeInputChannel = outputChannels;
+
+    // Const on weights
+    //std::vector<float> weights(
+    //    fakeQuantize->outData[0]->getTensorDesc().getDims()[2] *
+    //    fakeQuantize->outData[0]->getTensorDesc().getDims()[3] *
+    //    fullyConnected->outData[0]->getTensorDesc().getDims()[fullyConnected->outData[0]->getTensorDesc().getDims().size() == 2ul ? 1 : 2]);
+    //for (size_t outputChannel = 0ul; outputChannel < outputChannels; ++outputChannel) {
+    //    for (size_t inputChannel = 0ul; inputChannel < fakeQuantizeInputChannel; ++inputChannel) {
+    //        weights[outputChannel * fakeQuantizeInputChannel + inputChannel] = inputChannel;
+    //    }
+    //}
+
+    const std::vector<size_t> dims = fakeQuantize->outData[0]->getTensorDesc().getDims();
+    // const size_t weightsSize = dims[2] * dims[3] * dims[dims.size() == 2ul ? 1 : 2];
+    const size_t weightsSize = (dims[2] * dims[3]) * (dims[2] * dims[3]);
+    std::vector<float> weights(weightsSize, 2.f);
+
+    fillData(getLayer(network, "weigthsConst"), weights, "custom");
+
+    fillData(getLayer(network, "weigthsInputLowConst"), -128.f, "custom");
+    fillData(getLayer(network, "weigthsInputHighConst"), 127.f, "custom");
+    fillData(getLayer(network, "weigthsOutputLowConst"), -128.f, "custom");
+    fillData(getLayer(network, "weigthsOutputHighConst"), 127.f, "custom");
+
+    if (addBiasesLayer) {
+        std::vector<float> biases(outputChannels);
+        for (size_t i = 0ul; i < outputChannels; ++i) {
+            biases[i] = static_cast<float>(i);
+        }
+        fillData(getLayer(network, "biasesConst"), biases, "custom");
+    }
+}
+
+bool FullyConnectedTestModel::areScalesOnActivationsDifferent() const {
+    return false;
+}
diff --git a/inference-engine/tests_deprecated/functional/shared_tests/transformations/low_precision_transformer_single_layer_tests.hpp b/inference-engine/tests_deprecated/functional/shared_tests/transformations/low_precision_transformer_single_layer_tests.hpp
index 0f81098173c..0e369715aee 100644
--- a/inference-engine/tests_deprecated/functional/shared_tests/transformations/low_precision_transformer_single_layer_tests.hpp
+++ b/inference-engine/tests_deprecated/functional/shared_tests/transformations/low_precision_transformer_single_layer_tests.hpp
@@ -1007,6 +1007,23 @@ public:
     void initInput(Blob::Ptr input) const override;
 };
 
+class FullyConnectedTestModel : public SingleLayerTestModel {
+public:
+    FullyConnectedTestModel(const std::vector<size_t>& inputDimentions, const std::vector<size_t>& outputDimentions);
+    std::string getName() const override;
+    bool transform(CNNNetwork& network, LayerTransformation::Params& params) const override;
+    void initInput(Blob::Ptr input) const override;
+    std::string getModel(SingleLayerTransformationsTestParams& p) const override;
+    void resetTransformation(CNNNetwork& network) const override;
+protected:
+    virtual bool areScalesOnActivationsDifferent() const;
+    const bool addBiasesLayer;
+
+private:
+    const std::vector<size_t> inputDimentions;
+    const std::vector<size_t> outputDimentions;
+};
+
 class EltwiseTestModel : public SingleLayerTestModel {
 public:
     EltwiseTestModel(
@@ -1895,7 +1912,7 @@ class SingleLayerTransformationsTest : public TestsCommon, public WithParamInter
     std::unordered_map<std::string, InferenceEngine::Blob::Ptr> infer(
             CNNNetwork& network,
             std::unordered_map<std::string, Blob::Ptr>& inputBlobs,
-            Core & plugin, const std::string & device_name, 
+            Core & plugin, const std::string & device_name,
             ExecutableNetwork & executableNetwork,
             InferRequest & inferRequest);
 
diff --git a/inference-engine/tests_deprecated/functional/shared_tests/transformations/single_layer_transformations_test.cpp b/inference-engine/tests_deprecated/functional/shared_tests/transformations/single_layer_transformations_test.cpp
index 50b9e1fd735..0e90c269d6e 100644
--- a/inference-engine/tests_deprecated/functional/shared_tests/transformations/single_layer_transformations_test.cpp
+++ b/inference-engine/tests_deprecated/functional/shared_tests/transformations/single_layer_transformations_test.cpp
@@ -210,7 +210,7 @@ void SingleLayerTransformationsTest::SetUp() {
         Core core;
         ExecutableNetwork executableNetwork;
         InferRequest inferRequest;
-        const auto originalOutputMap = infer(network, inputBlobs, core, 
+        const auto originalOutputMap = infer(network, inputBlobs, core,
                 p.device_name, executableNetwork, inferRequest);
 
         const std::vector<bool> updatePrecisionsValues = { false };
@@ -228,6 +228,7 @@ void SingleLayerTransformationsTest::SetUp() {
         const std::vector<bool> updateBiasesValues = { true, false };
         const std::vector<bool> supportAsymmetricQuantizationValues = { true /*, false*/ };
         const std::vector<std::vector<Precision>> precisionOnActivationsValues = {
+            // TODO: just to debug
             { Precision::I8 },
             { Precision::I8, Precision::U8 },
             { Precision::U8 },
@@ -304,7 +305,7 @@ void SingleLayerTransformationsTest::SetUp() {
 
                                                     const float threshold = p.model->getThreshold(p.device_name, net_precision, param);
                                                     const float zeroThreshold = p.model->getZeroThreshold();
-                                                    
+
                                                     const auto outName = transformedOutput.find(name);
                                                     if (outName == transformedOutput.end()) {
                                                         THROW_IE_EXCEPTION << "Original output name " + name + " doesn't exist in transformed model";