diff --git a/inference-engine/include/gna/gna_config.hpp b/inference-engine/include/gna/gna_config.hpp
index fd7dd701bdd..41e287832a9 100644
--- a/inference-engine/include/gna/gna_config.hpp
+++ b/inference-engine/include/gna/gna_config.hpp
@@ -92,6 +92,13 @@ DECLARE_GNA_CONFIG_KEY(COMPACT_MODE);
 */
 DECLARE_GNA_CONFIG_KEY(PWL_UNIFORM_DESIGN);
 
+/**
+* @brief The option allows specifying the maximum error percent that the algorithm
+* searching for optimal PWL functions may introduce.
+* If no value is set, 1.0 is used by default.
+*/
+DECLARE_GNA_CONFIG_KEY(PWL_MAX_ERROR_PERCENT);
+
 /**
 * @brief By default, the GNA plugin uses one worker thread for inference computations.
 * This parameter allows you to create up to 127 threads for software modes.
diff --git a/inference-engine/samples/speech_sample/main.cpp b/inference-engine/samples/speech_sample/main.cpp
index 8f3b3b5cd15..e117ca79da9 100644
--- a/inference-engine/samples/speech_sample/main.cpp
+++ b/inference-engine/samples/speech_sample/main.cpp
@@ -519,6 +519,10 @@ bool ParseAndCheckCommandLine(int argc, char *argv[]) {
         throw std::logic_error("Invalid value for 'cw_l' argument. It must be greater than or equal to 0");
     }
 
+    if (FLAGS_pwl_me < 0.0 || FLAGS_pwl_me > 100.0) {
+        throw std::logic_error("Invalid value for 'pwl_me' argument. It must be greater than or equal to 0.0 and less than or equal to 100.0");
+    }
+
     return true;
 }
 
@@ -671,6 +675,7 @@ int main(int argc, char *argv[]) {
     gnaPluginConfig[GNAConfigParams::KEY_GNA_LIB_N_THREADS] = std::to_string((FLAGS_cw_r > 0 || FLAGS_cw_l > 0) ? 1 : FLAGS_nthreads);
     gnaPluginConfig[GNA_CONFIG_KEY(COMPACT_MODE)] = CONFIG_VALUE(NO);
+    gnaPluginConfig[GNA_CONFIG_KEY(PWL_MAX_ERROR_PERCENT)] = std::to_string(FLAGS_pwl_me);
     // -----------------------------------------------------------------------------------------------------
 
     // --------------------------- 5. Write model to file --------------------------------------------------
diff --git a/inference-engine/samples/speech_sample/speech_sample.hpp b/inference-engine/samples/speech_sample/speech_sample.hpp
index 5fc905ae689..1409d557d60 100644
--- a/inference-engine/samples/speech_sample/speech_sample.hpp
+++ b/inference-engine/samples/speech_sample/speech_sample.hpp
@@ -91,6 +91,10 @@ static const char input_layer_names_message[] = "Optional. Layer names for input
                                                 "The names are separated with \",\" " \
                                                 "Example: Input1,Input2 ";
 
+/// @brief message for PWL max error percent
+static const char pwl_max_error_percent_message[] = "Optional. The maximum percent of error for PWL function. " \
+                                                    "The value must be in <0, 100> range. The default value is 1.0.";
+
 /// \brief Define flag for showing help message
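For reference, the new option is not limited to the speech sample: a minimal sketch of enabling it from any Inference Engine application is shown below. The model path "model.xml" and the 2.0 error value are illustrative placeholders, not part of this patch.

    #include <inference_engine.hpp>
    #include <gna/gna_config.hpp>

    int main() {
        InferenceEngine::Core ie;
        auto network = ie.ReadNetwork("model.xml");  // placeholder model path

        // Allow the PWL approximation to deviate by up to 2% instead of the default 1%
        std::map<std::string, std::string> config = {
            { GNA_CONFIG_KEY(PWL_MAX_ERROR_PERCENT), "2.0" }
        };
        auto executableNetwork = ie.LoadNetwork(network, "GNA", config);
        return 0;
    }

The value is passed as a string, matching how the sample forwards FLAGS_pwl_me via std::to_string.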
DEFINE_bool(h, false, help_message); @@ -161,6 +165,9 @@ DEFINE_string(oname, "", output_layer_names_message); /// @brief Input layer name DEFINE_string(iname, "", input_layer_names_message); +/// @brief PWL max error percent +DEFINE_double(pwl_me, 1.0, pwl_max_error_percent_message); + /** * \brief This function show a help message */ @@ -191,5 +198,6 @@ static void showUsage() { std::cout << " -cw_r \"\" " << context_window_message_r << std::endl; std::cout << " -oname \"\" " << output_layer_names_message << std::endl; std::cout << " -iname \"\" " << input_layer_names_message << std::endl; + std::cout << " -pwl_me \"\" " << pwl_max_error_percent_message << std::endl; } diff --git a/inference-engine/src/gna_plugin/backend/am_intel_dnn.cpp b/inference-engine/src/gna_plugin/backend/am_intel_dnn.cpp index 0f641e5473f..4a758649e94 100644 --- a/inference-engine/src/gna_plugin/backend/am_intel_dnn.cpp +++ b/inference-engine/src/gna_plugin/backend/am_intel_dnn.cpp @@ -1243,15 +1243,15 @@ void GNAPluginNS::backend::AMIntelDNN::WriteDnnText(const char *filename, intel_ break; case kActFakeQuantize : out_file << " " << - std::dec << component[i].op.pwl.func_id.args.fakeQuantize.levels << "\n"; + std::dec << component[i].op.pwl.func_id.fqParams.levels << "\n"; out_file << " " << - std::setprecision(12) << std::scientific << component[i].op.pwl.func_id.args.fakeQuantize.input_low << "\n"; + std::setprecision(12) << std::scientific << component[i].op.pwl.func_id.fqParams.input_low << "\n"; out_file << " " << - std::setprecision(12) << std::scientific << component[i].op.pwl.func_id.args.fakeQuantize.input_high << "\n"; + std::setprecision(12) << std::scientific << component[i].op.pwl.func_id.fqParams.input_high << "\n"; out_file << " " << - std::setprecision(12) << std::scientific << component[i].op.pwl.func_id.args.fakeQuantize.output_low << "\n"; + std::setprecision(12) << std::scientific << component[i].op.pwl.func_id.fqParams.output_low << "\n"; out_file << " " << - std::setprecision(12) << std::scientific << component[i].op.pwl.func_id.args.fakeQuantize.output_high << "\n"; + std::setprecision(12) << std::scientific << component[i].op.pwl.func_id.fqParams.output_high << "\n"; break; default: break; diff --git a/inference-engine/src/gna_plugin/backend/dnn_types.h b/inference-engine/src/gna_plugin/backend/dnn_types.h index 341f890490e..ea0b5a1e399 100644 --- a/inference-engine/src/gna_plugin/backend/dnn_types.h +++ b/inference-engine/src/gna_plugin/backend/dnn_types.h @@ -34,9 +34,25 @@ enum DnnActivationType : uint8_t { kActNumType }; +struct FakeQuantizeParams { + int8_t set; + int32_t levels; + // if input is per-channel quantization - input pointers contains per-channel ranges + int8_t inputPerChannel; + float* input_low; + float* input_high; + // if output is per-channel quantization - output pointers contains per-channel ranges + int8_t outputPerChannel; + float* output_low; + float* output_high; +}; + struct DnnActivation { // for prelu DnnActivationType type; + FakeQuantizeParams fqParams; + FakeQuantizeParams srcFQParams; + union { struct { float negative_slope; @@ -50,17 +66,6 @@ struct DnnActivation { float low; float high; } clamp; - struct { - int32_t levels; - // if input is per-channel quantization - input pointers contains per-channel ranges - int8_t inputPerChannel; - float *input_low; - float *input_high; - // if output is per-channel quantization - output pointers contains per-channel ranges - int8_t outputPerChannel; - float *output_low; - float *output_high; - } fakeQuantize; } 
args; operator DnnActivationType () const noexcept { return type; diff --git a/inference-engine/src/gna_plugin/backend/make_pwl.cpp b/inference-engine/src/gna_plugin/backend/make_pwl.cpp index f7f34d33270..ddf73975b88 100644 --- a/inference-engine/src/gna_plugin/backend/make_pwl.cpp +++ b/inference-engine/src/gna_plugin/backend/make_pwl.cpp @@ -34,15 +34,20 @@ void make_gna_pwl(const DnnActivation fun, gna_pwl[0].xBase = static_cast (INT32_MIN & XBASEMASK); // zero out the 2 lsb if (fun == kActSigmoid) { gnalog() << "=========================== Sigmoid Segments ===========================\n"; - gna_pwl[0].yBase = gna_pwl[1].yBase = 0; + auto minVal = fun.fqParams.set? FLOAT_TO_INT16(*fun.fqParams.input_low * out_scale): 0; + gna_pwl[0].yBase = gna_pwl[1].yBase = minVal; gna_pwl[1].xBase = (static_cast (in_scale * (-pwl[0].b / pwl[0].m))) & XBASEMASK; } else if (fun == kActTanh) { gnalog() << "=========================== Tanh Segments ===========================\n"; - gna_pwl[0].yBase = gna_pwl[1].yBase = static_cast(-1.0 * out_scale); + auto minVal = fun.fqParams.set ? FLOAT_TO_INT16(*fun.fqParams.input_low * out_scale) : + static_cast(-1.0 * out_scale); + gna_pwl[0].yBase = gna_pwl[1].yBase = minVal; gna_pwl[1].xBase = (static_cast (in_scale * (-1.0 - pwl[0].b) / pwl[0].m)) & XBASEMASK; } else { gnalog() << "=========================== SoftSign Segments ===========================\n"; - gna_pwl[0].yBase = gna_pwl[1].yBase = static_cast(-1.0 * out_scale); + auto minVal = fun.fqParams.set ? FLOAT_TO_INT16(*fun.fqParams.input_low * out_scale) : + static_cast(-1.0 * out_scale); + gna_pwl[0].yBase = gna_pwl[1].yBase = minVal; gna_pwl[1].xBase = (static_cast (in_scale * (-1.0 - pwl[0].b) / pwl[0].m)) & XBASEMASK; } gna_pwl[0].slope = 0; @@ -74,9 +79,10 @@ void make_gna_pwl(const DnnActivation fun, << "\n"; } // insert extra segment for xvalues > u_bound + auto maxVal = fun.fqParams.set ? 
*fun.fqParams.input_high : 1.0; gna_pwl[n_segments - 1].xBase = ((uint32_t) (in_scale * (1.0 - pwl[pwl_size - 2].b) / pwl[pwl_size - 2].m)) & XBASEMASK; - gna_pwl[n_segments - 1].yBase = FLOAT_TO_INT16(1.0 * out_scale); + gna_pwl[n_segments - 1].yBase = FLOAT_TO_INT16(maxVal * out_scale); gna_pwl[n_segments - 1].slope = 0; gnalog() << (gna_pwl[n_segments - 1].xBase / in_scale) @@ -223,9 +229,19 @@ void make_gna_pwl(const DnnActivation fun, else gnalog() << "=========================== LeakyReLU Segments ======================\n"; int32_t x_lower = INT32_MIN; + int32_t x_upper = INT32_MAX; int16_t y_lower = INT16_MIN; - if (x_lower < y_lower * in_scale / out_scale) x_lower = FLOAT_TO_INT32(y_lower * in_scale / out_scale); - if (y_lower < x_lower * out_scale / in_scale) y_lower = FLOAT_TO_INT16(x_lower * out_scale / in_scale); + int16_t y_upper = INT16_MAX; + if (fun.fqParams.set) { + x_lower = FLOAT_TO_INT32(*fun.fqParams.input_low * 1.25 * in_scale); + x_upper = FLOAT_TO_INT32(*fun.fqParams.input_high * 1.25 * in_scale); + y_lower = FLOAT_TO_INT16(*fun.fqParams.input_low * 1.25 * out_scale); + y_upper = FLOAT_TO_INT16(*fun.fqParams.input_high * 1.25 * out_scale); + } else { + if (x_lower < y_lower * in_scale / out_scale) x_lower = FLOAT_TO_INT32(y_lower * in_scale / out_scale); + if (y_lower < x_lower * out_scale / in_scale) y_lower = FLOAT_TO_INT16(x_lower * out_scale / in_scale); + } + gna_pwl[0].yBase = y_lower * fun.args.lrelu.negative_slope; s = gna_slope(fun.args.lrelu.negative_slope, in_scale, out_scale); gna_pwl[0].xBase = (x_lower & XBASEMASK) | s.slope_scale_index; // zero out the 2 lsb @@ -244,6 +260,18 @@ void make_gna_pwl(const DnnActivation fun, << " " << 0.0 << " " << (gna_pwl[1].slope * in_scale) / (out_scale*s.slope_scale) << "\n"; + + if (fun.fqParams.set) { // need a right segment + gna_pwl.push_back({ + static_cast(x_upper & XBASEMASK), // zero out the 2 lsb + y_upper, + 0 }); + + gnalog() << (x_upper & XBASEMASK) / in_scale + << " " << gna_pwl[n_segments].yBase / out_scale + << " " << 0 + << "\n"; + } break; } case kActSign: { @@ -281,11 +309,18 @@ void make_gna_pwl(const DnnActivation fun, break; } case kActIdentity: - case kActKaldiLstmClipping: { + case kActKaldiLstmClipping: + case kActFakeQuantize: { int32_t x_lower = INT32_MIN; int32_t x_upper = INT32_MAX; int16_t y_lower = INT16_MIN; int16_t y_upper = INT16_MAX; + if (fun == kActFakeQuantize && fun.fqParams.set) { + x_lower = *fun.fqParams.input_low * in_scale; + x_upper = *fun.fqParams.input_high * in_scale; + y_lower = *fun.fqParams.input_low * out_scale; + y_upper = *fun.fqParams.input_high * out_scale; + } auto n_segments = 2; if (fun == kActKaldiLstmClipping) { gnalog() << "=========================== Clipping Segments ===========================\n"; @@ -311,6 +346,8 @@ void make_gna_pwl(const DnnActivation fun, if (x_upper > y_upper * in_scale / out_scale) x_upper = FLOAT_TO_INT32(y_upper * in_scale / out_scale); if (y_lower < x_lower * out_scale / in_scale) y_lower = FLOAT_TO_INT16(x_lower * out_scale / in_scale); if (y_upper > x_upper * out_scale / in_scale) y_upper = FLOAT_TO_INT16(x_upper * out_scale / in_scale); + } else if (fun == kActFakeQuantize) { + gnalog() << "=========================== Fake Quantize Segments ===========================\n"; } gna_pwl.resize(n_segments); gna_pwl[0].xBase = INT32_MIN & XBASEMASK; // zero out the 2 lsb diff --git a/inference-engine/src/gna_plugin/descriptions/gna_flags.hpp b/inference-engine/src/gna_plugin/descriptions/gna_flags.hpp index 34af49e5586..9bb0169183a 
100644 --- a/inference-engine/src/gna_plugin/descriptions/gna_flags.hpp +++ b/inference-engine/src/gna_plugin/descriptions/gna_flags.hpp @@ -13,6 +13,7 @@ struct GNAFlags { bool compact_mode = false; bool exclusive_async_requests = false; bool uniformPwlDesign = false; + float pwlMaxErrorPercent = 1.0f; bool gna_openmp_multithreading = false; bool sw_fp32 = false; bool fake_quantized = false; diff --git a/inference-engine/src/gna_plugin/frontend/layer_quantizer.hpp b/inference-engine/src/gna_plugin/frontend/layer_quantizer.hpp index dba694b8055..6f38366f6e5 100644 --- a/inference-engine/src/gna_plugin/frontend/layer_quantizer.hpp +++ b/inference-engine/src/gna_plugin/frontend/layer_quantizer.hpp @@ -95,6 +95,15 @@ struct QuantPair { static B optional () { return B();} }; +struct FakeQuantizeParams { + bool paramsSet = false; + uint32_t levelsNum = 1; + float inputMinValue = 1.0f; + float inputMaxValue = 1.0f; + float outputMinValue = 1.0f; + float outputMaxValue = 1.0f; +}; + /** * @brief should allocated blob for specific data type, in case of src blob is nullptr * @tparam T @@ -170,14 +179,41 @@ class Quant { template -inline InferenceEngine::Blob::Ptr fp32_to_precision_blob(InferenceEngine::Blob::Ptr fp32_blob, InferenceEngine::Precision precision, float scale_factor) { +inline InferenceEngine::Blob::Ptr fp32_to_precision_blob(InferenceEngine::Blob::Ptr fp32_blob, InferenceEngine::Precision precision, + float scale_factor, const FakeQuantizeParams& fqParams) { auto prec_blob = InferenceEngine::make_shared_blob({ precision, fp32_blob->getTensorDesc().getDims(), fp32_blob->getTensorDesc().getLayout() }); prec_blob->allocate(); + auto input_low = 0.0f; + auto input_high = 0.0f; + auto output_low = 0.0f; + auto output_high = 0.0f; + auto levels = 1; + if (fqParams.paramsSet) { + input_low = fqParams.inputMinValue; + input_high = fqParams.inputMaxValue; + output_low = fqParams.outputMinValue; + output_high = fqParams.outputMaxValue; + levels = fqParams.levelsNum; + } + int i = 0; for (auto& precValue : *prec_blob) { - auto f32Value = fp32_blob->buffer().template as::value_type*>()[i++] * scale_factor; + auto f32Value = fp32_blob->buffer().template as::value_type*>()[i++]; + if (fqParams.paramsSet) { + auto x = f32Value; + if (x <= std::min(input_low, input_high)) { + f32Value = output_low; + } else if (x > std::max(input_low, input_high)) { + f32Value = output_high; + } else { + f32Value = nearbyint((x - input_low) / (input_high - input_low) * (levels - 1)) / + (levels - 1) * (output_high - output_low) + output_low; + } + } + + f32Value = f32Value * scale_factor; if (f32Value > std::numeric_limits::max()) { precValue = std::numeric_limits::max(); } else if (f32Value < std::numeric_limits::min()) { @@ -190,20 +226,21 @@ inline InferenceEngine::Blob::Ptr fp32_to_precision_blob(InferenceEngine::Blob:: return static_cast(prec_blob); } -inline InferenceEngine::Blob::Ptr fp32_to_precision_blob(InferenceEngine::Blob::Ptr fp32_blob, InferenceEngine::Precision precision, float scale_factor) { +inline InferenceEngine::Blob::Ptr fp32_to_precision_blob(InferenceEngine::Blob::Ptr fp32_blob, InferenceEngine::Precision precision, + float scale_factor, const FakeQuantizeParams &fqParams) { InferenceEngine::Blob::Ptr result_ptr = nullptr; switch (precision) { case InferenceEngine::Precision::FP32: - result_ptr = fp32_to_precision_blob(fp32_blob, precision, scale_factor); + result_ptr = fp32_to_precision_blob(fp32_blob, precision, scale_factor, fqParams); break; case InferenceEngine::Precision::I32: - 
result_ptr = fp32_to_precision_blob(fp32_blob, precision, scale_factor); + result_ptr = fp32_to_precision_blob(fp32_blob, precision, scale_factor, fqParams); break; case InferenceEngine::Precision::I16: - result_ptr = fp32_to_precision_blob(fp32_blob, precision, scale_factor); + result_ptr = fp32_to_precision_blob(fp32_blob, precision, scale_factor, fqParams); break; case InferenceEngine::Precision::I8: - result_ptr = fp32_to_precision_blob(fp32_blob, precision, scale_factor); + result_ptr = fp32_to_precision_blob(fp32_blob, precision, scale_factor, fqParams); break; default: THROW_GNA_EXCEPTION << "FP32 to " << precision << " not supported"; @@ -304,13 +341,15 @@ inline void quantizeWeightsBiases(const QuantDesc & quantDesc, auto quantData = InferenceEngine::getInjectedData(*wl); { - auto per_channel_weights = !quantData->_weights_quant.GetMinValues().empty(); + auto weightsStats = !quantData->_weights_quant.GetMinValues().empty(); auto weightsScale = quantData->_weights_quant.GetScale(); auto dstScale = quantData->_dst_quant.GetScale(); - fnc(wl->_weights->buffer().as(), - wl->_biases ? wl->_biases->buffer().as() : nullptr, + auto blob_precision = wl->_weights->getTensorDesc().getPrecision(); + auto quantizedWeights = blob_precision != InferenceEngine::Precision::FP32 && blob_precision != InferenceEngine::Precision::FP16; + fnc(wl->_weights->buffer().as(), + wl->_biases ? wl->_biases->buffer().as() : nullptr, intWeights->buffer(), - intBiases ? intBiases->buffer() : static_cast(nullptr), + intBiases ? intBiases->buffer() : static_cast(nullptr), input_scale_factor, &weightsScale, &dstScale, @@ -318,12 +357,13 @@ inline void quantizeWeightsBiases(const QuantDesc & quantDesc, num_columns, num_rows_padded, num_columns_padded, + quantizedWeights, quantData->_weights_quant.GetLevels(), - nullptr, - nullptr, - per_channel_weights ? &quantData->_weights_quant.GetMinValues().front(): nullptr, - per_channel_weights ? &quantData->_weights_quant.GetMaxValues().front(): nullptr, - &quantData->_weights_quantized); + quantData->_weights_quant.GetMinValues().size(), + weightsStats ? &quantData->_weights_quant.GetMinValues(true).front() : nullptr, + weightsStats ? &quantData->_weights_quant.GetMaxValues(true).front() : nullptr, + weightsStats ? &quantData->_weights_quant.GetMinValues(false).front() : nullptr, + weightsStats ? &quantData->_weights_quant.GetMaxValues(false).front() : nullptr); } wl->_weights = intWeights; wl->_biases = intBiases; @@ -410,19 +450,29 @@ inline void quantizeWeightsBiasesConv(const QuantDesc & quantDesc, auto quantData = InferenceEngine::getInjectedData(*conv); { + auto weightsStats = !quantData->_weights_quant.GetMinValues().empty(); auto weightsScale = quantData->_weights_quant.GetScale(); auto dstScale = quantData->_dst_quant.GetScale(); - fnc(conv->_weights->buffer().as(), - conv->_biases ? conv->_biases->buffer().as() : nullptr, + auto blob_precision = conv->_weights->getTensorDesc().getPrecision(); + auto quantizedWeights = blob_precision != InferenceEngine::Precision::FP32 && blob_precision != InferenceEngine::Precision::FP16; + fnc(conv->_weights->buffer().as(), + conv->_biases ? conv->_biases->buffer().as() : nullptr, intWeights->buffer(), - intBiases ? intBiases->buffer() : static_cast(nullptr), + intBiases ? 
intBiases->buffer() : static_cast(nullptr), input_scale_factor, &weightsScale, &dstScale, num_rows, num_columns, num_rows_padded, - num_columns_padded); + num_columns_padded, + quantizedWeights, + quantData->_weights_quant.GetLevels(), + quantData->_weights_quant.GetMinValues().size(), + weightsStats ? &quantData->_weights_quant.GetMinValues(true).front() : nullptr, + weightsStats ? &quantData->_weights_quant.GetMaxValues(true).front() : nullptr, + weightsStats ? &quantData->_weights_quant.GetMinValues(false).front() : nullptr, + weightsStats ? &quantData->_weights_quant.GetMaxValues(false).front() : nullptr); } conv->_weights = intWeights; conv->_biases = intBiases; @@ -494,11 +544,22 @@ class DataQuantizer : public DataQuantizerBas if (initial_precision == InferenceEngine::Precision::FP16) { cnnLayer->blobs["custom"] = make_fp32_blob(cnnLayer->blobs["custom"]); } - auto const_scale_factor = InferenceEngine::getInjectedData(*cnnLayer)->_dst_quant.GetScale(); + auto quantParams = InferenceEngine::getInjectedData(*cnnLayer); auto new_const_blob = InferenceEngine::Blob::CreateFromData(cnnLayer->outData[0]); auto const_blob = cnnLayer->blobs["custom"]; if (const_blob->getTensorDesc().getPrecision() == InferenceEngine::Precision::FP32) { - cnnLayer->blobs["custom"] = fp32_to_precision_blob(const_blob, cnnLayer->outData[0]->getPrecision(), const_scale_factor); + auto fqParams = FakeQuantizeParams{}; + if (quantParams->_dst_quant.IsStatsSet()) { + fqParams.paramsSet = true; + fqParams.levelsNum = quantParams->_dst_quant.GetLevels(); + fqParams.inputMinValue = quantParams->_dst_quant.GetMinValues(true).front(); + fqParams.inputMaxValue = quantParams->_dst_quant.GetMaxValues(true).front(); + fqParams.outputMinValue = quantParams->_dst_quant.GetMinValues(false).front(); + fqParams.outputMaxValue = quantParams->_dst_quant.GetMaxValues(false).front(); + } + + cnnLayer->blobs["custom"] = fp32_to_precision_blob(const_blob, cnnLayer->outData[0]->getPrecision(), + quantParams->_dst_quant.GetScale(), fqParams); } } diff --git a/inference-engine/src/gna_plugin/frontend/model_quantizer.hpp b/inference-engine/src/gna_plugin/frontend/model_quantizer.hpp index 523fdb3d47a..dc867be0a9a 100644 --- a/inference-engine/src/gna_plugin/frontend/model_quantizer.hpp +++ b/inference-engine/src/gna_plugin/frontend/model_quantizer.hpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include "gna_graph_tools.hpp" @@ -77,7 +78,8 @@ class ModelQuantizer { scaleIndex++; } - propagateScaleFactor(sortedNewNet, T::mandatory().getWeightsPrecision().size()); + bool isFakeQuantize = std::is_same() || std::is_same(); + propagateScaleFactor(sortedNewNet, T::mandatory().getWeightsPrecision().size(), isFakeQuantize); // sorted order gives possibility for propagate quantisation along depended layers for (auto &&layer : sortedNewNet) { @@ -88,8 +90,8 @@ class ModelQuantizer { } private : - void propagateScaleFactor(std::vector & net, int weightsBytesSize) const { - ScaleFactorCalculator sf(net, weightsBytesSize); + void propagateScaleFactor(std::vector & net, int weightsBytesSize, bool fakeQuantize) const { + ScaleFactorCalculator sf(net, weightsBytesSize, fakeQuantize); while (!sf.allLayersProcessed()) { for (auto &&layer : sf.getStartLayers()) { diff --git a/inference-engine/src/gna_plugin/frontend/quantization.cpp b/inference-engine/src/gna_plugin/frontend/quantization.cpp index 33999cffe3e..d8b5f9d4da3 100644 --- a/inference-engine/src/gna_plugin/frontend/quantization.cpp +++ 
b/inference-engine/src/gna_plugin/frontend/quantization.cpp @@ -9,6 +9,7 @@ #include #include "backend/gna_types.h" #include "quantization.h" +#include #ifdef DEBUG #define QUANTWARNING(...) (fprintf(stderr, __VA_ARGS__)) @@ -19,26 +20,44 @@ template<> void QuantizationCallback::runFakeQuantize() const { + if (quantizedWeights) { + THROW_GNA_EXCEPTION << "Quantized weights are not yet supported in int16 quantization mode"; + } + uint32_t num_saturate = 0; + auto input_low = 0.0f; + auto input_high = 0.0f; + auto output_low = 0.0f; + auto output_high = 0.0f; + auto levels = 1; + if (fq_num_stats > 0) { + input_low = *fq_ptr_input_low; + input_high = *fq_ptr_input_high; + output_low = *fq_ptr_output_low; + output_high = *fq_ptr_output_high; + levels = fq_levels; + } for (uint32_t row = 0; row < num_rows; row++) { for (uint32_t col = 0; col < num_columns; col++) { float rounding_value = (ptr_float_weights[row * num_columns + col] > 0) ? 0.5f : -0.5f; float value = ptr_float_weights[row * num_columns + col]; - if (!*ptr_quantized_weights) { - value = value * *ptr_weight_scale_factor + rounding_value; - } else { - value -= MAX_VAL_2B_WEIGHT; + if (fq_num_stats > 0) { + auto x = value; + if (x <= std::min(input_low, input_high)) { + value = output_low; + } else if (x > std::max(input_low, input_high)) { + value = output_high; + } else { + value = nearbyint((x - input_low) / (input_high - input_low) * (levels - 1)) / + (levels - 1) * (output_high - output_low) + output_low; + } } + value = value * *ptr_weight_scale_factor + rounding_value; + int16_t* ptr_weight_16 = ptr_int_weights + (row * num_columns_padded + col); - if (*ptr_quantized_weights && - (value > std::numeric_limits::max() || - value < std::numeric_limits::min())) { - THROW_GNA_EXCEPTION << "unsupported weights range for I16 quantisation: " << value; - } - if (value > std::numeric_limits::max()) { *ptr_weight_16 = std::numeric_limits::max(); num_saturate++; @@ -91,37 +110,6 @@ void QuantizationCallback::runFakeQuantize() const { template<> void QuantizationCallback::runQuantize() const { uint32_t num_saturate = 0; - - if (*ptr_weight_scale_factor == 1.0) { - // scale factor for weights is not calculated yet - float mean_weight = 0.0; - float mean_weight_squared = 0.0; - float max_weight = -1e20f; - float var_weight; - float mean_plus_2stdev; - - for (uint32_t i = 0; i < num_rows; i++) { - for (uint32_t j = 0; j < num_columns; j++) { - float weight = ptr_float_weights[i * num_columns + j]; - mean_weight += weight; - mean_weight_squared += weight * weight; - if (fabs(weight) > max_weight) { - max_weight = fabs(weight); - } - } - } - - mean_weight /= static_cast(num_rows * num_columns); - mean_weight_squared /= static_cast(num_rows * num_columns); - var_weight = mean_weight_squared - mean_weight * mean_weight; - mean_plus_2stdev = mean_weight + 2.0f * static_cast(sqrtf(var_weight)); - - if (max_weight != 0.0f) { - *ptr_weight_scale_factor = static_cast(MAX_VAL_2B_WEIGHT) / max_weight; - } - *ptr_output_scale_factor = input_scale_factor * *ptr_weight_scale_factor; - } - for (uint32_t row = 0; row < num_rows; row++) { for (uint32_t col = 0; col < num_columns; col++) { float rounding_value = (ptr_float_weights[row * num_columns + col] > 0) ? 0.5f : -0.5f; @@ -176,6 +164,24 @@ void QuantizationCallback::runQuantize() const { } } +std::pair FindMinMaxValues(void* ptr_float_memory, size_t num_elements) { + float* ptr_float_feat = reinterpret_cast(ptr_float_memory); + float min = num_elements ? 
ptr_float_feat[0] : 0.0; + float max = num_elements ? ptr_float_feat[0] : 0.0; + + for (size_t i = 1; i < num_elements; i++) { + if (fabs(ptr_float_feat[i]) > max) { + max = fabs(ptr_float_feat[i]); + } + + if (fabs(ptr_float_feat[i]) < min) { + min = fabs(ptr_float_feat[i]); + } + } + + return { min, max }; +} + float ScaleFactorForQuantization(void *ptr_float_memory, float target_max, size_t num_elements) { float *ptr_float_feat = reinterpret_cast(ptr_float_memory); float max = 0.0; @@ -224,17 +230,37 @@ template<> void QuantizationCallback::runFakeQuantize() const { uint32_t num_saturate = 0; - if (fq_ptr_output_high == nullptr || fq_ptr_output_low == nullptr) { - THROW_GNA_EXCEPTION << "Fake quantized output range not set"; - } - if (fq_levels == 0 || fq_levels == 1) { - THROW_GNA_EXCEPTION << "Fake quantized levels not set"; - } - + auto input_low = 0.0f; + auto input_high = 0.0f; + auto output_low = 0.0f; + auto output_high = 0.0f; + auto levels = 1; + float valueAcc = 0.0; for (uint32_t i = 0; i < num_rows; i++) { - uint32_t channel_multiplier = ((fq_ptr_output_high[i] - fq_ptr_output_low[i]) * - *ptr_weight_scale_factor) / (fq_levels - 1) + 0.5f; - ptr_int_biases[i].multiplier = static_cast (channel_multiplier); + uint32_t channel_multiplier = 1; + if (fq_num_stats > 0) { + auto idx = fq_num_stats == 1 ? 0 : i; + input_low = fq_ptr_input_low[idx]; + input_high = fq_ptr_input_high[idx]; + output_low = fq_ptr_output_low[idx]; + output_high = fq_ptr_output_high[idx]; + levels = fq_levels; + + channel_multiplier = ((input_high - input_low) * *ptr_weight_scale_factor) / (levels - 1); + } else { + float scaled_row_max = 0; + for (uint32_t col = 0; col < num_columns; col++) { + float value = ptr_float_weights[i * num_columns + col] * *ptr_weight_scale_factor; + valueAcc += value; + if (fabs(value) > scaled_row_max) { + scaled_row_max = fabs(value); + } + } + + channel_multiplier = scaled_row_max / static_cast(MAX_VAL_1B_WEIGHT); + } + + ptr_int_biases[i].multiplier = static_cast (channel_multiplier + 0.5f); if (channel_multiplier > MAX_OUT_MULTIPLIER) { THROW_GNA_EXCEPTION << "invalid channel multiplier: " << channel_multiplier; } @@ -243,19 +269,25 @@ void QuantizationCallback::runFakeQuantize() const auto offset = i * num_columns + j; auto rounding_value = (ptr_float_weights[i * num_columns + j] > 0) ? 
0.5f : -0.5f; float value = ptr_float_weights[offset]; - if (!*ptr_quantized_weights) { + if (!quantizedWeights) { + if (fq_num_stats > 0) { + auto x = value; + if (x <= std::min(input_low, input_high)) { + value = output_low; + } else if (x > std::max(input_low, input_high)) { + value = output_high; + } else { + value = nearbyint((x - input_low) / (input_high - input_low) * (levels - 1)) / + (levels - 1) * (output_high - output_low) + output_low; + } + } + value = value * (*ptr_weight_scale_factor / ptr_int_biases[i].multiplier) + rounding_value; } else { value -= MAX_VAL_1B_WEIGHT; } auto normalizedWeight = static_cast(value); - if (*ptr_quantized_weights && - (value > std::numeric_limits::max() || - value < std::numeric_limits::min())) { - THROW_GNA_EXCEPTION << "unsupported weights range for I8 quantization: " << value; - } - if (value > std::numeric_limits::max()) { normalizedWeight = std::numeric_limits::max(); num_saturate++; @@ -309,40 +341,6 @@ void QuantizationCallback::runQuantize() const { } uint32_t num_saturate = 0; - if (*ptr_weight_scale_factor == 1.0) { - // scale factor for weights is not calculated yet - float mean_weight = 0.0; - float mean_weight_squared = 0.0; - float max_weight = -1e20f; - float var_weight; - float mean_plus_2stdev; - - for (uint32_t i = 0; i < num_rows; i++) { - for (uint32_t j = 0; j < num_columns; j++) { - float weight = ptr_float_weights[i*num_columns + j]; - mean_weight += weight; - mean_weight_squared += weight * weight; - if (fabs(weight) > max_weight) { - max_weight = fabs(weight); - } - } - } - - mean_weight /= static_cast(num_rows * num_columns); - mean_weight_squared /= static_cast(num_rows * num_columns); - var_weight = mean_weight_squared - mean_weight * mean_weight; - mean_plus_2stdev = mean_weight + 2.0f * static_cast(sqrtf(var_weight)); - - *ptr_weight_scale_factor = static_cast(MAX_VAL_1B_WEIGHT) / max_weight; - - // For 8 bit weights quantize as follows: - // 1. adjust scale factor to increase dynamic range of entire matrix by max multiplier - // 2. find maximum scaled weight for each row - // 3. find multiplier such that dividing by the multiplier brings row back within 8-bit dynamic range - // 4. 
quantize and store scaled row - *ptr_weight_scale_factor = MAX_OUT_MULTIPLIER * *ptr_weight_scale_factor; // increase dynamic range by max multiplier - *ptr_output_scale_factor = input_scale_factor * *ptr_weight_scale_factor; - } float valueAcc = 0.0; for (uint32_t row = 0; row < num_rows; row++) { float scaled_row_max = 0; diff --git a/inference-engine/src/gna_plugin/frontend/quantization.h b/inference-engine/src/gna_plugin/frontend/quantization.h index 67a72aadadf..1916bba298e 100644 --- a/inference-engine/src/gna_plugin/frontend/quantization.h +++ b/inference-engine/src/gna_plugin/frontend/quantization.h @@ -31,12 +31,13 @@ struct QuantizationCallback { uint32_t num_rows_padded; uint32_t num_columns_padded; + bool quantizedWeights; int32_t fq_levels; + const size_t fq_num_stats; const float *fq_ptr_input_low; const float *fq_ptr_input_high; - const float *fq_ptr_output_low; - const float *fq_ptr_output_high; - const bool* ptr_quantized_weights; + const float* fq_ptr_output_low; + const float* fq_ptr_output_high; void runQuantize() const; void runFakeQuantize() const; @@ -45,5 +46,6 @@ struct QuantizationCallback { template class QuantizationCallback; template class QuantizationCallback; +std::pair FindMinMaxValues(void* ptr_float_memory, size_t num_elements); float ScaleFactorForQuantization(void *ptr_float_memory, float target_max, size_t num_elements); void QuantizeVector16(float *ptr_float_memory, int16_t *ptr_int_memory, uint32_t num_elements, float scale_factor); diff --git a/inference-engine/src/gna_plugin/frontend/quantized_layer_params.hpp b/inference-engine/src/gna_plugin/frontend/quantized_layer_params.hpp index 5f6c6a60907..bf510c7bb50 100644 --- a/inference-engine/src/gna_plugin/frontend/quantized_layer_params.hpp +++ b/inference-engine/src/gna_plugin/frontend/quantized_layer_params.hpp @@ -24,27 +24,57 @@ public: int32_t GetLevels() const { return levels; } - void SetMinValues(const std::vector &min) { - min_values.clear(); - min_values.insert(min_values.end(), min.begin(), min.end()); + bool IsStatsSet() const { + return !input_min_values.empty() && !input_max_values.empty(); } - const std::vector& GetMinValues() const { - return min_values; + void SetMinValues(const std::vector &min, bool input = true) { + if (input) { + input_min_values.clear(); + input_min_values.insert(input_min_values.end(), min.begin(), min.end()); + } else { + output_min_values.clear(); + output_min_values.insert(output_min_values.end(), min.begin(), min.end()); + } } - void SetMaxValues(const std::vector& max) { - max_values.clear(); - max_values.insert(max_values.end(), max.begin(), max.end()); + std::vector& GetMinValues(bool input = true) { + if (input) { + return input_min_values; + } + + return output_min_values; } - const std::vector& GetMaxValues() const { - return max_values; + void SetMaxValues(const std::vector& max, bool input = true) { + if (input) { + input_max_values.clear(); + input_max_values.insert(input_max_values.end(), max.begin(), max.end()); + } else { + output_max_values.clear(); + output_max_values.insert(output_max_values.end(), max.begin(), max.end()); + } + } + std::vector& GetMaxValues(bool input = true) { + if (input) { + return input_max_values; + } + + return output_max_values; + } + void CopyStats(Quantization &src) { + levels = src.GetLevels(); + SetMinValues(src.GetMinValues(true), true); + SetMaxValues(src.GetMaxValues(true), true); + SetMinValues(src.GetMinValues(false), false); + SetMaxValues(src.GetMaxValues(false), false); } private: float scale = 1.0f; bool 
scale_set = false; int32_t levels = 0; - std::vector min_values; - std::vector max_values; + std::vector input_min_values; + std::vector input_max_values; + std::vector output_min_values; + std::vector output_max_values; }; struct QuantizedLayerParams { @@ -53,7 +83,6 @@ struct QuantizedLayerParams { // deprecate this Quantization _weights_quant; - bool _weights_quantized = false; Quantization _bias_quant; float _o_shift = 0.0f; float _b_shift = 0.0f; diff --git a/inference-engine/src/gna_plugin/frontend/scale_factor_calc.hpp b/inference-engine/src/gna_plugin/frontend/scale_factor_calc.hpp index 6791768e4e9..b6f5912a814 100644 --- a/inference-engine/src/gna_plugin/frontend/scale_factor_calc.hpp +++ b/inference-engine/src/gna_plugin/frontend/scale_factor_calc.hpp @@ -16,9 +16,13 @@ #include "layers/gna_layer_info.hpp" #include "gna_plugin_log.hpp" #include "gna_slope_scale.h" +#include "runtime/pwl.h" namespace GNAPluginNS { namespace frontend { +static const float MIN_SEARCH_WEIGHTS_VAL = 1.0f; +static const float MAX_SEARCH_WEIGHTS_VAL = 1024.0f; + struct ScaleFactorUpdateResult { InferenceEngine::CNNLayer *restartLayer = nullptr; ScaleFactorUpdateResult() = default; @@ -29,6 +33,146 @@ struct ScaleFactorUpdateResult { } }; +/** + * @brief Compares two float values and returns if they are equal + * @param p1 First float value + * @param p2 Second float value + * @return Returns true if two float values are equal + */ +static bool fp32eq(float p1, float p2) { + return (std::abs(p1 - p2) <= 0.00001f * std::min(std::abs(p1), std::abs(p2))); +} + +/** + * @brief Calculates PWL slopes for specified function in a given input range + * @param info Layer information + * @return Array of slopes for a function + */ +static std::vector getPWLSlopes(const LayerInfo& info) { + if (info.isIdentity() || info.isFakeQuantize() || info.isRelu() || info.isClamp() || info.isAbs()) { + return { 1.0f }; + } + + return {}; +} + +/** + * @brief Finds the best output activation scale factor that allows to get the most precise PWL slope + * @param inScale Input activation layer scale factor + * @param outScales Array of output activation scale factors + * @param slopes Array of slopes for a given function + * @return Best output activation scale factor + */ +static float selectBestOutputScaleFactors(float inScale, std::vector outScales, const std::vector& slopes) { + std::vector scaleErrors; + for (size_t i = 0; i < outScales.size(); ++i) { + auto outScale = outScales[i]; + + auto sd = 0.0; + for (size_t j = 0; j < slopes.size(); ++j) { + auto s = gna_slope(slopes[j], inScale, outScale); + auto slope = static_cast(s.slope * s.slope_scale); + if (slope < std::numeric_limits::min() && slope > std::numeric_limits::max()) { + sd += std::numeric_limits::max(); + continue; + } + + auto testSlope = static_cast(slope) / s.slope_scale * inScale / outScale; + if (fp32eq(testSlope, slopes[j])) { + return outScale; + } + + sd += pow(testSlope - slopes[j], 2.0); + } + + sd /= slopes.size(); + sd = sqrtf(sd); + scaleErrors.push_back(sd); + } + + size_t minIndex = 0; + auto minError = scaleErrors[0]; + for (size_t i = 1; i < scaleErrors.size(); ++i) { + if (scaleErrors[i] < minError) { + minError = scaleErrors[i]; + minIndex = i; + } + } + + return outScales[minIndex]; +} + +/** + * @brief Finds the weights scale factor that allows to get the most precise PWL slope + * @param inScale Input weightable layer scale factor + * @param outScale Output activation scale factor + * @param weightsScales Array of weights scales to check + 
* @return Best weights scale factor + */ +static float selectBestWeightsScaleFactors(float inScale, float outScale, std::vector weightsScales, + const std::vector& slopes) { + std::vector scaleErrors; + for (size_t i = 0; i < weightsScales.size(); ++i) { + auto weightScale = weightsScales[i]; + + auto sd = 0.0; + for (size_t j = 0; j < slopes.size(); ++j) { + auto s = gna_slope(slopes[j], inScale * weightScale, outScale); + auto slope = static_cast(s.slope * s.slope_scale); + if (slope < std::numeric_limits::min() && slope > std::numeric_limits::max()) { + sd += std::numeric_limits::max(); + continue; + } + + auto testSlope = static_cast(slope) / s.slope_scale * (inScale * weightScale) / outScale; + if (fp32eq(testSlope, slopes[j])) { + return outScale; + } + sd += pow(testSlope - slopes[j], 2.0); + } + + sd /= slopes.size(); + sd = sqrtf(sd); + scaleErrors.push_back(sd); + } + + size_t minIndex = 0; + auto minError = scaleErrors[0]; + for (size_t i = 1; i < scaleErrors.size(); ++i) { + if (scaleErrors[i] < minError) { + minError = scaleErrors[i]; + minIndex = i; + } + } + + return weightsScales[minIndex]; +} + +/** + * @brief Generates specified number of scale factors in a given range. + * @param startRange First scale factor + * @param endRange Last scale factor + * @param numIterations number of scale factors to generate + * @return Array of scale factors + */ +static std::vector generateScaleFactors(float startRange, float endRange, size_t numScaleFactors) { + if (!numScaleFactors) { + return { startRange, endRange }; + } + + auto scaleFactors = std::vector{}; + auto domain = endRange - startRange; + auto step = domain / numScaleFactors; + for (size_t i = 0; i <= numScaleFactors; ++i) { + auto scale = startRange + step * i; + if (!std::isnan(scale)) { + scaleFactors.push_back(scale); + } + } + + return scaleFactors; +} + /** * @brief calculates output scale factor per layer * @tparam T @@ -44,7 +188,7 @@ class ScaleFactorPerLayer { * @param result * @return */ - bool operator()(T cnnLayer, int weightsSize, ScaleFactorUpdateResult &result) { + bool operator()(T cnnLayer, int weightsSize, ScaleFactorUpdateResult &result, const bool fakeQuantize) { return false; } }; @@ -54,17 +198,15 @@ class ScaleFactorPerLayer { private : const float activation_scale_factor = 2048.f; const float identity_scale_factor = 2049.0f; + const float max_activation_scale_factor = 4096.0f; const float k = 5; const float k_identity = 6; const double pow_domain = 16; protected : - static bool fp32eq(float p1, float p2) { - return (std::abs(p1 - p2) <= 0.00001f * std::min(std::abs(p1), std::abs(p2))); - } - float getActivationScale(InferenceEngine::CNNLayer const* cnnLayer, - GNAPluginNS::LayerInfo const& layer) { + GNAPluginNS::LayerInfo const& layer, + const bool fakeQuantize) { auto quantizedParams = InferenceEngine::getInjectedData(*cnnLayer); // todo: calculate proper scale factor where we need to expand it a bit to be safe to stay in int16 weights @@ -136,18 +278,140 @@ class ScaleFactorPerLayer { } } - if (!quantizedParams->_dst_quant.GetMaxValues().empty()) { - auto min_value = quantizedParams->_dst_quant.GetMinValues().front(); - auto max_value = quantizedParams->_dst_quant.GetMaxValues().front(); - auto newScaleFactor = (quantizedParams->_dst_quant.GetLevels() - 1) / (max_value - min_value); - result = newScaleFactor < result ? newScaleFactor : result; + // Identity layer is inserted by GNA passes and requires statistics to correctly set output + // scale factor. 
POT does not produce any statistics for this layer as it does not exist + // in the source IR. + if (fakeQuantize && !quantizedParams->_dst_quant.IsScaleSet() && layer.isIdentity()) { + auto prevLayer = CNNNetPrevLayer(cnnLayer); + while (prevLayer != nullptr) { + auto prevQuantParams = InferenceEngine::getInjectedData(*prevLayer); + if (prevQuantParams->_dst_quant.IsStatsSet()) { + quantizedParams->_dst_quant.CopyStats(prevQuantParams->_dst_quant); + quantizedParams->_src_quant.CopyStats(prevQuantParams->_dst_quant); + break; + } + + // Take the input statistics only if layer does not modify input values. + if (prevQuantParams->_src_quant.IsStatsSet() && + (LayerInfo(prevLayer).isNonFunctional() || LayerInfo(prevLayer).isMemory() || + LayerInfo(prevLayer).isConst() || LayerInfo(prevLayer).isInput())) { + quantizedParams->_dst_quant.CopyStats(prevQuantParams->_src_quant); + quantizedParams->_src_quant.CopyStats(prevQuantParams->_src_quant); + break; + } + + // Stop searching for statistics if previous layer does not modify input values. + if ((LayerInfo(prevLayer).isWeightable() && !LayerInfo(prevLayer).isWeightableIdentity()) + || LayerInfo(prevLayer).isEltwise() || LayerInfo(prevLayer).isActivation()) { + break; + } + + if (!CNNNetHasPrevLayer(prevLayer.get())) { + break; + } + + prevLayer = CNNNetPrevLayer(prevLayer); + } + + // If did not find statistics by searching previous layers, check if a next layer has + // statistics set. + if (!quantizedParams->_dst_quant.IsStatsSet()) { + auto donotSkip = [](InferenceEngine::CNNLayerPtr) { + return false; + }; + + auto nextLayers = CNNNetGetAllNextLayersSkipCertain(cnnLayer, -1, donotSkip); + for (auto &l : nextLayers) { + auto nextQuantParams = InferenceEngine::getInjectedData(*l); + if (nextQuantParams->_src_quant.IsStatsSet()) { + quantizedParams->_dst_quant.CopyStats(nextQuantParams->_src_quant); + quantizedParams->_src_quant.CopyStats(nextQuantParams->_src_quant); + break; + } + + // Take output statistics only if a next layer does not modify input values + if (nextQuantParams->_dst_quant.IsStatsSet() && + (LayerInfo(l).isNonFunctional() || LayerInfo(l).isMemory())) { + quantizedParams->_dst_quant.CopyStats(nextQuantParams->_dst_quant); + quantizedParams->_src_quant.CopyStats(nextQuantParams->_dst_quant); + break; + } + } + } + } + + // Adjust output scale factor based on statistics (if present) in the following steps: + // 1. calculate scale factor based on output min and max values + // 2. (temporary W/A) clamp scale factor to maximum activation scale factor + // 3. search previous layers if there was already scale factor set + // 4. adjust output scale factor to get the most precise PWL slope + if (quantizedParams->_dst_quant.IsStatsSet()) { + auto minOutValue = quantizedParams->_dst_quant.GetMinValues().front(); + auto maxOutValue = quantizedParams->_dst_quant.GetMaxValues().front(); + auto absMax = std::max(std::abs(minOutValue), std::abs(maxOutValue)); + auto absMin = std::min(std::abs(minOutValue), std::abs(maxOutValue)); + + result = (quantizedParams->_dst_quant.GetLevels() - 1) / (maxOutValue - minOutValue); + if (0 && fp32eq(absMin, 0.0f) && !fp32eq(absMax, 0.0f)) { + result = (quantizedParams->_dst_quant.GetLevels() - 1) / (2 * absMax); + } + // + //result = MAX_VAL_2B_FEAT / absMax; + if (std::isinf(result) || fp32eq(absMax, 0.0f)) { + result = max_activation_scale_factor; + } + + // TODO: remove clamping maximum scale factor + result = result > max_activation_scale_factor ? 
max_activation_scale_factor : result; + if (!layer.isIdentity() && !layer.isFakeQuantize() && !layer.isRelu() && !layer.isClamp()) { + result = result > activation_scale_factor ? activation_scale_factor : result; + } + + // Take input scale factor from previous layer if previous layer does not modify + // input values + bool usePrevScaleFactor = false; + auto skipNonFunctional = [](InferenceEngine::CNNLayerPtr l) { + return LayerInfo(l).isNonFunctional(); + }; + + auto prevLayer = CNNNetPrevLayerSkipCertain(cnnLayer, 0, skipNonFunctional); + auto prevLayer2 = prevLayer != nullptr? CNNNetPrevLayerSkipCertain(prevLayer, 0, skipNonFunctional): nullptr; + if (prevLayer != nullptr && + (layer.isIdentity() || layer.isFakeQuantize()) && LayerInfo(prevLayer).isWeightableIdentity()) { + auto prevLayerQuant = InferenceEngine::getInjectedData(*prevLayer); + if (!fp32eq(prevLayerQuant->_src_quant.GetScale(), 1.0f) && + (prevLayer2 == nullptr || LayerInfo(prevLayer2).has16BOutput())) { + result = prevLayerQuant->_src_quant.GetScale(); + usePrevScaleFactor = true; + } + } + + // Adjust output scale factor to get the most precise PWL slope. + // NOTE: Currently it is only implemented for identity, clamp, relu and FQ layers. + // For all other layers, it does not improve accuracy. + auto slopes = getPWLSlopes(layer); + if (!slopes.empty() && !usePrevScaleFactor) { + auto div = 10; + auto mul = 10; + auto startRange = result > 1.0f ? static_cast(result) : result; + auto endRange = startRange - startRange / div; + endRange = endRange > 1.0f ? static_cast(endRange) : endRange; + auto scaleFactors = generateScaleFactors(startRange, endRange, static_cast(startRange - endRange) * mul); + auto newScaleFactor = selectBestOutputScaleFactors(quantizedParams->_src_quant.GetScale(), scaleFactors, slopes); + if (!fp32eq(result, newScaleFactor) && + !fp32eq(newScaleFactor, 1.0f) && !fp32eq(newScaleFactor, 0.0f) && !std::isinf(newScaleFactor)) { + gnalog() << "[INFO] Adjusting scale factor for " << cnnLayer->name + << " from: " << result << " to: " << newScaleFactor << "\n"; + result = newScaleFactor; + } + } } return result; } public : - bool operator()(InferenceEngine::CNNLayer *cnnLayer, int weightsSize, ScaleFactorUpdateResult &result) { + bool operator()(InferenceEngine::CNNLayer *cnnLayer, int weightsSize, ScaleFactorUpdateResult &result, const bool fakeQuantize) { if ( !cnnLayer ) { THROW_IE_EXCEPTION << "Incorrect Convolutional Layer pointer \n"; } @@ -156,7 +420,11 @@ class ScaleFactorPerLayer { auto quant = InferenceEngine::getInjectedData(*cnnLayer); if (InferenceEngine::details::CaselessEq()(cnnLayer->type, "Memory")) { - if (!CNNNetHasPrevLayer(cnnLayer) && quant->_dst_quant.IsScaleSet()) { + if (CNNNetHasPrevLayer(cnnLayer) && quant->_dst_quant.IsStatsSet() && !quant->_dst_quant.IsScaleSet()) { + auto minOutValue = quant->_dst_quant.GetMinValues().front(); + auto maxOutValue = quant->_dst_quant.GetMaxValues().front(); + auto scale = (quant->_dst_quant.GetLevels() - 1) / (maxOutValue - minOutValue); + quant->_dst_quant.SetScale(scale); quant->_src_quant = quant->_dst_quant; } @@ -180,7 +448,9 @@ class ScaleFactorPerLayer { return true; } - if (quantSibling->_dst_quant.IsScaleSet()) { + if ((!fakeQuantize && quantSibling->_dst_quant.IsScaleSet()) || + (fakeQuantize && quantSibling->_dst_quant.IsScaleSet() && !fp32eq(quantSibling->_dst_quant.GetScale(), 1.0) && + quantSibling->_dst_quant.GetScale() < inputQuant->_dst_quant.GetScale())) { // means we already restarted propagation input memory layer // need to 
search for requantiseable layer prior memory output layer InferenceEngine::CNNLayerPtr restartedLayer; @@ -230,7 +500,8 @@ class ScaleFactorPerLayer { << activation_scale_factor << ", restarting from corresponding memory: " << input->name << std::endl; // try updating memory input layer scale factor and restart from it - quantSibling->_src_quant = quantSibling->_dst_quant = inputQuant->_dst_quant; + quantSibling->_src_quant.SetScale(inputQuant->_dst_quant.GetScale()); + quantSibling->_dst_quant.SetScale(inputQuant->_dst_quant.GetScale()); result = ScaleFactorUpdateResult(input.get()); return true; } @@ -241,49 +512,55 @@ class ScaleFactorPerLayer { if (cnnLayer->type == "Const") { if (quant->_dst_quant.IsScaleSet()) { quant->_src_quant = quant->_dst_quant; - return ScaleFactorUpdateResult(); - } - - auto blob = cnnLayer->blobs["custom"]; - auto blob_precision = blob->getTensorDesc().getPrecision(); - - if (blob_precision != InferenceEngine::Precision::FP32 && blob_precision != InferenceEngine::Precision::FP16) { - quant->_dst_quant.SetScale(1.0f); return true; } - if (blob_precision == InferenceEngine::Precision::FP16) { - blob = make_fp32_blob(blob); - } - auto max_val = std::numeric_limits::min(); auto min_val = std::numeric_limits::max(); + if (quant->_dst_quant.IsStatsSet()) { + min_val = quant->_dst_quant.GetMinValues().front(); + max_val = quant->_dst_quant.GetMaxValues().front(); + } else { + auto blob = cnnLayer->blobs["custom"]; + auto blob_precision = blob->getTensorDesc().getPrecision(); - auto flt_buf = blob->buffer().as(); - auto size = blob->size(); + if (blob_precision != InferenceEngine::Precision::FP32 && blob_precision != InferenceEngine::Precision::FP16) { + quant->_dst_quant.SetScale(1.0f); + return true; + } - for (int i=0; i < size; i++) { - auto val = flt_buf[i]; - if (val > max_val) max_val = val; - if (val < min_val) min_val = val; + if (blob_precision == InferenceEngine::Precision::FP16) { + blob = make_fp32_blob(blob); + } + + auto flt_buf = blob->buffer().as(); + auto size = blob->size(); + + for (int i = 0; i < size; i++) { + auto val = flt_buf[i]; + if (val > max_val) max_val = val; + if (val < min_val) min_val = val; + } } + auto levels = fakeQuantize ? MAX_VAL_2B_FEAT : std::numeric_limits::max(); auto abs_val = std::max(std::abs(max_val), std::abs(min_val)); - auto scale_val = static_cast(std::numeric_limits::max()) / abs_val; + auto scale_val = static_cast(levels) / abs_val; + //TODO: use FQ formula for scale factor calculation - // TODO: Investigate what should be the scale in such cases (31910) - if (std::isinf(scale_val)) { - quant->_dst_quant.SetScale(quant->_src_quant.GetScale()); + if (std::isinf(scale_val) || fp32eq(abs_val, 0.0f)) { + quant->_dst_quant.SetScale(fakeQuantize ? 
levels : 1.0f); } else { quant->_dst_quant.SetScale(scale_val); } + quant->_src_quant.SetScale(quant->_dst_quant.GetScale()); - return ScaleFactorUpdateResult(); + return true; } if (!CNNNetHasPrevLayer(cnnLayer)) { quant->_dst_quant = quant->_src_quant; - return ScaleFactorUpdateResult(); + return true; } // by default layer is pass thru its scale factor @@ -292,17 +569,41 @@ class ScaleFactorPerLayer { THROW_GNA_EXCEPTION << "layer: " << CNNNetPrevLayer(cnnLayer)->name << "not quantized"; } - quant->_src_quant = inputQuant->_dst_quant; - if (layerInfo.isActivation()) { + if (layerInfo.isPower() && !layerInfo.isActivation()) { + auto quant = InferenceEngine::getInjectedData(*cnnLayer); + auto powerLayer = dynamic_cast(cnnLayer); + if (!powerLayer) { + THROW_IE_EXCEPTION << "Incorrect Power Layer pointer \n"; + } + + auto powerScale = std::abs(powerLayer->scale); + if (fp32eq(powerScale, 0.0f)) { + powerScale = 1.0f; + } + auto weightsScaleFactor = MAX_VAL_2B_WEIGHT / powerScale; + quant->_src_quant.SetScale(inputQuant->_dst_quant.GetScale()); + quant->_weights_quant.SetScale(weightsScaleFactor); + quant->_dst_quant.SetScale(quant->_weights_quant.GetScale() * quant->_src_quant.GetScale()); + return true; + } else if (layerInfo.isActivation()) { // todo: calculate proper scale factor where we need to expand it a bit to be safe to stay in int16 weights // set the initial value - if (!quant->_dst_quant.IsScaleSet()) { - auto scale = getActivationScale(cnnLayer, layerInfo); + if (!quant->_dst_quant.IsScaleSet() || fp32eq(quant->_dst_quant.GetScale(), 1.0f) || + !fp32eq(quant->_src_quant.GetScale(), inputQuant->_dst_quant.GetScale())) { + quant->_src_quant.SetScale(inputQuant->_dst_quant.GetScale()); + auto scale = getActivationScale(cnnLayer, layerInfo, fakeQuantize); quant->_dst_quant.SetScale(scale); } return true; + } else if (layerInfo.isCropAffined()) { + auto weightsScaleFactor = 1; + quant->_weights_quant.SetScale(weightsScaleFactor); + quant->_src_quant.SetScale(inputQuant->_dst_quant.GetScale()); + quant->_dst_quant.SetScale(quant->_weights_quant.GetScale() * quant->_src_quant.GetScale()); + return true; } - quant->_dst_quant = inputQuant->_dst_quant; + quant->_src_quant.SetScale(inputQuant->_dst_quant.GetScale()); + quant->_dst_quant.SetScale(inputQuant->_dst_quant.GetScale()); return true; } @@ -311,7 +612,7 @@ class ScaleFactorPerLayer { template<> class ScaleFactorPerLayer { public: - bool operator()(InferenceEngine::EltwiseLayer* eltwiseLayer, int weightsSize, ScaleFactorUpdateResult &result) { + bool operator()(InferenceEngine::EltwiseLayer* eltwiseLayer, int weightsSize, ScaleFactorUpdateResult &result, const bool fakeQuantize) { if ( !eltwiseLayer ) { THROW_GNA_EXCEPTION << "Incorrect Eltwise Layer pointer \n"; } @@ -325,7 +626,7 @@ class ScaleFactorPerLayer { switch (eltwiseLayer->_operation) { case InferenceEngine::EltwiseLayer::Prod: { - quantData->_weights_quant = quantParams1->_dst_quant; + quantData->_weights_quant.SetScale(quantParams1->_dst_quant.GetScale()); quantData->_dst_quant.SetScale(quantParams0->_dst_quant.GetScale() * quantParams1->_dst_quant.GetScale()); break; } @@ -344,9 +645,51 @@ class ScaleFactorPerLayer { std::swap(quantParams0, quantParams1); } + auto prevLayer = in1; + while (LayerInfo(prevLayer).isNonFunctional() && CNNNetHasPrevLayer(prevLayer.get(), 0)) { + prevLayer = CNNNetPrevLayer(prevLayer); + } + // this path might result in significant data loss quantData->_bias_quant.SetScale(quantParams1->_dst_quant.GetScale() / 
quantParams0->_dst_quant.GetScale()); - quantData->_weights_quant.SetScale(quantParams1->_dst_quant.GetScale() / quantParams0->_dst_quant.GetScale()); + auto weightsScale = quantParams1->_dst_quant.GetScale() / quantParams0->_dst_quant.GetScale(); + auto prevLayerIn1 = CNNNetPrevLayer(in1); + // If a previous layer is a layer where freely weights scale factor can be selected, + // try to find the scale factor that will allow to use integer as weights scale factor for eltwise + // operation. + // If the weights scale factor for eltwise sum/sub is not integer, it will cause accuracy degradation. + if (fakeQuantize && LayerInfo(in1).isWeightableIdentity() && + (prevLayerIn1 == nullptr || LayerInfo(prevLayerIn1).has16BOutput())) { + auto bestWeightsScale = 0.0f; + auto bestError = static_cast(std::numeric_limits::max()); + auto scaleIn0Dst = quantParams0->_dst_quant.GetScale(); + auto scaleIn1Src = quantParams1->_src_quant.GetScale(); + for (size_t i = MAX_VAL_2B_FEAT; i > 0; --i) { + auto scaleIn1Dst = i * scaleIn1Src; + auto eltwiseWeightsScale = scaleIn1Dst / scaleIn0Dst; + if (eltwiseWeightsScale < 1.0 || eltwiseWeightsScale > std::numeric_limits::max() - 1) { + continue; + } + + auto error = std::abs(eltwiseWeightsScale - static_cast(eltwiseWeightsScale)); + if (error < bestError) { + bestError = error; + bestWeightsScale = i; + } + + if (fp32eq(error, 0.0f)) { + break; + } + } + + if (!fp32eq(bestWeightsScale, quantParams1->_weights_quant.GetScale())) { + quantParams1->_weights_quant.SetScale(bestWeightsScale); + quantParams1->_dst_quant.SetScale(quantParams1->_weights_quant.GetScale() * quantParams1->_src_quant.GetScale()); + result = ScaleFactorUpdateResult(in1.get()); + return true; + } + } + quantData->_weights_quant.SetScale(weightsScale); quantData->_dst_quant.SetScale(quantParams1->_dst_quant.GetScale()); // eltwise will always work in int16 @@ -382,6 +725,22 @@ class ScaleFactorPerLayer { break; } + if (fakeQuantize && info.isWeightableIdentity()) { + auto quantDataForInputLayer = InferenceEngine::getInjectedData(*in); + if (!fp32eq(quantDataForInputLayer->_weights_quant.GetScale(), 1.0f)) { + auto reducer = quantData->_weights_quant.GetScale() / std::numeric_limits::max(); + reducer = std::max(1.0f, reducer); + auto newWeightsScale = quantDataForInputLayer->_weights_quant.GetScale() / reducer; + newWeightsScale = std::max(1.0f, newWeightsScale); + quantDataForInputLayer->_weights_quant.SetScale(static_cast(newWeightsScale)); + quantDataForInputLayer->_dst_quant.SetScale(quantDataForInputLayer->_weights_quant.GetScale() * + quantDataForInputLayer->_src_quant.GetScale()); + + result = ScaleFactorUpdateResult(in.get()); + return true; + } + } + // if we are here it means that we are in the port 1 if (info.isFullyConnected() || info.isConvolution()) { auto quantDataForInputLayer = InferenceEngine::getInjectedData(*in); @@ -408,7 +767,7 @@ class ScaleFactorPerLayer { template<> class ScaleFactorPerLayer { public: - bool operator()(InferenceEngine::ConcatLayer* concatLayer, int weightsSize, ScaleFactorUpdateResult &result) { + bool operator()(InferenceEngine::ConcatLayer* concatLayer, int weightsSize, ScaleFactorUpdateResult &result, const bool fakeQuantize) { if ( !concatLayer ) { THROW_GNA_EXCEPTION << "Incorrect Concat Layer pointer \n"; } @@ -417,10 +776,6 @@ class ScaleFactorPerLayer { THROW_GNA_EXCEPTION << "Concat layer has unsupported number of incoming layers."; } - auto fp32eq = [](float p1, float p2) -> bool { - return (std::abs(p1 - p2) <= 0.00001f * std::min(std::abs(p1), 
std::abs(p2))); - }; - auto quantData = InferenceEngine::getInjectedData(*concatLayer); std::vector inputLayers; for (auto input_idx = 0; input_idx != concatLayer->insData.size(); input_idx++) { @@ -435,7 +790,7 @@ class ScaleFactorPerLayer { auto in0 = inputLayers.front(); auto quantParams0 = InferenceEngine::getInjectedData(in0); auto scaleFactor = quantParams0->_dst_quant.GetScale(); - auto scaleFactorCheck = [scaleFactor, &fp32eq](InferenceEngine::CNNLayerPtr& inputLayer) { + auto scaleFactorCheck = [scaleFactor](InferenceEngine::CNNLayerPtr& inputLayer) { auto quantParams = InferenceEngine::getInjectedData(inputLayer); return fp32eq(quantParams->_dst_quant.GetScale(), scaleFactor); }; @@ -453,14 +808,14 @@ class ScaleFactorPerLayer { }; GNAPluginNS::QuantizedLayerParams* sourceQuantParams = nullptr; - auto firstInputIt = std::find_if(inputLayers.begin(), inputLayers.end(), inputLayerCheck); - if (firstInputIt != inputLayers.end()) { - auto quantParamsFirst = InferenceEngine::getInjectedData(*firstInputIt); - auto nextInputIt = firstInputIt + 1; + auto sourceLayerIt = std::find_if(inputLayers.begin(), inputLayers.end(), inputLayerCheck); + if (sourceLayerIt != inputLayers.end()) { + auto quantParamsFirst = InferenceEngine::getInjectedData(*sourceLayerIt); + auto nextInputIt = sourceLayerIt + 1; while ((nextInputIt = std::find_if(nextInputIt, inputLayers.end(), inputLayerCheck)) != inputLayers.end()) { auto quantParamsSecond = InferenceEngine::getInjectedData(*nextInputIt); if (!fp32eq(quantParamsSecond->_dst_quant.GetScale(), quantParamsFirst->_dst_quant.GetScale())) { - THROW_GNA_EXCEPTION << "Two Input layers " << (*firstInputIt)->name + THROW_GNA_EXCEPTION << "Two Input layers " << (*sourceLayerIt)->name << " and " << (*nextInputIt)->name << " have different scales in concat!!! 
\n"; } } @@ -469,7 +824,6 @@ class ScaleFactorPerLayer { // find a source quant value // - 1st candidate - input layer // - 2nd candidate - non-activation layer with non-1 scale factor - // - 3rd candidate - 1st layer with non-1 scale factor static std::map restarted_counter; auto restartedCountIt = restarted_counter.find(concatLayer->name); if (restartedCountIt == restarted_counter.end()) { @@ -477,29 +831,45 @@ class ScaleFactorPerLayer { restartedCountIt = pos.first; } - auto sourceLayerIt = firstInputIt; if (sourceLayerIt == inputLayers.end()) { if (((restartedCountIt->second) / 2) % 2 == 1) { std::reverse(inputLayers.begin(), inputLayers.end()); } - if (((restartedCountIt->second) / 4) % 2 == 0) { - auto sourceLayerCheck = [&fp32eq](InferenceEngine::CNNLayerPtr& inputLayer) { - auto quantParams = InferenceEngine::getInjectedData(inputLayer); - LayerInfo info(inputLayer); - return !info.isActivation() && !fp32eq(quantParams->_dst_quant.GetScale(), 1.0f); - }; - sourceLayerIt = std::find_if(inputLayers.begin(), inputLayers.end(), sourceLayerCheck); + + if (fakeQuantize) { + sourceLayerIt = inputLayers.begin(); + auto quantParamsFirst = InferenceEngine::getInjectedData(*inputLayers.begin()); + auto minScaleFactor = quantParamsFirst->_dst_quant.GetScale(); + for (auto it = inputLayers.begin(); it != inputLayers.end(); ++it) { + auto quantParams = InferenceEngine::getInjectedData(*it); + if (quantParams->_dst_quant.GetScale() < minScaleFactor && + !fp32eq(quantParams->_dst_quant.GetScale(), 1.0f) || + fp32eq(minScaleFactor, 1.0f)) { + minScaleFactor = quantParams->_dst_quant.GetScale(); + sourceLayerIt = it; + } + } + } else { + if (((restartedCountIt->second) / 4) % 2 == 0) { + auto sourceLayerCheck = [](InferenceEngine::CNNLayerPtr& inputLayer) { + auto quantParams = InferenceEngine::getInjectedData(inputLayer); + LayerInfo info(inputLayer); + return !info.isActivation() && !fp32eq(quantParams->_dst_quant.GetScale(), 1.0f); + }; + sourceLayerIt = std::find_if(inputLayers.begin(), inputLayers.end(), sourceLayerCheck); + } + + if (sourceLayerIt == inputLayers.end()) { + auto nonDefaultScaleFactor = [](InferenceEngine::CNNLayerPtr& inputLayer) { + auto quantParams = InferenceEngine::getInjectedData(inputLayer); + return !fp32eq(quantParams->_dst_quant.GetScale(), 1.0f); + }; + + sourceLayerIt = std::find_if(inputLayers.begin(), inputLayers.end(), nonDefaultScaleFactor); + } } - } - ++restartedCountIt->second; - if (sourceLayerIt == inputLayers.end()) { - auto nonDefaultScaleFactor = [&fp32eq](InferenceEngine::CNNLayerPtr& inputLayer) { - auto quantParams = InferenceEngine::getInjectedData(inputLayer); - return !fp32eq(quantParams->_dst_quant.GetScale(), 1.0f); - }; - - sourceLayerIt = std::find_if(inputLayers.begin(), inputLayers.end(), nonDefaultScaleFactor); + ++restartedCountIt->second; } std::set concatIdxToUpdate; @@ -514,24 +884,29 @@ class ScaleFactorPerLayer { continue; } - // possible case when some of the concat inputs are free to select scale ex: const->concat<-affine - if (!fp32eq(quantParamsIn->_dst_quant.GetScale(), 1.0f) && !LayerInfo(*it).isActivation()) { + if (fakeQuantize) { concatIdxToUpdate.insert(std::distance(inputLayers.begin(), it)); - } + quantParamsIn->_dst_quant.SetScale(quantParams->_dst_quant.GetScale()); + } else { + // possible case when some of the concat inputs are free to select scale ex: const->concat<-affine + if (!fp32eq(quantParamsIn->_dst_quant.GetScale(), 1.0f) && !LayerInfo(*it).isActivation()) { + 
concatIdxToUpdate.insert(std::distance(inputLayers.begin(), it)); + } - quantParamsIn->_dst_quant.SetScale(quantParams->_dst_quant.GetScale()); + quantParamsIn->_dst_quant.SetScale(quantParams->_dst_quant.GetScale()); + } } } auto updatedScaleFactor = InferenceEngine::getInjectedData(in0)->_dst_quant.GetScale(); - auto equalScaleFactor = [updatedScaleFactor, &fp32eq](InferenceEngine::CNNLayerPtr& inputLayer) { + auto equalScaleFactor = [updatedScaleFactor](InferenceEngine::CNNLayerPtr& inputLayer) { auto quantParams = InferenceEngine::getInjectedData(inputLayer); return fp32eq(quantParams->_dst_quant.GetScale(), updatedScaleFactor); }; auto layerIt = std::find_if_not(inputLayers.begin() + 1, inputLayers.end(), equalScaleFactor); if (layerIt != inputLayers.end()) { - THROW_GNA_EXCEPTION << "layers entered into concat have different scale factors" << concatLayer->name; + THROW_GNA_EXCEPTION << "layers entered into concat have different scale factors. Layer name: " << concatLayer->name; } quantData->_dst_quant.SetScale(sourceQuantParams->_dst_quant.GetScale()); @@ -555,7 +930,7 @@ class ScaleFactorPerLayer { gnalog() << "[UFS] from : " << concatLayer->name << " reached: " << layer->name; // found that direct input to concat is a indirect parent of align filter - so no link required auto info = LayerInfo(layer); - if (!info.isWeightable() && !info.isActivation() && !info.isConst() && !info.isMemory()) { + if (!info.isWeightable() && !info.isActivation() && !info.isConst()) { gnalog() << "... skipped\n"; return; } @@ -575,16 +950,44 @@ class ScaleFactorPerLayer { auto restarLayerInfo = LayerInfo(restartedLayer); if (restarLayerInfo.isActivation()) { // requantize activation by just changing it's output scale factor - quantDataForConCatInput->_dst_quant.SetScale(sourceQuantParams->_dst_quant.GetScale()); - } - if (restarLayerInfo.isConst()) { + auto newScaleFactor = sourceQuantParams->_dst_quant.GetScale(); + auto skipNonFunctional = [](InferenceEngine::CNNLayerPtr l) { + return LayerInfo(l).isNonFunctional(); + }; + + auto prevLayer = CNNNetPrevLayerSkipCertain(restartedLayer, 0, skipNonFunctional); + auto prevLayer2 = prevLayer != nullptr ? CNNNetPrevLayerSkipCertain(prevLayer, 0, skipNonFunctional) : nullptr; + + if (fakeQuantize && prevLayer != nullptr && LayerInfo(prevLayer).isWeightableIdentity() && + (prevLayer2 == nullptr || LayerInfo(prevLayer2).has16BOutput())) { + auto weightsScales = generateScaleFactors(MIN_SEARCH_WEIGHTS_VAL, MAX_SEARCH_WEIGHTS_VAL, + MAX_SEARCH_WEIGHTS_VAL - MIN_SEARCH_WEIGHTS_VAL); + + auto prevLayerQuant = InferenceEngine::getInjectedData(*prevLayer); + auto bestWeightsScale = 1.0f; + auto slopes = getPWLSlopes(restarLayerInfo); + if (!slopes.empty() && !fp32eq(prevLayerQuant->_src_quant.GetScale(), newScaleFactor)) { + bestWeightsScale = selectBestWeightsScaleFactors(prevLayerQuant->_src_quant.GetScale(), + newScaleFactor, weightsScales, { 1.0f }); + } + if (!slopes.empty() && !fp32eq(bestWeightsScale, prevLayerQuant->_weights_quant.GetScale())) { + gnalog() << "[INFO][Concat] Optimizing weights scale factor for '" << prevLayer->name << "' layer. 
Change from " + << prevLayerQuant->_weights_quant.GetScale() << " to " << bestWeightsScale << "\n"; + + prevLayerQuant->_weights_quant.SetScale(bestWeightsScale); + prevLayerQuant->_dst_quant.SetScale(prevLayerQuant->_weights_quant.GetScale() * prevLayerQuant->_src_quant.GetScale()); + result = ScaleFactorUpdateResult(prevLayer.get()); + return true; + } + } + + quantDataForConCatInput->_dst_quant.SetScale(newScaleFactor); + } else if (restarLayerInfo.isConst()) { gnalog() << "... warning const layer will be requantized\n"; - quantDataForConCatInput->_dst_quant.SetScale(sourceQuantParams->_dst_quant.GetScale()); - } - if (restarLayerInfo.isMemory()) { - gnalog() << "... warning memory layer will be requantized\n"; quantDataForConCatInput->_src_quant.SetScale(sourceQuantParams->_dst_quant.GetScale()); quantDataForConCatInput->_dst_quant.SetScale(sourceQuantParams->_dst_quant.GetScale()); + } else { + THROW_GNA_EXCEPTION << "cannot requantize '" << restartedLayer->name << "' input to concat: " << concatLayer->name; } result = ScaleFactorUpdateResult(restartedLayer.get()); } @@ -607,7 +1010,7 @@ class ScaleFactorPerLayer { uint16_t const _scale_change_threshold_200 = 200; public: - bool operator()(InferenceEngine::WeightableLayer *wl, int weightsSize, ScaleFactorUpdateResult &result) { + bool operator()(InferenceEngine::WeightableLayer *wl, int weightsSize, ScaleFactorUpdateResult &result, const bool fakeQuantize) { if ( !wl ) { THROW_GNA_EXCEPTION << "Incorrect Weightable Layer pointer \n"; } else if (!wl->_weights) { @@ -620,8 +1023,30 @@ class ScaleFactorPerLayer { auto quant = InferenceEngine::getInjectedData(*wl); quant->_src_quant = quantDataForInputLayer->_dst_quant; + if (quant->_weights_quant.IsStatsSet() && !quant->_weights_quant.IsScaleSet()) { + auto getScale = [&quant](size_t i) { + return (quant->_weights_quant.GetLevels() - 1) / + (quant->_weights_quant.GetMaxValues(false)[i] - quant->_weights_quant.GetMinValues(false)[i]); + }; + + float min_channel_scale = getScale(0); + for (uint32_t i = 1; i < quant->_weights_quant.GetMinValues().size(); i++) { + min_channel_scale = std::min(min_channel_scale, getScale(i)); + } + + auto multiplier = 1.0f; + if (quant->_weights_quant.GetLevels() <= std::numeric_limits::max()) { + // GNA supports additional multiplier for only 8bit weights. + // The multipler is used to extend dynamic range. 
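// Editor's note (illustrative, not part of the patch): the per-channel scale computed above is
// (levels - 1) / (max - min). For example, with levels = 255 and a channel range of [-1.0, 1.0],
// getScale(i) = 254 / 2 = 127; the smallest such value across channels becomes the common weights
// scale, and for 8-bit weights it is additionally widened by MAX_OUT_MULTIPLIER to use more of
// the available dynamic range.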
+ multiplier = MAX_OUT_MULTIPLIER; + } + + // Common weights scale calculation + quant->_weights_quant.SetScale(min_channel_scale * multiplier); + } + // TODO: pass 8 bits somehow - if (quant->_weights_quant.GetScale() == 1.0f) { + if (!quant->_weights_quant.IsScaleSet()) { size_t scaleRange = 0; if (weightsSize == 2) { scaleRange = MAX_VAL_2B_WEIGHT; @@ -632,7 +1057,7 @@ class ScaleFactorPerLayer { } quant->_weights_quant.SetScale( ScaleFactorForQuantization(wl->_weights->buffer().as(), scaleRange, wl->_weights->size())); - if (quant->_weights_quant.GetScale() == -1.0f) { + if (quant->_weights_quant.GetScale() == -1.0f || (fakeQuantize && LayerInfo(wl).isConcatAlignFilter())) { quant->_weights_quant.SetScale(1.0f); } @@ -685,6 +1110,39 @@ class ScaleFactorPerLayer { } quant->_dst_quant.SetScale(quant->_weights_quant.GetScale() * quant->_src_quant.GetScale()); + if (quant->_dst_quant.IsStatsSet()) { + // Adjust weights scale factor if output values exceed int32 maximum value + + if (wl->_biases && !quant->_bias_quant.IsScaleSet()) { + auto minMax = FindMinMaxValues(wl->_biases->buffer().as(), wl->_biases->size()); + quant->_bias_quant.SetMinValues({ minMax.first }); + quant->_bias_quant.SetMaxValues({ minMax.second }); + + auto biasScale = ScaleFactorForQuantization(wl->_biases->buffer().as(), MAX_VAL_4B_BIAS, wl->_biases->size()); + quant->_bias_quant.SetScale(biasScale); + if (quant->_bias_quant.GetScale() != -1.0f && quant->_bias_quant.GetScale() < quant->_dst_quant.GetScale()) { + quant->_weights_quant.SetScale(quant->_bias_quant.GetScale() / quant->_src_quant.GetScale()); + quant->_dst_quant.SetScale(quant->_weights_quant.GetScale() * quant->_src_quant.GetScale()); + } + } + + auto maxAbsVal = std::max(std::abs(quant->_dst_quant.GetMinValues().front()), + std::abs(quant->_dst_quant.GetMaxValues().front())); + + auto maxIntVal = static_cast(maxAbsVal * quant->_dst_quant.GetScale() + 0.5f); + auto weightsReducer = static_cast(maxIntVal) / std::numeric_limits::max(); + weightsReducer = std::max(1.0, weightsReducer); + if (!fp32eq(weightsReducer, 1.0f)) { + quant->_weights_quant.SetScale(quant->_weights_quant.GetScale() / weightsReducer); + } + + if (fp32eq(quant->_weights_quant.GetScale(), 0.0f) || std::isinf(quant->_weights_quant.GetScale())) { + quant->_weights_quant.SetScale(1.0f); + } + + quant->_dst_quant.SetScale(quant->_weights_quant.GetScale() * quant->_src_quant.GetScale()); + } + return true; } }; @@ -692,8 +1150,8 @@ class ScaleFactorPerLayer { template<> class ScaleFactorPerLayer : public ScaleFactorPerLayer { public: - bool operator()(InferenceEngine::WeightableLayer *wl, int weightsSize, ScaleFactorUpdateResult &result) { - return ScaleFactorPerLayer::operator()(wl, 2, result); + bool operator()(InferenceEngine::WeightableLayer *wl, int weightsSize, ScaleFactorUpdateResult &result, const bool fakeQuantize) { + return ScaleFactorPerLayer::operator()(wl, 2, result, fakeQuantize); } }; @@ -717,10 +1175,11 @@ class ScaleFactorCalculator { mutable Cnt::const_iterator idx; mutable bool needRestart = false; int weightsBytesSize; + bool isFakeQuantize; public: - ScaleFactorCalculator(Cnt &net, int weightsBytesSize) - : net(net), weightsBytesSize(weightsBytesSize) { + ScaleFactorCalculator(Cnt &net, int weightsBytesSize, bool fakeQuantize) + : net(net), weightsBytesSize(weightsBytesSize), isFakeQuantize(fakeQuantize) { idx = std::begin(this->net); } bool needToRestart() const { @@ -736,7 +1195,7 @@ class ScaleFactorCalculator { bool operator()(T ptr) const { needRestart = false; 
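// Editor's note: `result` is filled by ScaleFactorPerLayer when an upstream layer's scale factor
// had to be adjusted (for example when an eltwise or concat input is re-quantized); in that case
// the calculator flags a restart (needRestart) so scale factors are propagated again with the
// update taken into account.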
frontend::ScaleFactorUpdateResult result; - if (!frontend::ScaleFactorPerLayer()(ptr, weightsBytesSize, result)) { + if (!frontend::ScaleFactorPerLayer()(ptr, weightsBytesSize, result, isFakeQuantize)) { return false; } if (result) { diff --git a/inference-engine/src/gna_plugin/gna_graph_compiler.cpp b/inference-engine/src/gna_plugin/gna_graph_compiler.cpp index 19f22520a90..87afd6deb7d 100644 --- a/inference-engine/src/gna_plugin/gna_graph_compiler.cpp +++ b/inference-engine/src/gna_plugin/gna_graph_compiler.cpp @@ -740,6 +740,8 @@ void GNAGraphCompiler::PowerPrimitive(InferenceEngine::CNNLayerPtr layer) { auto orientation = kDnnInterleavedOrientation; auto activation_type = DnnActivation::fromType(kActPow); + activation_type.fqParams.set = false; + activation_type.srcFQParams.set = false; activation_type.args.pow.exponent = power.power; activation_type.args.pow.scale = power.scale; activation_type.args.pow.offset = power.offset; @@ -768,7 +770,8 @@ void GNAGraphCompiler::PowerPrimitive(InferenceEngine::CNNLayerPtr layer) { PwlDesignOpt16(activation_type, ptr_pwl_segments, input_pwl_scale_factor, - output_pwl_scale_factor); + output_pwl_scale_factor, + gnaFlags->pwlMaxErrorPercent); } } @@ -1668,14 +1671,6 @@ void GNAGraphCompiler::AffineFilterPrimitive(InferenceEngine::CNNLayerPtr layer) } } -void GNAGraphCompiler::FakeQuantizePrimitive(InferenceEngine::CNNLayerPtr layer) { - // in FP32 mode lets use special form of activation that satisfies fakeQuantize formula - if (gnaFlags->sw_fp32) { - PWLPrimitive(layer); - return; - } -} - void GNAGraphCompiler::PWLPrimitive(InferenceEngine::CNNLayerPtr layer) { auto* generic = dynamic_cast(layer.get()); std::string type; @@ -1768,6 +1763,24 @@ void GNAGraphCompiler::PWLPrimitive(InferenceEngine::CNNLayerPtr layer) { THROW_GNA_EXCEPTION << "Activation function type not yet supported: " << type; } auto activation_type = DnnActivation::fromType(it->second); + activation_type.fqParams.set = false; + if (quantized != nullptr && quantized->_dst_quant.IsStatsSet()) { + activation_type.fqParams.set = true; + activation_type.fqParams.levels = quantized->_dst_quant.GetLevels(); + activation_type.fqParams.inputPerChannel = false; + activation_type.fqParams.input_low = &(quantized->_dst_quant.GetMinValues(true).front()); + activation_type.fqParams.input_high = &(quantized->_dst_quant.GetMaxValues(true).front()); + } + + activation_type.srcFQParams.set = false; + if (quantized != nullptr && quantized->_src_quant.IsStatsSet()) { + activation_type.srcFQParams.set = true; + activation_type.srcFQParams.levels = quantized->_src_quant.GetLevels(); + activation_type.srcFQParams.inputPerChannel = false; + activation_type.srcFQParams.input_low = &(quantized->_src_quant.GetMinValues(true).front()); + activation_type.srcFQParams.input_high = &(quantized->_src_quant.GetMaxValues(true).front()); + } + if (it->second == kActRelu) { auto reluLayer = dynamic_cast(layer.get()); activation_type.args.lrelu.negative_slope = reluLayer != nullptr ? 
reluLayer->negative_slope : 0.0f; @@ -1775,11 +1788,9 @@ void GNAGraphCompiler::PWLPrimitive(InferenceEngine::CNNLayerPtr layer) { activation_type.args.lrelu.negative_slope = 0.0f; } - if (it->second == kActFakeQuantize) { + if (quantized == nullptr && it->second == kActFakeQuantize) { activation_type = GNAFakeQuantizeLayer(layer).parseAsActivation(); - } - - if (it->second == kActKaldiLstmClipping) { + } else if (it->second == kActKaldiLstmClipping) { auto clamp_layer = dynamic_cast(layer.get()); if (clamp_layer) { if (clamp_layer->min_value == 0 && clamp_layer->max_value == 0) { @@ -1856,7 +1867,8 @@ case name:\ PwlDesignOpt16(activation_type, ptr_pwl_segments, input_pwl_scale_factor, - output_pwl_scale_factor); + output_pwl_scale_factor, + gnaFlags->pwlMaxErrorPercent); } ptr_pwl_segments_target = reinterpret_cast(&ptr_pwl_segments_target); } @@ -2001,7 +2013,7 @@ void GNAGraphCompiler::CreateLayerPrimitive(CNNLayerPtr layer) { {{DelayedCopyLayerName}, CREATE(CopyPrimitive)}, {{"TensorIterator"}, SKIP}, {{"LSTMCell"}, SKIP}, - {{"FakeQuantize"}, CREATE(FakeQuantizePrimitive)} // TODO: fakequantize layer should be properly converted to GNA scale factors for integer case + {{"FakeQuantize"}, CREATE(PWLPrimitive)} }; (void)layersBuilder; auto it = LayersBuilder::getStorage().find(layer->type); diff --git a/inference-engine/src/gna_plugin/gna_graph_tools.hpp b/inference-engine/src/gna_plugin/gna_graph_tools.hpp index 112e6060c30..bd3dfe90a9b 100644 --- a/inference-engine/src/gna_plugin/gna_graph_tools.hpp +++ b/inference-engine/src/gna_plugin/gna_graph_tools.hpp @@ -663,10 +663,10 @@ inline void CNNNetworkRemoveLayer(CNNLayerPtr layer, bool checkDims = true) { } gnalog() << "Removing " << layer->name << " layer\n"; if (layer->insData.size() != 1) { - THROW_IE_EXCEPTION << "Cannot remove layer : "<< layer->name <<" that has not 1 input"; + THROW_IE_EXCEPTION << "Cannot remove layer : "<< layer->name <<" that has different number of inputs than 1"; } if (layer->outData.size() != 1) { - THROW_IE_EXCEPTION << "Cannot remove layer : "<< layer->name <<" that has not 1 output"; + THROW_IE_EXCEPTION << "Cannot remove layer : "<< layer->name <<" that has different number of outputs than 1"; } auto isp = layer->insData.front().lock(); diff --git a/inference-engine/src/gna_plugin/gna_plugin.cpp b/inference-engine/src/gna_plugin/gna_plugin.cpp index c8a337c3617..d978bbd46f5 100644 --- a/inference-engine/src/gna_plugin/gna_plugin.cpp +++ b/inference-engine/src/gna_plugin/gna_plugin.cpp @@ -24,7 +24,6 @@ #include #include #include "gna_plugin_config.hpp" -#include #include "gna_plugin.hpp" #include "optimizer/gna_pass_manager.hpp" #include "layers/gna_layer_type.hpp" @@ -50,6 +49,10 @@ #include #include #include +#include +#include +#include +#include #if GNA_LIB_VER == 2 #include @@ -394,9 +397,9 @@ void GNAPlugin::UpdateInputScaleFromNetwork(InferenceEngine::CNNNetwork & networ // search for FQ layers // only supports cases of int16 or int8 InputsDataMap inputs = network.getInputsInfo(); - for (auto && input : inputs) { + size_t inputIdx = 0; + for (auto&& input : inputs) { auto data = input.second->getInputData(); - size_t inputIdx = 0; for (auto && nextToInputLayer : getInputTo(data)) { if (!LayerInfo(nextToInputLayer.second).isFakeQuantize()) { inputIdx++; @@ -411,7 +414,16 @@ void GNAPlugin::UpdateInputScaleFromNetwork(InferenceEngine::CNNNetwork & networ THROW_GNA_LAYER_EXCEPTION(nextToInputLayer.second) << "unsupported, per-channel quantization for input layer : " << input.second->name(); } + + 
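// Editor's sketch (illustrative only, not part of the patch): how the input scale factor is
// derived from a FakeQuantize input range, mirroring the logic just below. The helper name is
// hypothetical; the real code compares against zero with a small-epsilon check (fp32eq).
#include <algorithm>
#include <cmath>
static float ExampleInputScaleFromFqRange(float rangeMin, float rangeMax, float levels) {
    float scale = (levels - 1) / (rangeMax - rangeMin);   // e.g. (65535 - 1) / (8 - (-8)) = 4095.875
    const float minAbs = std::min(std::abs(rangeMin), std::abs(rangeMax));
    const float maxAbs = std::max(std::abs(rangeMin), std::abs(rangeMax));
    if (minAbs == 0.0f && maxAbs != 0.0f) {               // one-sided range such as [0, 8]
        scale = (levels - 1) / (2 * maxAbs);              // treat it as symmetric [-8, 8]
    }
    return scale;
}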
auto fp32eq = [](float p1, float p2) -> bool { + return (std::abs(p1 - p2) <= 0.00001f * std::min(std::abs(p1), std::abs(p2))); + }; float scaleInput = (fqLayer.getLevels() - 1) / (inputRange.second[0] - inputRange.first[0]); + auto minAbsVal = std::min(std::abs(inputRange.second[0]), std::abs(inputRange.first[0])); + auto maxAbsVal = std::max(std::abs(inputRange.second[0]), std::abs(inputRange.first[0])); + if (fp32eq(minAbsVal, 0.0f) && !fp32eq(maxAbsVal, 0.0f)) { + scaleInput = (fqLayer.getLevels() - 1) / (2 * maxAbsVal); + } if (!config.inputScaleFactors.empty()) { gnalog() << "Scale factor calculated during model quantization (" << scaleInput @@ -676,6 +688,68 @@ void GNAPlugin::ConvertModelLayoutFromNCHWToNHWC(const std::vector } } +#ifdef PLOT +void GNAPlugin::AddDebugProperties(const InferenceEngine::CNNLayerPtr layer, + InferenceEngine::ordered_properties& printed_properties, + InferenceEngine::ordered_properties& node_properties) { + // printing quantized params + auto quantized = InferenceEngine::getInjectedData(layer); + if (!quantized) { + return; + } + if (LayerInfo(layer).isWeightable() || LayerInfo(layer).isEltwise()) { + printed_properties.emplace_back( + "weights scale factor", std::to_string(quantized->_weights_quant.GetScale())); + if (quantized->_weights_quant.IsStatsSet()) { + for (auto& min : quantized->_weights_quant.GetMinValues()) { + printed_properties.emplace_back( + "weights min val", std::to_string(min)); + } + for (auto& max : quantized->_weights_quant.GetMaxValues()) { + printed_properties.emplace_back( + "weights max val", std::to_string(max)); + } + } + + if (quantized->_bias_quant.IsStatsSet()) { + for (auto& min : quantized->_bias_quant.GetMinValues()) { + printed_properties.emplace_back( + "bias min val", std::to_string(min)); + } + for (auto& max : quantized->_bias_quant.GetMaxValues()) { + printed_properties.emplace_back( + "bias max val", std::to_string(max)); + } + } + } + printed_properties.emplace_back( + "src scale factor", std::to_string(quantized->_src_quant.GetScale())); + if (quantized->_src_quant.IsStatsSet()) { + for (auto& min : quantized->_src_quant.GetMinValues()) { + printed_properties.emplace_back( + "src min val", std::to_string(min)); + } + for (auto& max : quantized->_src_quant.GetMaxValues()) { + printed_properties.emplace_back( + "src max val", std::to_string(max)); + } + } + + printed_properties.emplace_back( + "dst scale factor", std::to_string(quantized->_dst_quant.GetScale())); + if (quantized->_dst_quant.IsStatsSet()) { + for (auto& min : quantized->_dst_quant.GetMinValues()) { + printed_properties.emplace_back( + "dst min val", std::to_string(min)); + } + for (auto& max : quantized->_dst_quant.GetMaxValues()) { + printed_properties.emplace_back( + "dst max val", std::to_string(max)); + } + } +} +#endif + void GNAPlugin::LoadNetwork(CNNNetwork & _network) { std::shared_ptr convertedNetwork; if (_network.getFunction()) { @@ -698,6 +772,10 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) { // UnrollTI transformation is disabled by default, is turned on by LowLatency transformation return node->get_rt_info().count("UNROLL_TI") == 0; }); + pass_config->disable(); + pass_config->disable(); + pass_config->disable(); + pass_config->disable(); manager.run_passes(graph); convertedNetwork = InferenceEngine::details::convertFunctionToICNNNetwork(graph, clonedNetwork); } @@ -809,17 +887,11 @@ void GNAPlugin::LoadNetwork(CNNNetwork & _network) { #ifdef PLOT std::ofstream file("gna_passes.dot"); - saveGraphToDot(newNet, file, [](const 
CNNLayerPtr layer, - ordered_properties &printed_properties, - ordered_properties &node_properties) { - // printing quantized params - auto quantized = InferenceEngine::getInjectedData(layer); - if (!quantized) { - return; - } - printed_properties.emplace_back( - "scale factor", std::to_string(quantized->_dst_quant.GetScale())); - }); + saveGraphToDot(newNet, file, [this](const CNNLayerPtr layer, + ordered_properties& printed_properties, + ordered_properties& node_properties) { + AddDebugProperties(layer, printed_properties, node_properties); + }); #endif auto sortedNet = CNNNetSortTopologicallyEx(newNet, make_fuzed_order); diff --git a/inference-engine/src/gna_plugin/gna_plugin.hpp b/inference-engine/src/gna_plugin/gna_plugin.hpp index 1a6e20d558c..0af27ba6572 100644 --- a/inference-engine/src/gna_plugin/gna_plugin.hpp +++ b/inference-engine/src/gna_plugin/gna_plugin.hpp @@ -23,6 +23,7 @@ #include "gna_plugin_policy.hpp" #include "gna_plugin_log.hpp" #include "gna_plugin_config.hpp" +#include #if GNA_LIB_VER == 2 #include @@ -237,6 +238,11 @@ class GNAPlugin : public InferenceEngine::IInferencePlugin { * @param layers model sorted layers */ void ConvertModelLayoutFromNCHWToNHWC(const std::vector &layers); +#ifdef PLOT + void AddDebugProperties(const InferenceEngine::CNNLayerPtr layer, + InferenceEngine::ordered_properties& printed_properties, + InferenceEngine::ordered_properties& node_properties); +#endif }; } // namespace GNAPluginNS diff --git a/inference-engine/src/gna_plugin/gna_plugin_config.cpp b/inference-engine/src/gna_plugin/gna_plugin_config.cpp index 60d4d854214..b7d20534733 100644 --- a/inference-engine/src/gna_plugin/gna_plugin_config.cpp +++ b/inference-engine/src/gna_plugin/gna_plugin_config.cpp @@ -156,6 +156,24 @@ void Config::UpdateFromMap(const std::map& config) { THROW_GNA_EXCEPTION << "GNA pwl uniform algorithm parameter " << "should be equal to YES/NO, but not" << value; } + } else if (key == GNA_CONFIG_KEY(PWL_MAX_ERROR_PERCENT)) { + float max_error; + try { + max_error = InferenceEngine::CNNLayer::ie_parse_float(value); + if (max_error < 0.0f || max_error > 100.0f) { + throw std::out_of_range(""); + } + } + catch (std::invalid_argument&) { + THROW_GNA_EXCEPTION << "Invalid value of PWL max error percent"; + } + catch (std::out_of_range&) { + log << "Unsupported PWL error percent value: " << value + << ", should be greater than 0 and less than 100"; + THROW_GNA_EXCEPTION << "Unsupported PWL error percent value: " << value + << ", should be greater than 0 and less than 100"; + } + gnaFlags.pwlMaxErrorPercent = max_error; } else if (key == CONFIG_KEY(PERF_COUNT)) { if (value == PluginConfigParams::YES) { gnaFlags.performance_counting = true; @@ -252,6 +270,7 @@ void Config::AdjustKeyMapValues() { keyConfigMap[GNA_CONFIG_KEY(PRECISION)] = gnaPrecision.name(); keyConfigMap[GNA_CONFIG_KEY(PWL_UNIFORM_DESIGN)] = gnaFlags.uniformPwlDesign ? PluginConfigParams::YES: PluginConfigParams::NO; + keyConfigMap[GNA_CONFIG_KEY(PWL_MAX_ERROR_PERCENT)] = std::to_string(gnaFlags.pwlMaxErrorPercent); keyConfigMap[CONFIG_KEY(PERF_COUNT)] = gnaFlags.performance_counting ? 
PluginConfigParams::YES: PluginConfigParams::NO; keyConfigMap[GNA_CONFIG_KEY(LIB_N_THREADS)] = std::to_string(gnaFlags.gna_lib_async_threads_num); diff --git a/inference-engine/src/gna_plugin/layers/gna_fake_quantize_layer.hpp b/inference-engine/src/gna_plugin/layers/gna_fake_quantize_layer.hpp index c80cb62d6e5..9d30126a1ce 100644 --- a/inference-engine/src/gna_plugin/layers/gna_fake_quantize_layer.hpp +++ b/inference-engine/src/gna_plugin/layers/gna_fake_quantize_layer.hpp @@ -29,7 +29,7 @@ class GNAFakeQuantizeLayer { DnnActivation parseAsActivation() const { DnnActivation fqActivation; - fqActivation.args.fakeQuantize.levels = fqLayer->GetParamAsInt("levels"); + fqActivation.fqParams.levels = fqLayer->GetParamAsInt("levels"); auto inputShape = getShapeForRange(fqLayer, 1); auto outputShape = getShapeForRange(fqLayer, 3); @@ -37,13 +37,15 @@ class GNAFakeQuantizeLayer { auto inputRangeSize = InferenceEngine::details::product(inputShape.begin(), inputShape.end()); auto outputRangeSize = InferenceEngine::details::product(outputShape.begin(), outputShape.end()); - fqActivation.args.fakeQuantize.inputPerChannel = inputRangeSize != 1; - fqActivation.args.fakeQuantize.input_low = getParamFromInputAsFloats(fqLayer, 1); - fqActivation.args.fakeQuantize.input_high = getParamFromInputAsFloats(fqLayer, 2); + fqActivation.fqParams.set = true; - fqActivation.args.fakeQuantize.outputPerChannel = outputRangeSize != 1; - fqActivation.args.fakeQuantize.output_low = getParamFromInputAsFloats(fqLayer, 3); - fqActivation.args.fakeQuantize.output_high = getParamFromInputAsFloats(fqLayer, 4); + fqActivation.fqParams.inputPerChannel = inputRangeSize != 1; + fqActivation.fqParams.input_low = getParamFromInputAsFloats(fqLayer, 1); + fqActivation.fqParams.input_high = getParamFromInputAsFloats(fqLayer, 2); + + fqActivation.fqParams.outputPerChannel = outputRangeSize != 1; + fqActivation.fqParams.output_low = getParamFromInputAsFloats(fqLayer, 3); + fqActivation.fqParams.output_high = getParamFromInputAsFloats(fqLayer, 4); fqActivation.type = kActFakeQuantize; return fqActivation; diff --git a/inference-engine/src/gna_plugin/layers/gna_layer_info.hpp b/inference-engine/src/gna_plugin/layers/gna_layer_info.hpp index 6c1bf161e28..1112160974b 100644 --- a/inference-engine/src/gna_plugin/layers/gna_layer_info.hpp +++ b/inference-engine/src/gna_plugin/layers/gna_layer_info.hpp @@ -103,7 +103,8 @@ class LayerInfo { "neglog", "neghalflog", "softsign", - "power"}; + "power", + "fakequantize"}; if (isPower()) { auto powerLayer = as(); @@ -157,7 +158,10 @@ class LayerInfo { IS_VALID(); return nullptr != as(); } - + bool isSyntheticScaleShift() const noexcept { + IS_VALID(); + return layer->name.find("SyntheticScaleShift") != std::string::npos; + } bool isEltwise() const noexcept { IS_VALID(); return nullptr != as(); @@ -193,6 +197,18 @@ class LayerInfo { bool isIdentity() const noexcept { return isOfType("identity"); } + bool isTanh() const noexcept { + return isOfType("tanh"); + } + bool isSigmoid() const noexcept { + return isOfType("sigmoid"); + } + bool isSoftSign() const noexcept { + return isOfType("softsign"); + } + bool isClamp() const noexcept { + return isOfType("clamp"); + } bool isFullyConnected() const noexcept { return isOfType("FullyConnected") || isOfType("InnerProduct"); } @@ -283,6 +299,9 @@ class LayerInfo { bool isCopyDelayed() const noexcept { return isOfType(DelayedCopyLayerName); } + bool isWeightableIdentity() const noexcept { + return isConcatAlignFilter() || isSyntheticScaleShift() || 
isCropAffined(); + } size_t paddingSize() const { static InferenceEngine::details::caseless_set layersWithPossiblePadding = {"FullyConnected", diff --git a/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp b/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp index c6233547677..d32b49c42c7 100644 --- a/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp +++ b/inference-engine/src/gna_plugin/optimizer/gna_pass_manager.cpp @@ -39,6 +39,7 @@ #include "frontend/quantization.h" #include "gna_groups.hpp" #include "gna_graph_patterns.hpp" +#include "gna_data_types.hpp" using namespace InferenceEngine; using namespace InferenceEngine::details; @@ -54,6 +55,10 @@ std::shared_ptr BasePass::getPassManager() { return sharedMgr; } + +static bool fp32eq(float p1, float p2) { + return (std::abs(p1 - p2) <= 0.00001f * std::min(std::abs(p1), std::abs(p2))); +} // indexes stored in pass manager static const char identityLayersCounterName[] = "identityLayerCounter"; static const char diagonalLayersCounterName[] = "diagonalLayerCounter"; @@ -1836,9 +1841,6 @@ void FuseFQIntoWeightsPass::run() { weightableLayer->insData.resize(1); // 2. running FQ function for given layer - if (weightDims.size() != 2) { - THROW_GNA_LAYER_EXCEPTION(fqLayer) << " layout of weigths not equal to NC not yet supported"; - } auto outputSize = details::product(weightDims.begin(), weightDims.end()); // depending on compute precision weights will be recreated @@ -1874,61 +1876,42 @@ void FuseFQIntoWeightsPass::run() { // check if // - weights were float values and need to be quantized, // - weights are integer values and quantization can be skipped - for (size_t i = 0; i < outputRange.first.size(); ++i) { - if (inputRange.first[i] > outputRange.first[i] || - inputRange.second[i] > outputRange.second[i]) { - quantized->_weights_quantized = true; - break; - } - } - - quantized->_weights_quant.SetMinValues(outputRange.first); - quantized->_weights_quant.SetMaxValues(outputRange.second); + quantized->_weights_quant.SetMinValues(inputRange.first, true); + quantized->_weights_quant.SetMaxValues(inputRange.second, true); + quantized->_weights_quant.SetMinValues(outputRange.first, false); + quantized->_weights_quant.SetMaxValues(outputRange.second, false); quantized->_weights_quant.SetLevels(levels); // lets find out minimum scale factor among channels - if (quantized->_weights_quant.GetMinValues().empty()) { + if (!quantized->_weights_quant.IsStatsSet()) { THROW_GNA_LAYER_EXCEPTION(fqLayer) << " per channel/tensor weigths scales are missed"; } - auto getScale = [&quantized](size_t i) { - return (quantized->_weights_quant.GetLevels() - 1) / - (quantized->_weights_quant.GetMaxValues()[i] - quantized->_weights_quant.GetMinValues()[i]); - }; - - float min_channel_scale = getScale(0); - for (uint32_t i = 1; i < quantized->_weights_quant.GetMinValues().size(); i++) { - min_channel_scale = std::min(min_channel_scale, getScale(i)); - } - - auto multiplier = 1.0f; - if (quantized->_weights_quant.GetLevels() <= std::numeric_limits::max()) { - // GNA supports additional multiplier for only 8bit weights. - // The multipler is used to extend dynamic range. 
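// Editor's note: the weights scale computation removed below is not dropped; it is relocated into
// ScaleFactorPerLayer for weightable layers (see the frontend scale-factor changes earlier in this
// patch), so the scale is now derived during scale-factor propagation rather than at FQ-fusion time.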
- multiplier = MAX_OUT_MULTIPLIER; - } - - // Common weights scale calculation - quantized->_weights_quant.SetScale(min_channel_scale * multiplier); continue; } + size_t depth = 1; intel_dnn_component_t component; component.num_columns_in = weightDims[1]; component.num_rows_in = weightDims[0]; + if (LayerInfo(weightableLayer).isConvolution()) { + depth = (weightDims.size() == 4)? weightDims[3]: 1; + } + intel_piecewiselinear_t *transform = reinterpret_cast(&component.op.pwl); transform->func_id = gnaFakeQuantizeLayer.parseAsActivation(); auto quantizedWeightsData = quantizedWeights->buffer(); - component.ptr_inputs = quantizedWeightsData.as(); - auto dequantizedWeights = make_shared_blob(TensorDesc(Precision::FP32, {outputSize}, Layout::C)); dequantizedWeights->allocate(); auto resultBuffer = dequantizedWeights->buffer(); - component.ptr_outputs = resultBuffer.as(); + for (size_t i = 0; i < depth; ++i) { + component.ptr_inputs = quantizedWeightsData.as() + i * component.num_columns_in * component.num_rows_in; + component.ptr_outputs = resultBuffer.as() + i * component.num_columns_in * component.num_rows_in; - PwlApply32(&component, 0, component.num_rows_in - 1, 0, component.num_columns_in - 1); + PwlApply32(&component, 0, component.num_rows_in - 1, 0, component.num_columns_in - 1); + } // 3. assign dequantized const blob to weightable layer assignWeightsAndBiases(weightableLayer, dequantizedWeights, biases); @@ -1944,6 +1927,97 @@ void MoveFakeQuantizeLayerIntoQuantParamsPass :: run() { auto donotSkip = [](CNNLayerPtr) { return false; }; + + auto allowFQFuse = [](CNNLayerPtr layer) -> bool { + auto doNotSkup = [](CNNLayerPtr layer) { + return false; + }; + + if (CNNNetGetAllNextLayersSkipCertain(layer, -1, doNotSkup).empty()) { + return false; + } + + auto skipNonFunctional = [](CNNLayerPtr layer) { + return LayerInfo(layer).isNonFunctional(); + }; + + auto prevLayer = CNNNetPrevLayerSkipCertain(layer, 0, skipNonFunctional); + if (LayerInfo(prevLayer).isActivation() || LayerInfo(prevLayer).isConst()) { + return true; + } + + auto nextLayers = CNNNetGetAllNextLayersSkipCertain(layer, -1, skipNonFunctional); + for (auto& l : nextLayers) { + if (!LayerInfo(l).isActivation()) { + return false; + } + } + + return true; + }; + + std::function propagateStatistics = + [&propagateStatistics](QuantizedLayerParams* srcQuantParams, CNNLayerPtr layer) { + if (LayerInfo(layer).isFakeQuantize()) { + return; + } + + auto donotSkip = [](CNNLayerPtr) { + return false; + }; + + auto quantParams = InferenceEngine::getInjectedData(layer); + + // Find all output layers connected to FQ + auto nextLayers = CNNNetGetAllNextLayersSkipCertain(layer.get(), -1, donotSkip); + if (nextLayers.empty()) { + quantParams->_src_quant.CopyStats(srcQuantParams->_dst_quant); + if (LayerInfo(layer).isNonFunctional()) { + quantParams->_dst_quant.CopyStats(srcQuantParams->_dst_quant); + } + return; + } + + auto srcMinVals = srcQuantParams->_dst_quant.GetMinValues().front(); + auto srcMaxVals = srcQuantParams->_dst_quant.GetMaxValues().front(); + // If a next layer is concat, find minimum nad maximum statistics + if (LayerInfo(layer).isConcat() && quantParams->_src_quant.IsStatsSet()) { + auto concatMinVal = quantParams->_src_quant.GetMinValues().front(); + auto concatMaxVal = quantParams->_src_quant.GetMaxValues().front(); + quantParams->_src_quant.SetMinValues({ std::min(srcMinVals, concatMinVal) }); + quantParams->_src_quant.SetMaxValues({ std::max(srcMaxVals, concatMaxVal) }); + } else if (quantParams->_src_quant.IsStatsSet()) { 
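// Editor's note: statistics were already propagated to this layer from another FakeQuantize path;
// keep the existing range and stop here instead of overwriting it.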
+ return; + } else { + quantParams->_src_quant.CopyStats(srcQuantParams->_dst_quant); + } + + if (!LayerInfo(layer).isWeightable() && !LayerInfo(layer).isEltwise() && + !LayerInfo(layer).isActivation() && !LayerInfo(layer).isFakeQuantize()) { + auto doNotSetDstStats = false; + for (auto& l : nextLayers) { + if (LayerInfo(l).isFakeQuantize()) { + doNotSetDstStats = true; + continue; + } + } + + if (doNotSetDstStats) { + return; + } + + quantParams->_dst_quant.CopyStats(quantParams->_src_quant); + + for (auto& l : nextLayers) { + if (LayerInfo(l).isFakeQuantize()) { + continue; + } + + propagateStatistics(quantParams, l); + } + } + }; + for (auto &&l : *pLayers) { if (!LayerInfo(l).isFakeQuantize()) { continue; @@ -1956,28 +2030,56 @@ void MoveFakeQuantizeLayerIntoQuantParamsPass :: run() { auto inputRange = fqLayer.getInputRange(); auto outputRange = fqLayer.getOutputRange(); - if (inputRange.second.size() != 1 || inputRange.second.size() != 1 || - outputRange.second.size() != 1 || outputRange.second.size() != 1) { + if (inputRange.first.size() != 1 || inputRange.second.size() != 1 || + outputRange.first.size() != 1 || outputRange.second.size() != 1) { THROW_GNA_LAYER_EXCEPTION(fqLayer) << " unsupported per-channel quantisation"; } + if (!LayerInfo(prevLayer).isConst() && + !fp32eq(inputRange.first.front(), outputRange.first.front()) && + !fp32eq(inputRange.second.front(), outputRange.second.front())) { + THROW_GNA_LAYER_EXCEPTION(fqLayer) << " unsupported data range conversion. Input: (" << + inputRange.first.front() << "," << inputRange.second.front() << "), output: (" << + outputRange.first.front() << "," << outputRange.second.front() << ")"; + } + float fqLevels = fqLayer.getLevels(); - float scaleOutputs = (fqLevels - 1) / (outputRange.second[0] - outputRange.first[0]); // Before FQ layer is removed, the previous layer has to be updated with its quantization data auto quantParamsPrevLayer = InferenceEngine::getInjectedData(prevLayer); - quantParamsPrevLayer->_dst_quant.SetScale(scaleOutputs); quantParamsPrevLayer->_dst_quant.SetLevels(fqLevels); - quantParamsPrevLayer->_dst_quant.SetMinValues({ inputRange.first[0] }); - quantParamsPrevLayer->_dst_quant.SetMaxValues({ inputRange.second[0] }); + quantParamsPrevLayer->_dst_quant.SetMinValues({ inputRange.first[0] }, true); + quantParamsPrevLayer->_dst_quant.SetMaxValues({ inputRange.second[0] }, true); + quantParamsPrevLayer->_dst_quant.SetMinValues({ outputRange.first[0] }, false); + quantParamsPrevLayer->_dst_quant.SetMaxValues({ outputRange.second[0] }, false); + auto fqQauntParams = InferenceEngine::getInjectedData(l); + fqQauntParams->_dst_quant.SetLevels(fqLevels); + fqQauntParams->_dst_quant.SetMinValues({ inputRange.first[0] }, true); + fqQauntParams->_dst_quant.SetMaxValues({ inputRange.second[0] }, true); + fqQauntParams->_dst_quant.SetMinValues({ outputRange.first[0] }, false); + fqQauntParams->_dst_quant.SetMaxValues({ outputRange.second[0] }, false); + fqQauntParams->_src_quant = fqQauntParams->_dst_quant; + + l->insData.resize(1); + if (!CNNNetHasPrevLayer(prevLayer.get())) { + quantParamsPrevLayer->_src_quant = quantParamsPrevLayer->_dst_quant; + } + + // Allow FQ Fuse checks if FQ layer can be fused to a layer before or after. + // FQ Layer is fused only when previous layer is const or activation layer + // or a next layer is activation layer. 
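// Editor's note: when fusing is not allowed, the FakeQuantize layer stays in the graph and is later
// lowered as a PWL activation (CreateLayerPrimitive now maps "FakeQuantize" to PWLPrimitive); its
// range statistics are still propagated to the downstream layers below.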
+ bool isFQFuseAllowed = allowFQFuse(l); auto prevData = prevLayer->outData.front(); - getInputTo(prevLayer->outData.front()).clear(); // Find all output layers connected to FQ auto nextLayers = CNNNetGetAllNextLayersSkipCertain(*fqLayer, -1, donotSkip); if (nextLayers.empty()) { - THROW_GNA_LAYER_EXCEPTION(fqLayer) << " fake quantize does not have any output layers connected"; + return; + } + + if (isFQFuseAllowed) { + getInputTo(prevLayer->outData.front()).clear(); } // Connect all next layers after FQ to the layer that is before FQ @@ -1989,16 +2091,12 @@ void MoveFakeQuantizeLayerIntoQuantParamsPass :: run() { << LAYER_NAME(nextLayers[i]) << " is not correct"; } - nextLayers[i]->insData[insDatas.front()] = prevData; - getInputTo(prevLayer->outData.front())[nextLayers[i]->name] = nextLayers[i]; + if (isFQFuseAllowed) { + nextLayers[i]->insData[insDatas.front()] = prevData; + getInputTo(prevLayer->outData.front())[nextLayers[i]->name] = nextLayers[i]; + } - // After layer gets removed lets absorb its params in QuantParams structure - // replacing scale factor from this fq layer - auto quantParamsNextLayer = InferenceEngine::getInjectedData(nextLayers[i]); - quantParamsNextLayer->_src_quant.SetScale(scaleOutputs); - quantParamsNextLayer->_src_quant.SetLevels(fqLevels); - quantParamsNextLayer->_src_quant.SetMinValues({ outputRange.first[0] }); - quantParamsNextLayer->_src_quant.SetMaxValues({ outputRange.second[0] }); + propagateStatistics(quantParamsPrevLayer, nextLayers[i]); } } } @@ -2013,7 +2111,9 @@ int PassManager::run(int index) { ordered_properties &printed_properties, ordered_properties &node_properties) {}); #endif +#ifdef ENABLE_V7_SERIALIZE network.serialize(name + ".xml", name + ".bin"); +#endif }; #else auto dumpNetworkAfterPass = [] (std::shared_ptr ) {}; diff --git a/inference-engine/src/gna_plugin/runtime/pwl.cpp b/inference-engine/src/gna_plugin/runtime/pwl.cpp index 4c2a07aa954..8d8528a0b11 100644 --- a/inference-engine/src/gna_plugin/runtime/pwl.cpp +++ b/inference-engine/src/gna_plugin/runtime/pwl.cpp @@ -499,22 +499,41 @@ std::vector pwl_search(const DnnActivation& activation_type, void PwlDesignOpt16(const DnnActivation activation_type, std::vector &ptr_segment, const float scale_in, - const float scale_out) { + const float scale_out, + const float pwlMaxErrorPercent) { std::vector pwl; double err_pct = 0.0; + auto minInputStats = 0.0f; + auto maxInputStats = 0.0f; + if (activation_type.srcFQParams.set) { + minInputStats = std::min(*activation_type.srcFQParams.input_low, *activation_type.srcFQParams.input_high) * 1.25f; + maxInputStats = std::max(*activation_type.srcFQParams.input_low, *activation_type.srcFQParams.input_high) * 1.25f; + } switch (activation_type) { - case kActSigmoid: - pwl = pwl_search(activation_type, -SIGMOID_DOMAIN, SIGMOID_DOMAIN, PWL_DESIGN_THRESHOLD, PWL_MAX_ERR_PERCENT, PWL_DESIGN_SAMPLES, err_pct); - make_gna_pwl(activation_type, pwl, -SIGMOID_DOMAIN, SIGMOID_DOMAIN, scale_in, scale_out, ptr_segment); + case kActSigmoid: { + auto absMax = std::max(std::abs(minInputStats), std::abs(maxInputStats)); + auto minInput = (activation_type.srcFQParams.set && absMax < SIGMOID_DOMAIN) ? -absMax : -SIGMOID_DOMAIN; + auto maxInput = (activation_type.srcFQParams.set && absMax < SIGMOID_DOMAIN) ? 
absMax : SIGMOID_DOMAIN; + pwl = pwl_search(activation_type, minInput, maxInput, PWL_DESIGN_THRESHOLD, pwlMaxErrorPercent, PWL_DESIGN_SAMPLES, err_pct); + make_gna_pwl(activation_type, pwl, minInput, maxInput, scale_in, scale_out, ptr_segment); break; - case kActTanh: - pwl = pwl_search(activation_type, -TANH_DOMAIN, TANH_DOMAIN, PWL_DESIGN_THRESHOLD, PWL_MAX_ERR_PERCENT, PWL_DESIGN_SAMPLES, err_pct); - make_gna_pwl(activation_type, pwl, -TANH_DOMAIN, TANH_DOMAIN, scale_in, scale_out, ptr_segment); + } + case kActTanh: { + auto absMax = std::max(std::abs(minInputStats), std::abs(maxInputStats)); + auto minInput = (activation_type.srcFQParams.set && absMax < TANH_DOMAIN) ? -absMax : -TANH_DOMAIN; + auto maxInput = (activation_type.srcFQParams.set && absMax < TANH_DOMAIN) ? absMax : TANH_DOMAIN; + pwl = pwl_search(activation_type, minInput, maxInput, PWL_DESIGN_THRESHOLD, pwlMaxErrorPercent, PWL_DESIGN_SAMPLES, err_pct); + make_gna_pwl(activation_type, pwl, minInput, maxInput, scale_in, scale_out, ptr_segment); break; - case kActSoftSign: - pwl = pwl_search(activation_type, -SOFTSIGN_DOMAIN, SOFTSIGN_DOMAIN, PWL_DESIGN_THRESHOLD, PWL_MAX_ERR_PERCENT, PWL_DESIGN_SAMPLES, err_pct); - make_gna_pwl(activation_type, pwl, -SOFTSIGN_DOMAIN, SOFTSIGN_DOMAIN, scale_in, scale_out, ptr_segment); + } + case kActSoftSign: { + auto absMax = std::max(std::abs(minInputStats), std::abs(maxInputStats)); + auto minInput = (activation_type.srcFQParams.set && absMax < SOFTSIGN_DOMAIN) ? -absMax : -SOFTSIGN_DOMAIN; + auto maxInput = (activation_type.srcFQParams.set && absMax < SOFTSIGN_DOMAIN) ? absMax : SOFTSIGN_DOMAIN; + pwl = pwl_search(activation_type, minInput, maxInput, PWL_DESIGN_THRESHOLD, pwlMaxErrorPercent, PWL_DESIGN_SAMPLES, err_pct); + make_gna_pwl(activation_type, pwl, minInput, maxInput, scale_in, scale_out, ptr_segment); break; + } case kActRelu: make_gna_pwl(activation_type, pwl, -1.0, 1.0, scale_in, scale_out, ptr_segment); break; @@ -522,6 +541,7 @@ void PwlDesignOpt16(const DnnActivation activation_type, make_gna_pwl(activation_type, pwl, -1.0, 1.0, scale_in, scale_out, ptr_segment); break; case kActIdentity: + case kActFakeQuantize: make_gna_pwl(activation_type, pwl, -1.0, 1.0, scale_in, scale_out, ptr_segment); break; case kActKaldiLstmClipping: @@ -530,28 +550,28 @@ void PwlDesignOpt16(const DnnActivation activation_type, case kActLog: { double x_min = (1 + ~XBASEMASK) / scale_in; double x_max = ((INT32_MAX / scale_in) < LOG_DOMAIN) ? (INT32_MAX / scale_in) : LOG_DOMAIN; - pwl = pwl_search(activation_type, x_min, x_max, PWL_DESIGN_THRESHOLD, 0.066*PWL_MAX_ERR_PERCENT, PWL_DESIGN_SAMPLES, err_pct); + pwl = pwl_search(activation_type, x_min, x_max, PWL_DESIGN_THRESHOLD, pwlMaxErrorPercent, PWL_DESIGN_SAMPLES, err_pct); make_gna_pwl(activation_type, pwl, x_min, x_max, scale_in, scale_out, ptr_segment); break; } case kActNegLog: { double x_min = (1 + ~XBASEMASK) / scale_in; double x_max = ((INT32_MAX / scale_in) < LOG_DOMAIN) ? (INT32_MAX / scale_in) : LOG_DOMAIN; - pwl = pwl_search(activation_type, x_min, x_max, PWL_DESIGN_THRESHOLD, 0.066*PWL_MAX_ERR_PERCENT, PWL_DESIGN_SAMPLES, err_pct); + pwl = pwl_search(activation_type, x_min, x_max, PWL_DESIGN_THRESHOLD, pwlMaxErrorPercent, PWL_DESIGN_SAMPLES, err_pct); make_gna_pwl(activation_type, pwl, x_min, x_max, scale_in, scale_out, ptr_segment); break; } case kActNegHalfLog: { double x_min = (1 + ~XBASEMASK) / scale_in; double x_max = ((INT32_MAX / scale_in) < LOG_DOMAIN) ? 
(INT32_MAX / scale_in) : LOG_DOMAIN; - pwl = pwl_search(activation_type, x_min, x_max, PWL_DESIGN_THRESHOLD, 0.066*PWL_MAX_ERR_PERCENT, PWL_DESIGN_SAMPLES, err_pct); + pwl = pwl_search(activation_type, x_min, x_max, PWL_DESIGN_THRESHOLD, pwlMaxErrorPercent, PWL_DESIGN_SAMPLES, err_pct); make_gna_pwl(activation_type, pwl, x_min, x_max, scale_in, scale_out, ptr_segment); break; } case kActExp: { double x_min = -log(scale_out); double x_max = x_min + log(INT16_MAX); - pwl = pwl_search(activation_type, x_min, x_max, PWL_DESIGN_THRESHOLD, 0.5*PWL_MAX_ERR_PERCENT, PWL_DESIGN_SAMPLES, err_pct); + pwl = pwl_search(activation_type, x_min, x_max, PWL_DESIGN_THRESHOLD, pwlMaxErrorPercent, PWL_DESIGN_SAMPLES, err_pct); make_gna_pwl(activation_type, pwl, x_min, x_max, scale_in, scale_out, ptr_segment); break; } @@ -576,7 +596,8 @@ void PwlDesignOpt16(const DnnActivation activation_type, x_max = std::min(x_max, POW_DOMAIN); if (activation_type.args.pow.exponent != 0.0f && activation_type.args.pow.exponent != 1.0f) { - pwl = pwl_search(activation_type, x_min, x_max, PWL_DESIGN_THRESHOLD, 0.015 * PWL_MAX_ERR_PERCENT, PWL_DESIGN_SAMPLES, err_pct); + auto maxError = pwlMaxErrorPercent > 0.015f? 0.015f: pwlMaxErrorPercent; + pwl = pwl_search(activation_type, x_min, x_max, PWL_DESIGN_THRESHOLD, maxError, PWL_DESIGN_SAMPLES, err_pct); } make_gna_pwl(activation_type, pwl, x_min, x_max, scale_in, scale_out, ptr_segment); @@ -980,15 +1001,14 @@ void PwlApply32(intel_dnn_component_t *component, break; case kActKaldiLstmClipping: { float upper_limit = component->op.pwl.func_id.args.clamp.high; - float lowwer_limit = component->op.pwl.func_id.args.clamp.low; + float lower_limit = component->op.pwl.func_id.args.clamp.low; for (uint32_t i = num_row_start; i <= num_row_end; i++) { for (uint32_t j = num_col_start; j <= num_col_end; j++) { float val = ptr_in[i * num_columns + j]; - if (val > upper_limit) { ptr_out[i * num_columns + j] = upper_limit; - } else if (val < lowwer_limit) { - ptr_out[i * num_columns + j] = lowwer_limit; + } else if (val < lower_limit) { + ptr_out[i * num_columns + j] = lower_limit; } else { ptr_out[i * num_columns + j] = val; } @@ -1050,32 +1070,36 @@ void PwlApply32(intel_dnn_component_t *component, } break; case kActFakeQuantize: { - auto levels = transform->func_id.args.fakeQuantize.levels; + bool clamping = true; + double levels = transform->func_id.fqParams.levels; for (uint32_t i = num_row_start; i <= num_row_end; i++) { - auto inputChannel = transform->func_id.args.fakeQuantize.inputPerChannel ? i : 0; - auto outputChannel = transform->func_id.args.fakeQuantize.outputPerChannel ? i : 0; + auto inputChannel = transform->func_id.fqParams.inputPerChannel ? i : 0; + auto outputChannel = transform->func_id.fqParams.outputPerChannel ? 
i : 0; - auto input_low = transform->func_id.args.fakeQuantize.input_low[inputChannel]; - auto input_high = transform->func_id.args.fakeQuantize.input_high[inputChannel]; - auto output_low = transform->func_id.args.fakeQuantize.output_low[outputChannel]; - auto output_high = transform->func_id.args.fakeQuantize.output_high[outputChannel]; + double input_low = transform->func_id.fqParams.input_low[inputChannel]; + double input_high = transform->func_id.fqParams.input_high[inputChannel]; + double output_low = transform->func_id.fqParams.output_low[outputChannel]; + double output_high = transform->func_id.fqParams.output_high[outputChannel]; - // TODO: this special modification for spedup-compute give different result with straight FQ formulae - // but this used in reference graph FakeQuantize implementations so we need to honor it for a while - float scaleInput = (input_high - input_low) / (levels-1); - float scaleOutputs = (output_high - output_low) / (levels-1); + auto scaleInput = (levels - 1) / (input_high - input_low); + auto scaleOutput = (levels - 1) / (output_high - output_low); for (uint32_t j = num_col_start; j <= num_col_end; j++) { auto offset = i * num_columns + j; auto x = ptr_in[offset]; + if (!clamping) { + ptr_out[offset] = ptr_in[offset] * scaleInput / scaleOutput; + continue; + } - if (x < std::min(input_low, input_high)) { + if (x <= std::min(input_low, input_high)) { ptr_out[offset] = output_low; } else if (x > std::max(input_low, input_high)) { ptr_out[offset] = output_high; } else { - ptr_out[offset] = nearbyint((x - input_low) / scaleInput) * scaleOutputs + output_low; + ptr_out[offset] = nearbyint((x - input_low) / (input_high - input_low) * (levels - 1)) / + (levels - 1) * (output_high - output_low) + output_low; } } } diff --git a/inference-engine/src/gna_plugin/runtime/pwl.h b/inference-engine/src/gna_plugin/runtime/pwl.h index 86b3cfb93e7..7e8fbbb5a69 100644 --- a/inference-engine/src/gna_plugin/runtime/pwl.h +++ b/inference-engine/src/gna_plugin/runtime/pwl.h @@ -103,4 +103,5 @@ void PwlDesign16(const DnnActivation activation_type, void PwlDesignOpt16(const DnnActivation activation_type, std::vector &ptr_segment, const float scale_in, - const float scale_out); + const float scale_out, + const float pwlMaxErrorPercent); diff --git a/inference-engine/tests/functional/plugin/gna/pass_tests/fq_activation.cpp b/inference-engine/tests/functional/plugin/gna/pass_tests/fq_activation.cpp new file mode 100644 index 00000000000..e0e34949cf7 --- /dev/null +++ b/inference-engine/tests/functional/plugin/gna/pass_tests/fq_activation.cpp @@ -0,0 +1,134 @@ +// Copyright (C) 2020 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include +#include +#include +#include + +#include + +#include "common_test_utils/common_utils.hpp" +#include "functional_test_utils/plugin_cache.hpp" +#include "shared_test_classes/base/layer_test_utils.hpp" +#include "functional_test_utils/blob_utils.hpp" +#include "ngraph_functions/utils/ngraph_helpers.hpp" +#include "ngraph_functions/builders.hpp" + +#include "ngraph_functions/pass/convert_prc.hpp" + +typedef std::tuple< + InferenceEngine::Precision, // Network Precision + std::string, // Target Device + std::map, // Configuration + std::vector, // Input Shape + std::pair, // Input Min and Max + size_t // Levels +> fqActivationParams; + +namespace LayerTestsDefinitions { + +class FQActivation : public testing::WithParamInterface, + public LayerTestsUtils::LayerTestsCommon { + float inputDataMin = 0.0f; + float inputDataMax = 
0.0f; + float inputDataResolution = 1.0f; + +public: + static std::string getTestCaseName(testing::TestParamInfo obj) { + InferenceEngine::Precision netPrecision; + std::string targetDevice; + std::map configuration; + std::vector inputShape; + std::pair inputMinMax; + size_t levels = 0; + std::tie(netPrecision, targetDevice, configuration, inputShape, inputMinMax, levels) = obj.param; + + std::ostringstream result; + result << "netPRC=" << netPrecision.name() << "_"; + result << "targetDevice=" << targetDevice << "_"; + for (auto const& configItem : configuration) { + result << "_configItem=" << configItem.first << "_" << configItem.second; + } + result << "_inputShape=" << CommonTestUtils::vec2str(inputShape); + result << "_inputMinMax=(" << inputMinMax.first << ".." << inputMinMax.second << ")"; + result << "_levels=" << levels; + + return result.str(); + } + + InferenceEngine::Blob::Ptr GenerateInput(const InferenceEngine::InputInfo& info) const { + return FuncTestUtils::createAndFillBlob(info.getTensorDesc(), inputDataMax - inputDataMin, inputDataMin, 1 / inputDataResolution); + } + +protected: + void SetUp() override { + InferenceEngine::Precision netPrecision; + + std::vector inputShape; + std::pair inputMinMax; + size_t levels = 0; + std::tie(netPrecision, targetDevice, configuration, inputShape, inputMinMax, levels) = this->GetParam(); + auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision); + + auto inputLowNode = ngraph::builder::makeConstant(ngPrc, { 1 }, { inputMinMax.first }); + auto inputHighNode = ngraph::builder::makeConstant(ngPrc, { 1 }, { inputMinMax.second }); + + auto inputVector = ngraph::builder::makeParams(ngPrc, { inputShape }); + auto inputFQNode = std::make_shared(inputVector[0], + inputLowNode, inputHighNode, inputLowNode, inputHighNode, levels); + + auto relu = ngraph::builder::makeActivation(inputFQNode, ngraph::element::f32, ngraph::helpers::ActivationTypes::Relu); + auto reluFQNode = std::make_shared(relu, + inputLowNode, inputHighNode, inputLowNode, inputHighNode, levels); + + ngraph::ResultVector results{ std::make_shared(reluFQNode) }; + function = std::make_shared(results, inputVector, "FQActivation"); + } +}; + + +TEST_P(FQActivation, CompareWithRefImpl) { + Run(); +}; + +const std::vector netPrecisions = { + InferenceEngine::Precision::FP32, + InferenceEngine::Precision::FP16 +}; + +const std::vector> configs = { + { + {"GNA_DEVICE_MODE", "GNA_SW_EXACT"}, + } +}; + +const std::vector> inputShape = { + {1, 1024}, +}; + +const std::vector> inputMinMax = { + {-0.5, 0.5}, + {-2, 2}, + {-8, 8}, + {-16, 16}, + {-50, 50}, + {-100, 100}, +}; + +const std::vector levels = { + 65535, +}; + +INSTANTIATE_TEST_CASE_P(smoke_fq_activation, FQActivation, + ::testing::Combine( + ::testing::ValuesIn(netPrecisions), + ::testing::Values(CommonTestUtils::DEVICE_GNA), + ::testing::ValuesIn(configs), + ::testing::ValuesIn(inputShape), + ::testing::ValuesIn(inputMinMax), + ::testing::ValuesIn(levels)), + FQActivation::getTestCaseName); +} // namespace LayerTestsDefinitions diff --git a/inference-engine/tests/unit/gna/gna_plugin_config_test.cpp b/inference-engine/tests/unit/gna/gna_plugin_config_test.cpp index 394504414b3..7b9337a10fb 100644 --- a/inference-engine/tests/unit/gna/gna_plugin_config_test.cpp +++ b/inference-engine/tests/unit/gna/gna_plugin_config_test.cpp @@ -20,6 +20,7 @@ const std::map supportedConfigKeysWithDefaults = { {CONFIG_KEY(EXCLUSIVE_ASYNC_REQUESTS), CONFIG_VALUE(NO)}, {GNA_CONFIG_KEY(PRECISION), 
Precision(Precision::I16).name()}, {GNA_CONFIG_KEY(PWL_UNIFORM_DESIGN), CONFIG_VALUE(NO)}, + {GNA_CONFIG_KEY(PWL_MAX_ERROR_PERCENT), "1.000000"}, {CONFIG_KEY(PERF_COUNT), CONFIG_VALUE(NO)}, {GNA_CONFIG_KEY(LIB_N_THREADS), "1"}, {CONFIG_KEY(SINGLE_THREAD), CONFIG_VALUE(YES)} @@ -153,6 +154,17 @@ TEST_F(GNAPluginConfigTest, GnaConfigPwlUniformDesignTest) { config.gnaFlags.uniformPwlDesign); } +TEST_F(GNAPluginConfigTest, GnaConfigPwlMaxErrorPercentTest) { + SetAndCompare(GNA_CONFIG_KEY(PWL_MAX_ERROR_PERCENT), std::string("0.100000")); + EXPECT_FLOAT_EQ(config.gnaFlags.pwlMaxErrorPercent, 0.1f); + SetAndCompare(GNA_CONFIG_KEY(PWL_MAX_ERROR_PERCENT), std::string("1.000000")); + EXPECT_FLOAT_EQ(config.gnaFlags.pwlMaxErrorPercent, 1); + SetAndCompare(GNA_CONFIG_KEY(PWL_MAX_ERROR_PERCENT), std::string("5.000000")); + EXPECT_FLOAT_EQ(config.gnaFlags.pwlMaxErrorPercent, 5); + ExpectThrow(GNA_CONFIG_KEY(PWL_MAX_ERROR_PERCENT), "-1"); + ExpectThrow(GNA_CONFIG_KEY(PWL_MAX_ERROR_PERCENT), "100.1"); +} + TEST_F(GNAPluginConfigTest, GnaConfigPerfCountTest) { SetAndCheckFlag(CONFIG_KEY(PERF_COUNT), config.gnaFlags.performance_counting);
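// Editor's sketch (illustrative only, not part of the patch): setting the new key from application
// code. Assumes the InferenceEngine::Core API of this OpenVINO generation; "model.xml" is a
// placeholder path and the 5% value is only an example within the accepted (0, 100] range.
#include <map>
#include <string>
#include <gna/gna_config.hpp>
#include <ie_core.hpp>

int main() {
    InferenceEngine::Core ie;
    InferenceEngine::CNNNetwork network = ie.ReadNetwork("model.xml");
    // Allow up to 5% PWL approximation error instead of the default 1.0.
    std::map<std::string, std::string> config = {
        {"GNA_DEVICE_MODE", "GNA_SW_EXACT"},
        {GNA_CONFIG_KEY(PWL_MAX_ERROR_PERCENT), "5.000000"}
    };
    auto executableNetwork = ie.LoadNetwork(network, "GNA", config);
    return 0;
}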