publish master branch snapshot, revision 59af1853ca21ea08acf17b177da0b239753deb46

This commit is contained in:
Alexey Suhov
2020-05-18 17:21:58 +03:00
parent 0a5a63bc0c
commit ba0a339888
223 changed files with 4329 additions and 1326 deletions

View File

@@ -30,5 +30,7 @@ std::vector<std::string> disabledTestPatterns() {
".*BehaviorPluginTestInferRequest\\.returnDeviceBusyOnStartInferAfterAsyncInfer.*",
".*BehaviorPluginTestInferRequest\\.returnDeviceBusyOnGetUserDataAfterAsyncInfer.*",
".*BehaviorPluginTestInferRequest\\.returnDeviceBusyOnSetUserDataAfterAsyncInfer.*",
// TODO: FIX BUG 31661
".*BehaviorPluginTestInferRequest\\.canStartSeveralAsyncInsideCompletionCallbackNoSafeDtorWithoutWait.*",
};
}

View File

@@ -5,6 +5,7 @@
#include "behavior_test_plugin.h"
#include <mutex>
#include <condition_variable>
#include <atomic>
using namespace std;
using namespace ::testing;
@@ -117,21 +118,23 @@ TEST_P(BehaviorPluginTestInferRequestCallback, canStartAsyncInsideCompletionCall
// test that can wait all callbacks on dtor
TEST_P(BehaviorPluginTestInferRequestCallback, canStartSeveralAsyncInsideCompletionCallbackWithSafeDtor) {
TestEnv::Ptr testEnv;
ASSERT_NO_FATAL_FAILURE(_createAndCheckInferRequest(GetParam(), testEnv));
const int NUM_ITER = 10;
struct TestUserData {
int numIter = NUM_ITER;
bool startAsyncOK = true;
bool getDataOK = true;
int numIsCalled = 0;
std::atomic<int> numIsCalled{0};
std::mutex mutex_block_emulation;
std::condition_variable cv_block_emulation;
bool isBlocked = true;
string device;
};
TestUserData data;
TestEnv::Ptr testEnv;
ASSERT_NO_FATAL_FAILURE(_createAndCheckInferRequest(GetParam(), testEnv));
data.device = GetParam().device;
testEnv->inferRequest->SetUserData(&data, nullptr);
testEnv->inferRequest->SetCompletionCallback(
[](InferenceEngine::IInferRequest::Ptr request, StatusCode status) {
@@ -153,7 +156,7 @@ TEST_P(BehaviorPluginTestInferRequestCallback, canStartSeveralAsyncInsideComplet
userData->numIsCalled++;
if (!userData->numIter) {
userData->isBlocked = false;
userData->cv_block_emulation.notify_all();
userData->cv_block_emulation.notify_one();
}
});
@@ -173,21 +176,22 @@ TEST_P(BehaviorPluginTestInferRequestCallback, canStartSeveralAsyncInsideComplet
}
// test that can wait all callbacks on dtor
// FIXME: CVS-8956, dll is unloaded before finishing infer request
TEST_P(BehaviorPluginTestInferRequestCallback, DISABLED_canStartSeveralAsyncInsideCompletionCallbackNoSafeDtor) {
TestEnv::Ptr testEnv;
ASSERT_NO_FATAL_FAILURE(_createAndCheckInferRequest(GetParam(), testEnv));
TEST_P(BehaviorPluginTestInferRequestCallback, canStartSeveralAsyncInsideCompletionCallbackNoSafeDtor) {
const int NUM_ITER = 10;
struct TestUserData {
int numIter = NUM_ITER;
bool startAsyncOK = true;
bool getDataOK = true;
int numIsCalled = 0;
std::atomic<int> numIsCalled{0};
std::mutex mutex_block_emulation;
std::condition_variable cv_block_emulation;
bool isBlocked = true;
};
TestUserData data;
TestEnv::Ptr testEnv;
ASSERT_NO_FATAL_FAILURE(_createAndCheckInferRequest(GetParam(), testEnv));
testEnv->inferRequest->SetUserData(&data, nullptr);
testEnv->inferRequest->SetCompletionCallback(
[](InferenceEngine::IInferRequest::Ptr request, StatusCode status) {
@@ -207,7 +211,7 @@ TEST_P(BehaviorPluginTestInferRequestCallback, DISABLED_canStartSeveralAsyncInsi
userData->numIsCalled++;
if (!userData->numIter) {
userData->isBlocked = false;
userData->cv_block_emulation.notify_all();
userData->cv_block_emulation.notify_one();
}
});
@@ -228,21 +232,23 @@ TEST_P(BehaviorPluginTestInferRequestCallback, DISABLED_canStartSeveralAsyncInsi
}
// test that can wait all callbacks on dtor
// FIXME: CVS-8956, dll is unloaded before finishing infer request
TEST_P(BehaviorPluginTestInferRequest, DISABLED_canStartSeveralAsyncInsideCompletionCallbackNoSafeDtorWithoutWait) {
TestEnv::Ptr testEnv;
ASSERT_NO_FATAL_FAILURE(_createAndCheckInferRequest(GetParam(), testEnv));
TEST_P(BehaviorPluginTestInferRequest, canStartSeveralAsyncInsideCompletionCallbackNoSafeDtorWithoutWait) {
SKIP_IF_CURRENT_TEST_IS_DISABLED();
const int NUM_ITER = 1;
struct TestUserData {
int numIter = NUM_ITER;
bool startAsyncOK = true;
bool getDataOK = true;
int numIsCalled = 0;
std::atomic<int> numIsCalled{0};
std::mutex mutex_block_emulation;
std::condition_variable cv_block_emulation;
bool isBlocked = true;
};
TestUserData data;
TestEnv::Ptr testEnv;
ASSERT_NO_FATAL_FAILURE(_createAndCheckInferRequest(GetParam(), testEnv));
testEnv->inferRequest->SetUserData(&data, nullptr);
testEnv->inferRequest->SetCompletionCallback(
[](InferenceEngine::IInferRequest::Ptr request, StatusCode status) {
@@ -262,11 +268,12 @@ TEST_P(BehaviorPluginTestInferRequest, DISABLED_canStartSeveralAsyncInsideComple
userData->numIsCalled++;
if (!userData->numIter) {
userData->isBlocked = false;
userData->cv_block_emulation.notify_all();
userData->cv_block_emulation.notify_one();
}
});
sts = testEnv->inferRequest->StartAsync(nullptr);
auto cppRequest = testEnv->actualInferRequest; // need a request / plugin instance at least
testEnv->inferRequest = nullptr;
testEnv = nullptr;
@@ -336,8 +343,7 @@ TEST_P(BehaviorPluginTestInferRequestCallback, DISABLED_canStartSeveralAsyncInsi
ASSERT_TRUE(data.waitOK);
}
// TODO: no, this is not correct test. callback throw exception and plugin shouldn't fail? user have to process this by himself.
TEST_P(BehaviorPluginTestInferRequestCallback, DISABLED_returnGeneralErrorIfCallbackThrowException) {
TEST_P(BehaviorPluginTestInferRequestCallback, returnGeneralErrorIfCallbackThrowException) {
TestEnv::Ptr testEnv;
ASSERT_NO_FATAL_FAILURE(_createAndCheckInferRequest(GetParam(), testEnv));
testEnv->inferRequest->SetCompletionCallback(
@@ -377,6 +383,7 @@ TEST_P(BehaviorPluginTestInferRequestCallback, inferDoesNotCallCompletionCallbac
ASSERT_FALSE(data.isCalled);
}
// ilavreno: test is invalid
// TODO: develop test that request not released until request is done itself? (to check wait in dtor?)
TEST_P(BehaviorPluginTestInferRequestCallback, DISABLED_requestNotReleasedUntilCallbackAreDone) {
TestEnv::Ptr testEnv;

View File

@@ -416,7 +416,55 @@ INSTANTIATE_TEST_CASE_P(
"CPU",
SingleLayerTestModel::Ptr(new ConcatTestModel(false, true, false, { 100, 1 })),
{ { 100, 1 }, { 100, 1 } },
{ { 100, 2 } })
{ { 100, 2 } }),
SingleLayerTransformationsTestParams(
"CPU",
SingleLayerTestModel::Ptr(new ConcatWithPoolingTestModel(false, false, false, 2.0)),
{ { 1, 3, 299, 299 }, { 1, 3, 299, 299 } },
{ { 1, 6, 299, 299 } }),
SingleLayerTransformationsTestParams(
"CPU",
SingleLayerTestModel::Ptr(new ConcatWithPoolingTestModel(false, true, false, 2.0)),
{ { 1, 3, 299, 299 }, { 1, 3, 299, 299 } },
{ { 1, 6, 299, 299 } }),
SingleLayerTransformationsTestParams(
"CPU",
SingleLayerTestModel::Ptr(new ConcatWithPoolingTestModel(true, false, false, 2.0)),
{ { 1, 3, 299, 299 }, { 1, 3, 299, 299 } },
{ { 1, 6, 299, 299 } }),
SingleLayerTransformationsTestParams(
"CPU",
SingleLayerTestModel::Ptr(new ConcatWithPoolingTestModel(true, true, false, 2.0)),
{ { 1, 3, 299, 299 }, { 1, 3, 299, 299 } },
{ { 1, 6, 299, 299 } }),
SingleLayerTransformationsTestParams(
"CPU",
SingleLayerTestModel::Ptr(new ConcatWithPoolingTestModel(false, false, true, 2.0)),
{ { 1, 3, 299, 299 }, { 1, 3, 299, 299 } },
{ { 1, 6, 299, 299 } }),
SingleLayerTransformationsTestParams(
"CPU",
SingleLayerTestModel::Ptr(new ConcatWithPoolingTestModel(false, true, true, 2.0)),
{ { 1, 3, 299, 299 }, { 1, 3, 299, 299 } },
{ { 1, 6, 299, 299 } }),
SingleLayerTransformationsTestParams(
"CPU",
SingleLayerTestModel::Ptr(new ConcatWithPoolingTestModel(true, false, true, 2.0)),
{ { 1, 3, 299, 299 }, { 1, 3, 299, 299 } },
{ { 1, 6, 299, 299 } }),
SingleLayerTransformationsTestParams(
"CPU",
SingleLayerTestModel::Ptr(new ConcatWithPoolingTestModel(true, true, true, 2.0)),
{ { 1, 3, 299, 299 }, { 1, 3, 299, 299 } },
{ { 1, 6, 299, 299 } })
),
SingleLayerTransformationsTestParams::getLowPrecisionTransformerSingleLayerTestName);

View File

@@ -4,6 +4,7 @@
#include "validation.hpp"
#include <algorithm>
#include <gtest/gtest.h>
#include <string>
#include <vector>
@@ -266,9 +267,15 @@ void LowPrecisionTransformationValidation::validatePrecision(
} else if ((layer->type == "Concat") || (layer->type == "Pooling")) {
for (const DataPtr data : layer->outData) {
if (params.updatePrecisions && (!CNNNetworkHelper::onWeights(*layer))) {
ASSERT_TRUE((data->getPrecision() == Precision::U8) || (data->getPrecision() == Precision::I8)) <<
layer->type << " layer, name '" <<
layer->name << "' out data has unexpected precision " << data->getPrecision();
const std::vector<CNNLayerPtr> parents = CNNNetworkHelper::getParentsRecursivelyExceptTypes(*layer, { "Pooling" });
if (std::all_of(
parents.begin(),
parents.end(),
[](const CNNLayerPtr parent) { return (parent->type != "FakeQuantize") || QuantizationDetails::outputLayoutIsSupported(*parent); })) {
ASSERT_TRUE((data->getPrecision() == Precision::U8) || (data->getPrecision() == Precision::I8)) <<
layer->type << " layer, name '" <<
layer->name << "' out data has unexpected precision " << data->getPrecision();
}
}
// ASSERT_EQ(params.updatePrecisions ? Precision::U8 : Precision::FP32, data->getPrecision()) << " " << layer->type << " out data has unexpected precision " << data->getPrecision();
}

View File

@@ -75,6 +75,9 @@ std::string ConcatTestModel::getName() const {
}
bool ConcatTestModel::transform(CNNNetwork& network, LayerTransformation::Params& params) const {
// TODO: remove when updatePrecisions is configurable
params.updatePrecisions = true;
LowPrecisionTransformations transformations = getLowPrecisionTransformations(params);
if (!multiChannel) {
@@ -163,5 +166,5 @@ float ConcatTestModel::getThreshold(const std::string& device_name, const Precis
}
}
return precision == Precision::FP16 ? 0.0005f : 0.0003f;
return SingleLayerTestModel::getThreshold(device_name, precision, params);
}

View File

@@ -0,0 +1,149 @@
// Copyright (C) 2018-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "low_precision_transformer_single_layer_tests.hpp"
#include "low_precision_transformations/concat.hpp"
using namespace InferenceEngine;
using namespace InferenceEngine::details;
// Builds the IR (XML) description of the test network: two inputs feed two
// FakeQuantize layers (ids 6 and 11); the second branch goes through Pooling12,
// which fans out to Convolution15 (-> Power1) and Pooling16, and both branches
// are gathered by Concat17. The "// N" comments below are the layer ids used in
// the 'edges' list.
std::string ConcatWithPoolingTestModel::getModel(SingleLayerTransformationsTestParams& p) const {
    // Element size of the network precision (FP32 by default, FP16 on request);
    // used to size the raw blobs attached to Const layers.
    size_t type_size = sizeof(PrecisionTrait<Precision::FP32>::value_type);
    if (p._network_precision == "FP16")
        type_size = sizeof(PrecisionTrait<Precision::FP16>::value_type);
    // Per-layer attribute maps handed to the network builder.
    std::map<std::string, std::string> constParams = {};
    std::map<std::string, std::string> fakeQuantizeParams = { {"levels", "256"} };
    std::map<std::string, std::string> concatParams = { {"axis", "1"} };
    std::map<std::string, std::string> powerParams = { {"power", "1"}, {"scale", "1"}, {"shift", "0"} };
    std::map<std::string, std::string> poolingParams = {
        {"kernel", "1,1"},
        {"pool-method", "max"},
        {"exclude-pad", "false"}
    };
    CommonTestUtils::conv_common_params convolutionParams = { {1, 1}, {1, 1}, {0, 0}, {0, 0}, {1, 1}, "valid", 1, 3, false, false };
    std::vector<size_t> weightsConstInputDims = { 3lu, 3lu, 1lu, 1lu };
    std::vector<size_t> biasesConvolutionConstDims = { convolutionParams.out_c };
    // Connectivity as "<layerId>,<portId>" pairs in the builder's edge format.
    std::vector<std::pair<std::string, std::string>> edges = {
        {"0,0", "11,17"}, {"1,2", "6,7"}, // Inputs
        {"2,3", "6,8"}, {"3,4", "6,9"}, {"4,5", "6,10"}, {"5,6", "6,11"}, // Const layers
        {"7,13", "11,18"}, {"8,14", "11,19"}, {"9,15", "11,20"}, {"10,16", "11,21"}, // Const layers
        {"6,12", "17,33"}, {"11,22", "12,23"}, // Pooling12
        {"12,24", "15,27"}, // Pooling12 -> Convolution15
        {"13,25", "15,28"}, // Const13 -> Convolution15
        {"14,26", "15,29"}, // Const14 -> Convolution15
        {"15,30", "1,1"}, // Convolution15 -> Power
        {"12,24", "16,31"}, // Pooling12 -> Pooling16
        {"16,32", "17,34"} // Pooling16 -> Concat17
    };
    auto modelBuilder = CommonTestUtils::DefaultNetBuilder::buildNetworkWithOneInput("ConcatWithPoolingTestModel", p.inputDimensions[0], p._network_precision)
        // 1
        //.addInputLayer(p._network_precision, p.inputDimensions[1])
        .addLayer("Power", p._network_precision, &powerParams, { {p.inputDimensions[1]}, {p.inputDimensions[1]} })
        // 2
        .addLayer("Const", p._network_precision, &constParams, { {}, {{1}} }, type_size, 0)
        // 3
        .addLayer("Const", p._network_precision, &constParams, { {}, {{1}} }, type_size, 0)
        // 4
        .addLayer("Const", p._network_precision, &constParams, { {}, {{1}} }, type_size, 0)
        // 5
        .addLayer("Const", p._network_precision, &constParams, { {}, {{1}} }, type_size, 0)
        // 6: FakeQuantize for the first input (Const2..Const5 provide its intervals)
        .addLayer("FakeQuantize", p._network_precision, &fakeQuantizeParams, { {p.inputDimensions[0], {1}, {1}, {1}, {1}}, {{p.inputDimensions[0]}} })
        // 7
        .addLayer("Const", p._network_precision, &constParams, { {}, {{1}} }, type_size, 0)
        // 8
        .addLayer("Const", p._network_precision, &constParams, { {}, {{1}} }, type_size, 0)
        // 9
        .addLayer("Const", p._network_precision, &constParams, { {}, {{1}} }, type_size, 0)
        // 10
        .addLayer("Const", p._network_precision, &constParams, { {}, {{1}} }, type_size, 0)
        // 11: FakeQuantize for the second input (Const7..Const10 provide its intervals)
        .addLayer("FakeQuantize", p._network_precision, &fakeQuantizeParams, { {p.inputDimensions[1], {1}, {1}, {1}, {1}}, {{p.inputDimensions[1]}} })
        // 12
        .addLayer("Pooling", p._network_precision, &poolingParams, { {p.inputDimensions[1]}, {p.inputDimensions[1]} })
        // 13: convolution weights blob
        .addLayer("Const", p._network_precision, &constParams, { {}, {weightsConstInputDims} },
            std::accumulate(weightsConstInputDims.begin(), weightsConstInputDims.end(), 1lu, std::multiplies<size_t>()) * type_size)
        // 14: convolution biases blob
        .addLayer("Const", p._network_precision, &constParams, { {}, {biasesConvolutionConstDims} }, type_size * convolutionParams.out_c, 0)
        // 15
        .convolutionLayer(p._network_precision, { {p.inputDimensions[0], weightsConstInputDims, biasesConvolutionConstDims }, {p.inputDimensions[0]} }, convolutionParams)
        // 16
        .addLayer("Pooling", p._network_precision, &poolingParams, { {p.inputDimensions[1]}, {p.inputDimensions[1]} })
        // 17
        .addLayer("Concat", p._network_precision, &concatParams, { {p.inputDimensions[0], p.inputDimensions[1]}, {{p.outputDimensions[0]}} }, 0, 0);
    auto modelString = modelBuilder.finish(&edges);
    return modelString;
}
// Produces a human-readable test-case name encoding every constructor flag,
// so failing parameterized runs identify their exact configuration.
std::string ConcatWithPoolingTestModel::getName() const {
    std::string name = "ConcatWithPoolingTestModel";
    name += multiChannel ? "_multiChannel" : "_oneChannel";
    name += signedIntervals ? "_signedInterval" : "_notSignedInterval";
    if (shift) {
        name += "_withShift";
    }
    name += "_";
    name += std::to_string(dequantizationIntervalsDifference);
    return name;
}
// Applies the low-precision transformation pipeline to the network and verifies
// that the intermediate dequantization ScaleShift was inserted between
// Pooling12 and Convolution15. Throws if the expected layer is absent;
// returns true on success.
bool ConcatWithPoolingTestModel::transform(CNNNetwork& network, LayerTransformation::Params& params) const {
    // TODO: remove when updatePrecisions is configurable
    params.updatePrecisions = true;

    auto pipeline = getLowPrecisionTransformations(params);
    if (!multiChannel) {
        // avoid ConcatMultiChannelsTransformation
        pipeline = pipeline
            .removeBranchSpecificTransformations("Concat")
            .addBranchSpecific<ConcatTransformation>(params, "Concat");
    }

    LowPrecisionTransformer transformer(pipeline);
    transformer.transform(network);

    // The transformation is expected to materialize this dequantization layer.
    const std::string intermediateDequantizationLayerName = "Pooling12_ScaleShift_Convolution15";
    if (CNNNetworkHelper::getLayer(network, intermediateDequantizationLayerName) == nullptr) {
        THROW_IE_EXCEPTION << "DequantizationLayer '" << intermediateDequantizationLayerName << "' was not found";
    }
    return true;
}
// Re-initializes the constant blobs of the network before each run: the
// FakeQuantize interval constants for both branches and the convolution
// weights/biases.
void ConcatWithPoolingTestModel::resetTransformation(CNNNetwork& network) const {
    const float intervalLow = signedIntervals ? -128 : 0.f;
    const float intervalHigh = signedIntervals ? 127 : 255.f;
    const float scale1 = 10.f;
    const float scale2 = scale1 * dequantizationIntervalsDifference;
    // NOTE(review): both offsets are derived from scale1; confirm the second
    // was not meant to use scale2 (kept as-is to preserve behavior).
    const float offset1 = shift ? (intervalLow / scale1) / 3 : 0.f;
    const float offset2 = shift ? (intervalLow / scale1) / 3 : 0.f;

    // First FakeQuantize (layer 6): input and output interval constants.
    fillData(getLayer(network, "Const2"), intervalLow / scale1 + offset1, "custom");
    fillData(getLayer(network, "Const3"), intervalHigh / scale1, "custom");
    fillData(getLayer(network, "Const4"), intervalLow / scale1 + offset1, "custom");
    fillData(getLayer(network, "Const5"), intervalHigh / scale1, "custom");

    // Second FakeQuantize (layer 11): input and output interval constants.
    fillData(getLayer(network, "Const7"), intervalLow / scale2 + offset2, "custom");
    fillData(getLayer(network, "Const8"), intervalHigh / scale2, "custom");
    fillData(getLayer(network, "Const9"), intervalLow / scale2 + offset2, "custom");
    fillData(getLayer(network, "Const10"), intervalHigh / scale2, "custom");

    // Convolution weights (Const13) and biases (Const14).
    fillData(getLayer(network, "Const13"), 3.f, "custom");
    fillData(getLayer(network, "Const14"), 2.f, "custom");
}
// Returns the accuracy threshold for result comparison. The combination of
// signed intervals, a non-zero shift and differing dequantization intervals
// accumulates extra rounding error when outputs are quantized, so that one
// configuration gets a relaxed threshold; all others defer to the base model.
float ConcatWithPoolingTestModel::getThreshold(
    const std::string& pluginName,
    const Precision precision,
    LayerTransformation::Params& params) const {
    const bool lossyConfiguration = params.quantizeOutputs
        && signedIntervals
        && shift
        && (dequantizationIntervalsDifference != 0.f);
    if (lossyConfiguration) {
        return 0.0153;
    }
    return SingleLayerTestModel::getThreshold(pluginName, precision, params);
}

View File

@@ -112,63 +112,65 @@ bool EltwiseWithPoolingTestModel::transform(CNNNetwork& network, LayerTransforma
LowPrecisionTransformer transformer(transformations);
transformer.transform(network);
if (params.quantizeOutputs && params.updatePrecisions) {
// INT8 way
const CNNLayerPtr fakeQuantize11 = getLayer(network, "FakeQuantize11");
if ((fakeQuantize11->outData[0]->getPrecision() != Precision::U8) && (fakeQuantize11->outData[0]->getPrecision() != Precision::I8)) {
THROW_IE_EXCEPTION <<
"layer " << fakeQuantize11->type << " " << fakeQuantize11->name <<
" was not quantized " << fakeQuantize11->outData[0]->getPrecision();
}
const CNNLayerPtr pooling12 = getLayer(network, "Pooling16");
if ((pooling12->outData[0]->getPrecision() != Precision::U8) && (pooling12->outData[0]->getPrecision() != Precision::I8)) {
THROW_IE_EXCEPTION <<
"layer " << pooling12->type << " " << pooling12->name <<
" was not quantized " << pooling12->outData[0]->getPrecision();
}
const CNNLayerPtr pooling16 = getLayer(network, "Pooling16");
if ((pooling16->outData[0]->getPrecision() != Precision::U8) && (pooling16->outData[0]->getPrecision() != Precision::I8)) {
THROW_IE_EXCEPTION <<
"layer " << pooling16->type << " " << pooling16->name <<
" was not quantized " << pooling16->outData[0]->getPrecision();
}
if (operation == "sum") {
const CNNLayerPtr eltwise = getLayer(network, "Eltwise17_original");
if (eltwise->type != "Eltwise") {
THROW_IE_EXCEPTION << "layer type " << eltwise->type << " " << eltwise->name << " is not correct";
if (params.quantizeOutputs) {
if (params.updatePrecisions) {
// INT8 way
const CNNLayerPtr fakeQuantize11 = getLayer(network, "FakeQuantize11");
if ((fakeQuantize11->outData[0]->getPrecision() != Precision::U8) && (fakeQuantize11->outData[0]->getPrecision() != Precision::I8)) {
THROW_IE_EXCEPTION <<
"layer " << fakeQuantize11->type << " " << fakeQuantize11->name <<
" was not quantized " << fakeQuantize11->outData[0]->getPrecision();
}
if ((eltwise->outData[0]->getPrecision() != Precision::FP32) && (eltwise->outData[0]->getPrecision() != Precision::FP16)) {
THROW_IE_EXCEPTION << "layer " << eltwise->type << " " << eltwise->name << " output port precision is not correct";
const CNNLayerPtr pooling12 = getLayer(network, "Pooling16");
if ((pooling12->outData[0]->getPrecision() != Precision::U8) && (pooling12->outData[0]->getPrecision() != Precision::I8)) {
THROW_IE_EXCEPTION <<
"layer " << pooling12->type << " " << pooling12->name <<
" was not quantized " << pooling12->outData[0]->getPrecision();
}
const CNNLayerPtr dequantizationScaleShift = getLayer(network, "Eltwise17");
if (dequantizationScaleShift == nullptr) {
THROW_IE_EXCEPTION << "dequantization layer was not found";
const CNNLayerPtr pooling16 = getLayer(network, "Pooling16");
if ((pooling16->outData[0]->getPrecision() != Precision::U8) && (pooling16->outData[0]->getPrecision() != Precision::I8)) {
THROW_IE_EXCEPTION <<
"layer " << pooling16->type << " " << pooling16->name <<
" was not quantized " << pooling16->outData[0]->getPrecision();
}
Blob::Ptr shiftsBlob = CNNNetworkHelper::getBlob(dequantizationScaleShift, "biases");
const auto shiftsBuffer = CNNNetworkHelper::getFloatData(shiftsBlob);
const size_t shiftsBlobSize = shiftsBlob->size();
for (size_t i = 0; i < shiftsBlobSize; ++i) {
if (shiftsBuffer.get()[i] != 0.f) {
THROW_IE_EXCEPTION << "unexpected shift value " << shiftsBuffer.get()[i] << " for dequantization layer";
if (operation == "sum") {
const CNNLayerPtr eltwise = getLayer(network, "Eltwise17_original");
if (eltwise->type != "Eltwise") {
THROW_IE_EXCEPTION << "layer type " << eltwise->type << " " << eltwise->name << " is not correct";
}
}
} else if ((operation == "mul") || (operation == "prod")) {
const CNNLayerPtr eltwise = getLayer(network, "Eltwise17");
if (eltwise->type != "Eltwise") {
THROW_IE_EXCEPTION << "layer type " << eltwise->type << " " << eltwise->name << " is not correct";
}
const CNNLayerPtr dequantizationScaleShift = getLayer(network, "Eltwise17_original");
if (dequantizationScaleShift != nullptr) {
THROW_IE_EXCEPTION
<< "dequantization layer " << dequantizationScaleShift->type << " " << dequantizationScaleShift->name
<< " has to be absent (moved to full path branch)";
if ((eltwise->outData[0]->getPrecision() != Precision::FP32) && (eltwise->outData[0]->getPrecision() != Precision::FP16)) {
THROW_IE_EXCEPTION << "layer " << eltwise->type << " " << eltwise->name << " output port precision is not correct";
}
const CNNLayerPtr dequantizationScaleShift = getLayer(network, "Eltwise17");
if (dequantizationScaleShift == nullptr) {
THROW_IE_EXCEPTION << "dequantization layer was not found";
}
Blob::Ptr shiftsBlob = CNNNetworkHelper::getBlob(dequantizationScaleShift, "biases");
const auto shiftsBuffer = CNNNetworkHelper::getFloatData(shiftsBlob);
const size_t shiftsBlobSize = shiftsBlob->size();
for (size_t i = 0; i < shiftsBlobSize; ++i) {
if (shiftsBuffer.get()[i] != 0.f) {
THROW_IE_EXCEPTION << "unexpected shift value " << shiftsBuffer.get()[i] << " for dequantization layer";
}
}
} else if ((operation == "mul") || (operation == "prod")) {
const CNNLayerPtr eltwise = getLayer(network, "Eltwise17");
if (eltwise->type != "Eltwise") {
THROW_IE_EXCEPTION << "layer type " << eltwise->type << " " << eltwise->name << " is not correct";
}
const CNNLayerPtr dequantizationScaleShift = getLayer(network, "Eltwise17_original");
if (dequantizationScaleShift != nullptr) {
THROW_IE_EXCEPTION
<< "dequantization layer " << dequantizationScaleShift->type << " " << dequantizationScaleShift->name
<< " has to be absent (moved to full path branch)";
}
}
}
} else {

View File

@@ -1680,6 +1680,32 @@ private:
const std::vector<size_t> constInputDimentions;
};
// Low-precision test model: two quantized inputs joined by a Concat, with
// Pooling layers on one branch (see getModel() in the corresponding .cpp).
// Constructor flags select the test-case variant; all are immutable per instance.
class ConcatWithPoolingTestModel : public SingleLayerTestModel {
public:
    // multiChannel: keep the multi-channel Concat transformation enabled
    //   (false forces the single-channel ConcatTransformation instead).
    // signedIntervals: use signed [-128, 127] quantization intervals rather
    //   than unsigned [0, 255].
    // shift: add a non-zero offset to the quantization interval bounds.
    // dequantizationIntervalsDifference: scale factor between the two
    //   branches' dequantization intervals (0 means identical intervals).
    ConcatWithPoolingTestModel(
        const bool multiChannel,
        const bool signedIntervals,
        const bool shift,
        const float dequantizationIntervalsDifference) :
        SingleLayerTestModel(),
        multiChannel(multiChannel),
        signedIntervals(signedIntervals),
        shift(shift),
        dequantizationIntervalsDifference(dequantizationIntervalsDifference) {}
    std::string getModel(SingleLayerTransformationsTestParams& p) const override;
    std::string getName() const override;
    bool transform(CNNNetwork& network, LayerTransformation::Params& params) const override;
    void resetTransformation(CNNNetwork& network) const override;
    float getThreshold(const std::string& pluginName, const Precision precision, LayerTransformation::Params& params) const override;
private:
    const bool multiChannel;
    const bool signedIntervals;
    const bool shift;
    const float dequantizationIntervalsDifference;
};
class ConcatMultiChannelTestModel : public SingleLayerTestModel {
public:
std::string getModel(SingleLayerTransformationsTestParams& p) const override;

View File

@@ -83,6 +83,10 @@ GNA2_API enum Gna2Status Gna2ModelRelease(
return Gna2StatusSuccess;
}
// Mock-library stub: always reports success and leaves 'error' untouched
// (real error details are not needed by these tests).
GNA2_API enum Gna2Status Gna2ModelGetLastError(struct Gna2ModelError* error) {
return Gna2StatusSuccess;
}
GNA2_API enum Gna2Status Gna2RequestConfigCreate(
uint32_t modelId,
uint32_t * requestConfigId) {

View File

@@ -66,7 +66,9 @@ TEST(MKLDNNLayersTests, DumpSimpleGraph) {
auto net = NetGen().net();
MKLDNNGraph graph;
MKLDNNExtensionManager::Ptr extMgr;
graph.CreateGraph(net, extMgr);
MKLDNNWeightsSharing::Ptr cache;
graph.CreateGraph(net, extMgr, cache);
auto dump_net = dump_graph_as_ie_net(graph);
auto layers = details::CNNNetSortTopologically(*dump_net);
@@ -82,7 +84,8 @@ TEST(MKLDNNLayersTests, DumpSimpleGraphToDot) {
auto net = NetGen().net();
MKLDNNGraph graph;
MKLDNNExtensionManager::Ptr extMgr;
graph.CreateGraph(net, extMgr);
MKLDNNWeightsSharing::Ptr cache;
graph.CreateGraph(net, extMgr, cache);
std::stringstream buff;
dump_graph_as_dot(graph, buff);

View File

@@ -492,7 +492,8 @@ TEST_F(MKLDNNGraphGenericTests, canGetPrimitiveDescriptorsList) {
layerPtr->outData.push_back(dataPtr);
mkldnn::engine eng(mkldnn::engine(mkldnn::engine::kind::cpu, 0));
node.reset(MKLDNNPlugin::MKLDNNNode::CreateNode(layerPtr, eng, extMgr));
MKLDNNPlugin::MKLDNNWeightsSharing::Ptr cache;
node.reset(MKLDNNPlugin::MKLDNNNode::CreateNode(layerPtr, eng, extMgr, cache));
ASSERT_EQ(MKLDNNPlugin::Type::Generic, node->getType());
ASSERT_NO_THROW(node->getSupportedDescriptors());

View File

@@ -19,8 +19,6 @@ using namespace mkldnn;
class MKLDNNTestExecNetwork: public MKLDNNPlugin::MKLDNNExecNetwork {
public:
MKLDNNTestExecNetwork(InferenceEngine::ICNNNetwork &network, const MKLDNNPlugin::Config &cfg)
: MKLDNNExecNetwork(network, cfg, {}) {}
MKLDNNPlugin::MKLDNNGraph& getGraph() {
return *(_graphs.begin()->get());
}

View File

@@ -29,7 +29,8 @@ TEST_F(MKLDNNGraphReorderTests, cannotCreatePrimitiveDescriprorsWithoutOtherLaye
mkldnn::engine eng(mkldnn::engine(mkldnn::engine::kind::cpu, 0));
InferenceEngine::CNNLayerPtr layer(new InferenceEngine::CNNLayer({"TestReorder", "Reorder", InferenceEngine::Precision::FP32}));
node.reset(MKLDNNPlugin::MKLDNNNode::CreateNode(layer, eng, {}));
MKLDNNPlugin::MKLDNNWeightsSharing::Ptr cache;
node.reset(MKLDNNPlugin::MKLDNNNode::CreateNode(layer, eng, {}, cache));
ASSERT_EQ(MKLDNNPlugin::Type::Reorder, node->getType());
ASSERT_THROW(node->getSupportedDescriptors(), InferenceEngine::details::InferenceEngineException);

View File

@@ -18,7 +18,10 @@ using namespace ::testing;
using namespace std;
using namespace mkldnn;
class MKLDNNGraphStructureTests: public TestsCommon {};
class MKLDNNGraphStructureTests: public TestsCommon {
protected:
MKLDNNPlugin::NumaNodesWeights cache;
};
TEST_F(MKLDNNGraphStructureTests, TestNoRedundantReorders) {
std::string model = R"V0G0N(
@@ -1198,7 +1201,7 @@ TEST_F(MKLDNNGraphStructureTests, TestOutputAfterInplacePlusConcat) {
InferenceEngine::Core core;
InferenceEngine::CNNNetwork network;
ASSERT_NO_THROW(network = core.ReadNetwork(model, InferenceEngine::Blob::CPtr()));
MKLDNNPlugin::MKLDNNExecNetwork::Ptr execNetwork(new MKLDNNPlugin::MKLDNNExecNetwork(network, {}, {}));
MKLDNNPlugin::MKLDNNExecNetwork::Ptr execNetwork(new MKLDNNPlugin::MKLDNNExecNetwork(network, {}, {}, cache));
InferenceEngine::InputsDataMap _networkInputs = network.getInputsInfo();
InferenceEngine::OutputsDataMap _networkOutputs = network.getOutputsInfo();
execNetwork->setNetworkInputs(_networkInputs);
@@ -1714,7 +1717,7 @@ TEST_F(MKLDNNGraphStructureTests, TestResnetPart) {
InferenceEngine::CNNNetwork network;
ASSERT_NO_THROW(network = core.ReadNetwork(model, weights_ptr));
MKLDNNPlugin::MKLDNNExecNetwork::Ptr execNetwork(new MKLDNNPlugin::MKLDNNExecNetwork(network, {}, {}));
MKLDNNPlugin::MKLDNNExecNetwork::Ptr execNetwork(new MKLDNNPlugin::MKLDNNExecNetwork(network, {}, {}, cache));
InferenceEngine::InputsDataMap _networkInputs = network.getInputsInfo();
InferenceEngine::OutputsDataMap _networkOutputs = network.getOutputsInfo();
execNetwork->setNetworkInputs(_networkInputs);
@@ -1864,7 +1867,7 @@ TEST_F(MKLDNNGraphStructureTests, TestConcatAfterConcat) {
InferenceEngine::Core core;
InferenceEngine::CNNNetwork network;
ASSERT_NO_THROW(network = core.ReadNetwork(model, InferenceEngine::Blob::CPtr()));
MKLDNNPlugin::MKLDNNExecNetwork::Ptr execNetwork(new MKLDNNPlugin::MKLDNNExecNetwork(network, {}, {}));
MKLDNNPlugin::MKLDNNExecNetwork::Ptr execNetwork(new MKLDNNPlugin::MKLDNNExecNetwork(network, {}, {}, cache));
InferenceEngine::InputsDataMap _networkInputs = network.getInputsInfo();
InferenceEngine::OutputsDataMap _networkOutputs = network.getOutputsInfo();
execNetwork->setNetworkInputs(_networkInputs);
@@ -2045,7 +2048,7 @@ TEST_F(MKLDNNGraphStructureTests, Test2ConcatFromConcat) {
InferenceEngine::Core core;
InferenceEngine::CNNNetwork network;
ASSERT_NO_THROW(network = core.ReadNetwork(model, InferenceEngine::Blob::CPtr()));
MKLDNNPlugin::MKLDNNExecNetwork::Ptr execNetwork(new MKLDNNPlugin::MKLDNNExecNetwork(network, {}, {}));
MKLDNNPlugin::MKLDNNExecNetwork::Ptr execNetwork(new MKLDNNPlugin::MKLDNNExecNetwork(network, {}, {}, cache));
InferenceEngine::InputsDataMap _networkInputs = network.getInputsInfo();
InferenceEngine::OutputsDataMap _networkOutputs = network.getOutputsInfo();
execNetwork->setNetworkInputs(_networkInputs);
@@ -2377,7 +2380,7 @@ TEST_F(MKLDNNGraphStructureTests, TestLoadTopologyWithConstLayer) {
InferenceEngine::CNNNetwork network;
ASSERT_NO_THROW(network = core.ReadNetwork(model, weights_ptr));
MKLDNNPlugin::MKLDNNExecNetwork::Ptr execNetwork(new MKLDNNPlugin::MKLDNNExecNetwork(network, {}, {}));
MKLDNNPlugin::MKLDNNExecNetwork::Ptr execNetwork(new MKLDNNPlugin::MKLDNNExecNetwork(network, {}, {}, cache));
InferenceEngine::InputsDataMap _networkInputs = network.getInputsInfo();
InferenceEngine::OutputsDataMap _networkOutputs = network.getOutputsInfo();
execNetwork->setNetworkInputs(_networkInputs);
@@ -2525,7 +2528,7 @@ TEST_F(MKLDNNGraphStructureTests, TestLoadTopologyWithEltwiseBeforeConcat) {
InferenceEngine::CNNNetwork network;
ASSERT_NO_THROW(network = core.ReadNetwork(model, weights_ptr));
MKLDNNPlugin::MKLDNNExecNetwork::Ptr execNetwork(new MKLDNNPlugin::MKLDNNExecNetwork(network, {}, {}));
MKLDNNPlugin::MKLDNNExecNetwork::Ptr execNetwork(new MKLDNNPlugin::MKLDNNExecNetwork(network, {}, {}, cache));
InferenceEngine::InputsDataMap _networkInputs = network.getInputsInfo();
InferenceEngine::OutputsDataMap _networkOutputs = network.getOutputsInfo();
execNetwork->setNetworkInputs(_networkInputs);

View File

@@ -197,12 +197,14 @@ public:
return graphNodes;
}
void CreateGraph(InferenceEngine::ICNNNetwork &network, const MKLDNNPlugin::MKLDNNExtensionManager::Ptr& extMgr) {
MKLDNNGraph::CreateGraph(network, extMgr);
void CreateGraph(InferenceEngine::ICNNNetwork &network, const MKLDNNPlugin::MKLDNNExtensionManager::Ptr& extMgr,
MKLDNNPlugin::MKLDNNWeightsSharing::Ptr cache = {}) {
MKLDNNGraph::CreateGraph(network, extMgr, cache);
}
void CreateGraph(InferenceEngine::ICNNNetwork &network) {
CreateGraph(network, extensionManager);
MKLDNNPlugin::MKLDNNWeightsSharing::Ptr cache;
MKLDNNGraph::CreateGraph(network, extensionManager, cache);
}
void checkDynBatch(InferenceEngine::BlobMap& srcs, InferenceEngine::BlobMap& outputBlobs, int batch, size_t MB,

View File

@@ -279,7 +279,7 @@ TEST_P(PThreadBinSemaphoreTest, TimedWaitFinallysucceed) {
ASSERT_EQ(-1, result = invoke_wait(0.1)); // right now sema gets occupied and resulted of a timeout
ASSERT_EQ(ETIMEDOUT, errno);
int i = 0;
for (i = 0; i < 10; i++) {
for (i = 0; i < 100; i++) {
result = invoke_wait(0.1);
if (0 == result) {
break;
@@ -287,7 +287,7 @@ TEST_P(PThreadBinSemaphoreTest, TimedWaitFinallysucceed) {
ASSERT_EQ(ETIMEDOUT, errno) << "actual errno value=" << result;
}
// so 10 x 100 ms timeout should be enough
// so 100 x 100 ms timeout should be enough
ASSERT_EQ(result, 0);
th.join();