[VPU] vpu_scale changes (#7512)

2021-11-17 10:29:43 +03:00 · 2021-11-17 10:29:43 +03:00 · 4c27bdb2ee
commit 4c27bdb2ee
parent 27901a87af
15 changed files with 283 additions and 196 deletions
--- a/inference-engine/src/vpu/common/include/vpu/configuration/options/ir_with_scales_directory.hpp
+++ b/inference-engine/src/vpu/common/include/vpu/configuration/options/ir_with_scales_directory.hpp
@ -19,8 +19,8 @@ enum class Category;

 class PluginConfiguration;

-struct IRWithScalesDirectoryOption : public AsParameterEnabler {
-    using value_type = std::string;
+struct VPUScalesOption : public AsParameterEnabler {
+    using value_type = std::map<std::string, float>;

    static std::string key();
    static void validate(const std::string&);
--- a/inference-engine/src/vpu/common/include/vpu/private_plugin_config.hpp
+++ b/inference-engine/src/vpu/common/include/vpu/private_plugin_config.hpp
@ -21,7 +21,7 @@ DECLARE_VPU_CONFIG(MYRIAD_TILING_CMX_LIMIT_KB);

 DECLARE_VPU_CONFIG(MYRIAD_TENSOR_STRIDES);

-DECLARE_VPU_CONFIG(MYRIAD_IR_WITH_SCALES_DIRECTORY);
+DECLARE_VPU_CONFIG(MYRIAD_SCALES_PATTERN);
 DECLARE_VPU_CONFIG(MYRIAD_DETECT_NETWORK_BATCH);
 DECLARE_VPU_CONFIG(MYRIAD_COPY_OPTIMIZATION);
 DECLARE_VPU_CONFIG(MYRIAD_HW_INJECT_STAGES);
--- a/inference-engine/src/vpu/common/src/configuration/options/ir_with_scales_directory.cpp
+++ b/inference-engine/src/vpu/common/src/configuration/options/ir_with_scales_directory.cpp
@ -1,38 +0,0 @@
-// Copyright (C) 2021 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "vpu/private_plugin_config.hpp"
-#include "vpu/configuration/options/ir_with_scales_directory.hpp"
-#include "vpu/utils/containers.hpp"
-#include "vpu/configuration/plugin_configuration.hpp"
-
-namespace vpu {
-
-void IRWithScalesDirectoryOption::validate(const std::string& value) {}
-
-void IRWithScalesDirectoryOption::validate(const PluginConfiguration& configuration) {
-    validate(configuration[key()]);
-}
-
-std::string IRWithScalesDirectoryOption::key() {
-    return InferenceEngine::MYRIAD_IR_WITH_SCALES_DIRECTORY;
-}
-
-details::Access IRWithScalesDirectoryOption::access() {
-    return details::Access::Private;
-}
-
-details::Category IRWithScalesDirectoryOption::category() {
-    return details::Category::CompileTime;
-}
-
-std::string IRWithScalesDirectoryOption::defaultValue() {
-    return std::string();
-}
-
-IRWithScalesDirectoryOption::value_type IRWithScalesDirectoryOption::parse(const std::string& value) {
-    return value;
-}
-
-}  // namespace vpu
--- a/inference-engine/src/vpu/common/src/configuration/options/vpu_scales_option.cpp
+++ b/inference-engine/src/vpu/common/src/configuration/options/vpu_scales_option.cpp
@ -0,0 +1,75 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "vpu/private_plugin_config.hpp"
+#include "vpu/configuration/options/vpu_scales_option.hpp"
+#include "vpu/utils/containers.hpp"
+#include "vpu/configuration/plugin_configuration.hpp"
+
+#if defined(__GNUC__) && (__GNUC__ <= 4) && (__GNUC_MINOR__ < 9) && !defined(__clang__) && !defined(IE_GCC_4_8)
+# define IE_GCC_4_8
+#else
+# include <regex>
+#endif
+
+namespace vpu {
+
+// Per-value validation hook. The body is empty, so every string is currently accepted.
+void VPUScalesOption::validate(const std::string& /*value*/) {
+}
+
+// Validates the value stored in the given configuration under this option's key.
+void VPUScalesOption::validate(const PluginConfiguration& configuration) {
+    const auto optionKey = key();
+    validate(configuration[optionKey]);
+}
+
+// Configuration key of the option.
+std::string VPUScalesOption::key() {
+    return InferenceEngine::MYRIAD_SCALES_PATTERN;
+}
+
+// Access level reported for the option: Private.
+details::Access VPUScalesOption::access() {
+    return details::Access::Private;
+}
+
+// Category reported for the option: CompileTime.
+details::Category VPUScalesOption::category() {
+    return details::Category::CompileTime;
+}
+
+// Default value of the option: an empty string.
+std::string VPUScalesOption::defaultValue() {
+    return {};
+}
+
+// Converts the raw option string into a map "layer-name pattern -> scale factor".
+//
+// The value is a ';'-separated list of "<pattern>:<scale>" entries. All whitespace is
+// removed from every entry and empty entries are ignored, so an empty string yields an
+// empty map. Each entry is split at its first ':': the text before it becomes the map
+// key and the text after it is converted with std::stof. If a key occurs more than
+// once, the first occurrence is kept (std::map::insert does not overwrite).
+//
+// Throws via VPU_THROW_UNLESS when an entry has no ':' or when its scale cannot be
+// converted to float. When IE_GCC_4_8 is defined (GCC older than 4.9) only an empty
+// value is accepted.
+VPUScalesOption::value_type VPUScalesOption::parse(const std::string& value) {
+    value_type vpuScaleMap;
+    #ifdef IE_GCC_4_8
+        VPU_THROW_UNLESS(value.empty(), "It is not possible to parse the 'scale' value from the config because you are using a gcc version less than 4.9.");
+    #else
+    std::vector<std::string> parsedStrings;
+
+    // Split on ';': submatch index -1 makes the iterator yield the text between delimiters.
+    const auto delimiterToken = std::regex(";");
+    auto regexScales = std::sregex_token_iterator(value.begin(), value.end(), delimiterToken, -1);
+    const std::sregex_token_iterator end;
+    for ( ; regexScales != end; ++regexScales) {
+        parsedStrings.push_back(*regexScales);
+    }
+
+    // isspace() requires a value representable as unsigned char; passing a plain
+    // (possibly negative) char is undefined behavior, hence the unsigned char parameter.
+    const auto isWhitespace = [](unsigned char symbol) { return ::isspace(symbol) != 0; };
+    for (auto& paramStr : parsedStrings) {
+        paramStr.erase(
+            std::remove_if(paramStr.begin(), paramStr.end(), isWhitespace),
+            paramStr.end());
+    }
+
+    // Drop entries that became empty (for example the one produced by a trailing ';').
+    parsedStrings.erase(
+        std::remove_if(parsedStrings.begin(), parsedStrings.end(),
+                       [](const std::string& str) { return str.empty(); }),
+        parsedStrings.end());
+
+    for (const auto& vpuScale : parsedStrings) {
+        const auto delimeterPos = vpuScale.find(':');
+        VPU_THROW_UNLESS(delimeterPos != std::string::npos, "Unable to parse string \"{}\"", vpuScale);
+
+        // std::stof reports failures with std::invalid_argument / std::out_of_range;
+        // turn both into the same kind of parsing error as a missing ':'.
+        float scaleValue = 0.0f;
+        bool isConverted = true;
+        try {
+            scaleValue = std::stof(vpuScale.substr(delimeterPos + 1));
+        } catch (...) {
+            isConverted = false;
+        }
+        VPU_THROW_UNLESS(isConverted, "Unable to parse scale value in string \"{}\"", vpuScale);
+
+        vpuScaleMap.insert({vpuScale.substr(0, delimeterPos), scaleValue});
+    }
+
+    #endif
+    return vpuScaleMap;
+}
+
+}  // namespace vpu
--- a/inference-engine/src/vpu/graph_transformer/src/frontend/unroll_loops.cpp
+++ b/inference-engine/src/vpu/graph_transformer/src/frontend/unroll_loops.cpp
@ -11,7 +11,7 @@

 #include <vpu/compile_env.hpp>

-#include <vpu/configuration/options/ir_with_scales_directory.hpp>
+#include <vpu/configuration/options/vpu_scales_option.hpp>
 #include <vpu/configuration/options/force_pure_tensor_iterator.hpp>
 #include <vpu/configuration/options/enable_tensor_iterator_unrolling.hpp>

@ -25,7 +25,7 @@ void FrontEnd::unrollLoops(ie::CNNNetwork& network) {
    env.log->trace("Unroll TensorIterator loops");
    VPU_LOGGER_SECTION(env.log);

-    if (!env.config.get<IRWithScalesDirectoryOption>().empty()) {
+    if (!env.config.get<VPUScalesOption>().empty()) {
        // TODO: Scale dumps does not work with IR, which contain Tensor Iterator layers, because we cannot serialize them. #-23429
        for (auto iterator = ie::details::CNNNetworkIterator(network); iterator != ie::details::CNNNetworkIterator(); ++iterator) {
            const auto& layer = *iterator;
--- a/inference-engine/src/vpu/graph_transformer/src/graph_transformer.cpp
+++ b/inference-engine/src/vpu/graph_transformer/src/graph_transformer.cpp
@ -50,7 +50,7 @@
 #include <vpu/configuration/options/number_of_shaves.hpp>
 #include <vpu/configuration/options/throughput_streams.hpp>
 #include <vpu/configuration/options/number_of_cmx_slices.hpp>
-#include <vpu/configuration/options/ir_with_scales_directory.hpp>
+#include <vpu/configuration/options/vpu_scales_option.hpp>

 namespace vpu {

@ -173,11 +173,6 @@ CompiledGraph::Ptr compileImpl(const ie::CNNNetwork& network, const std::shared_

    middleEnd->run(model);

-    if (!env.config.get<IRWithScalesDirectoryOption>().empty()) {
-        network.serialize(env.config.get<IRWithScalesDirectoryOption>() + "/" + network.getName() + "_scales.xml",
-                          env.config.get<IRWithScalesDirectoryOption>() + "/" + network.getName() + "_scales.bin");
-    }
-
    return backEnd->build(model, frontEnd->origLayers());
 }

--- a/inference-engine/src/vpu/graph_transformer/src/middleend/passes/weights_analysis.cpp
+++ b/inference-engine/src/vpu/graph_transformer/src/middleend/passes/weights_analysis.cpp
@ -9,7 +9,7 @@
 #include <vpu/model/data_contents/replicated_data_content.hpp>
 #include <vpu/model/data_contents/scaled_content.hpp>

-#include <vpu/configuration/options/ir_with_scales_directory.hpp>
+#include <vpu/configuration/options/vpu_scales_option.hpp>
 #include <vpu/configuration/options/check_preprocessing_inside_model.hpp>

 #include <precision_utils.h>
@ -28,6 +28,17 @@
 #include <list>
 #include <set>

+// IE_GCC_4_8 marks builds with GCC older than 4.9 (clang excluded), unless it is already defined.
+#if defined(__GNUC__) && (__GNUC__ <= 4) && (__GNUC_MINOR__ < 9) && !defined(__clang__) && !defined(IE_GCC_4_8)
+# define IE_GCC_4_8
+#endif
+
+// STD_REGEX_SEARCH(SRC, PATTERN): true if the regular expression PATTERN is found in SRC.
+// With IE_GCC_4_8 defined, <regex> is not included and the macro always evaluates to false.
+#ifndef IE_GCC_4_8
+# include <regex>
+# define STD_REGEX_SEARCH(SRC, PATTERN) std::regex_search(SRC, std::regex(PATTERN))
+#else
+# define STD_REGEX_SEARCH(SRC, PATTERN) false
+#endif
+
 namespace vpu {

 namespace {
@ -206,6 +217,16 @@ void scaleWeightableStage(const Model& model, const Stage& stage, float factor)
    stage->attrs().set<float>("scaleFactor", factor);
 }

+// Returns the scale configured for layerName, or 0 when none of the patterns matches.
+//
+// Every key of vpuScalemap is treated as a regular expression and searched for inside
+// layerName via STD_REGEX_SEARCH; the value of the first matching key, in the map's
+// iteration order, is returned. layerName is taken by const reference to avoid copying
+// the string on every call.
+float getScaleValue(const std::string& layerName, const std::map<std::string, float>& vpuScalemap) {
+    for (const auto& pair : vpuScalemap) {
+        if (STD_REGEX_SEARCH(layerName, pair.first)) {
+            return pair.second;
+        }
+    }
+    // Zero means "no scale configured for this layer".
+    return 0.0f;
+}
+
 class PassImpl final : public Pass {
 public:
    void run(const Model& model) override;
@ -227,9 +248,9 @@ void PassImpl::run(const Model& model) {
            continue;
        }
        IE_ASSERT(stage->origLayer() != nullptr);
-
-        // Get scale from IR, compute if it was absent
-        auto scale = stage->origLayer()->GetParamAsFloat("vpu_scale", 0);
+        // Get scale from config, compute if it was absent
+        const auto map = env.config.get<VPUScalesOption>();
+        auto scale = getScaleValue(stage->origLayerName(), map);
        if (!scale) {
            auto weights = stage->input(1);

@ -260,10 +281,6 @@ void PassImpl::run(const Model& model) {
            if (shift >= scaleThreshold) {
                scale = static_cast<float>(1ULL << static_cast<std::uint32_t>(shift));
            }
-
-            if (!env.config.get<IRWithScalesDirectoryOption>().empty()) {
-                stage->origLayer()->params["vpu_scale"] = toString(scale);
-            }
        }
        scaleWeightableStage(model, stage, scale);
    }
--- a/inference-engine/src/vpu/myriad_plugin/myriad_plugin.cpp
+++ b/inference-engine/src/vpu/myriad_plugin/myriad_plugin.cpp
@ -39,7 +39,7 @@
 #include <vpu/configuration/options/number_of_shaves.hpp>
 #include <vpu/configuration/options/number_of_cmx_slices.hpp>
 #include <vpu/configuration/options/throughput_streams.hpp>
-#include <vpu/configuration/options/ir_with_scales_directory.hpp>
+#include <vpu/configuration/options/vpu_scales_option.hpp>
 #include <vpu/configuration/options/tensor_strides.hpp>
 #include <vpu/configuration/options/ignore_unknown_layers.hpp>
 #include <vpu/configuration/options/force_pure_tensor_iterator.hpp>
@ -208,7 +208,7 @@ Engine::Engine(std::shared_ptr<IMvnc> mvnc) :
    _parsedConfig.registerOption<NumberOfSHAVEsOption>();
    _parsedConfig.registerOption<NumberOfCMXSlicesOption>();
    _parsedConfig.registerOption<ThroughputStreamsOption>();
-    _parsedConfig.registerOption<IRWithScalesDirectoryOption>();
+    _parsedConfig.registerOption<VPUScalesOption>();
    _parsedConfig.registerOption<TensorStridesOption>();
    _parsedConfig.registerOption<IgnoreUnknownLayersOption>();
    _parsedConfig.registerOption<ForcePureTensorIteratorOption>();
--- a/inference-engine/tests/functional/plugin/myriad/shared_tests_instances/behavior/plugin/configuration_tests.cpp
+++ b/inference-engine/tests/functional/plugin/myriad/shared_tests_instances/behavior/plugin/configuration_tests.cpp
@ -282,7 +282,7 @@ const std::vector<std::pair<std::string, InferenceEngine::Parameter>>& getDefaul
        {InferenceEngine::MYRIAD_NUMBER_OF_SHAVES, {InferenceEngine::MYRIAD_NUMBER_OF_SHAVES_AUTO}},
        {InferenceEngine::MYRIAD_THROUGHPUT_STREAMS, {InferenceEngine::MYRIAD_THROUGHPUT_STREAMS_AUTO}},
        {InferenceEngine::MYRIAD_NUMBER_OF_CMX_SLICES, {InferenceEngine::MYRIAD_NUMBER_OF_CMX_SLICES_AUTO}},
-        {InferenceEngine::MYRIAD_IR_WITH_SCALES_DIRECTORY, {std::string()}},
+        {InferenceEngine::MYRIAD_SCALES_PATTERN, {std::string()}},
        {InferenceEngine::MYRIAD_TENSOR_STRIDES, {std::map<std::string, std::vector<int>>()}},
        {InferenceEngine::MYRIAD_IGNORE_UNKNOWN_LAYERS, {false}},
        {InferenceEngine::MYRIAD_FORCE_PURE_TENSOR_ITERATOR, {false}},
@ -433,7 +433,7 @@ const std::vector<std::tuple<std::string, std::string, InferenceEngine::Paramete
        std::make_tuple(InferenceEngine::MYRIAD_PACK_DATA_IN_CMX, InferenceEngine::PluginConfigParams::NO,
            InferenceEngine::Parameter{false}),

-        std::make_tuple(InferenceEngine::MYRIAD_IR_WITH_SCALES_DIRECTORY, "/.", InferenceEngine::Parameter{"/."}),
+        std::make_tuple(InferenceEngine::MYRIAD_SCALES_PATTERN, "", InferenceEngine::Parameter{""}),

        std::make_tuple(InferenceEngine::MYRIAD_TENSOR_STRIDES, "tensor[1,2,3,4]",
            InferenceEngine::Parameter{std::map<std::string, std::vector<int>>{{"tensor", {4, 3, 2, 1}}}}),
@ -614,7 +614,7 @@ const std::vector<std::string>& getPrivateOptions() {
        InferenceEngine::MYRIAD_WATCHDOG,
        InferenceEngine::MYRIAD_PERF_REPORT_MODE,
        InferenceEngine::MYRIAD_PACK_DATA_IN_CMX,
-        InferenceEngine::MYRIAD_IR_WITH_SCALES_DIRECTORY,
+        InferenceEngine::MYRIAD_SCALES_PATTERN,
        InferenceEngine::MYRIAD_TENSOR_STRIDES,
        InferenceEngine::MYRIAD_IGNORE_UNKNOWN_LAYERS,
        InferenceEngine::MYRIAD_FORCE_PURE_TENSOR_ITERATOR,
--- a/inference-engine/tests/unit/vpu/base/graph_transformer_tests.cpp
+++ b/inference-engine/tests/unit/vpu/base/graph_transformer_tests.cpp
@ -26,7 +26,7 @@
 #include <vpu/configuration/options/number_of_shaves.hpp>
 #include <vpu/configuration/options/number_of_cmx_slices.hpp>
 #include <vpu/configuration/options/throughput_streams.hpp>
-#include <vpu/configuration/options/ir_with_scales_directory.hpp>
+#include <vpu/configuration/options/vpu_scales_option.hpp>
 #include <vpu/configuration/options/tensor_strides.hpp>
 #include <vpu/configuration/options/ignore_unknown_layers.hpp>
 #include <vpu/configuration/options/force_pure_tensor_iterator.hpp>
@ -414,7 +414,7 @@ PluginConfiguration createConfiguration() {
    configuration.registerOption<NumberOfSHAVEsOption>();
    configuration.registerOption<NumberOfCMXSlicesOption>();
    configuration.registerOption<ThroughputStreamsOption>();
-    configuration.registerOption<IRWithScalesDirectoryOption>();
+    configuration.registerOption<VPUScalesOption>();
    configuration.registerOption<TensorStridesOption>();
    configuration.registerOption<IgnoreUnknownLayersOption>();
    configuration.registerOption<ForcePureTensorIteratorOption>();
--- a/inference-engine/tests/unit/vpu/middleend_tests/passes_tests/vpu_scale_tests.cpp
+++ b/inference-engine/tests/unit/vpu/middleend_tests/passes_tests/vpu_scale_tests.cpp
@ -0,0 +1,162 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "graph_transformer_tests.hpp"
+#include <vpu/stages/mx_stage.hpp>
+#include <vpu/middleend/hw/utility.hpp>
+#include "ngraph_functions/subgraph_builders.hpp"
+#include "vpu/private_plugin_config.hpp"
+#include "common_test_utils/common_utils.hpp"
+
+#if defined(__GNUC__) && (__GNUC__ <= 4) && (__GNUC_MINOR__ < 9) && !defined(__clang__) && !defined(IE_GCC_4_8)
+#define IE_GCC_4_8
+#endif
+
+using namespace vpu;
+IE_SUPPRESS_DEPRECATED_START
+namespace LayerTestsDefinitions {
+// Parameter tuple for VpuScaleTest: a single string.
+using VpuScaleParams = std::tuple<std::string>;
+
+// Fixture for the analyzeWeightableLayers pass driven by MYRIAD_SCALES_PATTERN.
+// SetUp() applies configValue to the configuration, then builds a pipeline made of
+// that single pass and a model with two convolutions named "conv1" and "conv2".
+class VpuScaleTest : public testing::WithParamInterface<VpuScaleParams>,
+                     public GraphTransformerTest {
+protected:
+    void SetUp() override;
+
+    // Runs the pass pipeline on the test model.
+    void Compile() {
+        m_pipeline.run(m_testModel);
+    }
+
+protected:
+    // Value written to MYRIAD_SCALES_PATTERN by SetUp().
+    std::string configValue = {};
+    Model m_testModel;
+
+private:
+    // Builds the test model: Input -> conv1 -> Output -> conv2 -> Output.
+    // Both convolutions share the same kernel, stride, dilation and padding settings.
+    void InitModel() {
+        int kernelx = 16;
+        int kernely = 1;
+        int kernelStrideX = 1;
+        int kernelStrideY = 1;
+        int dilationX = 1;
+        int dilationY = 1;
+        int padx_begin = 7;
+        int pady_begin = 0;
+        int padx_end = 8;
+        int pady_end = 0;
+        m_testModel = CreateModel();
+        int inputX = 32;
+        int inputY = 32;
+
+        auto input = m_testModel->addInputData(
+            "Input",
+            DataDesc(DataType::FP16, DimsOrder::NCHW, {inputX, inputY, 8, 1}));
+        m_testModel->attrs().set<int>("numInputs", 1);
+
+        Data output;
+
+        output = m_testModel->addOutputData(
+            "Output",
+            DataDesc(
+                DataType::FP16, DimsOrder::NCHW,
+                {(inputX + padx_begin + padx_end - kernelx) / kernelStrideX + 1,
+                 (inputY + pady_begin + pady_end - kernely) / kernelStrideY + 1,
+                 8, 1}));
+
+        auto conv = std::make_shared<ie::ConvolutionLayer>(ie::LayerParams{"conv1", "Convolution", ie::Precision::FP16});
+        conv->_kernel_x = kernelx;
+        conv->_kernel_y = kernely;
+        conv->_stride_x = kernelStrideX;
+        conv->_stride_y = kernelStrideY;
+        conv->_dilation_x = dilationX;
+        // Fixed copy-paste error: dilationY used to be written to _dilation_x.
+        conv->_dilation_y = dilationY;
+
+        conv->_padding.insert(0, padx_begin);
+        conv->_padding.insert(1, pady_begin);
+        conv->_pads_end.insert(0, padx_end);
+        conv->_pads_end.insert(1, pady_end);
+        conv->_auto_pad = "same_upper";
+
+        conv->_weights = ie::make_shared_blob<short>({ ie::Precision::FP16, {static_cast<size_t>(kernelx * kernely * 8 * 8)}, ie::Layout::C });
+        conv->_weights->allocate();
+
+        frontEnd->parseConvolution(m_testModel, conv, {input}, {output});
+        Data output2;
+
+        output2 = m_testModel->addOutputData(
+            "Output",
+            DataDesc(
+                DataType::FP16, DimsOrder::NCHW,
+                {(inputX + padx_begin + padx_end - kernelx) / kernelStrideX + 1,
+                 (inputY + pady_begin + pady_end - kernely) / kernelStrideY + 1,
+                 8, 1}));
+        auto conv2 = std::make_shared<ie::ConvolutionLayer>(ie::LayerParams{"conv2", "Convolution", ie::Precision::FP16});
+        conv2->_kernel_x = kernelx;
+        conv2->_kernel_y = kernely;
+        conv2->_stride_x = kernelStrideX;
+        conv2->_stride_y = kernelStrideY;
+        conv2->_dilation_x = dilationX;
+        // Fixed copy-paste error: dilationY used to be written to _dilation_x.
+        conv2->_dilation_y = dilationY;
+
+        conv2->_padding.insert(0, padx_begin);
+        conv2->_padding.insert(1, pady_begin);
+        conv2->_pads_end.insert(0, padx_end);
+        conv2->_pads_end.insert(1, pady_end);
+        conv2->_auto_pad = "same_upper";
+
+        conv2->_weights = ie::make_shared_blob<short>({ ie::Precision::FP16, {static_cast<size_t>(kernelx * kernely * 8 * 8)}, ie::Layout::C });
+        conv2->_weights->allocate();
+
+        // The second convolution consumes the data produced by the first one.
+        frontEnd->parseConvolution(m_testModel, conv2, {output}, {output2});
+    }
+
+    // Creates a pipeline that contains only the analyzeWeightableLayers pass.
+    void InitPipeline() {
+        m_pipeline = PassSet();
+        m_pipeline.addPass(passManager->analyzeWeightableLayers());
+    }
+    PassSet m_pipeline;
+};
+
+// Prepares the fixture in a fixed order: base set-up, scales option, compile environment,
+// pass pipeline, test model. Each step is wrapped in ASSERT_NO_FATAL_FAILURE, so a fatal
+// failure in one step stops the sequence.
+void VpuScaleTest::SetUp() {
+    ASSERT_NO_FATAL_FAILURE(GraphTransformerTest::SetUp());
+    // The option is written before InitCompileEnv() is called.
+    config.set(InferenceEngine::MYRIAD_SCALES_PATTERN, configValue);
+    ASSERT_NO_FATAL_FAILURE(InitCompileEnv());
+    ASSERT_NO_FATAL_FAILURE(InitPipeline());
+    ASSERT_NO_FATAL_FAILURE(InitModel());
+}
+
+// Scales given per layer in the config must show up in the "scaleFactor" attribute
+// of the stages named "conv1" and "conv2".
+TEST_F(VpuScaleTest, IsScaleWorkCorrectly) {
+    #ifdef IE_GCC_4_8
+        GTEST_SKIP();
+    #endif
+    // SetUp() reads configValue, so it is invoked explicitly after the value is assigned.
+    configValue = "conv1:0.2; conv2:1.4";
+    SetUp();
+    ASSERT_NO_THROW(Compile());
+    for (const auto& stage : m_testModel->getStages()) {
+        const auto scaleFactor = stage->attrs().getOrDefault<float>("scaleFactor");
+        if (stage->name() == "conv1") {
+            ASSERT_FLOAT_EQ(scaleFactor, 0.2);
+        } else if (stage->name() == "conv2") {
+            ASSERT_FLOAT_EQ(scaleFactor, 1.4);
+        }
+    }
+}
+
+// A single "conv1" entry in the config must set the "scaleFactor" attribute
+// of the stage named "conv1"; other stages are not checked.
+TEST_F(VpuScaleTest, IsRegexInScaleWorksCorrectly) {
+    #ifdef IE_GCC_4_8
+        GTEST_SKIP();
+    #endif
+    // SetUp() reads configValue, so it is invoked explicitly after the value is assigned.
+    configValue = "conv1:0.2";
+    SetUp();
+    ASSERT_NO_THROW(Compile());
+    for (const auto& stage : m_testModel->getStages()) {
+        const auto scaleFactor = stage->attrs().getOrDefault<float>("scaleFactor");
+        if (stage->name() == "conv1") {
+            ASSERT_FLOAT_EQ(scaleFactor, 0.2);
+        }
+    }
+}
+} // namespace LayerTestsDefinitions
--- a/inference-engine/tests_deprecated/functional/vpu/CMakeLists.txt
+++ b/inference-engine/tests_deprecated/functional/vpu/CMakeLists.txt
@ -75,3 +75,7 @@ addIeTargetTest(
        VPU
        MYRIAD
    )
+# vpu_graph_transformer is compiled with LTO, so apply the ENABLE_LTO setting to the
+# Release build of VPUCommonTests as well. Only done for GCC 9.0 or newer.
+if(CMAKE_COMPILER_IS_GNUCXX AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 9.0)
+    set_target_properties(VPUCommonTests PROPERTIES INTERPROCEDURAL_OPTIMIZATION_RELEASE ${ENABLE_LTO})
+endif()
--- a/inference-engine/tests_deprecated/functional/vpu/graph_transformer/gt_functional_tests.cpp
+++ b/inference-engine/tests_deprecated/functional/vpu/graph_transformer/gt_functional_tests.cpp
@ -26,7 +26,7 @@
 #include <vpu/configuration/options/number_of_shaves.hpp>
 #include <vpu/configuration/options/number_of_cmx_slices.hpp>
 #include <vpu/configuration/options/throughput_streams.hpp>
-#include <vpu/configuration/options/ir_with_scales_directory.hpp>
+#include <vpu/configuration/options/vpu_scales_option.hpp>
 #include <vpu/configuration/options/tensor_strides.hpp>
 #include <vpu/configuration/options/ignore_unknown_layers.hpp>
 #include <vpu/configuration/options/force_pure_tensor_iterator.hpp>
@ -108,7 +108,7 @@ void graphTransformerFunctionalTests::PrepareGraphCompilation() {
    _configuration.registerOption<NumberOfSHAVEsOption>();
    _configuration.registerOption<NumberOfCMXSlicesOption>();
    _configuration.registerOption<ThroughputStreamsOption>();
-    _configuration.registerOption<IRWithScalesDirectoryOption>();
+    _configuration.registerOption<VPUScalesOption>();
    _configuration.registerOption<TensorStridesOption>();
    _configuration.registerOption<IgnoreUnknownLayersOption>();
    _configuration.registerOption<ForcePureTensorIteratorOption>();
--- a/inference-engine/tests_deprecated/unit/engines/vpu/get_vpu_scale_from_ir_tests.cpp
+++ b/inference-engine/tests_deprecated/unit/engines/vpu/get_vpu_scale_from_ir_tests.cpp
@ -1,128 +0,0 @@
-// Copyright (C) 2018-2021 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "graph_transformer_tests.hpp"
-#include "tests_vpu_common.hpp"
-
-#include <cpp/ie_executable_network.hpp>
-
-#include <ngraph/type/element_type.hpp>
-#include <ngraph/op/parameter.hpp>
-#include <ngraph/ops.hpp>
-#include <legacy/ngraph_ops/fully_connected.hpp>
-#include <ngraph/op/constant.hpp>
-#include <ngraph/opsets/opset1.hpp>
-#include <limits>
-
-using namespace vpu;
-using namespace InferenceEngine;
-
-using VPU_AddVpuScaleTest = GraphTransformerTest;
-
-TEST_F(VPU_AddVpuScaleTest, CanAddVpuScaleToNetwork) {
-    InitCompileEnv();
-
-    auto& env = CompileEnv::get();
-    auto config = createConfiguration();
-    config.set(InferenceEngine::MYRIAD_IR_WITH_SCALES_DIRECTORY, "/");
-    env.updateConfig(config);
-
-    std::shared_ptr<ngraph::Function> function;
-
-    {
-        auto input = std::make_shared<ngraph::opset1::Parameter>(ngraph::element::f16, ngraph::Shape{4, 2, 2});
-        input->set_friendly_name("input");
-        auto weights = ngraph::opset1::Constant::create(ngraph::element::f16, ngraph::Shape{2, 2}, {1});
-        auto bias = ngraph::opset1::Constant::create(ngraph::element::f16, ngraph::Shape{2}, {1});
-        auto fc = std::make_shared<ngraph::op::FullyConnected>(input, weights, bias, ngraph::Shape{4, 2, 2});
-        fc->set_friendly_name("FullyConnected");
-        auto result = std::make_shared<ngraph::op::Result>(fc);
-        ngraph::ResultVector results { result };
-        ngraph::ParameterVector params {input };
-        function = std::make_shared<ngraph::Function>(results, params);
-    }
-
-    auto network = InferenceEngine::CNNNetwork(function);
-    auto model = frontEnd->buildInitialModel(network);
-
-    const auto getFullyConnectedStage = [model]() -> Stage {
-        const auto isFullyConnected = [](const Stage& stage) {
-            const auto& layer = stage->origLayer();
-            return layer && layer->type == "FullyConnected";
-        };
-        const auto stages = model->getStages();
-        const auto stageIt = std::find_if(begin(stages), end(stages), isFullyConnected);
-        return *stageIt;
-    };
-
-    const auto fcStage = getFullyConnectedStage();
-    EXPECT_EQ(fcStage->origLayer()->params.find("vpu_scale"), fcStage->origLayer()->params.end());
-
-    auto middleEnd = passManager->buildMiddleEnd();
-    middleEnd->run(model);
-
-    const auto fcStageAfterMiddleEnd = getFullyConnectedStage();
-    EXPECT_NE(fcStageAfterMiddleEnd->origLayer()->params.find("vpu_scale"), fcStageAfterMiddleEnd->origLayer()->params.end());
-}
-
-TEST_F(VPU_AddVpuScaleTest, VpuScaleFromIrChangesWeights) {
-    InitCompileEnv();
-    const auto& env = CompileEnv::get();
-    auto config = createConfiguration();
-    config.set(InferenceEngine::MYRIAD_IR_WITH_SCALES_DIRECTORY, "/");
-    env.updateConfig(config);
-
-    std::shared_ptr<ngraph::Function> function;
-    {
-        ngraph::element::Type elementType = ngraph::element::Type_t::f16;
-        ngraph::Shape shape { 1, 1, 4, 5 };
-        auto input = std::make_shared<ngraph::op::Parameter>(elementType, shape);
-        input->set_friendly_name("input");
-
-        auto weights = std::make_shared<ngraph::op::Constant>(
-                elementType, ngraph::Shape{1, 1, 1, 1}, std::vector<float>(1, 1.0f));
-        auto conv = std::make_shared<ngraph::op::v1::Convolution>(
-                input, weights, ngraph::Strides {1, 1},
-                ngraph::CoordinateDiff{0, 0}, ngraph::CoordinateDiff{0, 0}, ngraph::Strides{1, 1});
-        conv->set_friendly_name("Convolution");
-        auto result = std::make_shared<ngraph::op::Result>(conv);
-
-        ngraph::ResultVector results { result };
-        ngraph::ParameterVector params { input };
-        function = std::make_shared<ngraph::Function>(results, params);
-    }
-
-    auto network = InferenceEngine::CNNNetwork(function);
-    auto model = frontEnd->buildInitialModel(network);
-
-    auto middleEnd = passManager->buildMiddleEnd();
-    auto checkWeightWasChanged = [this, &network](const float scale) {
-        auto model = frontEnd->buildInitialModel(network);
-        for (const auto& stage : model->getStages()) {
-            if (stage->name() == "Convolution") {
-                stage->origLayer()->params["vpu_scale"] = toString(scale);
-            }
-        }
-
-        auto middleEnd = passManager->buildMiddleEnd();
-        middleEnd->run(model);
-        for (const auto& stage : model->getStages()) {
-            if (stage->name() == "Convolution") {
-                auto content = stage->input(1)->content()->get<ie_fp16>();
-                if (scale < 0) {
-                    EXPECT_EQ(scale, PrecisionUtils::f16tof32(content[0]));
-                } else {
-                    EXPECT_EQ(scale, fabs(PrecisionUtils::f16tof32(content[0])));
-                }
-            }
-        }
-    };
-
-    const auto maxVal = std::numeric_limits<float>::infinity();
-
-    checkWeightWasChanged(32);
-    checkWeightWasChanged(64);
-    checkWeightWasChanged(maxVal);
-
-}
--- a/inference-engine/tests_deprecated/unit/engines/vpu/graph_transformer_tests.cpp
+++ b/inference-engine/tests_deprecated/unit/engines/vpu/graph_transformer_tests.cpp
@ -28,7 +28,7 @@
 #include <vpu/configuration/options/number_of_shaves.hpp>
 #include <vpu/configuration/options/number_of_cmx_slices.hpp>
 #include <vpu/configuration/options/throughput_streams.hpp>
-#include <vpu/configuration/options/ir_with_scales_directory.hpp>
+#include <vpu/configuration/options/vpu_scales_option.hpp>
 #include <vpu/configuration/options/tensor_strides.hpp>
 #include <vpu/configuration/options/ignore_unknown_layers.hpp>
 #include <vpu/configuration/options/force_pure_tensor_iterator.hpp>
@ -226,7 +226,7 @@ PluginConfiguration createConfiguration() {
    configuration.registerOption<NumberOfSHAVEsOption>();
    configuration.registerOption<NumberOfCMXSlicesOption>();
    configuration.registerOption<ThroughputStreamsOption>();
-    configuration.registerOption<IRWithScalesDirectoryOption>();
+    configuration.registerOption<VPUScalesOption>();
    configuration.registerOption<TensorStridesOption>();
    configuration.registerOption<IgnoreUnknownLayersOption>();
    configuration.registerOption<ForcePureTensorIteratorOption>();