[IE][VPU]: Introduces ENABLE_EARLY_ELTWISE_RELU_FUSION option (#3343)

Unconditional Eltwise+ReLU merge makes some models take a bit more memory,
so they could not be inferred on Myriad-X. With this change users can
optionally disable this optimization and use Myriad-X.
This commit is contained in:
Gladilov, Gleb
2020-11-26 13:27:53 +03:00
committed by GitHub
parent bff2fe0b2f
commit c067ed1b9a
5 changed files with 25 additions and 13 deletions

View File

@@ -61,6 +61,7 @@ struct CompilationConfig final {
bool mergeHwPoolToConv = true;
bool hwDilation = false;
bool forceDeprecatedCnnConversion = false;
bool enableEarlyEltwiseReLUFusion = true;
std::map<std::string, std::vector<int>> ioStrides;

View File

@@ -36,6 +36,7 @@ DECLARE_VPU_CONFIG(MYRIAD_PER_LAYER);
DECLARE_VPU_CONFIG(MYRIAD_PER_STAGE);
DECLARE_VPU_CONFIG(MYRIAD_ENABLE_MEMORY_TYPES_ANNOTATION);
DECLARE_VPU_CONFIG(MYRIAD_ENABLE_EARLY_ELTWISE_RELU_FUSION);
/**
* @brief Used to disable analyzeWeightableLayers pass in cases where

View File

@@ -249,8 +249,10 @@ PassSet::Ptr PassManager::buildMiddleEnd() {
ADD_PASS(mergeReLUAndBias);
ADD_DUMP_PASS("mergeReLUAndBias");
ADD_PASS(mergeEltwiseAndReLUDynamic);
ADD_DUMP_PASS("mergeEltwiseAndReLUDynamic");
if (env.config.enableEarlyEltwiseReLUFusion) {
ADD_PASS(mergeEltwiseAndReLUDynamic);
ADD_DUMP_PASS("mergeEltwiseAndReLUDynamic");
}
//
// Data layout adjustment

View File

@@ -9,6 +9,8 @@
#include <vpu/middleend/sw/utility.hpp>
#include <vpu/compile_env.hpp>
namespace vpu {
namespace {
@@ -28,15 +30,18 @@ private:
};
void PassImpl::run(const Model& model) {
if (m_mode == MergeMode::DYNAMIC_NETWORK) {
VPU_PROFILE(mergeEltwiseAndReLUDynamic);
if (model->isStatic()) {
return;
}
} else if (m_mode == MergeMode::STATIC_NETWORK) {
VPU_PROFILE(mergeEltwiseAndReLUStatic);
if (model->isDynamic()) {
return;
const bool enableEarlyEltwiseReLUFusion = CompileEnv::get().config.enableEarlyEltwiseReLUFusion;
if (enableEarlyEltwiseReLUFusion) {
if (m_mode == MergeMode::DYNAMIC_NETWORK) {
VPU_PROFILE(mergeEltwiseAndReLUDynamic);
if (model->isStatic()) {
return;
}
} else if (m_mode == MergeMode::STATIC_NETWORK) {
VPU_PROFILE(mergeEltwiseAndReLUStatic);
if (model->isDynamic()) {
return;
}
}
}
@@ -80,7 +85,8 @@ void PassImpl::run(const Model& model) {
auto reluInput = reluStage->input(0);
auto reluOutput = reluStage->output(0);
if (model->isDynamic() || reluInput->strides() == reluOutput->strides() || reluOutput->checkStrides(StridesRequirement::compact())) {
const auto stridesAreSupported = reluInput->strides() == reluOutput->strides() || reluOutput->checkStrides(StridesRequirement::compact());
if ((enableEarlyEltwiseReLUFusion && (stridesAreSupported || model->isDynamic())) || (!enableEarlyEltwiseReLUFusion && stridesAreSupported)) {
auto reluStageType = reluStage->type();
auto reluStageName = reluStage->name();

View File

@@ -68,6 +68,7 @@ IE_SUPPRESS_DEPRECATED_START
ie::MYRIAD_FORCE_PURE_TENSOR_ITERATOR,
ie::MYRIAD_DISABLE_CONVERT_STAGES,
ie::MYRIAD_ENABLE_WEIGHTS_ANALYSIS,
ie::MYRIAD_ENABLE_EARLY_ELTWISE_RELU_FUSION,
//
// Debug options
@@ -183,7 +184,8 @@ void ParsedConfig::parse(const std::map<std::string, std::string>& config) {
setOption(_compileConfig.enableTensorIteratorUnrolling, switches, config, ie::MYRIAD_ENABLE_TENSOR_ITERATOR_UNROLLING);
setOption(_compileConfig.forcePureTensorIterator, switches, config, ie::MYRIAD_FORCE_PURE_TENSOR_ITERATOR);
setOption(_compileConfig.disableConvertStages, switches, config, ie::MYRIAD_DISABLE_CONVERT_STAGES);
setOption(_compileConfig.enableWeightsAnalysis, switches, config, ie::MYRIAD_ENABLE_WEIGHTS_ANALYSIS);
setOption(_compileConfig.enableWeightsAnalysis, switches, config, ie::MYRIAD_ENABLE_WEIGHTS_ANALYSIS);
setOption(_compileConfig.enableEarlyEltwiseReLUFusion, switches, config, ie::MYRIAD_ENABLE_EARLY_ELTWISE_RELU_FUSION);
setOption(_compileConfig.irWithVpuScalesDir, config, ie::MYRIAD_IR_WITH_SCALES_DIRECTORY);
setOption(_compileConfig.noneLayers, config, ie::MYRIAD_NONE_LAYERS, parseStringSet);