[CPU][ARM] Enable multi-stream execution (#21009)
parent 9320fa7c86
commit acecf31642
@@ -343,11 +343,6 @@ void Config::readProperties(const ov::AnyMap& prop, const ModelType modelType) {
             streamExecutorConfig._streams_changed = true;
         }
 
-#if defined(OPENVINO_ARCH_ARM) || defined(OPENVINO_ARCH_ARM64)
-    // TODO: multi-stream execution has functional issues on ARM target
-    streamExecutorConfig._streams = 1;
-    streamExecutorConfig._streams_changed = true;
-#endif
     this->modelType = modelType;
 
     CPU_DEBUG_CAP_ENABLE(applyDebugCapsProperties());
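With the forced single-stream fallback removed from Config::readProperties, an explicitly requested stream count is now honored on ARM builds instead of being silently reset to 1. A minimal sketch of how that can be observed through the public OpenVINO API (not part of this commit; the model path is a placeholder):

    #include <openvino/openvino.hpp>
    #include <iostream>

    int main() {
        ov::Core core;
        // "model.xml" is a placeholder; any model the CPU plugin can read works here.
        auto model = core.read_model("model.xml");
        // Request four streams explicitly; before this change the ARM build forced 1.
        auto compiled = core.compile_model(model, "CPU", ov::num_streams(4));
        // Read the effective value back from the compiled model.
        auto streams = compiled.get_property(ov::num_streams);
        std::cout << "num_streams: " << streams.num << std::endl;
        return 0;
    }

Before this change the same program reported 1 on an ARM target regardless of the requested value.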
@@ -444,22 +444,49 @@ int get_model_prefer_threads(const int num_streams,
         const float L2_cache_size = dnnl::utils::get_cache_size(2 /*level*/, true /*per core */);
         ov::MemBandwidthPressure networkToleranceForLowCache =
             ov::MemBandwidthPressureTolerance(model, L2_cache_size, memThresholdAssumeLimitedForISA);
-        config.modelPreferThreads = ov::threading::IStreamsExecutor::Config::StreamMode::DEFAULT;
-        if (networkToleranceForLowCache.max_mem_tolerance == ov::MemBandwidthPressure::UNKNOWN) {
-            if ((networkToleranceForLowCache.ratio_compute_convs == ov::MemBandwidthPressure::ALL) ||
-                (networkToleranceForLowCache.ratio_compute_deconvs == ov::MemBandwidthPressure::ALL)) {
-                // all relevant layers (convs, etc) are compute-limited, the most aggressive val for #streams
-                config.modelPreferThreads = 1;
-            }  // otherwise (no recognized layers) falling back to the default value
-        } else if (networkToleranceForLowCache.max_mem_tolerance > memThresholdAssumeLimitedForISA) {
-            // network is below the ISA-specific threshold
-            config.modelPreferThreads = 1;
-        } else if (networkToleranceForLowCache.max_mem_tolerance > ov::MemBandwidthPressure::LIMITED) {
-            // network is below general threshold
-            config.modelPreferThreads = 2;
-        }
-        if (config.modelPreferThreads == 1 && proc_type_table[0][EFFICIENT_CORE_PROC] == 0 && sockets == 1) {
-            config.modelPreferThreads = 2;
-        }
+#if (defined(OPENVINO_ARCH_ARM) || defined(OPENVINO_ARCH_ARM64)) && defined(__APPLE__)
+        config.modelPreferThreads = 1;
+        if (networkToleranceForLowCache.max_mem_tolerance == ov::MemBandwidthPressure::UNKNOWN) {
+            if ((networkToleranceForLowCache.ratio_compute_convs == ov::MemBandwidthPressure::ALL) ||
+                (networkToleranceForLowCache.ratio_compute_deconvs == ov::MemBandwidthPressure::ALL)) {
+                // all relevant layers (convs, etc) are compute-limited, the most aggressive val for #streams
+                config.modelPreferThreads = 4;
+            }  // otherwise (no recognized layers) falling back to the default value
+        } else if (networkToleranceForLowCache.max_mem_tolerance > memThresholdAssumeLimitedForISA) {
+            // network is below the ISA-specific threshold
+            config.modelPreferThreads = 1;
+        } else if (networkToleranceForLowCache.max_mem_tolerance > ov::MemBandwidthPressure::LIMITED) {
+            // network is below general threshold
+            config.modelPreferThreads = 1;
+        } else if (networkToleranceForLowCache.ratio_mem_limited_deconvs > ov::MemBandwidthPressure::LIMITED &&
+                   networkToleranceForLowCache.ratio_compute_convs < ov::MemBandwidthPressure::ALL) {
+            config.modelPreferThreads = 4;
+        } else if (networkToleranceForLowCache.ratio_mem_limited_deconvs <= ov::MemBandwidthPressure::LIMITED &&
+                   networkToleranceForLowCache.ratio_mem_limited_convs <= ov::MemBandwidthPressure::LIMITED &&
+                   networkToleranceForLowCache.ratio_compute_convs > ov::MemBandwidthPressure::LIMITED) {
+            config.modelPreferThreads = 2;
+        }
+#endif
+        if (-1 == config.modelPreferThreads) {
+            config.modelPreferThreads = ov::threading::IStreamsExecutor::Config::StreamMode::DEFAULT;
+            if (networkToleranceForLowCache.max_mem_tolerance == ov::MemBandwidthPressure::UNKNOWN) {
+                if ((networkToleranceForLowCache.ratio_compute_convs == ov::MemBandwidthPressure::ALL) ||
+                    (networkToleranceForLowCache.ratio_compute_deconvs == ov::MemBandwidthPressure::ALL)) {
+                    // all relevant layers (convs, etc) are compute-limited, the most aggressive val for #streams
+                    config.modelPreferThreads = 1;
+                }  // otherwise (no recognized layers) falling back to the default value
+            } else if (networkToleranceForLowCache.max_mem_tolerance > memThresholdAssumeLimitedForISA) {
+                // network is below the ISA-specific threshold
+                config.modelPreferThreads = 1;
+            } else if (networkToleranceForLowCache.max_mem_tolerance > ov::MemBandwidthPressure::LIMITED) {
+                // network is below general threshold
+                config.modelPreferThreads = 2;
+            }
+            if (config.modelPreferThreads == 1 && proc_type_table[0][EFFICIENT_CORE_PROC] == 0 && sockets == 1) {
+                config.modelPreferThreads = 2;
+            }
+        }
     }
 
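The new preprocessor branch gives Apple silicon ARM targets their own mapping from memory-bandwidth pressure to the preferred threads-per-stream value, while all other targets keep the previous heuristic behind the added "if (-1 == config.modelPreferThreads)" guard. The sketch below restates that Apple-ARM chain as a standalone function purely for readability; MemPressure and its UNKNOWN/ALL/LIMITED constants are simplified stand-ins for ov::MemBandwidthPressure, and the numeric thresholds are assumptions, not the plugin's real values.

    // Illustrative only: simplified stand-in for ov::MemBandwidthPressure (C++17).
    struct MemPressure {
        static constexpr float UNKNOWN = -1.0f;  // assumed sentinel value
        static constexpr float ALL = 1.0f;       // assumed: every conv/deconv matched
        static constexpr float LIMITED = 0.5f;   // assumed general threshold
        float max_mem_tolerance = UNKNOWN;
        float ratio_compute_convs = 0.0f;
        float ratio_compute_deconvs = 0.0f;
        float ratio_mem_limited_convs = 0.0f;
        float ratio_mem_limited_deconvs = 0.0f;
    };

    // Mirrors the #if (ARM || ARM64) && __APPLE__ branch above: maps bandwidth
    // statistics to the threads-per-stream preference that later drives the stream count.
    int apple_arm_prefer_threads(const MemPressure& p, float isa_specific_threshold) {
        int prefer = 1;
        if (p.max_mem_tolerance == MemPressure::UNKNOWN) {
            if (p.ratio_compute_convs == MemPressure::ALL ||
                p.ratio_compute_deconvs == MemPressure::ALL)
                prefer = 4;  // all recognized layers are compute-limited
        } else if (p.max_mem_tolerance > isa_specific_threshold) {
            prefer = 1;      // below the ISA-specific threshold
        } else if (p.max_mem_tolerance > MemPressure::LIMITED) {
            prefer = 1;      // below the general threshold
        } else if (p.ratio_mem_limited_deconvs > MemPressure::LIMITED &&
                   p.ratio_compute_convs < MemPressure::ALL) {
            prefer = 4;      // deconvs are bandwidth-bound, convs are not fully compute-bound
        } else if (p.ratio_mem_limited_deconvs <= MemPressure::LIMITED &&
                   p.ratio_mem_limited_convs <= MemPressure::LIMITED &&
                   p.ratio_compute_convs > MemPressure::LIMITED) {
            prefer = 2;      // mostly compute-bound convolutions, little bandwidth pressure
        }
        return prefer;
    }

On non-Apple targets the function never runs, config.modelPreferThreads stays at -1, and the pre-existing heuristic inside the new guard produces the same result as before this commit.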
@@ -147,11 +147,6 @@ TEST_F(OVClassConfigTestCPU, smoke_CpuExecNetworkCheckModelZeroStreams) {
 
     ASSERT_NO_THROW(value = compiledModel.get_property(ov::num_streams));
 
-#if defined(OPENVINO_ARCH_ARM) || \
-    defined(OPENVINO_ARCH_ARM64)  // Will be removed after multiple streams is supported on ARM
-    streams = 1;
-#endif
-
     ASSERT_EQ(streams, value);
 }
 
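The test no longer patches its expected value on ARM, so a requested stream count is expected to round-trip through compile_model unchanged on every target. A small sketch of the same expectation outside the gtest fixture (the model path is a placeholder; the assertion mirrors the test rather than adding new behaviour):

    #include <openvino/openvino.hpp>
    #include <cassert>

    int main() {
        ov::Core core;
        auto model = core.read_model("model.xml");  // placeholder model path
        // The "zero streams" case from the test: request 0 and expect 0 back.
        auto compiled = core.compile_model(model, "CPU", ov::num_streams(0));
        auto reported = compiled.get_property(ov::num_streams);
        assert(reported.num == 0);  // the ARM build used to report 1 here
        return 0;
    }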