[CPU][ARM] Enable multi-stream execution (#21009)

This commit is contained in:
Wanglei Shen 2023-11-28 14:41:56 +08:00 committed by GitHub
parent 9320fa7c86
commit acecf31642
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 31 additions and 14 deletions

View File

@ -343,11 +343,6 @@ void Config::readProperties(const ov::AnyMap& prop, const ModelType modelType) {
streamExecutorConfig._streams_changed = true;
}
#if defined(OPENVINO_ARCH_ARM) || defined(OPENVINO_ARCH_ARM64)
// TODO: multi-stream execution has functional issues on ARM target
streamExecutorConfig._streams = 1;
streamExecutorConfig._streams_changed = true;
#endif
this->modelType = modelType;
CPU_DEBUG_CAP_ENABLE(applyDebugCapsProperties());

View File

@ -444,22 +444,49 @@ int get_model_prefer_threads(const int num_streams,
const float L2_cache_size = dnnl::utils::get_cache_size(2 /*level*/, true /*per core */);
ov::MemBandwidthPressure networkToleranceForLowCache =
ov::MemBandwidthPressureTolerance(model, L2_cache_size, memThresholdAssumeLimitedForISA);
config.modelPreferThreads = ov::threading::IStreamsExecutor::Config::StreamMode::DEFAULT;
#if (defined(OPENVINO_ARCH_ARM) || defined(OPENVINO_ARCH_ARM64)) && defined(__APPLE__)
config.modelPreferThreads = 1;
if (networkToleranceForLowCache.max_mem_tolerance == ov::MemBandwidthPressure::UNKNOWN) {
if ((networkToleranceForLowCache.ratio_compute_convs == ov::MemBandwidthPressure::ALL) ||
(networkToleranceForLowCache.ratio_compute_deconvs == ov::MemBandwidthPressure::ALL)) {
// all relevant layers (convs, etc) are compute-limited, the most aggressive val for #streams
config.modelPreferThreads = 1;
config.modelPreferThreads = 4;
} // otherwise (no recognized layers) falling back to the default value
} else if (networkToleranceForLowCache.max_mem_tolerance > memThresholdAssumeLimitedForISA) {
// network is below the ISA-specific threshold
config.modelPreferThreads = 1;
} else if (networkToleranceForLowCache.max_mem_tolerance > ov::MemBandwidthPressure::LIMITED) {
// network is below general threshold
config.modelPreferThreads = 1;
} else if (networkToleranceForLowCache.ratio_mem_limited_deconvs > ov::MemBandwidthPressure::LIMITED &&
networkToleranceForLowCache.ratio_compute_convs < ov::MemBandwidthPressure::ALL) {
config.modelPreferThreads = 4;
} else if (networkToleranceForLowCache.ratio_mem_limited_deconvs <= ov::MemBandwidthPressure::LIMITED &&
networkToleranceForLowCache.ratio_mem_limited_convs <= ov::MemBandwidthPressure::LIMITED &&
networkToleranceForLowCache.ratio_compute_convs > ov::MemBandwidthPressure::LIMITED) {
config.modelPreferThreads = 2;
}
if (config.modelPreferThreads == 1 && proc_type_table[0][EFFICIENT_CORE_PROC] == 0 && sockets == 1) {
config.modelPreferThreads = 2;
#endif
if (-1 == config.modelPreferThreads) {
config.modelPreferThreads = ov::threading::IStreamsExecutor::Config::StreamMode::DEFAULT;
if (networkToleranceForLowCache.max_mem_tolerance == ov::MemBandwidthPressure::UNKNOWN) {
if ((networkToleranceForLowCache.ratio_compute_convs == ov::MemBandwidthPressure::ALL) ||
(networkToleranceForLowCache.ratio_compute_deconvs == ov::MemBandwidthPressure::ALL)) {
// all relevant layers (convs, etc) are compute-limited, the most aggressive val for #streams
config.modelPreferThreads = 1;
} // otherwise (no recognized layers) falling back to the default value
} else if (networkToleranceForLowCache.max_mem_tolerance > memThresholdAssumeLimitedForISA) {
// network is below the ISA-specific threshold
config.modelPreferThreads = 1;
} else if (networkToleranceForLowCache.max_mem_tolerance > ov::MemBandwidthPressure::LIMITED) {
// network is below general threshold
config.modelPreferThreads = 2;
}
if (config.modelPreferThreads == 1 && proc_type_table[0][EFFICIENT_CORE_PROC] == 0 && sockets == 1) {
config.modelPreferThreads = 2;
}
}
}

View File

@ -147,11 +147,6 @@ TEST_F(OVClassConfigTestCPU, smoke_CpuExecNetworkCheckModelZeroStreams) {
ASSERT_NO_THROW(value = compiledModel.get_property(ov::num_streams));
#if defined(OPENVINO_ARCH_ARM) || \
defined(OPENVINO_ARCH_ARM64) // Will be removed after multiple streams is supported on ARM
streams = 1;
#endif
ASSERT_EQ(streams, value);
}