[CPU][ARM] Enable multi-stream execution (#21009)
parent 9320fa7c86
commit acecf31642
@@ -343,11 +343,6 @@ void Config::readProperties(const ov::AnyMap& prop, const ModelType modelType) {
             streamExecutorConfig._streams_changed = true;
         }
 
-#if defined(OPENVINO_ARCH_ARM) || defined(OPENVINO_ARCH_ARM64)
-    // TODO: multi-stream execution has functional issues on ARM target
-    streamExecutorConfig._streams = 1;
-    streamExecutorConfig._streams_changed = true;
-#endif
     this->modelType = modelType;
 
     CPU_DEBUG_CAP_ENABLE(applyDebugCapsProperties());
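With the forced single-stream fallback removed from Config::readProperties, an explicitly requested stream count is now honored on ARM builds instead of being silently reset to 1. A minimal sketch of how that can be observed through the public OpenVINO API (not part of this commit; the model path is a placeholder):

    #include <openvino/openvino.hpp>
    #include <iostream>

    int main() {
        ov::Core core;
        // "model.xml" is a placeholder; any model the CPU plugin can read works here.
        auto model = core.read_model("model.xml");
        // Request four streams explicitly; before this change the ARM build forced 1.
        auto compiled = core.compile_model(model, "CPU", ov::num_streams(4));
        // Read the effective value back from the compiled model.
        auto streams = compiled.get_property(ov::num_streams);
        std::cout << "num_streams: " << streams.num << std::endl;
        return 0;
    }

Before this change the same program reported 1 on an ARM target regardless of the requested value.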
@@ -444,22 +444,49 @@ int get_model_prefer_threads(const int num_streams,
         const float L2_cache_size = dnnl::utils::get_cache_size(2 /*level*/, true /*per core */);
         ov::MemBandwidthPressure networkToleranceForLowCache =
             ov::MemBandwidthPressureTolerance(model, L2_cache_size, memThresholdAssumeLimitedForISA);
-        config.modelPreferThreads = ov::threading::IStreamsExecutor::Config::StreamMode::DEFAULT;
-        if (networkToleranceForLowCache.max_mem_tolerance == ov::MemBandwidthPressure::UNKNOWN) {
-            if ((networkToleranceForLowCache.ratio_compute_convs == ov::MemBandwidthPressure::ALL) ||
-                (networkToleranceForLowCache.ratio_compute_deconvs == ov::MemBandwidthPressure::ALL)) {
-                // all relevant layers (convs, etc) are compute-limited, the most aggressive val for #streams
-                config.modelPreferThreads = 1;
-            }  // otherwise (no recognized layers) falling back to the default value
-        } else if (networkToleranceForLowCache.max_mem_tolerance > memThresholdAssumeLimitedForISA) {
-            // network is below the ISA-specific threshold
-            config.modelPreferThreads = 1;
-        } else if (networkToleranceForLowCache.max_mem_tolerance > ov::MemBandwidthPressure::LIMITED) {
-            // network is below general threshold
-            config.modelPreferThreads = 2;
-        }
-        if (config.modelPreferThreads == 1 && proc_type_table[0][EFFICIENT_CORE_PROC] == 0 && sockets == 1) {
-            config.modelPreferThreads = 2;
-        }
+#if (defined(OPENVINO_ARCH_ARM) || defined(OPENVINO_ARCH_ARM64)) && defined(__APPLE__)
+        config.modelPreferThreads = 1;
+        if (networkToleranceForLowCache.max_mem_tolerance == ov::MemBandwidthPressure::UNKNOWN) {
+            if ((networkToleranceForLowCache.ratio_compute_convs == ov::MemBandwidthPressure::ALL) ||
+                (networkToleranceForLowCache.ratio_compute_deconvs == ov::MemBandwidthPressure::ALL)) {
+                // all relevant layers (convs, etc) are compute-limited, the most aggressive val for #streams
+                config.modelPreferThreads = 4;
+            }  // otherwise (no recognized layers) falling back to the default value
+        } else if (networkToleranceForLowCache.max_mem_tolerance > memThresholdAssumeLimitedForISA) {
+            // network is below the ISA-specific threshold
+            config.modelPreferThreads = 1;
+        } else if (networkToleranceForLowCache.max_mem_tolerance > ov::MemBandwidthPressure::LIMITED) {
+            // network is below general threshold
+            config.modelPreferThreads = 1;
+        } else if (networkToleranceForLowCache.ratio_mem_limited_deconvs > ov::MemBandwidthPressure::LIMITED &&
+                   networkToleranceForLowCache.ratio_compute_convs < ov::MemBandwidthPressure::ALL) {
+            config.modelPreferThreads = 4;
+        } else if (networkToleranceForLowCache.ratio_mem_limited_deconvs <= ov::MemBandwidthPressure::LIMITED &&
+                   networkToleranceForLowCache.ratio_mem_limited_convs <= ov::MemBandwidthPressure::LIMITED &&
+                   networkToleranceForLowCache.ratio_compute_convs > ov::MemBandwidthPressure::LIMITED) {
+            config.modelPreferThreads = 2;
+        }
+#endif
+        if (-1 == config.modelPreferThreads) {
+            config.modelPreferThreads = ov::threading::IStreamsExecutor::Config::StreamMode::DEFAULT;
+            if (networkToleranceForLowCache.max_mem_tolerance == ov::MemBandwidthPressure::UNKNOWN) {
+                if ((networkToleranceForLowCache.ratio_compute_convs == ov::MemBandwidthPressure::ALL) ||
+                    (networkToleranceForLowCache.ratio_compute_deconvs == ov::MemBandwidthPressure::ALL)) {
+                    // all relevant layers (convs, etc) are compute-limited, the most aggressive val for #streams
+                    config.modelPreferThreads = 1;
+                }  // otherwise (no recognized layers) falling back to the default value
+            } else if (networkToleranceForLowCache.max_mem_tolerance > memThresholdAssumeLimitedForISA) {
+                // network is below the ISA-specific threshold
+                config.modelPreferThreads = 1;
+            } else if (networkToleranceForLowCache.max_mem_tolerance > ov::MemBandwidthPressure::LIMITED) {
+                // network is below general threshold
+                config.modelPreferThreads = 2;
+            }
+            if (config.modelPreferThreads == 1 && proc_type_table[0][EFFICIENT_CORE_PROC] == 0 && sockets == 1) {
+                config.modelPreferThreads = 2;
+            }
+        }
     }
 
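The new preprocessor branch gives Apple silicon ARM targets their own mapping from memory-bandwidth pressure to the preferred threads-per-stream value, while all other targets keep the previous heuristic behind the added "if (-1 == config.modelPreferThreads)" guard. The sketch below restates that Apple-ARM chain as a standalone function purely for readability; MemPressure and its UNKNOWN/ALL/LIMITED constants are simplified stand-ins for ov::MemBandwidthPressure, and the numeric thresholds are assumptions, not the plugin's real values.

    // Illustrative only: simplified stand-in for ov::MemBandwidthPressure (C++17).
    struct MemPressure {
        static constexpr float UNKNOWN = -1.0f;  // assumed sentinel value
        static constexpr float ALL = 1.0f;       // assumed: every conv/deconv matched
        static constexpr float LIMITED = 0.5f;   // assumed general threshold
        float max_mem_tolerance = UNKNOWN;
        float ratio_compute_convs = 0.0f;
        float ratio_compute_deconvs = 0.0f;
        float ratio_mem_limited_convs = 0.0f;
        float ratio_mem_limited_deconvs = 0.0f;
    };

    // Mirrors the #if (ARM || ARM64) && __APPLE__ branch above: maps bandwidth
    // statistics to the threads-per-stream preference that later drives the stream count.
    int apple_arm_prefer_threads(const MemPressure& p, float isa_specific_threshold) {
        int prefer = 1;
        if (p.max_mem_tolerance == MemPressure::UNKNOWN) {
            if (p.ratio_compute_convs == MemPressure::ALL ||
                p.ratio_compute_deconvs == MemPressure::ALL)
                prefer = 4;  // all recognized layers are compute-limited
        } else if (p.max_mem_tolerance > isa_specific_threshold) {
            prefer = 1;      // below the ISA-specific threshold
        } else if (p.max_mem_tolerance > MemPressure::LIMITED) {
            prefer = 1;      // below the general threshold
        } else if (p.ratio_mem_limited_deconvs > MemPressure::LIMITED &&
                   p.ratio_compute_convs < MemPressure::ALL) {
            prefer = 4;      // deconvs are bandwidth-bound, convs are not fully compute-bound
        } else if (p.ratio_mem_limited_deconvs <= MemPressure::LIMITED &&
                   p.ratio_mem_limited_convs <= MemPressure::LIMITED &&
                   p.ratio_compute_convs > MemPressure::LIMITED) {
            prefer = 2;      // mostly compute-bound convolutions, little bandwidth pressure
        }
        return prefer;
    }

On non-Apple targets the function never runs, config.modelPreferThreads stays at -1, and the pre-existing heuristic inside the new guard produces the same result as before this commit.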
@@ -147,11 +147,6 @@ TEST_F(OVClassConfigTestCPU, smoke_CpuExecNetworkCheckModelZeroStreams) {
 
     ASSERT_NO_THROW(value = compiledModel.get_property(ov::num_streams));
 
-#if defined(OPENVINO_ARCH_ARM) || \
-    defined(OPENVINO_ARCH_ARM64)  // Will be removed after multiple streams is supported on ARM
-    streams = 1;
-#endif
-
     ASSERT_EQ(streams, value);
 }
 
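The test no longer patches its expected value on ARM, so a requested stream count is expected to round-trip through compile_model unchanged on every target. A small sketch of the same expectation outside the gtest fixture (the model path is a placeholder; the assertion mirrors the test rather than adding new behaviour):

    #include <openvino/openvino.hpp>
    #include <cassert>

    int main() {
        ov::Core core;
        auto model = core.read_model("model.xml");  // placeholder model path
        // The "zero streams" case from the test: request 0 and expect 0 back.
        auto compiled = core.compile_model(model, "CPU", ov::num_streams(0));
        auto reported = compiled.get_property(ov::num_streams);
        assert(reported.num == 0);  // the ARM build used to report 1 here
        return 0;
    }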