diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp index 938f508b3c4..924153c25a5 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp @@ -600,11 +600,29 @@ Engine::LoadExeNetworkImpl(const InferenceEngine::CNNNetwork &network, const std // config[PluginConfigParams::KEY_CPU_THROUGHPUT_STREAMS] = CONFIG_VALUE(CPU_THROUGHPUT_NUMA); // } else if (mode_name == CONFIG_VALUE(THROUGHPUT)) { // const float memThresholdAssumeLimitedMuch = NetworkPerfStats::memThresholdAssumeLimited/8; - const float memThresholdAssumeLimitedForISA = NetworkPerfStats::memThresholdAssumeLimited/(hasAVX512()?2:1); - // const float memLimitedRatioThresholdForISA = hasAVX512() ? 0.10f : NetworkPerfStats::NONE; - - float L2_cache_size = mkldnn::utils::get_cache_size(2 /*level*/, true /*per core */); - float L3_cache_size = mkldnn::utils::get_cache_size(3, false); + const auto isa = dnnl::get_effective_cpu_isa(); + float isaSpecificThreshold = 1.0; + switch (isa) { + case dnnl::cpu_isa::sse41 : + isaSpecificThreshold = 0.5f; + break; + case dnnl::cpu_isa::avx2: + case dnnl::cpu_isa::avx512_core: + isaSpecificThreshold = 1.0f; + break; + case dnnl::cpu_isa::avx512_core_vnni: + case dnnl::cpu_isa::avx2_vnni: + isaSpecificThreshold = 2.0f; + break; + case dnnl::cpu_isa::avx512_core_amx: + isaSpecificThreshold = 4.0f; + break; + } + // the more "capable" the CPU in general, the more streams we may want to keep to keep it utilized + const float memThresholdAssumeLimitedForISA = NetworkPerfStats::memThresholdAssumeLimited/isaSpecificThreshold; + std::cout << "isa:" << static_cast(isa) << ", isaSpecificThreshold: " << isaSpecificThreshold << std::endl; + const float L2_cache_size = mkldnn::utils::get_cache_size(2 /*level*/, true /*per core */); + const float L3_cache_size = mkldnn::utils::get_cache_size(3, false); std::cout<< "L3_cache_size " << L3_cache_size << std::endl; Engine::NetworkPerfStats NetworkToleranceForLowCache = NetworkMemBandwidthTolerance(clonedNetwork,