isa-based threshold logic

This commit is contained in:
myshevts 2021-07-13 19:12:38 +03:00
parent f2b972171b
commit b218457e1a

View File

@ -600,11 +600,29 @@ Engine::LoadExeNetworkImpl(const InferenceEngine::CNNNetwork &network, const std
// config[PluginConfigParams::KEY_CPU_THROUGHPUT_STREAMS] = CONFIG_VALUE(CPU_THROUGHPUT_NUMA); // config[PluginConfigParams::KEY_CPU_THROUGHPUT_STREAMS] = CONFIG_VALUE(CPU_THROUGHPUT_NUMA);
// } else if (mode_name == CONFIG_VALUE(THROUGHPUT)) { // } else if (mode_name == CONFIG_VALUE(THROUGHPUT)) {
// const float memThresholdAssumeLimitedMuch = NetworkPerfStats::memThresholdAssumeLimited/8; // const float memThresholdAssumeLimitedMuch = NetworkPerfStats::memThresholdAssumeLimited/8;
const float memThresholdAssumeLimitedForISA = NetworkPerfStats::memThresholdAssumeLimited/(hasAVX512()?2:1); const auto isa = dnnl::get_effective_cpu_isa();
// const float memLimitedRatioThresholdForISA = hasAVX512() ? 0.10f : NetworkPerfStats::NONE; float isaSpecificThreshold = 1.0;
switch (isa) {
float L2_cache_size = mkldnn::utils::get_cache_size(2 /*level*/, true /*per core */); case dnnl::cpu_isa::sse41 :
float L3_cache_size = mkldnn::utils::get_cache_size(3, false); isaSpecificThreshold = 0.5f;
break;
case dnnl::cpu_isa::avx2:
case dnnl::cpu_isa::avx512_core:
isaSpecificThreshold = 1.0f;
break;
case dnnl::cpu_isa::avx512_core_vnni:
case dnnl::cpu_isa::avx2_vnni:
isaSpecificThreshold = 2.0f;
break;
case dnnl::cpu_isa::avx512_core_amx:
isaSpecificThreshold = 4.0f;
break;
}
// the more "capable" the CPU in general, the more streams we may want to keep to keep it utilized
const float memThresholdAssumeLimitedForISA = NetworkPerfStats::memThresholdAssumeLimited/isaSpecificThreshold;
std::cout << "isa:" << static_cast<int>(isa) << ", isaSpecificThreshold: " << isaSpecificThreshold << std::endl;
const float L2_cache_size = mkldnn::utils::get_cache_size(2 /*level*/, true /*per core */);
const float L3_cache_size = mkldnn::utils::get_cache_size(3, false);
std::cout<< "L3_cache_size " << L3_cache_size << std::endl; std::cout<< "L3_cache_size " << L3_cache_size << std::endl;
Engine::NetworkPerfStats NetworkToleranceForLowCache = NetworkMemBandwidthTolerance(clonedNetwork, Engine::NetworkPerfStats NetworkToleranceForLowCache = NetworkMemBandwidthTolerance(clonedNetwork,