brushed up the ratio-compute-convs heuristic to depend on the ISA rather than the plain (naive) #cores
This commit is contained in:
parent 6e7109317a
commit 2003fa3f46
@@ -558,7 +558,7 @@ Engine::NetworkPerfStats Engine::NetworkMemBandwidthTolerance(const InferenceEng
     res.ratio_compute_deconvs = total_deconvs ? static_cast<float>(compute_deconvs)/total_deconvs : 0;
     return res;
 }
-
+static bool hasAVX512();
 InferenceEngine::ExecutableNetworkInternal::Ptr
 Engine::LoadExeNetworkImpl(const InferenceEngine::CNNNetwork &network, const std::map<std::string, std::string> &orig_config) {
     OV_ITT_SCOPED_TASK(itt::domains::MKLDNNPlugin, "Engine::LoadExeNetworkImpl");
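Note: the hunk above only introduces a forward declaration of hasAVX512(); its definition is outside the quoted context. As a rough sketch, such a check could be built on the Inference Engine CPU-feature helpers from ie_system_conf.h — an assumption, since the diff does not show the body:

    #include <ie_system_conf.h>

    // Hypothetical definition (not part of this diff): report whether the
    // host CPU supports the AVX-512 Foundation instruction set.
    static bool hasAVX512() {
        return InferenceEngine::with_cpu_x86_avx512f();
    }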
@@ -624,16 +624,17 @@ Engine::LoadExeNetworkImpl(const InferenceEngine::CNNNetwork &network, const std
     int num_streams;
     bool considerNonLimited = false;
     if (NetworkToleranceForLowCache.maxMemTolerance > NetworkPerfStats::memThresholdNotLimited) {
-        std::cout << "case 1.0" <<std::endl;
+        std::cout << " case 1.0" <<std::endl;
         considerNonLimited = true;
     }
     if ((NetworkToleranceForLowCache.ratio_compute_convs == NetworkPerfStats::ALL)
         || (NetworkToleranceForLowCache.ratio_compute_deconvs == NetworkPerfStats::ALL)) {
-        std::cout << "case 1.1" <<std::endl;
+        std::cout << " case 1.1" <<std::endl;
         considerNonLimited = true;
     }
-    if (num_cores >= 12 && (NetworkToleranceForLowCache.ratio_mem_limited_convs <= NetworkPerfStats::memLimitedRatioThreshold)) {
-        std::cout << "case 1.2" <<std::endl;
+    if (hasAVX512() && NetworkToleranceForLowCache.maxMemTolerance > NetworkPerfStats::memThresholdAssumeLimitedAVX512
+        && (NetworkToleranceForLowCache.ratio_mem_limited_convs <= NetworkPerfStats::memLimitedRatioThresholdAVX512)) {
+        std::cout << " case 1.2" <<std::endl;
         considerNonLimited = true;
     }
     if (considerNonLimited) {
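The net effect of this hunk: "case 1.2" now keys off the ISA instead of the core count. A self-contained sketch of the resulting decision logic, with made-up stats plugged in (the struct below is a stripped-down stand-in for the plugin's NetworkPerfStats, not the real class):

    #include <iostream>

    // Stand-in for the plugin's NetworkPerfStats; the field values are
    // invented purely to exercise the branches below.
    struct NetworkPerfStats {
        float maxMemTolerance = 0.30f;
        float ratio_compute_convs = 0.40f;
        float ratio_compute_deconvs = 0.0f;
        float ratio_mem_limited_convs = 0.08f;
        static constexpr float memThresholdNotLimited = 1.0f;
        static constexpr float memThresholdAssumeLimited = 0.5f;
        static constexpr float memThresholdAssumeLimitedAVX512 = memThresholdAssumeLimited/2;
        static constexpr float memLimitedRatioThresholdAVX512 = 0.10f;
        static constexpr float ALL = 1.0f;
    };

    static bool hasAVX512() { return true; } // pretend we are on an AVX-512 host

    int main() {
        NetworkPerfStats s;
        bool considerNonLimited = false;
        // case 1.0: tolerance clearly above the "not limited" bar (0.30 > 1.0 -> no)
        if (s.maxMemTolerance > NetworkPerfStats::memThresholdNotLimited)
            considerNonLimited = true;
        // case 1.1: every conv/deconv is compute-bound (0.40 != 1.0 -> no)
        if (s.ratio_compute_convs == NetworkPerfStats::ALL
            || s.ratio_compute_deconvs == NetworkPerfStats::ALL)
            considerNonLimited = true;
        // case 1.2 (the changed branch): AVX-512 host, 0.30 > 0.25, 0.08 <= 0.10 -> yes
        if (hasAVX512() && s.maxMemTolerance > NetworkPerfStats::memThresholdAssumeLimitedAVX512
            && s.ratio_mem_limited_convs <= NetworkPerfStats::memLimitedRatioThresholdAVX512)
            considerNonLimited = true;
        std::cout << std::boolalpha << considerNonLimited << std::endl; // prints: true
    }

Presumably the point is that "has AVX-512" is a better proxy for per-core compute throughput than "has at least 12 cores", so the heuristic now picks its thresholds per ISA instead of guessing from the core count.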
@@ -50,11 +50,12 @@ private:
        float ratio_mem_limited_convs = 0;
        float ratio_compute_deconvs = 0;
 
-        // static constexpr float memComputeConvs = 0.25f;
+
        static constexpr float memThresholdNotLimited = 1.0f;
        static constexpr float memThresholdAssumeLimited = 0.5f;
+        static constexpr float memThresholdAssumeLimitedAVX512 = memThresholdAssumeLimited/2;
        static constexpr float memThresholdUnknown = FLT_MAX;
-        static constexpr float memLimitedRatioThreshold = 0.05;
+        static constexpr float memLimitedRatioThresholdAVX512 = 0.10;
        static constexpr float ALL = 1.0f;
    };
    static NetworkPerfStats NetworkMemBandwidthTolerance(const InferenceEngine::CNNNetwork &network);
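For the record, the new constants work out as: memThresholdAssumeLimitedAVX512 = memThresholdAssumeLimited/2 = 0.5f/2 = 0.25, and memLimitedRatioThresholdAVX512 = 0.10 doubles the 0.05 allowance that the removed memLimitedRatioThreshold gave. In other words, on AVX-512 hosts a network can qualify as non-limited at half the generic tolerance threshold, as long as no more than 10% of its convolutions are memory-limited.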