brushed up the ratio-compute-convs heuristic to depend on the ISA rather than the plain (naive) #cores
This commit is contained in:
parent 6e7109317a
commit 2003fa3f46
@@ -558,7 +558,7 @@ Engine::NetworkPerfStats Engine::NetworkMemBandwidthTolerance(const InferenceEng
     res.ratio_compute_deconvs = total_deconvs ? static_cast<float>(compute_deconvs)/total_deconvs : 0;
     return res;
 }
-
+static bool hasAVX512();
 InferenceEngine::ExecutableNetworkInternal::Ptr
 Engine::LoadExeNetworkImpl(const InferenceEngine::CNNNetwork &network, const std::map<std::string, std::string> &orig_config) {
     OV_ITT_SCOPED_TASK(itt::domains::MKLDNNPlugin, "Engine::LoadExeNetworkImpl");
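Note: the hunk above only introduces a forward declaration of hasAVX512(); its definition is outside the quoted context. As a rough sketch, such a check could be built on the Inference Engine CPU-feature helpers from ie_system_conf.h — an assumption, since the diff does not show the body:

    #include <ie_system_conf.h>

    // Hypothetical definition (not part of this diff): report whether the
    // host CPU supports the AVX-512 Foundation instruction set.
    static bool hasAVX512() {
        return InferenceEngine::with_cpu_x86_avx512f();
    }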
@@ -624,16 +624,17 @@ Engine::LoadExeNetworkImpl(const InferenceEngine::CNNNetwork &network, const std
     int num_streams;
     bool considerNonLimited = false;
     if (NetworkToleranceForLowCache.maxMemTolerance > NetworkPerfStats::memThresholdNotLimited) {
-        std::cout << "case 1.0" <<std::endl;
+        std::cout << " case 1.0" <<std::endl;
         considerNonLimited = true;
     }
     if ((NetworkToleranceForLowCache.ratio_compute_convs == NetworkPerfStats::ALL)
         || (NetworkToleranceForLowCache.ratio_compute_deconvs == NetworkPerfStats::ALL)) {
-        std::cout << "case 1.1" <<std::endl;
+        std::cout << " case 1.1" <<std::endl;
         considerNonLimited = true;
     }
-    if (num_cores >= 12 && (NetworkToleranceForLowCache.ratio_mem_limited_convs <= NetworkPerfStats::memLimitedRatioThreshold)) {
-        std::cout << "case 1.2" <<std::endl;
+    if (hasAVX512() && NetworkToleranceForLowCache.maxMemTolerance > NetworkPerfStats::memThresholdAssumeLimitedAVX512
+        && (NetworkToleranceForLowCache.ratio_mem_limited_convs <= NetworkPerfStats::memLimitedRatioThresholdAVX512)) {
+        std::cout << " case 1.2" <<std::endl;
         considerNonLimited = true;
     }
     if (considerNonLimited) {
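The net effect of this hunk: "case 1.2" now keys off the ISA instead of the core count. A self-contained sketch of the resulting decision logic, with made-up stats plugged in (the struct below is a stripped-down stand-in for the plugin's NetworkPerfStats, not the real class):

    #include <iostream>

    // Stand-in for the plugin's NetworkPerfStats; the field values are
    // invented purely to exercise the branches below.
    struct NetworkPerfStats {
        float maxMemTolerance = 0.30f;
        float ratio_compute_convs = 0.40f;
        float ratio_compute_deconvs = 0.0f;
        float ratio_mem_limited_convs = 0.08f;
        static constexpr float memThresholdNotLimited = 1.0f;
        static constexpr float memThresholdAssumeLimited = 0.5f;
        static constexpr float memThresholdAssumeLimitedAVX512 = memThresholdAssumeLimited/2;
        static constexpr float memLimitedRatioThresholdAVX512 = 0.10f;
        static constexpr float ALL = 1.0f;
    };

    static bool hasAVX512() { return true; } // pretend we are on an AVX-512 host

    int main() {
        NetworkPerfStats s;
        bool considerNonLimited = false;
        // case 1.0: tolerance clearly above the "not limited" bar (0.30 > 1.0 -> no)
        if (s.maxMemTolerance > NetworkPerfStats::memThresholdNotLimited)
            considerNonLimited = true;
        // case 1.1: every conv/deconv is compute-bound (0.40 != 1.0 -> no)
        if (s.ratio_compute_convs == NetworkPerfStats::ALL
            || s.ratio_compute_deconvs == NetworkPerfStats::ALL)
            considerNonLimited = true;
        // case 1.2 (the changed branch): AVX-512 host, 0.30 > 0.25, 0.08 <= 0.10 -> yes
        if (hasAVX512() && s.maxMemTolerance > NetworkPerfStats::memThresholdAssumeLimitedAVX512
            && s.ratio_mem_limited_convs <= NetworkPerfStats::memLimitedRatioThresholdAVX512)
            considerNonLimited = true;
        std::cout << std::boolalpha << considerNonLimited << std::endl; // prints: true
    }

Presumably the point is that "has AVX-512" is a better proxy for per-core compute throughput than "has at least 12 cores", so the heuristic now picks its thresholds per ISA instead of guessing from the core count.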
@@ -50,11 +50,12 @@ private:
        float ratio_mem_limited_convs = 0;
        float ratio_compute_deconvs = 0;
 
-        // static constexpr float memComputeConvs = 0.25f;
+
        static constexpr float memThresholdNotLimited = 1.0f;
        static constexpr float memThresholdAssumeLimited = 0.5f;
+        static constexpr float memThresholdAssumeLimitedAVX512 = memThresholdAssumeLimited/2;
        static constexpr float memThresholdUnknown = FLT_MAX;
-        static constexpr float memLimitedRatioThreshold = 0.05;
+        static constexpr float memLimitedRatioThresholdAVX512 = 0.10;
        static constexpr float ALL = 1.0f;
    };
    static NetworkPerfStats NetworkMemBandwidthTolerance(const InferenceEngine::CNNNetwork &network);
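For the record, the new constants work out as: memThresholdAssumeLimitedAVX512 = memThresholdAssumeLimited/2 = 0.5f/2 = 0.25, and memLimitedRatioThresholdAVX512 = 0.10 doubles the 0.05 allowance that the removed memLimitedRatioThreshold gave. In other words, on AVX-512 hosts a network can qualify as non-limited at half the generic tolerance threshold, as long as no more than 10% of its convolutions are memory-limited.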