[AUTOPLUGIN] add fps log (#10662)

* add fps log add format '%lf' for log add INFO_RUN and DEBUG_RUN, code only run when greater than special log level add fps log for device print device config info with DEBUG_RUN add mock test for DEBUG_RUN and INFO_RUN Signed-off-by: Hu, Yuan2 <yuan2.hu@intel.com> * use n / end -start instead of (n-1) / ((nst start) -(1st start)) Signed-off-by: Hu, Yuan2 <yuan2.hu@intel.com>
2022-03-16 09:55:41 +08:00
parent ea8f1d0344
commit 4f6dcc1d32
9 changed files with 148 additions and 26 deletions
--- a/src/plugins/auto/async_infer_request.cpp
+++ b/src/plugins/auto/async_infer_request.cpp
@@ -12,6 +12,7 @@
 #include <ie_icore.hpp>
 #include <ie_metric_helpers.hpp>
 #include <ie_plugin_config.hpp>
+#include "utils/log_util.hpp"

 namespace MultiDevicePlugin {
    using namespace InferenceEngine;
@@ -71,6 +72,9 @@ MultiDeviceAsyncInferRequest::MultiDeviceAsyncInferRequest(
         /*TaskExecutor*/ _multiDeviceExecutableNetwork, /*task*/ [this] {
               _workerInferRequest = MultiDeviceExecutableNetwork::_thisWorkerInferRequest;
               _inferRequest->SetBlobsToAnotherRequest(_workerInferRequest->_inferRequest);
+               INFO_RUN([this]() {
+                   _workerInferRequest->_startTimes.push_back(std::move(std::chrono::steady_clock::now()));
+               });
        }},
        // final task in the pipeline:
        { /*TaskExecutor*/std::make_shared<ThisRequestExecutor>(this), /*task*/ [this] {
@@ -79,7 +83,9 @@ MultiDeviceAsyncInferRequest::MultiDeviceAsyncInferRequest(
              }
              if (_needPerfCounters)
                  _perfMap = _workerInferRequest->_inferRequest->GetPerformanceCounts();
-              _workerInferRequest->_inferCount++;
+              INFO_RUN([this]() {
+                   _workerInferRequest->_endTimes.push_back(std::move(std::chrono::steady_clock::now()));
+              });
        }}
    };
 }
--- a/src/plugins/auto/async_infer_request.hpp
+++ b/src/plugins/auto/async_infer_request.hpp
@@ -14,6 +14,7 @@
 #include <cpp_interfaces/impl/ie_infer_async_request_thread_safe_default.hpp>
 #include "infer_request.hpp"
 #include "executable_network.hpp"
+#include "utils/log_util.hpp"

 #ifdef  MULTIUNITTEST
 #define MOCKTESTMACRO virtual
--- a/src/plugins/auto/executable_network.cpp
+++ b/src/plugins/auto/executable_network.cpp
@@ -84,6 +84,7 @@ MultiDeviceExecutableNetwork::MultiDeviceExecutableNetwork(const DeviceMap<Infer
    _networksPerDevice{networksPerDevice},
    _config{config},
    _needPerfCounters{needPerfCounters} {
+    _cpuHelpReleaseTime = std::chrono::steady_clock::now();
    _taskExecutor.reset();
    for (auto&& networkValue : _networksPerDevice) {
        auto& device  = networkValue.first;
@@ -163,6 +164,8 @@ MultiDeviceExecutableNetwork::MultiDeviceExecutableNetwork(const std::string&
                                                           , _workModeIsAUTO(true)
                                                           , _network(network) {
    LOG_INFO("[AUTOPLUGIN]ExecutableNetwork start");
+    // initialize cpuHelpReleasetime
+    _cpuHelpReleaseTime = std::chrono::steady_clock::now();
    if (_multiPlugin->GetCore() == nullptr) {
        IE_THROW() << "Please, work with " << _multiPlugin->GetName() << " device via InferencEngine::Core object";
    }
@@ -228,16 +231,16 @@ MultiDeviceExecutableNetwork::MultiDeviceExecutableNetwork(const std::string&
                                  deviceName.c_str());
                          auto supported_config_keys =
                              _core->GetMetric(deviceName, METRIC_KEY(SUPPORTED_CONFIG_KEYS)).as<std::vector<std::string>>();
-                          // there is log mutex in LOG_DEBUG, add _configMutex just want to print them all together
-                          // toDo maybe neet to implement LOG_RUN(task, LOG_LEVEL) to run some debug code.
-                          std::lock_guard<std::mutex> lock(_confMutex);
-                          for (const auto& cfg : supported_config_keys) {
-                              try {
-                                  LOG_DEBUG("[AUTOPLUGIN]:device:%s, GetConfig:%s=%s", deviceName.c_str(),
-                                          cfg.c_str(), contextPtr->executableNetwork->GetConfig(cfg).as<std::string>().c_str());
-                              } catch (...) {
+                          DEBUG_RUN([this, &contextPtr, &deviceName, &supported_config_keys]{
+                              std::lock_guard<std::mutex> lock(_confMutex);
+                              for (const auto& cfg : supported_config_keys) {
+                                  try {
+                                      LOG_DEBUG("[AUTOPLUGIN]:device:%s, GetConfig:%s=%s", deviceName.c_str(),
+                                              cfg.c_str(), contextPtr->executableNetwork->GetConfig(cfg).as<std::string>().c_str());
+                                  } catch (...) {
+                                  }
                              }
-                          }
+                          });
                      }
                      contextPtr->promise.set_value();
                      // the first load network process finished
@@ -284,12 +287,35 @@ MultiDeviceExecutableNetwork::MultiDeviceExecutableNetwork(const std::string&
                // second, check the idle queue if all requests are in place
                size_t destroynum = 0;
                std::pair<int, WorkerInferRequest*> worker;
+                std::list<Time> cpuHelpAllStartTimes;
+                std::list<Time> cpuHelpAllEndTimes;
                while (_idleWorkerRequests["CPU_HELP"].try_pop(worker)) {
                    destroynum++;
-                    _cpuHelpInferCount += worker.second->_inferCount;
+                    INFO_RUN([&cpuHelpAllStartTimes, &cpuHelpAllEndTimes, &worker]() {
+                        cpuHelpAllStartTimes.splice(cpuHelpAllStartTimes.end(), worker.second->_startTimes);
+                        cpuHelpAllEndTimes.splice(cpuHelpAllEndTimes.end(), worker.second->_endTimes);
+                    });
                }
+                INFO_RUN([this, &cpuHelpAllStartTimes, &cpuHelpAllEndTimes]() {
+                    cpuHelpAllStartTimes.sort(std::less<Time>());
+                    cpuHelpAllEndTimes.sort(std::less<Time>());
+                    _cpuHelpInferCount = cpuHelpAllStartTimes.size();
+                    IE_ASSERT(_cpuHelpInferCount == cpuHelpAllEndTimes.size());
+                });
                if (destroynum == _workerRequests["CPU_HELP"].size()) {
                    std::lock_guard<std::mutex> lock(_confMutex);
+                    INFO_RUN([this, &cpuHelpAllStartTimes, &cpuHelpAllEndTimes, &destroynum]() {
+                        _cpuHelpReleaseTime = std::chrono::steady_clock::now();
+                        if (cpuHelpAllStartTimes.size() >= destroynum + 1) {
+                            //remove last worksize num requests, so the fps will be more accuracy
+                            cpuHelpAllStartTimes.resize(_cpuHelpInferCount - destroynum);
+                            cpuHelpAllEndTimes.resize(_cpuHelpInferCount - destroynum);
+                            std::chrono::duration<double, std::milli> durtation =
+                                cpuHelpAllEndTimes.back() - cpuHelpAllStartTimes.front();
+                            _cpuHelpFps = cpuHelpAllStartTimes.size() * 1000 / durtation.count();
+                        }
+                    });
+                    LOG_INFO("[AUTOPLUGIN] release all work requests of CPU_HELP");
                    _workerRequests["CPU_HELP"].clear();
                    _loadContext[CPU].executableNetwork._ptr.reset();
                    _loadContext[CPU].executableNetwork._so.reset();
@@ -549,17 +575,49 @@ MultiDeviceExecutableNetwork::~MultiDeviceExecutableNetwork() {
        // stop accepting any idle requests back (for re-scheduling)
        idleWorker.second.set_capacity(0);
    }
-    for (auto&& _workerRequest : _workerRequests) {
-         unsigned int count = 0;
-         for (auto& request : _workerRequest.second) {
-             count += request._inferCount;
-         }
-         if (_workerRequest.first == "CPU_HELP") {
-             LOG_INFO("[AUTOPLUGIN]CPU_HELP:infer:%ld", _cpuHelpInferCount + count);
-         } else {
-             LOG_INFO("[AUTOPLUGIN]%s:infer:%ld", _workerRequest.first.c_str(), count);
-         }
-    }
+    INFO_RUN([this] {
+        for (auto&& _workerRequest : _workerRequests) {
+            std::list<Time> reqAllStartTimes;
+            std::list<Time> reqAllEndTimes;
+            for (auto& request : _workerRequest.second) {
+                reqAllStartTimes.splice(reqAllStartTimes.end(), request._startTimes);
+                reqAllEndTimes.splice(reqAllEndTimes.end(), request._endTimes);
+            }
+            unsigned int count = reqAllStartTimes.size();
+            IE_ASSERT(count == reqAllEndTimes.size());
+            reqAllStartTimes.sort(std::less<Time>());
+            reqAllEndTimes.sort(std::less<Time>());
+            if (_workerRequest.first == "CPU_HELP") {
+                LOG_INFO("[AUTOPLUGIN]CPU_HELP:infer:%ld", _cpuHelpInferCount + count);
+                if (_cpuHelpFps > 0.0) {
+                    LOG_INFO("[AUTOPLUGIN]CPU_HELP:fps:%lf", _cpuHelpFps);
+                } else if (count >= 1) {
+                    std::chrono::duration<double, std::milli> durtation =
+                        reqAllEndTimes.back() - reqAllStartTimes.front();
+                    LOG_INFO("[AUTOPLUGIN]CPU_HELP:fps:%lf", count * 1000 / durtation.count());
+                }
+            } else {
+                LOG_INFO("[AUTOPLUGIN]%s:infer:%ld", _workerRequest.first.c_str(), count);
+                auto n = reqAllStartTimes.size();
+                Time time;
+                while (!reqAllStartTimes.empty()) {
+                    time = reqAllStartTimes.front();
+                    if (time < _cpuHelpReleaseTime) {
+                        reqAllStartTimes.pop_front();
+                        n--;
+                    } else {
+                        break;
+                    }
+                }
+                if (n >= 1) {
+                    std::chrono::duration<double, std::milli> durtation =
+                        reqAllEndTimes.back() - time;
+                    LOG_INFO("[AUTOPLUGIN]%s:fps:%lf", _workerRequest.first.c_str(),
+                        n * 1000 / durtation.count());
+                }
+            }
+        }
+    });
    {
        std::lock_guard<std::mutex> lock(_confMutex);
        _workerRequests.clear();
--- a/src/plugins/auto/executable_network.hpp
+++ b/src/plugins/auto/executable_network.hpp
@@ -75,9 +75,9 @@ enum AutoLoadContextIndex {
     CONTEXTNUM = 2
 };

+using Time = std::chrono::time_point<std::chrono::steady_clock>;
 template<typename T>
 using DeviceMap = std::unordered_map<DeviceName, T>;
-
 class MultiDeviceExecutableNetwork : public InferenceEngine::ExecutableNetworkThreadSafeDefault,
                                     public InferenceEngine::ITaskExecutor {
 public:
@@ -86,7 +86,8 @@ public:
        InferenceEngine::SoIInferRequestInternal  _inferRequest;
        InferenceEngine::Task                     _task;
        std::exception_ptr                        _exceptionPtr = nullptr;
-        unsigned int                              _inferCount = 0;
+        std::list<Time>                           _startTimes;
+        std::list<Time>                           _endTimes;
        int                                       _index = 0;
    };
    using NotBusyWorkerRequests = InferenceEngine::ThreadSafeBoundedPriorityQueue<std::pair<int, WorkerInferRequest*>>;
@@ -161,7 +162,9 @@ private:
    mutable std::mutex                                                  _confMutex;
    bool                                                                _exitFlag = {false};
    const InferenceEngine::CNNNetwork                                   _network;
-    int                                                                 _cpuHelpInferCount = 0;
+    unsigned int                                                        _cpuHelpInferCount = 0;
+    double                                                              _cpuHelpFps = 0.0;
+    Time                                                                _cpuHelpReleaseTime;
 };

 }  // namespace MultiDevicePlugin
--- a/src/plugins/auto/utils/log.cpp
+++ b/src/plugins/auto/utils/log.cpp
@@ -8,5 +8,5 @@

 namespace MultiDevicePlugin {
 uint32_t Log::defaultLogLevel = static_cast<uint32_t>(LogLevel::LOG_NONE);
-std::vector<std::string> Log::validFormat = {"u", "d", "s", "ld", "lu"};
+std::vector<std::string> Log::validFormat = {"u", "d", "s", "ld", "lu", "lf"};
 } // namespace MultiDevicePlugin
--- a/src/plugins/auto/utils/log.hpp
+++ b/src/plugins/auto/utils/log.hpp
@@ -14,6 +14,7 @@
 #include <vector>
 #include <stdexcept>
 #include <algorithm>
+#include <functional>

 #include "singleton.hpp"
 #include "time_utils.hpp"
@@ -86,6 +87,8 @@ enum class LogLevel : uint32_t {
    LOG_FREQUENT = static_cast<uint32_t>(LogLevel::FREQUENT) | static_cast<uint32_t>(LogLevel::LOG_TRACE)
 };

+using LogTask = std::function<void()>;
+
 class Log : public Singleton<Log> {
 public:
    void setPrefix(std::string prefix);
@@ -95,6 +98,7 @@ public:
    template <typename... Args>
    void doLog(bool on, bool isTraceCallStack, LogLevel level, const char* levelStr, const char* file,
        const char* func, long line, const char* tag, const char* fmt, Args... args);
+    void doRun(LogLevel level, const LogTask& task);
 #ifdef MULTIUNITTEST
    Log(std::string unittest):Log() {
    }
@@ -256,6 +260,13 @@ inline void Log::doLog(bool on, bool isTraceCallStack, LogLevel level, const cha
    print(stream);
 }

+inline void Log::doRun(LogLevel level, const LogTask& task) {
+    if (!(static_cast<uint32_t>(level) & static_cast<uint32_t>(logLevel))) {
+        return;
+    }
+    task();
+}
+
 inline std::string Log::colorBegin(MultiDevicePlugin::LogLevel logLevel) {
    if (logLevel == LogLevel::WARN) {
        return std::string(CYAN);
--- a/src/plugins/auto/utils/log_util.hpp
+++ b/src/plugins/auto/utils/log_util.hpp
@@ -53,6 +53,14 @@ inline bool setLogLevel(std::string logLevel) {
    }
 }

+inline void INFO_RUN(const LogTask& task) {
+   HLogger->doRun(MultiDevicePlugin::LogLevel::INFO, task);
+}
+
+inline void DEBUG_RUN(const LogTask& task) {
+   HLogger->doRun(MultiDevicePlugin::LogLevel::DEBUG, task);
+}
+
 } // namespace MultiDevicePlugin

 #endif //MULTIDEVICEPLUGIN_HLOG_H
--- a/src/tests/unit/auto/log_utils_format_test.cpp
+++ b/src/tests/unit/auto/log_utils_format_test.cpp
@@ -112,6 +112,18 @@ TEST_F(LogUtilsFormatTest, format_s_d_ld_u_lu2) {
    EXPECT_TRUE(std::regex_search(printResult, regex));
 }

+TEST_F(LogUtilsFormatTest, format_lf) {
+    std::string printResult = "";
+    std::string pattern{"abc"};
+    std::regex regex(pattern);
+    ON_CALL(*(HLogger), print(_)).WillByDefault([&](std::stringstream& stream) {
+            printResult = stream.str();
+            });
+    EXPECT_CALL(*(HLogger), print(_)).Times(1);
+    LOG_DEBUG("%lfabc", 1.33);
+    EXPECT_TRUE(std::regex_search(printResult, regex));
+}
+
 TEST_F(LogUtilsFormatTest, format_p) {
    std::string printResult = "";
    std::string pattern{"not valid"};
--- a/src/tests/unit/auto/log_utils_test.cpp
+++ b/src/tests/unit/auto/log_utils_test.cpp
@@ -85,6 +85,29 @@ TEST_P(LogUtilsTest, setLogLevel) {
    printLog();
 }

+TEST_P(LogUtilsTest, INFO_RUN) {
+    setLogLevel(_logLevel);
+    int a = 0;
+    INFO_RUN([&a](){a++;});
+    if (_logLevel == "LOG_INFO" || _logLevel == "LOG_DEBUG" ||
+            _logLevel == "LOG_TRACE") {
+        EXPECT_EQ(a, 1);
+    } else {
+        EXPECT_EQ(a, 0);
+    }
+}
+
+TEST_P(LogUtilsTest, DEBUG_RUN) {
+    setLogLevel(_logLevel);
+    int a = 0;
+    DEBUG_RUN([&a](){a++;});
+    if (_logLevel == "LOG_DEBUG" || _logLevel == "LOG_TRACE") {
+        EXPECT_EQ(a, 1);
+    } else {
+        EXPECT_EQ(a, 0);
+    }
+}
+
 #if 0
 TEST_P(LogUtilsTest, setEnvNotAffectSetLogLevel) {
    EXPECT_CALL(*(HLogger), print(_)).Times(_expectCallNum);