[GPU] Add GPU plugin metric to get statistics of GPU memory allocated by engine (#7758)

Andrew Kwangwoong Park 2021-10-13 11:40:51 +09:00 committed by GitHub
parent 9e1231ac8d
commit 4e2cc3e370
7 changed files with 95 additions and 19 deletions


@@ -71,6 +71,10 @@ Or the current temperature of `MYRIAD` device:
 @snippet snippets/InferenceEngine_QueryAPI4.cpp part4
+The code below demonstrates how to get memory statistics of the `GPU` device:
+@snippet snippets/InferenceEngine_QueryAPI6.cpp part6
 ### GetConfig()
 The method is used to get information about configuration values the executable network has been created with:


@@ -0,0 +1,12 @@
+#include <ie_core.hpp>
+
+int main() {
+using namespace InferenceEngine;
+//! [part6]
+InferenceEngine::Core core;
+auto network = core.ReadNetwork("sample.xml");
+auto exeNetwork = core.LoadNetwork(network, "GPU");
+std::map<std::string, uint64_t> statistics_map = exeNetwork.GetMetric(GPU_METRIC_KEY(MEMORY_STATISTICS));
+//! [part6]
+return 0;
+}
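
As a side note, here is a minimal sketch of how the returned map can be consumed. The explicit include of gpu/gpu_config.hpp for GPU_METRIC_KEY and the "<allocation_type>_current" / "<allocation_type>_peak" key naming are assumptions taken from the engine changes further down; "sample.xml" is a placeholder model path.

#include <gpu/gpu_config.hpp>  // assumed home of GPU_METRIC_KEY (declared in the gpu config header below)
#include <ie_core.hpp>
#include <cstdint>
#include <iostream>
#include <map>

int main() {
    InferenceEngine::Core core;
    auto network = core.ReadNetwork("sample.xml");  // placeholder model path
    auto exeNetwork = core.LoadNetwork(network, "GPU");
    std::map<std::string, uint64_t> statistics_map =
        exeNetwork.GetMetric(GPU_METRIC_KEY(MEMORY_STATISTICS));
    // Keys pair an allocation type with a "_current" or "_peak" suffix,
    // e.g. "usm_host_current" (naming taken from engine::get_memory_statistics below).
    for (const auto& kv : statistics_map) {
        std::cout << kv.first << ": " << kv.second << " bytes" << std::endl;
    }
    return 0;
}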


@@ -128,6 +128,7 @@ InferenceEngine::Parameter CLDNNExecNetwork::GetMetric(const std::string &name)
         metrics.push_back(METRIC_KEY(SUPPORTED_METRICS));
         metrics.push_back(METRIC_KEY(SUPPORTED_CONFIG_KEYS));
         metrics.push_back(METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS));
+        metrics.push_back(GPU_METRIC_KEY(MEMORY_STATISTICS));
         IE_SET_METRIC_RETURN(SUPPORTED_METRICS, metrics);
     } else if (name == METRIC_KEY(SUPPORTED_CONFIG_KEYS)) {
         std::vector<std::string> configKeys;
@@ -139,6 +140,16 @@ InferenceEngine::Parameter CLDNNExecNetwork::GetMetric(const std::string &name)
         if (m_config.perfHintsConfig.ovPerfHint != CONFIG_VALUE(LATENCY))
             nr *= 2;
         IE_SET_METRIC_RETURN(OPTIMAL_NUMBER_OF_INFER_REQUESTS, nr);
+    } else if (name == GPU_METRIC_KEY(MEMORY_STATISTICS)) {
+        std::map<std::string, uint64_t> statistics;
+        if (m_context != nullptr) {
+            auto impl = getContextImpl(m_context);
+            impl->acquire_lock();
+            std::shared_ptr<cldnn::engine> eng = impl->GetEngine();
+            eng->get_memory_statistics(&statistics);
+            impl->release_lock();
+        }
+        IE_SET_METRIC_RETURN(GPU_MEMORY_STATISTICS, statistics);
     } else {
         IE_THROW() << "Unsupported ExecutableNetwork metric: " << name;
     }
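
One design note on the branch above: acquire_lock() and release_lock() are called manually, so the lock would not be released if get_memory_statistics() threw. A hypothetical RAII wrapper over the same pair of calls, not part of this change (ContextImpl stands in for whatever getContextImpl() returns):

// Hypothetical RAII guard for the acquire_lock()/release_lock() pair above.
// ContextImpl stands in for the type returned by getContextImpl(m_context).
template <typename ContextImpl>
class context_lock_guard {
public:
    explicit context_lock_guard(ContextImpl& impl) : m_impl(impl) { m_impl.acquire_lock(); }
    ~context_lock_guard() { m_impl.release_lock(); }  // runs even during stack unwinding
    context_lock_guard(const context_lock_guard&) = delete;
    context_lock_guard& operator=(const context_lock_guard&) = delete;
private:
    ContextImpl& m_impl;
};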


@@ -45,6 +45,12 @@ DECLARE_GPU_METRIC_KEY(UARCH_VERSION, std::string);
 */
 DECLARE_GPU_METRIC_KEY(EXECUTION_UNITS_COUNT, int);
+/**
+ * @brief Metric to get statistics of GPU memory allocated by the engine for each allocation type.
+ * It contains information about both current and peak memory usage.
+ */
+DECLARE_GPU_METRIC_KEY(MEMORY_STATISTICS, std::map<std::string, uint64_t>);
+
 /**
  * @brief Possible return value for OPTIMIZATION_CAPABILITIES metric
  * - "HW_MATMUL" - Defines if device has hardware block for matrix multiplication


@@ -166,6 +166,28 @@ INSTANTIATE_TEST_SUITE_P(
 // Executable Network GetMetric
 //
+using IEClassExecutableNetworkGetMetricTest_GPU_MEMORY_STATISTICS = IEClassBaseTestP;
+
+TEST_P(IEClassExecutableNetworkGetMetricTest_GPU_MEMORY_STATISTICS, GetMetricNoThrow) {
+    SKIP_IF_CURRENT_TEST_IS_DISABLED();
+    Core ie;
+    Parameter p;
+
+    ExecutableNetwork exeNetwork = ie.LoadNetwork(simpleNetwork, deviceName);
+
+    ASSERT_NO_THROW(p = exeNetwork.GetMetric(GPU_METRIC_KEY(MEMORY_STATISTICS)));
+    std::map<std::string, uint64_t> t = p;
+
+    std::cout << "Memory Statistics: " << std::endl;
+    for (auto &&kv : t) {
+        std::cout << kv.first << ": " << kv.second << " bytes" << std::endl;
+    }
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    nightly_IEClassExecutableNetworkGetMetricTest, IEClassExecutableNetworkGetMetricTest_GPU_MEMORY_STATISTICS,
+    ::testing::Values("GPU")
+);
+
 INSTANTIATE_TEST_SUITE_P(
     nightly_IEClassExecutableNetworkGetMetricTest, IEClassExecutableNetworkGetMetricTest_OPTIMAL_NUMBER_OF_INFER_REQUESTS,
     ::testing::Values("GPU", "MULTI:GPU", "HETERO:GPU", "AUTO:GPU,CPU")


@@ -109,6 +109,10 @@ public:
     /// Returns the amount of GPU memory of the specified allocation @p type that is currently used by the engine
     uint64_t get_used_device_memory(allocation_type type) const;

+    /// Returns statistics of GPU memory allocated by the engine in the current process for all allocation types.
+    /// @note It contains information about both current and peak memory usage.
+    void get_memory_statistics(std::map<std::string, uint64_t>* statistics) const;
+
     /// Adds @p bytes count to the currently used memory size of the specified allocation @p type
     void add_memory_used(uint64_t bytes, allocation_type type);

@@ -153,9 +157,10 @@ protected:
     engine(const device::ptr device, const engine_configuration& configuration);

     const device::ptr _device;
     engine_configuration _configuration;

-    std::map<allocation_type, std::atomic<uint64_t>> memory_usage_map;
-    std::map<allocation_type, std::atomic<uint64_t>> peak_memory_usage_map;
+    mutable std::mutex _mutex;
+    std::map<allocation_type, std::atomic<uint64_t>> _memory_usage_map;
+    std::map<allocation_type, std::atomic<uint64_t>> _peak_memory_usage_map;
 };

 } // namespace cldnn
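
For plugin-internal callers, here is a small sketch of using this API directly. The Engine template parameter stands in for cldnn::engine so the fragment compiles standalone:

#include <cstdint>
#include <iostream>
#include <map>
#include <string>

// Sketch: dump an engine's per-allocation-type memory statistics.
// Engine stands in for cldnn::engine (taken as a template parameter
// so this example does not depend on clDNN headers).
template <typename Engine>
void dump_memory_statistics(const Engine& eng) {
    std::map<std::string, uint64_t> statistics;
    eng.get_memory_statistics(&statistics);  // fills "<type>_current" / "<type>_peak" entries
    for (const auto& kv : statistics) {
        std::cout << kv.first << " = " << kv.second << " bytes\n";
    }
}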
} // namespace cldnn } // namespace cldnn


@@ -120,48 +120,64 @@ memory_ptr engine::share_surface(const layout& layout, shared_surface surf, uint
 #endif  // _WIN32

 uint64_t engine::get_max_used_device_memory() const {
+    std::lock_guard<std::mutex> guard(_mutex);
     uint64_t total_peak_memory_usage {0};
-    for (auto const& m : peak_memory_usage_map) {
+    for (auto const& m : _peak_memory_usage_map) {
         total_peak_memory_usage += m.second.load();
     }
     return total_peak_memory_usage;
 }

 uint64_t engine::get_max_used_device_memory(allocation_type type) const {
+    std::lock_guard<std::mutex> guard(_mutex);
     uint64_t peak_memory_usage {0};
-    auto iter = peak_memory_usage_map.find(type);
-    if (iter != peak_memory_usage_map.end()) {
+    auto iter = _peak_memory_usage_map.find(type);
+    if (iter != _peak_memory_usage_map.end()) {
         peak_memory_usage = iter->second.load();
     }
     return peak_memory_usage;
 }

 uint64_t engine::get_used_device_memory(allocation_type type) const {
+    std::lock_guard<std::mutex> guard(_mutex);
     uint64_t memory_usage {0};
-    auto iter = memory_usage_map.find(type);
-    if (iter != memory_usage_map.end()) {
+    auto iter = _memory_usage_map.find(type);
+    if (iter != _memory_usage_map.end()) {
         memory_usage = iter->second.load();
     }
     return memory_usage;
 }

+void engine::get_memory_statistics(std::map<std::string, uint64_t>* statistics) const {
+    for (auto const& m : _memory_usage_map) {
+        std::ostringstream oss;
+        oss << m.first << "_current";
+        (*statistics)[oss.str()] = m.second.load();
+    }
+    for (auto const& m : _peak_memory_usage_map) {
+        std::ostringstream oss;
+        oss << m.first << "_peak";
+        (*statistics)[oss.str()] = m.second.load();
+    }
+}
+
 void engine::add_memory_used(size_t bytes, allocation_type type) {
-    if (!memory_usage_map.count(type) && !peak_memory_usage_map.count(type)) {
-        static std::mutex m;
-        std::lock_guard<std::mutex> guard(m);
-        memory_usage_map[type] = 0;
-        peak_memory_usage_map[type] = 0;
+    std::lock_guard<std::mutex> guard(_mutex);
+    if (!_memory_usage_map.count(type) && !_peak_memory_usage_map.count(type)) {
+        _memory_usage_map[type] = 0;
+        _peak_memory_usage_map[type] = 0;
     }
-    memory_usage_map[type] += bytes;
-    if (memory_usage_map[type] > peak_memory_usage_map[type]) {
-        peak_memory_usage_map[type] = memory_usage_map[type].load();
+    _memory_usage_map[type] += bytes;
+    if (_memory_usage_map[type] > _peak_memory_usage_map[type]) {
+        _peak_memory_usage_map[type] = _memory_usage_map[type].load();
     }
 }

 void engine::subtract_memory_used(size_t bytes, allocation_type type) {
-    auto iter = memory_usage_map.find(type);
-    if (iter != memory_usage_map.end()) {
-        memory_usage_map[type] -= bytes;
+    std::lock_guard<std::mutex> guard(_mutex);
+    auto iter = _memory_usage_map.find(type);
+    if (iter != _memory_usage_map.end()) {
+        _memory_usage_map[type] -= bytes;
     } else {
         throw std::runtime_error("Attempt to free unallocated memory");
     }
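
Taken together, the scheme above keeps the per-type counters atomic while the new engine-level _mutex serializes both the first-time insertion of a type into the maps and the read-modify-write that updates the peak. A self-contained sketch of the same technique follows; the allocation_type enum and its names are stand-ins for the clDNN ones, and unlike the patch the sketch also holds the lock while snapshotting, so iteration cannot race a first-time insertion in add():

#include <atomic>
#include <cstdint>
#include <map>
#include <mutex>
#include <stdexcept>
#include <string>

// Stand-in for cldnn::allocation_type.
enum class allocation_type { cl_mem, usm_host, usm_device };

static std::string name_of(allocation_type t) {
    switch (t) {
        case allocation_type::cl_mem:     return "cl_mem";
        case allocation_type::usm_host:   return "usm_host";
        case allocation_type::usm_device: return "usm_device";
    }
    return "unknown";
}

class memory_tracker {
public:
    void add(uint64_t bytes, allocation_type type) {
        std::lock_guard<std::mutex> guard(_mutex);
        _current[type] += bytes;                 // atomic fetch-add
        if (_current[type] > _peak[type])        // peak update guarded by _mutex
            _peak[type] = _current[type].load();
    }
    void subtract(uint64_t bytes, allocation_type type) {
        std::lock_guard<std::mutex> guard(_mutex);
        auto iter = _current.find(type);
        if (iter == _current.end())
            throw std::runtime_error("Attempt to free unallocated memory");
        iter->second -= bytes;
    }
    // Snapshot as "<type>_current" / "<type>_peak" entries, mirroring
    // engine::get_memory_statistics above.
    std::map<std::string, uint64_t> statistics() const {
        std::lock_guard<std::mutex> guard(_mutex);
        std::map<std::string, uint64_t> out;
        for (auto const& m : _current)
            out[name_of(m.first) + "_current"] = m.second.load();
        for (auto const& m : _peak)
            out[name_of(m.first) + "_peak"] = m.second.load();
        return out;
    }
private:
    mutable std::mutex _mutex;
    std::map<allocation_type, std::atomic<uint64_t>> _current;
    std::map<allocation_type, std::atomic<uint64_t>> _peak;
};

The sketch drops the patch's explicit zero-initialization of newly seen types because std::map::operator[] already value-initializes the atomic counter to zero.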