From 9972410aa39977c0f7ab6edfc7da1863532a9e73 Mon Sep 17 00:00:00 2001 From: Andrew Kwangwoong Park Date: Thu, 18 Nov 2021 20:26:53 +0900 Subject: [PATCH] [GPU] Add IE Core GPU plugin metric to query overall memory statistics for GPU device (#8421) --- docs/IE_DG/InferenceEngine_QueryAPI.md | 4 - docs/IE_DG/supported_plugins/GPU.md | 5 + ...ceEngine_QueryAPI6.cpp => GPU_Metric0.cpp} | 6 +- .../src/cldnn_engine/cldnn_engine.cpp | 38 +++- .../src/cldnn_engine/cldnn_engine.h | 5 +- .../cldnn_engine/cldnn_executable_network.cpp | 11 - .../behavior/plugin/core_integration.cpp | 204 ++++++++++++++++-- .../behavior/plugin/core_integration.hpp | 1 + .../clDNN/api/cldnn/runtime/engine.hpp | 2 +- .../thirdparty/clDNN/runtime/engine.cpp | 8 +- 10 files changed, 239 insertions(+), 45 deletions(-) rename docs/snippets/{InferenceEngine_QueryAPI6.cpp => GPU_Metric0.cpp} (61%) diff --git a/docs/IE_DG/InferenceEngine_QueryAPI.md b/docs/IE_DG/InferenceEngine_QueryAPI.md index 23360cc62ab..34579924bad 100644 --- a/docs/IE_DG/InferenceEngine_QueryAPI.md +++ b/docs/IE_DG/InferenceEngine_QueryAPI.md @@ -71,10 +71,6 @@ Or the current temperature of `MYRIAD` device: @snippet snippets/InferenceEngine_QueryAPI4.cpp part4 -The code below demonstrates how to get memory statistics of `GPU` device: - -@snippet snippets/InferenceEngine_QueryAPI6.cpp part6 - ### GetConfig() The method is used to get information about configuration values the executable network has been created with: diff --git a/docs/IE_DG/supported_plugins/GPU.md b/docs/IE_DG/supported_plugins/GPU.md index 1c4c17430bf..f96e3fcba99 100644 --- a/docs/IE_DG/supported_plugins/GPU.md +++ b/docs/IE_DG/supported_plugins/GPU.md @@ -122,6 +122,11 @@ When specifying key values as raw strings (that is, when using Python API), omit | `KEY_TUNING_MODE` | `TUNING_DISABLED`
`TUNING_CREATE`
`TUNING_USE_EXISTING` | `TUNING_DISABLED` | Disable inference kernel tuning
Create tuning file (expect much longer runtime)
Use an existing tuning file. **Deprecated**. Will be removed in the next release | | `KEY_TUNING_FILE` | `""` | `""` | Tuning file to create / use. **Deprecated**. Will be removed in the next release | +## Querying GPU specific metric keys +* MEMORY_STATISTICS : Returns overall memory statistics of `GPU` device allocated by engine with allocation types. If the network has `TensorIterator` or `Loop` operation which is not unrolled, there will be additional allocation at the first inference phase. In such a case, querying for `MEMORY_STATISTICS` should be done after first inference for more accurate result. The code below demonstrates how to query overall memory statistics of `GPU` device: + +@snippet snippets/GPU_Metric0.cpp part0 + ## GPU Context and Video Memory Sharing RemoteBlob API See [RemoteBlob API of GPU Plugin](GPU_RemoteBlob_API.md) diff --git a/docs/snippets/InferenceEngine_QueryAPI6.cpp b/docs/snippets/GPU_Metric0.cpp similarity index 61% rename from docs/snippets/InferenceEngine_QueryAPI6.cpp rename to docs/snippets/GPU_Metric0.cpp index b13812107bf..77de1bbcdbf 100644 --- a/docs/snippets/InferenceEngine_QueryAPI6.cpp +++ b/docs/snippets/GPU_Metric0.cpp @@ -2,11 +2,11 @@ int main() { using namespace InferenceEngine; -//! [part6] InferenceEngine::Core core; auto network = core.ReadNetwork("sample.xml"); auto exeNetwork = core.LoadNetwork(network, "GPU"); -std::map<std::string, uint64_t> statistics_map = exeNetwork.GetMetric(GPU_METRIC_KEY(MEMORY_STATISTICS)); -//! [part6] +std::map<std::string, uint64_t> statistics_map = core.GetMetric("GPU", GPU_METRIC_KEY(MEMORY_STATISTICS)); +//! 
[part0] return 0; } diff --git a/inference-engine/src/cldnn_engine/cldnn_engine.cpp b/inference-engine/src/cldnn_engine/cldnn_engine.cpp index df5a68b6518..fe1d2724f1d 100644 --- a/inference-engine/src/cldnn_engine/cldnn_engine.cpp +++ b/inference-engine/src/cldnn_engine/cldnn_engine.cpp @@ -176,6 +176,25 @@ void clDNNEngine::UpdateConfig(CLDNNPlugin::Config& conf, const InferenceEngine: } } +void clDNNEngine::UpdateStatistics(const CLDNNRemoteCLContext::Ptr& context) const { + OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "clDNNEngine::UpdateStatistics"); + { + std::lock_guard lock(engine_mutex); + + std::map statistics; + auto impl = getContextImpl(context); + impl->acquire_lock(); + std::shared_ptr eng = impl->GetEngine(); + statistics = eng->get_memory_statistics(); + impl->release_lock(); + + // if the same context exists, the statistics is replaced with the latest one + // (currently, memory usage is accumulated for several networks in the same context) + // if it does not exist, a new statistics is added + statistics_map[context] = statistics; + } +} + std::map clDNNEngine::ConvertPerfHintsToConfig( const std::map& network_config, const CLDNNPlugin::Config& plugin_config) const { @@ -258,7 +277,9 @@ IExecutableNetworkInternal::Ptr clDNNEngine::LoadExeNetworkImpl(const InferenceE auto transformedNetwork = CloneAndTransformNetwork(network, conf); { OV_ITT_SCOPED_TASK(itt::domains::CLDNNPlugin, "clDNNEngine::LoadExeNetworkImpl::CreateExeNetwork"); - return std::make_shared(transformedNetwork, context, conf); + CLDNNExecNetwork::Ptr exeNetwork = std::make_shared(transformedNetwork, context, conf); + UpdateStatistics(context); + return exeNetwork; } } @@ -643,6 +664,7 @@ Parameter clDNNEngine::GetMetric(const std::string& name, const std::map range = std::make_tuple(1, 2); IE_SET_METRIC_RETURN(RANGE_FOR_STREAMS, range); + } else if (name == GPU_METRIC_KEY(MEMORY_STATISTICS)) { + std::map statistics; + for (auto const &item : statistics_map) { + // Before 
collecting memory statistics of each context, it's updated with the latest memory statistics from engine. + UpdateStatistics(item.first); + for (auto const &kv : item.second) { + if (!statistics.count(kv.first)) { + statistics[kv.first] = kv.second; + } else { + statistics[kv.first] += kv.second; + } + } + } + IE_SET_METRIC_RETURN(GPU_MEMORY_STATISTICS, statistics); } else { IE_THROW() << "Unsupported metric key " << name; } diff --git a/inference-engine/src/cldnn_engine/cldnn_engine.h b/inference-engine/src/cldnn_engine/cldnn_engine.h index 0818e136967..1e2cb5cf815 100644 --- a/inference-engine/src/cldnn_engine/cldnn_engine.h +++ b/inference-engine/src/cldnn_engine/cldnn_engine.h @@ -25,7 +25,9 @@ class clDNNEngine : public InferenceEngine::IInferencePlugin, // key: device_id, value: cldnn device std::map device_map; - std::mutex engine_mutex; + // key: cldnn context, value: memory statistics + mutable std::map> statistics_map; + mutable std::mutex engine_mutex; mutable CLDNNRemoteCLContext::Ptr m_defaultContext; @@ -38,6 +40,7 @@ class clDNNEngine : public InferenceEngine::IInferencePlugin, void RegisterPrimitives(); void UpdateConfig(Config& conf, const InferenceEngine::CNNNetwork &network, const std::map ¶ms) const; + void UpdateStatistics(const CLDNNRemoteCLContext::Ptr& context) const; public: clDNNEngine(); diff --git a/inference-engine/src/cldnn_engine/cldnn_executable_network.cpp b/inference-engine/src/cldnn_engine/cldnn_executable_network.cpp index 7e465f0f257..8c1eeef3e71 100644 --- a/inference-engine/src/cldnn_engine/cldnn_executable_network.cpp +++ b/inference-engine/src/cldnn_engine/cldnn_executable_network.cpp @@ -155,7 +155,6 @@ InferenceEngine::Parameter CLDNNExecNetwork::GetMetric(const std::string &name) metrics.push_back(METRIC_KEY(SUPPORTED_METRICS)); metrics.push_back(METRIC_KEY(SUPPORTED_CONFIG_KEYS)); metrics.push_back(METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS)); - metrics.push_back(GPU_METRIC_KEY(MEMORY_STATISTICS)); 
IE_SET_METRIC_RETURN(SUPPORTED_METRICS, metrics); } else if (name == METRIC_KEY(SUPPORTED_CONFIG_KEYS)) { std::vector configKeys; @@ -167,16 +166,6 @@ InferenceEngine::Parameter CLDNNExecNetwork::GetMetric(const std::string &name) if (m_config.perfHintsConfig.ovPerfHint != CONFIG_VALUE(LATENCY)) nr *= 2; IE_SET_METRIC_RETURN(OPTIMAL_NUMBER_OF_INFER_REQUESTS, nr); - } else if (name == GPU_METRIC_KEY(MEMORY_STATISTICS)) { - std::map statistics; - if (m_context != nullptr) { - auto impl = getContextImpl(m_context); - impl->acquire_lock(); - std::shared_ptr eng = impl->GetEngine(); - eng->get_memory_statistics(&statistics); - impl->release_lock(); - } - IE_SET_METRIC_RETURN(GPU_MEMORY_STATISTICS, statistics); } else { IE_THROW() << "Unsupported ExecutableNetwork metric: " << name; } diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/plugin/core_integration.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/plugin/core_integration.cpp index 38c4e5a808e..9452c2679c9 100644 --- a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/plugin/core_integration.cpp +++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/plugin/core_integration.cpp @@ -153,6 +153,189 @@ INSTANTIATE_TEST_SUITE_P( ::testing::Values("GPU") ); +using IEClassGetMetricTest_GPU_MEMORY_STATISTICS_DEFAULT = BehaviorTestsUtils::IEClassBaseTestP; +TEST_P(IEClassGetMetricTest_GPU_MEMORY_STATISTICS_DEFAULT, GetMetricAndPrintNoThrow) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + InferenceEngine::Core ie; + InferenceEngine::Parameter p; + + InferenceEngine::ExecutableNetwork exec_net = ie.LoadNetwork(simpleCnnNetwork, deviceName); + + ASSERT_NO_THROW(p = ie.GetMetric(deviceName, GPU_METRIC_KEY(MEMORY_STATISTICS))); + std::map t = p; + + ASSERT_FALSE(t.empty()); + std::cout << "Memory Statistics: " << std::endl; + for (auto &&kv : t) { + ASSERT_NE(kv.second, 0); + std::cout << kv.first << ": " << 
kv.second << " bytes" << std::endl; + } + + ASSERT_METRIC_SUPPORTED_IE(GPU_METRIC_KEY(MEMORY_STATISTICS)); +} + +INSTANTIATE_TEST_SUITE_P( + nightly_IEClassGetMetricTest, IEClassGetMetricTest_GPU_MEMORY_STATISTICS_DEFAULT, + ::testing::Values("GPU") +); + +using IEClassGetMetricTest_GPU_MEMORY_STATISTICS_MULTIPLE_NETWORKS = BehaviorTestsUtils::IEClassBaseTestP; +TEST_P(IEClassGetMetricTest_GPU_MEMORY_STATISTICS_MULTIPLE_NETWORKS, GetMetricAndPrintNoThrow) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + InferenceEngine::Core ie; + InferenceEngine::Parameter p; + + InferenceEngine::ExecutableNetwork exec_net1 = ie.LoadNetwork(simpleCnnNetwork, deviceName); + + ASSERT_NO_THROW(p = ie.GetMetric(deviceName, GPU_METRIC_KEY(MEMORY_STATISTICS))); + std::map t1 = p; + + ASSERT_FALSE(t1.empty()); + for (auto &&kv : t1) { + ASSERT_NE(kv.second, 0); + } + + InferenceEngine::ExecutableNetwork exec_net2 = ie.LoadNetwork(simpleCnnNetwork, deviceName); + + ASSERT_NO_THROW(p = ie.GetMetric(deviceName, GPU_METRIC_KEY(MEMORY_STATISTICS))); + std::map t2 = p; + + ASSERT_FALSE(t2.empty()); + for (auto &&kv : t2) { + ASSERT_NE(kv.second, 0); + auto iter = t1.find(kv.first); + if (iter != t1.end()) { + ASSERT_EQ(kv.second, t1[kv.first] * 2); + } + } + + ASSERT_METRIC_SUPPORTED_IE(GPU_METRIC_KEY(MEMORY_STATISTICS)); +} + +INSTANTIATE_TEST_SUITE_P( + nightly_IEClassGetMetricTest, IEClassGetMetricTest_GPU_MEMORY_STATISTICS_MULTIPLE_NETWORKS, + ::testing::Values("GPU") +); + +using IEClassGetMetricTest_GPU_MEMORY_STATISTICS_CHECK_VALUES = BehaviorTestsUtils::IEClassBaseTestP; +TEST_P(IEClassGetMetricTest_GPU_MEMORY_STATISTICS_CHECK_VALUES, GetMetricAndPrintNoThrow) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + InferenceEngine::Core ie; + InferenceEngine::Parameter p; + + ASSERT_NO_THROW(p = ie.GetMetric(deviceName, GPU_METRIC_KEY(MEMORY_STATISTICS))); + std::map t1 = p; + ASSERT_TRUE(t1.empty()); + + { + InferenceEngine::ExecutableNetwork exec_net1 = ie.LoadNetwork(simpleCnnNetwork, deviceName); + + 
ASSERT_NO_THROW(p = ie.GetMetric(deviceName, GPU_METRIC_KEY(MEMORY_STATISTICS))); + std::map t2 = p; + + ASSERT_FALSE(t2.empty()); + for (auto &&kv : t2) { + ASSERT_NE(kv.second, 0); + } + { + InferenceEngine::ExecutableNetwork exec_net2 = ie.LoadNetwork(actualCnnNetwork, deviceName); + + ASSERT_NO_THROW(p = ie.GetMetric(deviceName, GPU_METRIC_KEY(MEMORY_STATISTICS))); + std::map t3 = p; + + ASSERT_FALSE(t3.empty()); + for (auto &&kv : t3) { + ASSERT_NE(kv.second, 0); + } + } + ASSERT_NO_THROW(p = ie.GetMetric(deviceName, GPU_METRIC_KEY(MEMORY_STATISTICS))); + std::map t4 = p; + + ASSERT_FALSE(t4.empty()); + for (auto &&kv : t4) { + ASSERT_NE(kv.second, 0); + if (kv.first.find("_cur") != std::string::npos) { + auto iter = t2.find(kv.first); + if (iter != t2.end()) { + ASSERT_EQ(t2[kv.first], kv.second); + } + } + } + } + ASSERT_NO_THROW(p = ie.GetMetric(deviceName, GPU_METRIC_KEY(MEMORY_STATISTICS))); + std::map t5 = p; + + ASSERT_FALSE(t5.empty()); + for (auto &&kv : t5) { + if (kv.first.find("_cur") != std::string::npos) { + ASSERT_EQ(kv.second, 0); + } + } + ASSERT_METRIC_SUPPORTED_IE(GPU_METRIC_KEY(MEMORY_STATISTICS)); +} + +INSTANTIATE_TEST_SUITE_P( + nightly_IEClassGetMetricTest, IEClassGetMetricTest_GPU_MEMORY_STATISTICS_CHECK_VALUES, + ::testing::Values("GPU") +); + +using IEClassGetMetricTest_GPU_MEMORY_STATISTICS_MULTI_THREADS = BehaviorTestsUtils::IEClassBaseTestP; +TEST_P(IEClassGetMetricTest_GPU_MEMORY_STATISTICS_MULTI_THREADS, GetMetricAndPrintNoThrow) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + InferenceEngine::Core ie; + InferenceEngine::Parameter p; + + std::atomic counter{0u}; + std::vector threads(2); + // key: thread id, value: executable network + std::map exec_net_map; + std::vector networks; + networks.emplace_back(simpleCnnNetwork); + networks.emplace_back(simpleCnnNetwork); + + InferenceEngine::ExecutableNetwork exec_net1 = ie.LoadNetwork(simpleCnnNetwork, deviceName); + + ASSERT_NO_THROW(p = ie.GetMetric(deviceName, 
GPU_METRIC_KEY(MEMORY_STATISTICS))); + std::map t1 = p; + + ASSERT_FALSE(t1.empty()); + for (auto &&kv : t1) { + ASSERT_NE(kv.second, 0); + } + + for (auto & thread : threads) { + thread = std::thread([&](){ + auto value = counter++; + exec_net_map[value] = ie.LoadNetwork(networks[value], deviceName); + }); + } + + for (auto & thread : threads) { + if (thread.joinable()) { + thread.join(); + } + } + + ASSERT_NO_THROW(p = ie.GetMetric(deviceName, GPU_METRIC_KEY(MEMORY_STATISTICS))); + std::map t2 = p; + + ASSERT_FALSE(t2.empty()); + for (auto &&kv : t2) { + ASSERT_NE(kv.second, 0); + auto iter = t1.find(kv.first); + if (iter != t1.end()) { + ASSERT_EQ(kv.second, t1[kv.first] * 3); + } + } + + ASSERT_METRIC_SUPPORTED_IE(GPU_METRIC_KEY(MEMORY_STATISTICS)); +} + +INSTANTIATE_TEST_SUITE_P( + nightly_IEClassGetMetricTest, IEClassGetMetricTest_GPU_MEMORY_STATISTICS_MULTI_THREADS, + ::testing::Values("GPU") +); + // // IE Class GetConfig // @@ -162,27 +345,6 @@ INSTANTIATE_TEST_SUITE_P( ::testing::Values("GPU") ); -using IEClassExecutableNetworkGetMetricTest_GPU_MEMORY_STATISTICS = BehaviorTestsUtils::IEClassBaseTestP; -TEST_P(IEClassExecutableNetworkGetMetricTest_GPU_MEMORY_STATISTICS, GetMetricNoThrow) { - InferenceEngine::Core ie = BehaviorTestsUtils::createIECoreWithTemplate(); - InferenceEngine::Parameter p; - - InferenceEngine::ExecutableNetwork exeNetwork = ie.LoadNetwork(simpleCnnNetwork, deviceName); - - ASSERT_NO_THROW(p = exeNetwork.GetMetric(GPU_METRIC_KEY(MEMORY_STATISTICS))); - std::map t = p; - - std::cout << "Memory Statistics: " << std::endl; - for (auto &&kv : t) { - std::cout << kv.first << ": " << kv.second << " bytes" << std::endl; - } -} - -INSTANTIATE_TEST_SUITE_P( - nightly_IEClassExecutableNetworkGetMetricTest, IEClassExecutableNetworkGetMetricTest_GPU_MEMORY_STATISTICS, - ::testing::Values("GPU") -); - // IE Class Query network INSTANTIATE_TEST_SUITE_P( diff --git 
a/inference-engine/tests/functional/plugin/shared/include/behavior/plugin/core_integration.hpp b/inference-engine/tests/functional/plugin/shared/include/behavior/plugin/core_integration.hpp index 7213021011c..f1731a857f2 100644 --- a/inference-engine/tests/functional/plugin/shared/include/behavior/plugin/core_integration.hpp +++ b/inference-engine/tests/functional/plugin/shared/include/behavior/plugin/core_integration.hpp @@ -5,6 +5,7 @@ #pragma once #include +#include #include "base/behavior_test_utils.hpp" #include "common_test_utils/common_utils.hpp" diff --git a/inference-engine/thirdparty/clDNN/api/cldnn/runtime/engine.hpp b/inference-engine/thirdparty/clDNN/api/cldnn/runtime/engine.hpp index a40d6b7ed91..8114009dd9a 100644 --- a/inference-engine/thirdparty/clDNN/api/cldnn/runtime/engine.hpp +++ b/inference-engine/thirdparty/clDNN/api/cldnn/runtime/engine.hpp @@ -108,7 +108,7 @@ public: /// Returns statistics of GPU memory allocated by engine in current process for all allocation types. 
/// @note It contains information about both current and peak memory usage - void get_memory_statistics(std::map* statistics) const; + std::map get_memory_statistics() const; /// Adds @p bytes count to currently used memory size of the specified allocation @p type void add_memory_used(uint64_t bytes, allocation_type type); diff --git a/inference-engine/thirdparty/clDNN/runtime/engine.cpp b/inference-engine/thirdparty/clDNN/runtime/engine.cpp index d9c4bf32308..7e40a4ebf52 100644 --- a/inference-engine/thirdparty/clDNN/runtime/engine.cpp +++ b/inference-engine/thirdparty/clDNN/runtime/engine.cpp @@ -149,17 +149,19 @@ uint64_t engine::get_used_device_memory(allocation_type type) const { return memory_usage; } -void engine::get_memory_statistics(std::map* statistics) const { +std::map engine::get_memory_statistics() const { + std::map statistics; for (auto const& m : _memory_usage_map) { std::ostringstream oss; oss << m.first << "_current"; - (*statistics)[oss.str()] = m.second.load(); + statistics[oss.str()] = m.second.load(); } for (auto const& m : _peak_memory_usage_map) { std::ostringstream oss; oss << m.first << "_peak"; - (*statistics)[oss.str()] = m.second.load(); + statistics[oss.str()] = m.second.load(); } + return statistics; } void engine::add_memory_used(size_t bytes, allocation_type type) {