diff --git a/docs/IE_DG/InferenceEngine_QueryAPI.md b/docs/IE_DG/InferenceEngine_QueryAPI.md
index 90fa6bfb2cb..f5b9399a240 100644
--- a/docs/IE_DG/InferenceEngine_QueryAPI.md
+++ b/docs/IE_DG/InferenceEngine_QueryAPI.md
@@ -71,6 +71,10 @@ Or the current temperature of `MYRIAD` device:
 
 @snippet snippets/InferenceEngine_QueryAPI4.cpp part4
 
+The code below demonstrates how to get memory statistics of `GPU` device:
+
+@snippet snippets/InferenceEngine_QueryAPI6.cpp part6
+
 ### GetConfig()
 
 The method is used to get information about configuration values the executable network has been created with:
diff --git a/docs/snippets/InferenceEngine_QueryAPI6.cpp b/docs/snippets/InferenceEngine_QueryAPI6.cpp
new file mode 100644
index 00000000000..b13812107bf
--- /dev/null
+++ b/docs/snippets/InferenceEngine_QueryAPI6.cpp
@@ -0,0 +1,12 @@
+#include <ie_core.hpp>
+
+int main() {
+using namespace InferenceEngine;
+//! [part6]
+InferenceEngine::Core core;
+auto network = core.ReadNetwork("sample.xml");
+auto exeNetwork = core.LoadNetwork(network, "GPU");
+std::map<std::string, uint64_t> statistics_map = exeNetwork.GetMetric(GPU_METRIC_KEY(MEMORY_STATISTICS));
+//! [part6]
+return 0;
+}
diff --git a/inference-engine/src/cldnn_engine/cldnn_executable_network.cpp b/inference-engine/src/cldnn_engine/cldnn_executable_network.cpp
index be871ed5ab2..31c31ac2e6f 100644
--- a/inference-engine/src/cldnn_engine/cldnn_executable_network.cpp
+++ b/inference-engine/src/cldnn_engine/cldnn_executable_network.cpp
@@ -128,6 +128,7 @@ InferenceEngine::Parameter CLDNNExecNetwork::GetMetric(const std::string &name)
         metrics.push_back(METRIC_KEY(SUPPORTED_METRICS));
         metrics.push_back(METRIC_KEY(SUPPORTED_CONFIG_KEYS));
         metrics.push_back(METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS));
+        metrics.push_back(GPU_METRIC_KEY(MEMORY_STATISTICS));
         IE_SET_METRIC_RETURN(SUPPORTED_METRICS, metrics);
     } else if (name == METRIC_KEY(SUPPORTED_CONFIG_KEYS)) {
         std::vector<std::string> configKeys;
@@ -139,6 +140,16 @@ InferenceEngine::Parameter CLDNNExecNetwork::GetMetric(const std::string &name)
         if (m_config.perfHintsConfig.ovPerfHint != CONFIG_VALUE(LATENCY))
             nr *= 2;
         IE_SET_METRIC_RETURN(OPTIMAL_NUMBER_OF_INFER_REQUESTS, nr);
+    } else if (name == GPU_METRIC_KEY(MEMORY_STATISTICS)) {
+        std::map<std::string, uint64_t> statistics;
+        if (m_context != nullptr) {
+            auto impl = getContextImpl(m_context);
+            impl->acquire_lock();
+            std::shared_ptr<cldnn::engine> eng = impl->GetEngine();
+            eng->get_memory_statistics(&statistics);
+            impl->release_lock();
+        }
+        IE_SET_METRIC_RETURN(GPU_MEMORY_STATISTICS, statistics);
     } else {
         IE_THROW() << "Unsupported ExecutableNetwork metric: " << name;
     }
diff --git a/inference-engine/src/inference_engine/include/ie/gpu/gpu_config.hpp b/inference-engine/src/inference_engine/include/ie/gpu/gpu_config.hpp
index 003af2bb689..5a9dc1f0628 100644
--- a/inference-engine/src/inference_engine/include/ie/gpu/gpu_config.hpp
+++ b/inference-engine/src/inference_engine/include/ie/gpu/gpu_config.hpp
@@ -45,6 +45,12 @@ DECLARE_GPU_METRIC_KEY(UARCH_VERSION, std::string);
  */
 DECLARE_GPU_METRIC_KEY(EXECUTION_UNITS_COUNT, int);
 
+/**
+ * @brief Metric to get statistics of GPU memory allocated by engine for each allocation type
+ * It contains information about both current and peak memory usage
+ */
+DECLARE_GPU_METRIC_KEY(MEMORY_STATISTICS, std::map<std::string, uint64_t>);
+
 /**
  * @brief Possible return value for OPTIMIZATION_CAPABILITIES metric
  * - "HW_MATMUL" - Defines if device has hardware block for matrix multiplication
diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/core_integration.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/core_integration.cpp
index a260d3a28d3..b4957b704c6 100644
--- a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/core_integration.cpp
+++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/core_integration.cpp
@@ -166,6 +166,28 @@ INSTANTIATE_TEST_SUITE_P(
 // Executable Network GetMetric
 //
 
+using IEClassExecutableNetworkGetMetricTest_GPU_MEMORY_STATISTICS = IEClassBaseTestP;
+TEST_P(IEClassExecutableNetworkGetMetricTest_GPU_MEMORY_STATISTICS, GetMetricNoThrow) {
+    SKIP_IF_CURRENT_TEST_IS_DISABLED();
+    Core ie;
+    Parameter p;
+
+    ExecutableNetwork exeNetwork = ie.LoadNetwork(simpleNetwork, deviceName);
+
+    ASSERT_NO_THROW(p = exeNetwork.GetMetric(GPU_METRIC_KEY(MEMORY_STATISTICS)));
+    std::map<std::string, uint64_t> t = p;
+
+    std::cout << "Memory Statistics: " << std::endl;
+    for (auto &&kv : t) {
+        std::cout << kv.first << ": " << kv.second << " bytes" << std::endl;
+    }
+}
+
+INSTANTIATE_TEST_SUITE_P(
+        nightly_IEClassExecutableNetworkGetMetricTest, IEClassExecutableNetworkGetMetricTest_GPU_MEMORY_STATISTICS,
+        ::testing::Values("GPU")
+);
+
 INSTANTIATE_TEST_SUITE_P(
         nightly_IEClassExecutableNetworkGetMetricTest, IEClassExecutableNetworkGetMetricTest_OPTIMAL_NUMBER_OF_INFER_REQUESTS,
         ::testing::Values("GPU", "MULTI:GPU", "HETERO:GPU", "AUTO:GPU,CPU")
diff --git a/inference-engine/thirdparty/clDNN/api/cldnn/runtime/engine.hpp b/inference-engine/thirdparty/clDNN/api/cldnn/runtime/engine.hpp
index acec6f58e58..6f9ebf75e41 100644
--- a/inference-engine/thirdparty/clDNN/api/cldnn/runtime/engine.hpp
+++ b/inference-engine/thirdparty/clDNN/api/cldnn/runtime/engine.hpp
@@ -109,6 +109,10 @@ public:
     /// Returns the amount of GPU memory specified allocation @p type that currently used by the engine
     uint64_t get_used_device_memory(allocation_type type) const;
 
+    /// Returns statistics of GPU memory allocated by engine in current process for all allocation types.
+    /// @note It contains information about both current and peak memory usage
+    void get_memory_statistics(std::map<std::string, uint64_t>* statistics) const;
+
     /// Adds @p bytes count to currently used memory size of the specified allocation @p type
     void add_memory_used(uint64_t bytes, allocation_type type);
 
@@ -153,9 +157,10 @@ protected:
     engine(const device::ptr device, const engine_configuration& configuration);
     const device::ptr _device;
     engine_configuration _configuration;
+    mutable std::mutex _mutex;
 
-    std::map<allocation_type, std::atomic<uint64_t>> memory_usage_map;
-    std::map<allocation_type, std::atomic<uint64_t>> peak_memory_usage_map;
+    std::map<allocation_type, std::atomic<uint64_t>> _memory_usage_map;
+    std::map<allocation_type, std::atomic<uint64_t>> _peak_memory_usage_map;
 };
 
 }  // namespace cldnn
diff --git a/inference-engine/thirdparty/clDNN/runtime/engine.cpp b/inference-engine/thirdparty/clDNN/runtime/engine.cpp
index 3738ec2ae9d..e0650bc161b 100644
--- a/inference-engine/thirdparty/clDNN/runtime/engine.cpp
+++ b/inference-engine/thirdparty/clDNN/runtime/engine.cpp
@@ -120,48 +120,64 @@ memory_ptr engine::share_surface(const layout& layout, shared_surface surf, uint32_t plane) {
 #endif  // _WIN32
 
 uint64_t engine::get_max_used_device_memory() const {
+    std::lock_guard<std::mutex> guard(_mutex);
     uint64_t total_peak_memory_usage {0};
-    for (auto const& m : peak_memory_usage_map) {
+    for (auto const& m : _peak_memory_usage_map) {
         total_peak_memory_usage += m.second.load();
     }
     return total_peak_memory_usage;
 }
 
 uint64_t engine::get_max_used_device_memory(allocation_type type) const {
+    std::lock_guard<std::mutex> guard(_mutex);
     uint64_t peak_memory_usage {0};
-    auto iter = peak_memory_usage_map.find(type);
-    if (iter != peak_memory_usage_map.end()) {
+    auto iter = _peak_memory_usage_map.find(type);
+    if (iter != _peak_memory_usage_map.end()) {
         peak_memory_usage = iter->second.load();
     }
     return peak_memory_usage;
 }
 
 uint64_t engine::get_used_device_memory(allocation_type type) const {
+    std::lock_guard<std::mutex> guard(_mutex);
     uint64_t memory_usage {0};
-    auto iter = memory_usage_map.find(type);
-    if (iter != memory_usage_map.end()) {
+    auto iter = _memory_usage_map.find(type);
+    if (iter != _memory_usage_map.end()) {
         memory_usage = iter->second.load();
     }
     return memory_usage;
 }
 
-void engine::add_memory_used(size_t bytes, allocation_type type) {
-    if (!memory_usage_map.count(type) && !peak_memory_usage_map.count(type)) {
-        static std::mutex m;
-        std::lock_guard<std::mutex> guard(m);
-        memory_usage_map[type] = 0;
-        peak_memory_usage_map[type] = 0;
+void engine::get_memory_statistics(std::map<std::string, uint64_t>* statistics) const {
+    for (auto const& m : _memory_usage_map) {
+        std::ostringstream oss;
+        oss << m.first << "_current";
+        (*statistics)[oss.str()] = m.second.load();
     }
-    memory_usage_map[type] += bytes;
-    if (memory_usage_map[type] > peak_memory_usage_map[type]) {
-        peak_memory_usage_map[type] = memory_usage_map[type].load();
+    for (auto const& m : _peak_memory_usage_map) {
+        std::ostringstream oss;
+        oss << m.first << "_peak";
+        (*statistics)[oss.str()] = m.second.load();
+    }
+}
+
+void engine::add_memory_used(size_t bytes, allocation_type type) {
+    std::lock_guard<std::mutex> guard(_mutex);
+    if (!_memory_usage_map.count(type) && !_peak_memory_usage_map.count(type)) {
+        _memory_usage_map[type] = 0;
+        _peak_memory_usage_map[type] = 0;
+    }
+    _memory_usage_map[type] += bytes;
+    if (_memory_usage_map[type] > _peak_memory_usage_map[type]) {
+        _peak_memory_usage_map[type] = _memory_usage_map[type].load();
     }
 }
 
 void engine::subtract_memory_used(size_t bytes, allocation_type type) {
-    auto iter = memory_usage_map.find(type);
-    if (iter != memory_usage_map.end()) {
-        memory_usage_map[type] -= bytes;
+    std::lock_guard<std::mutex> guard(_mutex);
+    auto iter = _memory_usage_map.find(type);
+    if (iter != _memory_usage_map.end()) {
+        _memory_usage_map[type] -= bytes;
     } else {
         throw std::runtime_error("Attempt to free unallocated memory");
     }
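
Taken together, the documentation snippet and the behavior test above show the intended client-side usage of the new metric. The following is a minimal standalone sketch, not part of the patch: it assumes the public headers <ie_core.hpp> and <gpu/gpu_config.hpp> from this include layout, and a hypothetical model file "sample.xml".

// Standalone usage sketch (illustrative only): query the MEMORY_STATISTICS metric
// introduced by this change and print the per-allocation-type counters.
#include <ie_core.hpp>
#include <gpu/gpu_config.hpp>

#include <cstdint>
#include <iostream>
#include <map>
#include <string>

int main() {
    InferenceEngine::Core core;
    auto network = core.ReadNetwork("sample.xml");       // hypothetical model path
    auto exeNetwork = core.LoadNetwork(network, "GPU");

    // Keys are formed as "<allocation_type>_current" and "<allocation_type>_peak",
    // values are sizes in bytes (see engine::get_memory_statistics above).
    std::map<std::string, uint64_t> statistics =
        exeNetwork.GetMetric(GPU_METRIC_KEY(MEMORY_STATISTICS));

    for (const auto& kv : statistics) {
        std::cout << kv.first << ": " << kv.second << " bytes" << std::endl;
    }
    return 0;
}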